From caf4ec197dcd1e9b03ebf367e8cf72e98afaea87 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Tue, 29 Mar 2022 08:16:56 -0400 Subject: [PATCH] import valgrind-3.18.1-7.el8 --- .gitignore | 2 +- .valgrind.metadata | 2 +- .../valgrind-3.17.0-clone-parent-res.patch | 21 - SOURCES/valgrind-3.17.0-debuginfod.patch | 30 - .../valgrind-3.17.0-ppc64-isa-3.1-tests.patch | 1712 ------------ SOURCES/valgrind-3.17.0-ppc64-isa-3.1.patch | 1720 ------------ SOURCES/valgrind-3.17.0-s390-prep.patch | 2283 ---------------- .../valgrind-3.17.0-s390-z13-vec-fix.patch | 46 - SOURCES/valgrind-3.17.0-s390-z15.patch | 2413 ----------------- .../valgrind-3.17.0-s390_insn_as_string.patch | 54 - ...algrind-3.18.1-amd64-more-spec-rules.patch | 105 + .../valgrind-3.18.1-arm64-atomic-align.patch | 163 ++ .../valgrind-3.18.1-arm64-atomics-rdm.patch | 16 + ...valgrind-3.18.1-arm64-doubleword-cas.patch | 121 + .../valgrind-3.18.1-arm64-ldaxp-stlxp.patch | 1440 ++++++++++ SOURCES/valgrind-3.18.1-condvar.patch | 284 ++ .../valgrind-3.18.1-demangle-namespace.patch | 35 + SOURCES/valgrind-3.18.1-dhat-tests-copy.patch | 20 + ...algrind-3.18.1-gdbserver_tests-hwcap.patch | 25 + SOURCES/valgrind-3.18.1-ppc-pstq-tests.patch | 1876 +++++++++++++ SOURCES/valgrind-3.18.1-ppc-pstq.patch | 47 + ...algrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch | 60 + .../valgrind-3.18.1-rust-v0-demangle.patch | 137 + SOURCES/valgrind-3.18.1-s390x-EXRL.patch | 549 ++++ .../valgrind-3.9.0-helgrind-race-supp.patch | 15 - SPECS/valgrind.spec | 143 +- 26 files changed, 4956 insertions(+), 8363 deletions(-) delete mode 100644 SOURCES/valgrind-3.17.0-clone-parent-res.patch delete mode 100644 SOURCES/valgrind-3.17.0-debuginfod.patch delete mode 100644 SOURCES/valgrind-3.17.0-ppc64-isa-3.1-tests.patch delete mode 100644 SOURCES/valgrind-3.17.0-ppc64-isa-3.1.patch delete mode 100644 SOURCES/valgrind-3.17.0-s390-prep.patch delete mode 100644 SOURCES/valgrind-3.17.0-s390-z13-vec-fix.patch delete mode 100644 
SOURCES/valgrind-3.17.0-s390-z15.patch delete mode 100644 SOURCES/valgrind-3.17.0-s390_insn_as_string.patch create mode 100644 SOURCES/valgrind-3.18.1-amd64-more-spec-rules.patch create mode 100644 SOURCES/valgrind-3.18.1-arm64-atomic-align.patch create mode 100644 SOURCES/valgrind-3.18.1-arm64-atomics-rdm.patch create mode 100644 SOURCES/valgrind-3.18.1-arm64-doubleword-cas.patch create mode 100644 SOURCES/valgrind-3.18.1-arm64-ldaxp-stlxp.patch create mode 100644 SOURCES/valgrind-3.18.1-condvar.patch create mode 100644 SOURCES/valgrind-3.18.1-demangle-namespace.patch create mode 100644 SOURCES/valgrind-3.18.1-dhat-tests-copy.patch create mode 100644 SOURCES/valgrind-3.18.1-gdbserver_tests-hwcap.patch create mode 100644 SOURCES/valgrind-3.18.1-ppc-pstq-tests.patch create mode 100644 SOURCES/valgrind-3.18.1-ppc-pstq.patch create mode 100644 SOURCES/valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch create mode 100644 SOURCES/valgrind-3.18.1-rust-v0-demangle.patch create mode 100644 SOURCES/valgrind-3.18.1-s390x-EXRL.patch delete mode 100644 SOURCES/valgrind-3.9.0-helgrind-race-supp.patch diff --git a/.gitignore b/.gitignore index 8036c6c..1219e4a 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/valgrind-3.17.0.tar.bz2 +SOURCES/valgrind-3.18.1.tar.bz2 diff --git a/.valgrind.metadata b/.valgrind.metadata index c47f2af..92f2922 100644 --- a/.valgrind.metadata +++ b/.valgrind.metadata @@ -1 +1 @@ -7770912c7465f93a90c5a9d5c1b1b036ebec04fd SOURCES/valgrind-3.17.0.tar.bz2 +0a694a8d0c2152978bf64b67ad0b3dd972bbeb54 SOURCES/valgrind-3.18.1.tar.bz2 diff --git a/SOURCES/valgrind-3.17.0-clone-parent-res.patch b/SOURCES/valgrind-3.17.0-clone-parent-res.patch deleted file mode 100644 index 8d03cea..0000000 --- a/SOURCES/valgrind-3.17.0-clone-parent-res.patch +++ /dev/null @@ -1,21 +0,0 @@ -commit e08a82991a9b9dc87c13f2b89273f25f97d14baf -Author: Tom Hughes -Date: Tue Apr 6 22:44:36 2021 +0100 - - Only process clone results in the parent thread - - Fixes BZ#423963 - -diff 
--git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c -index 5ae4e6613..c59d8ee26 100644 ---- a/coregrind/m_syswrap/syswrap-linux.c -+++ b/coregrind/m_syswrap/syswrap-linux.c -@@ -940,7 +940,7 @@ PRE(sys_clone) - ("Valgrind does not support general clone()."); - } - -- if (SUCCESS) { -+ if (SUCCESS && RES != 0) { - if (ARG_FLAGS & (VKI_CLONE_PARENT_SETTID | VKI_CLONE_PIDFD)) - POST_MEM_WRITE(ARG3, sizeof(Int)); - if (ARG_FLAGS & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) diff --git a/SOURCES/valgrind-3.17.0-debuginfod.patch b/SOURCES/valgrind-3.17.0-debuginfod.patch deleted file mode 100644 index 5109a4b..0000000 --- a/SOURCES/valgrind-3.17.0-debuginfod.patch +++ /dev/null @@ -1,30 +0,0 @@ -commit 93104368952c37268da724231487058ea3eaf1dc -Author: Tom Hughes -Date: Thu May 20 17:16:06 2021 +0100 - - Don't look for separate debuginfo if the image has a .debug_info section - - Fixes BZ#435908 - -diff --git a/coregrind/m_debuginfo/readelf.c b/coregrind/m_debuginfo/readelf.c -index b0f062ddc..e424e3e7e 100644 ---- a/coregrind/m_debuginfo/readelf.c -+++ b/coregrind/m_debuginfo/readelf.c -@@ -2879,13 +2879,15 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) - /* Look for a build-id */ - HChar* buildid = find_buildid(mimg, False, False); - -- /* Look for a debug image that matches either the build-id or -+ /* If we don't have a .debug_info section in the main image then -+ look for a debug image that matches either the build-id or - the debuglink-CRC32 in the main image. If the main image - doesn't contain either of those then this won't even bother - to try looking. This looks in all known places, including - the --extra-debuginfo-path if specified and on the - --debuginfo-server if specified. */ -- if (buildid != NULL || debuglink_escn.img != NULL) { -+ if (debug_info_escn.img == NULL && -+ (buildid != NULL || debuglink_escn.img != NULL)) { - /* Do have a debuglink section? 
*/ - if (debuglink_escn.img != NULL) { - UInt crc_offset diff --git a/SOURCES/valgrind-3.17.0-ppc64-isa-3.1-tests.patch b/SOURCES/valgrind-3.17.0-ppc64-isa-3.1-tests.patch deleted file mode 100644 index b7f1a6c..0000000 --- a/SOURCES/valgrind-3.17.0-ppc64-isa-3.1-tests.patch +++ /dev/null @@ -1,1712 +0,0 @@ -commit 4bcc6c8a97c10c4dd41b35bd3b3035ec4037d524 -Author: Carl Love -Date: Mon Nov 16 19:09:47 2020 -0600 - - VSX Permute Control Vector Generate Operation tests. - -diff --git a/NEWS b/NEWS -index aa398cf54..2e42e74b2 100644 ---- a/NEWS -+++ b/NEWS -@@ -154,6 +154,7 @@ where XXXXXX is the bug number as listed below. - 428909 helgrind: need to intercept duplicate libc definitions for Fedora 33 - 429352 PPC ISA 3.1 support is missing, part 7 - 429354 PPC ISA 3.1 support is missing, part 8 -+429375 PPC ISA 3.1 support is missing, part 9 - 429692 unhandled ppc64le-linux syscall: 147 (getsid) - 429864 s390x: C++ atomic test_and_set yields false-positive memcheck - diagnostics -diff --git a/none/tests/ppc64/test_isa_3_1_XT.c b/none/tests/ppc64/test_isa_3_1_XT.c -index c16ddedac..a54e8763a 100644 ---- a/none/tests/ppc64/test_isa_3_1_XT.c -+++ b/none/tests/ppc64/test_isa_3_1_XT.c -@@ -491,6 +491,54 @@ static void test_pstxv_4 (void) { - static void test_pstxv_0 (void) { - __asm__ __volatile__ ("pstxv %x0, 0(%1), 0" :: "wa" (vec_xs), "r" (ra) ); - } -+static void test_xxgenpcvbm_imm0 (void) { -+ __asm__ __volatile__ ("xxgenpcvbm %x0, %1, 0" : "=wa" (vec_xt) : "v" (vrb) ); -+} -+static void test_xxgenpcvbm_imm1 (void) { -+ __asm__ __volatile__ ("xxgenpcvbm %x0, %1, 1" : "=wa" (vec_xt) : "v" (vrb) ); -+} -+static void test_xxgenpcvbm_imm2 (void) { -+ __asm__ __volatile__ ("xxgenpcvbm %x0, %1, 2" : "=wa" (vec_xt) : "v" (vrb) ); -+} -+static void test_xxgenpcvbm_imm3 (void) { -+ __asm__ __volatile__ ("xxgenpcvbm %x0, %1, 3" : "=wa" (vec_xt) : "v" (vrb) ); -+} -+static void test_xxgenpcvhm_imm0 (void) { -+ __asm__ __volatile__ ("xxgenpcvhm %x0, %1, 0" : "=wa" (vec_xt) : "v" 
(vrb) ); -+} -+static void test_xxgenpcvhm_imm1 (void) { -+ __asm__ __volatile__ ("xxgenpcvhm %x0, %1, 1" : "=wa" (vec_xt) : "v" (vrb) ); -+} -+static void test_xxgenpcvhm_imm2 (void) { -+ __asm__ __volatile__ ("xxgenpcvhm %x0, %1, 2" : "=wa" (vec_xt) : "v" (vrb) ); -+} -+static void test_xxgenpcvhm_imm3 (void) { -+ __asm__ __volatile__ ("xxgenpcvhm %x0, %1, 3" : "=wa" (vec_xt) : "v" (vrb) ); -+} -+static void test_xxgenpcvwm_imm0 (void) { -+ __asm__ __volatile__ ("xxgenpcvwm %x0, %1, 0" : "=wa" (vec_xt) : "v" (vrb) ); -+} -+static void test_xxgenpcvwm_imm1 (void) { -+ __asm__ __volatile__ ("xxgenpcvwm %x0, %1, 1" : "=wa" (vec_xt) : "v" (vrb) ); -+} -+static void test_xxgenpcvwm_imm2 (void) { -+ __asm__ __volatile__ ("xxgenpcvwm %x0, %1, 2" : "=wa" (vec_xt) : "v" (vrb) ); -+} -+static void test_xxgenpcvwm_imm3 (void) { -+ __asm__ __volatile__ ("xxgenpcvwm %x0, %1, 3" : "=wa" (vec_xt) : "v" (vrb) ); -+} -+static void test_xxgenpcvdm_imm0 (void) { -+ __asm__ __volatile__ ("xxgenpcvdm %x0, %1, 0" : "=wa" (vec_xt) : "v" (vrb) ); -+} -+static void test_xxgenpcvdm_imm1 (void) { -+ __asm__ __volatile__ ("xxgenpcvdm %x0, %1, 1" : "=wa" (vec_xt) : "v" (vrb) ); -+} -+static void test_xxgenpcvdm_imm2 (void) { -+ __asm__ __volatile__ ("xxgenpcvdm %x0, %1, 2" : "=wa" (vec_xt) : "v" (vrb) ); -+} -+static void test_xxgenpcvdm_imm3 (void) { -+ __asm__ __volatile__ ("xxgenpcvdm %x0, %1, 3" : "=wa" (vec_xt) : "v" (vrb) ); -+} - - static test_list_t testgroup_generic[] = { - { &test_lxvkq_imm1, "lxvkq imm1", "XT,UIM"}, /* bcwp */ -@@ -592,6 +640,22 @@ static test_list_t testgroup_generic[] = { - { &test_xxblendvw, "xxblendvw", "XT,XA,XB,XC"}, /* bcs */ - { &test_xxeval_imm0, "xxeval imm0", "XT,XA,XB,XC,IMM"}, /* bcwp */ - { &test_xxeval_imm3, "xxeval imm3", "XT,XA,XB,XC,IMM"}, /* bcwp */ -+ { &test_xxgenpcvbm_imm0, "xxgenpcvbm imm0", "XT,VRB,IMM"}, /* bcwp */ -+ { &test_xxgenpcvbm_imm1, "xxgenpcvbm imm1", "XT,VRB,IMM"}, /* bcwp */ -+ { &test_xxgenpcvbm_imm2, "xxgenpcvbm imm2", 
"XT,VRB,IMM"}, /* bcwp */ -+ { &test_xxgenpcvbm_imm3, "xxgenpcvbm imm3", "XT,VRB,IMM"}, /* bcwp */ -+ { &test_xxgenpcvdm_imm0, "xxgenpcvdm imm0", "XT,VRB,IMM"}, /* bcwp */ -+ { &test_xxgenpcvdm_imm1, "xxgenpcvdm imm1", "XT,VRB,IMM"}, /* bcwp */ -+ { &test_xxgenpcvdm_imm2, "xxgenpcvdm imm2", "XT,VRB,IMM"}, /* bcwp */ -+ { &test_xxgenpcvdm_imm3, "xxgenpcvdm imm3", "XT,VRB,IMM"}, /* bcwp */ -+ { &test_xxgenpcvhm_imm0, "xxgenpcvhm imm0", "XT,VRB,IMM"}, /* bcwp */ -+ { &test_xxgenpcvhm_imm1, "xxgenpcvhm imm1", "XT,VRB,IMM"}, /* bcwp */ -+ { &test_xxgenpcvhm_imm2, "xxgenpcvhm imm2", "XT,VRB,IMM"}, /* bcwp */ -+ { &test_xxgenpcvhm_imm3, "xxgenpcvhm imm3", "XT,VRB,IMM"}, /* bcwp */ -+ { &test_xxgenpcvwm_imm0, "xxgenpcvwm imm0", "XT,VRB,IMM"}, /* bcwp */ -+ { &test_xxgenpcvwm_imm1, "xxgenpcvwm imm1", "XT,VRB,IMM"}, /* bcwp */ -+ { &test_xxgenpcvwm_imm2, "xxgenpcvwm imm2", "XT,VRB,IMM"}, /* bcwp */ -+ { &test_xxgenpcvwm_imm3, "xxgenpcvwm imm3", "XT,VRB,IMM"}, /* bcwp */ - { &test_xxpermx_imm0, "xxpermx imm0", "XT,XA,XB,XC,UIM"}, /* bcwp */ - { &test_xxpermx_imm3, "xxpermx imm3", "XT,XA,XB,XC,UIM"}, /* bcwp */ - { &test_xxsplti32dx_ix0_imm0xa5a5a5a5, "xxsplti32dx ix0_imm0xa5a5a5a5", "XT,IX,IMM32"}, /* bcwp */ -diff --git a/none/tests/ppc64/test_isa_3_1_XT.stdout.exp b/none/tests/ppc64/test_isa_3_1_XT.stdout.exp -index efa95884e..8b5f1d1a3 100644 ---- a/none/tests/ppc64/test_isa_3_1_XT.stdout.exp -+++ b/none/tests/ppc64/test_isa_3_1_XT.stdout.exp -@@ -4644,6 +4644,230 @@ xxeval imm3 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 - xxeval imm3 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => 8000000000000000 7f800000ff800000 - xxeval imm3 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => 8000000000000000 7f800000ff800000 - -+xxgenpcvbm imm0 7f800000ff800000,ff8000007f800000 => 18031a1b04051e1f 1121314021617 -+xxgenpcvbm imm0 
ff8000007f800000,ff7ffffe7f7ffffe => 5061a1b1c071e1f 11010214150304 -+xxgenpcvbm imm0 ff7ffffe7f7ffffe,0080000e8080000e => 31904051c1d0607 1000121301021617 -+xxgenpcvbm imm0 0080000e8080000e,0180055e0180077e => 18021a1b03041e1f 1000121314011617 -+xxgenpcvbm imm0 0180055e0180077e,0000111e8000222e => 18011a1b1c021e1f 1011121300151617 -+xxgenpcvbm imm0 0000111e8000222e,7ff0000000000000 => 18191a1b011d1e1f 1000121314151617 -+xxgenpcvbm imm0 7ff0000000000000,fff0000000000000 => 18021a1b1c1d1e1f 1121314151617 -+xxgenpcvbm imm0 fff0000000000000,2208400000000000 => 11a1b1c1d1e1f 1011121314151617 -+xxgenpcvbm imm0 2208400000000000,0000000000000009 => 18191a1b1c1d1e1f 1011121314151617 -+xxgenpcvbm imm0 0000000000000009,ffff000180000001 => 18191a1b1c1d1e1f 1121302151617 -+xxgenpcvbm imm0 ffff000180000001,0000000000000000 => 11a1b021d1e1f 1011121314151617 -+xxgenpcvbm imm0 0000000000000000,8000000000000000 => 18191a1b1c1d1e1f 11121314151617 -+xxgenpcvbm imm0 8000000000000000,7f800000ff800000 => 3191a1b1c1d1e1f 1000121301021617 -+ -+xxgenpcvbm imm1 7f800000ff800000,ff8000007f800000 => 0 105090c0d0000 -+xxgenpcvbm imm1 ff8000007f800000,ff7ffffe7f7ffffe => 0 203060708090d -+xxgenpcvbm imm1 ff7ffffe7f7ffffe,0080000e8080000e => 0 10405080a0b0e0f -+xxgenpcvbm imm1 0080000e8080000e,0180055e0180077e => 0 105090c0d000000 -+xxgenpcvbm imm1 0180055e0180077e,0000111e8000222e => 0 4090d0000000000 -+xxgenpcvbm imm1 0000111e8000222e,7ff0000000000000 => 0 10c000000000000 -+xxgenpcvbm imm1 7ff0000000000000,fff0000000000000 => 0 1090000000000 -+xxgenpcvbm imm1 fff0000000000000,2208400000000000 => 0 809000000000000 -+xxgenpcvbm imm1 2208400000000000,0000000000000009 => 0 0 -+xxgenpcvbm imm1 0000000000000009,ffff000180000001 => 0 1040000000000 -+xxgenpcvbm imm1 ffff000180000001,0000000000000000 => 0 8090c0000000000 -+xxgenpcvbm imm1 0000000000000000,8000000000000000 => 0 0 -+xxgenpcvbm imm1 8000000000000000,7f800000ff800000 => 0 104050800000000 -+ -+xxgenpcvbm imm2 
7f800000ff800000,ff8000007f800000 => 1702151401001110 5041d1c1b031918 -+xxgenpcvbm imm2 ff8000007f800000,ff7ffffe7f7ffffe => 201151413001110 71e06051b1a0403 -+xxgenpcvbm imm2 ff7ffffe7f7ffffe,0080000e8080000e => 416030213120100 1f071d1c06051918 -+xxgenpcvbm imm2 0080000e8080000e,0180055e0180077e => 1702151401001110 1f041d1c1b031918 -+xxgenpcvbm imm2 0180055e0180077e,0000111e8000222e => 1701151413001110 1f1e1d1c021a1918 -+xxgenpcvbm imm2 0000111e8000222e,7ff0000000000000 => 1716151400121110 1f011d1c1b1a1918 -+xxgenpcvbm imm2 7ff0000000000000,fff0000000000000 => 1700151413121110 2011d1c1b1a1918 -+xxgenpcvbm imm2 fff0000000000000,2208400000000000 => 100151413121110 1f1e1d1c1b1a1918 -+xxgenpcvbm imm2 2208400000000000,0000000000000009 => 1716151413121110 1f1e1d1c1b1a1918 -+xxgenpcvbm imm2 0000000000000009,ffff000180000001 => 1716151413121110 2011d1c001a1918 -+xxgenpcvbm imm2 ffff000180000001,0000000000000000 => 201151400121110 1f1e1d1c1b1a1918 -+xxgenpcvbm imm2 0000000000000000,8000000000000000 => 1716151413121110 1e1d1c1b1a1918 -+xxgenpcvbm imm2 8000000000000000,7f800000ff800000 => 16151413121110 1f031d1c02011918 -+ -+xxgenpcvbm imm3 7f800000ff800000,ff8000007f800000 => f0e0a060302 0 -+xxgenpcvbm imm3 ff8000007f800000,ff7ffffe7f7ffffe => f0d0c0908070602 0 -+xxgenpcvbm imm3 ff7ffffe7f7ffffe,0080000e8080000e => e0b0a0705040100 0 -+xxgenpcvbm imm3 0080000e8080000e,0180055e0180077e => e0a060302 0 -+xxgenpcvbm imm3 0180055e0180077e,0000111e8000222e => b0602 0 -+xxgenpcvbm imm3 0000111e8000222e,7ff0000000000000 => e03 0 -+xxgenpcvbm imm3 7ff0000000000000,fff0000000000000 => f0e06 0 -+xxgenpcvbm imm3 fff0000000000000,2208400000000000 => 706 0 -+xxgenpcvbm imm3 2208400000000000,0000000000000009 => 0 0 -+xxgenpcvbm imm3 0000000000000009,ffff000180000001 => f0e0b 0 -+xxgenpcvbm imm3 ffff000180000001,0000000000000000 => 70603 0 -+xxgenpcvbm imm3 0000000000000000,8000000000000000 => f 0 -+xxgenpcvbm imm3 8000000000000000,7f800000ff800000 => e0b0a07 0 -+ -+xxgenpcvdm imm0 
7f800000ff800000,ff8000007f800000 => 18191a1b1c1d1e1f 1020304050607 -+xxgenpcvdm imm0 ff8000007f800000,ff7ffffe7f7ffffe => 8090a0b0c0d0e0f 1020304050607 -+xxgenpcvdm imm0 ff7ffffe7f7ffffe,0080000e8080000e => 1020304050607 1011121314151617 -+xxgenpcvdm imm0 0080000e8080000e,0180055e0180077e => 18191a1b1c1d1e1f 1011121314151617 -+xxgenpcvdm imm0 0180055e0180077e,0000111e8000222e => 18191a1b1c1d1e1f 1011121314151617 -+xxgenpcvdm imm0 0000111e8000222e,7ff0000000000000 => 18191a1b1c1d1e1f 1011121314151617 -+xxgenpcvdm imm0 7ff0000000000000,fff0000000000000 => 18191a1b1c1d1e1f 1020304050607 -+xxgenpcvdm imm0 fff0000000000000,2208400000000000 => 1020304050607 1011121314151617 -+xxgenpcvdm imm0 2208400000000000,0000000000000009 => 18191a1b1c1d1e1f 1011121314151617 -+xxgenpcvdm imm0 0000000000000009,ffff000180000001 => 18191a1b1c1d1e1f 1020304050607 -+xxgenpcvdm imm0 ffff000180000001,0000000000000000 => 1020304050607 1011121314151617 -+xxgenpcvdm imm0 0000000000000000,8000000000000000 => 18191a1b1c1d1e1f 1020304050607 -+xxgenpcvdm imm0 8000000000000000,7f800000ff800000 => 1020304050607 1011121314151617 -+ -+xxgenpcvdm imm1 7f800000ff800000,ff8000007f800000 => 0 1020304050607 -+xxgenpcvdm imm1 ff8000007f800000,ff7ffffe7f7ffffe => 8090a0b0c0d0e0f 1020304050607 -+xxgenpcvdm imm1 ff7ffffe7f7ffffe,0080000e8080000e => 0 8090a0b0c0d0e0f -+xxgenpcvdm imm1 0080000e8080000e,0180055e0180077e => 0 0 -+xxgenpcvdm imm1 0180055e0180077e,0000111e8000222e => 0 0 -+xxgenpcvdm imm1 0000111e8000222e,7ff0000000000000 => 0 0 -+xxgenpcvdm imm1 7ff0000000000000,fff0000000000000 => 0 1020304050607 -+xxgenpcvdm imm1 fff0000000000000,2208400000000000 => 0 8090a0b0c0d0e0f -+xxgenpcvdm imm1 2208400000000000,0000000000000009 => 0 0 -+xxgenpcvdm imm1 0000000000000009,ffff000180000001 => 0 1020304050607 -+xxgenpcvdm imm1 ffff000180000001,0000000000000000 => 0 8090a0b0c0d0e0f -+xxgenpcvdm imm1 0000000000000000,8000000000000000 => 0 1020304050607 -+xxgenpcvdm imm1 8000000000000000,7f800000ff800000 => 0 
8090a0b0c0d0e0f -+ -+xxgenpcvdm imm2 7f800000ff800000,ff8000007f800000 => 1716151413121110 706050403020100 -+xxgenpcvdm imm2 ff8000007f800000,ff7ffffe7f7ffffe => 706050403020100 f0e0d0c0b0a0908 -+xxgenpcvdm imm2 ff7ffffe7f7ffffe,0080000e8080000e => 706050403020100 1f1e1d1c1b1a1918 -+xxgenpcvdm imm2 0080000e8080000e,0180055e0180077e => 1716151413121110 1f1e1d1c1b1a1918 -+xxgenpcvdm imm2 0180055e0180077e,0000111e8000222e => 1716151413121110 1f1e1d1c1b1a1918 -+xxgenpcvdm imm2 0000111e8000222e,7ff0000000000000 => 1716151413121110 1f1e1d1c1b1a1918 -+xxgenpcvdm imm2 7ff0000000000000,fff0000000000000 => 1716151413121110 706050403020100 -+xxgenpcvdm imm2 fff0000000000000,2208400000000000 => 706050403020100 1f1e1d1c1b1a1918 -+xxgenpcvdm imm2 2208400000000000,0000000000000009 => 1716151413121110 1f1e1d1c1b1a1918 -+xxgenpcvdm imm2 0000000000000009,ffff000180000001 => 1716151413121110 706050403020100 -+xxgenpcvdm imm2 ffff000180000001,0000000000000000 => 706050403020100 1f1e1d1c1b1a1918 -+xxgenpcvdm imm2 0000000000000000,8000000000000000 => 1716151413121110 706050403020100 -+xxgenpcvdm imm2 8000000000000000,7f800000ff800000 => 706050403020100 1f1e1d1c1b1a1918 -+ -+xxgenpcvdm imm3 7f800000ff800000,ff8000007f800000 => f0e0d0c0b0a0908 0 -+xxgenpcvdm imm3 ff8000007f800000,ff7ffffe7f7ffffe => 706050403020100 f0e0d0c0b0a0908 -+xxgenpcvdm imm3 ff7ffffe7f7ffffe,0080000e8080000e => 706050403020100 0 -+xxgenpcvdm imm3 0080000e8080000e,0180055e0180077e => 0 0 -+xxgenpcvdm imm3 0180055e0180077e,0000111e8000222e => 0 0 -+xxgenpcvdm imm3 0000111e8000222e,7ff0000000000000 => 0 0 -+xxgenpcvdm imm3 7ff0000000000000,fff0000000000000 => f0e0d0c0b0a0908 0 -+xxgenpcvdm imm3 fff0000000000000,2208400000000000 => 706050403020100 0 -+xxgenpcvdm imm3 2208400000000000,0000000000000009 => 0 0 -+xxgenpcvdm imm3 0000000000000009,ffff000180000001 => f0e0d0c0b0a0908 0 -+xxgenpcvdm imm3 ffff000180000001,0000000000000000 => 706050403020100 0 -+xxgenpcvdm imm3 0000000000000000,8000000000000000 => 
f0e0d0c0b0a0908 0 -+xxgenpcvdm imm3 8000000000000000,7f800000ff800000 => 706050403020100 0 -+ -+xxgenpcvhm imm0 7f800000ff800000,ff8000007f800000 => 18191a1b02031e1f 1121314151617 -+xxgenpcvhm imm0 ff8000007f800000,ff7ffffe7f7ffffe => 6071a1b1c1d1e1f 1020314150405 -+xxgenpcvhm imm0 ff7ffffe7f7ffffe,0080000e8080000e => 20304051c1d0607 1011121300011617 -+xxgenpcvhm imm0 0080000e8080000e,0180055e0180077e => 18191a1b00011e1f 1011121314151617 -+xxgenpcvhm imm0 0180055e0180077e,0000111e8000222e => 18191a1b1c1d1e1f 1011121300011617 -+xxgenpcvhm imm0 0000111e8000222e,7ff0000000000000 => 18191a1b00011e1f 1011121314151617 -+xxgenpcvhm imm0 7ff0000000000000,fff0000000000000 => 18191a1b1c1d1e1f 1121314151617 -+xxgenpcvhm imm0 fff0000000000000,2208400000000000 => 11a1b1c1d1e1f 1011121314151617 -+xxgenpcvhm imm0 2208400000000000,0000000000000009 => 18191a1b1c1d1e1f 1011121314151617 -+xxgenpcvhm imm0 0000000000000009,ffff000180000001 => 18191a1b1c1d1e1f 1121302031617 -+xxgenpcvhm imm0 ffff000180000001,0000000000000000 => 11a1b02031e1f 1011121314151617 -+xxgenpcvhm imm0 0000000000000000,8000000000000000 => 18191a1b1c1d1e1f 1121314151617 -+xxgenpcvhm imm0 8000000000000000,7f800000ff800000 => 2031a1b1c1d1e1f 1011121300011617 -+ -+xxgenpcvhm imm1 7f800000ff800000,ff8000007f800000 => 0 10c0d00000000 -+xxgenpcvhm imm1 ff8000007f800000,ff7ffffe7f7ffffe => 0 1020306070809 -+xxgenpcvhm imm1 ff7ffffe7f7ffffe,0080000e8080000e => 0 40508090a0b0e0f -+xxgenpcvhm imm1 0080000e8080000e,0180055e0180077e => 0 c0d000000000000 -+xxgenpcvhm imm1 0180055e0180077e,0000111e8000222e => 0 405000000000000 -+xxgenpcvhm imm1 0000111e8000222e,7ff0000000000000 => 0 c0d000000000000 -+xxgenpcvhm imm1 7ff0000000000000,fff0000000000000 => 0 1000000000000 -+xxgenpcvhm imm1 fff0000000000000,2208400000000000 => 0 809000000000000 -+xxgenpcvhm imm1 2208400000000000,0000000000000009 => 0 0 -+xxgenpcvhm imm1 0000000000000009,ffff000180000001 => 0 1040500000000 -+xxgenpcvhm imm1 ffff000180000001,0000000000000000 => 0 
8090c0d00000000 -+xxgenpcvhm imm1 0000000000000000,8000000000000000 => 0 1000000000000 -+xxgenpcvhm imm1 8000000000000000,7f800000ff800000 => 0 405080900000000 -+ -+xxgenpcvhm imm2 7f800000ff800000,ff8000007f800000 => 1716151401001110 3021d1c1b1a1918 -+xxgenpcvhm imm2 ff8000007f800000,ff7ffffe7f7ffffe => 100151413121110 70605041b1a0302 -+xxgenpcvhm imm2 ff7ffffe7f7ffffe,0080000e8080000e => 504030213120100 1f1e1d1c07061918 -+xxgenpcvhm imm2 0080000e8080000e,0180055e0180077e => 1716151401001110 1f1e1d1c1b1a1918 -+xxgenpcvhm imm2 0180055e0180077e,0000111e8000222e => 1716151413121110 1f1e1d1c01001918 -+xxgenpcvhm imm2 0000111e8000222e,7ff0000000000000 => 1716151401001110 1f1e1d1c1b1a1918 -+xxgenpcvhm imm2 7ff0000000000000,fff0000000000000 => 1716151413121110 1001d1c1b1a1918 -+xxgenpcvhm imm2 fff0000000000000,2208400000000000 => 100151413121110 1f1e1d1c1b1a1918 -+xxgenpcvhm imm2 2208400000000000,0000000000000009 => 1716151413121110 1f1e1d1c1b1a1918 -+xxgenpcvhm imm2 0000000000000009,ffff000180000001 => 1716151413121110 3021d1c01001918 -+xxgenpcvhm imm2 ffff000180000001,0000000000000000 => 302151401001110 1f1e1d1c1b1a1918 -+xxgenpcvhm imm2 0000000000000000,8000000000000000 => 1716151413121110 1001d1c1b1a1918 -+xxgenpcvhm imm2 8000000000000000,7f800000ff800000 => 100151413121110 1f1e1d1c03021918 -+ -+xxgenpcvhm imm3 7f800000ff800000,ff8000007f800000 => f0e0302 0 -+xxgenpcvhm imm3 ff8000007f800000,ff7ffffe7f7ffffe => f0e0d0c09080706 0 -+xxgenpcvhm imm3 ff7ffffe7f7ffffe,0080000e8080000e => b0a070605040100 0 -+xxgenpcvhm imm3 0080000e8080000e,0180055e0180077e => 302 0 -+xxgenpcvhm imm3 0180055e0180077e,0000111e8000222e => b0a 0 -+xxgenpcvhm imm3 0000111e8000222e,7ff0000000000000 => 302 0 -+xxgenpcvhm imm3 7ff0000000000000,fff0000000000000 => f0e 0 -+xxgenpcvhm imm3 fff0000000000000,2208400000000000 => 706 0 -+xxgenpcvhm imm3 2208400000000000,0000000000000009 => 0 0 -+xxgenpcvhm imm3 0000000000000009,ffff000180000001 => f0e0b0a 0 -+xxgenpcvhm imm3 
ffff000180000001,0000000000000000 => 7060302 0 -+xxgenpcvhm imm3 0000000000000000,8000000000000000 => f0e 0 -+xxgenpcvhm imm3 8000000000000000,7f800000ff800000 => b0a0706 0 -+ -+xxgenpcvwm imm0 7f800000ff800000,ff8000007f800000 => 18191a1b04050607 1020314151617 -+xxgenpcvwm imm0 ff8000007f800000,ff7ffffe7f7ffffe => 40506071c1d1e1f 1020314151617 -+xxgenpcvwm imm0 ff7ffffe7f7ffffe,0080000e8080000e => 40506071c1d1e1f 1011121300010203 -+xxgenpcvwm imm0 0080000e8080000e,0180055e0180077e => 18191a1b00010203 1011121314151617 -+xxgenpcvwm imm0 0180055e0180077e,0000111e8000222e => 18191a1b1c1d1e1f 1011121300010203 -+xxgenpcvwm imm0 0000111e8000222e,7ff0000000000000 => 18191a1b00010203 1011121314151617 -+xxgenpcvwm imm0 7ff0000000000000,fff0000000000000 => 18191a1b1c1d1e1f 1020314151617 -+xxgenpcvwm imm0 fff0000000000000,2208400000000000 => 102031c1d1e1f 1011121314151617 -+xxgenpcvwm imm0 2208400000000000,0000000000000009 => 18191a1b1c1d1e1f 1011121314151617 -+xxgenpcvwm imm0 0000000000000009,ffff000180000001 => 18191a1b1c1d1e1f 1020304050607 -+xxgenpcvwm imm0 ffff000180000001,0000000000000000 => 1020304050607 1011121314151617 -+xxgenpcvwm imm0 0000000000000000,8000000000000000 => 18191a1b1c1d1e1f 1020314151617 -+xxgenpcvwm imm0 8000000000000000,7f800000ff800000 => 40506071c1d1e1f 1011121300010203 -+ -+xxgenpcvwm imm1 7f800000ff800000,ff8000007f800000 => 0 102030c0d0e0f -+xxgenpcvwm imm1 ff8000007f800000,ff7ffffe7f7ffffe => 0 1020308090a0b -+xxgenpcvwm imm1 ff7ffffe7f7ffffe,0080000e8080000e => 0 405060708090a0b -+xxgenpcvwm imm1 0080000e8080000e,0180055e0180077e => 0 c0d0e0f00000000 -+xxgenpcvwm imm1 0180055e0180077e,0000111e8000222e => 0 405060700000000 -+xxgenpcvwm imm1 0000111e8000222e,7ff0000000000000 => 0 c0d0e0f00000000 -+xxgenpcvwm imm1 7ff0000000000000,fff0000000000000 => 0 1020300000000 -+xxgenpcvwm imm1 fff0000000000000,2208400000000000 => 0 8090a0b00000000 -+xxgenpcvwm imm1 2208400000000000,0000000000000009 => 0 0 -+xxgenpcvwm imm1 
0000000000000009,ffff000180000001 => 0 1020304050607 -+xxgenpcvwm imm1 ffff000180000001,0000000000000000 => 0 8090a0b0c0d0e0f -+xxgenpcvwm imm1 0000000000000000,8000000000000000 => 0 1020300000000 -+xxgenpcvwm imm1 8000000000000000,7f800000ff800000 => 0 405060708090a0b -+ -+xxgenpcvwm imm2 7f800000ff800000,ff8000007f800000 => 1716151403020100 70605041b1a1918 -+xxgenpcvwm imm2 ff8000007f800000,ff7ffffe7f7ffffe => 302010013121110 70605041b1a1918 -+xxgenpcvwm imm2 ff7ffffe7f7ffffe,0080000e8080000e => 302010013121110 1f1e1d1c07060504 -+xxgenpcvwm imm2 0080000e8080000e,0180055e0180077e => 1716151403020100 1f1e1d1c1b1a1918 -+xxgenpcvwm imm2 0180055e0180077e,0000111e8000222e => 1716151413121110 1f1e1d1c03020100 -+xxgenpcvwm imm2 0000111e8000222e,7ff0000000000000 => 1716151403020100 1f1e1d1c1b1a1918 -+xxgenpcvwm imm2 7ff0000000000000,fff0000000000000 => 1716151413121110 30201001b1a1918 -+xxgenpcvwm imm2 fff0000000000000,2208400000000000 => 302010013121110 1f1e1d1c1b1a1918 -+xxgenpcvwm imm2 2208400000000000,0000000000000009 => 1716151413121110 1f1e1d1c1b1a1918 -+xxgenpcvwm imm2 0000000000000009,ffff000180000001 => 1716151413121110 706050403020100 -+xxgenpcvwm imm2 ffff000180000001,0000000000000000 => 706050403020100 1f1e1d1c1b1a1918 -+xxgenpcvwm imm2 0000000000000000,8000000000000000 => 1716151413121110 30201001b1a1918 -+xxgenpcvwm imm2 8000000000000000,7f800000ff800000 => 302010013121110 1f1e1d1c07060504 -+ -+xxgenpcvwm imm3 7f800000ff800000,ff8000007f800000 => f0e0d0c03020100 0 -+xxgenpcvwm imm3 ff8000007f800000,ff7ffffe7f7ffffe => f0e0d0c07060504 0 -+xxgenpcvwm imm3 ff7ffffe7f7ffffe,0080000e8080000e => b0a090807060504 0 -+xxgenpcvwm imm3 0080000e8080000e,0180055e0180077e => 3020100 0 -+xxgenpcvwm imm3 0180055e0180077e,0000111e8000222e => b0a0908 0 -+xxgenpcvwm imm3 0000111e8000222e,7ff0000000000000 => 3020100 0 -+xxgenpcvwm imm3 7ff0000000000000,fff0000000000000 => f0e0d0c 0 -+xxgenpcvwm imm3 fff0000000000000,2208400000000000 => 7060504 0 -+xxgenpcvwm imm3 
2208400000000000,0000000000000009 => 0 0 -+xxgenpcvwm imm3 0000000000000009,ffff000180000001 => f0e0d0c0b0a0908 0 -+xxgenpcvwm imm3 ffff000180000001,0000000000000000 => 706050403020100 0 -+xxgenpcvwm imm3 0000000000000000,8000000000000000 => f0e0d0c 0 -+xxgenpcvwm imm3 8000000000000000,7f800000ff800000 => b0a090807060504 0 -+ - xxpermx imm0 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ffff0000ffff ffff0000ffff - xxpermx imm0 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ffff0000ffff ffff0000ffff - xxpermx imm0 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ffff0000ffff ffff0000ffff -@@ -7094,4 +7318,4 @@ xxspltiw imm3 => 300000003 300000003 - - xxspltiw imm8 => 800000008 800000008 - --All done. Tested 142 different instruction groups -+All done. Tested 158 different instruction groups - -commit c8fa838be405d7ac43035dcf675bf490800c26ec -Author: root -Date: Fri Feb 12 15:59:32 2021 -0500 - - Reduced Precision bfloat16 outer product tests - -diff --git a/none/tests/ppc64/test_isa_3_1_AT.c b/none/tests/ppc64/test_isa_3_1_AT.c -index 1d6d42c61..fee76f8f4 100644 ---- a/none/tests/ppc64/test_isa_3_1_AT.c -+++ b/none/tests/ppc64/test_isa_3_1_AT.c -@@ -626,8 +626,228 @@ static void test_pmxvf64gernn_XM11_YM0 (void) { - static void test_pmxvf64gernn_XM11_YM1 (void) { - __asm__ __volatile__ ("pmxvf64gernn 4, 22, %x0, 11, 1" :: "wa" (vec_xa) ); - } -+static void test_xvbf16ger2 (void) { -+ __asm__ __volatile__ ("xvbf16ger2 4, %x0, %x1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_xvbf16ger2nn (void) { -+ __asm__ __volatile__ ("xvbf16ger2nn 4, %x0, %x1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_xvbf16ger2np (void) { -+ __asm__ __volatile__ ("xvbf16ger2np 4, %x0, %x1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_xvbf16ger2pn (void) { -+ __asm__ 
__volatile__ ("xvbf16ger2pn 4, %x0, %x1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_xvbf16ger2pp (void) { -+ __asm__ __volatile__ ("xvbf16ger2pp 4, %x0, %x1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2_XM0_YM0_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 0, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2_XM0_YM0_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 0, 0, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2_XM0_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 0, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2_XM0_YM13_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 0, 13, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2_XM11_YM0_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 11, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2_XM11_YM0_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 11, 0, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2_XM11_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 11, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2_XM11_YM13_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 11, 13, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2nn_XM0_YM0_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 0, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2nn_XM0_YM0_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 0, 0, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2nn_XM0_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 0, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void 
test_pmxvbf16ger2nn_XM0_YM13_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 0, 13, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2nn_XM11_YM0_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 11, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2nn_XM11_YM0_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 11, 0, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2nn_XM11_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 11, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2nn_XM11_YM13_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 11, 13, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2np_XM0_YM0_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 0, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2np_XM0_YM0_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 0, 0, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2np_XM0_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 0, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2np_XM0_YM13_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 0, 13, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2np_XM11_YM0_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 11, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2np_XM11_YM0_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 11, 0, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2np_XM11_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 11, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2np_XM11_YM13_PM1 (void) { -+ __asm__ 
__volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 11, 13, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pn_XM0_YM0_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 0, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pn_XM0_YM0_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 0, 0, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pn_XM0_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 0, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pn_XM0_YM13_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 0, 13, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pn_XM11_YM0_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 11, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pn_XM11_YM0_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 11, 0, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pn_XM11_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 11, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pn_XM11_YM13_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 11, 13, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pp_XM0_YM0_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 0, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pp_XM0_YM0_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 0, 0, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pp_XM0_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 0, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pp_XM0_YM13_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 0, 13, 1" -+ :: 
"wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pp_XM11_YM0_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 11, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pp_XM11_YM0_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 11, 0, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pp_XM11_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 11, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvbf16ger2pp_XM11_YM13_PM1 (void) { -+ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 11, 13, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} - - static test_list_t testgroup_generic[] = { -+ { &test_pmxvbf16ger2nn_XM0_YM0_PM0, "pmxvbf16ger2nn XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2nn_XM0_YM0_PM1, "pmxvbf16ger2nn XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2nn_XM0_YM13_PM0, "pmxvbf16ger2nn XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2nn_XM0_YM13_PM1, "pmxvbf16ger2nn XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2nn_XM11_YM0_PM0, "pmxvbf16ger2nn XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2nn_XM11_YM0_PM1, "pmxvbf16ger2nn XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2nn_XM11_YM13_PM0, "pmxvbf16ger2nn XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2nn_XM11_YM13_PM1, "pmxvbf16ger2nn XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2np_XM0_YM0_PM0, "pmxvbf16ger2np XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2np_XM0_YM0_PM1, "pmxvbf16ger2np XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2np_XM0_YM13_PM0, 
"pmxvbf16ger2np XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2np_XM0_YM13_PM1, "pmxvbf16ger2np XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2np_XM11_YM0_PM0, "pmxvbf16ger2np XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2np_XM11_YM0_PM1, "pmxvbf16ger2np XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2np_XM11_YM13_PM0, "pmxvbf16ger2np XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2np_XM11_YM13_PM1, "pmxvbf16ger2np XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pn_XM0_YM0_PM0, "pmxvbf16ger2pn XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pn_XM0_YM0_PM1, "pmxvbf16ger2pn XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pn_XM0_YM13_PM0, "pmxvbf16ger2pn XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pn_XM0_YM13_PM1, "pmxvbf16ger2pn XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pn_XM11_YM0_PM0, "pmxvbf16ger2pn XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pn_XM11_YM0_PM1, "pmxvbf16ger2pn XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pn_XM11_YM13_PM0, "pmxvbf16ger2pn XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pn_XM11_YM13_PM1, "pmxvbf16ger2pn XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pp_XM0_YM0_PM0, "pmxvbf16ger2pp XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pp_XM0_YM0_PM1, "pmxvbf16ger2pp XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pp_XM0_YM13_PM0, "pmxvbf16ger2pp XM0_YM13_PM0", 
"AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pp_XM0_YM13_PM1, "pmxvbf16ger2pp XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pp_XM11_YM0_PM0, "pmxvbf16ger2pp XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pp_XM11_YM0_PM1, "pmxvbf16ger2pp XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pp_XM11_YM13_PM0, "pmxvbf16ger2pp XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2pp_XM11_YM13_PM1, "pmxvbf16ger2pp XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2_XM0_YM0_PM0, "pmxvbf16ger2 XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2_XM0_YM0_PM1, "pmxvbf16ger2 XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2_XM0_YM13_PM0, "pmxvbf16ger2 XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2_XM0_YM13_PM1, "pmxvbf16ger2 XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2_XM11_YM0_PM0, "pmxvbf16ger2 XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2_XM11_YM0_PM1, "pmxvbf16ger2 XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2_XM11_YM13_PM0, "pmxvbf16ger2 XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -+ { &test_pmxvbf16ger2_XM11_YM13_PM1, "pmxvbf16ger2 XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ - { &test_pmxvf16ger2nn_XM0_YM0_PM0, "pmxvf16ger2nn XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ - { &test_pmxvf16ger2nn_XM0_YM0_PM1, "pmxvf16ger2nn XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ - { &test_pmxvf16ger2nn_XM0_YM13_PM0, "pmxvf16ger2nn XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -@@ -756,6 +976,11 @@ static 
test_list_t testgroup_generic[] = { - { &test_pmxvi16ger2s_XM11_YM0_PM1, "pmxvi16ger2s XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ - { &test_pmxvi16ger2s_XM11_YM13_PM0, "pmxvi16ger2s XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ - { &test_pmxvi16ger2s_XM11_YM13_PM1, "pmxvi16ger2s XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_xvbf16ger2nn, "xvbf16ger2nn", "AT,XA,XB", 0b00001111}, /* bcs */ -+ { &test_xvbf16ger2np, "xvbf16ger2np", "AT,XA,XB", 0b00001111}, /* bcs */ -+ { &test_xvbf16ger2pn, "xvbf16ger2pn", "AT,XA,XB", 0b00001111}, /* bcs */ -+ { &test_xvbf16ger2pp, "xvbf16ger2pp", "AT,XA,XB", 0b00001111}, /* bcs */ -+ { &test_xvbf16ger2, "xvbf16ger2", "AT,XA,XB", 0b00001111}, /* bcs */ - { &test_xvf16ger2nn, "xvf16ger2nn", "AT,XA,XB", 0b00001111}, /* bcs */ - { &test_xvf16ger2np, "xvf16ger2np", "AT,XA,XB", 0b00001111}, /* bcs */ - { &test_xvf16ger2pn, "xvf16ger2pn", "AT,XA,XB", 0b00001111}, /* bcs */ -diff --git a/none/tests/ppc64/test_isa_3_1_AT.stdout.exp b/none/tests/ppc64/test_isa_3_1_AT.stdout.exp -index 5ea998563..be3f17ec3 100644 ---- a/none/tests/ppc64/test_isa_3_1_AT.stdout.exp -+++ b/none/tests/ppc64/test_isa_3_1_AT.stdout.exp -@@ -1,3 +1,403 @@ -+pmxvbf16ger2nn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero 
+Zero) -+pmxvbf16ger2nn XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2nn XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) 
-+pmxvbf16ger2nn XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2nn XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) 
-+pmxvbf16ger2nn XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2nn XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero 
+Zero) -+pmxvbf16ger2nn XM0_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM0_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2nn XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero 
+Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2nn XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2nn XM11_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero 
+Zero +Zero +Zero) -+ -+pmxvbf16ger2nn XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 
-1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+ -+pmxvbf16ger2nn XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -3.63717e-28 -1.14437e-28 +Zero -1.14437e-28 -4.71628e-18 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM1 0000000000000009,ffff000180000001 
0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2nn XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+ -+pmxvbf16ger2np XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM0 0000000000000009,ffff000180000001 
0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2np XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM1 0000000000000009,ffff000180000001 
fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2np XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM0 
0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2np XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM0_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2np 
XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2np XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) 
-+pmxvbf16ger2np XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM11_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM11_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2np XM11_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2np XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2np XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) 
-+pmxvbf16ger2np XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2np XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2np XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2np XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2np XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2np XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2np XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+ -+pmxvbf16ger2np XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2np XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2np XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2np XM11_YM13_PM1 
0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2np XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 -1.34843e-28 1.14437e-28 +Zero 1.14437e-28 -4.71628e-18 1.14437e-28) -+pmxvbf16ger2np XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2np XM11_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2np XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2np XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+ -+pmxvbf16ger2pn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn 
XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2pn XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn 
XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2pn XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn 
XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2pn XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) 
-+pmxvbf16ger2pn XM0_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM0_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2pn XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero 
+Zero) -+pmxvbf16ger2pn XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2pn XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM11_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM11_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pn XM11_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero 
+Zero +Zero) -+ -+pmxvbf16ger2pn XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 
-1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+ -+pmxvbf16ger2pn XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 1.34843e-28 -1.14437e-28 +Zero -1.14437e-28 4.71628e-18 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( 
+Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+pmxvbf16ger2pn XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) -+ -+pmxvbf16ger2pp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero 
+Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2pp XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero 
+Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2pp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero 
+Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2pp XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM0_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2pp XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero 
+Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2pp XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( 
+Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2pp XM11_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2pp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2pp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2pp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero 
1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2pp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2pp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2pp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2pp XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2pp XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2pp XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+ -+pmxvbf16ger2pp XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2pp XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2pp XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2pp XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 
* * +Zero 1.14437e-28 * *) -+pmxvbf16ger2pp XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 3.63717e-28 1.14437e-28 +Zero 1.14437e-28 4.71628e-18 1.14437e-28) -+pmxvbf16ger2pp XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2pp XM11_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2pp XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+pmxvbf16ger2pp XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) -+ -+pmxvbf16ger2 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero 
+Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2 XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero 
+Zero) -+pmxvbf16ger2 XM0_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2 XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) 
-+pmxvbf16ger2 XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2 XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM0_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 
XM0_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2 XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2 XM11_YM0_PM1 
ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2 XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM13_PM0 
ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ -+pmxvbf16ger2 XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM13_PM1 
ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero 2.49280e-28 +Zero +Zero +Zero 4.71628e-18 +Zero) -+pmxvbf16ger2 XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+pmxvbf16ger2 XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Den +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Den +Zero) -+pmxvbf16ger2 XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+ - pmxvf16ger2nn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) - pmxvf16ger2nn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) - pmxvf16ger2nn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -@@ -1558,6 +1958,91 @@ 
pmxvi16ger2s XM11_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,01 - pmxvi16ger2s XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000400000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0002400000000000] - pmxvi16ger2s XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] - -+xvbf16ger2nn 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ( +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf -Inf +Inf +Inf -Inf +Inf -Inf -Inf +Inf) -+xvbf16ger2nn 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => ( +Inf +Inf NaN NaN -Inf -Inf NaN NaN -Inf -Inf NaN NaN +Inf +Inf NaN NaN) -+xvbf16ger2nn 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => ( +Inf NaN NaN NaN -Inf NaN NaN NaN -Inf NaN NaN NaN +Inf NaN NaN NaN) -+xvbf16ger2nn 7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => ( NaN NaN +Inf -Inf NaN NaN -Inf +Inf NaN NaN -Inf +Inf NaN NaN +Inf -Inf) -+xvbf16ger2nn 0180055e0180077e,0000111e8000222e 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN -Inf +Inf +Inf -Inf -Inf +Inf +Inf -Inf) -+xvbf16ger2nn 0180055e0180077e,0000111e8000222e 0180055e0180077e,0000111e8000222e => ( -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) -+xvbf16ger2nn 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => ( -3.63717e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -4.71628e-18 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14438e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) -+xvbf16ger2nn 
0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => ( -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 -Inf +Inf -1.14437e-28 -1.14437e-28 -Inf +Inf) -+xvbf16ger2nn 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN -Inf +Inf +Inf -Inf NaN NaN NaN NaN) -+xvbf16ger2nn 2208400000000000,0000000000000009 0180055e0180077e,0000111e8000222e => ( -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14438e-28 -3.63717e-28 -4.71628e-18 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) -+xvbf16ger2nn 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => ( -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -4.00000e+00 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) -+xvbf16ger2nn 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => ( -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 -Inf +Inf -1.14437e-28 -1.14437e-28 NaN NaN) -+xvbf16ger2nn 8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => ( -Inf +Inf +Inf -Inf +Inf -Inf -Inf +Inf NaN NaN NaN NaN NaN NaN NaN NaN) -+xvbf16ger2nn 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => ( -Inf -Inf NaN NaN +Inf +Inf NaN NaN -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) -+xvbf16ger2nn 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => ( -Inf NaN NaN NaN +Inf NaN NaN NaN -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) -+xvbf16ger2nn 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => ( NaN NaN -Inf +Inf NaN NaN +Inf -Inf -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 NaN 
NaN) -+ -+xvbf16ger2np 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ( +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf -Inf +Inf +Inf -Inf +Inf -Inf -Inf +Inf) -+xvbf16ger2np 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => ( +Inf +Inf NaN NaN -Inf -Inf NaN NaN -Inf -Inf NaN NaN +Inf +Inf NaN NaN) -+xvbf16ger2np 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => ( +Inf NaN NaN NaN -Inf NaN NaN NaN -Inf NaN NaN NaN +Inf NaN NaN NaN) -+xvbf16ger2np 7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => ( NaN NaN +Inf -Inf NaN NaN -Inf +Inf NaN NaN -Inf +Inf NaN NaN +Inf -Inf) -+xvbf16ger2np 0180055e0180077e,0000111e8000222e 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN -Inf +Inf +Inf -Inf -Inf +Inf +Inf -Inf) -+xvbf16ger2np 0180055e0180077e,0000111e8000222e 0180055e0180077e,0000111e8000222e => ( * * * * * * * * * * * * * * * *) -+xvbf16ger2np 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => ( -1.34843e-28 1.14437e-28 * * -4.71628e-18 1.14437e-28 * * 1.14437e-28 1.14437e-28 * * 1.14437e-28 1.14437e-28 * *) -+xvbf16ger2np 0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => ( * * NaN NaN * * NaN NaN * * -Inf +Inf * * -Inf +Inf) -+xvbf16ger2np 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN -Inf +Inf +Inf -Inf NaN NaN NaN NaN) -+xvbf16ger2np 2208400000000000,0000000000000009 0180055e0180077e,0000111e8000222e => ( * * * * * * * * 1.14437e-28 1.14437e-28 -1.34843e-28 -4.71628e-18 * * * *) -+xvbf16ger2np 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => ( * * * * * * * * -4.00000e+00 1.14437e-28 * * * * * *) -+xvbf16ger2np 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => ( * * NaN NaN * * NaN NaN * * -Inf +Inf * * NaN NaN) -+xvbf16ger2np 8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => ( -Inf +Inf +Inf -Inf +Inf 
-Inf -Inf +Inf NaN NaN NaN NaN NaN NaN NaN NaN) -+xvbf16ger2np 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => ( -Inf -Inf NaN NaN +Inf +Inf NaN NaN * * * * * * * *) -+xvbf16ger2np 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => ( -Inf NaN NaN NaN +Inf NaN NaN NaN * * * * * * * *) -+xvbf16ger2np 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => ( NaN NaN -Inf +Inf NaN NaN +Inf -Inf * * NaN NaN * * NaN NaN) -+ -+xvbf16ger2pn 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ( -Inf +Inf +Inf -Inf +Inf -Inf -Inf +Inf +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf) -+xvbf16ger2pn 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => ( -Inf -Inf NaN NaN +Inf +Inf NaN NaN +Inf +Inf NaN NaN -Inf -Inf NaN NaN) -+xvbf16ger2pn 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => ( -Inf NaN NaN NaN +Inf NaN NaN NaN +Inf NaN NaN NaN -Inf NaN NaN NaN) -+xvbf16ger2pn 7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => ( NaN NaN -Inf +Inf NaN NaN +Inf -Inf NaN NaN +Inf -Inf NaN NaN -Inf +Inf) -+xvbf16ger2pn 0180055e0180077e,0000111e8000222e 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN +Inf -Inf -Inf +Inf +Inf -Inf -Inf +Inf) -+xvbf16ger2pn 0180055e0180077e,0000111e8000222e 0180055e0180077e,0000111e8000222e => ( -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) -+xvbf16ger2pn 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => ( 1.34843e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 4.71628e-18 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) -+xvbf16ger2pn 0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => ( -1.14437e-28 -1.14437e-28 NaN 
NaN -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 +Inf -Inf -1.14437e-28 -1.14437e-28 +Inf -Inf) -+xvbf16ger2pn 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN +Inf -Inf -Inf +Inf NaN NaN NaN NaN) -+xvbf16ger2pn 2208400000000000,0000000000000009 0180055e0180077e,0000111e8000222e => ( -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 1.34843e-28 4.71628e-18 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) -+xvbf16ger2pn 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => ( -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 4.00000e+00 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) -+xvbf16ger2pn 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => ( -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 +Inf -Inf -1.14437e-28 -1.14437e-28 NaN NaN) -+xvbf16ger2pn 8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => ( +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf NaN NaN NaN NaN NaN NaN NaN NaN) -+xvbf16ger2pn 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => ( +Inf +Inf NaN NaN -Inf -Inf NaN NaN -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) -+xvbf16ger2pn 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => ( +Inf NaN NaN NaN -Inf NaN NaN NaN -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) -+xvbf16ger2pn 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => ( NaN NaN +Inf -Inf NaN NaN -Inf +Inf -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 NaN NaN) -+ -+xvbf16ger2pp 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ( -Inf +Inf 
+Inf -Inf +Inf -Inf -Inf +Inf +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf) -+xvbf16ger2pp 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => ( -Inf -Inf NaN NaN +Inf +Inf NaN NaN +Inf +Inf NaN NaN -Inf -Inf NaN NaN) -+xvbf16ger2pp 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => ( -Inf NaN NaN NaN +Inf NaN NaN NaN +Inf NaN NaN NaN -Inf NaN NaN NaN) -+xvbf16ger2pp 7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => ( NaN NaN -Inf +Inf NaN NaN +Inf -Inf NaN NaN +Inf -Inf NaN NaN -Inf +Inf) -+xvbf16ger2pp 0180055e0180077e,0000111e8000222e 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN +Inf -Inf -Inf +Inf +Inf -Inf -Inf +Inf) -+xvbf16ger2pp 0180055e0180077e,0000111e8000222e 0180055e0180077e,0000111e8000222e => ( * * * * * * * * * * * * * * * *) -+xvbf16ger2pp 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => ( 3.63717e-28 1.14437e-28 * * 4.71628e-18 1.14437e-28 * * 1.14437e-28 1.14437e-28 * * 1.14438e-28 1.14437e-28 * *) -+xvbf16ger2pp 0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => ( * * NaN NaN * * NaN NaN * * +Inf -Inf * * +Inf -Inf) -+xvbf16ger2pp 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN +Inf -Inf -Inf +Inf NaN NaN NaN NaN) -+xvbf16ger2pp 2208400000000000,0000000000000009 0180055e0180077e,0000111e8000222e => ( * * * * * * * * 1.14437e-28 1.14438e-28 3.63717e-28 4.71628e-18 * * * *) -+xvbf16ger2pp 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => ( * * * * * * * * 4.00000e+00 1.14437e-28 * * * * * *) -+xvbf16ger2pp 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => ( * * NaN NaN * * NaN NaN * * +Inf -Inf * * NaN NaN) -+xvbf16ger2pp 8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => ( +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf NaN NaN NaN NaN NaN NaN NaN NaN) -+xvbf16ger2pp 8000000000000000,7f800000ff800000 
0180055e0180077e,0000111e8000222e => ( +Inf +Inf NaN NaN -Inf -Inf NaN NaN * * * * * * * *) -+xvbf16ger2pp 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => ( +Inf NaN NaN NaN -Inf NaN NaN NaN * * * * * * * *) -+xvbf16ger2pp 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => ( NaN NaN +Inf -Inf NaN NaN -Inf +Inf * * NaN NaN * * NaN NaN) -+ -+xvbf16ger2 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ( -Inf +Inf +Inf -Inf +Inf -Inf -Inf +Inf +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf) -+xvbf16ger2 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => ( -Inf -Inf NaN NaN +Inf +Inf NaN NaN +Inf +Inf NaN NaN -Inf -Inf NaN NaN) -+xvbf16ger2 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => ( -Inf NaN NaN NaN +Inf NaN NaN NaN +Inf NaN NaN NaN -Inf NaN NaN NaN) -+xvbf16ger2 7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => ( NaN NaN -Inf +Inf NaN NaN +Inf -Inf NaN NaN +Inf -Inf NaN NaN -Inf +Inf) -+xvbf16ger2 0180055e0180077e,0000111e8000222e 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN +Inf -Inf -Inf +Inf +Inf -Inf -Inf +Inf) -+xvbf16ger2 0180055e0180077e,0000111e8000222e 0180055e0180077e,0000111e8000222e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero 5.56082e-36 +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+xvbf16ger2 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => ( 2.49280e-28 +Zero +Zero +Zero 4.71628e-18 +Zero +Zero +Zero 2.08768e-35 +Zero +Zero +Zero 3.82177e-34 +Zero +Zero +Zero) -+xvbf16ger2 0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => ( +Zero +Zero NaN NaN +Zero +Zero NaN NaN +Zero +Zero +Inf -Inf +Zero +Zero +Inf -Inf) -+xvbf16ger2 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN +Inf -Inf -Inf +Inf NaN NaN NaN NaN) -+xvbf16ger2 2208400000000000,0000000000000009 0180055e0180077e,0000111e8000222e => ( +Zero 
+Zero +Zero +Zero +Zero +Zero +Zero +Zero 2.08768e-35 3.82177e-34 2.49280e-28 4.71628e-18 +Zero +Zero +Zero +Zero) -+xvbf16ger2 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => ( +Zero +Zero +Zero +Zero +Den +Zero +Zero +Zero 4.00000e+00 +Zero +Zero +Den +Zero +Zero +Zero +Zero) -+xvbf16ger2 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => ( +Zero +Zero NaN NaN +Zero +Zero NaN NaN +Zero +Zero +Inf -Inf +Zero +Zero NaN NaN) -+xvbf16ger2 8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => ( +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf NaN NaN NaN NaN NaN NaN NaN NaN) -+xvbf16ger2 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => ( +Inf +Inf NaN NaN -Inf -Inf NaN NaN +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+xvbf16ger2 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => ( +Inf NaN NaN NaN -Inf NaN NaN NaN +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) -+xvbf16ger2 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => ( NaN NaN +Inf -Inf NaN NaN -Inf +Inf +Zero +Zero NaN NaN +Zero +Zero NaN NaN) -+ - xvf16ger2nn 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN) - xvf16ger2nn 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => ( NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN) - xvf16ger2nn 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => ( NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN) -@@ -1921,4 +2406,4 @@ xxmtacc => [ 7f800000ff800000 ff8000007f800000 ff7ffffe7f7ffffe 0080000e8080000e - - xxsetaccz => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] - --All done. Tested 152 different instruction groups -+All done. 
Tested 197 different instruction groups -diff --git a/none/tests/ppc64/test_isa_3_1_XT.c b/none/tests/ppc64/test_isa_3_1_XT.c -index a54e8763a..905c17951 100644 ---- a/none/tests/ppc64/test_isa_3_1_XT.c -+++ b/none/tests/ppc64/test_isa_3_1_XT.c -@@ -491,6 +491,16 @@ static void test_pstxv_4 (void) { - static void test_pstxv_0 (void) { - __asm__ __volatile__ ("pstxv %x0, 0(%1), 0" :: "wa" (vec_xs), "r" (ra) ); - } -+static void test_xvcvspbf16 (void) { -+SET_FPSCR_ZERO; -+ __asm__ __volatile__ ("xvcvspbf16 %x0, %x1" -+ : "=wa" (vec_xt) : "wa" (vec_xb) ); -+GET_FPSCR(current_fpscr); -+} -+static void test_xvcvbf16spn (void) { -+ __asm__ __volatile__ ("xvcvbf16spn %x0, %x1" -+ : "=wa" (vec_xt) : "wa" (vec_xb) ); -+} - static void test_xxgenpcvbm_imm0 (void) { - __asm__ __volatile__ ("xxgenpcvbm %x0, %1, 0" : "=wa" (vec_xt) : "v" (vrb) ); - } -@@ -634,6 +644,8 @@ static test_list_t testgroup_generic[] = { - { &test_stxvrdx, "stxvrdx", "XS,RA,RB"}, /* bcs */ - { &test_stxvrhx, "stxvrhx", "XS,RA,RB"}, /* bcs */ - { &test_stxvrwx, "stxvrwx", "XS,RA,RB"}, /* bcs */ -+ { &test_xvcvbf16spn, "xvcvbf16spn", "XT,XB"}, /* bcs */ -+ { &test_xvcvspbf16, "xvcvspbf16", "XT,XB", 0b0101010100000000}, /* bcs */ - { &test_xxblendvb, "xxblendvb", "XT,XA,XB,XC"}, /* bcs */ - { &test_xxblendvd, "xxblendvd", "XT,XA,XB,XC"}, /* bcs */ - { &test_xxblendvh, "xxblendvh", "XT,XA,XB,XC"}, /* bcs */ -diff --git a/none/tests/ppc64/test_isa_3_1_XT.stdout.exp b/none/tests/ppc64/test_isa_3_1_XT.stdout.exp -index 8b5f1d1a3..b9d6cb7a8 100644 ---- a/none/tests/ppc64/test_isa_3_1_XT.stdout.exp -+++ b/none/tests/ppc64/test_isa_3_1_XT.stdout.exp -@@ -244,6 +244,34 @@ stxvrwx 18 (&buffer) 0000111e8000222e,0180055e0180077e => [ - - - 7f0000008 - stxvrwx 20 (&buffer) 7ff0000000000000,0000111e8000222e => [ - - - - 5a05a05a00000000 - - - ] - stxvrwx 28 (&buffer) fff0000000000000,7ff0000000000000 => [ - - - - - 0102030400000000 - - ] - -+xvcvbf16spn 7f800000ff800000,ff8000007f800000 => 0 0 -+xvcvbf16spn 
ff8000007f800000,ff7ffffe7f7ffffe => 0 fffe0000fffe0000 -+xvcvbf16spn ff7ffffe7f7ffffe,0080000e8080000e => fffe0000fffe0000 e0000000e0000 -+xvcvbf16spn 0080000e8080000e,0180055e0180077e => e0000000e0000 55e0000077e0000 -+xvcvbf16spn 0180055e0180077e,0000111e8000222e => 55e0000077e0000 111e0000222e0000 -+xvcvbf16spn 0000111e8000222e,7ff0000000000000 => 111e0000222e0000 0 -+xvcvbf16spn 7ff0000000000000,fff0000000000000 => 0 0 -+xvcvbf16spn fff0000000000000,2208400000000000 => 0 4000000000000000 -+xvcvbf16spn 2208400000000000,0000000000000009 => 4000000000000000 90000 -+xvcvbf16spn 0000000000000009,ffff000180000001 => 90000 1000000010000 -+xvcvbf16spn ffff000180000001,0000000000000000 => 1000000010000 0 -+xvcvbf16spn 0000000000000000,8000000000000000 => 0 0 -+xvcvbf16spn 8000000000000000,7f800000ff800000 => 0 0 -+ -+xvcvspbf16 7f800000ff800000,ff8000007f800000 => +Inf -Inf -Inf +Inf -+xvcvspbf16 ff8000007f800000,ff7ffffe7f7ffffe => -Inf +Inf -Inf +Inf -+xvcvspbf16 ff7ffffe7f7ffffe,0080000e8080000e => -Inf +Inf 0x0080 0x8080 -+xvcvspbf16 0080000e8080000e,0180055e0180077e => 0x0080 0x8080 0x0180 0x0180 -+xvcvspbf16 0180055e0180077e,0000111e8000222e => 0x0180 0x0180 +Zero -Zero -+xvcvspbf16 0000111e8000222e,7ff0000000000000 => +Zero -Zero NaN +Zero -+xvcvspbf16 7ff0000000000000,fff0000000000000 => NaN +Zero NaN +Zero -+xvcvspbf16 fff0000000000000,2208400000000000 => NaN +Zero 0x2208 +Zero -+xvcvspbf16 2208400000000000,0000000000000009 => 0x2208 +Zero +Zero +Zero -+xvcvspbf16 0000000000000009,ffff000180000001 => +Zero +Zero NaN -Zero -+xvcvspbf16 ffff000180000001,0000000000000000 => NaN -Zero +Zero +Zero -+xvcvspbf16 0000000000000000,8000000000000000 => +Zero +Zero -Zero +Zero -+xvcvspbf16 8000000000000000,7f800000ff800000 => -Zero +Zero +Inf -Inf -+ - xxblendvb 7f800000ff800000,ff8000007f800000 0000000000000000,00000000ffffffff 7f800000ff800000,ff8000007f800000 => 7f800000ff800000 ff8000007f800000 - xxblendvb 7f800000ff800000,ff8000007f800000 
ffffffff55555555,5555aaaaaaaa5555 7f800000ff800000,ff8000007f800000 => 7f800000ff800000 ff8000007f800000 - xxblendvb 7f800000ff800000,ff8000007f800000 aaaa00000000aaaa,0000000000000000 7f800000ff800000,ff8000007f800000 => 7f800000ff800000 ff8000007f800000 -@@ -7318,4 +7346,4 @@ xxspltiw imm3 => 300000003 300000003 - - xxspltiw imm8 => 800000008 800000008 - --All done. Tested 158 different instruction groups -+All done. Tested 160 different instruction groups - -commit c589b652939655090c005a982a71f50c489fb5ce -Author: root -Date: Fri Feb 12 16:00:53 2021 -0500 - - Reduced precision Missing Integer based outer tests - -diff --git a/NEWS b/NEWS -index 2e42e74b2..52a51fd9e 100644 ---- a/NEWS -+++ b/NEWS -@@ -176,6 +176,7 @@ where XXXXXX is the bug number as listed below. - 433500 DRD regtest faulures when libstdc++ and libgcc debuginfo are installed - 433629 valgrind/README has type "abd" instead of "and" - 433641 Rust std::sys::unix::fs::try_statx Syscall param fstatat(file_name) -+433801 PPC ISA 3.1 support is missing, part 10 (ISA 3.1 support complete) - 433898 arm64: Handle sp, lr, fp as DwReg in CfiExpr - 434193 GCC 9+ inlined strcmp causes "Conditional jump or move [..] value" report - n-i-bz helgrind: If hg_cli__realloc fails, return NULL. 
-diff --git a/none/tests/ppc64/test_isa_3_1_AT.c b/none/tests/ppc64/test_isa_3_1_AT.c -index fee76f8f4..e9db9cc9a 100644 ---- a/none/tests/ppc64/test_isa_3_1_AT.c -+++ b/none/tests/ppc64/test_isa_3_1_AT.c -@@ -806,6 +806,114 @@ static void test_pmxvbf16ger2pp_XM11_YM13_PM1 (void) { - __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 11, 13, 1" - :: "wa" (vec_xa), "wa" (vec_xb) ); - } -+static void test_xvi8ger4spp (void) { -+ __asm__ __volatile__ ("xvi8ger4spp 4, %x0, %x1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi8ger4spp_XM0_YM0_PM0 (void) { -+ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 0, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi8ger4spp_XM0_YM0_PM5 (void) { -+ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 0, 0, 5" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi8ger4spp_XM0_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 0, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi8ger4spp_XM0_YM13_PM5 (void) { -+ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 0, 13, 5" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi8ger4spp_XM11_YM0_PM0 (void) { -+ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 11, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi8ger4spp_XM11_YM0_PM5 (void) { -+ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 11, 0, 5" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi8ger4spp_XM11_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 11, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi8ger4spp_XM11_YM13_PM5 (void) { -+ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 11, 13, 5" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_xvi16ger2 (void) { -+ __asm__ __volatile__ ("xvi16ger2 4, %x0, %x1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_xvi16ger2pp (void) { -+ __asm__ __volatile__ 
("xvi16ger2pp 4, %x0, %x1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2_XM0_YM0_PM0 (void) { -+ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 0, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2_XM0_YM0_PM1 (void) { -+ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 0, 0, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2_XM0_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 0, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2_XM0_YM13_PM1 (void) { -+ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 0, 13, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2_XM11_YM0_PM0 (void) { -+ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 11, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2_XM11_YM0_PM1 (void) { -+ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 11, 0, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2_XM11_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 11, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2_XM11_YM13_PM1 (void) { -+ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 11, 13, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2pp_XM0_YM0_PM0 (void) { -+ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 0, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2pp_XM0_YM0_PM1 (void) { -+ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 0, 0, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2pp_XM0_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 0, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2pp_XM0_YM13_PM1 (void) { -+ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 0, 13, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2pp_XM11_YM0_PM0 
(void) { -+ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 11, 0, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2pp_XM11_YM0_PM1 (void) { -+ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 11, 0, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2pp_XM11_YM13_PM0 (void) { -+ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 11, 13, 0" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} -+static void test_pmxvi16ger2pp_XM11_YM13_PM1 (void) { -+ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 11, 13, 1" -+ :: "wa" (vec_xa), "wa" (vec_xb) ); -+} - - static test_list_t testgroup_generic[] = { - { &test_pmxvbf16ger2nn_XM0_YM0_PM0, "pmxvbf16ger2nn XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ -@@ -952,6 +1060,14 @@ static test_list_t testgroup_generic[] = { - { &test_pmxvi8ger4pp_XM11_YM0_PM5, "pmxvi8ger4pp XM11_YM0_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ - { &test_pmxvi8ger4pp_XM11_YM13_PM0, "pmxvi8ger4pp XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ - { &test_pmxvi8ger4pp_XM11_YM13_PM5, "pmxvi8ger4pp XM11_YM13_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi8ger4spp_XM0_YM0_PM0, "pmxvi8ger4spp XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi8ger4spp_XM0_YM0_PM5, "pmxvi8ger4spp XM0_YM0_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi8ger4spp_XM0_YM13_PM0, "pmxvi8ger4spp XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi8ger4spp_XM0_YM13_PM5, "pmxvi8ger4spp XM0_YM13_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi8ger4spp_XM11_YM0_PM0, "pmxvi8ger4spp XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi8ger4spp_XM11_YM0_PM5, "pmxvi8ger4spp XM11_YM0_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi8ger4spp_XM11_YM13_PM0, "pmxvi8ger4spp XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi8ger4spp_XM11_YM13_PM5, "pmxvi8ger4spp XM11_YM13_PM5", 
"AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ - { &test_pmxvi8ger4_XM0_YM0_PM0, "pmxvi8ger4 XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ - { &test_pmxvi8ger4_XM0_YM0_PM5, "pmxvi8ger4 XM0_YM0_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ - { &test_pmxvi8ger4_XM0_YM13_PM0, "pmxvi8ger4 XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -@@ -960,6 +1076,14 @@ static test_list_t testgroup_generic[] = { - { &test_pmxvi8ger4_XM11_YM0_PM5, "pmxvi8ger4 XM11_YM0_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ - { &test_pmxvi8ger4_XM11_YM13_PM0, "pmxvi8ger4 XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ - { &test_pmxvi8ger4_XM11_YM13_PM5, "pmxvi8ger4 XM11_YM13_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2pp_XM0_YM0_PM0, "pmxvi16ger2pp XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2pp_XM0_YM0_PM1, "pmxvi16ger2pp XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2pp_XM0_YM13_PM0, "pmxvi16ger2pp XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2pp_XM0_YM13_PM1, "pmxvi16ger2pp XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2pp_XM11_YM0_PM0, "pmxvi16ger2pp XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2pp_XM11_YM0_PM1, "pmxvi16ger2pp XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2pp_XM11_YM13_PM0, "pmxvi16ger2pp XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2pp_XM11_YM13_PM1, "pmxvi16ger2pp XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ - { &test_pmxvi16ger2spp_XM0_YM0_PM0, "pmxvi16ger2spp XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ - { &test_pmxvi16ger2spp_XM0_YM0_PM1, "pmxvi16ger2spp XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ - { &test_pmxvi16ger2spp_XM0_YM13_PM0, "pmxvi16ger2spp XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -@@ -976,6 +1100,14 @@ static test_list_t testgroup_generic[] = { - { 
&test_pmxvi16ger2s_XM11_YM0_PM1, "pmxvi16ger2s XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ - { &test_pmxvi16ger2s_XM11_YM13_PM0, "pmxvi16ger2s XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ - { &test_pmxvi16ger2s_XM11_YM13_PM1, "pmxvi16ger2s XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2_XM0_YM0_PM0, "pmxvi16ger2 XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2_XM0_YM0_PM1, "pmxvi16ger2 XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2_XM0_YM13_PM0, "pmxvi16ger2 XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2_XM0_YM13_PM1, "pmxvi16ger2 XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2_XM11_YM0_PM0, "pmxvi16ger2 XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2_XM11_YM0_PM1, "pmxvi16ger2 XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2_XM11_YM13_PM0, "pmxvi16ger2 XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ -+ { &test_pmxvi16ger2_XM11_YM13_PM1, "pmxvi16ger2 XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ - { &test_xvbf16ger2nn, "xvbf16ger2nn", "AT,XA,XB", 0b00001111}, /* bcs */ - { &test_xvbf16ger2np, "xvbf16ger2np", "AT,XA,XB", 0b00001111}, /* bcs */ - { &test_xvbf16ger2pn, "xvbf16ger2pn", "AT,XA,XB", 0b00001111}, /* bcs */ -@@ -999,9 +1131,12 @@ static test_list_t testgroup_generic[] = { - { &test_xvi4ger8pp, "xvi4ger8pp", "AT,XA,XB"}, /* bcs */ - { &test_xvi4ger8, "xvi4ger8", "AT,XA,XB"}, /* bcs */ - { &test_xvi8ger4pp, "xvi8ger4pp", "AT,XA,XB"}, /* bcs */ -+ { &test_xvi8ger4spp, "xvi8ger4spp", "AT,XA,XB"}, /* bcs */ - { &test_xvi8ger4, "xvi8ger4", "AT,XA,XB"}, /* bcs */ -+ { &test_xvi16ger2pp, "xvi16ger2pp", "AT,XA,XB"}, /* bcs */ - { &test_xvi16ger2spp, "xvi16ger2spp", "AT,XA,XB"}, /* bcs */ - { &test_xvi16ger2s, "xvi16ger2s", "AT,XA,XB"}, /* bcs */ -+ { &test_xvi16ger2, "xvi16ger2", "AT,XA,XB"}, /* bcs */ - { &test_xxmfacc, "xxmfacc", 
"AS"}, /* bcs */ - { &test_xxmtacc, "xxmtacc", "AT"}, /* bcs */ - { &test_xxsetaccz, "xxsetaccz", "AT"}, /* bcs */ -diff --git a/none/tests/ppc64/test_isa_3_1_AT.stdout.exp b/none/tests/ppc64/test_isa_3_1_AT.stdout.exp -index be3f17ec3..8d1be35b1 100644 ---- a/none/tests/ppc64/test_isa_3_1_AT.stdout.exp -+++ b/none/tests/ppc64/test_isa_3_1_AT.stdout.exp -@@ -1718,6 +1718,86 @@ pmxvi8ger4pp XM11_YM13_PM5 0000000000000009,ffff000180000001 0080000e8080000e,01 - pmxvi8ger4pp XM11_YM13_PM5 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000011111111 1111110911111111 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] - pmxvi8ger4pp XM11_YM13_PM5 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] - -+pmxvi8ger4spp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi8ger4spp XM0_YM0_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM0_PM5 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM0_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM0_PM5 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp 
XM0_YM0_PM5 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM0_PM5 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM0_PM5 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM0_PM5 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM0_PM5 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi8ger4spp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 
0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi8ger4spp XM0_YM13_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM5 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM5 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM5 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM5 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM5 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM5 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM0_YM13_PM5 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi8ger4spp XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM0 
ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi8ger4spp XM11_YM0_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM5 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 
0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM5 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM5 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM5 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM5 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM5 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi8ger4spp XM11_YM0_PM5 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi8ger4spp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 
0000000011111111 *] -+pmxvi8ger4spp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi8ger4spp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi8ger4spp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi8ger4spp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi8ger4spp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi8ger4spp XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi8ger4spp XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi8ger4spp XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+ -+pmxvi8ger4spp XM11_YM13_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000011115075 11114fd511114f95 0000000000000000 0000000000000000 000000001110d111 1110d1111110d111 000000001110d111 1110d1111110d111] -+pmxvi8ger4spp XM11_YM13_PM5 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000011111111 1111150911111111 0000000000000000 0000000000000000 
0000000011111111 11110d1111111111 0000000011111111 11110d1111111111] -+pmxvi8ger4spp XM11_YM13_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi8ger4spp XM11_YM13_PM5 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000011110911 1111091111110911 0000000000000000 0000000000000000 00000000111112b5 11111c1511111fd5 0000000011111395 111121f5111127b5] -+pmxvi8ger4spp XM11_YM13_PM5 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000011111111 1111109111111111 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi8ger4spp XM11_YM13_PM5 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi8ger4spp XM11_YM13_PM5 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 000000001111109f 111110ef1111110f 0000000000000000 0000000000000000 0000000011111111 * 000000001111118f 1111145f1111157f] -+pmxvi8ger4spp XM11_YM13_PM5 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000011111111 1111110911111111 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi8ger4spp XM11_YM13_PM5 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+ - pmxvi8ger4 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] - pmxvi8ger4 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
0000000000000000] - pmxvi8ger4 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -@@ -1798,6 +1878,86 @@ pmxvi8ger4 XM11_YM13_PM5 0000000000000009,ffff000180000001 0080000e8080000e,0180 - pmxvi8ger4 XM11_YM13_PM5 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 fffffff800000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] - pmxvi8ger4 XM11_YM13_PM5 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] - -+pmxvi16ger2pp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi16ger2pp XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM1 
0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi16ger2pp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 
0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi16ger2pp XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM0_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi16ger2pp XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM0 
ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi16ger2pp XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 
0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2pp XM11_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi16ger2pp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 
0000000011111111 *] -+pmxvi16ger2pp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi16ger2pp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi16ger2pp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi16ger2pp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi16ger2pp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi16ger2pp XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi16ger2pp XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi16ger2pp XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+ -+pmxvi16ger2pp XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 00000000111110f5 1111065511110215 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi16ger2pp XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000011111111 1110911111111111 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 
*] -+pmxvi16ger2pp XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi16ger2pp XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000011111111 * 0000000000000000 0000000000000000 00000000111200b5 116cf01511914fd5 000000001112ef95 11c883f5121125b5] -+pmxvi16ger2pp XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 1558911111111111 0000000011111111 199c911111111111] -+pmxvi16ger2pp XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+pmxvi16ger2pp XM11_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 000000001111111f 1111166f1111188f 0000000000000000 0000000000000000 0000000011111111 * 000000001111118f 1111415f1111547f] -+pmxvi16ger2pp XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000011111111 1111511111111111 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 1113511111111111] -+pmxvi16ger2pp XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] -+ - pmxvi16ger2spp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] - pmxvi16ger2spp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] - pmxvi16ger2spp XM0_YM0_PM0 
ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -@@ -1958,6 +2118,86 @@ pmxvi16ger2s XM11_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,01 - pmxvi16ger2s XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000400000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0002400000000000] - pmxvi16ger2s XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] - -+pmxvi16ger2 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 
XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi16ger2 XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 
0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi16ger2 XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
0000000000000000] -+pmxvi16ger2 XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi16ger2 XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 
0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM0_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi16ger2 XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 
0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi16ger2 XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 
XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi16ger2 XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 
0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ -+pmxvi16ger2 XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 00000000ffffffe4 fffff544fffff104 0000000000000000 0000000000000000 0000000000000000 
0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 ffff800000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 000000000000efa4 005bdf0400803ec4 000000000001de84 00b772e4010014a4] -+pmxvi16ger2 XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0447800000000000 0000000000000000 088b800000000000] -+pmxvi16ger2 XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+pmxvi16ger2 XM11_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 000000000000000e 0000055e0000077e 0000000000000000 0000000000000000 0000000000000000 0000000000000000 000000000000007e 0000304e0000436e] -+pmxvi16ger2 XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000400000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0002400000000000] -+pmxvi16ger2 XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] -+ - xvbf16ger2nn 
7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ( +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf -Inf +Inf +Inf -Inf +Inf -Inf -Inf +Inf) - xvbf16ger2nn 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => ( +Inf +Inf NaN NaN -Inf -Inf NaN NaN -Inf -Inf NaN NaN +Inf +Inf NaN NaN) - xvbf16ger2nn 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => ( +Inf NaN NaN NaN -Inf NaN NaN NaN -Inf NaN NaN NaN +Inf NaN NaN NaN) -@@ -2349,6 +2589,23 @@ xvi8ger4pp 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e = - xvi8ger4pp 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => [ 11111def11111111 * 11110cef11111111 * 1111001111111111 * * *] - xvi8ger4pp 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => [ 1111509111111111 1111101211114f92 1111109111111111 1110d0921110d012 1110d11111111111 1110d19111109191 * *] - -+xvi8ger4spp 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => [ 1110d0921110d012 1110d0121110d092 1111101211114f92 11114f9211111012 1111101211114f92 11114f9211111012 1110d0921110d012 1110d0121110d092] -+xvi8ger4spp 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => [ 1110d1101110d110 1111111111111091 1110d1901110d190 1111111111115091 1110d1901110d190 1111111111115091 1110d1101110d110 1111111111111091] -+xvi8ger4spp 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => [ 11110cef11111111 * 11111def11111111 * 11111def11111111 * 11110cef11111111 *] -+xvi8ger4spp 7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => [ 1111109111111111 1110d0921110d012 1111509111111111 1111101211114f92 1111509111111111 1111101211114f92 1111109111111111 1110d0921110d012] -+xvi8ger4spp 0180055e0180077e,0000111e8000222e 7f800000ff800000,ff8000007f800000 => [ * * 1110d19111109191 111091911110d191 1110d1901110d210 1110d2101110d190 1110d1901110d210 1110d2101110d190] -+xvi8ger4spp 0180055e0180077e,0000111e8000222e 
0180055e0180077e,0000111e8000222e => [ 11111c6a1111204c 111115b6111118b7 1111221f11112823 111118b71110ddd9 1110f3af1110ff79 11111c6a1111231f 1110ff7911110f47 1111204c11112923] -+xvi8ger4spp 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => [ 1111155111111111 111111111111121f 1111089111111111 11111111111112af 11110e7311111111 111111111111145f 11110ef311111111 111111111111157f] -+xvi8ger4spp 0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => [ * * 1110d11111111111 1110d19111109191 1111119111111111 1110d1901110d210 1111119111111111 1110d1901110d210] -+xvi8ger4spp 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => [ * * * * 111125ef111136ef 111136ef111125ef * *] -+xvi8ger4spp 2208400000000000,0000000000000009 0180055e0180077e,0000111e8000222e => [ * * 1111145f1111157f 1111121f111112af 11111673111116f3 1111155111112a91 * *] -+xvi8ger4spp 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => [ * * * 1111111111111162 111125d511111111 * * *] -+xvi8ger4spp 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => [ * * * * 1111221111111111 111125ef111136ef * *] -+xvi8ger4spp 8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => [ 1111101211114f92 11114f9211111012 1110d0921110d012 1110d0121110d092 1110d19111109191 111091911110d191 * *] -+xvi8ger4spp 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => [ 1110d1901110d190 1111111111115091 1110d1101110d110 1111111111111091 1111109111111091 111111111110d111 * *] -+xvi8ger4spp 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => [ 11111def11111111 * 11110cef11111111 * 1111001111111111 * * *] -+xvi8ger4spp 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => [ 1111509111111111 1111101211114f92 1111109111111111 1110d0921110d012 1110d11111111111 1110d19111109191 * *] -+ - xvi8ger4 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => [ ffffbf81ffffbf01 
ffffbf01ffffbf81 ffffff0100003e81 00003e81ffffff01 ffffff0100003e81 00003e81ffffff01 ffffbf81ffffbf01 ffffbf01ffffbf81] - xvi8ger4 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => [ ffffbfffffffbfff 00000000ffffff80 ffffc07fffffc07f 0000000000003f80 ffffc07fffffc07f 0000000000003f80 ffffbfffffffbfff 00000000ffffff80] - xvi8ger4 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => [ fffffbde00000000 0000000000000000 00000cde00000000 0000000000000000 00000cde00000000 0000000000000000 fffffbde00000000 0000000000000000] -@@ -2366,6 +2623,23 @@ xvi8ger4 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => - xvi8ger4 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => [ 00000cde00000000 0000000000000000 fffffbde00000000 0000000000000000 ffffef0000000000 0000000000000000 0000000000000000 0000000000000000] - xvi8ger4 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => [ 00003f8000000000 ffffff0100003e81 ffffff8000000000 ffffbf81ffffbf01 ffffc00000000000 ffffc080ffff8080 0000000000000000 0000000000000000] - -+xvi16ger2pp 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => [ 10d1511111115111 1111511110d15111 5091511110d15111 10d1511150915111 5091511110d15111 10d1511150915111 10d1511111115111 1111511110d15111] -+xvi16ger2pp 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => [ 1110511111105111 1111111111511111 11d0511111d05111 11111111d1511111 11d0511111d05111 11111111d1511111 1110511111105111 1111111111511111] -+xvi16ger2pp 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => [ 11000d1111111111 * 22040d1111111111 * 22040d1111111111 * 11000d1111111111 *] -+xvi16ger2pp 7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => [ 1151111111111111 10d1511111115111 d151111111111111 5091511110d15111 d151111111111111 5091511110d15111 1151111111111111 10d1511111115111] -+xvi16ger2pp 0180055e0180077e,0000111e8000222e 
7f800000ff800000,ff8000007f800000 => [ * * d151111111511111 11511111d1511111 11d0511111105111 1110511111d05111 11d0511111105111 1110511111d05111] -+xvi16ger2pp 0180055e0180077e,0000111e8000222e 0180055e0180077e,0000111e8000222e => [ 116cf01511914fd5 12361095135a2075 110883f5115125b5 135a207555a15155 11301f95113b8755 116cf015110883f5 113b8755114b7315 11914fd5115125b5] -+xvi16ger2pp 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => [ 1558911111111111 111111111111ab1f 0898911111111111 11111111111244af 129b9d1111111111 111111111111415f 13239d1111111111 111111111111547f] -+xvi16ger2pp 0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => [ * * 5111111111111111 d151111111511111 1051111111111111 11d0511111105111 1051111111111111 11d0511111105111] -+xvi16ger2pp 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => [ * * * * 22040d1111000d11 11000d1122040d11 * *] -+xvi16ger2pp 2208400000000000,0000000000000009 0180055e0180077e,0000111e8000222e => [ * * 1111415f1111547f 1111ab1f111244af 129b9d1113239d11 1558911108989111 * *] -+xvi16ger2pp 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => [ * * 1113511111111111 1111111111111162 2597315111111111 1111111111135111 * *] -+xvi16ger2pp 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => [ * * * * 000d111111111111 22040d1111000d11 * *] -+xvi16ger2pp 8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => [ 5091511110d15111 10d1511150915111 10d1511111115111 1111511110d15111 d151111111511111 11511111d1511111 * *] -+xvi16ger2pp 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => [ 11d0511111d05111 11111111d1511111 1110511111105111 1111111111511111 1051111110511111 1111111151111111 * *] -+xvi16ger2pp 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => [ 22040d1111111111 * 11000d1111111111 * 000d111111111111 * * *] -+xvi16ger2pp 8000000000000000,7f800000ff800000 
8000000000000000,7f800000ff800000 => [ d151111111111111 5091511110d15111 1151111111111111 10d1511111115111 5111111111111111 d151111111511111 * *] -+ - xvi16ger2spp 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => [ 10d1511111115111 1111511110d15111 5091511110d15111 10d1511150915111 5091511110d15111 10d1511150915111 10d1511111115111 1111511110d15111] - xvi16ger2spp 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => [ 1110511111105111 1111111111511111 11d0511111d05111 11111111d1511111 11d0511111d05111 11111111d1511111 1110511111105111 1111111111511111] - xvi16ger2spp 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => [ 11000d1111111111 * 22040d1111111111 * 22040d1111111111 * 11000d1111111111 *] -@@ -2400,10 +2674,27 @@ xvi16ger2s 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e = - xvi16ger2s 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => [ 10f2fc0000000000 0000000000000000 ffeefc0000000000 0000000000000000 eefc000000000000 0000000000000000 0000000000000000 0000000000000000] - xvi16ger2s 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => [ c040000000000000 3f804000ffc04000 0040000000000000 ffc0400000004000 4000000000000000 c040000000400000 0000000000000000 0000000000000000] - -+xvi16ger2 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => [ ffc0400000004000 00004000ffc04000 3f804000ffc04000 ffc040003f804000 3f804000ffc04000 ffc040003f804000 ffc0400000004000 00004000ffc04000] -+xvi16ger2 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => [ ffff4000ffff4000 0000000000400000 00bf400000bf4000 00000000c0400000 00bf400000bf4000 00000000c0400000 ffff4000ffff4000 0000000000400000] -+xvi16ger2 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => [ ffeefc0000000000 0000000000000000 10f2fc0000000000 0000000000000000 10f2fc0000000000 0000000000000000 ffeefc0000000000 0000000000000000] -+xvi16ger2 
7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => [ 0040000000000000 ffc0400000004000 c040000000000000 3f804000ffc04000 c040000000000000 3f804000ffc04000 0040000000000000 ffc0400000004000] -+xvi16ger2 0180055e0180077e,0000111e8000222e 7f800000ff800000,ff8000007f800000 => [ 0000000000000000 0000000000000000 c040000000400000 00400000c0400000 00bf4000ffff4000 ffff400000bf4000 00bf4000ffff4000 ffff400000bf4000] -+xvi16ger2 0180055e0180077e,0000111e8000222e 0180055e0180077e,0000111e8000222e => [ 005bdf0400803ec4 0124ff8402490f64 fff772e4004014a4 02490f6444904044 001f0e84002a7644 005bdf04fff772e4 002a7644003a6204 00803ec4004014a4] -+xvi16ger2 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => [ 0447800000000000 0000000000009a0e f787800000000000 000000000001339e 018a8c0000000000 000000000000304e 02128c0000000000 000000000000436e] -+xvi16ger2 0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => [ 0000000000000000 0000000000000000 4000000000000000 c040000000400000 ff40000000000000 00bf4000ffff4000 ff40000000000000 00bf4000ffff4000] -+xvi16ger2 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 10f2fc00ffeefc00 ffeefc0010f2fc00 0000000000000000 0000000000000000] -+xvi16ger2 2208400000000000,0000000000000009 0180055e0180077e,0000111e8000222e => [ 0000000000000000 0000000000000000 0000304e0000436e 00009a0e0001339e 018a8c0002128c00 04478000f7878000 0000000000000000 0000000000000000] -+xvi16ger2 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => [ 0000000000000000 0000000000000000 0002400000000000 0000000000000051 1486204000000000 0000000000024000 0000000000000000 0000000000000000] -+xvi16ger2 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 eefc000000000000 10f2fc00ffeefc00 0000000000000000 0000000000000000] -+xvi16ger2 
8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => [ 3f804000ffc04000 ffc040003f804000 ffc0400000004000 00004000ffc04000 c040000000400000 00400000c0400000 0000000000000000 0000000000000000] -+xvi16ger2 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => [ 00bf400000bf4000 00000000c0400000 ffff4000ffff4000 0000000000400000 ff400000ff400000 0000000040000000 0000000000000000 0000000000000000] -+xvi16ger2 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => [ 10f2fc0000000000 0000000000000000 ffeefc0000000000 0000000000000000 eefc000000000000 0000000000000000 0000000000000000 0000000000000000] -+xvi16ger2 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => [ c040000000000000 3f804000ffc04000 0040000000000000 ffc0400000004000 4000000000000000 c040000000400000 0000000000000000 0000000000000000] -+ - xxmfacc [ 7f800000ff800000 ff8000007f800000 ff7ffffe7f7ffffe 0080000e8080000e 0180055e0180077e 0000111e8000222e 7ff0000000000000 fff0000000000000] => [ 7f800000ff800000 ff8000007f800000 ff7ffffe7f7ffffe 0080000e8080000e 0180055e0180077e 0000111e8000222e 7ff0000000000000 fff0000000000000] - - xxmtacc => [ 7f800000ff800000 ff8000007f800000 ff7ffffe7f7ffffe 0080000e8080000e 0180055e0180077e 0000111e8000222e 7ff0000000000000 fff0000000000000] - - xxsetaccz => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] - --All done. Tested 197 different instruction groups -+All done. 
Tested 224 different instruction groups diff --git a/SOURCES/valgrind-3.17.0-ppc64-isa-3.1.patch b/SOURCES/valgrind-3.17.0-ppc64-isa-3.1.patch deleted file mode 100644 index fb8fa50..0000000 --- a/SOURCES/valgrind-3.17.0-ppc64-isa-3.1.patch +++ /dev/null @@ -1,1720 +0,0 @@ -commit 3cc0232c46a5905b4a6c2fbd302b58bf5f90b3d5 -Author: Carl Love -Date: Mon Jan 11 16:00:57 2021 -0600 - - PPC64: ISA 3.1 VSX PCV Generate Operations - - xgenpcvbm VSX Vector Generate PCV from Byte Mask - xxgenpcvdmVSX Vector Generate PCV from Doubleword Mask - xxgenpcvhmVSX Vector Generate PCV from Halfword Mask - xxgenpcvwmVSX Vector Generate PCV from Word Mask - -diff --git a/VEX/priv/guest_ppc_defs.h b/VEX/priv/guest_ppc_defs.h -index deda4dfce..54ce923a9 100644 ---- a/VEX/priv/guest_ppc_defs.h -+++ b/VEX/priv/guest_ppc_defs.h -@@ -169,6 +169,23 @@ void write_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc, - void get_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc, - UInt reg, UInt *result); - -+extern void vector_gen_pvc_byte_mask_dirty_helper( VexGuestPPC64State* gst, -+ ULong src_hi, -+ ULong src_lo, -+ UInt rtn_val, UInt IMM ); -+extern void vector_gen_pvc_hword_mask_dirty_helper( VexGuestPPC64State* gst, -+ ULong src_hi, -+ ULong src_lo, -+ UInt rtn_val, UInt IMM ); -+extern void vector_gen_pvc_word_mask_dirty_helper( VexGuestPPC64State* gst, -+ ULong src_hi, -+ ULong src_lo, -+ UInt rtn_val, UInt IMM ); -+extern void vector_gen_pvc_dword_mask_dirty_helper( VexGuestPPC64State* gst, -+ ULong src_hi, -+ ULong src_lo, -+ UInt rtn_val, UInt IMM ); -+ - /* 8-bit XO value from instruction description */ - #define XVI4GER8 0b00100011 - #define XVI4GER8PP 0b00100010 -diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c -index c24191ef3..75497abb9 100644 ---- a/VEX/priv/guest_ppc_helpers.c -+++ b/VEX/priv/guest_ppc_helpers.c -@@ -701,6 +701,738 @@ ULong vector_evaluate64_helper( ULong srcA, ULong srcB, ULong srcC, - #undef MAX_IMM_BITS - } - 
-+/*--------------------------------------------------*/ -+/*---- VSX Vector Generate PCV from Mask helpers ---*/ -+/*--------------------------------------------------*/ -+static void write_VSX_entry (VexGuestPPC64State* gst, UInt reg_offset, -+ ULong *vsx_entry) -+{ -+ U128* pU128_dst; -+ pU128_dst = (U128*) (((UChar*) gst) + reg_offset); -+ -+ /* The U128 type is defined as an array of unsigned intetgers. */ -+ /* Writing in LE order */ -+ (*pU128_dst)[0] = (UInt)(vsx_entry[1] & 0xFFFFFFFF); -+ (*pU128_dst)[1] = (UInt)(vsx_entry[1] >> 32); -+ (*pU128_dst)[2] = (UInt)(vsx_entry[0] & 0xFFFFFFFF); -+ (*pU128_dst)[3] = (UInt)(vsx_entry[0] >> 32); -+ return; -+} -+ -+/* CALLED FROM GENERATED CODE */ -+void vector_gen_pvc_byte_mask_dirty_helper( VexGuestPPC64State* gst, -+ ULong src_hi, ULong src_lo, -+ UInt reg_offset, UInt imm ) { -+ /* The function computes the 128-bit result then writes it directly -+ into the guest state VSX register. */ -+ -+ UInt i, shift_by, sel_shift_by, half_sel; -+ ULong index, src, result[2]; -+ ULong j; -+ -+ result[0] = 0; -+ result[1] = 0; -+ j = 0; -+ -+ /* The algorithm in the ISA is written with IBM numbering zero on left and -+ N-1 on right. The loop index is converted to "i" to match the algorithm -+ for claritiy of matching the C code to the algorithm in the ISA. 
*/ -+ -+ if (imm == 0b00) { // big endian expansion -+ for( index = 0; index < 16; index++) { -+ i = 15 - index; -+ -+ shift_by = i*8; -+ -+ if ( i >= 8) { -+ src = src_hi; -+ shift_by = shift_by - 64; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = shift_by + 7; -+ -+ if ( ((src >> sel_shift_by) & 0x1) == 1) { -+ result[half_sel] |= j << shift_by; -+ j++; -+ } else { -+ result[half_sel] |= (index + (unsigned long long)0x10) << shift_by; -+ } -+ } -+ -+ -+ } else if (imm == 0b01) { // big endian compression -+ /* If IMM=0b00001, let pcv be the permute control vector required to -+ enable a left-indexed permute (vperm or xxperm) to implement a -+ compression of the sparse byte elements in a source vector specified -+ by the byte-element mask in VSR[VRB+32] into the leftmost byte -+ elements of a result vector. -+ */ -+ for( index = 0; index < 16; index++) { -+ i = 15 - index; -+ shift_by = i*8; -+ -+ if ( i >= 8) { -+ src = src_hi; -+ shift_by = shift_by - 64; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = shift_by + 7; -+ -+ if ( ((src >> sel_shift_by) & 0x1) == 1) { -+ if (j >= 8) -+ result[1] |= (index) << (15 - j)*8; -+ else -+ result[0] |= (index) << (7 - j)*8; -+ j++; -+ } -+ } -+ /* The algorithim says set to undefined, leave as 0 -+ for( index = 3 - j; index < 4; index++) { -+ result |= (0 << (index*8)); -+ } -+ */ -+ -+ } else if (imm == 0b10) { //little-endian expansion -+ /* If IMM=0b00010, let pcv be the permute control vector required to -+ enable a right-indexed permute (vpermr or xxpermr) to implement an -+ expansion of the rightmost byte elements of a source vector into the -+ byte elements of a result vector specified by the byte-element mask -+ in VSR[VRB+32]. 
*/ -+ for( index = 0; index < 16; index++) { -+ i = index; -+ -+ shift_by = i*8; -+ -+ if ( i >= 8) { -+ src = src_hi; -+ shift_by = shift_by - 64; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = shift_by + 7; -+ -+ /* mod shift amount by 8 since src is either the upper or lower -+ 64-bits. */ -+ if ( ((src >> sel_shift_by) & 0x1) == 1) { -+ result[half_sel] |= j << shift_by; -+ j++; -+ } else { -+ result[half_sel] |= (index + (unsigned long long)0x10) << shift_by; -+ } -+ } -+ -+ } else if (imm == 0b11) { //little-endian compression -+ /* If IMM=0b00011, let pcv be the permute control vector required to -+ enable a right-indexed permute (vpermr or xxpermr) to implement a -+ compression of the sparse byte elements in a source vector specified -+ by the byte-element mask in VSR[VRB+32] into the rightmost byte -+ elements of a result vector. */ -+ -+ for( index = 0; index < 16; index++) { -+ i = index; -+ -+ shift_by = i*8; -+ -+ if ( i >= 8) { -+ src = src_hi; -+ shift_by = shift_by - 64; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = shift_by + 7; -+ -+ if ( ((src >> sel_shift_by) & 0x1) == 1) { -+ if (j >= 8) -+ result[0] |= (index) << (j-8)*8; -+ else -+ result[1] |= (index) << j*8; -+ j++; -+ } -+ } -+ -+ /* The algorithim says set to undefined, leave as 0 -+ for( index = 3 - j; index < 4; index++) { -+ result |= (0 << (index*8)); -+ } -+ */ -+ -+ } else { -+ vex_printf("ERROR, vector_gen_pvc_byte_mask_dirty_helper, imm value %u not supported.\n", -+ imm); -+ vassert(0); -+ } -+ write_VSX_entry( gst, reg_offset, result); -+} -+ -+/* CALLED FROM GENERATED CODE */ -+void vector_gen_pvc_hword_mask_dirty_helper( VexGuestPPC64State* gst, -+ ULong src_hi, ULong src_lo, -+ UInt reg_offset, -+ UInt imm ) { -+ /* The function computes the 128-bit result then writes it directly -+ into the guest state VSX register. 
*/ -+ UInt i, shift_by, sel_shift_by, half_sel; -+ ULong index, src, result[2]; -+ ULong j; -+ -+ result[0] = 0; -+ result[1] = 0; -+ j = 0; -+ -+ /* The algorithm in the ISA is written with IBM numbering zero on left and -+ N-1 on right. The loop index is converted to "i" to match the algorithm -+ for claritiy of matching the C code to the algorithm in the ISA. */ -+ -+ if (imm == 0b00) { // big endian expansion -+ /* If IMM=0b00000, let pcv be the permute control vector required to -+ enable a left-indexed permute (vperm or xxperm) to implement an -+ expansion of the leftmost halfword elements of a source vector into -+ the halfword elements of a result vector specified by the halfword- -+ element mask in VSR[VRB+32]. -+ */ -+ for( index = 0; index < 8; index++) { -+ i = 7 - index; -+ -+ shift_by = i*16; -+ -+ if ( i >= 4) { -+ src = src_hi; -+ shift_by = shift_by - 64; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = shift_by + 15; -+ -+ if ( ((src >> sel_shift_by) & 0x1) == 1) { -+ // half-word i, byte 0 -+ result[half_sel] |= (2*j + 0x0) << (shift_by+8); -+ // half-word i, byte 1 -+ result[half_sel] |= (2*j + 0x1) << shift_by; -+ j++; -+ } else { -+ result[half_sel] |= (2*index + 0x10) << (shift_by+8); -+ result[half_sel] |= (2*index + 0x11) << shift_by; -+ } -+ } -+ -+ } else if (imm == 0b01) { // big endian expansion -+ /* If IMM=0b00001,let pcv be the permute control vector required to -+ enable a left-indexed permute (vperm or xxperm) to implement a -+ compression of the sparse halfword elements in a source vector -+ specified by the halfword-element mask in VSR[VRB+32] into the -+ leftmost halfword elements of a result vector. 
-+ */ -+ for( index = 0; index < 8; index++) { -+ i = 7 - index; -+ -+ shift_by = i*16; -+ -+ if ( i >= 4) { -+ src = src_hi; -+ shift_by = shift_by - 64; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = shift_by + 15; -+ -+ if ( ((src >> sel_shift_by) & 0x1) == 1) { -+ if (j >= 4) { -+ // half-word i, byte 0 -+ result[1] |= (2*index + 0x0) << ((7 - j)*16 + 8); -+ // half-word i, byte 1 -+ result[1] |= (2*index + 0x1) << ((7 - j)*16); -+ } else { -+ // half-word i, byte 0 -+ result[0] |= (2*index + 0x0) << ((3 - j)*16 + 8); -+ // half-word i, byte 1 -+ result[0] |= (2*index + 0x1) << ((3 - j)*16); -+ } -+ j++; -+ } -+ } -+ -+ } else if (imm == 0b10) { //little-endian expansion -+ /* If IMM=0b00010, let pcv be the permute control vector required to -+ enable a right-indexed permute (vpermr or xxpermr) to implement an -+ expansion of the rightmost halfword elements of a source vector into -+ the halfword elements of a result vector specified by the halfword- -+ element mask in VSR[VRB+32]. 
-+ */ -+ for( index = 0; index < 8; index++) { -+ i = index; -+ shift_by = i*16; -+ -+ if ( i >= 4) { -+ src = src_hi; -+ shift_by = shift_by - 64; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = shift_by + 15; -+ -+ if ( ((src >> sel_shift_by) & 0x1) == 1) { -+ // half-word i, byte 0 -+ result[half_sel] |= (2*j + 0x00) << shift_by; -+ // half-word i, byte 1 -+ result[half_sel] |= (2*j + 0x01) << (shift_by+8); -+ j++; -+ -+ } else { -+ // half-word i, byte 0 -+ result[half_sel] |= (2*index + 0x10) << shift_by; -+ // half-word i, byte 1 -+ result[half_sel] |= (2*index + 0x11) << (shift_by+8); -+ } -+ } -+ -+ } else if (imm == 0b11) { //little-endian compression -+ /* If IMM=0b00011, let pcv be the permute control vector required to -+ enable a right-indexed permute (vpermr or xxpermr) to implement a -+ compression of the sparse halfword elements in a source vector -+ specified by the halfword-element mask in VSR[VRB+32] into the -+ rightmost halfword elements of a result vector. 
*/ -+ for( index = 0; index < 8; index++) { -+ i = index; -+ shift_by = i*16; -+ -+ if ( i >= 4) { -+ src = src_hi; -+ shift_by = shift_by - 64; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = shift_by + 15; -+ -+ if ( ((src >> sel_shift_by) & 0x1) == 1) { -+ if (j >= 4) { -+ // half-word j, byte 0 -+ result[0] |= (2*index + 0x0) << ((j-4)*16); -+ // half-word j, byte 1 -+ result[0] |= (2*index + 0x1) << ((j-4)*16+8); -+ } else { -+ // half-word j, byte 0 -+ result[1] |= (2*index + 0x0) << (j*16); -+ // half-word j, byte 1 -+ result[1] |= (2*index + 0x1) << ((j*16)+8); -+ } -+ j++; -+ } -+ } -+ -+ } else { -+ vex_printf("ERROR, vector_gen_pvc_hword_dirty_mask_helper, imm value %u not supported.\n", -+ imm); -+ vassert(0); -+ } -+ write_VSX_entry( gst, reg_offset, result); -+} -+ -+/* CALLED FROM GENERATED CODE */ -+void vector_gen_pvc_word_mask_dirty_helper( VexGuestPPC64State* gst, -+ ULong src_hi, ULong src_lo, -+ UInt reg_offset, UInt imm ) { -+ /* The function computes the 128-bit result then writes it directly -+ into the guest state VSX register. */ -+ UInt i, shift_by, sel_shift_by, half_sel; -+ ULong index, src, result[2]; -+ ULong j; -+ -+ result[0] = 0; -+ result[1] = 0; -+ j = 0; -+ -+ /* The algorithm in the ISA is written with IBM numbering zero on left and -+ N-1 on right. The loop index is converted to "i" to match the algorithm -+ for claritiy of matching the C code to the algorithm in the ISA. */ -+ -+ if (imm == 0b00) { // big endian expansion -+ /* If IMM=0b00000, let pcv be the permute control vector required to -+ enable a left-indexed permute (vperm or xxperm) to implement an -+ expansion of the leftmost word elements of a source vector into the -+ word elements of a result vector specified by the word-element mask -+ in VSR[VRB+32]. 
-+ */ -+ for( index = 0; index < 4; index++) { -+ i = 3 - index; -+ -+ shift_by = i*32; -+ -+ if ( i >= 2) { -+ src = src_hi; -+ shift_by = shift_by - 64; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = shift_by + 31; -+ -+ if ( ((src >> sel_shift_by) & 0x1) == 1) { -+ result[half_sel] |= (4*j+0) << (shift_by+24); // word i, byte 0 -+ result[half_sel] |= (4*j+1) << (shift_by+16); // word i, byte 1 -+ result[half_sel] |= (4*j+2) << (shift_by+8); // word i, byte 2 -+ result[half_sel] |= (4*j+3) << shift_by; // word i, byte 3 -+ j++; -+ } else { -+ result[half_sel] |= (4*index + 0x10) << (shift_by+24); -+ result[half_sel] |= (4*index + 0x11) << (shift_by+16); -+ result[half_sel] |= (4*index + 0x12) << (shift_by+8); -+ result[half_sel] |= (4*index + 0x13) << shift_by; -+ } -+ } -+ -+ } else if (imm == 0b01) { // big endian compression -+ /* If IMM=0b00001, let pcv be the permute control vector required to -+ enable a left-indexed permute (vperm or xxperm) to implement a -+ compression of the sparse word elements in a source vector specified -+ by the word-element mask in VSR[VRB+32] into the leftmost word -+ elements of a result vector. 
-+ */ -+ for( index = 0; index < 4; index++) { -+ i = 3 - index; -+ -+ shift_by = i*32; -+ -+ if ( i >= 2) { -+ src = src_hi; -+ shift_by = shift_by - 64; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = shift_by + 31; -+ -+ if (((src >> sel_shift_by) & 0x1) == 1) { -+ if (j >= 2) { -+ // word j, byte 0 -+ result[1] |= (4*index+0) << ((3 - j)*32 + 24); -+ // word j, byte 1 -+ result[1] |= (4*index+1) << ((3 - j)*32 + 16); -+ // word j, byte 2 -+ result[1] |= (4*index+2) << ((3 - j)*32 + 8); -+ // word j, byte 3 -+ result[1] |= (4*index+3) << ((3 - j)*32 + 0); -+ } else { -+ result[0] |= (4*index+0) << ((1 - j)*32 + 24); -+ result[0] |= (4*index+1) << ((1 - j)*32 + 16); -+ result[0] |= (4*index+2) << ((1 - j)*32 + 8); -+ result[0] |= (4*index+3) << ((1 - j)*32 + 0); -+ } -+ j++; -+ } -+ } -+ -+ } else if (imm == 0b10) { //little-endian expansion -+ /* If IMM=0b00010, let pcv be the permute control vector required to -+ enable a right-indexed permute (vpermr or xxpermr) to implement an -+ expansion of the rightmost word elements of a source vector into the -+ word elements of a result vector specified by the word-element mask -+ in VSR[VRB+32]. 
-+ */ -+ for( index = 0; index < 4; index++) { -+ i = index; -+ -+ shift_by = i*32; -+ -+ if ( i >= 2) { -+ src = src_hi; -+ shift_by = shift_by - 64; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = shift_by + 31; -+ -+ if (((src >> sel_shift_by) & 0x1) == 1) { -+ result[half_sel] |= (4*j+0) << (shift_by + 0); // word j, byte 0 -+ result[half_sel] |= (4*j+1) << (shift_by + 8); // word j, byte 1 -+ result[half_sel] |= (4*j+2) << (shift_by + 16); // word j, byte 2 -+ result[half_sel] |= (4*j+3) << (shift_by + 24); // word j, byte 3 -+ j++; -+ } else { -+ result[half_sel] |= (4*index + 0x10) << (shift_by + 0); -+ result[half_sel] |= (4*index + 0x11) << (shift_by + 8); -+ result[half_sel] |= (4*index + 0x12) << (shift_by + 16); -+ result[half_sel] |= (4*index + 0x13) << (shift_by + 24); -+ } -+ } -+ -+ } else if (imm == 0b11) { //little-endian compression -+ /* If IMM=0b00011, let pcv be the permute control vector required to -+ enable a right-indexed permute (vpermr or xxpermr) to implement a -+ compression of the sparse word elements in a source vector specified -+ by the word-element mask in VSR[VRB+32] into the rightmost word -+ elements of a result vector. 
*/ -+ for( index = 0; index < 4; index++) { -+ i =index; -+ -+ shift_by = i*32; -+ -+ if ( i >= 2) { -+ src = src_hi; -+ shift_by = shift_by - 64; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = shift_by + 31; -+ -+ if (((src >> sel_shift_by) & 0x1) == 1) { -+ if (j >= 2){ -+ // word j, byte 0 -+ result[0] |= (4*index + 0x0) << ((j-2)*32+0); -+ // word j, byte 1 -+ result[0] |= (4*index + 0x1) << ((j-2)*32+8); -+ // word j, byte 2 -+ result[0] |= (4*index + 0x2) << ((j-2)*32+16); -+ // word j, byte 3 -+ result[0] |= (4*index + 0x3) << ((j-2)*32+24); -+ } else { -+ result[1] |= (4*index + 0x0) << (j*32+0); -+ result[1] |= (4*index + 0x1) << (j*32+8); -+ result[1] |= (4*index + 0x2) << (j*32+16); -+ result[1] |= (4*index + 0x3) << (j*32+24); -+ } -+ j++; -+ } -+ } -+ } else { -+ vex_printf("ERROR, vector_gen_pvc_word_mask_dirty_helper, imm value %u not supported.\n", -+ imm); -+ vassert(0); -+ } -+ -+ write_VSX_entry( gst, reg_offset, result); -+} -+ -+/* CALLED FROM GENERATED CODE */ -+void vector_gen_pvc_dword_mask_dirty_helper( VexGuestPPC64State* gst, -+ ULong src_hi, ULong src_lo, -+ UInt reg_offset, UInt imm ) { -+ /* The function computes the 128-bit result then writes it directly -+ into the guest state VSX register. */ -+ UInt sel_shift_by, half_sel; -+ ULong index, src, result[2]; -+ ULong j, i; -+ -+ result[0] = 0; -+ result[1] = 0; -+ j = 0; -+ -+ /* The algorithm in the ISA is written with IBM numbering zero on left and -+ N-1 on right. The loop index is converted to "i" to match the algorithm -+ for claritiy of matching the C code to the algorithm in the ISA. 
*/ -+ -+ if (imm == 0b00) { // big endian expansion -+ /* If IMM=0b00000, let pcv be the permute control vector required to -+ enable a left-indexed permute (vperm or xxperm) to implement an -+ expansion of the leftmost doubleword elements of a source vector into -+ the doubleword elements of a result vector specified by the -+ doubleword-element mask in VSR[VRB+32]. -+ */ -+ for( index = 0; index < 2; index++) { -+ i = 1 - index; -+ -+ if ( i == 1) { -+ src = src_hi; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = 63; -+ -+ if ( ((src >> sel_shift_by) & 0x1) == 1) { -+ result[half_sel] |= (8*j + 0x0) << 56; // dword i, byte 0 -+ result[half_sel] |= (8*j + 0x1) << 48; // dword i, byte 1 -+ result[half_sel] |= (8*j + 0x2) << 40; // dword i, byte 2 -+ result[half_sel] |= (8*j + 0x3) << 32; // dword i, byte 3 -+ result[half_sel] |= (8*j + 0x4) << 24; // dword i, byte 4 -+ result[half_sel] |= (8*j + 0x5) << 16; // dword i, byte 5 -+ result[half_sel] |= (8*j + 0x6) << 8; // dword i, byte 6 -+ result[half_sel] |= (8*j + 0x7) << 0; // dword i, byte 7 -+ j++; -+ } else { -+ result[half_sel] |= (8*index + 0x10) << 56; -+ result[half_sel] |= (8*index + 0x11) << 48; -+ result[half_sel] |= (8*index + 0x12) << 40; -+ result[half_sel] |= (8*index + 0x13) << 32; -+ result[half_sel] |= (8*index + 0x14) << 24; -+ result[half_sel] |= (8*index + 0x15) << 16; -+ result[half_sel] |= (8*index + 0x16) << 8; -+ result[half_sel] |= (8*index + 0x17) << 0; -+ } -+ } -+ } else if (imm == 0b01) { // big endian compression -+ /* If IMM=0b00001, let pcv be the the permute control vector required to -+ enable a left-indexed permute (vperm or xxperm) to implement a -+ compression of the sparse doubleword elements in a source vector -+ specified by the doubleword-element mask in VSR[VRB+32] into the -+ leftmost doubleword elements of a result vector. 
-+ */ -+ for( index = 0; index < 2; index++) { -+ i = 1 - index; -+ -+ if ( i == 1) { -+ src = src_hi; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = 63; -+ -+ if ( ((src >> sel_shift_by) & 0x1) == 1) { -+ if (j == 1) { -+ result[1] |= (8*index + 0x0) << 56; // double-word j, byte 0 -+ result[1] |= (8*index + 0x1) << 48; // double-word j, byte 1 -+ result[1] |= (8*index + 0x2) << 40; // double-word j, byte 2 -+ result[1] |= (8*index + 0x3) << 32; // double-word j, byte 3 -+ result[1] |= (8*index + 0x4) << 24; // double-word j, byte 4 -+ result[1] |= (8*index + 0x5) << 16; // double-word j, byte 5 -+ result[1] |= (8*index + 0x6) << 8; // double-word j, byte 6 -+ result[1] |= (8*index + 0x7) << 0; // double-word j, byte 7 -+ } else { -+ result[0] |= (8*index + 0x0) << 56; // double-word j, byte 0 -+ result[0] |= (8*index + 0x1) << 48; // double-word j, byte 1 -+ result[0] |= (8*index + 0x2) << 40; // double-word j, byte 2 -+ result[0] |= (8*index + 0x3) << 32; // double-word j, byte 3 -+ result[0] |= (8*index + 0x4) << 24; // double-word j, byte 4 -+ result[0] |= (8*index + 0x5) << 16; // double-word j, byte 5 -+ result[0] |= (8*index + 0x6) << 8; // double-word j, byte 6 -+ result[0] |= (8*index + 0x7) << 0; // double-word j, byte 7 -+ } -+ j++; -+ } -+ } -+ } else if (imm == 0b10) { //little-endian expansion -+ /* If IMM=0b00010, let pcv be the permute control vector required to -+ enable a right-indexed permute (vpermr or xxpermr) to implement an -+ expansion of the rightmost doubleword elements of a source vector -+ into the doubleword elements of a result vector specified by the -+ doubleword-element mask in VSR[VRB+32]. 
-+ */ -+ -+ for( index = 0; index < 2; index++) { -+ i = index; -+ -+ if ( i == 1) { -+ src = src_hi; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = 63; -+ -+ if ( ((src >> sel_shift_by) & 0x1) == 1) { -+ result[half_sel] |= (8*j+0) << 0; // double-word i, byte 0 -+ result[half_sel] |= (8*j+1) << 8; // double-word i, byte 1 -+ result[half_sel] |= (8*j+2) << 16; // double-word i, byte 2 -+ result[half_sel] |= (8*j+3) << 24; // double-word i, byte 3 -+ result[half_sel] |= (8*j+4) << 32; // double-word i, byte 4 -+ result[half_sel] |= (8*j+5) << 40; // double-word i, byte 5 -+ result[half_sel] |= (8*j+6) << 48; // double-word i, byte 6 -+ result[half_sel] |= (8*j+7) << 56; // double-word i, byte 7 -+ j++; -+ } else { -+ result[half_sel] |= (8*index + 0x10) << 0; -+ result[half_sel] |= (8*index + 0x11) << 8; -+ result[half_sel] |= (8*index + 0x12) << 16; -+ result[half_sel] |= (8*index + 0x13) << 24; -+ result[half_sel] |= (8*index + 0x14) << 32; -+ result[half_sel] |= (8*index + 0x15) << 40; -+ result[half_sel] |= (8*index + 0x16) << 48; -+ result[half_sel] |= (8*index + 0x17) << 56; -+ } -+ } -+ -+ } else if (imm == 0b11) { //little-endian compression -+ /* If IMM=0b00011, let pcv be the permute control vector required to -+ enable a right-indexed permute (vpermr or xxpermr) to implement a -+ compression of the sparse doubleword elements in a source vector -+ specified by the doubleword-element mask in VSR[VRB+32] into the -+ rightmost doubleword elements of a result vector. 
*/ -+ for( index = 0; index < 2; index++) { -+ i = index; -+ -+ if ( i == 1) { -+ src = src_hi; -+ half_sel = 0; -+ } else { -+ src = src_lo; -+ half_sel = 1; -+ } -+ -+ sel_shift_by = 63; -+ -+ if (((src >> sel_shift_by) & 0x1) == 1) { -+ if (j == 1) { -+ result[0] |= (8*index + 0x0) << 0; // double-word j, byte 0 -+ result[0] |= (8*index + 0x1) << 8; // double-word j, byte 1 -+ result[0] |= (8*index + 0x2) << 16; // double-word j, byte 2 -+ result[0] |= (8*index + 0x3) << 24; // double-word j, byte 3 -+ result[0] |= (8*index + 0x4) << 32; // double-word j, byte 4 -+ result[0] |= (8*index + 0x5) << 40; // double-word j, byte 5 -+ result[0] |= (8*index + 0x6) << 48; // double-word j, byte 6 -+ result[0] |= (8*index + 0x7) << 56; // double-word j, byte 7 -+ } else { -+ result[1] |= (8*index + 0x0) << 0; -+ result[1] |= (8*index + 0x1) << 8; -+ result[1] |= (8*index + 0x2) << 16; -+ result[1] |= (8*index + 0x3) << 24; -+ result[1] |= (8*index + 0x4) << 32; -+ result[1] |= (8*index + 0x5) << 40; -+ result[1] |= (8*index + 0x6) << 48; -+ result[1] |= (8*index + 0x7) << 56; -+ } -+ j++; -+ } -+ } -+ } else { -+ vex_printf("ERROR, vector_gen_pvc_dword_mask_helper, imm value %u not supported.\n", -+ imm); -+ vassert(0); -+ } -+ -+ write_VSX_entry( gst, reg_offset, result); -+} - - /*------------------------------------------------*/ - /*---- VSX Matrix signed integer GER functions ---*/ -diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c -index bcabf69dd..354be6b53 100644 ---- a/VEX/priv/guest_ppc_toIR.c -+++ b/VEX/priv/guest_ppc_toIR.c -@@ -3322,6 +3322,7 @@ static IRExpr * locate_vector_ele_eq ( IRTemp src, IRExpr *value, - #define DFORM_IMMASK 0xffffffff - #define DSFORM_IMMASK 0xfffffffc - #define DQFORM_IMMASK 0xfffffff0 -+#define DA8LSFORM_IMMASK 0x3fffffff // Algebraic 8LS Dform - - #define ISA_3_1_PREFIX_CHECK if (prefix) {if (!allow_isa_3_1) goto decode_noIsa3_1;} - -@@ -6109,6 +6110,87 @@ static void vsx_matrix_64bit_float_ger ( const VexAbiInfo* 
vbi, - stmt( IRStmt_Dirty(d) ); - } - -+static void vector_gen_pvc_mask ( const VexAbiInfo* vbi, -+ IRExpr *src, UInt IMM, -+ UInt opc2, UInt VSX_addr ) { -+ /* The function takes a 64-bit source and an immediate value. The function -+ calls a helper to execute the xxgenpcvbm, xxgenpcvhm, xxgenpcvwm, -+ xxgenpcvdm instruction. The instructions are not practical to do with -+ Iops. The instruction is implemented with a dirty helper that -+ calculates the 128-bit result and writes it directly into the guest -+ state VSX register. -+ */ -+ IRTemp src_hi = newTemp( Ity_I64); -+ IRTemp src_lo = newTemp( Ity_I64); -+ -+ IRDirty* d; -+ -+ vassert( (VSX_addr >= 0) && (VSX_addr < 64) ); -+ UInt reg_offset = offsetofPPCGuestState( guest_VSR0 ) -+ + sizeof(U128) * VSX_addr; -+ -+ assign( src_hi, unop( Iop_V128HIto64, src ) ); -+ assign( src_lo, unop( Iop_V128to64, src ) ); -+ -+ IRExpr** args = mkIRExprVec_5( -+ IRExpr_GSPTR(), -+ mkexpr( src_hi ), -+ mkexpr( src_lo ), -+ mkU32( reg_offset ), -+ mkU64( IMM ) ); -+ -+ switch( opc2 ) { -+ case 0x394: // xxgenpcvbm -+ d = unsafeIRDirty_0_N ( -+ 0 /*regparms*/, -+ "vector_gen_pvc_byte_mask_dirty_helper", -+ fnptr_to_fnentry( vbi, -+ &vector_gen_pvc_byte_mask_dirty_helper ), -+ args); -+ break; -+ -+ case 0x395: // xxgenpcvhm -+ d = unsafeIRDirty_0_N ( -+ 0 /*regparms*/, -+ "vector_gen_pvc_hword_mask_dirty_helper", -+ fnptr_to_fnentry( vbi, -+ &vector_gen_pvc_hword_mask_dirty_helper ), -+ args); -+ break; -+ -+ case 0x3B4: // xxgenpcvwm -+ d = unsafeIRDirty_0_N ( -+ 0 /*regparms*/, -+ "vector_gen_pvc_word_mask_dirty_helper", -+ fnptr_to_fnentry( vbi, -+ &vector_gen_pvc_word_mask_dirty_helper ), -+ args); -+ break; -+ -+ case 0x3B5: // xxgenpcvdm -+ d = unsafeIRDirty_0_N ( -+ 0 /*regparms*/, -+ "vector_gen_pvc_dword_mask_dirty_helper", -+ fnptr_to_fnentry( vbi, -+ &vector_gen_pvc_dword_mask_dirty_helper ), -+ args); -+ break; -+ default: -+ vex_printf("ERROR: Unkown instruction = %u in vector_gen_pvc_mask()\n", -+ opc2); -+ return; 
-+ } -+ -+ d->nFxState = 1; -+ vex_bzero(&d->fxState, sizeof(d->fxState)); -+ d->fxState[0].fx = Ifx_Modify; -+ d->fxState[0].size = sizeof(U128); -+ d->fxState[0].offset = reg_offset; -+ -+ /* execute the dirty call, side-effecting guest state */ -+ stmt( IRStmt_Dirty(d) ); -+} -+ - static IRExpr * UNSIGNED_CMP_GT_V128 ( IRExpr *vA, IRExpr *vB ) { - /* This function does an unsigned compare of two V128 values. The - * function is for use in 32-bit mode only as it is expensive. The -@@ -35227,6 +35309,54 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, - return True; - } - -+static Bool dis_vector_generate_pvc_from_mask ( UInt prefix, -+ UInt theInstr, -+ const VexAbiInfo* vbi ) -+{ -+ UChar XT_addr = ifieldRegXT(theInstr); -+ UChar vB_addr = ifieldRegB(theInstr); -+ IRTemp vB = newTemp( Ity_V128 ); -+ UInt opc2 = ifieldOPClo10(theInstr); -+ UInt IMM = IFIELD(theInstr, (31-15), 5); // bits[11:15] -+ -+ assign( vB, getVReg( vB_addr ) ); -+ -+ switch( opc2 ) { -+ case 0x394: -+ DIP("xxgenpcvbm v%u,v%u,%u\n", XT_addr, vB_addr, IMM); -+ /* vector_gen_pvc_mask uses a dirty helper to calculate the result and -+ write it to the VSX result register. */ -+ vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr ); -+ break; -+ -+ case 0x395: -+ DIP("xxgenpcvhm v%u,v%u,%u\n", XT_addr, vB_addr, IMM); -+ /* vector_gen_pvc_mask uses a dirty helper to calculate the result and -+ write it to the VSX result register. */ -+ vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr ); -+ break; -+ -+ case 0x3B4: -+ DIP("xxgenpcvwm v%u,v%u,%u\n", XT_addr, vB_addr, IMM); -+ /* vector_gen_pvc_mask uses a dirty helper to calculate the result and -+ write it to the VSX result register. */ -+ vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr ); -+ break; -+ -+ case 0x3B5: -+ DIP("xxgenpcvdm v%u,v%u,%u\n", XT_addr, vB_addr, IMM); -+ /* vector_gen_pvc_mask uses a dirty helper to calculate the result and -+ write it to the VSX result register. 
*/ -+ vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr ); -+ break; -+ -+ default: -+ return False; -+ } -+ -+ return True; -+} -+ - static Int dis_nop_prefix ( UInt prefix, UInt theInstr ) - { - Bool is_prefix = prefix_instruction( prefix ); -@@ -35748,14 +35878,9 @@ DisResult disInstr_PPC_WRK ( - } - goto decode_failure; - -- case 0x31: // lfsu, stxv -+ case 0x31: // lfsu - if (!allow_F) goto decode_noF; -- if (prefix_instruction( prefix )) { // stxv -- if ( !(allow_isa_3_1) ) goto decode_noIsa3_1; -- if (dis_fp_pair_prefix( prefix, theInstr )) goto decode_success; -- } else { // lfsu -- if (dis_fp_load( prefix, theInstr )) goto decode_success; -- } -+ if (dis_fp_load( prefix, theInstr )) goto decode_success; - goto decode_failure; - - case 0x32: -@@ -35842,7 +35967,6 @@ DisResult disInstr_PPC_WRK ( - case 0x39: // pld, lxsd, lxssp, lfdp - { - UInt opc2tmp = ifieldOPC0o2(theInstr); -- - if (!allow_F) goto decode_noF; - if (prefix_instruction( prefix )) { // pld - if ( !(allow_isa_3_1) ) goto decode_noIsa3_1; -@@ -36125,12 +36249,6 @@ DisResult disInstr_PPC_WRK ( - goto decode_failure; - } - -- /* The vsxOpc2 returned is the "normalized" value, representing the -- * instructions secondary opcode as taken from the standard secondary -- * opcode field [21:30] (IBM notatition), even if the actual field -- * is non-standard. These normalized values are given in the opcode -- * appendices of the ISA 2.06 document. 
-- */ - if ( ( opc2 == 0x168 ) && ( IFIELD( theInstr, 19, 2 ) == 0 ) )// xxspltib - { - /* This is a special case of the XX1 form where the RA, RB -@@ -36153,6 +36271,23 @@ DisResult disInstr_PPC_WRK ( - goto decode_failure; - } - -+ if ( ( opc2 == 0x394 ) || // xxgenpcvbm -+ ( opc2 == 0x395 ) || // xxgenpcvwm -+ ( opc2 == 0x3B4 ) || // xxgenpcvhm -+ ( opc2 == 0x3B5 ) ) { // xxgenpcvdm -+ if ( !(allow_isa_3_1) ) goto decode_noIsa3_1; -+ if (dis_vector_generate_pvc_from_mask( prefix, theInstr, -+ abiinfo )) -+ goto decode_success; -+ goto decode_failure; -+ } -+ -+ /* The vsxOpc2 returned is the "normalized" value, representing the -+ * instructions secondary opcode as taken from the standard secondary -+ * opcode field [21:30] (IBM notatition), even if the actual field -+ * is non-standard. These normalized values are given in the opcode -+ * appendices of the ISA 2.06 document. -+ */ - vsxOpc2 = get_VSX60_opc2(opc2, theInstr); - - switch (vsxOpc2) { -commit 078f89e99b6f62e043f6138c6a7ae238befc1f2a -Author: Carl Love -Date: Fri Feb 26 15:46:55 2021 -0600 - - PPC64: Reduced-Precision - bfloat16 Outer Product & Format Conversion Operations - - Add support for: - - pmxvbf16ger2 Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) - pmxvbf16ger2pp Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive - multiply, Positive accumulate - pmxvbf16ger2pn Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive - multiply, Negative accumulate - pmxvbf16ger2np Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative - multiply, Positive accumulate - pmxvbf16ger2nn Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative - multiply, Negative accumulate - xvbf16ger2VSX Vector bfloat16 GER (Rank-2 Update) - xvbf16ger2pp VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive - accumulate - xvbf16ger2pn VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Negative - accumulate - xvbf16ger2np VSX Vector bfloat16 GER (Rank-2 
Update) Negative multiply, Positive - accumulate - xvbf16ger2nn VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Negative - accumulate - xvcvbf16sp VSX Vector Convert bfloat16 to Single-Precision format - xvcvspbf16 VSX Vector Convert with round Single-Precision to bfloat16 format - -diff --git a/VEX/priv/guest_ppc_defs.h b/VEX/priv/guest_ppc_defs.h -index 54ce923a9..d36d6c07d 100644 ---- a/VEX/priv/guest_ppc_defs.h -+++ b/VEX/priv/guest_ppc_defs.h -@@ -150,6 +150,8 @@ extern ULong convert_to_zoned_helper( ULong src_hi, ULong src_low, - ULong return_upper ); - extern ULong convert_to_national_helper( ULong src, ULong return_upper ); - extern ULong convert_from_zoned_helper( ULong src_hi, ULong src_low ); -+extern ULong convert_from_floattobf16_helper( ULong src ); -+extern ULong convert_from_bf16tofloat_helper( ULong src ); - extern ULong convert_from_national_helper( ULong src_hi, ULong src_low ); - extern ULong generate_C_FPCC_helper( ULong size, ULong src_hi, ULong src ); - extern ULong extract_bits_under_mask_helper( ULong src, ULong mask, -@@ -201,6 +203,11 @@ extern void vector_gen_pvc_dword_mask_dirty_helper( VexGuestPPC64State* gst, - #define XVF16GER2PN 0b10010010 - #define XVF16GER2NP 0b01010010 - #define XVF16GER2NN 0b11010010 -+#define XVBF16GER2 0b00110011 -+#define XVBF16GER2PP 0b00110010 -+#define XVBF16GER2PN 0b10110010 -+#define XVBF16GER2NP 0b01110010 -+#define XVBF16GER2NN 0b11110010 - #define XVF32GER 0b00011011 - #define XVF32GERPP 0b00011010 - #define XVF32GERPN 0b10011010 -diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c -index 75497abb9..6bcee966d 100644 ---- a/VEX/priv/guest_ppc_helpers.c -+++ b/VEX/priv/guest_ppc_helpers.c -@@ -1905,6 +1905,125 @@ static Double conv_f16_to_double( ULong input ) - # endif - } - -+#define BF16_SIGN_MASK 0x8000 -+#define BF16_EXP_MASK 0x7F80 -+#define BF16_FRAC_MASK 0x007F -+#define BF16_BIAS 127 -+#define BF16_MAX_UNBIASED_EXP 127 -+#define BF16_MIN_UNBIASED_EXP -126 
-+#define FLOAT_SIGN_MASK 0x80000000 -+#define FLOAT_EXP_MASK 0x7F800000 -+#define FLOAT_FRAC_MASK 0x007FFFFF -+#define FLOAT_FRAC_BIT8 0x00008000 -+#define FLOAT_BIAS 127 -+ -+static Float conv_bf16_to_float( UInt input ) -+{ -+ /* input is 16-bit bfloat. -+ bias +127, exponent 8-bits, fraction 7-bits -+ -+ output is 32-bit float. -+ bias +127, exponent 8-bits, fraction 22-bits -+ */ -+ -+ UInt input_exp, input_fraction, unbiased_exp; -+ UInt output_exp, output_fraction; -+ UInt sign; -+ union convert_t conv; -+ -+ sign = (UInt)(input & BF16_SIGN_MASK); -+ input_exp = input & BF16_EXP_MASK; -+ unbiased_exp = (input_exp >> 7) - (UInt)BF16_BIAS; -+ input_fraction = input & BF16_FRAC_MASK; -+ -+ if (((input_exp & BF16_EXP_MASK) == BF16_EXP_MASK) && -+ (input_fraction != 0)) { -+ /* input is NaN or SNaN, exp all 1's, fraction != 0 */ -+ output_exp = FLOAT_EXP_MASK; -+ output_fraction = input_fraction; -+ -+ } else if(((input_exp & BF16_EXP_MASK) == BF16_EXP_MASK) && -+ ( input_fraction == 0)) { -+ /* input is infinity, exp all 1's, fraction = 0 */ -+ output_exp = FLOAT_EXP_MASK; -+ output_fraction = 0; -+ -+ } else if((input_exp == 0) && (input_fraction == 0)) { -+ /* input is zero */ -+ output_exp = 0; -+ output_fraction = 0; -+ -+ } else if((input_exp == 0) && (input_fraction != 0)) { -+ /* input is denormal */ -+ output_fraction = input_fraction; -+ output_exp = (-(Int)BF16_BIAS + (Int)FLOAT_BIAS ) << 23; -+ -+ } else { -+ /* result is normal */ -+ output_exp = (unbiased_exp + FLOAT_BIAS) << 23; -+ output_fraction = input_fraction; -+ } -+ -+ conv.u32 = sign << (31 - 15) | output_exp | (output_fraction << (23-7)); -+ return conv.f; -+} -+ -+static UInt conv_float_to_bf16( UInt input ) -+{ -+ /* input is 32-bit float stored as unsigned 32-bit. -+ bias +127, exponent 8-bits, fraction 23-bits -+ -+ output is 16-bit bfloat. 
-+ bias +127, exponent 8-bits, fraction 7-bits -+ -+ If the unbiased exponent of the input is greater than the max floating -+ point unbiased exponent value, the result of the floating point 16-bit -+ value is infinity. -+ */ -+ -+ UInt input_exp, input_fraction; -+ UInt output_exp, output_fraction; -+ UInt result, sign; -+ -+ sign = input & FLOAT_SIGN_MASK; -+ input_exp = input & FLOAT_EXP_MASK; -+ input_fraction = input & FLOAT_FRAC_MASK; -+ -+ if (((input_exp & FLOAT_EXP_MASK) == FLOAT_EXP_MASK) && -+ (input_fraction != 0)) { -+ /* input is NaN or SNaN, exp all 1's, fraction != 0 */ -+ output_exp = BF16_EXP_MASK; -+ output_fraction = (ULong)input_fraction >> (23 - 7); -+ } else if (((input_exp & FLOAT_EXP_MASK) == FLOAT_EXP_MASK) && -+ ( input_fraction == 0)) { -+ /* input is infinity, exp all 1's, fraction = 0 */ -+ output_exp = BF16_EXP_MASK; -+ output_fraction = 0; -+ } else if ((input_exp == 0) && (input_fraction == 0)) { -+ /* input is zero */ -+ output_exp = 0; -+ output_fraction = 0; -+ } else if ((input_exp == 0) && (input_fraction != 0)) { -+ /* input is denormal */ -+ output_exp = 0; -+ output_fraction = (ULong)input_fraction >> (23 - 7); -+ } else { -+ /* result is normal */ -+ output_exp = (input_exp - BF16_BIAS + FLOAT_BIAS) >> (23 - 7); -+ output_fraction = (ULong)input_fraction >> (23 - 7); -+ -+ /* Round result. Look at the 8th bit position of the 32-bit floating -+ pointt fraction. The F16 fraction is only 7 bits wide so if the 8th -+ bit of the F32 is a 1 we need to round up by adding 1 to the output -+ fraction. 
*/ -+ if ((input_fraction & FLOAT_FRAC_BIT8) == FLOAT_FRAC_BIT8) -+ /* Round the F16 fraction up by 1 */ -+ output_fraction = output_fraction + 1; -+ } -+ -+ result = sign >> (31 - 15) | output_exp | output_fraction; -+ return result; -+} - - static Float conv_double_to_float( Double src ) - { -@@ -1942,6 +2061,36 @@ static Float negate_float( Float input ) - return -input; - } - -+/* This C-helper takes a vector of two 32-bit floating point values -+ * and returns a vector containing two 16-bit bfloats. -+ input: word0 word1 -+ output 0x0 hword1 0x0 hword3 -+ Called from generated code. -+ */ -+ULong convert_from_floattobf16_helper( ULong src ) { -+ ULong resultHi, resultLo; -+ -+ resultHi = (ULong)conv_float_to_bf16( (UInt)(src >> 32)); -+ resultLo = (ULong)conv_float_to_bf16( (UInt)(src & 0xFFFFFFFF)); -+ return (resultHi << 32) | resultLo; -+ -+} -+ -+/* This C-helper takes a vector of two 16-bit bfloating point values -+ * and returns a vector containing one 32-bit float. -+ input: 0x0 hword1 0x0 hword3 -+ output: word0 word1 -+ */ -+ULong convert_from_bf16tofloat_helper( ULong src ) { -+ ULong result; -+ union convert_t conv; -+ conv.f = conv_bf16_to_float( (UInt)(src >> 32) ); -+ result = (ULong) conv.u32; -+ conv.f = conv_bf16_to_float( (UInt)(src & 0xFFFFFFFF)); -+ result = (result << 32) | (ULong) conv.u32; -+ return result; -+ } -+ - void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst, - UInt offset_ACC, - ULong srcA_hi, ULong srcA_lo, -@@ -2002,24 +2151,44 @@ void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst, - srcB_word[0][j] = (UInt)((srcB_lo >> (16-16*j)) & mask); - } - -+ /* Note the isa is not consistent in the src naming. Will use the -+ naming src10, src11, src20, src21 used with xvf16ger2 instructions. 
-+ */ - for( j = 0; j < 4; j++) { - if (((pmsk >> 1) & 0x1) == 0) { - src10 = 0; - src20 = 0; - } else { -- src10 = conv_f16_to_double((ULong)srcA_word[i][0]); -- src20 = conv_f16_to_double((ULong)srcB_word[j][0]); -+ if (( inst == XVF16GER2 ) || ( inst == XVF16GER2PP ) -+ || ( inst == XVF16GER2PN ) || ( inst == XVF16GER2NP ) -+ || ( inst == XVF16GER2NN )) { -+ src10 = conv_f16_to_double((ULong)srcA_word[i][0]); -+ src20 = conv_f16_to_double((ULong)srcB_word[j][0]); -+ } else { -+ /* Input is in bfloat format, result is stored in the -+ "traditional" 64-bit float format. */ -+ src10 = (double)conv_bf16_to_float((ULong)srcA_word[i][0]); -+ src20 = (double)conv_bf16_to_float((ULong)srcB_word[j][0]); -+ } - } - - if ((pmsk & 0x1) == 0) { - src11 = 0; - src21 = 0; - } else { -- src11 = conv_f16_to_double((ULong)srcA_word[i][1]); -- src21 = conv_f16_to_double((ULong)srcB_word[j][1]); -+ if (( inst == XVF16GER2 ) || ( inst == XVF16GER2PP ) -+ || ( inst == XVF16GER2PN ) || ( inst == XVF16GER2NP ) -+ || ( inst == XVF16GER2NN )) { -+ src11 = conv_f16_to_double((ULong)srcA_word[i][1]); -+ src21 = conv_f16_to_double((ULong)srcB_word[j][1]); -+ } else { -+ /* Input is in bfloat format, result is stored in the -+ "traditional" 64-bit float format. */ -+ src11 = (double)conv_bf16_to_float((ULong)srcA_word[i][1]); -+ src21 = (double)conv_bf16_to_float((ULong)srcB_word[j][1]); -+ } - } - -- - prod = src10 * src20; - msum = prod + src11 * src21; - -@@ -2027,26 +2196,26 @@ void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst, - /* Note, we do not track the exception handling bits - ox, ux, xx, si, mz, vxsnan and vximz in the FPSCR. 
*/ - -- if ( inst == XVF16GER2 ) -+ if (( inst == XVF16GER2 ) || ( inst == XVBF16GER2 ) ) - result[j] = reinterpret_float_as_int( - conv_double_to_float(msum) ); - -- else if ( inst == XVF16GER2PP ) -+ else if (( inst == XVF16GER2PP ) || (inst == XVBF16GER2PP )) - result[j] = reinterpret_float_as_int( - conv_double_to_float(msum) - + acc_word[j] ); - -- else if ( inst == XVF16GER2PN ) -+ else if (( inst == XVF16GER2PN ) || ( inst == XVBF16GER2PN )) - result[j] = reinterpret_float_as_int( - conv_double_to_float(msum) - + negate_float( acc_word[j] ) ); - -- else if ( inst == XVF16GER2NP ) -+ else if (( inst == XVF16GER2NP ) || ( inst == XVBF16GER2NP )) - result[j] = reinterpret_float_as_int( - conv_double_to_float( negate_double( msum ) ) - + acc_word[j] ); - -- else if ( inst == XVF16GER2NN ) -+ else if (( inst == XVF16GER2NN ) || ( inst == XVBF16GER2NN )) - result[j] = reinterpret_float_as_int( - conv_double_to_float( negate_double( msum ) ) - + negate_float( acc_word[j] ) ); -diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c -index 354be6b53..20553a539 100644 ---- a/VEX/priv/guest_ppc_toIR.c -+++ b/VEX/priv/guest_ppc_toIR.c -@@ -5688,6 +5688,57 @@ static IRExpr * convert_from_national ( const VexAbiInfo* vbi, IRExpr *src ) { - return mkexpr( result ); - } - -+static IRExpr * vector_convert_floattobf16 ( const VexAbiInfo* vbi, -+ IRExpr *src ) { -+ /* The function takes 128-bit value containing four 32-bit floats and -+ returns a 128-bit value containint four 16-bit bfloats in the lower -+ halfwords. 
*/ -+ -+ IRTemp resultHi = newTemp( Ity_I64); -+ IRTemp resultLo = newTemp( Ity_I64); -+ -+ assign( resultHi, -+ mkIRExprCCall( Ity_I64, 0 /*regparms*/, -+ "vector_convert_floattobf16_helper", -+ fnptr_to_fnentry( vbi, -+ &convert_from_floattobf16_helper ), -+ mkIRExprVec_1( unop( Iop_V128HIto64, src ) ) ) ); -+ -+ assign( resultLo, -+ mkIRExprCCall( Ity_I64, 0 /*regparms*/, -+ "vector_convert_floattobf16_helper", -+ fnptr_to_fnentry( vbi, -+ &convert_from_floattobf16_helper ), -+ mkIRExprVec_1( unop( Iop_V128to64, src ) ) ) ); -+ -+ return binop( Iop_64HLtoV128, mkexpr( resultHi ), mkexpr( resultLo ) ); -+} -+ -+static IRExpr * vector_convert_bf16tofloat ( const VexAbiInfo* vbi, -+ IRExpr *src ) { -+ /* The function takes 128-bit value containing four 16-bit bfloats in -+ the lower halfwords and returns a 128-bit value containint four -+ 32-bit floats. */ -+ IRTemp resultHi = newTemp( Ity_I64); -+ IRTemp resultLo = newTemp( Ity_I64); -+ -+ assign( resultHi, -+ mkIRExprCCall( Ity_I64, 0 /*regparms*/, -+ "vector_convert_bf16tofloat_helper", -+ fnptr_to_fnentry( vbi, -+ &convert_from_bf16tofloat_helper ), -+ mkIRExprVec_1( unop( Iop_V128HIto64, src ) ) ) ); -+ -+ assign( resultLo, -+ mkIRExprCCall( Ity_I64, 0 /*regparms*/, -+ "vector_convert_bf16tofloat_helper", -+ fnptr_to_fnentry( vbi, -+ &convert_from_bf16tofloat_helper ), -+ mkIRExprVec_1( unop( Iop_V128to64, src ) ) ) ); -+ -+ return binop( Iop_64HLtoV128, mkexpr( resultHi ), mkexpr( resultLo ) ); -+} -+ - static IRExpr * popcnt64 ( const VexAbiInfo* vbi, - IRExpr *src ){ - /* The function takes a 64-bit source and counts the number of bits in the -@@ -5936,6 +5987,7 @@ static void vsx_matrix_ger ( const VexAbiInfo* vbi, - case XVI16GER2: - case XVI16GER2S: - case XVF16GER2: -+ case XVBF16GER2: - case XVF32GER: - AT_fx = Ifx_Write; - break; -@@ -5943,6 +5995,10 @@ static void vsx_matrix_ger ( const VexAbiInfo* vbi, - case XVI8GER4PP: - case XVI16GER2PP: - case XVI16GER2SPP: -+ case XVBF16GER2PP: -+ case 
XVBF16GER2PN: -+ case XVBF16GER2NP: -+ case XVBF16GER2NN: - case XVF16GER2PP: - case XVF16GER2PN: - case XVF16GER2NP: -@@ -23899,6 +23955,24 @@ dis_vxs_misc( UInt prefix, UInt theInstr, const VexAbiInfo* vbi, UInt opc2, - mkexpr( sub_element1 ), - mkexpr( sub_element0 ) ) ) ); - -+ } else if ((inst_select == 16) && !prefix) { -+ IRTemp result = newTemp(Ity_V128); -+ UChar xT_addr = ifieldRegXT ( theInstr ); -+ UChar xB_addr = ifieldRegXB ( theInstr ); -+ /* Convert 16-bit bfloat to 32-bit float, not a prefix inst */ -+ DIP("xvcvbf16sp v%u,v%u\n", xT_addr, xB_addr); -+ assign( result, vector_convert_bf16tofloat( vbi, mkexpr( vB ) ) ); -+ putVSReg( XT, mkexpr( result) ); -+ -+ } else if ((inst_select == 17) && !prefix) { -+ IRTemp result = newTemp(Ity_V128); -+ UChar xT_addr = ifieldRegXT ( theInstr ); -+ UChar xB_addr = ifieldRegXB ( theInstr ); -+ /* Convert 32-bit float to 16-bit bfloat, not a prefix inst */ -+ DIP("xvcvspbf16 v%u,v%u\n", xT_addr, xB_addr); -+ assign( result, vector_convert_floattobf16( vbi, mkexpr( vB ) ) ); -+ putVSReg( XT, mkexpr( result) ); -+ - } else if (inst_select == 23) { - DIP("xxbrd v%u, v%u\n", (UInt)XT, (UInt)XB); - -@@ -34956,6 +35030,41 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, - getVSReg( rB_addr ), AT, - ( ( inst_prefix << 8 ) | XO ) ); - break; -+ case XVBF16GER2: -+ DIP("xvbf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr); -+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, -+ getVSReg( rA_addr ), -+ getVSReg( rB_addr ), AT, -+ ( ( inst_prefix << 8 ) | XO ) ); -+ break; -+ case XVBF16GER2PP: -+ DIP("xvbf16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr); -+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, -+ getVSReg( rA_addr ), -+ getVSReg( rB_addr ), AT, -+ ( ( inst_prefix << 8 ) | XO ) ); -+ break; -+ case XVBF16GER2PN: -+ DIP("xvbf16ger2pn %u,r%u, r%u\n", AT, rA_addr, rB_addr); -+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, -+ getVSReg( rA_addr ), -+ getVSReg( rB_addr ), AT, -+ ( ( inst_prefix << 8 ) | XO ) 
); -+ break; -+ case XVBF16GER2NP: -+ DIP("xvbf16ger2np %u,r%u, r%u\n", AT, rA_addr, rB_addr); -+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, -+ getVSReg( rA_addr ), -+ getVSReg( rB_addr ), AT, -+ ( ( inst_prefix << 8 ) | XO ) ); -+ break; -+ case XVBF16GER2NN: -+ DIP("xvbf16ger2nn %u,r%u, r%u\n", AT, rA_addr, rB_addr); -+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, -+ getVSReg( rA_addr ), -+ getVSReg( rB_addr ), AT, -+ ( ( inst_prefix << 8 ) | XO ) ); -+ break; - case XVF32GER: - DIP("xvf32ger %u,r%u, r%u\n", AT, rA_addr, rB_addr); - vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER, -@@ -35106,6 +35215,61 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, - AT, - ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); - break; -+ case XVBF16GER2: -+ PMSK = IFIELD( prefix, 14, 2); -+ XMSK = IFIELD( prefix, 4, 4); -+ YMSK = IFIELD( prefix, 0, 4); -+ DIP("pmxvbf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr); -+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, -+ getVSReg( rA_addr ), -+ getVSReg( rB_addr ), -+ AT, ( (MASKS << 9 ) -+ | ( inst_prefix << 8 ) | XO ) ); -+ break; -+ case XVBF16GER2PP: -+ PMSK = IFIELD( prefix, 14, 2); -+ XMSK = IFIELD( prefix, 4, 4); -+ YMSK = IFIELD( prefix, 0, 4); -+ DIP("pmxvbf16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr); -+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, -+ getVSReg( rA_addr ), -+ getVSReg( rB_addr ), -+ AT, ( (MASKS << 9 ) -+ | ( inst_prefix << 8 ) | XO ) ); -+ break; -+ case XVBF16GER2PN: -+ PMSK = IFIELD( prefix, 14, 2); -+ XMSK = IFIELD( prefix, 4, 4); -+ YMSK = IFIELD( prefix, 0, 4); -+ DIP("pmxvbf16ger2pn %u,r%u, r%u\n", AT, rA_addr, rB_addr); -+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, -+ getVSReg( rA_addr ), -+ getVSReg( rB_addr ), -+ AT, ( (MASKS << 9 ) -+ | ( inst_prefix << 8 ) | XO ) ); -+ break; -+ case XVBF16GER2NP: -+ PMSK = IFIELD( prefix, 14, 2); -+ XMSK = IFIELD( prefix, 4, 4); -+ YMSK = IFIELD( prefix, 0, 4); -+ DIP("pmxvbf16ger2np %u,r%u, r%u\n", AT, rA_addr, rB_addr); -+ vsx_matrix_ger( vbi, 
MATRIX_16BIT_FLOAT_GER, -+ getVSReg( rA_addr ), -+ getVSReg( rB_addr ), -+ AT, ( (MASKS << 9 ) -+ | ( inst_prefix << 8 ) | XO ) ); -+ break; -+ case XVBF16GER2NN: -+ PMSK = IFIELD( prefix, 14, 2); -+ XMSK = IFIELD( prefix, 4, 4); -+ YMSK = IFIELD( prefix, 0, 4); -+ DIP("pmxvbf16ger2nn %u,r%u, r%u\n", AT, rA_addr, rB_addr); -+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, -+ getVSReg( rA_addr ), -+ getVSReg( rB_addr ), -+ AT, ( (MASKS << 9 ) -+ | ( inst_prefix << 8 ) | XO ) ); -+ break; - case XVF16GER2: - PMSK = IFIELD( prefix, 14, 2); - XMSK = IFIELD( prefix, 4, 4); -@@ -36181,6 +36345,11 @@ DisResult disInstr_PPC_WRK ( - (opc2 == XVI4GER8PP) || // xvi4ger8pp - (opc2 == XVI8GER4) || // xvi8ger4 - (opc2 == XVI8GER4PP) || // xvi8ger4pp -+ (opc2 == XVBF16GER2) || // xvbf16ger2 -+ (opc2 == XVBF16GER2PP) || // xvbf16ger2pp -+ (opc2 == XVBF16GER2PN) || // xvbf16ger2pn -+ (opc2 == XVBF16GER2NP) || // xvbf16ger2np -+ (opc2 == XVBF16GER2NN) || // xvbf16ger2nn - (opc2 == XVF16GER2) || // xvf16ger2 - (opc2 == XVF16GER2PP) || // xvf16ger2pp - (opc2 == XVF16GER2PN) || // xvf16ger2pn -commit e09fdaf569b975717465ed8043820d0198d4d47d -Author: Carl Love -Date: Fri Feb 26 16:05:12 2021 -0600 - - PPC64: Reduced-Precision: Missing Integer-based Outer Product Operations - - Add support for: - - pmxvi16ger2 VSX Vector 16-bit Signed Integer GER (rank-2 update), Prefixed - Masked - pmxvi16ger2pp VSX Vector 16-bit Signed Integer GER (rank-2 update) (Positive - multiply, Positive accumulate), Prefixed Masked - pmxvi8ger4spp VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with - Saturation (Positive multiply, Positive accumulate), Prefixed Masked - xvi16ger2 VSX Vector 16-bit Signed Integer GER (rank-2 update) - xvi16ger2pp VSX Vector 16-bit Signed Integer GER (rank-2 update) (Positive - multiply, Positive accumulate) - xvi8ger4spp VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with - Saturation (Positive multiply, Positive accumulate) - -diff --git 
a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c -index 6bcee966d..d8131eb60 100644 ---- a/VEX/priv/guest_ppc_helpers.c -+++ b/VEX/priv/guest_ppc_helpers.c -@@ -1446,16 +1446,16 @@ static UInt exts4( UInt src) - return src & 0xF; /* make sure high order bits are zero */ - } - --static UInt exts8( UInt src) -+static ULong exts8( UInt src) - { -- /* Input is an 8-bit value. Extend bit 7 to bits [31:8] */ -+ /* Input is an 8-bit value. Extend bit 7 to bits [63:8] */ - if (( src >> 7 ) & 0x1) -- return src | 0xFFFFFF00; /* sign bit is a 1, extend */ -+ return src | 0xFFFFFFFFFFFFFF00ULL; /* sign bit is a 1, extend */ - else - return src & 0xFF; /* make sure high order bits are zero */ - } - --static UInt extz8( UInt src) -+static ULong extz8( UInt src) - { - /* Input is an 8-bit value. Extend src on the left with zeros. */ - return src & 0xFF; /* make sure high order bits are zero */ -@@ -1662,12 +1662,12 @@ void vsx_matrix_8bit_ger_dirty_helper( VexGuestPPC64State* gst, - ULong srcB_hi, ULong srcB_lo, - UInt masks_inst ) - { -- UInt i, j, mask, sum, inst, acc_entry, prefix_inst; -+ UInt i, j, mask, inst, acc_entry, prefix_inst; - - UInt srcA_bytes[4][4]; /* word, byte */ - UInt srcB_bytes[4][4]; /* word, byte */ - UInt acc_word[4]; -- UInt prod0, prod1, prod2, prod3; -+ ULong prod0, prod1, prod2, prod3, sum; - UInt result[4]; - UInt pmsk = 0; - UInt xmsk = 0; -@@ -1742,10 +1742,13 @@ void vsx_matrix_8bit_ger_dirty_helper( VexGuestPPC64State* gst, - sum = prod0 + prod1 + prod2 + prod3; - - if ( inst == XVI8GER4 ) -- result[j] = sum; -+ result[j] = chop64to32( sum ); - - else if ( inst == XVI8GER4PP ) -- result[j] = sum + acc_word[j]; -+ result[j] = chop64to32( sum + acc_word[j] ); -+ -+ else if ( inst == XVI8GER4SPP ) -+ result[j] = clampS64toS32(sum + acc_word[j]); - - } else { - result[j] = 0; -@@ -1821,7 +1824,7 @@ void vsx_matrix_16bit_ger_dirty_helper( VexGuestPPC64State* gst, - else - prod1 = exts16to64( srcA_word[i][1] ) - * exts16to64( 
srcB_word[j][1] ); -- /* sum is UInt so the result is choped to 32-bits */ -+ - sum = prod0 + prod1; - - if ( inst == XVI16GER2 ) -@@ -1830,13 +1833,11 @@ void vsx_matrix_16bit_ger_dirty_helper( VexGuestPPC64State* gst, - else if ( inst == XVI16GER2S ) - result[j] = clampS64toS32( sum ); - -- else if ( inst == XVI16GER2PP ) { -+ else if ( inst == XVI16GER2PP ) - result[j] = chop64to32( sum + acc_word[j] ); -- } - -- else if ( inst == XVI16GER2SPP ) { -+ else if ( inst == XVI16GER2SPP ) - result[j] = clampS64toS32( sum + acc_word[j] ); -- } - - } else { - result[j] = 0; -diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c -index 20553a539..e54f0f389 100644 ---- a/VEX/priv/guest_ppc_toIR.c -+++ b/VEX/priv/guest_ppc_toIR.c -@@ -5993,6 +5993,7 @@ static void vsx_matrix_ger ( const VexAbiInfo* vbi, - break; - case XVI4GER8PP: - case XVI8GER4PP: -+ case XVI8GER4SPP: - case XVI16GER2PP: - case XVI16GER2SPP: - case XVBF16GER2PP: -@@ -34983,6 +34984,12 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, - getVSReg( rA_addr ), getVSReg( rB_addr ), - AT, ( ( inst_prefix << 8 ) | XO ) ); - break; -+ case XVI8GER4SPP: -+ DIP("xvi8ger4spp %u,r%u, r%u\n", AT, rA_addr, rB_addr); -+ vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER, -+ getVSReg( rA_addr ), getVSReg( rB_addr ), -+ AT, ( ( inst_prefix << 8 ) | XO ) ); -+ break; - case XVI16GER2S: - DIP("xvi16ger2s %u,r%u, r%u\n", AT, rA_addr, rB_addr); - vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, -@@ -34995,6 +35002,19 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, - getVSReg( rA_addr ), getVSReg( rB_addr ), - AT, ( ( inst_prefix << 8 ) | XO ) ); - break; -+ case XVI16GER2: -+ DIP("xvi16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr); -+ vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, -+ getVSReg( rA_addr ), getVSReg( rB_addr ), -+ AT, ( ( inst_prefix << 8 ) | XO ) ); -+ break; -+ case XVI16GER2PP: -+ DIP("xvi16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr); -+ vsx_matrix_ger( vbi, 
MATRIX_16BIT_INT_GER, -+ getVSReg( rA_addr ), getVSReg( rB_addr ), -+ AT, ( ( inst_prefix << 8 ) | XO ) ); -+ break; -+ - case XVF16GER2: - DIP("xvf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr); - vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, -@@ -35193,6 +35213,39 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, - AT, - ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); - break; -+ case XVI8GER4SPP: -+ PMSK = IFIELD( prefix, 12, 4); -+ XMSK = IFIELD( prefix, 4, 4); -+ YMSK = IFIELD( prefix, 0, 4); -+ DIP("pmxvi8ger4spp %u,r%u, r%u,%u,%u,%u\n", -+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK); -+ vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER, -+ getVSReg( rA_addr ), getVSReg( rB_addr ), -+ AT, -+ ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); -+ break; -+ case XVI16GER2: -+ PMSK = IFIELD( prefix, 12, 4); -+ XMSK = IFIELD( prefix, 4, 4); -+ YMSK = IFIELD( prefix, 0, 4); -+ DIP("pmxvi16ger2 %u,r%u, r%u,%u,%u,%u\n", -+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK); -+ vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, -+ getVSReg( rA_addr ), getVSReg( rB_addr ), -+ AT, -+ ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); -+ break; -+ case XVI16GER2PP: -+ PMSK = IFIELD( prefix, 12, 4); -+ XMSK = IFIELD( prefix, 4, 4); -+ YMSK = IFIELD( prefix, 0, 4); -+ DIP("pmxvi16ger2pp %u,r%u, r%u,%u,%u,%u\n", -+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK); -+ vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, -+ getVSReg( rA_addr ), getVSReg( rB_addr ), -+ AT, -+ ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); -+ break; - case XVI16GER2S: - PMSK = IFIELD( prefix, 14, 2); - XMSK = IFIELD( prefix, 4, 4); -@@ -36345,6 +36398,9 @@ DisResult disInstr_PPC_WRK ( - (opc2 == XVI4GER8PP) || // xvi4ger8pp - (opc2 == XVI8GER4) || // xvi8ger4 - (opc2 == XVI8GER4PP) || // xvi8ger4pp -+ (opc2 == XVI8GER4SPP) || // xvi8ger4spp -+ (opc2 == XVI16GER2) || // xvi16ger2 -+ (opc2 == XVI16GER2PP) || // xvi16ger2pp - (opc2 == XVBF16GER2) || // xvbf16ger2 - (opc2 == XVBF16GER2PP) || // xvbf16ger2pp - (opc2 == 
XVBF16GER2PN) || // xvbf16ger2pn diff --git a/SOURCES/valgrind-3.17.0-s390-prep.patch b/SOURCES/valgrind-3.17.0-s390-prep.patch deleted file mode 100644 index 8f2dbb1..0000000 --- a/SOURCES/valgrind-3.17.0-s390-prep.patch +++ /dev/null @@ -1,2283 +0,0 @@ -commit d74a637206ef5532ccd2ccb2e31ee2762f184e60 -Author: Andreas Arnez -Date: Wed Apr 28 18:52:30 2021 +0200 - - Bug 433863 - s390x: Remove memcheck test cases for cs, cds, and csg - - The fix for bug 429864 - "s390x: C++ atomic test_and_set yields - false-positive memcheck diagnostics" changes the memcheck behavior at - various compare-and-swap instructions. The comparison between the old and - expected value now always yields a defined result, even if the input - values are (partially) undefined. However, some existing test cases - explicitly verify that memcheck complains about the use of uninitialised - values here. These test cases are no longer valid. Remove them. - -diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am -index 67ae8c293..e4e69eb38 100644 ---- a/memcheck/tests/s390x/Makefile.am -+++ b/memcheck/tests/s390x/Makefile.am -@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am - - dist_noinst_SCRIPTS = filter_stderr - --INSN_TESTS = cs csg cds cdsg cu21 cu42 ltgjhe -+INSN_TESTS = cdsg cu21 cu42 ltgjhe - - check_PROGRAMS = $(INSN_TESTS) - -@@ -14,7 +14,3 @@ EXTRA_DIST = \ - AM_CFLAGS += @FLAG_M64@ - AM_CXXFLAGS += @FLAG_M64@ - AM_CCASFLAGS += @FLAG_M64@ -- --cs_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@ --csg_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@ --cds_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@ -diff --git a/memcheck/tests/s390x/cds.c b/memcheck/tests/s390x/cds.c -deleted file mode 100644 -index ec5c533e0..000000000 ---- a/memcheck/tests/s390x/cds.c -+++ /dev/null -@@ -1,82 +0,0 @@ --#include --#include -- --typedef struct { -- uint64_t high; -- uint64_t low; --} quad_word; -- --void --test(quad_word op1_init, uint64_t op2_init, quad_word 
op3_init) --{ -- int cc; // unused -- quad_word op1 = op1_init; -- uint64_t op2 = op2_init; -- quad_word op3 = op3_init; -- -- __asm__ volatile ( -- "lmg %%r0,%%r1,%1\n\t" -- "lmg %%r2,%%r3,%3\n\t" -- "cds %%r0,%%r2,%2\n\t" // cds 1st,3rd,2nd -- "stmg %%r0,%%r1,%1\n" // store r0,r1 to op1 -- "stmg %%r2,%%r3,%3\n" // store r2,r3 to op3 -- : "=d" (cc), "+QS" (op1), "+QS" (op2), "+QS" (op3) -- : -- : "r0", "r1", "r2", "r3", "cc"); -- --} -- --// Return a quad-word that only bits low[32:63] are undefined --quad_word --make_undefined(void) --{ -- quad_word val; -- -- val.high = 0; -- val.low |= 0xFFFFFFFF00000000ull; -- -- return val; --} -- --void op1_undefined(void) --{ -- quad_word op1, op3; -- uint64_t op2; -- -- // op1 undefined -- op1 = make_undefined(); -- op2 = 42; -- op3.high = op3.low = 0xdeadbeefdeadbabeull; -- test(op1, op2, op3); // complaint --} -- --void op2_undefined(void) --{ -- quad_word op1, op3; -- uint64_t op2; -- -- op1.high = op1.low = 42; -- // op2 undefined -- op3.high = op3.low = 0xdeadbeefdeadbabeull; -- test(op1, op2, op3); // complaint --} -- --void op3_undefined(void) --{ -- quad_word op1, op3; -- uint64_t op2; -- -- op1.high = op1.low = 42; -- op2 = 100; -- op3 = make_undefined(); -- test(op1, op2, op3); // no complaint; op3 is just copied around --} -- --int main () --{ -- op1_undefined(); -- op2_undefined(); -- op3_undefined(); -- -- return 0; --} -diff --git a/memcheck/tests/s390x/cds.stderr.exp b/memcheck/tests/s390x/cds.stderr.exp -deleted file mode 100644 -index e72de94c8..000000000 ---- a/memcheck/tests/s390x/cds.stderr.exp -+++ /dev/null -@@ -1,10 +0,0 @@ --Conditional jump or move depends on uninitialised value(s) -- at 0x........: test (cds.c:17) -- by 0x........: op1_undefined (cds.c:50) -- by 0x........: main (cds.c:77) -- --Conditional jump or move depends on uninitialised value(s) -- at 0x........: test (cds.c:17) -- by 0x........: op2_undefined (cds.c:61) -- by 0x........: main (cds.c:78) -- -diff --git 
a/memcheck/tests/s390x/cds.stdout.exp b/memcheck/tests/s390x/cds.stdout.exp -deleted file mode 100644 -index e69de29bb..000000000 -diff --git a/memcheck/tests/s390x/cds.vgtest b/memcheck/tests/s390x/cds.vgtest -deleted file mode 100644 -index 5195887e2..000000000 ---- a/memcheck/tests/s390x/cds.vgtest -+++ /dev/null -@@ -1,2 +0,0 @@ --prog: cds --vgopts: -q -diff --git a/memcheck/tests/s390x/cs.c b/memcheck/tests/s390x/cs.c -deleted file mode 100644 -index 9a298cef9..000000000 ---- a/memcheck/tests/s390x/cs.c -+++ /dev/null -@@ -1,32 +0,0 @@ --#include --#include --#include -- --void --test(int32_t op1_init, int32_t op2_init, int32_t op3_init) --{ -- register int32_t op1 asm("8") = op1_init; -- register int32_t op3 asm("9") = op3_init; -- -- int32_t op2 = op2_init; -- int cc = 1; -- -- __asm__ volatile ( -- "cs 8,9,%1\n\t" -- "ipm %0\n\t" -- "srl %0,28\n\t" -- : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3) -- : -- : "cc"); --} -- --int main () --{ -- int op1, op2, op3; -- -- test(op1, 0x10000000, 0x12345678); // complaint -- test(0x10000000, op2, 0x12345678); // complaint -- test(0x10000000, 0x01000000, op3); // no complaint -- -- return 0; --} -diff --git a/memcheck/tests/s390x/cs.stderr.exp b/memcheck/tests/s390x/cs.stderr.exp -deleted file mode 100644 -index e45dc99cd..000000000 ---- a/memcheck/tests/s390x/cs.stderr.exp -+++ /dev/null -@@ -1,8 +0,0 @@ --Conditional jump or move depends on uninitialised value(s) -- at 0x........: test (cs.c:14) -- by 0x........: main (cs.c:27) -- --Conditional jump or move depends on uninitialised value(s) -- at 0x........: test (cs.c:14) -- by 0x........: main (cs.c:28) -- -diff --git a/memcheck/tests/s390x/cs.stdout.exp b/memcheck/tests/s390x/cs.stdout.exp -deleted file mode 100644 -index e69de29bb..000000000 -diff --git a/memcheck/tests/s390x/cs.vgtest b/memcheck/tests/s390x/cs.vgtest -deleted file mode 100644 -index 323cce80c..000000000 ---- a/memcheck/tests/s390x/cs.vgtest -+++ /dev/null -@@ -1,2 +0,0 @@ --prog: cs --vgopts: 
-q -diff --git a/memcheck/tests/s390x/csg.c b/memcheck/tests/s390x/csg.c -deleted file mode 100644 -index 7f9d8c88e..000000000 ---- a/memcheck/tests/s390x/csg.c -+++ /dev/null -@@ -1,32 +0,0 @@ --#include --#include --#include -- --void --test(int64_t op1_init, int64_t op2_init, int64_t op3_init) --{ -- register int64_t op1 asm("8") = op1_init; -- register int64_t op3 asm("9") = op3_init; -- -- int64_t op2 = op2_init; -- int cc = 1; -- -- __asm__ volatile ( -- "csg 8,9,%1\n\t" -- "ipm %0\n\t" -- "srl %0,28\n\t" -- : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3) -- : -- : "cc"); --} -- --int main () --{ -- int64_t op1, op2, op3; -- -- test(op1, 0x1000000000000000ull, 0x1234567887654321ull); // complaint -- test(0x1000000000000000ull, op2, 0x1234567887654321ull); // complaint -- test(0x1000000000000000ull, 0x1000000000000000ull, op3); // no complaint -- -- return 0; --} -diff --git a/memcheck/tests/s390x/csg.stderr.exp b/memcheck/tests/s390x/csg.stderr.exp -deleted file mode 100644 -index fda2021ce..000000000 ---- a/memcheck/tests/s390x/csg.stderr.exp -+++ /dev/null -@@ -1,8 +0,0 @@ --Conditional jump or move depends on uninitialised value(s) -- at 0x........: test (csg.c:14) -- by 0x........: main (csg.c:27) -- --Conditional jump or move depends on uninitialised value(s) -- at 0x........: test (csg.c:14) -- by 0x........: main (csg.c:28) -- -diff --git a/memcheck/tests/s390x/csg.stdout.exp b/memcheck/tests/s390x/csg.stdout.exp -deleted file mode 100644 -index e69de29bb..000000000 -diff --git a/memcheck/tests/s390x/csg.vgtest b/memcheck/tests/s390x/csg.vgtest -deleted file mode 100644 -index 6de75c1d6..000000000 ---- a/memcheck/tests/s390x/csg.vgtest -+++ /dev/null -@@ -1,2 +0,0 @@ --prog: csg --vgopts: -q - -commit 18ddcc47c951427efd3b790ba2481159b9bd1598 -Author: Andreas Arnez -Date: Wed Apr 7 16:48:29 2021 +0200 - - s390x: Support "expensive" comparisons Iop_ExpCmpNE32/64 - - Add support for Iop_ExpCmpNE32 and Iop_ExpCmpNE64 in the s390x instruction - selector. 
Handle them exactly like the "inexpensive" variants Iop_CmpNE32 - and Iop_CmpNE64. - -diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c -index 2000ec224..5f79280c0 100644 ---- a/VEX/priv/host_s390_isel.c -+++ b/VEX/priv/host_s390_isel.c -@@ -3611,6 +3611,8 @@ s390_isel_cc(ISelEnv *env, IRExpr *cond) - - case Iop_CmpNE32: - case Iop_CmpNE64: -+ case Iop_ExpCmpNE32: -+ case Iop_ExpCmpNE64: - case Iop_CasCmpNE32: - case Iop_CasCmpNE64: - result = S390_CC_NE; - -commit 5db3f929c43bf46f4707178706cfe90f43acdd19 -Author: Andreas Arnez -Date: Wed Apr 7 12:30:20 2021 +0200 - - s390x: Add convenience function mkV128() - - Provide mkV128() as a short-hand notation for creating a vector constant from - a bit pattern, similar to other such functions like mkU64(). - -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index 339377007..7d54cb551 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -376,6 +376,13 @@ mkU64(ULong value) - return IRExpr_Const(IRConst_U64(value)); - } - -+/* Create an expression node for a 128-bit vector constant */ -+static __inline__ IRExpr * -+mkV128(UShort value) -+{ -+ return IRExpr_Const(IRConst_V128(value)); -+} -+ - /* Create an expression node for a 32-bit floating point constant - whose value is given by a bit pattern. 
*/ - static __inline__ IRExpr * -@@ -16249,7 +16256,7 @@ s390_irgen_VLGV(UChar r1, IRTemp op2addr, UChar v3, UChar m4) - static const HChar * - s390_irgen_VGBM(UChar v1, UShort i2, UChar m3 __attribute__((unused))) - { -- put_vr_qw(v1, IRExpr_Const(IRConst_V128(i2))); -+ put_vr_qw(v1, mkV128(i2)); - - return "vgbm"; - } -@@ -18160,11 +18167,11 @@ s390_irgen_VSUM(UChar v1, UChar v2, UChar v3, UChar m4) - switch(type) { - case Ity_I8: - sum = unop(Iop_PwAddL16Ux8, unop(Iop_PwAddL8Ux16, get_vr_qw(v2))); -- mask = IRExpr_Const(IRConst_V128(0b0001000100010001)); -+ mask = mkV128(0b0001000100010001); - break; - case Ity_I16: - sum = unop(Iop_PwAddL16Ux8, get_vr_qw(v2)); -- mask = IRExpr_Const(IRConst_V128(0b0011001100110011)); -+ mask = mkV128(0b0011001100110011); - break; - default: - vpanic("s390_irgen_VSUM: invalid type "); -@@ -18185,11 +18192,11 @@ s390_irgen_VSUMG(UChar v1, UChar v2, UChar v3, UChar m4) - switch(type) { - case Ity_I16: - sum = unop(Iop_PwAddL32Ux4, unop(Iop_PwAddL16Ux8, get_vr_qw(v2))); -- mask = IRExpr_Const(IRConst_V128(0b0000001100000011)); -+ mask = mkV128(0b0000001100000011); - break; - case Ity_I32: - sum = unop(Iop_PwAddL32Ux4, get_vr_qw(v2)); -- mask = IRExpr_Const(IRConst_V128(0b0000111100001111)); -+ mask = mkV128(0b0000111100001111); - break; - default: - vpanic("s390_irgen_VSUMG: invalid type "); -@@ -18210,11 +18217,11 @@ s390_irgen_VSUMQ(UChar v1, UChar v2, UChar v3, UChar m4) - switch(type) { - case Ity_I32: - sum = unop(Iop_PwAddL64Ux2, unop(Iop_PwAddL32Ux4, get_vr_qw(v2))); -- mask = IRExpr_Const(IRConst_V128(0b0000000000001111)); -+ mask = mkV128(0b0000000000001111); - break; - case Ity_I64: - sum = unop(Iop_PwAddL64Ux2, get_vr_qw(v2)); -- mask = IRExpr_Const(IRConst_V128(0b0000000011111111)); -+ mask = mkV128(0b0000000011111111); - break; - default: - vpanic("s390_irgen_VSUMQ: invalid type "); -@@ -18943,8 +18950,8 @@ s390_irgen_VFCx(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6, - assign(cond, binop(Iop_CmpEQ32, 
mkexpr(result), mkU32(cmp))); - } - put_vr_qw(v1, mkite(mkexpr(cond), -- IRExpr_Const(IRConst_V128(0xffff)), -- IRExpr_Const(IRConst_V128(0)))); -+ mkV128(0xffff), -+ mkV128(0))); - if (s390_vr_is_cs_set(m6)) { - IRTemp cc = newTemp(Ity_I64); - assign(cc, mkite(mkexpr(cond), mkU64(0), mkU64(3))); - -commit e78bd78d3043729033b426218ab8c6dae9c51e96 -Author: Andreas Arnez -Date: Thu Mar 18 18:01:10 2021 +0100 - - Bug 434296 - s390x: Rework IR conversion of VSTRC, VFAE, and VFEE - - The z/Architecture instructions "vector string range compare" (VSTRC), - "vector find any element equal" (VFAE), and "vector find element - equal" (VFEE) are each implemented with a dirty helper that executes the - instruction. Unfortunately this approach leads to memcheck false - positives, because these instructions may yield a defined result even if - parts of the input vectors are undefined. There are multiple ways this - can happen: Wherever the flags in the fourth operand to VSTRC indicate - "match always" or "match never", the corresponding elements in the third - operand don't affect the result. The same is true for the elements - following the first zero-element in the second operand if the ZS flag is - set, or for the elements following the first matching element, if any. - - Re-implement the instructions without dirty helpers and transform into - lengthy IR instead. 
- -diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h -index 905429015..49b6cd5dd 100644 ---- a/VEX/priv/guest_s390_defs.h -+++ b/VEX/priv/guest_s390_defs.h -@@ -265,11 +265,8 @@ typedef enum { - S390_VEC_OP_INVALID = 0, - S390_VEC_OP_VPKS, - S390_VEC_OP_VPKLS, -- S390_VEC_OP_VFAE, -- S390_VEC_OP_VFEE, - S390_VEC_OP_VFENE, - S390_VEC_OP_VISTR, -- S390_VEC_OP_VSTRC, - S390_VEC_OP_VCEQ, - S390_VEC_OP_VTM, - S390_VEC_OP_VGFM, -diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c -index b71b621ae..63d2e8ce5 100644 ---- a/VEX/priv/guest_s390_helpers.c -+++ b/VEX/priv/guest_s390_helpers.c -@@ -2538,11 +2538,8 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, - {0x00, 0x00}, /* invalid */ - [S390_VEC_OP_VPKS] = {0xe7, 0x97}, - [S390_VEC_OP_VPKLS] = {0xe7, 0x95}, -- [S390_VEC_OP_VFAE] = {0xe7, 0x82}, -- [S390_VEC_OP_VFEE] = {0xe7, 0x80}, - [S390_VEC_OP_VFENE] = {0xe7, 0x81}, - [S390_VEC_OP_VISTR] = {0xe7, 0x5c}, -- [S390_VEC_OP_VSTRC] = {0xe7, 0x8a}, - [S390_VEC_OP_VCEQ] = {0xe7, 0xf8}, - [S390_VEC_OP_VTM] = {0xe7, 0xd8}, - [S390_VEC_OP_VGFM] = {0xe7, 0xb4}, -@@ -2630,8 +2627,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, - - case S390_VEC_OP_VPKS: - case S390_VEC_OP_VPKLS: -- case S390_VEC_OP_VFAE: -- case S390_VEC_OP_VFEE: - case S390_VEC_OP_VFENE: - case S390_VEC_OP_VCEQ: - case S390_VEC_OP_VGFM: -@@ -2645,7 +2640,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, - the_insn.VRR.m5 = d->m5; - break; - -- case S390_VEC_OP_VSTRC: - case S390_VEC_OP_VGFMA: - case S390_VEC_OP_VMAH: - case S390_VEC_OP_VMALH: -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index 7d54cb551..26a947813 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -17156,90 +17156,205 @@ s390_irgen_PPNO(UChar r1, UChar r2) - return "ppno"; - } - --static const HChar * --s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) --{ -- IRDirty* d; -- IRTemp cc = 
newTemp(Ity_I64); -+enum s390_VStrX { -+ s390_VStrX_VSTRC, -+ s390_VStrX_VFAE, -+ s390_VStrX_VFEE -+}; - -- /* Check for specification exception */ -- vassert(m4 < 3); -+#define S390_VEC_OP3(m, op0, op1, op2) \ -+ (m) == 0 ? op0 : (m) == 1 ? op1 : (m) == 2 ? op2 : Iop_INVALID; - -- s390x_vec_op_details_t details = { .serialized = 0ULL }; -- details.op = S390_VEC_OP_VFAE; -- details.v1 = v1; -- details.v2 = v2; -- details.v3 = v3; -- details.m4 = m4; -- details.m5 = m5; -- -- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", -- &s390x_dirtyhelper_vec_op, -- mkIRExprVec_2(IRExpr_GSPTR(), -- mkU64(details.serialized))); -+/* Helper function for transforming VSTRC, VFAE, or VFEE. These instructions -+ share much of the same logic. */ -+static void -+s390_irgen_VStrX(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, -+ UChar m6, enum s390_VStrX which_insn) -+{ -+ IRTemp op2 = newTemp(Ity_V128); -+ IRTemp op3 = newTemp(Ity_V128); -+ IRExpr* tmp; -+ IRExpr* match = NULL; -+ UChar bitwidth = 8 << m5; -+ UChar n_elem = 16 >> m5; -+ IROp sub_op = S390_VEC_OP3(m5, Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4); -+ IROp sar_op = S390_VEC_OP3(m5, Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4); -+ IROp shl_op = S390_VEC_OP3(m5, Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4); -+ IROp dup_op = S390_VEC_OP3(m5, Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4); -+ IROp cmpeq_op = S390_VEC_OP3(m5, Iop_CmpEQ8x16, -+ Iop_CmpEQ16x8, Iop_CmpEQ32x4); -+ IROp cmpgt_op = S390_VEC_OP3(m5, Iop_CmpGT8Ux16, -+ Iop_CmpGT16Ux8, Iop_CmpGT32Ux4); -+ IROp getelem_op = S390_VEC_OP3(m5, Iop_GetElem8x16, -+ Iop_GetElem16x8, Iop_GetElem32x4); -+ -+ assign(op2, get_vr_qw(v2)); -+ assign(op3, get_vr_qw(v3)); -+ -+ switch (which_insn) { -+ -+ case s390_VStrX_VSTRC: { -+ IRTemp op4 = newTemp(Ity_V128); -+ assign(op4, get_vr_qw(v4)); -+ -+ /* Mask off insignificant range boundaries from op3, i.e., all those for -+ which the corresponding field in op4 has all or no bits set ("match -+ always" / "match never"). 
*/ -+ IRTemp bounds = newTemp(Ity_V128); -+ tmp = unop(Iop_NotV128, -+ binop(cmpeq_op, mkV128(0), -+ binop(sar_op, -+ binop(sub_op, -+ binop(sar_op, mkexpr(op4), -+ mkU8(bitwidth - 3)), -+ mkV128(-1)), -+ mkU8(1)))); -+ assign(bounds, binop(Iop_AndV128, mkexpr(op3), tmp)); -+ -+ IRTemp flags_eq = newTemp(Ity_V128); -+ IRTemp flags_lt = newTemp(Ity_V128); -+ IRTemp flags_gt = newTemp(Ity_V128); -+ assign(flags_eq, binop(sar_op, mkexpr(op4), mkU8(bitwidth - 1))); -+ assign(flags_lt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(1)), -+ mkU8(bitwidth - 1))); -+ assign(flags_gt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(2)), -+ mkU8(bitwidth - 1))); -+ -+ for (UChar idx = 0; idx < n_elem; idx += 2) { -+ /* Match according to the even/odd pairs in op3 and op4 at idx */ -+ IRTemp part[2]; -+ -+ for (UChar j = 0; j < 2; j++) { -+ IRTemp a = newTemp(Ity_V128); -+ assign(a, unop(dup_op, -+ binop(getelem_op, mkexpr(bounds), mkU8(idx + j)))); -+ -+ IRExpr* m[] = { -+ binop(cmpeq_op, mkexpr(op2), mkexpr(a)), -+ binop(cmpgt_op, mkexpr(a), mkexpr(op2)), -+ binop(cmpgt_op, mkexpr(op2), mkexpr(a)) -+ }; -+ IRExpr* f[] = { -+ unop(dup_op, binop(getelem_op, mkexpr(flags_eq), mkU8(idx + j))), -+ unop(dup_op, binop(getelem_op, mkexpr(flags_lt), mkU8(idx + j))), -+ unop(dup_op, binop(getelem_op, mkexpr(flags_gt), mkU8(idx + j))) -+ }; -+ part[j] = newTemp(Ity_V128); -+ assign(part[j], binop(Iop_OrV128, -+ binop(Iop_OrV128, -+ binop(Iop_AndV128, f[0], m[0]), -+ binop(Iop_AndV128, f[1], m[1])), -+ binop(Iop_AndV128, f[2], m[2]))); -+ } -+ tmp = binop(Iop_AndV128, mkexpr(part[0]), mkexpr(part[1])); -+ match = idx == 0 ? 
tmp : binop(Iop_OrV128, match, tmp); -+ } -+ break; -+ } - -- d->nFxState = 3; -- vex_bzero(&d->fxState, sizeof(d->fxState)); -- d->fxState[0].fx = Ifx_Read; -- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); -- d->fxState[0].size = sizeof(V128); -- d->fxState[1].fx = Ifx_Read; -- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); -- d->fxState[1].size = sizeof(V128); -- d->fxState[2].fx = Ifx_Write; -- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); -- d->fxState[2].size = sizeof(V128); -+ case s390_VStrX_VFAE: -+ for (UChar idx = 0; idx < n_elem; idx++) { -+ IRTemp a = newTemp(Ity_V128); -+ assign(a, binop(cmpeq_op, mkexpr(op2), -+ unop(dup_op, -+ binop(getelem_op, mkexpr(op3), mkU8(idx))))); -+ match = idx == 0 ? mkexpr(a) : binop(Iop_OrV128, match, mkexpr(a)); -+ } -+ break; - -- stmt(IRStmt_Dirty(d)); -+ case s390_VStrX_VFEE: -+ match = binop(cmpeq_op, mkexpr(op2), mkexpr(op3)); -+ break; - -- if (s390_vr_is_cs_set(m5)) { -- s390_cc_set(cc); -+ default: -+ vpanic("s390_irgen_VStrX: unknown insn"); - } - -- return "vfae"; --} -+ /* Invert first intermediate result if requested */ -+ if (m6 & 8) -+ match = unop(Iop_NotV128, match); - --static const HChar * --s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) --{ -- IRDirty* d; -- IRTemp cc = newTemp(Ity_I64); -+ IRTemp inter1 = newTemp(Ity_V128); -+ IRTemp inter2 = newTemp(Ity_V128); -+ IRTemp accu = newTemp(Ity_V128); -+ assign(inter1, match); - -- /* Check for specification exception */ -- vassert(m4 < 3); -- vassert((m5 & 0b1100) == 0); -+ /* Determine second intermediate and accumulated result */ -+ if (s390_vr_is_zs_set(m6)) { -+ assign(inter2, binop(cmpeq_op, mkexpr(op2), mkV128(0))); -+ assign(accu, binop(Iop_OrV128, mkexpr(inter1), mkexpr(inter2))); -+ } else { -+ assign(inter2, mkV128(0)); -+ assign(accu, mkexpr(inter1)); -+ } - -- s390x_vec_op_details_t details = { .serialized = 0ULL }; -- details.op = 
S390_VEC_OP_VFEE; -- details.v1 = v1; -- details.v2 = v2; -- details.v3 = v3; -- details.m4 = m4; -- details.m5 = m5; -+ IRTemp accu0 = newTemp(Ity_I64); -+ IRTemp is_match0 = newTemp(Ity_I1); -+ IRTemp mismatch_bits = newTemp(Ity_I64); - -- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", -- &s390x_dirtyhelper_vec_op, -- mkIRExprVec_2(IRExpr_GSPTR(), -- mkU64(details.serialized))); -+ assign(accu0, unop(Iop_V128HIto64, mkexpr(accu))); -+ assign(is_match0, binop(Iop_ExpCmpNE64, mkexpr(accu0), mkU64(0))); -+ assign(mismatch_bits, unop(Iop_ClzNat64, -+ mkite(mkexpr(is_match0), mkexpr(accu0), -+ unop(Iop_V128to64, mkexpr(accu))))); - -- d->nFxState = 3; -- vex_bzero(&d->fxState, sizeof(d->fxState)); -- d->fxState[0].fx = Ifx_Read; -- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); -- d->fxState[0].size = sizeof(V128); -- d->fxState[1].fx = Ifx_Read; -- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); -- d->fxState[1].size = sizeof(V128); -- d->fxState[2].fx = Ifx_Write; -- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); -- d->fxState[2].size = sizeof(V128); -+ if (m6 & 4) { -+ put_vr_qw(v1, mkexpr(inter1)); -+ } else { -+ /* Determine byte position of first match */ -+ tmp = binop(Iop_Add64, -+ binop(Iop_Shr64, mkexpr(mismatch_bits), mkU8(3)), -+ mkite(mkexpr(is_match0), mkU64(0), mkU64(8))); -+ put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0))); -+ } - -- stmt(IRStmt_Dirty(d)); -+ if (s390_vr_is_cs_set(m6)) { -+ /* Set condition code depending on... 
-+ zero found -+ n y -+ +------ -+ match n | 3 0 -+ found y | 1 2 */ - -- if (s390_vr_is_cs_set(m5)) { -+ IRTemp cc = newTemp(Ity_I64); -+ -+ tmp = binop(Iop_Shr64, -+ mkite(mkexpr(is_match0), -+ unop(Iop_V128HIto64, mkexpr(inter1)), -+ unop(Iop_V128to64, mkexpr(inter1))), -+ unop(Iop_64to8, -+ binop(Iop_Sub64, mkU64(63), mkexpr(mismatch_bits)))); -+ tmp = binop(Iop_Shl64, tmp, mkU8(1)); -+ if (s390_vr_is_zs_set(m6)) { -+ tmp = binop(Iop_Xor64, tmp, -+ mkite(binop(Iop_ExpCmpNE64, mkU64(0), -+ binop(Iop_Or64, -+ unop(Iop_V128HIto64, mkexpr(inter2)), -+ unop(Iop_V128to64, mkexpr(inter2)))), -+ mkU64(0), -+ mkU64(3))); -+ } else { -+ tmp = binop(Iop_Xor64, tmp, mkU64(3)); -+ } -+ assign(cc, tmp); - s390_cc_set(cc); - } -+ dis_res->hint = Dis_HintVerbose; -+} - -+static const HChar * -+s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) -+{ -+ s390_insn_assert("vfae", m4 <= 2); -+ s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFAE); -+ return "vfae"; -+} -+ -+static const HChar * -+s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) -+{ -+ s390_insn_assert("vfee", m4 < 3 && m5 == (m5 & 3)); -+ s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFEE); - return "vfee"; - } - -@@ -17406,47 +17521,8 @@ s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5) - static const HChar * - s390_irgen_VSTRC(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) - { -- IRDirty* d; -- IRTemp cc = newTemp(Ity_I64); -- -- /* Check for specification exception */ -- vassert(m5 < 3); -- -- s390x_vec_op_details_t details = { .serialized = 0ULL }; -- details.op = S390_VEC_OP_VSTRC; -- details.v1 = v1; -- details.v2 = v2; -- details.v3 = v3; -- details.v4 = v4; -- details.m4 = m5; -- details.m5 = m6; -- -- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", -- &s390x_dirtyhelper_vec_op, -- mkIRExprVec_2(IRExpr_GSPTR(), -- mkU64(details.serialized))); -- -- d->nFxState = 4; -- vex_bzero(&d->fxState, sizeof(d->fxState)); -- d->fxState[0].fx = 
Ifx_Read; -- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); -- d->fxState[0].size = sizeof(V128); -- d->fxState[1].fx = Ifx_Read; -- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); -- d->fxState[1].size = sizeof(V128); -- d->fxState[2].fx = Ifx_Read; -- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128); -- d->fxState[2].size = sizeof(V128); -- d->fxState[3].fx = Ifx_Write; -- d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); -- d->fxState[3].size = sizeof(V128); -- -- stmt(IRStmt_Dirty(d)); -- -- if (s390_vr_is_cs_set(m6)) { -- s390_cc_set(cc); -- } -- -+ s390_insn_assert("vstrc", m5 <= 2); -+ s390_irgen_VStrX(v1, v2, v3, v4, m5, m6, s390_VStrX_VSTRC); - return "vstrc"; - } - - -commit 4f17a067c4f8245c05611d6e8aa36e8841bab376 -Author: Andreas Arnez -Date: Tue Mar 2 14:12:29 2021 +0100 - - Bug 434296 - s390x: Rework IR conversion of VFENE - - So far the z/Architecture instruction "vector find element not - equal" (VFENE) is transformed to a loop. This can cause spurious - "conditional jump or move depends on uninitialised value(s)" messages by - memcheck. Re-implement without a loop. 
- -diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h -index 49b6cd5dd..caec3108e 100644 ---- a/VEX/priv/guest_s390_defs.h -+++ b/VEX/priv/guest_s390_defs.h -@@ -265,7 +265,6 @@ typedef enum { - S390_VEC_OP_INVALID = 0, - S390_VEC_OP_VPKS, - S390_VEC_OP_VPKLS, -- S390_VEC_OP_VFENE, - S390_VEC_OP_VISTR, - S390_VEC_OP_VCEQ, - S390_VEC_OP_VTM, -diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c -index 63d2e8ce5..2188ce5c1 100644 ---- a/VEX/priv/guest_s390_helpers.c -+++ b/VEX/priv/guest_s390_helpers.c -@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, - {0x00, 0x00}, /* invalid */ - [S390_VEC_OP_VPKS] = {0xe7, 0x97}, - [S390_VEC_OP_VPKLS] = {0xe7, 0x95}, -- [S390_VEC_OP_VFENE] = {0xe7, 0x81}, - [S390_VEC_OP_VISTR] = {0xe7, 0x5c}, - [S390_VEC_OP_VCEQ] = {0xe7, 0xf8}, - [S390_VEC_OP_VTM] = {0xe7, 0xd8}, -@@ -2627,7 +2626,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, - - case S390_VEC_OP_VPKS: - case S390_VEC_OP_VPKLS: -- case S390_VEC_OP_VFENE: - case S390_VEC_OP_VCEQ: - case S390_VEC_OP_VGFM: - case S390_VEC_OP_VCH: -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index 26a947813..c8dc3ec18 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -17361,120 +17361,86 @@ s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) - static const HChar * - s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) - { -- const Bool negateComparison = True; -- const IRType type = s390_vr_get_type(m4); -+ s390_insn_assert("vfene", m4 < 3 && m5 == (m5 & 3)); - -- /* Check for specification exception */ -- vassert(m4 < 3); -- vassert((m5 & 0b1100) == 0); -- -- static const IROp elementGetters[] = { -- Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4 -+ static const IROp compare_op[3] = { -+ Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4 - }; -- IROp getter = elementGetters[m4]; -- -- static const IROp elementComparators[] = { -- 
Iop_CmpEQ8, Iop_CmpEQ16, Iop_CmpEQ32 -+ static const IROp abs_op[3] = { -+ Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4 - }; -- IROp comparator = elementComparators[m4]; -- -- static const IROp resultConverter[] = {Iop_64to8, Iop_64to16, Iop_64to32}; -- IROp converter = resultConverter[m4]; -- -- IRTemp isZeroElem; -- -- IRTemp counter = newTemp(Ity_I64); -- assign(counter, get_counter_dw0()); -- -- IRTemp arg1 = newTemp(type); -- assign(arg1, binop(getter, get_vr_qw(v2), unop(Iop_64to8, mkexpr(counter)))); -- IRTemp arg2 = newTemp(type); -- assign(arg2, binop(getter, get_vr_qw(v3), unop(Iop_64to8, mkexpr(counter)))); -+ IRTemp op2 = newTemp(Ity_V128); -+ IRTemp op3 = newTemp(Ity_V128); -+ IRTemp op2zero = newTemp(Ity_V128); -+ IRTemp diff = newTemp(Ity_V128); -+ IRTemp diff0 = newTemp(Ity_I64); -+ IRTemp neq0 = newTemp(Ity_I1); -+ IRTemp samebits = newTemp(Ity_I64); -+ IRExpr* tmp; - -- IRTemp isGoodPair = newTemp(Ity_I1); -- if(negateComparison) { -- assign(isGoodPair, unop(Iop_Not1, binop(comparator, mkexpr(arg1), -- mkexpr(arg2)))); -- } else { -- assign(isGoodPair, binop(comparator, mkexpr(arg1), mkexpr(arg2))); -- } -+ assign(op2, get_vr_qw(v2)); -+ assign(op3, get_vr_qw(v3)); - -- if(s390_vr_is_zs_set(m5)) { -- isZeroElem = newTemp(Ity_I1); -- assign(isZeroElem, binop(comparator, mkexpr(arg1), -- unop(converter, mkU64(0)))); -+ tmp = mkV128(0); -+ if (s390_vr_is_zs_set(m5)) { -+ tmp = binop(compare_op[m4], mkexpr(op2), tmp); -+ if (s390_vr_is_cs_set(m5) && v3 != v2) { -+ /* Count leading equal bits in the terminating element too */ -+ tmp = unop(abs_op[m4], tmp); -+ } -+ assign(op2zero, tmp); -+ tmp = mkexpr(op2zero); - } -- -- static const UChar invalidIndices[] = {16, 8, 4}; -- const UChar invalidIndex = invalidIndices[m4]; -- IRTemp endOfVectorIsReached = newTemp(Ity_I1); -- assign(endOfVectorIsReached, binop(Iop_CmpEQ64, mkexpr(counter), -- mkU64(invalidIndex))); -- -- put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1))); -- IRExpr* shouldBreak = 
binop(Iop_Or32, -- unop(Iop_1Uto32, mkexpr(isGoodPair)), -- unop(Iop_1Uto32, mkexpr(endOfVectorIsReached)) -- ); -- if(s390_vr_is_zs_set(m5)) { -- shouldBreak = binop(Iop_Or32, -- shouldBreak, -- unop(Iop_1Uto32, mkexpr(isZeroElem))); -- } -- iterate_if(binop(Iop_CmpEQ32, shouldBreak, mkU32(0))); -- -- IRExpr* foundIndex = binop(Iop_Sub64, get_counter_dw0(), mkU64(1)); -- if(m4 > 0) { -- /* We should return index of byte but we found index of element in -- general case. -- if byte elem (m4 == 0) then indexOfByte = indexOfElement -- if halfword elem (m4 == 1) then indexOfByte = 2 * indexOfElement -- = indexOfElement << 1 -- if word elem (m4 == 2) then indexOfByte = 4 * indexOfElement -- = indexOfElement << 2 -- */ -- foundIndex = binop(Iop_Shl64, foundIndex, mkU8(m4)); -+ if (v3 != v2) { -+ tmp = binop(Iop_XorV128, mkexpr(op2), mkexpr(op3)); -+ if (s390_vr_is_zs_set(m5)) -+ tmp = binop(Iop_OrV128, tmp, mkexpr(op2zero)); - } - -- IRTemp result = newTemp(Ity_I64); -- assign(result, mkite(mkexpr(endOfVectorIsReached), -- mkU64(16), -- foundIndex)); -- put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(result), mkU64(0))); -+ assign(diff, tmp); -+ assign(diff0, unop(Iop_V128HIto64, mkexpr(diff))); -+ assign(neq0, binop(Iop_ExpCmpNE64, mkexpr(diff0), mkU64(0))); -+ assign(samebits, unop(Iop_ClzNat64, -+ mkite(mkexpr(neq0), mkexpr(diff0), -+ unop(Iop_V128to64, mkexpr(diff))))); - -+ /* Determine the byte size of the initial equal-elements sequence */ -+ tmp = binop(Iop_Shr64, mkexpr(samebits), mkU8(m4 + 3)); -+ if (m4 != 0) -+ tmp = binop(Iop_Shl64, tmp, mkU8(m4)); -+ tmp = binop(Iop_Add64, tmp, mkite(mkexpr(neq0), mkU64(0), mkU64(8))); -+ put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0))); - - if (s390_vr_is_cs_set(m5)) { -- static const IROp to64Converters[] = {Iop_8Uto64, Iop_16Uto64, Iop_32Uto64}; -- IROp to64Converter = to64Converters[m4]; -- -- IRExpr* arg1IsLessThanArg2 = binop(Iop_CmpLT64U, -- unop(to64Converter, mkexpr(arg1)), -- unop(to64Converter, mkexpr(arg2))); -- 
-- IRExpr* ccexp = mkite(binop(Iop_CmpEQ32, -- unop(Iop_1Uto32, mkexpr(isGoodPair)), -- mkU32(1)), -- mkite(arg1IsLessThanArg2, mkU64(1), mkU64(2)), -- mkU64(3)); -- -- if(s390_vr_is_zs_set(m5)) { -- IRExpr* arg2IsZero = binop(comparator, mkexpr(arg2), -- unop(converter, mkU64(0))); -- IRExpr* bothArgsAreZero = binop(Iop_And32, -- unop(Iop_1Uto32, mkexpr(isZeroElem)), -- unop(Iop_1Uto32, arg2IsZero)); -- ccexp = mkite(binop(Iop_CmpEQ32, bothArgsAreZero, mkU32(1)), -- mkU64(0), -- ccexp); -- } -+ /* Set condition code like follows -- -+ 0: operands equal up to and including zero element -+ 1: op2 < op3 2: op2 > op3 3: op2 = op3 */ - IRTemp cc = newTemp(Ity_I64); -- assign(cc, ccexp); -- -+ if (v3 == v2) { -+ tmp = mkU64(0); -+ } else { -+ IRTemp shift = newTemp(Ity_I8); -+ IRExpr* op2half = mkite(mkexpr(neq0), -+ unop(Iop_V128HIto64, mkexpr(op2)), -+ unop(Iop_V128to64, mkexpr(op2))); -+ IRExpr* op3half = mkite(mkexpr(neq0), -+ unop(Iop_V128HIto64, mkexpr(op3)), -+ unop(Iop_V128to64, mkexpr(op3))); -+ assign(shift, unop(Iop_64to8, -+ binop(Iop_Sub64, mkU64(63), mkexpr(samebits)))); -+ tmp = binop(Iop_Or64, -+ binop(Iop_Shl64, -+ binop(Iop_And64, mkU64(1), -+ binop(Iop_Shr64, op2half, mkexpr(shift))), -+ mkU8(1)), -+ binop(Iop_And64, mkU64(1), -+ binop(Iop_Shr64, op3half, mkexpr(shift)))); -+ } -+ assign(cc, mkite(binop(Iop_CmpEQ64, mkexpr(samebits), mkU64(64)), -+ mkU64(3), tmp)); - s390_cc_set(cc); - } -- -- -- put_counter_dw0(mkU64(0)); -+ dis_res->hint = Dis_HintVerbose; - return "vfene"; - } - - -commit 9bd78ebd8bb5cd4ebb3f081ceba46836cc485551 -Author: Andreas Arnez -Date: Tue Apr 27 20:13:26 2021 +0200 - - Bug 434296 - s390x: Rework IR conversion of VISTR - - The z/Architecture instruction VISTR is currently transformed to a dirty - helper that executes the instruction. This can cause false positives with - memcheck if the input string contains undefined characters after the - string terminator. 
Implement without a dirty helper and emulate the - instruction instead. - -diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h -index caec3108e..24f3798c1 100644 ---- a/VEX/priv/guest_s390_defs.h -+++ b/VEX/priv/guest_s390_defs.h -@@ -265,7 +265,6 @@ typedef enum { - S390_VEC_OP_INVALID = 0, - S390_VEC_OP_VPKS, - S390_VEC_OP_VPKLS, -- S390_VEC_OP_VISTR, - S390_VEC_OP_VCEQ, - S390_VEC_OP_VTM, - S390_VEC_OP_VGFM, -diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c -index 2188ce5c1..1e04f601a 100644 ---- a/VEX/priv/guest_s390_helpers.c -+++ b/VEX/priv/guest_s390_helpers.c -@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, - {0x00, 0x00}, /* invalid */ - [S390_VEC_OP_VPKS] = {0xe7, 0x97}, - [S390_VEC_OP_VPKLS] = {0xe7, 0x95}, -- [S390_VEC_OP_VISTR] = {0xe7, 0x5c}, - [S390_VEC_OP_VCEQ] = {0xe7, 0xf8}, - [S390_VEC_OP_VTM] = {0xe7, 0xd8}, - [S390_VEC_OP_VGFM] = {0xe7, 0xb4}, -@@ -2610,14 +2609,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, - the_insn.VRR.op2 = opcodes[d->op][1]; - - switch(d->op) { -- case S390_VEC_OP_VISTR: -- the_insn.VRR.v1 = 1; -- the_insn.VRR.v2 = 2; -- the_insn.VRR.rxb = 0b1100; -- the_insn.VRR.m4 = d->m4; -- the_insn.VRR.m5 = d->m5; -- break; -- - case S390_VEC_OP_VTM: - the_insn.VRR.v1 = 2; - the_insn.VRR.v2 = 3; -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index c8dc3ec18..dfea54259 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -17447,40 +17447,34 @@ s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) - static const HChar * - s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5) - { -- IRDirty* d; -- IRTemp cc = newTemp(Ity_I64); -- -- /* Check for specification exception */ -- vassert(m3 < 3); -- vassert((m5 & 0b1110) == 0); -+ s390_insn_assert("vistr", m3 < 3 && m5 == (m5 & 1)); - -- s390x_vec_op_details_t details = { .serialized = 0ULL }; -- details.op = S390_VEC_OP_VISTR; -- 
details.v1 = v1; -- details.v2 = v2; -- details.m4 = m3; -- details.m5 = m5; -- -- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", -- &s390x_dirtyhelper_vec_op, -- mkIRExprVec_2(IRExpr_GSPTR(), -- mkU64(details.serialized))); -+ static const IROp compare_op[3] = { -+ Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4 -+ }; -+ IRExpr* t; -+ IRTemp op2 = newTemp(Ity_V128); -+ IRTemp op2term = newTemp(Ity_V128); -+ IRTemp mask = newTemp(Ity_V128); - -- d->nFxState = 2; -- vex_bzero(&d->fxState, sizeof(d->fxState)); -- d->fxState[0].fx = Ifx_Read; -- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); -- d->fxState[0].size = sizeof(V128); -- d->fxState[1].fx = Ifx_Write; -- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); -- d->fxState[1].size = sizeof(V128); -+ assign(op2, get_vr_qw(v2)); -+ assign(op2term, binop(compare_op[m3], mkexpr(op2), mkV128(0))); -+ t = mkexpr(op2term); - -- stmt(IRStmt_Dirty(d)); -+ for (UChar i = m3; i < 4; i++) { -+ IRTemp s = newTemp(Ity_V128); -+ assign(s, binop(Iop_OrV128, t, binop(Iop_ShrV128, t, mkU8(8 << i)))); -+ t = mkexpr(s); -+ } -+ assign(mask, unop(Iop_NotV128, t)); -+ put_vr_qw(v1, binop(Iop_AndV128, mkexpr(op2), mkexpr(mask))); - - if (s390_vr_is_cs_set(m5)) { -+ IRTemp cc = newTemp(Ity_I64); -+ assign(cc, binop(Iop_And64, mkU64(3), unop(Iop_V128to64, mkexpr(mask)))); - s390_cc_set(cc); - } -- -+ dis_res->hint = Dis_HintVerbose; - return "vistr"; - } - - -commit 32312d588b77c5b5b5a0145bb0cc6f795b447790 -Author: Andreas Arnez -Date: Fri Apr 16 12:44:44 2021 +0200 - - Bug 434296 - s390x: Add memcheck test cases for vector string insns - - Bug 434296 addresses memcheck false positives with the vector string - instructions VISTR, VSTRC, VFAE, VFEE, and VFENE. Add test cases that - verify the fix for that bug. Without the fix, memcheck yields many - complains with these tests, most of which are false positives. 
- -diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am -index e4e69eb38..d183841ef 100644 ---- a/memcheck/tests/s390x/Makefile.am -+++ b/memcheck/tests/s390x/Makefile.am -@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am - - dist_noinst_SCRIPTS = filter_stderr - --INSN_TESTS = cdsg cu21 cu42 ltgjhe -+INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr - - check_PROGRAMS = $(INSN_TESTS) - -@@ -14,3 +14,7 @@ EXTRA_DIST = \ - AM_CFLAGS += @FLAG_M64@ - AM_CXXFLAGS += @FLAG_M64@ - AM_CCASFLAGS += @FLAG_M64@ -+ -+vstrc_CFLAGS = $(AM_CFLAGS) -march=z13 -+vfae_CFLAGS = $(AM_CFLAGS) -march=z13 -+vistr_CFLAGS = $(AM_CFLAGS) -march=z13 -diff --git a/memcheck/tests/s390x/vfae.c b/memcheck/tests/s390x/vfae.c -new file mode 100644 -index 000000000..68781e7fb ---- /dev/null -+++ b/memcheck/tests/s390x/vfae.c -@@ -0,0 +1,72 @@ -+#include -+#include -+ -+#define VECTOR __attribute__ ((vector_size (16))) -+ -+typedef char VECTOR char_v; -+ -+volatile char tmp; -+static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV"; -+ -+static char_v to_char_vec(const char *str) -+{ -+ char_v v; -+ char buf[17]; -+ int len = strlen(str); -+ -+ memcpy(buf, str, (len && str[len - 1] == '~') ? 
len - 1 : len + 1); -+ v = *(char_v *) buf; -+ return v; -+} -+ -+#define GENERATE_TEST(mnem) \ -+static void test_ ## mnem ## _char(const char *str, const char *match, \ -+ int expect_res, int expect_cc) \ -+{ \ -+ int cc; \ -+ char_v v1; \ -+ char_v v2 = to_char_vec(str); \ -+ char_v v3 = to_char_vec(match); \ -+ \ -+ __asm__( \ -+ "cr 0,0\n\t" /* Clear CC */ \ -+ #mnem " %[v1],%[v2],%[v3],0,3\n\t" \ -+ "ipm %[cc]\n\t" \ -+ "srl %[cc],28" \ -+ : [v1] "=v" (v1), \ -+ [cc] "=d" (cc) \ -+ : [v2] "v" (v2), \ -+ [v3] "v" (v3) \ -+ : "cc"); \ -+ \ -+ tmp = hex_digit[v1[7] & 0x1f]; \ -+ if (expect_res >= 0 && v1[7] != expect_res) \ -+ printf("result %u != %d\n", v1[7], expect_res); \ -+ \ -+ tmp = hex_digit[cc & 0xf]; \ -+ if (expect_cc >= 0 && cc != expect_cc) \ -+ printf("CC %d != %d\n", cc, expect_cc); \ -+} -+ -+GENERATE_TEST(vfae) -+ -+GENERATE_TEST(vfee) -+ -+GENERATE_TEST(vfene) -+ -+int main() -+{ -+ test_vfae_char("not found", "................", 9, 0); -+ test_vfae_char("xy", "zzzzzzzzyyyyyyyy", 1, 2); -+ test_vfae_char("incomplete~", "xxxxxxxxxxxxxxxx", -1, -1); -+ -+ test_vfee_char("same char here", "..........here", 10, 2); -+ test_vfee_char("and here too ...", "_________t~", 9, 1); -+ test_vfee_char("equality!~", "========!!~", 8, -1); -+ -+ test_vfene_char("strings equal", "strings equal", 13, 0); -+ test_vfene_char(hex_digit, hex_digit, 16, 3); -+ test_vfene_char("undef~", "undefined", -1, -1); -+ test_vfene_char("active~", "actually ok", 3, 1); -+ return 0; -+} -diff --git a/memcheck/tests/s390x/vfae.stderr.exp b/memcheck/tests/s390x/vfae.stderr.exp -new file mode 100644 -index 000000000..8aad3c87f ---- /dev/null -+++ b/memcheck/tests/s390x/vfae.stderr.exp -@@ -0,0 +1,20 @@ -+Use of uninitialised value of size 8 -+ at 0x........: test_vfae_char (vfae.c:51) -+ by 0x........: main (vfae.c:61) -+ -+Use of uninitialised value of size 8 -+ at 0x........: test_vfae_char (vfae.c:51) -+ by 0x........: main (vfae.c:61) -+ -+Use of uninitialised value of size 8 
-+ at 0x........: test_vfee_char (vfae.c:53) -+ by 0x........: main (vfae.c:65) -+ -+Use of uninitialised value of size 8 -+ at 0x........: test_vfene_char (vfae.c:55) -+ by 0x........: main (vfae.c:69) -+ -+Use of uninitialised value of size 8 -+ at 0x........: test_vfene_char (vfae.c:55) -+ by 0x........: main (vfae.c:69) -+ -diff --git a/memcheck/tests/s390x/vfae.stdout.exp b/memcheck/tests/s390x/vfae.stdout.exp -new file mode 100644 -index 000000000..e69de29bb -diff --git a/memcheck/tests/s390x/vfae.vgtest b/memcheck/tests/s390x/vfae.vgtest -new file mode 100644 -index 000000000..ae36c22fe ---- /dev/null -+++ b/memcheck/tests/s390x/vfae.vgtest -@@ -0,0 +1,2 @@ -+prog: vfae -+vgopts: -q -diff --git a/memcheck/tests/s390x/vistr.c b/memcheck/tests/s390x/vistr.c -new file mode 100644 -index 000000000..7ed59b94b ---- /dev/null -+++ b/memcheck/tests/s390x/vistr.c -@@ -0,0 +1,76 @@ -+#include -+#include -+ -+#define VECTOR __attribute__ ((vector_size (16))) -+ -+typedef char VECTOR char_v; -+ -+volatile char tmp; -+static const char *hex_digit = "0123456789abcdef"; -+ -+static char_v to_char_vec(const char *str, char_v *maskp) -+{ -+ char buf[17]; -+ char_v v; -+ char_v mask = {0}; -+ -+ for (int i = 0; i < sizeof(buf); i++) { -+ char ch = str[i]; -+ if (ch == '\0') -+ break; -+ else if (ch == '$') { -+ buf[i] = '\0'; -+ mask[i] = -1; -+ } else if (ch != '~') { -+ buf[i] = ch; -+ mask[i] = -1; -+ } -+ } -+ v = *(char_v *) buf; -+ *maskp = mask; -+ return v; -+} -+ -+static void test_vistr_char(const char *str, const char *expect_res, -+ int expect_cc) -+{ -+ int cc, count; -+ char_v v1, mask; -+ char_v v2 = to_char_vec(str, &mask); -+ char_v exp_v1 = to_char_vec(expect_res, &mask); -+ char equal[16]; -+ -+ __asm__( -+ "cr 0,0\n\t" /* Clear CC */ -+ "vistr %[v1],%[v2],0,1\n\t" -+ "ipm %[cc]\n\t" -+ "srl %[cc],28" -+ : [v1] "=v" (v1), -+ [cc] "=d" (cc) -+ : [v2] "v" (v2) -+ : "cc"); -+ -+ *(char_v *) equal = (v1 & mask) == (exp_v1 & mask); -+ if (memchr(equal, 0, 
sizeof(equal))) -+ printf("Result doesn't match `%s'\n", expect_res); -+ -+ count = 0; -+ for (int i = 0; i < 16; i++) { -+ if (v1[i] == 0) count++; -+ } -+ tmp = hex_digit[count]; -+ -+ tmp = hex_digit[cc & 0xf]; -+ if (expect_cc >= 0 && cc != expect_cc) -+ printf("CC %d != %d\n", cc, expect_cc); -+} -+ -+int main() -+{ -+ test_vistr_char("terminated$====~", "terminated$$$$$$", 0); -+ test_vistr_char("undef~~~~~~~~~~~", "undef", -1); -+ test_vistr_char("undef, 2nd half~", "undef, 2nd half", -1); -+ test_vistr_char("Not. Terminated.", "Not. Terminated.", 3); -+ test_vistr_char("partiallyOK~~$~~", "partiallyOK~~$$$", 0); -+ return 0; -+} -diff --git a/memcheck/tests/s390x/vistr.stderr.exp b/memcheck/tests/s390x/vistr.stderr.exp -new file mode 100644 -index 000000000..e4f35fd74 ---- /dev/null -+++ b/memcheck/tests/s390x/vistr.stderr.exp -@@ -0,0 +1,20 @@ -+Conditional jump or move depends on uninitialised value(s) -+ at 0x........: test_vistr_char (vistr.c:59) -+ by 0x........: main (vistr.c:71) -+ -+Use of uninitialised value of size 8 -+ at 0x........: test_vistr_char (vistr.c:63) -+ by 0x........: main (vistr.c:71) -+ -+Conditional jump or move depends on uninitialised value(s) -+ at 0x........: test_vistr_char (vistr.c:59) -+ by 0x........: main (vistr.c:72) -+ -+Use of uninitialised value of size 8 -+ at 0x........: test_vistr_char (vistr.c:63) -+ by 0x........: main (vistr.c:72) -+ -+Conditional jump or move depends on uninitialised value(s) -+ at 0x........: test_vistr_char (vistr.c:59) -+ by 0x........: main (vistr.c:74) -+ -diff --git a/memcheck/tests/s390x/vistr.vgtest b/memcheck/tests/s390x/vistr.vgtest -new file mode 100644 -index 000000000..f99749d85 ---- /dev/null -+++ b/memcheck/tests/s390x/vistr.vgtest -@@ -0,0 +1,2 @@ -+prog: vistr -+vgopts: -q -diff --git a/memcheck/tests/s390x/vstrc.c b/memcheck/tests/s390x/vstrc.c -new file mode 100644 -index 000000000..268e2f858 ---- /dev/null -+++ b/memcheck/tests/s390x/vstrc.c -@@ -0,0 +1,92 @@ -+#include 
-+#include -+ -+#define VECTOR __attribute__ ((vector_size (16))) -+ -+typedef char VECTOR char_v; -+ -+struct vstrc_char_rng { -+ unsigned char range[16]; -+ unsigned char flags[16]; -+}; -+ -+#define RNG_FLAG_EQ 0x80 -+#define RNG_FLAG_LT 0x40 -+#define RNG_FLAG_GT 0x20 -+#define RNG_FLAG_ANY 0xe0 -+#define RNG_FLAG_NONE 0x00 -+ -+volatile char tmp; -+static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV"; -+ -+static void test_vstrc_char(const char *str, const struct vstrc_char_rng *rng, -+ int expect_res, int expect_cc) -+{ -+ int cc; -+ char_v v1; -+ char_v v2 = *(const char_v *) str; -+ char_v v3 = *(const char_v *) rng->range; -+ char_v v4 = *(const char_v *) rng->flags; -+ -+ __asm__( -+ "cr 0,0\n\t" /* Clear CC */ -+ "vstrc %[v1],%[v2],%[v3],%[v4],0,3\n\t" -+ "ipm %[cc]\n\t" -+ "srl %[cc],28" -+ : [v1] "=v" (v1), -+ [cc] "=d" (cc) -+ : [v2] "v" (v2), -+ [v3] "v" (v3), -+ [v4] "v" (v4) -+ : "cc"); -+ -+ tmp = hex_digit[v1[7] & 0x1f]; -+ if (expect_res >= 0 && v1[7] != expect_res) -+ printf("result %u != %d\n", v1[7], expect_res); -+ -+ tmp = hex_digit[cc & 0xf]; -+ if (expect_cc >= 0 && cc != expect_cc) -+ printf("CC %d != %d\n", cc, expect_cc); -+} -+ -+int main() -+{ -+ struct vstrc_char_rng rng; -+ char buf[16]; -+ -+ memset(rng.flags, RNG_FLAG_NONE, 16); -+ -+ rng.range[4] = 'z'; -+ rng.flags[4] = RNG_FLAG_GT | RNG_FLAG_EQ; -+ rng.flags[5] = RNG_FLAG_ANY; -+ /* OK: match at the 'z' */ -+ test_vstrc_char("find the z", &rng, 9, 2); -+ -+ rng.flags[12] = RNG_FLAG_GT | RNG_FLAG_EQ; -+ rng.flags[13] = RNG_FLAG_LT | RNG_FLAG_EQ; -+ /* Bad: undefined range */ -+ test_vstrc_char("undefined", &rng, -1, -1); -+ -+ rng.range[12] = 'a'; -+ rng.range[13] = 'c'; -+ /* OK: match at the 'a' */ -+ test_vstrc_char("get the abc", &rng, 8, 2); -+ -+ rng.flags[12] = RNG_FLAG_LT; -+ rng.flags[13] = RNG_FLAG_GT; -+ /* OK: no match up to null terminator */ -+ test_vstrc_char("no match", &rng, 8, 0); -+ -+ /* OK: no match, no null terminator */ -+ 
test_vstrc_char("0123456789abcdef", &rng, 16, 3); -+ -+ buf[0] = 'x'; -+ /* Bad: undefined string */ -+ test_vstrc_char(buf, &rng, -1, -1); -+ -+ buf[1] = 'z'; -+ /* Bad: valid match, but CC undefined */ -+ test_vstrc_char(buf, &rng, 1, -1); -+ -+ return 0; -+} -diff --git a/memcheck/tests/s390x/vstrc.stderr.exp b/memcheck/tests/s390x/vstrc.stderr.exp -new file mode 100644 -index 000000000..c1125bea1 ---- /dev/null -+++ b/memcheck/tests/s390x/vstrc.stderr.exp -@@ -0,0 +1,20 @@ -+Use of uninitialised value of size 8 -+ at 0x........: test_vstrc_char (vstrc.c:43) -+ by 0x........: main (vstrc.c:68) -+ -+Use of uninitialised value of size 8 -+ at 0x........: test_vstrc_char (vstrc.c:47) -+ by 0x........: main (vstrc.c:68) -+ -+Use of uninitialised value of size 8 -+ at 0x........: test_vstrc_char (vstrc.c:43) -+ by 0x........: main (vstrc.c:85) -+ -+Use of uninitialised value of size 8 -+ at 0x........: test_vstrc_char (vstrc.c:47) -+ by 0x........: main (vstrc.c:85) -+ -+Use of uninitialised value of size 8 -+ at 0x........: test_vstrc_char (vstrc.c:47) -+ by 0x........: main (vstrc.c:89) -+ -diff --git a/memcheck/tests/s390x/vstrc.stdout.exp b/memcheck/tests/s390x/vstrc.stdout.exp -new file mode 100644 -index 000000000..e69de29bb -diff --git a/memcheck/tests/s390x/vstrc.vgtest b/memcheck/tests/s390x/vstrc.vgtest -new file mode 100644 -index 000000000..26f5db99b ---- /dev/null -+++ b/memcheck/tests/s390x/vstrc.vgtest -@@ -0,0 +1,2 @@ -+prog: vstrc -+vgopts: -q - -commit a0bb049ace14ab52d386bb1d49a399f39eec4986 -Author: Andreas Arnez -Date: Tue Mar 23 14:55:09 2021 +0100 - - s390x: Improve handling of amodes without base register - - Addressing modes without a base or index register represent constants. - They can occur in some special cases such as shift operations and when - accessing individual vector elements. Perform some minor improvements to - the handling of such amodes. 
- -diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c -index 6e0734ae0..2587f81a1 100644 ---- a/VEX/priv/host_s390_defs.c -+++ b/VEX/priv/host_s390_defs.c -@@ -360,7 +360,8 @@ s390_amode_is_sane(const s390_amode *am) - { - switch (am->tag) { - case S390_AMODE_B12: -- return is_virtual_gpr(am->b) && fits_unsigned_12bit(am->d); -+ return (is_virtual_gpr(am->b) || sameHReg(am->b, s390_hreg_gpr(0))) && -+ fits_unsigned_12bit(am->d); - - case S390_AMODE_B20: - return is_virtual_gpr(am->b) && fits_signed_20bit(am->d); -@@ -378,47 +379,31 @@ s390_amode_is_sane(const s390_amode *am) - } - } - -+static Bool -+s390_amode_is_constant(const s390_amode *am) -+{ -+ return am->tag == S390_AMODE_B12 && sameHReg(am->b, s390_hreg_gpr(0)); -+} -+ - - /* Record the register use of an amode */ - static void - s390_amode_get_reg_usage(HRegUsage *u, const s390_amode *am) - { -- switch (am->tag) { -- case S390_AMODE_B12: -- case S390_AMODE_B20: -- addHRegUse(u, HRmRead, am->b); -- return; -- -- case S390_AMODE_BX12: -- case S390_AMODE_BX20: -+ if (!sameHReg(am->b, s390_hreg_gpr(0))) - addHRegUse(u, HRmRead, am->b); -+ if (!sameHReg(am->x, s390_hreg_gpr(0))) - addHRegUse(u, HRmRead, am->x); -- return; -- -- default: -- vpanic("s390_amode_get_reg_usage"); -- } - } - - - static void - s390_amode_map_regs(HRegRemap *m, s390_amode *am) - { -- switch (am->tag) { -- case S390_AMODE_B12: -- case S390_AMODE_B20: -- am->b = lookupHRegRemap(m, am->b); -- return; -- -- case S390_AMODE_BX12: -- case S390_AMODE_BX20: -+ if (!sameHReg(am->b, s390_hreg_gpr(0))) - am->b = lookupHRegRemap(m, am->b); -+ if (!sameHReg(am->x, s390_hreg_gpr(0))) - am->x = lookupHRegRemap(m, am->x); -- return; -- -- default: -- vpanic("s390_amode_map_regs"); -- } - } - - -@@ -653,6 +638,16 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off) - insn->variant.alu.dst, vreg_opnd); - } - -+ /* v-vgetelem , */ -+ if (insn->tag == S390_INSN_VEC_AMODEOP -+ && insn->variant.vec_amodeop.tag == S390_VEC_GET_ELEM -+ 
&& insn->size == 8 -+ && sameHReg(insn->variant.vec_amodeop.op1, vreg) -+ && s390_amode_is_constant(insn->variant.vec_amodeop.op2)) { -+ vreg_am->d += 8 * insn->variant.vec_amodeop.op2->d; -+ return s390_insn_load(insn->size, insn->variant.vec_amodeop.dst, vreg_am); -+ } -+ - /* v- , */ - if (insn->tag == S390_INSN_UNOP - && insn->variant.unop.src.tag == S390_OPND_REG -diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c -index 5f79280c0..ceca6836e 100644 ---- a/VEX/priv/host_s390_isel.c -+++ b/VEX/priv/host_s390_isel.c -@@ -312,7 +312,18 @@ s390_isel_amode_wrk(ISelEnv *env, IRExpr *expr, - Bool no_index __attribute__((unused)), - Bool short_displacement) - { -- if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) { -+ if (expr->tag == Iex_Unop && expr->Iex.Unop.op == Iop_8Uto64 && -+ expr->Iex.Unop.arg->tag == Iex_Const) { -+ UChar value = expr->Iex.Unop.arg->Iex.Const.con->Ico.U8; -+ return s390_amode_b12((Int)value, s390_hreg_gpr(0)); -+ -+ } else if (expr->tag == Iex_Const) { -+ ULong value = expr->Iex.Const.con->Ico.U64; -+ if (ulong_fits_unsigned_12bit(value)) { -+ return s390_amode_b12((Int)value, s390_hreg_gpr(0)); -+ } -+ -+ } else if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) { - IRExpr *arg1 = expr->Iex.Binop.arg1; - IRExpr *arg2 = expr->Iex.Binop.arg2; - - -commit fd935e238d907d9c523a311ba795077d95ad6912 -Author: Andreas Arnez -Date: Fri Mar 26 19:27:47 2021 +0100 - - s390x: Rework insn "v-vdup" and add "v-vrep" - - So far the only s390x insn for filling a vector with copies of the same - element is "v-vdup" (S390_VEC_DUPLICATE), which replicates the first - element of its vector argument. This is fairly restrictive and can lead - to unnecessarily long code sequences. - - Redefine "v-vdup" to replicate any scalar value instead. And add - "v-vrep" (S390_INSN_VEC_REPLICATE) for replicating any given element of a - vector. 
Select the latter for suitable expressions like - - Iop_Dup8x16(Iop_GetElem8x16(vector_expr, i)) - - This improves the generated code for some vector string instructions, - where a lot of element replications are performed. - -diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c -index 2587f81a1..c764d6ef9 100644 ---- a/VEX/priv/host_s390_defs.c -+++ b/VEX/priv/host_s390_defs.c -@@ -670,6 +670,14 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off) - insn->variant.unop.dst, vreg_opnd); - } - -+ /* v-vrep ,, */ -+ if (insn->tag == S390_INSN_VEC_REPLICATE -+ && sameHReg(insn->variant.vec_replicate.op1, vreg)) { -+ vreg_am->d += insn->size * insn->variant.vec_replicate.idx; -+ return s390_insn_unop(insn->size, S390_VEC_DUPLICATE, -+ insn->variant.vec_replicate.dst, vreg_opnd); -+ } -+ - no_match: - return NULL; - } -@@ -1050,6 +1058,11 @@ s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn) - addHRegUse(u, HRmRead, insn->variant.vec_triop.op3); - break; - -+ case S390_INSN_VEC_REPLICATE: -+ addHRegUse(u, HRmWrite, insn->variant.vec_replicate.dst); -+ addHRegUse(u, HRmRead, insn->variant.vec_replicate.op1); -+ break; -+ - default: - vpanic("s390_insn_get_reg_usage"); - } -@@ -1433,6 +1446,14 @@ s390_insn_map_regs(HRegRemap *m, s390_insn *insn) - insn->variant.vec_triop.op3 = - lookupHRegRemap(m, insn->variant.vec_triop.op3); - break; -+ -+ case S390_INSN_VEC_REPLICATE: -+ insn->variant.vec_replicate.dst = -+ lookupHRegRemap(m, insn->variant.vec_replicate.dst); -+ insn->variant.vec_replicate.op1 = -+ lookupHRegRemap(m, insn->variant.vec_replicate.op1); -+ break; -+ - default: - vpanic("s390_insn_map_regs"); - } -@@ -1767,7 +1788,39 @@ emit_VRI_VI(UChar *p, ULong op, UChar v1, UShort i2) - - - static UChar * --emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2) -+emit_VRI_VIM(UChar *p, ULong op, UChar v1, UShort i2, UChar m3) -+{ -+ ULong the_insn = op; -+ ULong rxb = s390_update_rxb(0, 1, &v1); -+ -+ the_insn |= ((ULong)v1) 
<< 36; -+ the_insn |= ((ULong)i2) << 16; -+ the_insn |= ((ULong)m3) << 12; -+ the_insn |= ((ULong)rxb)<< 8; -+ -+ return emit_6bytes(p, the_insn); -+} -+ -+ -+static UChar * -+emit_VRI_VVMM(UChar *p, ULong op, UChar v1, UChar v3, UShort i2, UChar m4) -+{ -+ ULong the_insn = op; -+ ULong rxb = s390_update_rxb(0, 1, &v1); -+ rxb = s390_update_rxb(rxb, 2, &v3); -+ -+ the_insn |= ((ULong)v1) << 36; -+ the_insn |= ((ULong)v3) << 32; -+ the_insn |= ((ULong)i2) << 16; -+ the_insn |= ((ULong)m4) << 12; -+ the_insn |= ((ULong)rxb) << 8; -+ -+ return emit_6bytes(p, the_insn); -+} -+ -+ -+static UChar * -+emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2, UChar m3) - { - ULong the_insn = op; - ULong rxb = s390_update_rxb(0, 1, &v1); -@@ -1776,6 +1829,7 @@ emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2) - the_insn |= ((ULong)x2) << 32; - the_insn |= ((ULong)b2) << 28; - the_insn |= ((ULong)d2) << 16; -+ the_insn |= ((ULong)m3) << 12; - the_insn |= ((ULong)rxb)<< 8; - - return emit_6bytes(p, the_insn); -@@ -5782,7 +5836,7 @@ s390_emit_VL(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2) - if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) - s390_disasm(ENC3(MNM, VR, UDXB), "vl", v1, d2, x2, b2); - -- return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2); -+ return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2, 0); - } - - static UChar * -@@ -5795,13 +5849,23 @@ s390_emit_VLR(UChar *p, UChar v1, UChar v2) - } - - -+static UChar * -+s390_emit_VLREP(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2, UShort m3) -+{ -+ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) -+ s390_disasm(ENC4(MNM, VR, UDXB, UINT), "vlrep", v1, d2, x2, b2, m3); -+ -+ return emit_VRX(p, 0xE70000000005ULL, v1, x2, b2, d2, m3); -+} -+ -+ - static UChar * - s390_emit_VST(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2) - { - if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) - s390_disasm(ENC3(MNM, VR, UDXB), "vst", v1, d2, x2, b2); - -- return emit_VRX(p, 0xE7000000000eULL, v1, 
x2, b2, d2); -+ return emit_VRX(p, 0xE7000000000eULL, v1, x2, b2, d2, 0); - } - - -@@ -5912,15 +5976,24 @@ s390_emit_VPKLS(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) - - - static UChar * --s390_emit_VREP(UChar *p, UChar v1, UChar v3, UChar m3) -+s390_emit_VREP(UChar *p, UChar v1, UChar v3, UShort i2, UChar m4) - { - if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) -- s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, 0, m3); -+ s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, i2, m4); - -- return emit_VRR_VVM(p, 0xE7000000004DULL, v1, v3, m3); -+ return emit_VRI_VVMM(p, 0xE7000000004DULL, v1, v3, i2, m4); - } - - -+static UChar * -+s390_emit_VREPI(UChar *p, UChar v1, UShort i2, UChar m3) -+{ -+ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) -+ s390_disasm(ENC4(MNM, VR, UINT, UINT), "vrepi", v1, i2, m3); -+ -+ return emit_VRI_VIM(p, 0xE70000000045ULL, v1, i2, m3); -+} -+ - - static UChar * - s390_emit_VUPH(UChar *p, UChar v1, UChar v3, UChar m3) -@@ -7560,6 +7633,20 @@ s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t tag, HReg dst, - return insn; - } - -+s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1, -+ UChar idx) -+{ -+ s390_insn *insn = LibVEX_Alloc_inline(sizeof(s390_insn)); -+ -+ insn->tag = S390_INSN_VEC_REPLICATE; -+ insn->size = size; -+ insn->variant.vec_replicate.dst = dst; -+ insn->variant.vec_replicate.op1 = op1; -+ insn->variant.vec_replicate.idx = idx; -+ -+ return insn; -+} -+ - /*---------------------------------------------------------------*/ - /*--- Debug print ---*/ - /*---------------------------------------------------------------*/ -@@ -8284,6 +8371,13 @@ s390_insn_as_string(const s390_insn *insn) - insn->variant.vec_triop.op3); - break; - -+ case S390_INSN_VEC_REPLICATE: -+ s390_sprintf(buf, "%M %R, %R, %I", "v-vrep", -+ insn->variant.vec_replicate.dst, -+ insn->variant.vec_replicate.op1, -+ insn->variant.vec_replicate.idx); -+ break; -+ - default: goto fail; - } - -@@ -9386,6 +9480,56 @@ 
s390_negate_emit(UChar *buf, const s390_insn *insn) - } - - -+static UChar * -+s390_vec_duplicate_emit(UChar *buf, const s390_insn *insn) -+{ -+ UChar v1 = hregNumber(insn->variant.unop.dst); -+ s390_opnd_RMI opnd = insn->variant.unop.src; -+ UChar r2; -+ -+ switch (opnd.tag) { -+ case S390_OPND_AMODE: { -+ s390_amode* am = opnd.variant.am; -+ UInt b = hregNumber(am->b); -+ UInt x = hregNumber(am->x); -+ UInt d = am->d; -+ -+ if (fits_unsigned_12bit(d)) { -+ return s390_emit_VLREP(buf, v1, x, b, d, -+ s390_getM_from_size(insn->size)); -+ } -+ buf = s390_emit_load_mem(buf, insn->size, R0, am); -+ r2 = R0; -+ goto duplicate_from_gpr; -+ } -+ -+ case S390_OPND_IMMEDIATE: { -+ ULong val = opnd.variant.imm; -+ -+ if (ulong_fits_signed_16bit(val)) { -+ return s390_emit_VREPI(buf, v1, val, s390_getM_from_size(insn->size)); -+ } -+ buf = s390_emit_load_64imm(buf, R0, val); -+ r2 = R0; -+ goto duplicate_from_gpr; -+ } -+ -+ case S390_OPND_REG: -+ r2 = hregNumber(opnd.variant.reg); -+ -+ duplicate_from_gpr: -+ buf = s390_emit_VLVGP(buf, v1, r2, r2); -+ if (insn->size != 8) { -+ buf = s390_emit_VREP(buf, v1, v1, 8 / insn->size - 1, -+ s390_getM_from_size(insn->size)); -+ } -+ return buf; -+ } -+ -+ vpanic("s390_vec_duplicate_emit"); -+} -+ -+ - static UChar * - s390_insn_unop_emit(UChar *buf, const s390_insn *insn) - { -@@ -9405,12 +9549,7 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn) - UShort i2 = insn->variant.unop.src.variant.imm; - return s390_emit_VGBM(buf, v1, i2); - } -- case S390_VEC_DUPLICATE: { -- vassert(insn->variant.unop.src.tag == S390_OPND_REG); -- UChar v1 = hregNumber(insn->variant.unop.dst); -- UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); -- return s390_emit_VREP(buf, v1, v2, s390_getM_from_size(insn->size)); -- } -+ case S390_VEC_DUPLICATE: return s390_vec_duplicate_emit(buf, insn); - case S390_VEC_UNPACKLOWS: { - vassert(insn->variant.unop.src.tag == S390_OPND_REG); - vassert(insn->size < 8); -@@ -11595,6 +11734,16 @@ 
s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn) - } - - -+static UChar * -+s390_insn_vec_replicate_emit(UChar *buf, const s390_insn *insn) -+{ -+ UChar v1 = hregNumber(insn->variant.vec_replicate.dst); -+ UChar v2 = hregNumber(insn->variant.vec_replicate.op1); -+ UShort idx = (UShort) insn->variant.vec_replicate.idx; -+ return s390_emit_VREP(buf, v1, v2, idx, s390_getM_from_size(insn->size)); -+} -+ -+ - Int - emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn, - Bool mode64, VexEndness endness_host, -@@ -11791,6 +11940,11 @@ emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn, - case S390_INSN_VEC_TRIOP: - end = s390_insn_vec_triop_emit(buf, insn); - break; -+ -+ case S390_INSN_VEC_REPLICATE: -+ end = s390_insn_vec_replicate_emit(buf, insn); -+ break; -+ - fail: - default: - vpanic("emit_S390Instr"); -diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h -index 9b69f4d38..063fd3800 100644 ---- a/VEX/priv/host_s390_defs.h -+++ b/VEX/priv/host_s390_defs.h -@@ -166,7 +166,8 @@ typedef enum { - S390_INSN_VEC_AMODEINTOP, - S390_INSN_VEC_UNOP, - S390_INSN_VEC_BINOP, -- S390_INSN_VEC_TRIOP -+ S390_INSN_VEC_TRIOP, -+ S390_INSN_VEC_REPLICATE - } s390_insn_tag; - - -@@ -738,6 +739,11 @@ typedef struct { - HReg op2; /* 128-bit second operand */ - HReg op3; /* 128-bit third operand */ - } vec_triop; -+ struct { -+ HReg dst; /* 128-bit result */ -+ HReg op1; /* 128-bit first operand */ -+ UChar idx; /* index of element to replicate */ -+ } vec_replicate; - } variant; - } s390_insn; - -@@ -853,6 +859,7 @@ s390_insn *s390_insn_vec_binop(UChar size, s390_vec_binop_t, HReg dst, HReg op1, - HReg op2); - s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t, HReg dst, HReg op1, - HReg op2, HReg op3); -+s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1, UChar idx); - - const HChar *s390_insn_as_string(const s390_insn *); - -diff --git a/VEX/priv/host_s390_isel.c 
b/VEX/priv/host_s390_isel.c -index ceca6836e..968122596 100644 ---- a/VEX/priv/host_s390_isel.c -+++ b/VEX/priv/host_s390_isel.c -@@ -3778,12 +3778,12 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) - } - /* --------- UNARY OP --------- */ - case Iex_Unop: { -- UChar size_for_int_arg = 0; - HReg dst = INVALID_HREG; - HReg reg1 = INVALID_HREG; - s390_unop_t vec_unop = S390_UNOP_T_INVALID; - s390_vec_binop_t vec_binop = S390_VEC_BINOP_T_INVALID; - IROp op = expr->Iex.Unop.op; -+ IROp arg_op = Iop_INVALID; - IRExpr* arg = expr->Iex.Unop.arg; - switch(op) { - case Iop_NotV128: -@@ -3839,59 +3839,63 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) - } - - case Iop_Dup8x16: -- size = size_for_int_arg = 1; -- vec_unop = S390_VEC_DUPLICATE; -- goto Iop_V_int_wrk; -+ size = 1; -+ arg_op = Iop_GetElem8x16; -+ goto Iop_V_dup_wrk; - case Iop_Dup16x8: -- size = size_for_int_arg = 2; -- vec_unop = S390_VEC_DUPLICATE; -- goto Iop_V_int_wrk; -+ size = 2; -+ arg_op = Iop_GetElem16x8; -+ goto Iop_V_dup_wrk; - case Iop_Dup32x4: -- size = size_for_int_arg = 4; -- vec_unop = S390_VEC_DUPLICATE; -- goto Iop_V_int_wrk; -+ size = 4; -+ arg_op = Iop_GetElem32x4; -+ goto Iop_V_dup_wrk; -+ -+ Iop_V_dup_wrk: { -+ dst = newVRegV(env); -+ if (arg->tag == Iex_Binop && arg->Iex.Binop.op == arg_op && -+ arg->Iex.Binop.arg2->tag == Iex_Const) { -+ ULong idx; -+ idx = get_const_value_as_ulong(arg->Iex.Binop.arg2-> Iex.Const.con); -+ reg1 = s390_isel_vec_expr(env, arg->Iex.Binop.arg1); -+ addInstr(env, s390_insn_vec_replicate(size, dst, reg1, (UChar)idx)); -+ } else { -+ s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg); -+ addInstr(env, s390_insn_unop(size, S390_VEC_DUPLICATE, dst, src)); -+ } -+ return dst; -+ } - - case Iop_Widen8Sto16x8: - size = 1; -- size_for_int_arg = 8; - vec_unop = S390_VEC_UNPACKLOWS; -- goto Iop_V_int_wrk; -+ goto Iop_V_widen_wrk; - case Iop_Widen16Sto32x4: - size = 2; -- size_for_int_arg = 8; - vec_unop = S390_VEC_UNPACKLOWS; -- goto Iop_V_int_wrk; -+ goto 
Iop_V_widen_wrk; - case Iop_Widen32Sto64x2: - size = 4; -- size_for_int_arg = 8; - vec_unop = S390_VEC_UNPACKLOWS; -- goto Iop_V_int_wrk; -+ goto Iop_V_widen_wrk; - case Iop_Widen8Uto16x8: - size = 1; -- size_for_int_arg = 8; - vec_unop = S390_VEC_UNPACKLOWU; -- goto Iop_V_int_wrk; -+ goto Iop_V_widen_wrk; - case Iop_Widen16Uto32x4: - size = 2; -- size_for_int_arg = 8; - vec_unop = S390_VEC_UNPACKLOWU; -- goto Iop_V_int_wrk; -+ goto Iop_V_widen_wrk; - case Iop_Widen32Uto64x2: - size = 4; -- size_for_int_arg = 8; - vec_unop = S390_VEC_UNPACKLOWU; -- goto Iop_V_int_wrk; -- -- Iop_V_int_wrk: { -- HReg vr1 = vec_generate_zeroes(env); -- s390_amode* amode2 = s390_isel_amode(env, IRExpr_Const(IRConst_U64(0))); -- reg1 = s390_isel_int_expr(env, arg); -+ goto Iop_V_widen_wrk; - -+ Iop_V_widen_wrk: { - vassert(vec_unop != S390_UNOP_T_INVALID); -- addInstr(env, -- s390_insn_vec_amodeintop(size_for_int_arg, S390_VEC_SET_ELEM, -- vr1, amode2, reg1)); -- -+ s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg); -+ HReg vr1 = newVRegV(env); -+ addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, vr1, src)); - dst = newVRegV(env); - addInstr(env, s390_insn_unop(size, vec_unop, dst, s390_opnd_reg(vr1))); - return dst; - -commit 6c1cb1a0128b00858b973ef9344e12d6ddbaaf57 -Author: Andreas Arnez -Date: Thu Mar 25 18:48:07 2021 +0100 - - s390x: Add support for emitting "vector or with complement" - - In the instruction selector, look out for IR expressions that fit "vector - or with complement (VOC)". Emit when applicable. - - This slighly reduces the generated code sometimes, such as for certain - vector string instructions, where such expressions occur quite frequently. 
- -diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c -index c764d6ef9..239d9d299 100644 ---- a/VEX/priv/host_s390_defs.c -+++ b/VEX/priv/host_s390_defs.c -@@ -5907,6 +5907,15 @@ s390_emit_VO(UChar *p, UChar v1, UChar v2, UChar v3) - return emit_VRR_VVV(p, 0xE7000000006aULL, v1, v2, v3); - } - -+static UChar * -+s390_emit_VOC(UChar *p, UChar v1, UChar v2, UChar v3) -+{ -+ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) -+ s390_disasm(ENC4(MNM, VR, VR, VR), "voc", v1, v2, v3); -+ -+ return emit_VRR_VVV(p, 0xE7000000006fULL, v1, v2, v3); -+} -+ - static UChar * - s390_emit_VX(UChar *p, UChar v1, UChar v2, UChar v3) - { -@@ -8312,6 +8321,7 @@ s390_insn_as_string(const s390_insn *insn) - case S390_VEC_PACK_SATURU: op = "v-vpacksaturu"; break; - case S390_VEC_COMPARE_EQUAL: op = "v-vcmpeq"; break; - case S390_VEC_OR: op = "v-vor"; break; -+ case S390_VEC_ORC: op = "v-vorc"; break; - case S390_VEC_XOR: op = "v-vxor"; break; - case S390_VEC_AND: op = "v-vand"; break; - case S390_VEC_MERGEL: op = "v-vmergel"; break; -@@ -11609,6 +11619,8 @@ s390_insn_vec_binop_emit(UChar *buf, const s390_insn *insn) - return s390_emit_VCEQ(buf, v1, v2, v3, s390_getM_from_size(size)); - case S390_VEC_OR: - return s390_emit_VO(buf, v1, v2, v3); -+ case S390_VEC_ORC: -+ return s390_emit_VOC(buf, v1, v2, v3); - case S390_VEC_XOR: - return s390_emit_VX(buf, v1, v2, v3); - case S390_VEC_AND: -diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h -index 063fd3800..dc116106e 100644 ---- a/VEX/priv/host_s390_defs.h -+++ b/VEX/priv/host_s390_defs.h -@@ -366,6 +366,7 @@ typedef enum { - S390_VEC_PACK_SATURU, - S390_VEC_COMPARE_EQUAL, - S390_VEC_OR, -+ S390_VEC_ORC, - S390_VEC_XOR, - S390_VEC_AND, - S390_VEC_MERGEL, -diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c -index 968122596..53d76fe8a 100644 ---- a/VEX/priv/host_s390_isel.c -+++ b/VEX/priv/host_s390_isel.c -@@ -4102,6 +4102,15 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) - case Iop_OrV128: 
- size = 16; - vec_binop = S390_VEC_OR; -+ if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) { -+ IRExpr* orig_arg1 = arg1; -+ arg1 = arg2; -+ arg2 = orig_arg1->Iex.Unop.arg; -+ vec_binop = S390_VEC_ORC; -+ } else if (arg2->tag == Iex_Unop && arg2->Iex.Unop.op == Iop_NotV128) { -+ arg2 = arg2->Iex.Unop.arg; -+ vec_binop = S390_VEC_ORC; -+ } - goto Iop_VV_wrk; - - case Iop_XorV128: - -commit 0bd4263326b2d48f782339a9bbe1a069c7de45c7 -Author: Andreas Arnez -Date: Tue Mar 30 17:45:20 2021 +0200 - - s390x: Fix/optimize Iop_64HLtoV128 - - In s390_vr_fill() in guest_s390_toIR.c, filling a vector with two copies - of a 64-bit value is realized with Iop_64HLtoV128, since there is no such - operator as Iop_Dup64x2. But the two args to Iop_64HLtoV128 use the same - expression, referenced twice. Although this hasn't been seen to cause - real trouble yet, it's problematic and potentially inefficient, so change - it: Assign to a temp and pass that twice instead. - - In the instruction selector, if Iop_64HLtoV128 is found to be used for a - duplication as above, select "v-vdup" instead of "v-vinitfromgprs". This - mimicks the behavior we'd get if there actually was an operator - Iop_Dup64x2. 
- -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index dfea54259..a73dcfb14 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -2299,9 +2299,12 @@ s390_vr_fill(UChar v1, IRExpr *o2) - case Ity_I32: - put_vr_qw(v1, unop(Iop_Dup32x4, o2)); - break; -- case Ity_I64: -- put_vr_qw(v1, binop(Iop_64HLtoV128, o2, o2)); -+ case Ity_I64: { -+ IRTemp val = newTemp(Ity_I64); -+ assign(val, o2); -+ put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(val), mkexpr(val))); - break; -+ } - default: - ppIRType(o2type); - vpanic("s390_vr_fill: invalid IRType"); -diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c -index 53d76fe8a..ee20c6711 100644 ---- a/VEX/priv/host_s390_isel.c -+++ b/VEX/priv/host_s390_isel.c -@@ -4662,12 +4662,16 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) - } - - case Iop_64HLtoV128: -- reg1 = s390_isel_int_expr(env, arg1); -- reg2 = s390_isel_int_expr(env, arg2); -- -- addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS, -- dst, reg1, reg2)); -- -+ if (arg1->tag == Iex_RdTmp && arg2->tag == Iex_RdTmp && -+ arg1->Iex.RdTmp.tmp == arg2->Iex.RdTmp.tmp) { -+ s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg1); -+ addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, dst, src)); -+ } else { -+ reg1 = s390_isel_int_expr(env, arg1); -+ reg2 = s390_isel_int_expr(env, arg2); -+ addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS, -+ dst, reg1, reg2)); -+ } - return dst; - - default: - -commit cae5062b05b95e0303b1122a0ea9aadc197e4f0a -Author: Andreas Arnez -Date: Fri May 7 18:13:03 2021 +0200 - - s390x: Add missing stdout.exp for vector string memcheck test - - The file vistr.stdout.exp was missing from commit 32312d588. Add it. 
- -diff --git a/memcheck/tests/s390x/vistr.stdout.exp b/memcheck/tests/s390x/vistr.stdout.exp -new file mode 100644 -index 000000000..e69de29bb diff --git a/SOURCES/valgrind-3.17.0-s390-z13-vec-fix.patch b/SOURCES/valgrind-3.17.0-s390-z13-vec-fix.patch deleted file mode 100644 index 959e5f8..0000000 --- a/SOURCES/valgrind-3.17.0-s390-z13-vec-fix.patch +++ /dev/null @@ -1,46 +0,0 @@ -commit 124ae6cfa303f0cc71ffd685620cb57c4f8f02bb -Author: Andreas Arnez -Date: Mon Jun 7 14:01:53 2021 +0200 - - s390x: Don't emit "vector or with complement" on z13 - - The z/Architecture instruction "vector or with complement" (VOC) can be - used as an optimization to combine "vector or" with "vector nor". This is - exploited in Valgrind since commit 6c1cb1a0128b00858b973e. However, VOC - requires the vector-enhancements facility 1, which is not installed on a - z13 CPU. Thus Valgrind can now run into SIGILL on z13 when trying to - execute vector string instructions. - - Fix this by suppressing the VOC optimization unless the - vector-enhancements facility 1 is recognized on the host. 
- -diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c -index ee20c6711..15ca92a6b 100644 ---- a/VEX/priv/host_s390_isel.c -+++ b/VEX/priv/host_s390_isel.c -@@ -4102,14 +4102,17 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) - case Iop_OrV128: - size = 16; - vec_binop = S390_VEC_OR; -- if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) { -- IRExpr* orig_arg1 = arg1; -- arg1 = arg2; -- arg2 = orig_arg1->Iex.Unop.arg; -- vec_binop = S390_VEC_ORC; -- } else if (arg2->tag == Iex_Unop && arg2->Iex.Unop.op == Iop_NotV128) { -- arg2 = arg2->Iex.Unop.arg; -- vec_binop = S390_VEC_ORC; -+ if (s390_host_has_vxe) { -+ if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) { -+ IRExpr* orig_arg1 = arg1; -+ arg1 = arg2; -+ arg2 = orig_arg1->Iex.Unop.arg; -+ vec_binop = S390_VEC_ORC; -+ } else if (arg2->tag == Iex_Unop && -+ arg2->Iex.Unop.op == Iop_NotV128) { -+ arg2 = arg2->Iex.Unop.arg; -+ vec_binop = S390_VEC_ORC; -+ } - } - goto Iop_VV_wrk; - diff --git a/SOURCES/valgrind-3.17.0-s390-z15.patch b/SOURCES/valgrind-3.17.0-s390-z15.patch deleted file mode 100644 index 2ec3c2f..0000000 --- a/SOURCES/valgrind-3.17.0-s390-z15.patch +++ /dev/null @@ -1,2413 +0,0 @@ -From 3fbde55a5696c9273084ee2c44daca752e407597 Mon Sep 17 00:00:00 2001 -From: Andreas Arnez -Date: Tue, 26 Jan 2021 15:06:47 +0100 -Subject: [PATCH 01/13] s390x: Misc-insn-3, bitwise logical 3-way instructions - -Add support for the instructions NCRK, NCGRK, NNRK, NNGRK, NORK, NOGRK, -NXRK, NXGRK, OCRK, and OCGRK. Introduce a common helper and use it for -the existing instructions NRK, NGRK, XRK, XGRK, ORK, and OGRK as well. 
---- - VEX/priv/guest_s390_toIR.c | 154 ++++++++++++++++++++++++++----------- - 1 file changed, 109 insertions(+), 45 deletions(-) - -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index a73dcfb14..f8afd5b96 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -5022,8 +5022,12 @@ s390_irgen_NGR(UChar r1, UChar r2) - return "ngr"; - } - -+/* Helper for bitwise logical instructions with two 32-bit input operands and a -+ 32-bit output operand. `inv3' and `inv' indicate whether to invert (build -+ bitwise complement of) operand 3 or the result, respectively. */ - static const HChar * --s390_irgen_NRK(UChar r3, UChar r1, UChar r2) -+s390_irgen_logicalK32(UChar r3, UChar r1, UChar r2, -+ const HChar *mnem, IROp op, Bool inv3, Bool inv) - { - IRTemp op2 = newTemp(Ity_I32); - IRTemp op3 = newTemp(Ity_I32); -@@ -5031,15 +5035,19 @@ s390_irgen_NRK(UChar r3, UChar r1, UChar r2) - - assign(op2, get_gpr_w1(r2)); - assign(op3, get_gpr_w1(r3)); -- assign(result, binop(Iop_And32, mkexpr(op2), mkexpr(op3))); -+ IRExpr* tmp = binop(op, mkexpr(op2), -+ inv3 ? unop(Iop_Not32, mkexpr(op3)) : mkexpr(op3)); -+ assign(result, inv ? unop(Iop_Not32, tmp) : tmp); - s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); - put_gpr_w1(r1, mkexpr(result)); - -- return "nrk"; -+ return mnem; - } - -+/* Same as s390_irgen_logicalK32, but for 64-bit operands. */ - static const HChar * --s390_irgen_NGRK(UChar r3, UChar r1, UChar r2) -+s390_irgen_logicalK64(UChar r3, UChar r1, UChar r2, -+ const HChar *mnem, IROp op, Bool inv3, Bool inv) - { - IRTemp op2 = newTemp(Ity_I64); - IRTemp op3 = newTemp(Ity_I64); -@@ -5047,11 +5055,49 @@ s390_irgen_NGRK(UChar r3, UChar r1, UChar r2) - - assign(op2, get_gpr_dw0(r2)); - assign(op3, get_gpr_dw0(r3)); -- assign(result, binop(Iop_And64, mkexpr(op2), mkexpr(op3))); -+ IRExpr* tmp = binop(op, mkexpr(op2), -+ inv3 ? unop(Iop_Not64, mkexpr(op3)) : mkexpr(op3)); -+ assign(result, inv ? 
unop(Iop_Not64, tmp) : tmp); - s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); - put_gpr_dw0(r1, mkexpr(result)); - -- return "ngrk"; -+ return mnem; -+} -+ -+static const HChar * -+s390_irgen_NRK(UChar r3, UChar r1, UChar r2) -+{ -+ return s390_irgen_logicalK32(r3, r1, r2, "nrk", Iop_And32, False, False); -+} -+ -+static const HChar * -+s390_irgen_NGRK(UChar r3, UChar r1, UChar r2) -+{ -+ return s390_irgen_logicalK64(r3, r1, r2, "ngrk", Iop_And64, False, False); -+} -+ -+static const HChar * -+s390_irgen_NCRK(UChar r3, UChar r1, UChar r2) -+{ -+ return s390_irgen_logicalK32(r3, r1, r2, "ncrk", Iop_And32, True, False); -+} -+ -+static const HChar * -+s390_irgen_NCGRK(UChar r3, UChar r1, UChar r2) -+{ -+ return s390_irgen_logicalK64(r3, r1, r2, "ncgrk", Iop_And64, True, False); -+} -+ -+static const HChar * -+s390_irgen_NNRK(UChar r3, UChar r1, UChar r2) -+{ -+ return s390_irgen_logicalK32(r3, r1, r2, "nnrk", Iop_And32, False, True); -+} -+ -+static const HChar * -+s390_irgen_NNGRK(UChar r3, UChar r1, UChar r2) -+{ -+ return s390_irgen_logicalK64(r3, r1, r2, "nngrk", Iop_And64, False, True); - } - - static const HChar * -@@ -7071,33 +7117,25 @@ s390_irgen_XGR(UChar r1, UChar r2) - static const HChar * - s390_irgen_XRK(UChar r3, UChar r1, UChar r2) - { -- IRTemp op2 = newTemp(Ity_I32); -- IRTemp op3 = newTemp(Ity_I32); -- IRTemp result = newTemp(Ity_I32); -- -- assign(op2, get_gpr_w1(r2)); -- assign(op3, get_gpr_w1(r3)); -- assign(result, binop(Iop_Xor32, mkexpr(op2), mkexpr(op3))); -- s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); -- put_gpr_w1(r1, mkexpr(result)); -- -- return "xrk"; -+ return s390_irgen_logicalK32(r3, r1, r2, "xrk", Iop_Xor32, False, False); - } - - static const HChar * - s390_irgen_XGRK(UChar r3, UChar r1, UChar r2) - { -- IRTemp op2 = newTemp(Ity_I64); -- IRTemp op3 = newTemp(Ity_I64); -- IRTemp result = newTemp(Ity_I64); -+ return s390_irgen_logicalK64(r3, r1, r2, "xgrk", Iop_Xor64, False, False); -+} - -- assign(op2, get_gpr_dw0(r2)); -- 
assign(op3, get_gpr_dw0(r3)); -- assign(result, binop(Iop_Xor64, mkexpr(op2), mkexpr(op3))); -- s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); -- put_gpr_dw0(r1, mkexpr(result)); -+static const HChar * -+s390_irgen_NXRK(UChar r3, UChar r1, UChar r2) -+{ -+ return s390_irgen_logicalK32(r3, r1, r2, "nxrk", Iop_Xor32, False, True); -+} - -- return "xgrk"; -+static const HChar * -+s390_irgen_NXGRK(UChar r3, UChar r1, UChar r2) -+{ -+ return s390_irgen_logicalK64(r3, r1, r2, "nxgrk", Iop_Xor64, False, True); - } - - static const HChar * -@@ -8920,33 +8958,37 @@ s390_irgen_OGR(UChar r1, UChar r2) - static const HChar * - s390_irgen_ORK(UChar r3, UChar r1, UChar r2) - { -- IRTemp op2 = newTemp(Ity_I32); -- IRTemp op3 = newTemp(Ity_I32); -- IRTemp result = newTemp(Ity_I32); -+ return s390_irgen_logicalK32(r3, r1, r2, "ork", Iop_Or32, False, False); -+} - -- assign(op2, get_gpr_w1(r2)); -- assign(op3, get_gpr_w1(r3)); -- assign(result, binop(Iop_Or32, mkexpr(op2), mkexpr(op3))); -- s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); -- put_gpr_w1(r1, mkexpr(result)); -+static const HChar * -+s390_irgen_OGRK(UChar r3, UChar r1, UChar r2) -+{ -+ return s390_irgen_logicalK64(r3, r1, r2, "ogrk", Iop_Or64, False, False); -+} - -- return "ork"; -+static const HChar * -+s390_irgen_OCRK(UChar r3, UChar r1, UChar r2) -+{ -+ return s390_irgen_logicalK32(r3, r1, r2, "ocrk", Iop_Or32, True, False); - } - - static const HChar * --s390_irgen_OGRK(UChar r3, UChar r1, UChar r2) -+s390_irgen_OCGRK(UChar r3, UChar r1, UChar r2) - { -- IRTemp op2 = newTemp(Ity_I64); -- IRTemp op3 = newTemp(Ity_I64); -- IRTemp result = newTemp(Ity_I64); -+ return s390_irgen_logicalK64(r3, r1, r2, "ocgrk", Iop_Or64, True, False); -+} - -- assign(op2, get_gpr_dw0(r2)); -- assign(op3, get_gpr_dw0(r3)); -- assign(result, binop(Iop_Or64, mkexpr(op2), mkexpr(op3))); -- s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); -- put_gpr_dw0(r1, mkexpr(result)); -+static const HChar * -+s390_irgen_NORK(UChar r3, UChar r1, UChar 
r2) -+{ -+ return s390_irgen_logicalK32(r3, r1, r2, "nork", Iop_Or32, False, True); -+} - -- return "ogrk"; -+static const HChar * -+s390_irgen_NOGRK(UChar r3, UChar r1, UChar r2) -+{ -+ return s390_irgen_logicalK64(r3, r1, r2, "nogrk", Iop_Or64, False, True); - } - - static const HChar * -@@ -20031,12 +20073,28 @@ s390_decode_4byte_and_irgen(const UChar *bytes) - case 0xb961: s390_format_RRF_U0RR(s390_irgen_CLGRT, RRF2_m3(ovl), - RRF2_r1(ovl), RRF2_r2(ovl), - S390_XMNM_CAB); goto ok; -+ case 0xb964: s390_format_RRF_R0RR2(s390_irgen_NNGRK, RRF4_r3(ovl), -+ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; -+ case 0xb965: s390_format_RRF_R0RR2(s390_irgen_OCGRK, RRF4_r3(ovl), -+ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; -+ case 0xb966: s390_format_RRF_R0RR2(s390_irgen_NOGRK, RRF4_r3(ovl), -+ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; -+ case 0xb967: s390_format_RRF_R0RR2(s390_irgen_NXGRK, RRF4_r3(ovl), -+ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; - case 0xb972: s390_format_RRF_U0RR(s390_irgen_CRT, RRF2_m3(ovl), - RRF2_r1(ovl), RRF2_r2(ovl), - S390_XMNM_CAB); goto ok; - case 0xb973: s390_format_RRF_U0RR(s390_irgen_CLRT, RRF2_m3(ovl), - RRF2_r1(ovl), RRF2_r2(ovl), - S390_XMNM_CAB); goto ok; -+ case 0xb974: s390_format_RRF_R0RR2(s390_irgen_NNRK, RRF4_r3(ovl), -+ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; -+ case 0xb975: s390_format_RRF_R0RR2(s390_irgen_OCRK, RRF4_r3(ovl), -+ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; -+ case 0xb976: s390_format_RRF_R0RR2(s390_irgen_NORK, RRF4_r3(ovl), -+ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; -+ case 0xb977: s390_format_RRF_R0RR2(s390_irgen_NXRK, RRF4_r3(ovl), -+ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; - case 0xb980: s390_format_RRE_RR(s390_irgen_NGR, RRE_r1(ovl), - RRE_r2(ovl)); goto ok; - case 0xb981: s390_format_RRE_RR(s390_irgen_OGR, RRE_r1(ovl), -@@ -20148,6 +20206,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) - case 0xb9e4: s390_format_RRF_R0RR2(s390_irgen_NGRK, RRF4_r3(ovl), - RRF4_r1(ovl), RRF4_r2(ovl)); - goto ok; -+ case 0xb9e5: 
s390_format_RRF_R0RR2(s390_irgen_NCGRK, RRF4_r3(ovl), -+ RRF4_r1(ovl), RRF4_r2(ovl)); -+ goto ok; - case 0xb9e6: s390_format_RRF_R0RR2(s390_irgen_OGRK, RRF4_r3(ovl), - RRF4_r1(ovl), RRF4_r2(ovl)); - goto ok; -@@ -20178,6 +20239,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) - case 0xb9f4: s390_format_RRF_R0RR2(s390_irgen_NRK, RRF4_r3(ovl), - RRF4_r1(ovl), RRF4_r2(ovl)); - goto ok; -+ case 0xb9f5: s390_format_RRF_R0RR2(s390_irgen_NCRK, RRF4_r3(ovl), -+ RRF4_r1(ovl), RRF4_r2(ovl)); -+ goto ok; - case 0xb9f6: s390_format_RRF_R0RR2(s390_irgen_ORK, RRF4_r3(ovl), - RRF4_r1(ovl), RRF4_r2(ovl)); - goto ok; --- -2.23.0 - -From 748421b31ab6b15cc849bd6b9588ad759b807324 Mon Sep 17 00:00:00 2001 -From: Andreas Arnez -Date: Wed, 27 Jan 2021 18:11:06 +0100 -Subject: [PATCH 02/13] s390x: Misc-insn-3, "select" instructions - -Add support for the instructions SELR, SELGR, and SELFHR. ---- - VEX/priv/guest_s390_toIR.c | 43 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 43 insertions(+) - -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index f8afd5b96..41265631b 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -3113,6 +3113,16 @@ s390_format_RRF_FUFF2(const HChar *(*irgen)(UChar, UChar, UChar, UChar), - s390_disasm(ENC5(MNM, FPR, FPR, FPR, UINT), mnm, r1, r2, r3, m4); - } - -+static void -+s390_format_RRF_RURR(const HChar *(*irgen)(UChar, UChar, UChar, UChar), -+ UChar r3, UChar m4, UChar r1, UChar r2) -+{ -+ const HChar *mnm = irgen(r3, m4, r1, r2); -+ -+ if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) -+ s390_disasm(ENC5(MNM, GPR, GPR, GPR, UINT), mnm, r1, r3, r2, m4); -+} -+ - static void - s390_format_RRF_R0RR2(const HChar *(*irgen)(UChar r3, UChar r1, UChar r2), - UChar r3, UChar r1, UChar r2) -@@ -19254,6 +19264,30 @@ s390_irgen_VBPERM(UChar v1, UChar v2, UChar v3) - return "vbperm"; - } - -+static const HChar * -+s390_irgen_SELR(UChar r3, UChar m4, UChar r1, UChar r2) -+{ -+ IRExpr* cond = binop(Iop_CmpNE32, 
s390_call_calculate_cond(m4), mkU32(0)); -+ put_gpr_w1(r1, mkite(cond, get_gpr_w1(r2), get_gpr_w1(r3))); -+ return "selr"; -+} -+ -+static const HChar * -+s390_irgen_SELGR(UChar r3, UChar m4, UChar r1, UChar r2) -+{ -+ IRExpr* cond = binop(Iop_CmpNE32, s390_call_calculate_cond(m4), mkU32(0)); -+ put_gpr_dw0(r1, mkite(cond, get_gpr_dw0(r2), get_gpr_dw0(r3))); -+ return "selgr"; -+} -+ -+static const HChar * -+s390_irgen_SELFHR(UChar r3, UChar m4, UChar r1, UChar r2) -+{ -+ IRExpr* cond = binop(Iop_CmpNE32, s390_call_calculate_cond(m4), mkU32(0)); -+ put_gpr_w0(r1, mkite(cond, get_gpr_w0(r2), get_gpr_w0(r3))); -+ return "selfhr"; -+} -+ - /* New insns are added here. - If an insn is contingent on a facility being installed also - check whether the list of supported facilities in function -@@ -20163,6 +20197,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) - case 0xb9bd: /* TRTRE */ goto unimplemented; - case 0xb9be: /* SRSTU */ goto unimplemented; - case 0xb9bf: /* TRTE */ goto unimplemented; -+ case 0xb9c0: s390_format_RRF_RURR(s390_irgen_SELFHR, RRF4_r3(ovl), -+ RRF4_m4(ovl), RRF4_r1(ovl), -+ RRF4_r2(ovl)); goto ok; - case 0xb9c8: s390_format_RRF_R0RR2(s390_irgen_AHHHR, RRF4_r3(ovl), - RRF4_r1(ovl), RRF4_r2(ovl)); - goto ok; -@@ -20203,6 +20240,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) - case 0xb9e2: s390_format_RRF_U0RR(s390_irgen_LOCGR, RRF3_r3(ovl), - RRF3_r1(ovl), RRF3_r2(ovl), - S390_XMNM_LOCGR); goto ok; -+ case 0xb9e3: s390_format_RRF_RURR(s390_irgen_SELGR, RRF4_r3(ovl), -+ RRF4_m4(ovl), RRF4_r1(ovl), -+ RRF4_r2(ovl)); goto ok; - case 0xb9e4: s390_format_RRF_R0RR2(s390_irgen_NGRK, RRF4_r3(ovl), - RRF4_r1(ovl), RRF4_r2(ovl)); - goto ok; -@@ -20233,6 +20273,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) - case 0xb9ed: s390_format_RRF_R0RR2(s390_irgen_MSGRKC, RRF4_r3(ovl), - RRF4_r1(ovl), RRF4_r2(ovl)); - goto ok; -+ case 0xb9f0: s390_format_RRF_RURR(s390_irgen_SELR, RRF4_r3(ovl), -+ RRF4_m4(ovl), RRF4_r1(ovl), -+ RRF4_r2(ovl)); goto ok; - 
case 0xb9f2: s390_format_RRF_U0RR(s390_irgen_LOCR, RRF3_r3(ovl), - RRF3_r1(ovl), RRF3_r2(ovl), - S390_XMNM_LOCR); goto ok; --- -2.23.0 - -From 31cbd583e858f47a86ada087d21a6abc13ba04f2 Mon Sep 17 00:00:00 2001 -From: Andreas Arnez -Date: Thu, 28 Jan 2021 19:47:00 +0100 -Subject: [PATCH 03/13] s390x: Misc-insn-3, new POPCNT variant - -Add support for the new POPCNT variant that has bit 0 of the M3 field set -and yields the total number of one bits in its 64-bit operand. ---- - VEX/priv/guest_s390_toIR.c | 44 ++++++++++++++++++++++++++------------ - 1 file changed, 30 insertions(+), 14 deletions(-) - -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index 41265631b..ca9e6dc03 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -3073,6 +3073,20 @@ s390_format_RRF_U0RR(const HChar *(*irgen)(UChar m3, UChar r1, UChar r2), - s390_disasm(ENC3(XMNM, GPR, GPR), xmnm_kind, m3, r1, r2); - } - -+static void -+s390_format_RRFa_U0RR(const HChar *(*irgen)(UChar m3, UChar r1, UChar r2), -+ UChar m3, UChar r1, UChar r2) -+{ -+ const HChar *mnm = irgen(m3, r1, r2); -+ -+ if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) { -+ if (m3 != 0) -+ s390_disasm(ENC4(MNM, GPR, GPR, UINT), mnm, r1, r2, m3); -+ else -+ s390_disasm(ENC3(MNM, GPR, GPR), mnm, r1, r2); -+ } -+} -+ - static void - s390_format_RRF_F0FF2(const HChar *(*irgen)(UChar, UChar, UChar), - UChar r3, UChar r1, UChar r2) -@@ -15112,30 +15126,32 @@ s390_irgen_FLOGR(UChar r1, UChar r2) - } - - static const HChar * --s390_irgen_POPCNT(UChar r1, UChar r2) -+s390_irgen_POPCNT(UChar m3, UChar r1, UChar r2) - { -- Int i; -+ s390_insn_assert("popcnt", (m3 & 7) == 0); -+ -+ static const ULong masks[] = { -+ 0x5555555555555555, 0x3333333333333333, 0x0F0F0F0F0F0F0F0F, -+ 0x00FF00FF00FF00FF, 0x0000FFFF0000FFFF, 0x00000000FFFFFFFF, -+ }; -+ Int i, n; - IRTemp val = newTemp(Ity_I64); -- IRTemp mask[3]; - - assign(val, get_gpr_dw0(r2)); -- for (i = 0; i < 3; i++) { -- mask[i] = newTemp(Ity_I64); -- } 
-- assign(mask[0], mkU64(0x5555555555555555ULL)); -- assign(mask[1], mkU64(0x3333333333333333ULL)); -- assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL)); -- for (i = 0; i < 3; i++) { -+ n = (m3 & 8) ? 6 : 3; -+ for (i = 0; i < n; i++) { -+ IRTemp mask = newTemp(Ity_I64); - IRTemp tmp = newTemp(Ity_I64); - -+ assign (mask, mkU64(masks[i])); - assign(tmp, - binop(Iop_Add64, - binop(Iop_And64, - mkexpr(val), -- mkexpr(mask[i])), -+ mkexpr(mask)), - binop(Iop_And64, - binop(Iop_Shr64, mkexpr(val), mkU8(1 << i)), -- mkexpr(mask[i])))); -+ mkexpr(mask)))); - val = tmp; - } - s390_cc_thunk_putZ(S390_CC_OP_BITWISE, val); -@@ -20235,8 +20251,8 @@ s390_decode_4byte_and_irgen(const UChar *bytes) - case 0xb9e0: s390_format_RRF_U0RR(s390_irgen_LOCFHR, RRF3_r3(ovl), - RRF3_r1(ovl), RRF3_r2(ovl), - S390_XMNM_LOCFHR); goto ok; -- case 0xb9e1: s390_format_RRE_RR(s390_irgen_POPCNT, RRE_r1(ovl), -- RRE_r2(ovl)); goto ok; -+ case 0xb9e1: s390_format_RRFa_U0RR(s390_irgen_POPCNT, RRF3_r3(ovl), -+ RRF3_r1(ovl), RRF3_r2(ovl)); goto ok; - case 0xb9e2: s390_format_RRF_U0RR(s390_irgen_LOCGR, RRF3_r3(ovl), - RRF3_r1(ovl), RRF3_r2(ovl), - S390_XMNM_LOCGR); goto ok; --- -2.23.0 - -From 64352d57f93711ce76fd481558dcf6d65e26b19f Mon Sep 17 00:00:00 2001 -From: Andreas Arnez -Date: Fri, 29 Jan 2021 20:13:05 +0100 -Subject: [PATCH 04/13] s390x: Misc-insn-3, MVCRL - -Add support for the "move right to left" instruction MVCRL. 
---- - VEX/priv/guest_s390_toIR.c | 47 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 47 insertions(+) - -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index ca9e6dc03..9f7d98f8c 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -3562,6 +3562,25 @@ s390_format_SS_L0RDRD(const HChar *(*irgen)(UChar, IRTemp, IRTemp), - s390_disasm(ENC3(MNM, UDLB, UDXB), mnm, d1, l, b1, d2, 0, b2); - } - -+static void -+s390_format_SSE_RDRD(const HChar *(*irgen)(IRTemp, IRTemp), -+ UChar b1, UShort d1, UChar b2, UShort d2) -+{ -+ const HChar *mnm; -+ IRTemp op1addr = newTemp(Ity_I64); -+ IRTemp op2addr = newTemp(Ity_I64); -+ -+ assign(op1addr, binop(Iop_Add64, mkU64(d1), b1 != 0 ? get_gpr_dw0(b1) : -+ mkU64(0))); -+ assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? get_gpr_dw0(b2) : -+ mkU64(0))); -+ -+ mnm = irgen(op1addr, op2addr); -+ -+ if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) -+ s390_disasm(ENC2(UDXB, UDXB), mnm, d1, 0, b1, d2, 0, b2); -+} -+ - static void - s390_format_SIL_RDI(const HChar *(*irgen)(UShort i2, IRTemp op1addr), - UChar b1, UShort d1, UShort i2) -@@ -13667,6 +13686,31 @@ s390_irgen_MVCIN(UChar length, IRTemp start1, IRTemp start2) - return "mvcin"; - } - -+static const HChar * -+s390_irgen_MVCRL(IRTemp op1addr, IRTemp op2addr) -+{ -+ IRTemp counter = newTemp(Ity_I64); -+ IRTemp offset = newTemp(Ity_I64); -+ -+ assign(counter, get_counter_dw0()); -+ /* offset = length - 1 - counter, where length-1 is specified in r0 */ -+ assign(offset, -+ binop(Iop_Sub64, -+ unop(Iop_16Uto64, -+ binop(Iop_And16, get_gpr_hw3(0), mkU16(0xfff))), -+ mkexpr(counter))); -+ -+ store(binop(Iop_Add64, mkexpr(op1addr), mkexpr(offset)), -+ load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr), mkexpr(offset)))); -+ -+ /* Check for end of field */ -+ put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1))); -+ iterate_if(binop(Iop_CmpNE64, mkexpr(offset), mkU64(0))); -+ put_counter_dw0(mkU64(0)); -+ -+ return "mvcrl"; -+} -+ - 
static const HChar * - s390_irgen_MVCL(UChar r1, UChar r2) - { -@@ -22217,6 +22261,9 @@ s390_decode_6byte_and_irgen(const UChar *bytes) - case 0xe500ULL: /* LASP */ goto unimplemented; - case 0xe501ULL: /* TPROT */ goto unimplemented; - case 0xe502ULL: /* STRAG */ goto unimplemented; -+ case 0xe50aULL: s390_format_SSE_RDRD(s390_irgen_MVCRL, -+ SS_b1(ovl), SS_d1(ovl), -+ SS_b2(ovl), SS_d2(ovl)); goto ok; - case 0xe50eULL: /* MVCSK */ goto unimplemented; - case 0xe50fULL: /* MVCDK */ goto unimplemented; - case 0xe544ULL: s390_format_SIL_RDI(s390_irgen_MVHHI, SIL_b1(ovl), --- -2.23.0 - -From 6cc4d66cc3a999253d9a57e2b5c75aeb67f77918 Mon Sep 17 00:00:00 2001 -From: Andreas Arnez -Date: Tue, 2 Feb 2021 20:15:02 +0100 -Subject: [PATCH 05/13] s390x: Misc-insn-3, test case - -Add a test case for the new instructions in the miscellaneous instruction -extensions facitility 3. ---- - .gitignore | 1 + - none/tests/s390x/Makefile.am | 3 +- - none/tests/s390x/misc3.c | 182 ++++++++++++++++++++++++++++++ - none/tests/s390x/misc3.stderr.exp | 2 + - none/tests/s390x/misc3.stdout.exp | 103 +++++++++++++++++ - none/tests/s390x/misc3.vgtest | 1 + - 6 files changed, 291 insertions(+), 1 deletion(-) - create mode 100644 none/tests/s390x/misc3.c - create mode 100644 none/tests/s390x/misc3.stderr.exp - create mode 100644 none/tests/s390x/misc3.stdout.exp - create mode 100644 none/tests/s390x/misc3.vgtest - -diff --git a/none/tests/s390x/Makefile.am b/none/tests/s390x/Makefile.am -index a0fb92ef5..2fd45ec1e 100644 ---- a/none/tests/s390x/Makefile.am -+++ b/none/tests/s390x/Makefile.am -@@ -19,7 +19,8 @@ INSN_TESTS = clc clcle cvb cvd icm lpr tcxb lam_stam xc mvst add sub mul \ - spechelper-ltr spechelper-or \ - spechelper-icm-1 spechelper-icm-2 spechelper-tmll \ - spechelper-tm laa vector lsc2 ppno vector_string vector_integer \ -- vector_float add-z14 sub-z14 mul-z14 bic -+ vector_float add-z14 sub-z14 mul-z14 bic \ -+ misc3 - - if BUILD_DFP_TESTS - INSN_TESTS += dfp-1 dfp-2 dfp-3 dfp-4 
dfptest dfpext dfpconv srnmt pfpo -diff --git a/none/tests/s390x/misc3.c b/none/tests/s390x/misc3.c -new file mode 100644 -index 000000000..ae6e8d4c2 ---- /dev/null -+++ b/none/tests/s390x/misc3.c -@@ -0,0 +1,182 @@ -+#include -+ -+/* -- Logical instructions -- */ -+ -+#define TEST_GENERATE(opcode,insn) \ -+ static void test_##insn(unsigned long a, unsigned long b) \ -+ { \ -+ unsigned long out = 0xdecaffee42424242; \ -+ int cc; \ -+ \ -+ __asm__( \ -+ "cr 0,0\n\t" /* Clear CC */ \ -+ ".insn rrf,0x" #opcode "0000,%[out],%[a],%[b],0\n\t" \ -+ "ipm %[cc]\n\t" \ -+ "srl %[cc],28\n" \ -+ : [out] "+d" (out), \ -+ [cc] "=d" (cc) \ -+ : [a] "d" (a), \ -+ [b] "d" (b) \ -+ : "cc"); \ -+ \ -+ printf("\t%016lx %016lx -> %016lx cc=%d\n", \ -+ a, b, out, cc); \ -+ } -+ -+#define TEST_EXEC(opcode,insn) \ -+ do { \ -+ puts(#insn); \ -+ test_##insn(0, 0); \ -+ test_##insn(0, -1); \ -+ test_##insn(-1, 0); \ -+ test_##insn(-1, -1); \ -+ test_##insn(0x012345678abcdef, 0); \ -+ test_##insn(0x012345678abcdef, -1); \ -+ test_##insn(0x55555555aaaaaaaa, 0xaaaaaaaa55555555); \ -+ } while (0) -+ -+#define INSNS \ -+ XTEST(b9f5,ncrk); \ -+ XTEST(b9e5,ncgrk); \ -+ XTEST(b974,nnrk); \ -+ XTEST(b964,nngrk); \ -+ XTEST(b976,nork); \ -+ XTEST(b966,nogrk); \ -+ XTEST(b977,nxrk); \ -+ XTEST(b967,nxgrk); \ -+ XTEST(b975,ocrk); \ -+ XTEST(b965,ocgrk); -+ -+#define XTEST TEST_GENERATE -+INSNS -+#undef XTEST -+ -+static void test_all_logical_insns() -+{ -+#define XTEST TEST_EXEC -+ INSNS -+#undef XTEST -+} -+#undef INSNS -+#undef TEST_GENERATE -+#undef TEST_EXEC -+ -+ -+/* -- Full population count -- */ -+ -+static void test_popcnt(unsigned long op2) -+{ -+ unsigned long result; -+ int cc; -+ -+ __asm__(".insn rrf,0xb9e10000,%[result],%[op2],8,0\n\t" -+ "ipm %[cc]\n\t" -+ "srl %[cc],28\n" -+ : [result]"=d" (result), -+ [cc]"=d" (cc) -+ : [op2]"d" (op2) -+ : "cc"); -+ printf("\t%016lx -> %2lu cc=%d\n", op2, result, cc); -+} -+ -+static int test_all_popcnt() -+{ -+ puts("popcnt"); -+ test_popcnt(0); -+ 
test_popcnt(1); -+ test_popcnt(0x8000000000000000); -+ test_popcnt(-1UL); -+ test_popcnt(0xff427e3800556bcd); -+ return 0; -+} -+ -+/* -- Select -- */ -+ -+#define TEST_GENERATE(opcode,insn) \ -+ static void test_##insn(unsigned long a, unsigned long b) \ -+ { \ -+ unsigned long out0 = 0x0cafebad0badcafe; \ -+ unsigned long out1 = 0x0badcafe0cafebad; \ -+ \ -+ __asm__( \ -+ "cr 0,0\n\t" /* Clear CC */ \ -+ ".insn rrf,0x" #opcode "0000,%[out0],%[a],%[b],8\n\t" \ -+ ".insn rrf,0x" #opcode "0000,%[out1],%[a],%[b],7\n\t" \ -+ : [out0] "+d" (out0), \ -+ [out1] "+d" (out1) \ -+ : [a] "d" (a), \ -+ [b] "d" (b) \ -+ : ); \ -+ \ -+ printf("\t%016lx %016lx -> %016lx %016lx\n", \ -+ a, b, out0, out1); \ -+ } -+ -+#define TEST_EXEC(opcode,insn) \ -+ do { \ -+ puts(#insn); \ -+ test_##insn(-1, 0); \ -+ test_##insn(0, -1); \ -+ test_##insn(0x1234567890abcdef, 0xfedcba9876543210); \ -+ } while (0) -+ -+#define INSNS \ -+ XTEST(b9f0,selr); \ -+ XTEST(b9e3,selgr); \ -+ XTEST(b9c0,selfhr); -+ -+#define XTEST TEST_GENERATE -+INSNS -+#undef XTEST -+ -+static void test_all_select() -+{ -+#define XTEST TEST_EXEC -+ INSNS -+#undef XTEST -+} -+#undef INSNS -+#undef TEST_GENERATE -+#undef TEST_EXEC -+ -+ -+/* -- Move right to left -- */ -+ -+static void test_mvcrl(void *to, void *from, size_t len) -+{ -+ len -= 1; -+ __asm__("lgr 0,%[len]\n\t" -+ ".insn sse,0xe50a00000000,%[to],%[from]\n\t" -+ : [to] "+Q" (*(struct { char c[len]; } *) to) -+ : [from] "Q" (*(struct { char c[len]; } *) from), -+ [len] "d" (len) -+ : ); -+} -+ -+static void test_all_mvcrl() -+{ -+ static const char pattern[] = -+ "abcdefghijklmnopqrstuvwxyz-0123456789.ABCDEFGHIJKLMNOPQRSTUVWXYZ"; -+ char buf[4 * sizeof(pattern) - 2]; -+ -+ test_mvcrl(buf, (char *) pattern, sizeof(pattern)); -+ test_mvcrl(buf + sizeof(pattern) - 1, buf, sizeof(pattern)); -+ test_mvcrl(buf + 2 * sizeof(pattern) - 2, buf, 2 * sizeof(pattern) - 1); -+ test_mvcrl(buf + 32, buf + 10, 63); -+ test_mvcrl(buf + 2, buf + 1, 256); -+ test_mvcrl(buf + 
254, buf + 256, 2); -+ puts("mvcrl"); -+ for (int i = 0; i < 256; i += 64) { -+ printf("\t%.64s\n", buf + i); -+ } -+} -+ -+ -+int main() -+{ -+ test_all_logical_insns(); -+ test_all_popcnt(); -+ test_all_select(); -+ test_all_mvcrl(); -+ return 0; -+} -diff --git a/none/tests/s390x/misc3.stderr.exp b/none/tests/s390x/misc3.stderr.exp -new file mode 100644 -index 000000000..139597f9c ---- /dev/null -+++ b/none/tests/s390x/misc3.stderr.exp -@@ -0,0 +1,2 @@ -+ -+ -diff --git a/none/tests/s390x/misc3.stdout.exp b/none/tests/s390x/misc3.stdout.exp -new file mode 100644 -index 000000000..caaba4960 ---- /dev/null -+++ b/none/tests/s390x/misc3.stdout.exp -@@ -0,0 +1,103 @@ -+ncrk -+ 0000000000000000 0000000000000000 -> decaffee00000000 cc=0 -+ 0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0 -+ ffffffffffffffff 0000000000000000 -> decaffeeffffffff cc=1 -+ ffffffffffffffff ffffffffffffffff -> decaffee00000000 cc=0 -+ 0012345678abcdef 0000000000000000 -> decaffee78abcdef cc=1 -+ 0012345678abcdef ffffffffffffffff -> decaffee00000000 cc=0 -+ 55555555aaaaaaaa aaaaaaaa55555555 -> decaffeeaaaaaaaa cc=1 -+ncgrk -+ 0000000000000000 0000000000000000 -> 0000000000000000 cc=0 -+ 0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0 -+ ffffffffffffffff 0000000000000000 -> ffffffffffffffff cc=1 -+ ffffffffffffffff ffffffffffffffff -> 0000000000000000 cc=0 -+ 0012345678abcdef 0000000000000000 -> 0012345678abcdef cc=1 -+ 0012345678abcdef ffffffffffffffff -> 0000000000000000 cc=0 -+ 55555555aaaaaaaa aaaaaaaa55555555 -> 55555555aaaaaaaa cc=1 -+nnrk -+ 0000000000000000 0000000000000000 -> decaffeeffffffff cc=1 -+ 0000000000000000 ffffffffffffffff -> decaffeeffffffff cc=1 -+ ffffffffffffffff 0000000000000000 -> decaffeeffffffff cc=1 -+ ffffffffffffffff ffffffffffffffff -> decaffee00000000 cc=0 -+ 0012345678abcdef 0000000000000000 -> decaffeeffffffff cc=1 -+ 0012345678abcdef ffffffffffffffff -> decaffee87543210 cc=1 -+ 55555555aaaaaaaa aaaaaaaa55555555 -> decaffeeffffffff cc=1 
-+nngrk -+ 0000000000000000 0000000000000000 -> ffffffffffffffff cc=1 -+ 0000000000000000 ffffffffffffffff -> ffffffffffffffff cc=1 -+ ffffffffffffffff 0000000000000000 -> ffffffffffffffff cc=1 -+ ffffffffffffffff ffffffffffffffff -> 0000000000000000 cc=0 -+ 0012345678abcdef 0000000000000000 -> ffffffffffffffff cc=1 -+ 0012345678abcdef ffffffffffffffff -> ffedcba987543210 cc=1 -+ 55555555aaaaaaaa aaaaaaaa55555555 -> ffffffffffffffff cc=1 -+nork -+ 0000000000000000 0000000000000000 -> decaffeeffffffff cc=1 -+ 0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0 -+ ffffffffffffffff 0000000000000000 -> decaffee00000000 cc=0 -+ ffffffffffffffff ffffffffffffffff -> decaffee00000000 cc=0 -+ 0012345678abcdef 0000000000000000 -> decaffee87543210 cc=1 -+ 0012345678abcdef ffffffffffffffff -> decaffee00000000 cc=0 -+ 55555555aaaaaaaa aaaaaaaa55555555 -> decaffee00000000 cc=0 -+nogrk -+ 0000000000000000 0000000000000000 -> ffffffffffffffff cc=1 -+ 0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0 -+ ffffffffffffffff 0000000000000000 -> 0000000000000000 cc=0 -+ ffffffffffffffff ffffffffffffffff -> 0000000000000000 cc=0 -+ 0012345678abcdef 0000000000000000 -> ffedcba987543210 cc=1 -+ 0012345678abcdef ffffffffffffffff -> 0000000000000000 cc=0 -+ 55555555aaaaaaaa aaaaaaaa55555555 -> 0000000000000000 cc=0 -+nxrk -+ 0000000000000000 0000000000000000 -> decaffeeffffffff cc=1 -+ 0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0 -+ ffffffffffffffff 0000000000000000 -> decaffee00000000 cc=0 -+ ffffffffffffffff ffffffffffffffff -> decaffeeffffffff cc=1 -+ 0012345678abcdef 0000000000000000 -> decaffee87543210 cc=1 -+ 0012345678abcdef ffffffffffffffff -> decaffee78abcdef cc=1 -+ 55555555aaaaaaaa aaaaaaaa55555555 -> decaffee00000000 cc=0 -+nxgrk -+ 0000000000000000 0000000000000000 -> ffffffffffffffff cc=1 -+ 0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0 -+ ffffffffffffffff 0000000000000000 -> 0000000000000000 cc=0 -+ ffffffffffffffff ffffffffffffffff -> 
ffffffffffffffff cc=1 -+ 0012345678abcdef 0000000000000000 -> ffedcba987543210 cc=1 -+ 0012345678abcdef ffffffffffffffff -> 0012345678abcdef cc=1 -+ 55555555aaaaaaaa aaaaaaaa55555555 -> 0000000000000000 cc=0 -+ocrk -+ 0000000000000000 0000000000000000 -> decaffeeffffffff cc=1 -+ 0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0 -+ ffffffffffffffff 0000000000000000 -> decaffeeffffffff cc=1 -+ ffffffffffffffff ffffffffffffffff -> decaffeeffffffff cc=1 -+ 0012345678abcdef 0000000000000000 -> decaffeeffffffff cc=1 -+ 0012345678abcdef ffffffffffffffff -> decaffee78abcdef cc=1 -+ 55555555aaaaaaaa aaaaaaaa55555555 -> decaffeeaaaaaaaa cc=1 -+ocgrk -+ 0000000000000000 0000000000000000 -> ffffffffffffffff cc=1 -+ 0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0 -+ ffffffffffffffff 0000000000000000 -> ffffffffffffffff cc=1 -+ ffffffffffffffff ffffffffffffffff -> ffffffffffffffff cc=1 -+ 0012345678abcdef 0000000000000000 -> ffffffffffffffff cc=1 -+ 0012345678abcdef ffffffffffffffff -> 0012345678abcdef cc=1 -+ 55555555aaaaaaaa aaaaaaaa55555555 -> 55555555aaaaaaaa cc=1 -+popcnt -+ 0000000000000000 -> 0 cc=0 -+ 0000000000000001 -> 1 cc=1 -+ 8000000000000000 -> 1 cc=1 -+ ffffffffffffffff -> 64 cc=1 -+ ff427e3800556bcd -> 33 cc=1 -+selr -+ ffffffffffffffff 0000000000000000 -> 0cafebadffffffff 0badcafe00000000 -+ 0000000000000000 ffffffffffffffff -> 0cafebad00000000 0badcafeffffffff -+ 1234567890abcdef fedcba9876543210 -> 0cafebad90abcdef 0badcafe76543210 -+selgr -+ ffffffffffffffff 0000000000000000 -> ffffffffffffffff 0000000000000000 -+ 0000000000000000 ffffffffffffffff -> 0000000000000000 ffffffffffffffff -+ 1234567890abcdef fedcba9876543210 -> 1234567890abcdef fedcba9876543210 -+selfhr -+ ffffffffffffffff 0000000000000000 -> ffffffff0badcafe 000000000cafebad -+ 0000000000000000 ffffffffffffffff -> 000000000badcafe ffffffff0cafebad -+ 1234567890abcdef fedcba9876543210 -> 123456780badcafe fedcba980cafebad -+mvcrl -+ 
abbcdefghijklmnopqrstuvwxyz-01234klmnopqrstuvwxyz-0123456789.ABC -+ DEFGHIJKLMNOPQRSTUVWXYZabcdefghi456789.ABCDEFGHIJKLMNOPQRSTUVWXY -+ Zabcdefghijklmnopqrstuvwxyz-0123456789.ABCDEFGHIJKLMNOPQRSTUVWXY -+ Zabcdefghijklmnopqrstuvwxyz-0123456789.ABCDEFGHIJKLMNOPQRSTUVWZ -diff --git a/none/tests/s390x/misc3.vgtest b/none/tests/s390x/misc3.vgtest -new file mode 100644 -index 000000000..d051a06bd ---- /dev/null -+++ b/none/tests/s390x/misc3.vgtest -@@ -0,0 +1 @@ -+prog: misc3 --- -2.23.0 - -From 401b51d79886362d1962dc487db45ac91462eaa0 Mon Sep 17 00:00:00 2001 -From: Andreas Arnez -Date: Wed, 7 Apr 2021 12:29:32 +0200 -Subject: [PATCH 06/13] s390x: Vec-enh-2, extend VSL, VSRA, and VSRL - -The vector-enhancements facility 2 extends the existing bitwise vector -shift instructions VSL, VSRA, and VSRL. Now they allow the shift -vector (the third operand) to contain different shift amounts for each -byte. Add support for these new forms. ---- - VEX/priv/guest_s390_toIR.c | 58 ++++++++++++++++++++++++++++++-------- - 1 file changed, 47 insertions(+), 11 deletions(-) - -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index 9f7d98f8c..622d5a02e 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -17983,30 +17983,66 @@ s390_irgen_VERLL(UChar v1, IRTemp op2addr, UChar v3, UChar m4) - static const HChar * - s390_irgen_VSL(UChar v1, UChar v2, UChar v3) - { -- IRTemp shift_amount = newTemp(Ity_I8); -- assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b00000111))); -- -- put_vr_qw(v1, binop(Iop_ShlV128, get_vr_qw(v2), mkexpr(shift_amount))); -+ IRTemp a = newTemp(Ity_V128); -+ IRTemp b = newTemp(Ity_V128); -+ -+ assign(a, get_vr_qw(v2)); -+ assign(b, get_vr_qw(v3)); -+ -+ put_vr_qw(v1, -+ binop(Iop_OrV128, -+ binop(Iop_Shl8x16, mkexpr(a), mkexpr(b)), -+ binop(Iop_Shr8x16, -+ binop(Iop_Shr8x16, -+ binop(Iop_ShlV128, mkexpr(a), mkU8(8)), -+ unop(Iop_NotV128, mkexpr(b))), -+ unop(Iop_Dup8x16, mkU8(1))))); - return "vsl"; - } - - 
static const HChar * - s390_irgen_VSRL(UChar v1, UChar v2, UChar v3) - { -- IRTemp shift_amount = newTemp(Ity_I8); -- assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b00000111))); -+ IRTemp a = newTemp(Ity_V128); -+ IRTemp b = newTemp(Ity_V128); - -- put_vr_qw(v1, binop(Iop_ShrV128, get_vr_qw(v2), mkexpr(shift_amount))); -+ assign(a, get_vr_qw(v2)); -+ assign(b, get_vr_qw(v3)); -+ -+ put_vr_qw(v1, -+ binop(Iop_OrV128, -+ binop(Iop_Shr8x16, mkexpr(a), mkexpr(b)), -+ binop(Iop_Shl8x16, -+ binop(Iop_Shl8x16, -+ binop(Iop_ShrV128, mkexpr(a), mkU8(8)), -+ unop(Iop_NotV128, mkexpr(b))), -+ unop(Iop_Dup8x16, mkU8(1))))); - return "vsrl"; - } - - static const HChar * - s390_irgen_VSRA(UChar v1, UChar v2, UChar v3) - { -- IRTemp shift_amount = newTemp(Ity_I8); -- assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b00000111))); -- -- put_vr_qw(v1, binop(Iop_SarV128, get_vr_qw(v2), mkexpr(shift_amount))); -+ IRTemp a = newTemp(Ity_V128); -+ IRTemp b = newTemp(Ity_V128); -+ -+ assign(a, get_vr_qw(v2)); -+ assign(b, get_vr_qw(v3)); -+ -+ /* Shift-right: first byte arithmetically, all others logically */ -+ IRExpr* elems_shifted = -+ binop(Iop_Sar8x16, -+ binop(Iop_Shr8x16, mkexpr(a), -+ binop(Iop_AndV128, mkexpr(b), mkV128(0x7fff))), -+ binop(Iop_AndV128, mkexpr(b), mkV128(0x8000))); -+ /* Then OR the appropriate bits from the byte to the left */ -+ put_vr_qw(v1, -+ binop(Iop_OrV128, elems_shifted, -+ binop(Iop_Shl8x16, -+ binop(Iop_Shl8x16, -+ binop(Iop_ShrV128, mkexpr(a), mkU8(8)), -+ unop(Iop_NotV128, mkexpr(b))), -+ unop(Iop_Dup8x16, mkU8(1))))); - return "vsra"; - } - --- -2.23.0 - -From 3fdf065d0bf26a02d6d93a812a6571a287379c36 Mon Sep 17 00:00:00 2001 -From: Andreas Arnez -Date: Thu, 11 Feb 2021 20:02:03 +0100 -Subject: [PATCH 07/13] s390x: Vec-enh-2, extend VCDG, VCDLG, VCGD, and VCLGD - -The vector-enhancements facility 2 extends the vector floating-point -conversion instructions VCDG, VCDLG, VCGD, and VCLGD. 
In addition to -64-bit elements, they now also handle 32-bit elements. Add support for -these new forms. ---- - VEX/priv/guest_s390_toIR.c | 36 ++++++++++++++++++++---------------- - 1 file changed, 20 insertions(+), 16 deletions(-) - -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index 622d5a02e..11271a1c9 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -18794,44 +18794,48 @@ s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, Bool rounding, - static const HChar * - s390_irgen_VCDG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) - { -- s390_insn_assert("vcdg", m3 == 3); -- -- s390_vector_fp_convert(Iop_I64StoF64, Ity_I64, Ity_F64, True, -- v1, v2, m3, m4, m5); -+ s390_insn_assert("vcdg", m3 == 2 || m3 == 3); - -+ s390_vector_fp_convert(m3 == 2 ? Iop_I32StoF32 : Iop_I64StoF64, -+ m3 == 2 ? Ity_I32 : Ity_I64, -+ m3 == 2 ? Ity_F32 : Ity_F64, -+ True, v1, v2, m3, m4, m5); - return "vcdg"; - } - - static const HChar * - s390_irgen_VCDLG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) - { -- s390_insn_assert("vcdlg", m3 == 3); -- -- s390_vector_fp_convert(Iop_I64UtoF64, Ity_I64, Ity_F64, True, -- v1, v2, m3, m4, m5); -+ s390_insn_assert("vcdlg", m3 == 2 || m3 == 3); - -+ s390_vector_fp_convert(m3 == 2 ? Iop_I32UtoF32 : Iop_I64UtoF64, -+ m3 == 2 ? Ity_I32 : Ity_I64, -+ m3 == 2 ? Ity_F32 : Ity_F64, -+ True, v1, v2, m3, m4, m5); - return "vcdlg"; - } - - static const HChar * - s390_irgen_VCGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) - { -- s390_insn_assert("vcgd", m3 == 3); -- -- s390_vector_fp_convert(Iop_F64toI64S, Ity_F64, Ity_I64, True, -- v1, v2, m3, m4, m5); -+ s390_insn_assert("vcgd", m3 == 2 || m3 == 3); - -+ s390_vector_fp_convert(m3 == 2 ? Iop_F32toI32S : Iop_F64toI64S, -+ m3 == 2 ? Ity_F32 : Ity_F64, -+ m3 == 2 ? 
Ity_I32 : Ity_I64, -+ True, v1, v2, m3, m4, m5); - return "vcgd"; - } - - static const HChar * - s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) - { -- s390_insn_assert("vclgd", m3 == 3); -- -- s390_vector_fp_convert(Iop_F64toI64U, Ity_F64, Ity_I64, True, -- v1, v2, m3, m4, m5); -+ s390_insn_assert("vclgd", m3 == 2 || m3 == 3); - -+ s390_vector_fp_convert(m3 == 2 ? Iop_F32toI32U : Iop_F64toI64U, -+ m3 == 2 ? Ity_F32 : Ity_F64, -+ m3 == 2 ? Ity_I32 : Ity_I64, -+ True, v1, v2, m3, m4, m5); - return "vclgd"; - } - --- -2.23.0 - -From d195bf17388572e85474c7ded4b5bd0e4774637d Mon Sep 17 00:00:00 2001 -From: Andreas Arnez -Date: Tue, 16 Feb 2021 16:19:31 +0100 -Subject: [PATCH 08/13] s390x: Vec-enh-2, VLBR and friends - -Add support for the new byte- and element-swapping vector load/store -instructions VLEBRH, VLEBRG, VLEBRF, VLLEBRZ, VLBRREP, VLBR, VLER, -VSTEBRH, VSTEBRG, VSTEBRF, VSTBR, and VSTER. ---- - VEX/priv/guest_s390_toIR.c | 256 +++++++++++++++++++++++++++++++++++++ - VEX/priv/host_s390_isel.c | 9 ++ - 2 files changed, 265 insertions(+) - -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index 11271a1c9..f65b42705 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -19388,6 +19388,209 @@ s390_irgen_SELFHR(UChar r3, UChar m4, UChar r1, UChar r2) - return "selfhr"; - } - -+/* Helper function that byte-swaps each element of its V128 input operand */ -+static IRExpr * -+s390_byteswap_elements(IRExpr* v, UChar m) -+{ -+ static const ULong perm[4][2] = { -+ { 0x0100030205040706, 0x09080b0a0d0c0f0e }, /* 2-byte elements */ -+ { 0x0302010007060504, 0x0b0a09080f0e0d0c }, /* 4-byte elements */ -+ { 0x0706050403020100, 0x0f0e0d0c0b0a0908 }, /* 8-byte elements */ -+ { 0x0f0e0d0c0b0a0908, 0x0706050403020100 }, /* whole vector */ -+ }; -+ return binop(Iop_Perm8x16, v, binop(Iop_64HLtoV128, -+ mkU64(perm[m - 1][0]), -+ mkU64(perm[m - 1][1]))); -+} -+ -+/* Helper function that reverses the elements of its V128 
input operand */ -+static IRExpr * -+s390_reverse_elements(IRExpr* v, UChar m) -+{ -+ static const ULong perm[3][2] = { -+ { 0x0e0f0c0d0a0b0809, 0x0607040502030001 }, /* 2-byte elements */ -+ { 0x0c0d0e0f08090a0b, 0x0405060700010203 }, /* 4-byte elements */ -+ { 0x08090a0b0c0d0e0f, 0x0001020304050607 }, /* 8-byte elements */ -+ }; -+ return binop(Iop_Perm8x16, v, binop(Iop_64HLtoV128, -+ mkU64(perm[m - 1][0]), -+ mkU64(perm[m - 1][1]))); -+} -+ -+static const HChar * -+s390_irgen_VLBR(UChar v1, IRTemp op2addr, UChar m3) -+{ -+ s390_insn_assert("vlbr", m3 >= 1 && m3 <= 4); -+ put_vr_qw(v1, s390_byteswap_elements(load(Ity_V128, mkexpr(op2addr)), m3)); -+ return "vlbr"; -+} -+ -+static const HChar * -+s390_irgen_VSTBR(UChar v1, IRTemp op2addr, UChar m3) -+{ -+ s390_insn_assert("vstbr", m3 >= 1 && m3 <= 4); -+ store(mkexpr(op2addr), s390_byteswap_elements(get_vr_qw(v1), m3)); -+ return "vstbr"; -+} -+ -+static const HChar * -+s390_irgen_VLER(UChar v1, IRTemp op2addr, UChar m3) -+{ -+ s390_insn_assert("vler", m3 >= 1 && m3 <= 3); -+ put_vr_qw(v1, s390_reverse_elements(load(Ity_V128, mkexpr(op2addr)), m3)); -+ return "vler"; -+} -+ -+static const HChar * -+s390_irgen_VSTER(UChar v1, IRTemp op2addr, UChar m3) -+{ -+ s390_insn_assert("vstbr", m3 >= 1 && m3 <= 4); -+ store(mkexpr(op2addr), s390_reverse_elements(get_vr_qw(v1), m3)); -+ return "vstbr"; -+} -+ -+/* Helper function that combines its two V128 operands by replacing element 'to' -+ in 'a' by byte-swapped element 'from' in 'b' */ -+static IRExpr * -+s390_insert_byteswapped(IRExpr* a, IRExpr* b, UChar m, UChar to, UChar from) -+{ -+ UInt elem_size = 1U << m; -+ UInt start = elem_size * to; -+ UInt end = start + elem_size - 1; -+ UInt offs = end + elem_size * from + 16; -+ UInt i; -+ -+ ULong permH = 0; -+ for (i = 0; i < 8; i++) { -+ permH = (permH << 8) | (i >= start && i <= end ? offs - i : i); -+ } -+ ULong permL = 0; -+ for (i = 8; i < 16; i++) { -+ permL = (permL << 8) | (i >= start && i <= end ? 
offs - i : i); -+ } -+ return triop(Iop_Perm8x16x2, a, b, binop(Iop_64HLtoV128, -+ mkU64(permH), mkU64(permL))); -+} -+ -+static const HChar * -+s390_irgen_VLEBRH(UChar v1, IRTemp op2addr, UChar m3) -+{ -+ s390_insn_assert("vlebrh", m3 <= 7); -+ IRTemp op2 = newTemp(Ity_I16); -+ assign(op2, load(Ity_I16, mkexpr(op2addr))); -+ put_vr(v1, Ity_I16, m3, binop(Iop_Or16, -+ binop(Iop_Shl16, mkexpr(op2), mkU8(8)), -+ binop(Iop_Shr16, mkexpr(op2), mkU8(8)))); -+ return "vlebrh"; -+} -+ -+static const HChar * -+s390_irgen_VLEBRF(UChar v1, IRTemp op2addr, UChar m3) -+{ -+ s390_insn_assert("vlebrf", m3 <= 3); -+ IRTemp op1 = newTemp(Ity_V128); -+ assign(op1, get_vr_qw(v1)); -+ IRTemp op2 = newTemp(Ity_I64); -+ assign(op2, unop(Iop_32Uto64, load(Ity_I32, mkexpr(op2addr)))); -+ IRExpr* b = binop(Iop_64HLtoV128, mkexpr(op2), mkexpr(op2)); -+ put_vr_qw(v1, s390_insert_byteswapped(mkexpr(op1), b, 2, m3, 3)); -+ return "vlebrf"; -+} -+ -+static const HChar * -+s390_irgen_VLEBRG(UChar v1, IRTemp op2addr, UChar m3) -+{ -+ s390_insn_assert("vlebrg", m3 <= 1); -+ IRTemp op1 = newTemp(Ity_V128); -+ assign(op1, get_vr_qw(v1)); -+ IRTemp op2 = newTemp(Ity_I64); -+ assign(op2, load(Ity_I64, mkexpr(op2addr))); -+ IRExpr* b = binop(Iop_64HLtoV128, mkexpr(op2), mkexpr(op2)); -+ put_vr_qw(v1, s390_insert_byteswapped(mkexpr(op1), b, 3, m3, 1)); -+ return "vlebrg"; -+} -+ -+static const HChar * -+s390_irgen_VLBRREP(UChar v1, IRTemp op2addr, UChar m3) -+{ -+ s390_insn_assert("vlbrrep", m3 >= 1 && m3 <= 3); -+ static const ULong perm[3] = { -+ 0x0f0e0f0e0f0e0f0e, /* 2-byte element */ -+ 0x0f0e0d0c0f0e0d0c, /* 4-byte element */ -+ 0x0f0e0d0c0b0a0908 /* 8-byte element */ -+ }; -+ IRExpr* permHL = mkU64(perm[m3 - 1]); -+ IRTemp op2 = newTemp(Ity_I64); -+ if (m3 == 3) -+ assign(op2, load(Ity_I64, mkexpr(op2addr))); -+ else -+ assign(op2, unop(m3 == 2 ? 
Iop_32Uto64 : Iop_16Uto64, -+ load(s390_vr_get_type(m3), mkexpr(op2addr)))); -+ put_vr_qw(v1, binop(Iop_Perm8x16, -+ binop(Iop_64HLtoV128, mkexpr(op2), mkexpr(op2)), -+ binop(Iop_64HLtoV128, permHL, permHL))); -+ return "vlbrrep"; -+} -+ -+static const HChar * -+s390_irgen_VLLEBRZ(UChar v1, IRTemp op2addr, UChar m3) -+{ -+ s390_insn_assert("vllebrz", (m3 >= 1 && m3 <= 3) || m3 == 6); -+ static const ULong perm[6] = { -+ 0x0000000000000f0e, /* 2-byte element */ -+ 0x000000000f0e0d0c, /* 4-byte element */ -+ 0x0f0e0d0c0b0a0908, /* 8-byte element */ -+ 0, /* invalid (4) */ -+ 0, /* invalid (5) */ -+ 0x0f0e0d0c00000000, /* 4-byte element, left-aligned */ -+ }; -+ IRExpr* permH = mkU64(perm[m3 - 1]); -+ IRTemp op2 = newTemp(Ity_I64); -+ if (m3 == 3) -+ assign(op2, load(Ity_I64, mkexpr(op2addr))); -+ else -+ assign(op2, unop((m3 & 3) == 2 ? Iop_32Uto64 : Iop_16Uto64, -+ load(s390_vr_get_type(m3 & 3), mkexpr(op2addr)))); -+ put_vr_qw(v1, binop(Iop_Perm8x16, -+ binop(Iop_64HLtoV128, mkU64(0), mkexpr(op2)), -+ binop(Iop_64HLtoV128, permH, mkU64(0)))); -+ return "vllebrz"; -+} -+ -+static const HChar * -+s390_irgen_VSTEBRH(UChar v1, IRTemp op2addr, UChar m3) -+{ -+ s390_insn_assert("vstebrh", m3 <= 7); -+ IRTemp op1 = newTemp(Ity_I16); -+ assign(op1, get_vr(v1, Ity_I16, m3)); -+ store(mkexpr(op2addr), binop(Iop_Or16, -+ binop(Iop_Shl16, mkexpr(op1), mkU8(8)), -+ binop(Iop_Shr16, mkexpr(op1), mkU8(8)))); -+ return "vstebrh"; -+} -+ -+static const HChar * -+s390_irgen_VSTEBRF(UChar v1, IRTemp op2addr, UChar m3) -+{ -+ s390_insn_assert("vstebrf", m3 <= 3); -+ IRTemp op1 = newTemp(Ity_V128); -+ assign(op1, get_vr_qw(v1)); -+ IRExpr* b = s390_insert_byteswapped(mkexpr(op1), mkexpr(op1), 2, 3, m3); -+ store(mkexpr(op2addr), unop(Iop_V128to32, b)); -+ return "vstebrf"; -+} -+ -+static const HChar * -+s390_irgen_VSTEBRG(UChar v1, IRTemp op2addr, UChar m3) -+{ -+ s390_insn_assert("vstebrg", m3 <= 1); -+ IRTemp op1 = newTemp(Ity_V128); -+ assign(op1, get_vr_qw(v1)); -+ IRExpr* b = 
s390_insert_byteswapped(mkexpr(op1), mkexpr(op1), 3, 1, m3); -+ store(mkexpr(op2addr), unop(Iop_V128to64, b)); -+ return "vstebrg"; -+} -+ - /* New insns are added here. - If an insn is contingent on a facility being installed also - check whether the list of supported facilities in function -@@ -21003,6 +21206,59 @@ s390_decode_6byte_and_irgen(const UChar *bytes) - RXY_x2(ovl), RXY_b2(ovl), - RXY_dl2(ovl), - RXY_dh2(ovl)); goto ok; -+ case 0xe60000000001ULL: s390_format_VRX_VRRDM(s390_irgen_VLEBRH, VRX_v1(ovl), -+ VRX_x2(ovl), VRX_b2(ovl), -+ VRX_d2(ovl), VRX_m3(ovl), -+ VRX_rxb(ovl)); goto ok; -+ case 0xe60000000002ULL: s390_format_VRX_VRRDM(s390_irgen_VLEBRG, VRX_v1(ovl), -+ VRX_x2(ovl), VRX_b2(ovl), -+ VRX_d2(ovl), VRX_m3(ovl), -+ VRX_rxb(ovl)); goto ok; -+ case 0xe60000000003ULL: s390_format_VRX_VRRDM(s390_irgen_VLEBRF, VRX_v1(ovl), -+ VRX_x2(ovl), VRX_b2(ovl), -+ VRX_d2(ovl), VRX_m3(ovl), -+ VRX_rxb(ovl)); goto ok; -+ case 0xe60000000004ULL: s390_format_VRX_VRRDM(s390_irgen_VLLEBRZ, -+ VRX_v1(ovl), -+ VRX_x2(ovl), VRX_b2(ovl), -+ VRX_d2(ovl), VRX_m3(ovl), -+ VRX_rxb(ovl)); goto ok; -+ case 0xe60000000005ULL: s390_format_VRX_VRRDM(s390_irgen_VLBRREP, -+ VRX_v1(ovl), -+ VRX_x2(ovl), VRX_b2(ovl), -+ VRX_d2(ovl), VRX_m3(ovl), -+ VRX_rxb(ovl)); goto ok; -+ case 0xe60000000006ULL: s390_format_VRX_VRRDM(s390_irgen_VLBR, VRX_v1(ovl), -+ VRX_x2(ovl), VRX_b2(ovl), -+ VRX_d2(ovl), VRX_m3(ovl), -+ VRX_rxb(ovl)); goto ok; -+ case 0xe60000000007ULL: s390_format_VRX_VRRDM(s390_irgen_VLER, VRX_v1(ovl), -+ VRX_x2(ovl), VRX_b2(ovl), -+ VRX_d2(ovl), VRX_m3(ovl), -+ VRX_rxb(ovl)); goto ok; -+ case 0xe60000000009ULL: s390_format_VRX_VRRDM(s390_irgen_VSTEBRH, -+ VRX_v1(ovl), -+ VRX_x2(ovl), VRX_b2(ovl), -+ VRX_d2(ovl), VRX_m3(ovl), -+ VRX_rxb(ovl)); goto ok; -+ case 0xe6000000000aULL: s390_format_VRX_VRRDM(s390_irgen_VSTEBRG, -+ VRX_v1(ovl), -+ VRX_x2(ovl), VRX_b2(ovl), -+ VRX_d2(ovl), VRX_m3(ovl), -+ VRX_rxb(ovl)); goto ok; -+ case 0xe6000000000bULL: 
s390_format_VRX_VRRDM(s390_irgen_VSTEBRF, -+ VRX_v1(ovl), -+ VRX_x2(ovl), VRX_b2(ovl), -+ VRX_d2(ovl), VRX_m3(ovl), -+ VRX_rxb(ovl)); goto ok; -+ case 0xe6000000000eULL: s390_format_VRX_VRRDM(s390_irgen_VSTBR, VRX_v1(ovl), -+ VRX_x2(ovl), VRX_b2(ovl), -+ VRX_d2(ovl), VRX_m3(ovl), -+ VRX_rxb(ovl)); goto ok; -+ case 0xe6000000000fULL: s390_format_VRX_VRRDM(s390_irgen_VSTER, VRX_v1(ovl), -+ VRX_x2(ovl), VRX_b2(ovl), -+ VRX_d2(ovl), VRX_m3(ovl), -+ VRX_rxb(ovl)); goto ok; - case 0xe60000000034ULL: /* VPKZ */ goto unimplemented; - case 0xe60000000035ULL: s390_format_VSI_URDV(s390_irgen_VLRL, VSI_v1(ovl), - VSI_b2(ovl), VSI_d2(ovl), -diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c -index ee20c6711..06e195957 100644 ---- a/VEX/priv/host_s390_isel.c -+++ b/VEX/priv/host_s390_isel.c -@@ -4189,6 +4189,15 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) - return dst; - } - -+ case Iop_Perm8x16: -+ size = 16; -+ reg1 = s390_isel_vec_expr(env, arg1); -+ reg2 = s390_isel_vec_expr(env, arg2); -+ -+ addInstr(env, s390_insn_vec_triop(size, S390_VEC_PERM, -+ dst, reg1, reg1, reg2)); -+ return dst; -+ - case Iop_CmpEQ8x16: - size = 1; - vec_binop = S390_VEC_COMPARE_EQUAL; --- -2.23.0 - -From f7447f4c73b2d0fb4eb3827c3709f378f6c9c656 Mon Sep 17 00:00:00 2001 -From: Andreas Arnez -Date: Tue, 23 Feb 2021 19:10:37 +0100 -Subject: [PATCH 09/13] s390x: Vec-enh-2, VSLD and VSRD - -Support the new "vector shift left/right double by bit" instructions VSLD -and VSRD. 
---- - VEX/priv/guest_s390_toIR.c | 50 ++++++++++++++++++++++++++++++++++++++ - 1 file changed, 50 insertions(+) - -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index f65b42705..aa429d085 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -18228,6 +18228,48 @@ s390_irgen_VSLDB(UChar v1, UChar v2, UChar v3, UChar i4) - return "vsldb"; - } - -+static const HChar * -+s390_irgen_VSLD(UChar v1, UChar v2, UChar v3, UChar i4) -+{ -+ s390_insn_assert("vsld", i4 <= 7); -+ -+ if (i4 == 0) { -+ /* Just copy v2. */ -+ put_vr_qw(v1, get_vr_qw(v2)); -+ } else { -+ /* Concatenate v2's tail with v3's head. */ -+ put_vr_qw(v1, -+ binop(Iop_OrV128, -+ binop(Iop_ShlV128, get_vr_qw(v2), mkU8(i4)), -+ binop(Iop_ShrV128, get_vr_qw(v3), mkU8(128 - i4)) -+ ) -+ ); -+ } -+ -+ return "vsld"; -+} -+ -+static const HChar * -+s390_irgen_VSRD(UChar v1, UChar v2, UChar v3, UChar i4) -+{ -+ s390_insn_assert("vsrd", i4 <= 7); -+ -+ if (i4 == 0) { -+ /* Just copy v3. */ -+ put_vr_qw(v1, get_vr_qw(v3)); -+ } else { -+ /* Concatenate v2's tail with v3's head. 
*/ -+ put_vr_qw(v1, -+ binop(Iop_OrV128, -+ binop(Iop_ShlV128, get_vr_qw(v2), mkU8(128 - i4)), -+ binop(Iop_ShrV128, get_vr_qw(v3), mkU8(i4)) -+ ) -+ ); -+ } -+ -+ return "vsrd"; -+} -+ - static const HChar * - s390_irgen_VMO(UChar v1, UChar v2, UChar v3, UChar m4) - { -@@ -21541,6 +21583,14 @@ s390_decode_6byte_and_irgen(const UChar *bytes) - case 0xe70000000085ULL: s390_format_VRR_VVV(s390_irgen_VBPERM, VRR_v1(ovl), - VRR_v2(ovl), VRR_r3(ovl), - VRR_rxb(ovl)); goto ok; -+ case 0xe70000000086ULL: s390_format_VRId_VVVI(s390_irgen_VSLD, VRId_v1(ovl), -+ VRId_v2(ovl), VRId_v3(ovl), -+ VRId_i4(ovl), -+ VRId_rxb(ovl)); goto ok; -+ case 0xe70000000087ULL: s390_format_VRId_VVVI(s390_irgen_VSRD, VRId_v1(ovl), -+ VRId_v2(ovl), VRId_v3(ovl), -+ VRId_i4(ovl), -+ VRId_rxb(ovl)); goto ok; - case 0xe7000000008aULL: s390_format_VRR_VVVVMM(s390_irgen_VSTRC, VRRd_v1(ovl), - VRRd_v2(ovl), VRRd_v3(ovl), - VRRd_v4(ovl), VRRd_m5(ovl), --- -2.23.0 - -From 388082bca7146f8a15814798dbfe570af2aab2a9 Mon Sep 17 00:00:00 2001 -From: Andreas Arnez -Date: Wed, 10 Mar 2021 19:22:51 +0100 -Subject: [PATCH 10/13] s390x: Vec-enh-2, VSTRS - -Support the new "vector string search" instruction VSTRS. The -implementation is a full emulation and follows a similar approach as for -the other vector string instructions. 
---- - VEX/priv/guest_s390_toIR.c | 104 +++++++++++++++++++++++++++++++++++++ - 1 file changed, 104 insertions(+) - -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index aa429d085..46a867475 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -17601,6 +17601,105 @@ s390_irgen_VSTRC(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) - return "vstrc"; - } - -+static const HChar * -+s390_irgen_VSTRS(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) -+{ -+ s390_insn_assert("vstrs", m5 <= 2 && m6 == (m6 & 2)); -+ -+ IRTemp op2 = newTemp(Ity_V128); -+ IRTemp op3 = newTemp(Ity_V128); -+ IRTemp op4 = newTemp(Ity_I8); -+ IRTemp op2clean = newTemp(Ity_V128); -+ IRTemp op3mask = newTemp(Ity_V128); -+ IRTemp result = newTemp(Ity_V128); -+ IRTemp ccnomatch = newTemp(Ity_I64); -+ IRExpr* tmp; -+ IRExpr* match = NULL; -+ UChar elem_bits = 8 << m5; -+ IROp cmpeq_op = S390_VEC_OP3(m5, Iop_CmpEQ8x16, -+ Iop_CmpEQ16x8, Iop_CmpEQ32x4); -+ -+ assign(op2, get_vr_qw(v2)); -+ assign(op3, get_vr_qw(v3)); -+ assign(op4, get_vr_b7(v4)); -+ -+ tmp = unop(Iop_Dup32x4, -+ unop(Iop_1Sto32, binop(Iop_CmpNE8, mkexpr(op4), mkU8(16)))); -+ tmp = binop(Iop_ShrV128, tmp, binop(Iop_Shl8, mkexpr(op4), mkU8(3))); -+ -+ if (s390_vr_is_zs_set(m6)) { -+ IRTemp op2eos = newTemp(Ity_V128); -+ IRExpr* t; -+ t = binop(cmpeq_op, mkexpr(op2), mkV128(0)); -+ for (UChar i = m5; i < 4; i++) { -+ IRTemp s = newTemp(Ity_V128); -+ assign(s, t); -+ t = binop(Iop_OrV128, mkexpr(s), binop(Iop_ShrV128, mkexpr(s), -+ mkU8(8 << i))); -+ } -+ assign(op2eos, t); -+ assign(op2clean, binop(Iop_AndV128, mkexpr(op2), -+ unop(Iop_NotV128, mkexpr(op2eos)))); -+ assign(ccnomatch, binop(Iop_And64, mkU64(1), -+ unop(Iop_V128to64, mkexpr(op2eos)))); -+ -+ t = binop(cmpeq_op, mkexpr(op3), mkV128(0)); -+ for (UChar i = m5; i < 4; i++) { -+ IRTemp s = newTemp(Ity_V128); -+ assign(s, t); -+ t = binop(Iop_OrV128, mkexpr(s), binop(Iop_ShrV128, mkexpr(s), -+ mkU8(8 << i))); 
-+ } -+ tmp = binop(Iop_OrV128, tmp, t); -+ } else { -+ assign(op2clean, mkexpr(op2)); -+ } -+ assign(op3mask, unop(Iop_NotV128, tmp)); -+ -+ for (UChar shift = 0; shift < 128; shift += elem_bits) { -+ IRTemp s = newTemp(Ity_V128); -+ tmp = unop(Iop_NotV128, -+ binop(cmpeq_op, mkexpr(op2clean), -+ binop(Iop_ShrV128, mkexpr(op3), mkU8(shift)))); -+ assign(s, binop(Iop_CmpEQ64x2, mkV128(0), -+ binop(Iop_AndV128, mkexpr(op3mask), -+ binop(Iop_ShlV128, tmp, mkU8(shift))))); -+ tmp = mkexpr(s); -+ if (shift < 64) { -+ tmp = binop(Iop_AndV128, tmp, -+ unop(Iop_Dup16x8, binop(Iop_GetElem16x8, tmp, mkU8(4)))); -+ } -+ tmp = binop(Iop_AndV128, tmp, -+ unop(Iop_Dup16x8, mkU16(1 << (15 - shift / 8)))); -+ if (shift) -+ match = binop(Iop_OrV128, mkexpr(mktemp(Ity_V128, match)), tmp); -+ else -+ match = tmp; -+ } -+ assign(result, unop(Iop_ClzNat64, -+ binop(Iop_Or64, -+ unop(Iop_V128HIto64, match), -+ mkU64((1UL << 48) - 1)))); -+ put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(result), mkU64(0))); -+ -+ /* Set condition code. -+ 0: no match, no string terminator in op2 -+ 1: no match, string terminator found -+ 2: full match -+ 3: partial match */ -+ IRTemp cc = newTemp(Ity_I64); -+ tmp = binop(Iop_CmpLE64U, -+ binop(Iop_Add64, mkexpr(result), unop(Iop_8Uto64, mkexpr(op4))), -+ mkU64(16)); -+ assign(cc, mkite(binop(Iop_CmpEQ64, mkexpr(result), mkU64(16)), -+ s390_vr_is_zs_set(m6) ? 
mkexpr(ccnomatch) : mkU64(0), -+ mkite(tmp, mkU64(2), mkU64(3)))); -+ s390_cc_set(cc); -+ -+ dis_res->hint = Dis_HintVerbose; -+ return "vstrs"; -+} -+ - static const HChar * - s390_irgen_VNC(UChar v1, UChar v2, UChar v3) - { -@@ -21596,6 +21695,11 @@ s390_decode_6byte_and_irgen(const UChar *bytes) - VRRd_v4(ovl), VRRd_m5(ovl), - VRRd_m6(ovl), - VRRd_rxb(ovl)); goto ok; -+ case 0xe7000000008bULL: s390_format_VRR_VVVVMM(s390_irgen_VSTRS, VRRd_v1(ovl), -+ VRRd_v2(ovl), VRRd_v3(ovl), -+ VRRd_v4(ovl), VRRd_m5(ovl), -+ VRRd_m6(ovl), -+ VRRd_rxb(ovl)); goto ok; - case 0xe7000000008cULL: s390_format_VRR_VVVV(s390_irgen_VPERM, VRR_v1(ovl), - VRR_v2(ovl), VRR_r3(ovl), - VRR_m4(ovl), VRR_rxb(ovl)); goto ok; --- -2.23.0 - -From 8a079b405467fa127c6c311d7ae3c649e76106c6 Mon Sep 17 00:00:00 2001 -From: Andreas Arnez -Date: Tue, 16 Feb 2021 17:52:09 +0100 -Subject: [PATCH 11/13] s390x: Mark arch13 features as supported - -Make the STFLE instruction report the miscellaneous-instruction-extensions -facility 3 and the vector-enhancements facility 2 as supported. Indicate -support for the latter in the HWCAP vector as well. ---- - VEX/priv/guest_s390_helpers.c | 9 +++------ - coregrind/m_initimg/initimg-linux.c | 3 ++- - include/vki/vki-s390x-linux.h | 1 + - 3 files changed, 6 insertions(+), 7 deletions(-) - -diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c -index 1e04f601a..804b92a29 100644 ---- a/VEX/priv/guest_s390_helpers.c -+++ b/VEX/priv/guest_s390_helpers.c -@@ -356,9 +356,7 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr) - | s390_stfle_range(51, 55) - /* 56: unassigned */ - /* 57: MSA5, not supported */ -- | s390_stfle_range(58, 60) -- /* 61: miscellaneous-instruction 3, not supported */ -- | s390_stfle_range(62, 63)), -+ | s390_stfle_range(58, 63)), - - /* === 64 .. 
127 === */ - (s390_stfle_range(64, 72) -@@ -384,11 +382,10 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr) - /* 143: unassigned */ - | s390_stfle_range(144, 145) - /* 146: MSA8, not supported */ -- | s390_stfle_range(147, 147) -- /* 148: vector-enhancements 2, not supported */ -- | s390_stfle_range(149, 149) -+ | s390_stfle_range(147, 149) - /* 150: unassigned */ - /* 151: DEFLATE-conversion, not supported */ -+ /* 152: vector packed decimal enhancement, not supported */ - /* 153: unassigned */ - /* 154: unassigned */ - /* 155: MSA9, not supported */ -diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c -index fc1a32ecf..37d005168 100644 ---- a/coregrind/m_initimg/initimg-linux.c -+++ b/coregrind/m_initimg/initimg-linux.c -@@ -703,7 +703,8 @@ Addr setup_client_stack( void* init_sp, - itself, is not supported by Valgrind. */ - auxv->u.a_val &= ((VKI_HWCAP_S390_TE - 1) - | VKI_HWCAP_S390_VXRS -- | VKI_HWCAP_S390_VXRS_EXT); -+ | VKI_HWCAP_S390_VXRS_EXT -+ | VKI_HWCAP_S390_VXRS_EXT2); - } - # elif defined(VGP_arm64_linux) - { -diff --git a/include/vki/vki-s390x-linux.h b/include/vki/vki-s390x-linux.h -index 4ab2d3334..71b363029 100644 ---- a/include/vki/vki-s390x-linux.h -+++ b/include/vki/vki-s390x-linux.h -@@ -807,6 +807,7 @@ typedef vki_s390_regs vki_elf_gregset_t; - #define VKI_HWCAP_S390_TE 1024 - #define VKI_HWCAP_S390_VXRS 2048 - #define VKI_HWCAP_S390_VXRS_EXT 8192 -+#define VKI_HWCAP_S390_VXRS_EXT2 32768 - - - //---------------------------------------------------------------------- --- -2.23.0 - -From 1461d9b8d0b12e55b648fbf50c5dcee30785afa2 Mon Sep 17 00:00:00 2001 -From: Andreas Arnez -Date: Mon, 17 May 2021 15:34:15 +0200 -Subject: [PATCH 12/13] s390x: Vec-enh-2, test cases - -Add test cases for verifying the new/enhanced instructions in the -vector-enhancements facility 2. For "vector string search" VSTRS add a -memcheck test case. 
---- - .gitignore | 2 + - memcheck/tests/s390x/Makefile.am | 3 +- - memcheck/tests/s390x/vstrs.c | 68 ++++++ - memcheck/tests/s390x/vstrs.stderr.exp | 16 ++ - memcheck/tests/s390x/vstrs.stdout.exp | 0 - memcheck/tests/s390x/vstrs.vgtest | 2 + - none/tests/s390x/Makefile.am | 3 +- - none/tests/s390x/vec2.c | 314 ++++++++++++++++++++++++++ - none/tests/s390x/vec2.stderr.exp | 2 + - none/tests/s390x/vec2.stdout.exp | 168 ++++++++++++++ - none/tests/s390x/vec2.vgtest | 2 + - tests/s390x_features.c | 4 + - 12 files changed, 582 insertions(+), 2 deletions(-) - create mode 100644 memcheck/tests/s390x/vstrs.c - create mode 100644 memcheck/tests/s390x/vstrs.stderr.exp - create mode 100644 memcheck/tests/s390x/vstrs.stdout.exp - create mode 100644 memcheck/tests/s390x/vstrs.vgtest - create mode 100644 none/tests/s390x/vec2.c - create mode 100644 none/tests/s390x/vec2.stderr.exp - create mode 100644 none/tests/s390x/vec2.stdout.exp - create mode 100644 none/tests/s390x/vec2.vgtest - -diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am -index d183841ef..668fd9933 100644 ---- a/memcheck/tests/s390x/Makefile.am -+++ b/memcheck/tests/s390x/Makefile.am -@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am - - dist_noinst_SCRIPTS = filter_stderr - --INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr -+INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr vstrs - - check_PROGRAMS = $(INSN_TESTS) - -@@ -18,3 +18,4 @@ AM_CCASFLAGS += @FLAG_M64@ - vstrc_CFLAGS = $(AM_CFLAGS) -march=z13 - vfae_CFLAGS = $(AM_CFLAGS) -march=z13 - vistr_CFLAGS = $(AM_CFLAGS) -march=z13 -+vstrs_CFLAGS = $(AM_CFLAGS) -march=z13 -diff --git a/memcheck/tests/s390x/vstrs.c b/memcheck/tests/s390x/vstrs.c -new file mode 100644 -index 000000000..3354c2e53 ---- /dev/null -+++ b/memcheck/tests/s390x/vstrs.c -@@ -0,0 +1,68 @@ -+#include -+#include -+ -+#define VECTOR __attribute__ ((vector_size (16))) -+ -+typedef char VECTOR char_v; -+ -+volatile char tmp; -+static const char 
*hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV"; -+ -+static char_v to_char_vec(const char *str) -+{ -+ char buf[17]; -+ char_v v; -+ -+ for (int i = 0; i < sizeof(buf); i++) { -+ char ch = str[i]; -+ if (ch == '\0') -+ break; -+ else if (ch == '$') -+ buf[i] = '\0'; -+ else if (ch != '~') -+ buf[i] = ch; -+ } -+ v = *(char_v *) buf; -+ return v; -+} -+ -+static void test_vstrs_char(const char *haystack, const char *needle, -+ int expect_res, int expect_cc) -+{ -+ int cc; -+ char_v v2val = to_char_vec(haystack); -+ char_v v3val = to_char_vec(needle); -+ -+ register unsigned long VECTOR v4 __asm__("v4") = { strlen(needle), 0 }; -+ register char_v v1 __asm__("v1"); -+ register char_v v2 __asm__("v2") = v2val; -+ register char_v v3 __asm__("v3") = v3val; -+ -+ __asm__( -+ "cr 0,0\n\t" /* Clear CC */ -+ ".short 0xe712,0x3020,0x408b\n\t" /* vstrs %v1,%v2,%v3,%v4,0,2 */ -+ "ipm %[cc]\n\t" -+ "srl %[cc],28" -+ : "=v" (v1), [cc] "=d" (cc) -+ : "v" (v2), "v" (v3), "v" (v4) -+ : "cc"); -+ -+ tmp = hex_digit[v1[7] & 0x1f]; -+ if (expect_res >= 0 && v1[7] != expect_res) -+ printf("result %u != %d\n", v1[7], expect_res); -+ -+ tmp = hex_digit[cc & 0xf]; -+ if (expect_cc >= 0 && cc != expect_cc) -+ printf("CC %d != %d\n", cc, expect_cc); -+} -+ -+int main() -+{ -+ test_vstrs_char("haystack$needle", "needle$haystack", 16, 1); -+ test_vstrs_char("haystack, needle", "needle, haystack", 10, 3); -+ test_vstrs_char("ABCDEFGH", "DEFGHI", -1, -1); -+ test_vstrs_char("match in UNDEF", "UN", 9, 2); -+ test_vstrs_char("after ~ UNDEF", "DEF", -1, -1); -+ test_vstrs_char("", "", 0, 2); -+ return 0; -+} -diff --git a/memcheck/tests/s390x/vstrs.stderr.exp b/memcheck/tests/s390x/vstrs.stderr.exp -new file mode 100644 -index 000000000..c5c3ef705 ---- /dev/null -+++ b/memcheck/tests/s390x/vstrs.stderr.exp -@@ -0,0 +1,16 @@ -+Use of uninitialised value of size 8 -+ at 0x........: test_vstrs_char (vstrs.c:50) -+ by 0x........: main (vstrs.c:63) -+ -+Use of uninitialised value of size 8 -+ at 
0x........: test_vstrs_char (vstrs.c:54) -+ by 0x........: main (vstrs.c:63) -+ -+Use of uninitialised value of size 8 -+ at 0x........: test_vstrs_char (vstrs.c:50) -+ by 0x........: main (vstrs.c:65) -+ -+Use of uninitialised value of size 8 -+ at 0x........: test_vstrs_char (vstrs.c:54) -+ by 0x........: main (vstrs.c:65) -+ -diff --git a/memcheck/tests/s390x/vstrs.stdout.exp b/memcheck/tests/s390x/vstrs.stdout.exp -new file mode 100644 -index 000000000..e69de29bb -diff --git a/memcheck/tests/s390x/vstrs.vgtest b/memcheck/tests/s390x/vstrs.vgtest -new file mode 100644 -index 000000000..fd2a29873 ---- /dev/null -+++ b/memcheck/tests/s390x/vstrs.vgtest -@@ -0,0 +1,2 @@ -+prog: vstrs -+vgopts: -q -diff --git a/none/tests/s390x/Makefile.am b/none/tests/s390x/Makefile.am -index 2fd45ec1e..ca38db935 100644 ---- a/none/tests/s390x/Makefile.am -+++ b/none/tests/s390x/Makefile.am -@@ -20,7 +20,7 @@ INSN_TESTS = clc clcle cvb cvd icm lpr tcxb lam_stam xc mvst add sub mul \ - spechelper-icm-1 spechelper-icm-2 spechelper-tmll \ - spechelper-tm laa vector lsc2 ppno vector_string vector_integer \ - vector_float add-z14 sub-z14 mul-z14 bic \ -- misc3 -+ misc3 vec2 - - if BUILD_DFP_TESTS - INSN_TESTS += dfp-1 dfp-2 dfp-3 dfp-4 dfptest dfpext dfpconv srnmt pfpo -@@ -74,3 +74,4 @@ lsc2_CFLAGS = -march=z13 -DS390_TESTS_NOCOLOR - vector_string_CFLAGS = $(AM_CFLAGS) -march=z13 -DS390_TEST_COUNT=5 - vector_integer_CFLAGS = $(AM_CFLAGS) -march=z13 -DS390_TEST_COUNT=4 - vector_float_CFLAGS = $(AM_CFLAGS) -march=z13 -DS390_TEST_COUNT=4 -+vec2_CFLAGS = $(AM_CFLAGS) -march=z13 -diff --git a/none/tests/s390x/vec2.c b/none/tests/s390x/vec2.c -new file mode 100644 -index 000000000..73b04dee4 ---- /dev/null -+++ b/none/tests/s390x/vec2.c -@@ -0,0 +1,314 @@ -+#include -+ -+#define VECTOR __attribute__ ((vector_size (16))) -+ -+typedef unsigned long VECTOR ulong_v; -+typedef float VECTOR float_v; -+ -+static const ulong_v vec_a = { 0x0123456789abcdef, 0xfedcba9876543210 }; -+static const 
ulong_v vec_b = { 0xfedcba9876543210, 0x0123456789abcdef }; -+static const ulong_v vec_c = { 0x8040201008040201, 0x7fbfdfeff7fbfdfe }; -+static const ulong_v vec_one = { -1, -1 }; -+static const ulong_v vec_ini = { 0x0112233445566778, 0x899aabbccddeeff0 }; -+ -+static const float_v vec_fa = { 16777215., -16777215., 42.5, 10000. }; -+static const float_v vec_fb = { 4., 3., 2., 1. }; -+ -+/* -- Vector shift -- */ -+ -+#define TEST_GENERATE(insn) \ -+ static void test_##insn(ulong_v a, ulong_v b) \ -+ { \ -+ ulong_v out; \ -+ __asm__( \ -+ #insn " %[out],%[a],%[b]" \ -+ : [out] "=v" (out) \ -+ : [a] "v" (a), \ -+ [b] "v" (b) \ -+ : ); \ -+ printf("\t%016lx %016lx\n", out[0], out[1]); \ -+ } -+ -+#define TEST_EXEC(insn) \ -+ do { \ -+ puts(#insn); \ -+ test_##insn(vec_a, vec_b); \ -+ test_##insn(vec_b, vec_a); \ -+ test_##insn(vec_c, vec_a); \ -+ test_##insn(vec_one, vec_b); \ -+ } while (0) -+ -+#define INSNS \ -+ XTEST(vsl); \ -+ XTEST(vsrl); \ -+ XTEST(vsra); -+ -+#define XTEST TEST_GENERATE -+INSNS -+#undef XTEST -+ -+static void test_all_single_bitshifts() -+{ -+#define XTEST TEST_EXEC -+ INSNS -+#undef XTEST -+} -+#undef INSNS -+#undef TEST_EXEC -+#undef TEST_GENERATE -+ -+/* -- Vector load element-/byte-swapped -- */ -+ -+#define TEST_EXEC(opc1,opc2,insn,m3) \ -+ do { \ -+ puts(#insn " " #m3); \ -+ test_##insn##_##m3(vec_a); \ -+ test_##insn##_##m3(vec_b); \ -+ } while (0) -+ -+#define TEST_GENERATE(opc1,opc2,insn,m3) \ -+ static void test_##insn##_##m3(ulong_v a) \ -+ { \ -+ ulong_v out = vec_ini; \ -+ __asm__( \ -+ ".insn vrx,0x" #opc1 "00000000" #opc2 ",%[out],%[a]," #m3 \ -+ : [out] "+v" (out) \ -+ : [a] "R" (a) \ -+ : ); \ -+ printf("\t%016lx %016lx\n", out[0], out[1]); \ -+ } -+ -+#define INSNS \ -+ XTEST(e6,01, vlebrh, 0); \ -+ XTEST(e6,01, vlebrh, 7); \ -+ XTEST(e6,01, vlebrh, 2); \ -+ XTEST(e6,03, vlebrf, 0); \ -+ XTEST(e6,03, vlebrf, 3); \ -+ XTEST(e6,03, vlebrf, 1); \ -+ XTEST(e6,02, vlebrg, 0); \ -+ XTEST(e6,02, vlebrg, 1); \ -+ XTEST(e6,04, vllebrz, 
1); \ -+ XTEST(e6,04, vllebrz, 2); \ -+ XTEST(e6,04, vllebrz, 3); \ -+ XTEST(e6,04, vllebrz, 6); \ -+ XTEST(e6,05, vlbrrep, 1); \ -+ XTEST(e6,05, vlbrrep, 2); \ -+ XTEST(e6,05, vlbrrep, 3); \ -+ XTEST(e6,06, vlbr, 1); \ -+ XTEST(e6,06, vlbr, 2); \ -+ XTEST(e6,06, vlbr, 3); \ -+ XTEST(e6,06, vlbr, 4); \ -+ XTEST(e6,07, vler, 1); \ -+ XTEST(e6,07, vler, 2); \ -+ XTEST(e6,07, vler, 3); -+ -+#define XTEST TEST_GENERATE -+INSNS -+#undef XTEST -+ -+static void test_all_swapped_loads() -+{ -+#define XTEST TEST_EXEC -+ INSNS -+#undef XTEST -+} -+ -+#undef INSNS -+#undef TEST_GENERATE -+ -+/* -- Vector store element-/byte-swapped -- */ -+ -+#define TEST_GENERATE(opc1,opc2,insn,m3) \ -+ static void test_##insn##_##m3(ulong_v a) \ -+ { \ -+ ulong_v out = vec_ini; \ -+ __asm__( \ -+ ".insn vrx,0x" #opc1 "00000000" #opc2 ",%[a],%[out]," #m3 \ -+ : [out] "+R" (out) \ -+ : [a] "v" (a) \ -+ : ); \ -+ printf("\t%016lx %016lx\n", out[0], out[1]); \ -+ } -+ -+#define INSNS \ -+ XTEST(e6,09, vstebrh, 0); \ -+ XTEST(e6,09, vstebrh, 7); \ -+ XTEST(e6,09, vstebrh, 2); \ -+ XTEST(e6,0b, vstebrf, 0); \ -+ XTEST(e6,0b, vstebrf, 3); \ -+ XTEST(e6,0b, vstebrf, 1); \ -+ XTEST(e6,0a, vstebrg, 0); \ -+ XTEST(e6,0a, vstebrg, 1); \ -+ XTEST(e6,0e, vstbr, 1); \ -+ XTEST(e6,0e, vstbr, 2); \ -+ XTEST(e6,0e, vstbr, 3); \ -+ XTEST(e6,0e, vstbr, 4); \ -+ XTEST(e6,0f, vster, 1); \ -+ XTEST(e6,0f, vster, 2); \ -+ XTEST(e6,0f, vster, 3); -+ -+#define XTEST TEST_GENERATE -+INSNS -+#undef XTEST -+ -+static void test_all_swapped_stores() -+{ -+#define XTEST TEST_EXEC -+ INSNS -+#undef XTEST -+} -+ -+#undef INSNS -+#undef TEST_EXEC -+#undef TEST_GENERATE -+ -+/* -- Vector shift double by bit -- */ -+ -+#define TEST_GENERATE(opc1,opc2,insn,i4) \ -+ static void test_##insn##_##i4(ulong_v a, ulong_v b) \ -+ { \ -+ ulong_v out = vec_ini; \ -+ __asm__( \ -+ ".insn vrr,0x" #opc1 "00000000" #opc2 \ -+ ",%[out],%[a],%[b],0," #i4 ",0" \ -+ : [out] "+v" (out) \ -+ : [a] "v" (a), \ -+ [b] "v" (b) \ -+ : ); \ -+ 
printf("\t%016lx %016lx\n", out[0], out[1]); \ -+ } -+ -+#define TEST_EXEC(opc1,opc2,insn,i4) \ -+ do { \ -+ puts(#insn " " #i4); \ -+ test_##insn##_##i4(vec_a, vec_one); \ -+ test_##insn##_##i4(vec_b, vec_a); \ -+ } while (0) -+ -+#define INSNS \ -+ XTEST(e7,86,vsld,0); \ -+ XTEST(e7,86,vsld,7); \ -+ XTEST(e7,86,vsld,4); \ -+ XTEST(e7,87,vsrd,0); \ -+ XTEST(e7,87,vsrd,7); \ -+ XTEST(e7,87,vsrd,4); -+ -+#define XTEST TEST_GENERATE -+INSNS -+#undef XTEST -+ -+static void test_all_double_bitshifts() -+{ -+#define XTEST TEST_EXEC -+ INSNS -+#undef XTEST -+} -+ -+#undef INSNS -+#undef TEST_EXEC -+#undef TEST_GENERATE -+ -+/* -- Vector integer -> FP conversions -- */ -+ -+#define TEST_GENERATE(opc1,opc2,insn,m4) \ -+ static void test_##insn##_##m4(ulong_v a) \ -+ { \ -+ float_v out; \ -+ __asm__( \ -+ ".insn vrr,0x" #opc1 "00000000" #opc2 \ -+ ",%[out],%[a],0,2," #m4 ",0" \ -+ : [out] "=v" (out) \ -+ : [a] "v" (a) \ -+ : ); \ -+ if (m4 & 8) \ -+ printf("\t%a - - -\n", out[0]); \ -+ else \ -+ printf("\t%a %a %a %a\n", out[0], out[1], out[2], out[3]); \ -+ } -+ -+#define TEST_EXEC(opc1,opc2,insn,m4) \ -+ do { \ -+ puts(#insn " " #m4); \ -+ test_##insn##_##m4(vec_a); \ -+ test_##insn##_##m4(vec_c); \ -+ } while (0) -+ -+#define INSNS \ -+ XTEST(e7,c1,vcfpl,0); \ -+ XTEST(e7,c1,vcfpl,8); \ -+ XTEST(e7,c3,vcfps,0); \ -+ XTEST(e7,c3,vcfps,8); -+ -+#define XTEST TEST_GENERATE -+INSNS -+#undef XTEST -+ -+static void test_all_int_fp_conversions() -+{ -+#define XTEST TEST_EXEC -+ INSNS -+#undef XTEST -+} -+ -+#undef INSNS -+#undef TEST_EXEC -+#undef TEST_GENERATE -+ -+/* -- Vector FP -> integer conversions -- */ -+ -+#define TEST_GENERATE(opc1,opc2,insn,m4) \ -+ static void test_##insn##_##m4(float_v a) \ -+ { \ -+ unsigned int VECTOR out; \ -+ __asm__( \ -+ ".insn vrr,0x" #opc1 "00000000" #opc2 \ -+ ",%[out],%[a],0,2," #m4 ",0" \ -+ : [out] "=v" (out) \ -+ : [a] "v" (a) \ -+ : ); \ -+ if (m4 & 8) \ -+ printf("\t%08x - - -\n", out[0]); \ -+ else \ -+ printf("\t%08x %08x %08x 
%08x\n", \ -+ out[0], out[1], out[2], out[3]); \ -+ } -+ -+#define TEST_EXEC(opc1,opc2,insn,m4) \ -+ do { \ -+ puts(#insn " " #m4); \ -+ test_##insn##_##m4(vec_fa); \ -+ test_##insn##_##m4(vec_fb); \ -+ } while (0) -+ -+#define INSNS \ -+ XTEST(e7,c0,vclfp,0); \ -+ XTEST(e7,c0,vclfp,8); \ -+ XTEST(e7,c2,vcsfp,0); \ -+ XTEST(e7,c2,vcsfp,8); -+ -+#define XTEST TEST_GENERATE -+INSNS -+#undef XTEST -+ -+static void test_all_fp_int_conversions() -+{ -+#define XTEST TEST_EXEC -+ INSNS -+#undef XTEST -+} -+ -+#undef INSNS -+#undef TEST_EXEC -+#undef TEST_GENERATE -+ -+ -+int main() -+{ -+ test_all_single_bitshifts(); -+ test_all_swapped_loads(); -+ test_all_swapped_stores(); -+ test_all_double_bitshifts(); -+ test_all_int_fp_conversions(); -+ test_all_fp_int_conversions(); -+ return 0; -+} -diff --git a/none/tests/s390x/vec2.stderr.exp b/none/tests/s390x/vec2.stderr.exp -new file mode 100644 -index 000000000..139597f9c ---- /dev/null -+++ b/none/tests/s390x/vec2.stderr.exp -@@ -0,0 +1,2 @@ -+ -+ -diff --git a/none/tests/s390x/vec2.stdout.exp b/none/tests/s390x/vec2.stdout.exp -new file mode 100644 -index 000000000..b32cbe1bc ---- /dev/null -+++ b/none/tests/s390x/vec2.stdout.exp -@@ -0,0 +1,168 @@ -+vsl -+ 483415676abc37ef fde5533beca14200 -+ fde5533beca14200 483415676abc37ef -+ 00010204102040bf effd7feffebff7fe -+ ffffffffffffffff ffffffffffffff80 -+vsrl -+ 0012d1679e9af3ef ffdbe5753bcaa164 -+ 7fdbe5753bcaa164 4012d1679e9af3ef -+ 4008014004002004 05fbf7efbf7ffffe -+ 03ffffffffffffff ffffffffffffffff -+vsra -+ 0012d1679e9af3ef ffdbe5753bcaa164 -+ ffdbe5753bcaa164 4012d1679e9af3ef -+ c008014004002004 05fbf7efbf7ffffe -+ ffffffffffffffff ffffffffffffffff -+vlebrh 0 -+ 2301233445566778 899aabbccddeeff0 -+ dcfe233445566778 899aabbccddeeff0 -+vlebrh 7 -+ 0112233445566778 899aabbccdde2301 -+ 0112233445566778 899aabbccddedcfe -+vlebrh 2 -+ 0112233423016778 899aabbccddeeff0 -+ 01122334dcfe6778 899aabbccddeeff0 -+vlebrf 0 -+ 6745230145566778 899aabbccddeeff0 -+ 98badcfe45566778 
899aabbccddeeff0 -+vlebrf 3 -+ 0112233445566778 899aabbc67452301 -+ 0112233445566778 899aabbc98badcfe -+vlebrf 1 -+ 0112233467452301 899aabbccddeeff0 -+ 0112233498badcfe 899aabbccddeeff0 -+vlebrg 0 -+ efcdab8967452301 899aabbccddeeff0 -+ 1032547698badcfe 899aabbccddeeff0 -+vlebrg 1 -+ 0112233445566778 efcdab8967452301 -+ 0112233445566778 1032547698badcfe -+vllebrz 1 -+ 0000000000002301 0000000000000000 -+ 000000000000dcfe 0000000000000000 -+vllebrz 2 -+ 0000000067452301 0000000000000000 -+ 0000000098badcfe 0000000000000000 -+vllebrz 3 -+ efcdab8967452301 0000000000000000 -+ 1032547698badcfe 0000000000000000 -+vllebrz 6 -+ 6745230100000000 0000000000000000 -+ 98badcfe00000000 0000000000000000 -+vlbrrep 1 -+ 2301230123012301 2301230123012301 -+ dcfedcfedcfedcfe dcfedcfedcfedcfe -+vlbrrep 2 -+ 6745230167452301 6745230167452301 -+ 98badcfe98badcfe 98badcfe98badcfe -+vlbrrep 3 -+ efcdab8967452301 efcdab8967452301 -+ 1032547698badcfe 1032547698badcfe -+vlbr 1 -+ 23016745ab89efcd dcfe98ba54761032 -+ dcfe98ba54761032 23016745ab89efcd -+vlbr 2 -+ 67452301efcdab89 98badcfe10325476 -+ 98badcfe10325476 67452301efcdab89 -+vlbr 3 -+ efcdab8967452301 1032547698badcfe -+ 1032547698badcfe efcdab8967452301 -+vlbr 4 -+ 1032547698badcfe efcdab8967452301 -+ efcdab8967452301 1032547698badcfe -+vler 1 -+ 32107654ba98fedc cdef89ab45670123 -+ cdef89ab45670123 32107654ba98fedc -+vler 2 -+ 76543210fedcba98 89abcdef01234567 -+ 89abcdef01234567 76543210fedcba98 -+vler 3 -+ fedcba9876543210 0123456789abcdef -+ 0123456789abcdef fedcba9876543210 -+vstebrh 0 -+ 2301233445566778 899aabbccddeeff0 -+ dcfe233445566778 899aabbccddeeff0 -+vstebrh 7 -+ 1032233445566778 899aabbccddeeff0 -+ efcd233445566778 899aabbccddeeff0 -+vstebrh 2 -+ ab89233445566778 899aabbccddeeff0 -+ 5476233445566778 899aabbccddeeff0 -+vstebrf 0 -+ 6745230145566778 899aabbccddeeff0 -+ 98badcfe45566778 899aabbccddeeff0 -+vstebrf 3 -+ 1032547645566778 899aabbccddeeff0 -+ efcdab8945566778 899aabbccddeeff0 -+vstebrf 1 -+ 
efcdab8945566778 899aabbccddeeff0 -+ 1032547645566778 899aabbccddeeff0 -+vstebrg 0 -+ efcdab8967452301 899aabbccddeeff0 -+ 1032547698badcfe 899aabbccddeeff0 -+vstebrg 1 -+ 1032547698badcfe 899aabbccddeeff0 -+ efcdab8967452301 899aabbccddeeff0 -+vstbr 1 -+ 23016745ab89efcd dcfe98ba54761032 -+ dcfe98ba54761032 23016745ab89efcd -+vstbr 2 -+ 67452301efcdab89 98badcfe10325476 -+ 98badcfe10325476 67452301efcdab89 -+vstbr 3 -+ efcdab8967452301 1032547698badcfe -+ 1032547698badcfe efcdab8967452301 -+vstbr 4 -+ 1032547698badcfe efcdab8967452301 -+ efcdab8967452301 1032547698badcfe -+vster 1 -+ 32107654ba98fedc cdef89ab45670123 -+ cdef89ab45670123 32107654ba98fedc -+vster 2 -+ 76543210fedcba98 89abcdef01234567 -+ 89abcdef01234567 76543210fedcba98 -+vster 3 -+ fedcba9876543210 0123456789abcdef -+ 0123456789abcdef fedcba9876543210 -+vsld 0 -+ 0123456789abcdef fedcba9876543210 -+ fedcba9876543210 0123456789abcdef -+vsld 7 -+ 91a2b3c4d5e6f7ff 6e5d4c3b2a19087f -+ 6e5d4c3b2a190800 91a2b3c4d5e6f780 -+vsld 4 -+ 123456789abcdeff edcba9876543210f -+ edcba98765432100 123456789abcdef0 -+vsrd 0 -+ ffffffffffffffff ffffffffffffffff -+ 0123456789abcdef fedcba9876543210 -+vsrd 7 -+ 21ffffffffffffff ffffffffffffffff -+ de02468acf13579b dffdb97530eca864 -+vsrd 4 -+ 0fffffffffffffff ffffffffffffffff -+ f0123456789abcde ffedcba987654321 -+vcfpl 0 -+ 0x1.234568p+24 0x1.13579cp+31 0x1.fdb976p+31 0x1.d950c8p+30 -+ 0x1.00804p+31 0x1.00804p+27 0x1.feff8p+30 0x1.eff7fcp+31 -+vcfpl 8 -+ 0x1.234568p+24 - - - -+ 0x1.00804p+31 - - - -+vcfps 0 -+ 0x1.234568p+24 -0x1.d950c8p+30 -0x1.234568p+24 0x1.d950c8p+30 -+ -0x1.feff8p+30 0x1.00804p+27 0x1.feff8p+30 -0x1.00804p+27 -+vcfps 8 -+ 0x1.234568p+24 - - - -+ -0x1.feff8p+30 - - - -+vclfp 0 -+ 00ffffff 00000000 0000002a 00002710 -+ 00000004 00000003 00000002 00000001 -+vclfp 8 -+ 00ffffff - - - -+ 00000004 - - - -+vcsfp 0 -+ 00ffffff ff000001 0000002a 00002710 -+ 00000004 00000003 00000002 00000001 -+vcsfp 8 -+ 00ffffff - - - -+ 00000004 - - - -diff --git 
a/none/tests/s390x/vec2.vgtest b/none/tests/s390x/vec2.vgtest -new file mode 100644 -index 000000000..45e942e64 ---- /dev/null -+++ b/none/tests/s390x/vec2.vgtest -@@ -0,0 +1,2 @@ -+prog: vec2 -+prereq: test -e vec2 && ../../../tests/s390x_features s390x-vx -diff --git a/tests/s390x_features.c b/tests/s390x_features.c -index 25b98f3a3..e7939c463 100644 ---- a/tests/s390x_features.c -+++ b/tests/s390x_features.c -@@ -270,6 +270,10 @@ static int go(char *feature, char *cpu) - match = facilities[0] & FAC_BIT(57); /* message security assist 5 facility */ - } else if (strcmp(feature, "s390x-mi2") == 0 ) { - match = facilities[0] & FAC_BIT(58); -+ } else if (strcmp(feature, "s390x-mi3") == 0 ) { -+ match = facilities[0] & FAC_BIT(61); -+ } else if (strcmp(feature, "s390x-vx2") == 0 ) { -+ match = facilities[2] & FAC_BIT(20); - } else { - return 2; // Unrecognised feature. - } --- -2.23.0 - -From d9364bc90ee894c43ee742840f806571edc08ab3 Mon Sep 17 00:00:00 2001 -From: Andreas Arnez -Date: Tue, 18 May 2021 19:59:32 +0200 -Subject: [PATCH 13/13] s390x: Wrap up misc-insn-3 and vec-enh-2 support - -Wrap up support for the miscellaneous-instruction-extensions facility 3 -and the vector-enhancements facility 2: Add 'case' statements for the -remaining unhandled arch13 instructions to 'guest_s390_toIR.c', document -the new support in 's390-opcodes.csv', adjust 's390-check-opcodes.pl', and -announce the new feature in 'NEWS'. ---- - NEWS | 5 ++ - VEX/priv/guest_s390_toIR.c | 5 +- - auxprogs/s390-check-opcodes.pl | 22 ++++++++- - docs/internals/s390-opcodes.csv | 81 +++++++++++++++++++++++++++++++-- - 4 files changed, 108 insertions(+), 5 deletions(-) - -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index 46a867475..1bd18f760 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -8,7 +8,7 @@ - This file is part of Valgrind, a dynamic binary instrumentation - framework. - -- Copyright IBM Corp. 2010-2020 -+ Copyright IBM Corp. 
2010-2021 - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as -@@ -20503,6 +20503,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) - RRE_r2(ovl)); goto ok; - case 0xb931: s390_format_RRE_RR(s390_irgen_CLGFR, RRE_r1(ovl), - RRE_r2(ovl)); goto ok; -+ case 0xb938: /* SORTL */ goto unimplemented; -+ case 0xb939: /* DFLTCC */ goto unimplemented; -+ case 0xb93a: /* KDSA */ goto unimplemented; - case 0xb93c: s390_format_RRE_RR(s390_irgen_PPNO, RRE_r1(ovl), - RRE_r2(ovl)); goto ok; - case 0xb93e: /* KIMD */ goto unimplemented; --- -2.23.0 - diff --git a/SOURCES/valgrind-3.17.0-s390_insn_as_string.patch b/SOURCES/valgrind-3.17.0-s390_insn_as_string.patch deleted file mode 100644 index 39c956c..0000000 --- a/SOURCES/valgrind-3.17.0-s390_insn_as_string.patch +++ /dev/null @@ -1,54 +0,0 @@ -commit 45873298ff2d17accc65654d64758360616aade5 -Author: Andreas Arnez -Date: Tue Mar 30 18:10:43 2021 +0200 - - s390x: Add missing UNOP insns to s390_insn_as_string - - Some unary operator insns are not handled by s390_insn_as_string(). If - they are encountered while the appropriate trace flag is set, a vpanic - occurs. Fix this: add handling for the missing insns. 
- -diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c -index 8762975b2..6e0734ae0 100644 ---- a/VEX/priv/host_s390_defs.c -+++ b/VEX/priv/host_s390_defs.c -@@ -7860,12 +7860,24 @@ s390_insn_as_string(const s390_insn *insn) - op = "v-vunpacku"; - break; - -- case S390_VEC_FLOAT_NEG: -- op = "v-vfloatneg"; -+ case S390_VEC_ABS: -+ op = "v-vabs"; - break; - -- case S390_VEC_FLOAT_SQRT: -- op = "v-vfloatsqrt"; -+ case S390_VEC_COUNT_LEADING_ZEROES: -+ op = "v-vclz"; -+ break; -+ -+ case S390_VEC_COUNT_TRAILING_ZEROES: -+ op = "v-vctz"; -+ break; -+ -+ case S390_VEC_COUNT_ONES: -+ op = "v-vpopct"; -+ break; -+ -+ case S390_VEC_FLOAT_NEG: -+ op = "v-vfloatneg"; - break; - - case S390_VEC_FLOAT_ABS: -@@ -7876,6 +7888,10 @@ s390_insn_as_string(const s390_insn *insn) - op = "v-vfloatnabs"; - break; - -+ case S390_VEC_FLOAT_SQRT: -+ op = "v-vfloatsqrt"; -+ break; -+ - default: - goto fail; - } diff --git a/SOURCES/valgrind-3.18.1-amd64-more-spec-rules.patch b/SOURCES/valgrind-3.18.1-amd64-more-spec-rules.patch new file mode 100644 index 0000000..87794ee --- /dev/null +++ b/SOURCES/valgrind-3.18.1-amd64-more-spec-rules.patch @@ -0,0 +1,105 @@ +commit 595341b150312d2407bd43304449bf39ec3e1fa8 +Author: Julian Seward +Date: Sat Nov 13 19:59:07 2021 +0100 + + amd64 front end: add more spec rules: + + S after SHRQ + Z after SHLQ + NZ after SHLQ + Z after SHLL + S after SHLL + + The lack of at least one of these was observed to cause occasional false + positives in Memcheck. + + Plus add commented-out cases so as to complete the set of 12 rules + {Z,NZ,S,NS} after {SHRQ,SHLQ,SHLL}. The commented-out ones are commented + out because I so far didn't find any use cases for them. 
+ +diff --git a/VEX/priv/guest_amd64_helpers.c b/VEX/priv/guest_amd64_helpers.c +index 9d61e7a0f..ba71c1b62 100644 +--- a/VEX/priv/guest_amd64_helpers.c ++++ b/VEX/priv/guest_amd64_helpers.c +@@ -1823,16 +1823,26 @@ IRExpr* guest_amd64_spechelper ( const HChar* function_name, + /*---------------- SHRQ ----------------*/ + + if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondZ)) { +- /* SHRQ, then Z --> test dep1 == 0 */ ++ /* SHRQ, then Z --> test result[63:0] == 0 */ + return unop(Iop_1Uto64, + binop(Iop_CmpEQ64, cc_dep1, mkU64(0))); + } + if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondNZ)) { +- /* SHRQ, then NZ --> test dep1 != 0 */ ++ /* SHRQ, then NZ --> test result[63:0] != 0 */ + return unop(Iop_1Uto64, + binop(Iop_CmpNE64, cc_dep1, mkU64(0))); + } + ++ if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondS)) { ++ /* SHRQ, then S --> (ULong)result[63] (result is in dep1) */ ++ return binop(Iop_Shr64, cc_dep1, mkU8(63)); ++ } ++ // No known test case for this, hence disabled: ++ //if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondNS)) { ++ // /* SHRQ, then NS --> (ULong) ~ result[63] */ ++ // vassert(0); ++ //} ++ + /*---------------- SHRL ----------------*/ + + if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondZ)) { +@@ -1881,6 +1891,52 @@ IRExpr* guest_amd64_spechelper ( const HChar* function_name, + // mkU32(0))); + //} + ++ /*---------------- SHLQ ----------------*/ ++ ++ if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondZ)) { ++ /* SHLQ, then Z --> test dep1 == 0 */ ++ return unop(Iop_1Uto64, ++ binop(Iop_CmpEQ64, cc_dep1, mkU64(0))); ++ } ++ if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondNZ)) { ++ /* SHLQ, then NZ --> test dep1 != 0 */ ++ return unop(Iop_1Uto64, ++ binop(Iop_CmpNE64, cc_dep1, mkU64(0))); ++ } ++ ++ //if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondS)) { ++ // /* SHLQ, then S --> (ULong)result[63] */ ++ // vassert(0); ++ //} ++ //if (isU64(cc_op, 
AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondNS)) { ++ // /* SHLQ, then NS --> (ULong) ~ result[63] */ ++ // vassert(0); ++ //} ++ ++ /*---------------- SHLL ----------------*/ ++ ++ if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondZ)) { ++ /* SHLL, then Z --> test result[31:0] == 0 */ ++ return unop(Iop_1Uto64, ++ binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1), ++ mkU32(0))); ++ } ++ //if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondNZ)) { ++ // /* SHLL, then NZ --> test dep1 != 0 */ ++ // vassert(0); ++ //} ++ ++ if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondS)) { ++ /* SHLL, then S --> (ULong)result[31] */ ++ return binop(Iop_And64, ++ binop(Iop_Shr64, cc_dep1, mkU8(31)), ++ mkU64(1)); ++ } ++ //if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondNS)) { ++ // /* SHLL, then NS --> (ULong) ~ result[31] */ ++ // vassert(0); ++ //} ++ + /*---------------- COPY ----------------*/ + /* This can happen, as a result of amd64 FP compares: "comisd ... ; + jbe" for example. */ diff --git a/SOURCES/valgrind-3.18.1-arm64-atomic-align.patch b/SOURCES/valgrind-3.18.1-arm64-atomic-align.patch new file mode 100644 index 0000000..8cce35f --- /dev/null +++ b/SOURCES/valgrind-3.18.1-arm64-atomic-align.patch @@ -0,0 +1,163 @@ +commit 2be719921e700a9ac9b85f470ed87cb8adf8151b +Author: Julian Seward +Date: Sat Nov 13 09:27:01 2021 +0100 + + Bug 445415 - arm64 front end: alignment checks missing for atomic instructions. + + For the arm64 front end, none of the atomic instructions have address + alignment checks included in their IR. They all should. The effect of + missing alignment checks in the IR is that, since this IR will in most cases + be translated back to atomic instructions in the back end, we will get + alignment traps (SIGBUS) on the host side and not on the guest side, which is + (very) incorrect behaviour of the simulation. 
+ + +diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c +index ee018c6a9..16a7e075f 100644 +--- a/VEX/priv/guest_arm64_toIR.c ++++ b/VEX/priv/guest_arm64_toIR.c +@@ -4833,6 +4833,34 @@ static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr ) + } + + ++/* Generate a SIGBUS followed by a restart of the current instruction if ++ `effective_addr` is not `align`-aligned. This is required behaviour for atomic ++ instructions. This assumes that guest_PC_curr_instr is set correctly! ++ ++ This is hardwired to generate SIGBUS because so far the only supported arm64 ++ (arm64-linux) does that. Should we need to later extend it to generate some ++ other signal, use the same scheme as with gen_SIGNAL_if_not_XX_aligned in ++ guest_amd64_toIR.c. */ ++static ++void gen_SIGBUS_if_not_XX_aligned ( IRTemp effective_addr, ULong align ) ++{ ++ if (align == 1) { ++ return; ++ } ++ vassert(align == 16 || align == 8 || align == 4 || align == 2); ++ stmt( ++ IRStmt_Exit( ++ binop(Iop_CmpNE64, ++ binop(Iop_And64,mkexpr(effective_addr),mkU64(align-1)), ++ mkU64(0)), ++ Ijk_SigBUS, ++ IRConst_U64(guest_PC_curr_instr), ++ OFFB_PC ++ ) ++ ); ++} ++ ++ + /* Generate a "standard 7" name, from bitQ and size. But also + allow ".1d" since that's occasionally useful.
*/ + static +@@ -6670,7 +6698,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + + IRTemp ea = newTemp(Ity_I64); + assign(ea, getIReg64orSP(nn)); +- /* FIXME generate check that ea is szB-aligned */ ++ gen_SIGBUS_if_not_XX_aligned(ea, szB); + + if (isLD && ss == BITS5(1,1,1,1,1)) { + IRTemp res = newTemp(ty); +@@ -6803,7 +6831,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + + IRTemp ea = newTemp(Ity_I64); + assign(ea, getIReg64orSP(nn)); +- /* FIXME generate check that ea is 2*elemSzB-aligned */ ++ gen_SIGBUS_if_not_XX_aligned(ea, fullSzB); + + if (isLD && ss == BITS5(1,1,1,1,1)) { + if (abiinfo->guest__use_fallback_LLSC) { +@@ -7044,7 +7072,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + + IRTemp ea = newTemp(Ity_I64); + assign(ea, getIReg64orSP(nn)); +- /* FIXME generate check that ea is szB-aligned */ ++ gen_SIGBUS_if_not_XX_aligned(ea, szB); + + if (isLD) { + IRTemp res = newTemp(ty); +@@ -7159,6 +7187,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + + IRTemp ea = newTemp(Ity_I64); + assign(ea, getIReg64orSP(nn)); ++ gen_SIGBUS_if_not_XX_aligned(ea, szB); + + // Insert barrier before loading for acquire and acquire-release variants: + // A and AL. +@@ -7266,6 +7295,10 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + IRType ty = integerIRTypeOfSize(szB); + Bool is64 = szB == 8; + ++ IRTemp ea = newTemp(Ity_I64); ++ assign(ea, getIReg64orSP(nn)); ++ gen_SIGBUS_if_not_XX_aligned(ea, szB); ++ + IRExpr *exp = narrowFrom64(ty, getIReg64orZR(ss)); + IRExpr *new = narrowFrom64(ty, getIReg64orZR(tt)); + +@@ -7275,7 +7308,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + // Store the result back if LHS remains unchanged in memory. 
+ IRTemp old = newTemp(ty); + stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old, +- Iend_LE, getIReg64orSP(nn), ++ Iend_LE, mkexpr(ea), + /*expdHi*/NULL, exp, + /*dataHi*/NULL, new)) ); + +@@ -7307,6 +7340,10 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + if ((ss & 0x1) || (tt & 0x1)) { + /* undefined; fall through */ + } else { ++ IRTemp ea = newTemp(Ity_I64); ++ assign(ea, getIReg64orSP(nn)); ++ gen_SIGBUS_if_not_XX_aligned(ea, is64 ? 16 : 8); ++ + IRExpr *expLo = getIRegOrZR(is64, ss); + IRExpr *expHi = getIRegOrZR(is64, ss + 1); + IRExpr *newLo = getIRegOrZR(is64, tt); +@@ -7318,7 +7355,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + stmt(IRStmt_MBE(Imbe_Fence)); + + stmt( IRStmt_CAS(mkIRCAS(oldHi, oldLo, +- Iend_LE, getIReg64orSP(nn), ++ Iend_LE, mkexpr(ea), + expHi, expLo, + newHi, newLo)) ); + +diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c +index b65e27db4..39c6aaa46 100644 +--- a/VEX/priv/host_arm64_defs.c ++++ b/VEX/priv/host_arm64_defs.c +@@ -4033,6 +4033,7 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, + case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break; + case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break; + case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break; ++ case Ijk_SigBUS: trcval = VEX_TRC_JMP_SIGBUS; break; + //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break; + case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break; + /* We don't expect to see the following being assisted. 
*/ +diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c +index 094e7e74b..82cb2d78c 100644 +--- a/VEX/priv/host_arm64_isel.c ++++ b/VEX/priv/host_arm64_isel.c +@@ -4483,6 +4483,7 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) + case Ijk_InvalICache: + case Ijk_FlushDCache: + case Ijk_SigTRAP: ++ case Ijk_SigBUS: + case Ijk_Yield: { + HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst)); + addInstr(env, ARM64Instr_XAssisted(r, amPC, cc, +@@ -4576,8 +4577,8 @@ static void iselNext ( ISelEnv* env, + case Ijk_InvalICache: + case Ijk_FlushDCache: + case Ijk_SigTRAP: +- case Ijk_Yield: +- { ++ case Ijk_SigBUS: ++ case Ijk_Yield: { + HReg r = iselIntExpr_R(env, next); + ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP); + addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk)); diff --git a/SOURCES/valgrind-3.18.1-arm64-atomics-rdm.patch b/SOURCES/valgrind-3.18.1-arm64-atomics-rdm.patch new file mode 100644 index 0000000..47cb91a --- /dev/null +++ b/SOURCES/valgrind-3.18.1-arm64-atomics-rdm.patch @@ -0,0 +1,16 @@ +diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c +index 1253cf588..482047c7a 100644 +--- a/VEX/priv/main_main.c ++++ b/VEX/priv/main_main.c +@@ -2163,11 +2163,6 @@ static void check_hwcaps ( VexArch arch, UInt hwcaps ) + if (have_fp16 != have_vfp16) + invalid_hwcaps(arch, hwcaps, + "Mismatch detected between scalar and vector FP16 features.\n"); +- Bool have_rdm = ((hwcaps & VEX_HWCAPS_ARM64_RDM) != 0); +- Bool have_atomics = ((hwcaps & VEX_HWCAPS_ARM64_ATOMICS) != 0); +- if (have_rdm != have_atomics) +- invalid_hwcaps(arch, hwcaps, +- "Mismatch detected between RDMA and atomics features.\n"); + return; + } + diff --git a/SOURCES/valgrind-3.18.1-arm64-doubleword-cas.patch b/SOURCES/valgrind-3.18.1-arm64-doubleword-cas.patch new file mode 100644 index 0000000..7cf0bf5 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-arm64-doubleword-cas.patch @@ -0,0 +1,121 @@ +commit 7dbe2fed72886874f2eaf57dc07929542ae55b58 
+Author: Julian Seward +Date: Fri Nov 12 10:40:48 2021 +0100 + + Bug 445354 - arm64 backend: incorrect code emitted for doubleword CAS. + + The sequence of instructions emitted by the arm64 backend for doubleword + compare-and-swap is incorrect. This could lead to incorrect simulation of the + ARMv8.1 atomic instructions (CASP, at least). It also causes failures in the + upcoming fix for v8.0 support for LD{,A}XP/ST{,L}XP in bug 444399, at least + when running with the fallback LL/SC implementation + (`--sim-hints=fallback-llsc`, or as autoselected at startup). In the worst + case it can cause segfaulting in the generated code, because it could jump + backwards unexpectedly far. + + The problem is the sequence emitted for ARM64in_CASP: + + * the jump offsets are incorrect, both for `bne out` (x 2) and `cbnz w1, loop`. + + * using w1 to hold the success indication of the stxp instruction trashes the + previous value in x1. But the value in x1 is an output of ARM64in_CASP, + hence one of the two output registers is corrupted. That confuses any code + downstream that wants to inspect those values to find out whether or not the + transaction succeeded. + + The fixes are to + + * fix the branch offsets + + * use a different register to hold the stxp success indication. w3 is a + convenient choice.
+ +diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c +index 5dccc0495..5657bcab9 100644 +--- a/VEX/priv/host_arm64_defs.c ++++ b/VEX/priv/host_arm64_defs.c +@@ -2271,6 +2271,7 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 ) + addHRegUse(u, HRmWrite, hregARM64_X1()); + addHRegUse(u, HRmWrite, hregARM64_X9()); + addHRegUse(u, HRmWrite, hregARM64_X8()); ++ addHRegUse(u, HRmWrite, hregARM64_X3()); + break; + case ARM64in_MFence: + return; +@@ -4254,16 +4255,16 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, + + -- always: + cmp x0, x8 // EB08001F +- bne out // 540000E1 (b.ne #28 ) ++ bne out // 540000A1 + cmp x1, x9 // EB09003F +- bne out // 540000A1 (b.ne #20 ) ++ bne out // 54000061 + + -- one of: +- stxp w1, x6, x7, [x2] // C8211C46 +- stxp w1, w6, w7, [x2] // 88211C46 ++ stxp w3, x6, x7, [x2] // C8231C46 ++ stxp w3, w6, w7, [x2] // 88231C46 + + -- always: +- cbnz w1, loop // 35FFFE81 (cbnz w1, #-48 ) ++ cbnz w3, loop // 35FFFF03 + out: + */ + switch (i->ARM64in.CASP.szB) { +@@ -4277,15 +4278,15 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, + default: vassert(0); + } + *p++ = 0xEB08001F; +- *p++ = 0x540000E1; +- *p++ = 0xEB09003F; + *p++ = 0x540000A1; ++ *p++ = 0xEB09003F; ++ *p++ = 0x54000061; + switch (i->ARM64in.CASP.szB) { +- case 8: *p++ = 0xC8211C46; break; +- case 4: *p++ = 0x88211C46; break; ++ case 8: *p++ = 0xC8231C46; break; ++ case 4: *p++ = 0x88231C46; break; + default: vassert(0); + } +- *p++ = 0x35FFFE81; ++ *p++ = 0x35FFFF03; + goto done; + } + case ARM64in_MFence: { +diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h +index f0737f2c6..01fb5708e 100644 +--- a/VEX/priv/host_arm64_defs.h ++++ b/VEX/priv/host_arm64_defs.h +@@ -720,6 +720,7 @@ typedef + Int szB; /* 1, 2, 4 or 8 */ + } StrEX; + /* x1 = CAS(x3(addr), x5(expected) -> x7(new)), ++ and trashes x8 + where x1[8*szB-1 : 0] == x5[8*szB-1 : 0] indicates success, + x1[8*szB-1 : 0] != x5[8*szB-1 : 0] indicates 
failure. + Uses x8 as scratch (but that's not allocatable). +@@ -738,7 +739,7 @@ typedef + -- if branch taken, failure; x1[[8*szB-1 : 0] holds old value + -- attempt to store + stxr w8, x7, [x3] +- -- if store successful, x1==0, so the eor is "x1 := x5" ++ -- if store successful, x8==0 + -- if store failed, branch back and try again. + cbne w8, loop + after: +@@ -746,6 +747,12 @@ typedef + struct { + Int szB; /* 1, 2, 4 or 8 */ + } CAS; ++ /* Doubleworld CAS, 2 x 32 bit or 2 x 64 bit ++ x0(oldLSW),x1(oldMSW) ++ = DCAS(x2(addr), x4(expectedLSW),x5(expectedMSW) ++ -> x6(newLSW),x7(newMSW)) ++ and trashes x8, x9 and x3 ++ */ + struct { + Int szB; /* 4 or 8 */ + } CASP; diff --git a/SOURCES/valgrind-3.18.1-arm64-ldaxp-stlxp.patch b/SOURCES/valgrind-3.18.1-arm64-ldaxp-stlxp.patch new file mode 100644 index 0000000..d118cc6 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-arm64-ldaxp-stlxp.patch @@ -0,0 +1,1440 @@ +commit 530df882b8f60ecacaf2b9b8a719f7ea1c1d1650 +Author: Julian Seward +Date: Fri Nov 12 12:13:45 2021 +0100 + + Bug 444399 - disInstr(arm64): unhandled instruction 0xC87F2D89 (LD{,A}XP and ST{,L}XP). + + This is unfortunately a big and complex patch, to implement LD{,A}XP and + ST{,L}XP. These were omitted from the original AArch64 v8.0 implementation + for unknown reasons. + + (Background) the patch is made significantly more complex because for AArch64 + we actually have two implementations of the underlying + Load-Linked/Store-Conditional (LL/SC) machinery: a "primary" implementation, + which translates LL/SC more or less directly into IR and re-emits them at the + back end, and a "fallback" implementation that implements LL/SC "manually", by + taking advantage of the fact that V serialises thread execution, so we can + "implement" LL/SC by simulating a reservation using fields LLSC_* in the guest + state, and invalidating the reservation at every thread switch. 
+ + (Background) the fallback scheme is needed because the primary scheme is in + violation of the ARMv8 semantics in that it can (easily) introduce extra + memory references between the LL and SC, hence on some hardware causing the + reservation to always fail and so the simulated program to wind up looping + forever. + + For these instructions, big picture: + + * for the primary implementation, we take advantage of the fact that + IRStmt_LLSC allows I128 bit transactions to be represented. Hence we bundle + up the two 64-bit data elements into an I128 (or vice versa) and present a + single I128-typed IRStmt_LLSC in the IR. In the backend, those are + re-emitted as LDXP/STXP respectively. For LL/SC on 32-bit register pairs, + that bundling produces a single 64-bit item, and so the existing LL/SC + backend machinery handles it. The effect is that a doubleword 32-bit LL/SC + in the front end translates into a single 64-bit LL/SC in the back end. + Overall, though, the implementation is straightforward. + + * for the fallback implementation, it is necessary to extend the guest state + field `guest_LLSC_DATA` to represent a 128-bit transaction, by splitting it + into _DATA_LO64 and _DATA_HI64. Then, the implementation is an exact + analogue of the fallback implementation for single-word LL/SC. It takes + advantage of the fact that the backend already supports 128-bit CAS, as + fixed in bug 445354. As with the primary implementation, doubleword 32-bit + LL/SC is bundled into a single 64-bit transaction. + + Detailed changes: + + * new arm64 guest state fields LLSC_DATA_LO64/LLSC_DATA_HI64 to replace + guest_LLSC_DATA + + * (ridealong fix) arm64 front end: a fix to a minor and harmless decoding bug + for the single-word LDX/STX case.
+ + * arm64 front end: IR generation for LD{,A}XP/ST{,L}XP: tedious and + longwinded, but per comments above, an exact(ish) analogue of the singleword + case + + * arm64 backend: new insns ARM64Instr_LdrEXP / ARM64Instr_StrEXP to wrap up 2 + x 64 exclusive loads/stores. Per comments above, there's no need to handle + the 2 x 32 case. + + * arm64 isel: translate I128-typed IRStmt_LLSC into the above two insns + + * arm64 isel: some auxiliary bits and pieces needed to handle I128 values; + this is standard doubleword isel stuff + + * arm64 isel: (ridealong fix): Ist_CAS: check for endianness of the CAS! + + * arm64 isel: (ridealong) a couple of formatting fixes + + * IR infrastructure: add support for I128 constants, done the same as V128 + constants + + * memcheck: handle shadow loads and stores for I128 values + + * testcase: memcheck/tests/atomic_incs.c: on arm64, also test 128-bit atomic + addition, to check we really have atomicity right + + * testcase: new test none/tests/arm64/ldxp_stxp.c, tests operation but not + atomicity. (Smoke test). 
+ +diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c +index 12a1c5978..ee018c6a9 100644 +--- a/VEX/priv/guest_arm64_toIR.c ++++ b/VEX/priv/guest_arm64_toIR.c +@@ -1184,9 +1184,10 @@ static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e ) + #define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART) + #define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN) + +-#define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE) +-#define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR) +-#define OFFB_LLSC_DATA offsetof(VexGuestARM64State,guest_LLSC_DATA) ++#define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE) ++#define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR) ++#define OFFB_LLSC_DATA_LO64 offsetof(VexGuestARM64State,guest_LLSC_DATA_LO64) ++#define OFFB_LLSC_DATA_HI64 offsetof(VexGuestARM64State,guest_LLSC_DATA_HI64) + + + /* ---------------- Integer registers ---------------- */ +@@ -6652,7 +6653,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + (coregrind/m_scheduler/scheduler.c, run_thread_for_a_while() + has to do this bit) + */ +- if (INSN(29,23) == BITS7(0,0,1,0,0,0,0) ++ if (INSN(29,24) == BITS6(0,0,1,0,0,0) + && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0) + && INSN(14,10) == BITS5(1,1,1,1,1)) { + UInt szBlg2 = INSN(31,30); +@@ -6678,7 +6679,8 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + // if it faults. 
+ IRTemp loaded_data64 = newTemp(Ity_I64); + assign(loaded_data64, widenUto64(ty, loadLE(ty, mkexpr(ea)))); +- stmt( IRStmt_Put( OFFB_LLSC_DATA, mkexpr(loaded_data64) )); ++ stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64, mkexpr(loaded_data64) )); ++ stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64, mkU64(0) )); + stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) )); + stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(szB) )); + putIReg64orZR(tt, mkexpr(loaded_data64)); +@@ -6729,7 +6731,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + )); + // Fail if the data doesn't match the LL data + IRTemp llsc_data64 = newTemp(Ity_I64); +- assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA, Ity_I64)); ++ assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA_LO64, Ity_I64)); + stmt( IRStmt_Exit( + binop(Iop_CmpNE64, widenUto64(ty, loadLE(ty, mkexpr(ea))), + mkexpr(llsc_data64)), +@@ -6771,6 +6773,257 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn, + /* else fall through */ + } + ++ /* -------------------- LD{,A}XP -------------------- */ ++ /* -------------------- ST{,L}XP -------------------- */ ++ /* 31 30 29 23 20 15 14 9 4 ++ 1 sz 001000 011 11111 0 t2 n t1 LDXP Rt1, Rt2, [Xn|SP] ++ 1 sz 001000 011 11111 1 t2 n t1 LDAXP Rt1, Rt2, [Xn|SP] ++ 1 sz 001000 001 s 0 t2 n t1 STXP Ws, Rt1, Rt2, [Xn|SP] ++ 1 sz 001000 001 s 1 t2 n t1 STLXP Ws, Rt1, Rt2, [Xn|SP] ++ */ ++ /* See just above, "LD{,A}X{R,RH,RB} / ST{,L}X{R,RH,RB}", for detailed ++ comments about this implementation. Note the 'sz' field here is only 1 ++ bit; above, it is 2 bits, and has a different encoding. ++ */ ++ if (INSN(31,31) == 1 ++ && INSN(29,24) == BITS6(0,0,1,0,0,0) ++ && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,1)) { ++ Bool elemIs64 = INSN(30,30) == 1; ++ Bool isLD = INSN(22,22) == 1; ++ Bool isAcqOrRel = INSN(15,15) == 1; ++ UInt ss = INSN(20,16); ++ UInt tt2 = INSN(14,10); ++ UInt nn = INSN(9,5); ++ UInt tt1 = INSN(4,0); ++ ++ UInt elemSzB = elemIs64 ? 
8 : 4; ++ UInt fullSzB = 2 * elemSzB; ++ IRType elemTy = integerIRTypeOfSize(elemSzB); ++ IRType fullTy = integerIRTypeOfSize(fullSzB); ++ ++ IRTemp ea = newTemp(Ity_I64); ++ assign(ea, getIReg64orSP(nn)); ++ /* FIXME generate check that ea is 2*elemSzB-aligned */ ++ ++ if (isLD && ss == BITS5(1,1,1,1,1)) { ++ if (abiinfo->guest__use_fallback_LLSC) { ++ // Fallback implementation of LL. ++ // Do the load first so we don't update any guest state if it ++ // faults. Assumes little-endian guest. ++ if (fullTy == Ity_I64) { ++ vassert(elemSzB == 4); ++ IRTemp loaded_data64 = newTemp(Ity_I64); ++ assign(loaded_data64, loadLE(fullTy, mkexpr(ea))); ++ stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64, mkexpr(loaded_data64) )); ++ stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64, mkU64(0) )); ++ stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) )); ++ stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(8) )); ++ putIReg64orZR(tt1, unop(Iop_32Uto64, ++ unop(Iop_64to32, ++ mkexpr(loaded_data64)))); ++ putIReg64orZR(tt2, unop(Iop_32Uto64, ++ unop(Iop_64HIto32, ++ mkexpr(loaded_data64)))); ++ } else { ++ vassert(elemSzB == 8 && fullTy == Ity_I128); ++ IRTemp loaded_data128 = newTemp(Ity_I128); ++ // Hack: do the load as V128 rather than I128 so as to avoid ++ // having to implement I128 loads in the arm64 back end. 
++ assign(loaded_data128, unop(Iop_ReinterpV128asI128, ++ loadLE(Ity_V128, mkexpr(ea)))); ++ IRTemp loaded_data_lo64 = newTemp(Ity_I64); ++ IRTemp loaded_data_hi64 = newTemp(Ity_I64); ++ assign(loaded_data_lo64, unop(Iop_128to64, ++ mkexpr(loaded_data128))); ++ assign(loaded_data_hi64, unop(Iop_128HIto64, ++ mkexpr(loaded_data128))); ++ stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64, ++ mkexpr(loaded_data_lo64) )); ++ stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64, ++ mkexpr(loaded_data_hi64) )); ++ stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) )); ++ stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(16) )); ++ putIReg64orZR(tt1, mkexpr(loaded_data_lo64)); ++ putIReg64orZR(tt2, mkexpr(loaded_data_hi64)); ++ } ++ } else { ++ // Non-fallback implementation of LL. ++ IRTemp res = newTemp(fullTy); // I64 or I128 ++ stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/)); ++ // Assuming a little-endian guest here. Rt1 goes at the lower ++ // address, so it must live in the least significant half of `res`. ++ IROp opGetLO = fullTy == Ity_I128 ? Iop_128to64 : Iop_64to32; ++ IROp opGetHI = fullTy == Ity_I128 ? Iop_128HIto64 : Iop_64HIto32; ++ putIReg64orZR(tt1, widenUto64(elemTy, unop(opGetLO, mkexpr(res)))); ++ putIReg64orZR(tt2, widenUto64(elemTy, unop(opGetHI, mkexpr(res)))); ++ } ++ if (isAcqOrRel) { ++ stmt(IRStmt_MBE(Imbe_Fence)); ++ } ++ DIP("ld%sxp %s, %s, [%s] %s\n", ++ isAcqOrRel ? (isLD ? "a" : "l") : "", ++ nameIRegOrZR(elemSzB == 8, tt1), ++ nameIRegOrZR(elemSzB == 8, tt2), ++ nameIReg64orSP(nn), ++ abiinfo->guest__use_fallback_LLSC ++ ? "(fallback implementation)" : ""); ++ return True; ++ } ++ if (!isLD) { ++ if (isAcqOrRel) { ++ stmt(IRStmt_MBE(Imbe_Fence)); ++ } ++ if (abiinfo->guest__use_fallback_LLSC) { ++ // Fallback implementation of SC. ++ // This is really ugly, since we don't have any way to do ++ // proper if-then-else. First, set up as if the SC failed, ++ // and jump forwards if it really has failed. 
++ ++ // Continuation address ++ IRConst* nia = IRConst_U64(guest_PC_curr_instr + 4); ++ ++ // "the SC failed". Any non-zero value means failure. ++ putIReg64orZR(ss, mkU64(1)); ++ ++ IRTemp tmp_LLsize = newTemp(Ity_I64); ++ assign(tmp_LLsize, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64)); ++ stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) // "no transaction" ++ )); ++ // Fail if no or wrong-size transaction ++ vassert((fullSzB == 8 && fullTy == Ity_I64) ++ || (fullSzB == 16 && fullTy == Ity_I128)); ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, mkexpr(tmp_LLsize), mkU64(fullSzB)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ // Fail if the address doesn't match the LL address ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, mkexpr(ea), ++ IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ // The data to be stored. ++ IRTemp store_data = newTemp(fullTy); ++ if (fullTy == Ity_I64) { ++ assign(store_data, ++ binop(Iop_32HLto64, ++ narrowFrom64(Ity_I32, getIReg64orZR(tt2)), ++ narrowFrom64(Ity_I32, getIReg64orZR(tt1)))); ++ } else { ++ assign(store_data, ++ binop(Iop_64HLto128, ++ getIReg64orZR(tt2), getIReg64orZR(tt1))); ++ } ++ ++ if (fullTy == Ity_I64) { ++ // 64 bit (2x32 bit) path ++ // Fail if the data in memory doesn't match the data stashed by ++ // the LL. ++ IRTemp llsc_data_lo64 = newTemp(Ity_I64); ++ assign(llsc_data_lo64, ++ IRExpr_Get(OFFB_LLSC_DATA_LO64, Ity_I64)); ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, loadLE(Ity_I64, mkexpr(ea)), ++ mkexpr(llsc_data_lo64)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ // Try to CAS the new value in. 
++ IRTemp old = newTemp(Ity_I64); ++ IRTemp expd = newTemp(Ity_I64); ++ assign(expd, mkexpr(llsc_data_lo64)); ++ stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old, ++ Iend_LE, mkexpr(ea), ++ /*expdHi*/NULL, mkexpr(expd), ++ /*dataHi*/NULL, mkexpr(store_data) ++ ))); ++ // Fail if the CAS failed (viz, old != expd) ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, mkexpr(old), mkexpr(expd)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ } else { ++ // 128 bit (2x64 bit) path ++ // Fail if the data in memory doesn't match the data stashed by ++ // the LL. ++ IRTemp llsc_data_lo64 = newTemp(Ity_I64); ++ assign(llsc_data_lo64, ++ IRExpr_Get(OFFB_LLSC_DATA_LO64, Ity_I64)); ++ IRTemp llsc_data_hi64 = newTemp(Ity_I64); ++ assign(llsc_data_hi64, ++ IRExpr_Get(OFFB_LLSC_DATA_HI64, Ity_I64)); ++ IRTemp data_at_ea = newTemp(Ity_I128); ++ assign(data_at_ea, ++ unop(Iop_ReinterpV128asI128, ++ loadLE(Ity_V128, mkexpr(ea)))); ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, ++ unop(Iop_128to64, mkexpr(data_at_ea)), ++ mkexpr(llsc_data_lo64)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, ++ unop(Iop_128HIto64, mkexpr(data_at_ea)), ++ mkexpr(llsc_data_hi64)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ // Try to CAS the new value in. 
++ IRTemp old_lo64 = newTemp(Ity_I64); ++ IRTemp old_hi64 = newTemp(Ity_I64); ++ IRTemp expd_lo64 = newTemp(Ity_I64); ++ IRTemp expd_hi64 = newTemp(Ity_I64); ++ IRTemp store_data_lo64 = newTemp(Ity_I64); ++ IRTemp store_data_hi64 = newTemp(Ity_I64); ++ assign(expd_lo64, mkexpr(llsc_data_lo64)); ++ assign(expd_hi64, mkexpr(llsc_data_hi64)); ++ assign(store_data_lo64, unop(Iop_128to64, mkexpr(store_data))); ++ assign(store_data_hi64, unop(Iop_128HIto64, mkexpr(store_data))); ++ stmt( IRStmt_CAS(mkIRCAS(old_hi64, old_lo64, ++ Iend_LE, mkexpr(ea), ++ mkexpr(expd_hi64), mkexpr(expd_lo64), ++ mkexpr(store_data_hi64), ++ mkexpr(store_data_lo64) ++ ))); ++ // Fail if the CAS failed (viz, old != expd) ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, mkexpr(old_lo64), mkexpr(expd_lo64)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ stmt( IRStmt_Exit( ++ binop(Iop_CmpNE64, mkexpr(old_hi64), mkexpr(expd_hi64)), ++ Ijk_Boring, nia, OFFB_PC ++ )); ++ } ++ // Otherwise we succeeded (!) ++ putIReg64orZR(ss, mkU64(0)); ++ } else { ++ // Non-fallback implementation of SC. ++ IRTemp res = newTemp(Ity_I1); ++ IRExpr* dataLO = narrowFrom64(elemTy, getIReg64orZR(tt1)); ++ IRExpr* dataHI = narrowFrom64(elemTy, getIReg64orZR(tt2)); ++ IROp opMerge = fullTy == Ity_I128 ? Iop_64HLto128 : Iop_32HLto64; ++ IRExpr* data = binop(opMerge, dataHI, dataLO); ++ // Assuming a little-endian guest here. Rt1 goes at the lower ++ // address, so it must live in the least significant half of `data`. ++ stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data)); ++ /* IR semantics: res is 1 if store succeeds, 0 if it fails. ++ Need to set rS to 1 on failure, 0 on success. */ ++ putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)), ++ mkU64(1))); ++ } ++ DIP("st%sxp %s, %s, %s, [%s] %s\n", ++ isAcqOrRel ? (isLD ? "a" : "l") : "", ++ nameIRegOrZR(False, ss), ++ nameIRegOrZR(elemSzB == 8, tt1), ++ nameIRegOrZR(elemSzB == 8, tt2), ++ nameIReg64orSP(nn), ++ abiinfo->guest__use_fallback_LLSC ++ ? 
"(fallback implementation)" : ""); ++ return True; ++ } ++ /* else fall through */ ++ } ++ + /* ------------------ LDA{R,RH,RB} ------------------ */ + /* ------------------ STL{R,RH,RB} ------------------ */ + /* 31 29 23 20 14 9 4 +diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c +index 5657bcab9..b65e27db4 100644 +--- a/VEX/priv/host_arm64_defs.c ++++ b/VEX/priv/host_arm64_defs.c +@@ -1059,6 +1059,16 @@ ARM64Instr* ARM64Instr_StrEX ( Int szB ) { + vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1); + return i; + } ++ARM64Instr* ARM64Instr_LdrEXP ( void ) { ++ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr)); ++ i->tag = ARM64in_LdrEXP; ++ return i; ++} ++ARM64Instr* ARM64Instr_StrEXP ( void ) { ++ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr)); ++ i->tag = ARM64in_StrEXP; ++ return i; ++} + ARM64Instr* ARM64Instr_CAS ( Int szB ) { + ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr)); + i->tag = ARM64in_CAS; +@@ -1699,12 +1709,19 @@ void ppARM64Instr ( const ARM64Instr* i ) { + sz, i->ARM64in.StrEX.szB == 8 ? 
'x' : 'w'); + return; + } ++ case ARM64in_LdrEXP: ++ vex_printf("ldxp x2, x3, [x4]"); ++ return; ++ case ARM64in_StrEXP: ++ vex_printf("stxp w0, x2, x3, [x4]"); ++ return; + case ARM64in_CAS: { + vex_printf("x1 = cas(%dbit)(x3, x5 -> x7)", 8 * i->ARM64in.CAS.szB); + return; + } + case ARM64in_CASP: { +- vex_printf("x0,x1 = casp(%dbit)(x2, x4,x5 -> x6,x7)", 8 * i->ARM64in.CASP.szB); ++ vex_printf("x0,x1 = casp(2x%dbit)(x2, x4,x5 -> x6,x7)", ++ 8 * i->ARM64in.CASP.szB); + return; + } + case ARM64in_MFence: +@@ -2253,6 +2270,17 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 ) + addHRegUse(u, HRmWrite, hregARM64_X0()); + addHRegUse(u, HRmRead, hregARM64_X2()); + return; ++ case ARM64in_LdrEXP: ++ addHRegUse(u, HRmRead, hregARM64_X4()); ++ addHRegUse(u, HRmWrite, hregARM64_X2()); ++ addHRegUse(u, HRmWrite, hregARM64_X3()); ++ return; ++ case ARM64in_StrEXP: ++ addHRegUse(u, HRmRead, hregARM64_X4()); ++ addHRegUse(u, HRmWrite, hregARM64_X0()); ++ addHRegUse(u, HRmRead, hregARM64_X2()); ++ addHRegUse(u, HRmRead, hregARM64_X3()); ++ return; + case ARM64in_CAS: + addHRegUse(u, HRmRead, hregARM64_X3()); + addHRegUse(u, HRmRead, hregARM64_X5()); +@@ -2571,6 +2599,10 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 ) + return; + case ARM64in_StrEX: + return; ++ case ARM64in_LdrEXP: ++ return; ++ case ARM64in_StrEXP: ++ return; + case ARM64in_CAS: + return; + case ARM64in_CASP: +@@ -4167,6 +4199,16 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, + } + goto bad; + } ++ case ARM64in_LdrEXP: { ++ // 820C7FC8 ldxp x2, x3, [x4] ++ *p++ = 0xC87F0C82; ++ goto done; ++ } ++ case ARM64in_StrEXP: { ++ // 820C20C8 stxp w0, x2, x3, [x4] ++ *p++ = 0xC8200C82; ++ goto done; ++ } + case ARM64in_CAS: { + /* This isn't simple. For an explanation see the comment in + host_arm64_defs.h on the definition of ARM64Instr case CAS. 
+diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h +index 01fb5708e..dc686dff7 100644 +--- a/VEX/priv/host_arm64_defs.h ++++ b/VEX/priv/host_arm64_defs.h +@@ -509,8 +509,10 @@ typedef + ARM64in_AddToSP, /* move SP by small, signed constant */ + ARM64in_FromSP, /* move SP to integer register */ + ARM64in_Mul, +- ARM64in_LdrEX, +- ARM64in_StrEX, ++ ARM64in_LdrEX, /* load exclusive, single register */ ++ ARM64in_StrEX, /* store exclusive, single register */ ++ ARM64in_LdrEXP, /* load exclusive, register pair, 2x64-bit only */ ++ ARM64in_StrEXP, /* store exclusive, register pair, 2x64-bit only */ + ARM64in_CAS, + ARM64in_CASP, + ARM64in_MFence, +@@ -719,6 +721,12 @@ typedef + struct { + Int szB; /* 1, 2, 4 or 8 */ + } StrEX; ++ /* LDXP x2, x3, [x4]. This is 2x64-bit only. */ ++ struct { ++ } LdrEXP; ++ /* STXP w0, x2, x3, [x4]. This is 2x64-bit only. */ ++ struct { ++ } StrEXP; + /* x1 = CAS(x3(addr), x5(expected) -> x7(new)), + and trashes x8 + where x1[8*szB-1 : 0] == x5[8*szB-1 : 0] indicates success, +@@ -1037,6 +1045,8 @@ extern ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR, + ARM64MulOp op ); + extern ARM64Instr* ARM64Instr_LdrEX ( Int szB ); + extern ARM64Instr* ARM64Instr_StrEX ( Int szB ); ++extern ARM64Instr* ARM64Instr_LdrEXP ( void ); ++extern ARM64Instr* ARM64Instr_StrEXP ( void ); + extern ARM64Instr* ARM64Instr_CAS ( Int szB ); + extern ARM64Instr* ARM64Instr_CASP ( Int szB ); + extern ARM64Instr* ARM64Instr_MFence ( void ); +diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c +index 4b1d8c846..094e7e74b 100644 +--- a/VEX/priv/host_arm64_isel.c ++++ b/VEX/priv/host_arm64_isel.c +@@ -196,9 +196,9 @@ static HReg iselCondCode_R ( ISelEnv* env, IRExpr* e ); + static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ); + static HReg iselIntExpr_R ( ISelEnv* env, IRExpr* e ); + +-static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, HReg* rLo, ++static void iselInt128Expr_wrk ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo, + 
ISelEnv* env, IRExpr* e ); +-static void iselInt128Expr ( /*OUT*/HReg* rHi, HReg* rLo, ++static void iselInt128Expr ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo, + ISelEnv* env, IRExpr* e ); + + static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ); +@@ -1759,9 +1759,12 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) + + /* AND/OR/XOR(e1, e2) (for any e1, e2) */ + switch (e->Iex.Binop.op) { +- case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop; +- case Iop_Or64: case Iop_Or32: case Iop_Or16: lop = ARM64lo_OR; goto log_binop; +- case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop; ++ case Iop_And64: case Iop_And32: ++ lop = ARM64lo_AND; goto log_binop; ++ case Iop_Or64: case Iop_Or32: case Iop_Or16: ++ lop = ARM64lo_OR; goto log_binop; ++ case Iop_Xor64: case Iop_Xor32: ++ lop = ARM64lo_XOR; goto log_binop; + log_binop: { + HReg dst = newVRegI(env); + HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1); +@@ -2013,6 +2016,11 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) + iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg); + return rHi; /* and abandon rLo */ + } ++ case Iop_128to64: { ++ HReg rHi, rLo; ++ iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg); ++ return rLo; /* and abandon rHi */ ++ } + case Iop_8Sto32: case Iop_8Sto64: { + IRExpr* arg = e->Iex.Unop.arg; + HReg src = iselIntExpr_R(env, arg); +@@ -2185,13 +2193,19 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) + } + return dst; + } ++ case Iop_64HIto32: { ++ HReg dst = newVRegI(env); ++ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg); ++ addInstr(env, ARM64Instr_Shift(dst, src, ARM64RI6_I6(32), ++ ARM64sh_SHR)); ++ return dst; ++ } + case Iop_64to32: + case Iop_64to16: + case Iop_64to8: + case Iop_32to16: + /* These are no-ops. 
*/ + return iselIntExpr_R(env, e->Iex.Unop.arg); +- + default: + break; + } +@@ -2335,6 +2349,43 @@ static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo, + vassert(e); + vassert(typeOfIRExpr(env->type_env,e) == Ity_I128); + ++ /* --------- TEMP --------- */ ++ if (e->tag == Iex_RdTmp) { ++ lookupIRTempPair(rHi, rLo, env, e->Iex.RdTmp.tmp); ++ return; ++ } ++ ++ /* --------- CONST --------- */ ++ if (e->tag == Iex_Const) { ++ IRConst* c = e->Iex.Const.con; ++ vassert(c->tag == Ico_U128); ++ if (c->Ico.U128 == 0) { ++ // The only case we need to handle (so far) ++ HReg zero = newVRegI(env); ++ addInstr(env, ARM64Instr_Imm64(zero, 0)); ++ *rHi = *rLo = zero; ++ return; ++ } ++ } ++ ++ /* --------- UNARY ops --------- */ ++ if (e->tag == Iex_Unop) { ++ switch (e->Iex.Unop.op) { ++ case Iop_ReinterpV128asI128: { ++ HReg dstHi = newVRegI(env); ++ HReg dstLo = newVRegI(env); ++ HReg src = iselV128Expr(env, e->Iex.Unop.arg); ++ addInstr(env, ARM64Instr_VXfromQ(dstHi, src, 1)); ++ addInstr(env, ARM64Instr_VXfromQ(dstLo, src, 0)); ++ *rHi = dstHi; ++ *rLo = dstLo; ++ return; ++ } ++ default: ++ break; ++ } ++ } ++ + /* --------- BINARY ops --------- */ + if (e->tag == Iex_Binop) { + switch (e->Iex.Binop.op) { +@@ -4086,6 +4137,14 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) + addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src)); + return; + } ++ if (ty == Ity_I128) { ++ HReg rHi, rLo, dstHi, dstLo; ++ iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data); ++ lookupIRTempPair( &dstHi, &dstLo, env, tmp); ++ addInstr(env, ARM64Instr_MovI(dstHi, rHi)); ++ addInstr(env, ARM64Instr_MovI(dstLo, rLo)); ++ return; ++ } + if (ty == Ity_V128) { + HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data); + HReg dst = lookupIRTemp(env, tmp); +@@ -4183,42 +4242,67 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) + /* LL */ + IRTemp res = stmt->Ist.LLSC.result; + IRType ty = typeOfIRTemp(env->type_env, res); +- if (ty == Ity_I64 || ty == Ity_I32 ++ if (ty == Ity_I128 || ty 
== Ity_I64 || ty == Ity_I32 + || ty == Ity_I16 || ty == Ity_I8) { + Int szB = 0; +- HReg r_dst = lookupIRTemp(env, res); + HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr); + switch (ty) { +- case Ity_I8: szB = 1; break; +- case Ity_I16: szB = 2; break; +- case Ity_I32: szB = 4; break; +- case Ity_I64: szB = 8; break; +- default: vassert(0); ++ case Ity_I8: szB = 1; break; ++ case Ity_I16: szB = 2; break; ++ case Ity_I32: szB = 4; break; ++ case Ity_I64: szB = 8; break; ++ case Ity_I128: szB = 16; break; ++ default: vassert(0); ++ } ++ if (szB == 16) { ++ HReg r_dstMSword = INVALID_HREG; ++ HReg r_dstLSword = INVALID_HREG; ++ lookupIRTempPair(&r_dstMSword, &r_dstLSword, env, res); ++ addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr)); ++ addInstr(env, ARM64Instr_LdrEXP()); ++ addInstr(env, ARM64Instr_MovI(r_dstLSword, hregARM64_X2())); ++ addInstr(env, ARM64Instr_MovI(r_dstMSword, hregARM64_X3())); ++ } else { ++ vassert(szB != 0); ++ HReg r_dst = lookupIRTemp(env, res); ++ addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr)); ++ addInstr(env, ARM64Instr_LdrEX(szB)); ++ addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2())); + } +- addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr)); +- addInstr(env, ARM64Instr_LdrEX(szB)); +- addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2())); + return; + } + goto stmt_fail; + } else { + /* SC */ + IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata); +- if (tyd == Ity_I64 || tyd == Ity_I32 ++ if (tyd == Ity_I128 || tyd == Ity_I64 || tyd == Ity_I32 + || tyd == Ity_I16 || tyd == Ity_I8) { + Int szB = 0; +- HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata); + HReg rA = iselIntExpr_R(env, stmt->Ist.LLSC.addr); + switch (tyd) { +- case Ity_I8: szB = 1; break; +- case Ity_I16: szB = 2; break; +- case Ity_I32: szB = 4; break; +- case Ity_I64: szB = 8; break; +- default: vassert(0); ++ case Ity_I8: szB = 1; break; ++ case Ity_I16: szB = 2; break; ++ case Ity_I32: szB = 4; break; ++ case Ity_I64: szB = 8; 
break; ++ case Ity_I128: szB = 16; break; ++ default: vassert(0); ++ } ++ if (szB == 16) { ++ HReg rD_MSword = INVALID_HREG; ++ HReg rD_LSword = INVALID_HREG; ++ iselInt128Expr(&rD_MSword, ++ &rD_LSword, env, stmt->Ist.LLSC.storedata); ++ addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD_LSword)); ++ addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rD_MSword)); ++ addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA)); ++ addInstr(env, ARM64Instr_StrEXP()); ++ } else { ++ vassert(szB != 0); ++ HReg rD = iselIntExpr_R(env, stmt->Ist.LLSC.storedata); ++ addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD)); ++ addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA)); ++ addInstr(env, ARM64Instr_StrEX(szB)); + } +- addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD)); +- addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA)); +- addInstr(env, ARM64Instr_StrEX(szB)); + } else { + goto stmt_fail; + } +@@ -4243,10 +4327,10 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) + + /* --------- ACAS --------- */ + case Ist_CAS: { +- if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) { ++ IRCAS* cas = stmt->Ist.CAS.details; ++ if (cas->oldHi == IRTemp_INVALID && cas->end == Iend_LE) { + /* "normal" singleton CAS */ + UChar sz; +- IRCAS* cas = stmt->Ist.CAS.details; + IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); + switch (ty) { + case Ity_I64: sz = 8; break; +@@ -4281,10 +4365,9 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) + addInstr(env, ARM64Instr_MovI(rOld, rResult)); + return; + } +- else { ++ if (cas->oldHi != IRTemp_INVALID && cas->end == Iend_LE) { + /* Paired register CAS, i.e. 
CASP */ + UChar sz; +- IRCAS* cas = stmt->Ist.CAS.details; + IRType ty = typeOfIRExpr(env->type_env, cas->dataLo); + switch (ty) { + case Ity_I64: sz = 8; break; +diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c +index 25566c41c..2d82c41a1 100644 +--- a/VEX/priv/ir_defs.c ++++ b/VEX/priv/ir_defs.c +@@ -76,6 +76,7 @@ void ppIRConst ( const IRConst* con ) + case Ico_U16: vex_printf( "0x%x:I16", (UInt)(con->Ico.U16)); break; + case Ico_U32: vex_printf( "0x%x:I32", (UInt)(con->Ico.U32)); break; + case Ico_U64: vex_printf( "0x%llx:I64", (ULong)(con->Ico.U64)); break; ++ case Ico_U128: vex_printf( "I128{0x%04x}", (UInt)(con->Ico.U128)); break; + case Ico_F32: u.f32 = con->Ico.F32; + vex_printf( "F32{0x%x}", u.i32); + break; +@@ -2266,6 +2267,13 @@ IRConst* IRConst_U64 ( ULong u64 ) + c->Ico.U64 = u64; + return c; + } ++IRConst* IRConst_U128 ( UShort con ) ++{ ++ IRConst* c = LibVEX_Alloc_inline(sizeof(IRConst)); ++ c->tag = Ico_U128; ++ c->Ico.U128 = con; ++ return c; ++} + IRConst* IRConst_F32 ( Float f32 ) + { + IRConst* c = LibVEX_Alloc_inline(sizeof(IRConst)); +@@ -4230,6 +4238,7 @@ IRType typeOfIRConst ( const IRConst* con ) + case Ico_U16: return Ity_I16; + case Ico_U32: return Ity_I32; + case Ico_U64: return Ity_I64; ++ case Ico_U128: return Ity_I128; + case Ico_F32: return Ity_F32; + case Ico_F32i: return Ity_F32; + case Ico_F64: return Ity_F64; +@@ -5129,7 +5138,7 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) + tyRes = typeOfIRTemp(tyenv, stmt->Ist.LLSC.result); + if (stmt->Ist.LLSC.storedata == NULL) { + /* it's a LL */ +- if (tyRes != Ity_I64 && tyRes != Ity_I32 ++ if (tyRes != Ity_I128 && tyRes != Ity_I64 && tyRes != Ity_I32 + && tyRes != Ity_I16 && tyRes != Ity_I8) + sanityCheckFail(bb,stmt,"Ist.LLSC(LL).result :: bogus"); + } else { +@@ -5137,7 +5146,7 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy ) + if (tyRes != Ity_I1) + sanityCheckFail(bb,stmt,"Ist.LLSC(SC).result: not :: Ity_I1"); + tyData = 
typeOfIRExpr(tyenv, stmt->Ist.LLSC.storedata); +- if (tyData != Ity_I64 && tyData != Ity_I32 ++ if (tyData != Ity_I128 && tyData != Ity_I64 && tyData != Ity_I32 + && tyData != Ity_I16 && tyData != Ity_I8) + sanityCheckFail(bb,stmt, + "Ist.LLSC(SC).result :: storedata bogus"); +@@ -5385,6 +5394,7 @@ Int sizeofIRType ( IRType ty ) + IRType integerIRTypeOfSize ( Int szB ) + { + switch (szB) { ++ case 16: return Ity_I128; + case 8: return Ity_I64; + case 4: return Ity_I32; + case 2: return Ity_I16; +diff --git a/VEX/pub/libvex_guest_arm64.h b/VEX/pub/libvex_guest_arm64.h +index 39b6ecdc2..91d06bd75 100644 +--- a/VEX/pub/libvex_guest_arm64.h ++++ b/VEX/pub/libvex_guest_arm64.h +@@ -157,14 +157,18 @@ typedef + note of bits 23 and 22. */ + UInt guest_FPCR; + +- /* Fallback LL/SC support. See bugs 344524 and 369459. */ +- ULong guest_LLSC_SIZE; // 0==no current transaction, else 1,2,4 or 8. ++ /* Fallback LL/SC support. See bugs 344524 and 369459. _LO64 and _HI64 ++ contain the original contents of _ADDR+0 .. _ADDR+15, but only _SIZE ++ number of bytes of it. The remaining 16-_SIZE bytes of them must be ++ zero. */ ++ ULong guest_LLSC_SIZE; // 0==no current transaction, else 1,2,4,8 or 16. + ULong guest_LLSC_ADDR; // Address of transaction. +- ULong guest_LLSC_DATA; // Original value at _ADDR, zero-extended. ++ ULong guest_LLSC_DATA_LO64; // Original value at _ADDR+0. ++ ULong guest_LLSC_DATA_HI64; // Original value at _ADDR+8. 
+ + /* Padding to make it have an 16-aligned size */ + /* UInt pad_end_0; */ +- ULong pad_end_1; ++ /* ULong pad_end_1; */ + } + VexGuestARM64State; + +diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h +index deaa044c1..85805bb69 100644 +--- a/VEX/pub/libvex_ir.h ++++ b/VEX/pub/libvex_ir.h +@@ -269,6 +269,8 @@ typedef + Ico_U16, + Ico_U32, + Ico_U64, ++ Ico_U128, /* 128-bit restricted integer constant, ++ same encoding scheme as V128 */ + Ico_F32, /* 32-bit IEEE754 floating */ + Ico_F32i, /* 32-bit unsigned int to be interpreted literally + as a IEEE754 single value. */ +@@ -295,6 +297,7 @@ typedef + UShort U16; + UInt U32; + ULong U64; ++ UShort U128; + Float F32; + UInt F32i; + Double F64; +@@ -311,6 +314,7 @@ extern IRConst* IRConst_U8 ( UChar ); + extern IRConst* IRConst_U16 ( UShort ); + extern IRConst* IRConst_U32 ( UInt ); + extern IRConst* IRConst_U64 ( ULong ); ++extern IRConst* IRConst_U128 ( UShort ); + extern IRConst* IRConst_F32 ( Float ); + extern IRConst* IRConst_F32i ( UInt ); + extern IRConst* IRConst_F64 ( Double ); +diff --git a/memcheck/mc_machine.c b/memcheck/mc_machine.c +index 919c7fae8..176c8e5cb 100644 +--- a/memcheck/mc_machine.c ++++ b/memcheck/mc_machine.c +@@ -1115,9 +1115,10 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB ) + if (o == GOF(CMSTART) && sz == 8) return -1; // untracked + if (o == GOF(CMLEN) && sz == 8) return -1; // untracked + +- if (o == GOF(LLSC_SIZE) && sz == 8) return -1; // untracked +- if (o == GOF(LLSC_ADDR) && sz == 8) return o; +- if (o == GOF(LLSC_DATA) && sz == 8) return o; ++ if (o == GOF(LLSC_SIZE) && sz == 8) return -1; // untracked ++ if (o == GOF(LLSC_ADDR) && sz == 8) return o; ++ if (o == GOF(LLSC_DATA_LO64) && sz == 8) return o; ++ if (o == GOF(LLSC_DATA_HI64) && sz == 8) return o; + + VG_(printf)("MC_(get_otrack_shadow_offset)(arm64)(off=%d,sz=%d)\n", + offset,szB); +diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c +index c6fd2653f..72ccb3c8c 100644 +--- 
a/memcheck/mc_translate.c ++++ b/memcheck/mc_translate.c +@@ -5497,8 +5497,11 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, + the address (shadow) to 'defined' following the test. */ + complainIfUndefined( mce, addr, guard ); + +- /* Now cook up a call to the relevant helper function, to read the +- data V bits from shadow memory. */ ++ /* Now cook up a call to the relevant helper function, to read the data V ++ bits from shadow memory. Note that I128 loads are done by pretending ++ we're doing a V128 load, and then converting the resulting V128 vbits ++ word to an I128, right at the end of this function -- see `castedToI128` ++ below. (It's only a minor hack :-) This pertains to bug 444399. */ + ty = shadowTypeV(ty); + + void* helper = NULL; +@@ -5511,6 +5514,7 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, + hname = "MC_(helperc_LOADV256le)"; + ret_via_outparam = True; + break; ++ case Ity_I128: // fallthrough. See comment above. + case Ity_V128: helper = &MC_(helperc_LOADV128le); + hname = "MC_(helperc_LOADV128le)"; + ret_via_outparam = True; +@@ -5576,7 +5580,7 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, + + /* We need to have a place to park the V bits we're just about to + read. */ +- IRTemp datavbits = newTemp(mce, ty, VSh); ++ IRTemp datavbits = newTemp(mce, ty == Ity_I128 ? Ity_V128 : ty, VSh); + + /* Here's the call. 
*/ + IRDirty* di; +@@ -5603,7 +5607,14 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce, + } + stmt( 'V', mce, IRStmt_Dirty(di) ); + +- return mkexpr(datavbits); ++ if (ty == Ity_I128) { ++ IRAtom* castedToI128 ++ = assignNew('V', mce, Ity_I128, ++ unop(Iop_ReinterpV128asI128, mkexpr(datavbits))); ++ return castedToI128; ++ } else { ++ return mkexpr(datavbits); ++ } + } + + +@@ -5631,6 +5642,7 @@ IRAtom* expr2vbits_Load ( MCEnv* mce, + case Ity_I16: + case Ity_I32: + case Ity_I64: ++ case Ity_I128: + case Ity_V128: + case Ity_V256: + return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard); +@@ -5928,6 +5940,7 @@ void do_shadow_Store ( MCEnv* mce, + c = IRConst_V256(V_BITS32_DEFINED); break; + case Ity_V128: // V128 weirdness -- used twice + c = IRConst_V128(V_BITS16_DEFINED); break; ++ case Ity_I128: c = IRConst_U128(V_BITS16_DEFINED); break; + case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break; + case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break; + case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break; +@@ -5948,6 +5961,7 @@ void do_shadow_Store ( MCEnv* mce, + switch (ty) { + case Ity_V256: /* we'll use the helper four times */ + case Ity_V128: /* we'll use the helper twice */ ++ case Ity_I128: /* we'll use the helper twice */ + case Ity_I64: helper = &MC_(helperc_STOREV64le); + hname = "MC_(helperc_STOREV64le)"; + break; +@@ -6051,9 +6065,9 @@ void do_shadow_Store ( MCEnv* mce, + stmt( 'V', mce, IRStmt_Dirty(diQ3) ); + + } +- else if (UNLIKELY(ty == Ity_V128)) { ++ else if (UNLIKELY(ty == Ity_V128 || ty == Ity_I128)) { + +- /* V128-bit case */ ++ /* V128/I128-bit case */ + /* See comment in next clause re 64-bit regparms */ + /* also, need to be careful about endianness */ + +@@ -6062,6 +6076,7 @@ void do_shadow_Store ( MCEnv* mce, + IRAtom *addrLo64, *addrHi64; + IRAtom *vdataLo64, *vdataHi64; + IRAtom *eBiasLo64, *eBiasHi64; ++ IROp opGetLO64, opGetHI64; + + if (end == Iend_LE) { + offLo64 = 0; +@@ -6071,9 +6086,17 @@ void do_shadow_Store ( MCEnv* 
mce, + offHi64 = 0; + } + ++ if (ty == Ity_V128) { ++ opGetLO64 = Iop_V128to64; ++ opGetHI64 = Iop_V128HIto64; ++ } else { ++ opGetLO64 = Iop_128to64; ++ opGetHI64 = Iop_128HIto64; ++ } ++ + eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64); + addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) ); +- vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata)); ++ vdataLo64 = assignNew('V', mce, Ity_I64, unop(opGetLO64, vdata)); + diLo64 = unsafeIRDirty_0_N( + 1/*regparms*/, + hname, VG_(fnptr_to_fnentry)( helper ), +@@ -6081,7 +6104,7 @@ void do_shadow_Store ( MCEnv* mce, + ); + eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64); + addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) ); +- vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata)); ++ vdataHi64 = assignNew('V', mce, Ity_I64, unop(opGetHI64, vdata)); + diHi64 = unsafeIRDirty_0_N( + 1/*regparms*/, + hname, VG_(fnptr_to_fnentry)( helper ), +@@ -6888,7 +6911,7 @@ static void do_shadow_LLSC ( MCEnv* mce, + /* Just treat this as a normal load, followed by an assignment of + the value to .result. 
*/ + /* Stay sane */ +- tl_assert(resTy == Ity_I64 || resTy == Ity_I32 ++ tl_assert(resTy == Ity_I128 || resTy == Ity_I64 || resTy == Ity_I32 + || resTy == Ity_I16 || resTy == Ity_I8); + assign( 'V', mce, resTmp, + expr2vbits_Load( +@@ -6899,7 +6922,7 @@ static void do_shadow_LLSC ( MCEnv* mce, + /* Stay sane */ + IRType dataTy = typeOfIRExpr(mce->sb->tyenv, + stStoredata); +- tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32 ++ tl_assert(dataTy == Ity_I128 || dataTy == Ity_I64 || dataTy == Ity_I32 + || dataTy == Ity_I16 || dataTy == Ity_I8); + do_shadow_Store( mce, stEnd, + stAddr, 0/* addr bias */, +@@ -7684,7 +7707,7 @@ static void schemeS ( MCEnv* mce, IRStmt* st ) + = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result); + IRExpr* vanillaLoad + = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr); +- tl_assert(resTy == Ity_I64 || resTy == Ity_I32 ++ tl_assert(resTy == Ity_I128 || resTy == Ity_I64 || resTy == Ity_I32 + || resTy == Ity_I16 || resTy == Ity_I8); + assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result), + schemeE(mce, vanillaLoad)); +diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am +index 449710020..2b43ef7d7 100644 +--- a/memcheck/tests/Makefile.am ++++ b/memcheck/tests/Makefile.am +@@ -90,6 +90,7 @@ EXTRA_DIST = \ + addressable.stderr.exp addressable.stdout.exp addressable.vgtest \ + atomic_incs.stderr.exp atomic_incs.vgtest \ + atomic_incs.stdout.exp-32bit atomic_incs.stdout.exp-64bit \ ++ atomic_incs.stdout.exp-64bit-and-128bit \ + badaddrvalue.stderr.exp \ + badaddrvalue.stdout.exp badaddrvalue.vgtest \ + exit_on_first_error.stderr.exp \ +diff --git a/memcheck/tests/atomic_incs.c b/memcheck/tests/atomic_incs.c +index f931750f4..1c738c530 100644 +--- a/memcheck/tests/atomic_incs.c ++++ b/memcheck/tests/atomic_incs.c +@@ -22,6 +22,17 @@ + #define NNN 3456987 + + #define IS_8_ALIGNED(_ptr) (0 == (((unsigned long)(_ptr)) & 7)) ++#define IS_16_ALIGNED(_ptr) (0 == (((unsigned long)(_ptr)) & 15)) ++ ++// U128 from 
libvex_basictypes.h is a 4-x-UInt array, which is a bit ++// inconvenient, hence: ++typedef ++ struct { ++ // assuming little-endianness ++ unsigned long long int lo64; ++ unsigned long long int hi64; ++ } ++ MyU128; + + + __attribute__((noinline)) void atomic_add_8bit ( char* p, int n ) +@@ -712,6 +723,40 @@ __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n ) + #endif + } + ++__attribute__((noinline)) void atomic_add_128bit ( MyU128* p, ++ unsigned long long int n ) ++{ ++#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32) \ ++ || defined (VGA_nanomips) || defined(VGA_mips64) \ ++ || defined(VGA_amd64) \ ++ || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ ++ || defined(VGA_arm) \ ++ || defined(VGA_s390x) ++ /* do nothing; is not supported */ ++#elif defined(VGA_arm64) ++ unsigned long long int block[3] ++ = { (unsigned long long int)p, (unsigned long long int)n, ++ 0xFFFFFFFFFFFFFFFFULL}; ++ do { ++ __asm__ __volatile__( ++ "mov x5, %0" "\n\t" // &block[0] ++ "ldr x9, [x5, #0]" "\n\t" // p ++ "ldr x10, [x5, #8]" "\n\t" // n ++ "ldxp x7, x8, [x9]" "\n\t" ++ "adds x7, x7, x10" "\n\t" ++ "adc x8, x8, xzr" "\n\t" ++ "stxp w4, x7, x8, [x9]" "\n\t" ++ "str x4, [x5, #16]" "\n\t" ++ : /*out*/ ++ : /*in*/ "r"(&block[0]) ++ : /*trash*/ "memory", "cc", "x5", "x7", "x8", "x9", "x10", "x4" ++ ); ++ } while (block[2] != 0); ++#else ++# error "Unsupported arch" ++#endif ++} ++ + int main ( int argc, char** argv ) + { + int i, status; +@@ -720,8 +765,12 @@ int main ( int argc, char** argv ) + short* p16; + int* p32; + long long int* p64; ++ MyU128* p128; + pid_t child, p2; + ++ assert(sizeof(MyU128) == 16); ++ assert(sysconf(_SC_PAGESIZE) >= 4096); ++ + printf("parent, pre-fork\n"); + + page = mmap( 0, sysconf(_SC_PAGESIZE), +@@ -736,11 +785,13 @@ int main ( int argc, char** argv ) + p16 = (short*)(page+256); + p32 = (int*)(page+512); + p64 = (long long int*)(page+768); ++ p128 = (MyU128*)(page+1024); + + assert( IS_8_ALIGNED(p8) ); + 
assert( IS_8_ALIGNED(p16) ); + assert( IS_8_ALIGNED(p32) ); + assert( IS_8_ALIGNED(p64) ); ++ assert( IS_16_ALIGNED(p128) ); + + memset(page, 0, 1024); + +@@ -748,6 +799,7 @@ int main ( int argc, char** argv ) + *p16 = 0; + *p32 = 0; + *p64 = 0; ++ p128->lo64 = p128->hi64 = 0; + + child = fork(); + if (child == -1) { +@@ -763,6 +815,7 @@ int main ( int argc, char** argv ) + atomic_add_16bit(p16, 1); + atomic_add_32bit(p32, 1); + atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */ ++ atomic_add_128bit(p128, 0x1000000013374771ULL); // ditto re upper 64 + } + return 1; + /* NOTREACHED */ +@@ -778,6 +831,7 @@ int main ( int argc, char** argv ) + atomic_add_16bit(p16, 1); + atomic_add_32bit(p32, 1); + atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */ ++ atomic_add_128bit(p128, 0x1000000013374771ULL); // ditto re upper 64 + } + + p2 = waitpid(child, &status, 0); +@@ -788,11 +842,17 @@ int main ( int argc, char** argv ) + + printf("FINAL VALUES: 8 bit %d, 16 bit %d, 32 bit %d, 64 bit %lld\n", + (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 ); ++ printf(" 128 bit 0x%016llx:0x%016llx\n", ++ p128->hi64, p128->lo64); + + if (-74 == (int)(*(signed char*)p8) + && 32694 == (int)(*p16) + && 6913974 == *p32 +- && (0LL == *p64 || 682858642110LL == *p64)) { ++ && (0LL == *p64 || 682858642110LL == *p64) ++ && ((0 == p128->hi64 && 0 == p128->lo64) ++ || (0x00000000000697fb == p128->hi64 ++ && 0x6007eb426316d956ULL == p128->lo64)) ++ ) { + printf("PASS\n"); + } else { + printf("FAIL -- see source code for expected values\n"); +diff --git a/memcheck/tests/atomic_incs.stdout.exp-32bit b/memcheck/tests/atomic_incs.stdout.exp-32bit +index c5b8781e5..55e5044b5 100644 +--- a/memcheck/tests/atomic_incs.stdout.exp-32bit ++++ b/memcheck/tests/atomic_incs.stdout.exp-32bit +@@ -3,5 +3,6 @@ child + parent, pre-fork + parent + FINAL VALUES: 8 bit -74, 16 bit 32694, 32 bit 6913974, 64 bit 0 ++ 128 bit 0x0000000000000000:0x0000000000000000 + PASS + parent exits 
+diff --git a/memcheck/tests/atomic_incs.stdout.exp-64bit b/memcheck/tests/atomic_incs.stdout.exp-64bit +index 82405c520..ca2f4fc97 100644 +--- a/memcheck/tests/atomic_incs.stdout.exp-64bit ++++ b/memcheck/tests/atomic_incs.stdout.exp-64bit +@@ -3,5 +3,6 @@ child + parent, pre-fork + parent + FINAL VALUES: 8 bit -74, 16 bit 32694, 32 bit 6913974, 64 bit 682858642110 ++ 128 bit 0x0000000000000000:0x0000000000000000 + PASS + parent exits +diff --git a/memcheck/tests/atomic_incs.stdout.exp-64bit-and-128bit b/memcheck/tests/atomic_incs.stdout.exp-64bit-and-128bit +new file mode 100644 +index 000000000..ef6580917 +--- /dev/null ++++ b/memcheck/tests/atomic_incs.stdout.exp-64bit-and-128bit +@@ -0,0 +1,8 @@ ++parent, pre-fork ++child ++parent, pre-fork ++parent ++FINAL VALUES: 8 bit -74, 16 bit 32694, 32 bit 6913974, 64 bit 682858642110 ++ 128 bit 0x00000000000697fb:0x6007eb426316d956 ++PASS ++parent exits +diff --git a/none/tests/arm64/Makefile.am b/none/tests/arm64/Makefile.am +index 00cbfa52c..9efb49b27 100644 +--- a/none/tests/arm64/Makefile.am ++++ b/none/tests/arm64/Makefile.am +@@ -12,7 +12,10 @@ EXTRA_DIST = \ + atomics_v81.stdout.exp atomics_v81.stderr.exp atomics_v81.vgtest \ + simd_v81.stdout.exp simd_v81.stderr.exp simd_v81.vgtest \ + fmadd_sub.stdout.exp fmadd_sub.stderr.exp fmadd_sub.vgtest \ +- fp_and_simd_v82.stdout.exp fp_and_simd_v82.stderr.exp fp_and_simd_v82.vgtest ++ fp_and_simd_v82.stdout.exp fp_and_simd_v82.stderr.exp \ ++ fp_and_simd_v82.vgtest \ ++ ldxp_stxp.stdout.exp ldxp_stxp.stderr.exp \ ++ ldxp_stxp_basisimpl.vgtest ldxp_stxp_fallbackimpl.vgtest + + check_PROGRAMS = \ + allexec \ +@@ -20,7 +23,8 @@ check_PROGRAMS = \ + fp_and_simd \ + integer \ + memory \ +- fmadd_sub ++ fmadd_sub \ ++ ldxp_stxp + + if BUILD_ARMV8_CRC_TESTS + check_PROGRAMS += crc32 +diff --git a/none/tests/arm64/ldxp_stxp.c b/none/tests/arm64/ldxp_stxp.c +new file mode 100644 +index 000000000..b5f6ea121 +--- /dev/null ++++ b/none/tests/arm64/ldxp_stxp.c +@@ -0,0 +1,93 @@ ++ 
++/* Note, this is only a basic smoke test of LD{A}XP and ST{L}XP. Their ++ atomicity properties are tested by memcheck/tests/atomic_incs.c. */ ++ ++#include ++#include ++#include ++#include ++ ++typedef unsigned int UInt; ++typedef unsigned long long int ULong; ++ ++ ++void initBlock ( ULong* block ) ++{ ++ block[0] = 0x0001020304050607ULL; ++ block[1] = 0x1011121314151617ULL; ++ block[2] = 0x2021222324252627ULL; ++ block[3] = 0x3031323334353637ULL; ++ block[4] = 0x4041424344454647ULL; ++ block[5] = 0x5051525354555657ULL; ++} ++ ++void printBlock ( const char* who, ++ ULong* block, ULong rt1contents, ULong rt2contents, ++ UInt zeroIfSuccess ) ++{ ++ printf("Block %s (%s)\n", who, zeroIfSuccess == 0 ? "success" : "FAILURE" ); ++ for (int i = 0; i < 6; i++) { ++ printf("0x%016llx\n", block[i]); ++ } ++ printf("0x%016llx rt1contents\n", rt1contents); ++ printf("0x%016llx rt2contents\n", rt2contents); ++ printf("\n"); ++} ++ ++int main ( void ) ++{ ++ ULong* block = memalign(16, 6 * sizeof(ULong)); ++ assert(block); ++ ++ ULong rt1in, rt2in, rt1out, rt2out; ++ UInt scRes; ++ ++ // Do ldxp then stxp with x-registers ++ initBlock(block); ++ rt1in = 0x5555666677778888ULL; ++ rt2in = 0xAAAA9999BBBB0000ULL; ++ rt1out = 0x1111222233334444ULL; ++ rt2out = 0xFFFFEEEEDDDDCCCCULL; ++ scRes = 0x55555555; ++ __asm__ __volatile__( ++ "ldxp %1, %2, [%5]" "\n\t" ++ "stxp %w0, %3, %4, [%5]" "\n\t" ++ : /*OUT*/ ++ "=&r"(scRes), // %0 ++ "=&r"(rt1out), // %1 ++ "=&r"(rt2out) // %2 ++ : /*IN*/ ++ "r"(rt1in), // %3 ++ "r"(rt2in), // %4 ++ "r"(&block[2]) // %5 ++ : /*TRASH*/ ++ "memory","cc" ++ ); ++ printBlock("after ldxp/stxp 2x64-bit", block, rt1out, rt2out, scRes); ++ ++ // Do ldxp then stxp with w-registers ++ initBlock(block); ++ rt1in = 0x5555666677778888ULL; ++ rt2in = 0xAAAA9999BBBB0000ULL; ++ rt1out = 0x1111222233334444ULL; ++ rt2out = 0xFFFFEEEEDDDDCCCCULL; ++ scRes = 0x55555555; ++ __asm__ __volatile__( ++ "ldxp %w1, %w2, [%5]" "\n\t" ++ "stxp %w0, %w3, %w4, [%5]" "\n\t" ++ : 
/*OUT*/ ++ "=&r"(scRes), // %0 ++ "=&r"(rt1out), // %1 ++ "=&r"(rt2out) // %2 ++ : /*IN*/ ++ "r"(rt1in), // %3 ++ "r"(rt2in), // %4 ++ "r"(&block[2]) // %5 ++ : /*TRASH*/ ++ "memory","cc" ++ ); ++ printBlock("after ldxp/stxp 2x32-bit", block, rt1out, rt2out, scRes); ++ ++ free(block); ++ return 0; ++} +diff --git a/none/tests/arm64/ldxp_stxp_basisimpl.stderr.exp b/none/tests/arm64/ldxp_stxp_basisimpl.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/arm64/ldxp_stxp_basisimpl.stdout.exp b/none/tests/arm64/ldxp_stxp_basisimpl.stdout.exp +new file mode 100644 +index 000000000..f269ecdcc +--- /dev/null ++++ b/none/tests/arm64/ldxp_stxp_basisimpl.stdout.exp +@@ -0,0 +1,20 @@ ++Block after ldxp/stxp 2x64-bit (success) ++0x0001020304050607 ++0x1011121314151617 ++0x5555666677778888 ++0xaaaa9999bbbb0000 ++0x4041424344454647 ++0x5051525354555657 ++0x2021222324252627 rt1contents ++0x3031323334353637 rt2contents ++ ++Block after ldxp/stxp 2x32-bit (success) ++0x0001020304050607 ++0x1011121314151617 ++0xbbbb000077778888 ++0x3031323334353637 ++0x4041424344454647 ++0x5051525354555657 ++0x0000000024252627 rt1contents ++0x0000000020212223 rt2contents ++ +diff --git a/none/tests/arm64/ldxp_stxp_basisimpl.vgtest b/none/tests/arm64/ldxp_stxp_basisimpl.vgtest +new file mode 100644 +index 000000000..29133729a +--- /dev/null ++++ b/none/tests/arm64/ldxp_stxp_basisimpl.vgtest +@@ -0,0 +1,2 @@ ++prog: ldxp_stxp ++vgopts: -q +diff --git a/none/tests/arm64/ldxp_stxp_fallbackimpl.stderr.exp b/none/tests/arm64/ldxp_stxp_fallbackimpl.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/arm64/ldxp_stxp_fallbackimpl.stdout.exp b/none/tests/arm64/ldxp_stxp_fallbackimpl.stdout.exp +new file mode 100644 +index 000000000..f269ecdcc +--- /dev/null ++++ b/none/tests/arm64/ldxp_stxp_fallbackimpl.stdout.exp +@@ -0,0 +1,20 @@ ++Block after ldxp/stxp 2x64-bit (success) ++0x0001020304050607 ++0x1011121314151617 ++0x5555666677778888 
++0xaaaa9999bbbb0000 ++0x4041424344454647 ++0x5051525354555657 ++0x2021222324252627 rt1contents ++0x3031323334353637 rt2contents ++ ++Block after ldxp/stxp 2x32-bit (success) ++0x0001020304050607 ++0x1011121314151617 ++0xbbbb000077778888 ++0x3031323334353637 ++0x4041424344454647 ++0x5051525354555657 ++0x0000000024252627 rt1contents ++0x0000000020212223 rt2contents ++ +diff --git a/none/tests/arm64/ldxp_stxp_fallbackimpl.vgtest b/none/tests/arm64/ldxp_stxp_fallbackimpl.vgtest +new file mode 100644 +index 000000000..474282a03 +--- /dev/null ++++ b/none/tests/arm64/ldxp_stxp_fallbackimpl.vgtest +@@ -0,0 +1,2 @@ ++prog: ldxp_stxp ++vgopts: -q --sim-hints=fallback-llsc + +commit 0d38ca5dd6b446c70738031132d41f09de0f7a8a +Author: Julian Seward +Date: Fri Nov 12 13:08:45 2021 +0100 + + Bug 444399 - disInstr(arm64): unhandled instruction 0xC87F2D89 (LD{,A}XP and ST{,L}XP). FOLLOWUP FIX. + + This is an attempt to un-break 'make dist', as broken by the main commit for + this bug, which was 530df882b8f60ecacaf2b9b8a719f7ea1c1d1650. 
+ +diff --git a/none/tests/arm64/Makefile.am b/none/tests/arm64/Makefile.am +index 9efb49b27..4a06f0996 100644 +--- a/none/tests/arm64/Makefile.am ++++ b/none/tests/arm64/Makefile.am +@@ -14,8 +14,10 @@ EXTRA_DIST = \ + fmadd_sub.stdout.exp fmadd_sub.stderr.exp fmadd_sub.vgtest \ + fp_and_simd_v82.stdout.exp fp_and_simd_v82.stderr.exp \ + fp_and_simd_v82.vgtest \ +- ldxp_stxp.stdout.exp ldxp_stxp.stderr.exp \ +- ldxp_stxp_basisimpl.vgtest ldxp_stxp_fallbackimpl.vgtest ++ ldxp_stxp_basisimpl.stdout.exp ldxp_stxp_basisimpl.stderr.exp \ ++ ldxp_stxp_basisimpl.vgtest \ ++ ldxp_stxp_fallbackimpl.stdout.exp ldxp_stxp_fallbackimpl.stderr.exp \ ++ ldxp_stxp_fallbackimpl.vgtest + + check_PROGRAMS = \ + allexec \ diff --git a/SOURCES/valgrind-3.18.1-condvar.patch b/SOURCES/valgrind-3.18.1-condvar.patch new file mode 100644 index 0000000..e129326 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-condvar.patch @@ -0,0 +1,284 @@ +commit 9abfed23c0d430aafb85de6397d171316c982792 +Author: Paul Floyd +Date: Fri Nov 19 08:34:53 2021 +0100 + + Bug 445504 Using C++ condition_variable results in bogus "mutex is locked simultaneously by two threads" warning(edit) + + Add intercepts for pthread_cond_clockwait to DRD and Helgrind + Also testcase from bugzilla done by Bart, with configure check + +diff --git a/configure.ac b/configure.ac +index e7381f205..cb836dbff 100755 +--- a/configure.ac ++++ b/configure.ac +@@ -1989,6 +1989,27 @@ AC_LANG(C) + + AM_CONDITIONAL(CXX_CAN_INCLUDE_THREAD_HEADER, test x$ac_cxx_can_include_thread_header = xyes) + ++# Check whether compiler can process #include without errors ++ ++AC_MSG_CHECKING([that C++ compiler can include header file]) ++AC_LANG(C++) ++safe_CXXFLAGS=$CXXFLAGS ++CXXFLAGS=-std=c++0x ++ ++AC_COMPILE_IFELSE([AC_LANG_SOURCE([ ++#include ++])], ++[ ++ac_cxx_can_include_condition_variable_header=yes ++AC_MSG_RESULT([yes]) ++], [ ++ac_cxx_can_include_condition_variable_header=no ++AC_MSG_RESULT([no]) ++]) ++CXXFLAGS=$safe_CXXFLAGS ++AC_LANG(C) ++ 
++AM_CONDITIONAL(CXX_CAN_INCLUDE_CONDITION_VARIABLE_HEADER, test x$ac_cxx_can_include_condition_variable_header = xyes) + + # On aarch64 before glibc 2.20 we would get the kernel user_pt_regs instead + # of the user_regs_struct from sys/user.h. They are structurally the same +diff --git a/drd/drd_pthread_intercepts.c b/drd/drd_pthread_intercepts.c +index 8b4454364..95127b42c 100644 +--- a/drd/drd_pthread_intercepts.c ++++ b/drd/drd_pthread_intercepts.c +@@ -1175,6 +1175,30 @@ PTH_FUNCS(int, condZureltimedwait, pthread_cond_timedwait_intercept, + (cond, mutex, timeout)); + #endif /* VGO_solaris */ + ++ ++static __always_inline ++int pthread_cond_clockwait_intercept(pthread_cond_t *cond, ++ pthread_mutex_t *mutex, ++ clockid_t clockid, ++ const struct timespec* abstime) ++{ ++ int ret; ++ OrigFn fn; ++ VALGRIND_GET_ORIG_FN(fn); ++ VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__PRE_COND_WAIT, ++ cond, mutex, DRD_(mutex_type)(mutex), 0, 0); ++ CALL_FN_W_WWWW(ret, fn, cond, mutex, clockid, abstime); ++ VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__POST_COND_WAIT, ++ cond, mutex, 1, 0, 0); ++ return ret; ++} ++ ++PTH_FUNCS(int, pthreadZucondZuclockwait, pthread_cond_clockwait_intercept, ++ (pthread_cond_t *cond, pthread_mutex_t *mutex, ++ clockid_t clockid, const struct timespec* abstime), ++ (cond, mutex, clockid, abstime)); ++ ++ + // NOTE: be careful to intercept only pthread_cond_signal() and not Darwin's + // pthread_cond_signal_thread_np(). The former accepts one argument; the latter + // two. 
Intercepting all pthread_cond_signal* functions will cause only one +diff --git a/drd/tests/Makefile.am b/drd/tests/Makefile.am +index 4cb2f7f84..c804391e8 100755 +--- a/drd/tests/Makefile.am ++++ b/drd/tests/Makefile.am +@@ -105,6 +105,8 @@ EXTRA_DIST = \ + circular_buffer.vgtest \ + concurrent_close.stderr.exp \ + concurrent_close.vgtest \ ++ condvar.stderr.exp \ ++ condvar.vgtest \ + custom_alloc.stderr.exp \ + custom_alloc.vgtest \ + custom_alloc_fiw.stderr.exp \ +@@ -458,6 +460,11 @@ check_PROGRAMS += \ + endif + endif + ++if CXX_CAN_INCLUDE_CONDITION_VARIABLE_HEADER ++check_PROGRAMS += \ ++ condvar ++endif ++ + if HAVE_OPENMP + check_PROGRAMS += omp_matinv omp_prime omp_printf + endif +@@ -502,6 +509,8 @@ LDADD = -lpthread + + + bug322621_SOURCES = bug322621.cpp ++condvar_SOURCES = condvar.cpp ++condvar_CXXFLAGS = $(AM_CXXFLAGS) -std=c++0x + concurrent_close_SOURCES = concurrent_close.cpp + if !VGCONF_OS_IS_FREEBSD + dlopen_main_LDADD = -ldl +diff --git a/drd/tests/condvar.cpp b/drd/tests/condvar.cpp +new file mode 100644 +index 000000000..18ecb3f8a +--- /dev/null ++++ b/drd/tests/condvar.cpp +@@ -0,0 +1,55 @@ ++/* See also https://bugs.kde.org/show_bug.cgi?id=445504 */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++using lock_guard = std::lock_guard; ++using unique_lock = std::unique_lock; ++ ++struct state { ++ std::mutex m; ++ std::vector v; ++ std::condition_variable cv; ++ ++ state() { ++ // Call pthread_cond_init() explicitly to let DRD know about 'cv'. 
++ pthread_cond_init(cv.native_handle(), NULL); ++ } ++}; ++ ++void other_thread(state *sp) { ++ state &s = *sp; ++ std::cerr << "Other thread: waiting for notify\n"; ++ unique_lock l{s.m}; ++ while (true) { ++ if (s.cv.wait_for(l, std::chrono::seconds(3)) != ++ std::cv_status::timeout) { ++ std::cerr << "Other thread: notified\n"; ++ break; ++ } ++ } ++ return; ++} ++ ++ ++int main() { ++ state s; ++ auto future = std::async(std::launch::async, other_thread, &s); ++ ++ if (future.wait_for(std::chrono::seconds(1)) != std::future_status::timeout) { ++ std::cerr << "Main: other thread returned too early!\n"; ++ return 2; ++ } ++ ++ { ++ std::lock_guard g{s.m}; ++ s.v.push_back(1); ++ s.v.push_back(2); ++ s.cv.notify_all(); ++ } ++ return 0; ++} +diff --git a/drd/tests/condvar.stderr.exp b/drd/tests/condvar.stderr.exp +new file mode 100644 +index 000000000..be1de9f97 +--- /dev/null ++++ b/drd/tests/condvar.stderr.exp +@@ -0,0 +1,5 @@ ++ ++Other thread: waiting for notify ++Other thread: notified ++ ++ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) +diff --git a/drd/tests/condvar.vgtest b/drd/tests/condvar.vgtest +new file mode 100644 +index 000000000..2e7d49f5a +--- /dev/null ++++ b/drd/tests/condvar.vgtest +@@ -0,0 +1,3 @@ ++prereq: ./supported_libpthread && [ -e condvar ] ++vgopts: --check-stack-var=yes --read-var-info=yes ++prog: condvar +diff --git a/helgrind/hg_intercepts.c b/helgrind/hg_intercepts.c +index 866efdbaa..49c3ddcd9 100644 +--- a/helgrind/hg_intercepts.c ++++ b/helgrind/hg_intercepts.c +@@ -1409,6 +1409,88 @@ static int pthread_cond_timedwait_WRK(pthread_cond_t* cond, + # error "Unsupported OS" + #endif + ++//----------------------------------------------------------- ++// glibc: pthread_cond_clockwait ++// ++__attribute__((noinline)) ++static int pthread_cond_clockwait_WRK(pthread_cond_t* cond, ++ pthread_mutex_t* mutex, ++ clockid_t clockid, ++ struct timespec* abstime, ++ int timeout_error) ++{ ++ int ret; ++ OrigFn fn; ++ unsigned 
long mutex_is_valid; ++ Bool abstime_is_valid; ++ VALGRIND_GET_ORIG_FN(fn); ++ ++ if (TRACE_PTH_FNS) { ++ fprintf(stderr, "<< pthread_cond_clockwait %p %p %p", ++ cond, mutex, abstime); ++ fflush(stderr); ++ } ++ ++ /* Tell the tool a cond-wait is about to happen, so it can check ++ for bogus argument values. In return it tells us whether it ++ thinks the mutex is valid or not. */ ++ DO_CREQ_W_WW(mutex_is_valid, ++ _VG_USERREQ__HG_PTHREAD_COND_WAIT_PRE, ++ pthread_cond_t*,cond, pthread_mutex_t*,mutex); ++ assert(mutex_is_valid == 1 || mutex_is_valid == 0); ++ ++ abstime_is_valid = abstime->tv_nsec >= 0 && abstime->tv_nsec < 1000000000; ++ ++ /* Tell the tool we're about to drop the mutex. This reflects the ++ fact that in a cond_wait, we show up holding the mutex, and the ++ call atomically drops the mutex and waits for the cv to be ++ signalled. */ ++ if (mutex_is_valid && abstime_is_valid) { ++ DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_PRE, ++ pthread_mutex_t*,mutex); ++ } ++ ++ CALL_FN_W_WWWW(ret, fn, cond,mutex,clockid,abstime); ++ ++ if (mutex_is_valid && !abstime_is_valid && ret != EINVAL) { ++ DO_PthAPIerror("Bug in libpthread: pthread_cond_clockwait " ++ "invalid abstime did not cause" ++ " EINVAL", ret); ++ } ++ ++ if (mutex_is_valid && abstime_is_valid) { ++ /* and now we have the mutex again if (ret == 0 || ret == timeout) */ ++ DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_POST, ++ pthread_mutex_t *, mutex, ++ long, (ret == 0 || ret == timeout_error) ? True : False); ++ } ++ ++ DO_CREQ_v_WWWW(_VG_USERREQ__HG_PTHREAD_COND_WAIT_POST, ++ pthread_cond_t*,cond, pthread_mutex_t*,mutex, ++ long,ret == timeout_error, ++ long, (ret == 0 || ret == timeout_error) && mutex_is_valid ++ ? 
True : False); ++ ++ if (ret != 0 && ret != timeout_error) { ++ DO_PthAPIerror( "pthread_cond_clockwait", ret ); ++ } ++ ++ if (TRACE_PTH_FNS) { ++ fprintf(stderr, " cotimedwait -> %d >>\n", ret); ++ } ++ ++ return ret; ++} ++ ++#if defined(VGO_linux) ++ PTH_FUNC(int, pthreadZucondZuclockwait, // pthread_cond_clockwait ++ pthread_cond_t* cond, pthread_mutex_t* mutex, ++ clockid_t clockid, ++ struct timespec* abstime) { ++ return pthread_cond_clockwait_WRK(cond, mutex, clockid, abstime, ETIMEDOUT); ++ } ++#endif ++ + + //----------------------------------------------------------- + // glibc: pthread_cond_signal@GLIBC_2.0 diff --git a/SOURCES/valgrind-3.18.1-demangle-namespace.patch b/SOURCES/valgrind-3.18.1-demangle-namespace.patch new file mode 100644 index 0000000..25ddf92 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-demangle-namespace.patch @@ -0,0 +1,35 @@ +commit 542447d4708d4418a08e678dcf467af92b90b7ad +Author: Mark Wielaard +Date: Mon Nov 22 13:07:59 2021 +0100 + + readdwarf3.c (parse_inl_DIE) inlined_subroutine can appear in namespaces + + This was broken by commit 75e3ef0f3 "readdwarf3: Skip units without + addresses when looking for inlined functions". Specifically by this + part: "Also use skip_DIE instead of read_DIE when not parsing + (skipping) children" + + rustc puts concrete function instances in namespaces (which is + allowed in DWARF since there is no strict separation between type + declarations and program scope entries in a DIE tree), the inline + parser didn't expect this and so skipped any DIE under a namespace + entry. This wasn't an issue before because "skipping" a DIE tree was + done by reading it, so it wasn't actually skipped. But now that we + really skip the DIE (sub)tree (which is faster than actually parsing + it) some entries were missed in the rustc case. 
+ + https://bugs.kde.org/show_bug.cgi?id=445668 + +diff --git a/coregrind/m_debuginfo/readdwarf3.c b/coregrind/m_debuginfo/readdwarf3.c +index 18eecea9f..5489f8d13 100644 +--- a/coregrind/m_debuginfo/readdwarf3.c ++++ b/coregrind/m_debuginfo/readdwarf3.c +@@ -3358,7 +3358,7 @@ static Bool parse_inl_DIE ( + // might maybe contain a DW_TAG_inlined_subroutine: + Bool ret = (unit_has_addrs + || dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram +- || dtag == DW_TAG_inlined_subroutine); ++ || dtag == DW_TAG_inlined_subroutine || dtag == DW_TAG_namespace); + return ret; + + bad_DIE: diff --git a/SOURCES/valgrind-3.18.1-dhat-tests-copy.patch b/SOURCES/valgrind-3.18.1-dhat-tests-copy.patch new file mode 100644 index 0000000..8e183b9 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-dhat-tests-copy.patch @@ -0,0 +1,20 @@ +commit 33aba8eef68b1745d3de96b609ff8296b70d9a1c +Author: Paul Floyd +Date: Wed Oct 27 21:37:00 2021 +0200 + + Bug 444495 - dhat/tests/copy fails on s390x + + Add -fno-builtin to ensure that the copy functions get called and so dhat + can intercept and count them. 
+ +diff --git a/dhat/tests/Makefile.am b/dhat/tests/Makefile.am +index 86a9b6d64..b86fc416d 100644 +--- a/dhat/tests/Makefile.am ++++ b/dhat/tests/Makefile.am +@@ -29,3 +29,6 @@ AM_CXXFLAGS += $(AM_FLAG_M3264_PRI) + # We don't care about uninitialized or unused malloc results + basic_CFLAGS = $(AM_CFLAGS) -Wno-uninitialized + big_CFLAGS = $(AM_CFLAGS) -Wno-unused-result ++ ++# Prevent the copying functions from being inlined ++copy_CFLAGS = $(AM_CFLAGS) -fno-builtin diff --git a/SOURCES/valgrind-3.18.1-gdbserver_tests-hwcap.patch b/SOURCES/valgrind-3.18.1-gdbserver_tests-hwcap.patch new file mode 100644 index 0000000..2d952cd --- /dev/null +++ b/SOURCES/valgrind-3.18.1-gdbserver_tests-hwcap.patch @@ -0,0 +1,25 @@ +commit 64ab89162906d5b9e2de6c3afe476fec861ef7ec +Author: Mark Wielaard +Date: Tue Nov 2 14:27:45 2021 +0100 + + gdbserver_tests: Filter out glibc hwcaps libc.so + + On some systems the gdbserver_tests would fail because the filter + for the optimized hwcaps subdir didn't match because the file is + called slightly differently, with the version number before .so + instead of after. For example: /lib64/glibc-hwcaps/power9/libc-2.28.so + + Add one extra filter for this pattern. 
+ +diff --git a/gdbserver_tests/filter_gdb.in b/gdbserver_tests/filter_gdb.in +index d0c94f3f1..b753e0168 100755 +--- a/gdbserver_tests/filter_gdb.in ++++ b/gdbserver_tests/filter_gdb.in +@@ -134,6 +134,7 @@ s/in \(.__\)\{0,1\}select () from \/.*$/in syscall .../ + /^ from \/lib\/libc.so.*$/d + /^ from \/lib64\/libc.so.*$/d + /^ from \/lib64\/.*\/libc.so.*$/d ++/^ from \/lib64\/.*\/libc-.*.so/d + + # and yet another (gdb 7.0 way) to get a system call + s/in select ()$/in syscall .../ diff --git a/SOURCES/valgrind-3.18.1-ppc-pstq-tests.patch b/SOURCES/valgrind-3.18.1-ppc-pstq-tests.patch new file mode 100644 index 0000000..58498f2 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-ppc-pstq-tests.patch @@ -0,0 +1,1876 @@ +commit 3950c5d661ee09526cddcf24daf5fc22bc83f70c +Author: Carl Love +Date: Mon Nov 1 11:18:32 2021 -0500 + + Valgrind Add powerpc R=1 tests + + Contributed by Will Schmidt + + This includes updates and adjustments as suggested by Carl. + + Add tests that exercise PCRelative instructions. + These instructions are encoded with R==1, which indicates that + the memory accessed by the instruction is at a location + relative to the currently executing instruction. + + These tests are built using -Wl,-Ttext and -Wl,-Tbss + options to ensure the location of the target array is at a + location with a specific offset from the currently + executing instruction. + + The write instructions are aimed at a large buffer in + the bss section, which is checked for updates at the + completion of each test. + + In order to ensure consistent output across assorted + systems, the tests have been padded with ori, nop instructions + and align directives. + + Detailed changes: + * Makefile.am: Add test_isa_3_1_R1_RT and test_isa_3_1_R1_XT tests. + * isa_3_1_helpers.h: Add identify_instruction_by_func_name() helper function + to indicate if the test is for R==1. + Add helpers to initialize and print changes to the pcrelative_write_target + array.
+ Add #define to help pad code with a series of eyecatcher ORI instructions. + * test_isa_3_1_R1_RT.c: New test. + * test_isa_3_1_R1_XT.c: New test. + * test_isa_3_1_R1_RT.stdout.exp: New expected output. + * test_isa_3_1_R1_XT.stdout.exp: New expected output. + * test_isa_3_1_R1_RT.stderr.exp: New expected output. + * test_isa_3_1_R1_XT.stderr.exp: New expected output. + + * test_isa_3_1_R1_RT.vgtest: New test handler. + * test_isa_3_1_R1_XT.vgtest: New test handler. + + * test_isa_3_1_common.c: Add indicators (updates_byte, updates_halfword, + updates_word) to control the output from the R==1 tests. + Add helper check for "_R1" to indicate if instruction is coded with R==1. + Add init and print helpers for the pcrelative_write_target array. + +diff --git a/none/tests/ppc64/Makefile.am b/none/tests/ppc64/Makefile.am +index b709f3ef4..f8eab9fc0 100644 +--- a/none/tests/ppc64/Makefile.am ++++ b/none/tests/ppc64/Makefile.am +@@ -61,6 +61,8 @@ EXTRA_DIST = \ + test_isa_3_1_VRT.vgtest test_isa_3_1_VRT.stderr.exp test_isa_3_1_VRT.stdout.exp \ + test_isa_3_1_Misc.vgtest test_isa_3_1_Misc.stderr.exp test_isa_3_1_Misc.stdout.exp \ + test_isa_3_1_AT.vgtest test_isa_3_1_AT.stderr.exp test_isa_3_1_AT.stdout.exp \ ++ test_isa_3_1_R1_RT.vgtest test_isa_3_1_R1_RT.stderr.exp test_isa_3_1_R1_RT.stdout.exp \ ++ test_isa_3_1_R1_XT.vgtest test_isa_3_1_R1_XT.stderr.exp test_isa_3_1_R1_XT.stdout.exp \ + subnormal_test.stderr.exp subnormal_test.stdout.exp \ + subnormal_test.vgtest test_darn_inst.stderr.exp \ + test_darn_inst.stdout.exp test_darn_inst.vgtest \ +@@ -68,8 +70,8 @@ EXTRA_DIST = \ + test_copy_paste.stderr.exp test_copy_paste.stdout.exp \ + test_copy_paste.vgtest \ + test_mcrxrx.vgtest test_mcrxrx.stderr.exp test_mcrxrx.stdout.exp \ +- test_lxvx_stxvx.vgtest test_lxvx_stxvx.stderr.exp test_lxvx_stxvx.stdout.exp-p8 test_lxvx_stxvx.stdout.exp-p9 +- ++ test_lxvx_stxvx.vgtest test_lxvx_stxvx.stderr.exp \ ++ test_lxvx_stxvx.stdout.exp-p8 test_lxvx_stxvx.stdout.exp-p9 + +
check_PROGRAMS = \ + allexec \ +@@ -80,11 +82,12 @@ check_PROGRAMS = \ + test_isa_3_0 test_mod_instructions \ + test_isa_3_1_RT test_isa_3_1_XT test_isa_3_1_VRT \ + test_isa_3_1_Misc test_isa_3_1_AT \ ++ test_isa_3_1_R1_RT test_isa_3_1_R1_XT \ + subnormal_test test_darn_inst test_copy_paste \ + test_tm test_touch_tm data-cache-instructions \ + std_reg_imm \ + twi_tdi tw_td power6_bcmp scv_test \ +- test_mcrxrx test_lxvx_stxvx ++ test_mcrxrx test_lxvx_stxvx + + # lmw, stmw, lswi, lswx, stswi, stswx compile (and run) only on big endian. + if VGCONF_PLATFORMS_INCLUDE_PPC64BE_LINUX +@@ -106,6 +109,8 @@ test_isa_3_1_RT_SOURCES = test_isa_3_1_RT.c test_isa_3_1_common.c + test_isa_3_1_VRT_SOURCES = test_isa_3_1_VRT.c test_isa_3_1_common.c + test_isa_3_1_Misc_SOURCES = test_isa_3_1_Misc.c test_isa_3_1_common.c + test_isa_3_1_AT_SOURCES = test_isa_3_1_AT.c test_isa_3_1_common.c ++test_isa_3_1_R1_XT_SOURCES = test_isa_3_1_R1_XT.c test_isa_3_1_common.c ++test_isa_3_1_R1_RT_SOURCES = test_isa_3_1_R1_RT.c test_isa_3_1_common.c + test_darn_inst_SOURCES = test_darn_inst.c + + if HAS_ALTIVEC +@@ -224,6 +229,11 @@ test_isa_3_1_VRT_CFLAGS = $(test_isa_3_1_CFLAGS) + test_isa_3_1_Misc_CFLAGS = $(test_isa_3_1_CFLAGS) + test_isa_3_1_AT_CFLAGS = $(test_isa_3_1_CFLAGS) + ++# The _R1_foo tests exercise pc-relative instructions, so require the bss and text sections ++# exist at known offsets with respect to each other. 
++test_isa_3_1_R1_RT_CFLAGS = $(test_isa_3_1_CFLAGS) -Wl,-Tbss,0x20000 -Wl,-Ttext,0x40000 ++test_isa_3_1_R1_XT_CFLAGS = $(test_isa_3_1_CFLAGS) -Wl,-Tbss,0x20000 -Wl,-Ttext,0x40000 ++ + subnormal_test_CFLAGS = $(AM_CFLAGS) -Winline -Wall -O -g -mregnames $(VSX_FLAG) $(ISA_2_06_FLAG) \ + @FLAG_M64@ $(ALTIVEC_FLAG) $(BUILD_FLAG_VSX) $(BUILD_FLAGS_ISA_2_06) + +diff --git a/none/tests/ppc64/isa_3_1_helpers.h b/none/tests/ppc64/isa_3_1_helpers.h +index 338f55526..716a6277b 100644 +--- a/none/tests/ppc64/isa_3_1_helpers.h ++++ b/none/tests/ppc64/isa_3_1_helpers.h +@@ -43,6 +43,9 @@ extern void debug_show_current_iteration(); + extern void debug_dump_buffer(); + + extern void identify_form_components(const char *, const char *); ++extern void identify_instruction_by_func_name(const char *); ++extern void init_pcrelative_write_target(); ++extern void print_pcrelative_write_target(); + extern void dump_vsxargs(); + extern void generic_prologue(); + extern void build_args_table(); +@@ -58,6 +61,21 @@ extern void initialize_source_registers(); + extern void set_up_iterators(); + extern void initialize_buffer(int); + ++/* This (TEXT_BSS_DELTA) is the relative distance between those ++ sections as set by the linker options for the R==1 tests. 
*/ ++#define TEXT_BSS_DELTA 0x20000 ++#define RELOC_BUFFER_SIZE 0x1000 ++extern unsigned long long pcrelative_buff_addr(int); ++#define PAD_ORI \ ++ __asm__ __volatile__ ("ori 21,21,21"); \ ++ __asm__ __volatile__ ("ori 22,22,22");\ ++ __asm__ __volatile__ ("ori 23,23,23");\ ++ __asm__ __volatile__ ("ori 24,24,24");\ ++ __asm__ __volatile__ ("ori 25,25,25");\ ++ __asm__ __volatile__ ("ori 26,26,26");\ ++ __asm__ __volatile__ ("ori 27,27,27");\ ++ __asm__ __volatile__ ("ori 28,28,28"); ++ + extern int verbose; + #define debug_printf(X) if (verbose>0) printf(X); + #define debug_show_labels (verbose>0) +diff --git a/none/tests/ppc64/test_isa_3_1_R1_RT.c b/none/tests/ppc64/test_isa_3_1_R1_RT.c +new file mode 100644 +index 000000000..d73b84b10 +--- /dev/null ++++ b/none/tests/ppc64/test_isa_3_1_R1_RT.c +@@ -0,0 +1,624 @@ ++/* ++ * Valgrind testcase for PowerPC ISA 3.1 ++ * ++ * Copyright (C) 2019-2020 Will Schmidt ++ * ++ * 64bit build: ++ * gcc -Winline -Wall -g -O -mregnames -maltivec -m64 ++ */ ++ ++/* ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#include ++#ifdef HAS_ISA_3_1 ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++/* Condition Register fields. 
++ These are used to capture the condition register values immediately after ++ the instruction under test is executed. This is done to help prevent other ++ test overhead (switch statements, result compares, etc) from disturbing ++ the test case results. */ ++unsigned long current_cr; ++unsigned long current_fpscr; ++ ++struct test_list_t current_test; ++ ++#include "isa_3_1_helpers.h" ++ ++static void test_plxvp_off0_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plxvp 20, +0(0),1" ); ++ PAD_ORI ++} ++static void test_plxvp_off8_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plxvp 20, +8(0),1" ); ++ PAD_ORI ++} ++static void test_plxvp_off16_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plxvp 20, +16(0),1" ); ++ PAD_ORI ++} ++static void test_plxvp_off24_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plxvp 20, +24(0),1" ); ++ PAD_ORI ++} ++static void test_plxvp_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plxvp 20, +32(0),1" ); ++ PAD_ORI ++} ++static void test_plbz_off0_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plbz %0, +0(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plbz_off8_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plbz %0, +8(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plbz_off16_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plbz %0, +16(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plbz_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plbz %0, +32(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plbz_off64_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plbz %0, +64(0), 1" : "=r" (rt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_plhz_off0_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plhz %0, +0(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plhz_off8_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plhz %0, +8(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plhz_off16_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plhz %0, +16(0), 1" : "=r" (rt) ); 
++ PAD_ORI ++} ++static void test_plhz_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plhz %0, +32(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plhz_off64_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plhz %0, +64(0), 1" : "=r" (rt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_plha_off0_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plha %0, +0(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plha_off8_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plha %0, +8(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plha_off16_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plha %0, +16(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plha_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plha %0, +32(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plha_off64_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plha %0, +64(0), 1" : "=r" (rt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_plwz_off0_R1 (void) { ++ __asm__ __volatile__ ("plwz %0, +0(0), 1" : "=r" (rt) ); ++} ++static void test_plwz_off8_R1 (void) { ++ __asm__ __volatile__ ("plwz %0, +8(0), 1" : "=r" (rt) ); ++} ++static void test_plwz_off16_R1 (void) { ++ __asm__ __volatile__ ("plwz %0, +16(0), 1" : "=r" (rt) ); ++} ++static void test_plwz_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plwz %0, +32(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_plwz_off64_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plwz %0, +64(0), 1" : "=r" (rt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_plwa_off0_R1 (void) { ++ __asm__ __volatile__ ("plwa %0, +0(0), 1" : "=r" (rt) ); ++} ++static void test_plwa_off8_R1 (void) { ++ __asm__ __volatile__ ("plwa %0, +8(0), 1" : "=r" (rt) ); ++} ++static void test_plwa_off16_R1 (void) { ++ __asm__ __volatile__ ("plwa %0, +16(0), 1" : "=r" (rt) ); ++} ++static void test_plwa_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plwa %0, +32(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void 
test_plwa_off64_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plwa %0, +64(0), 1" : "=r" (rt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_pld_off0_R1 (void) { ++ __asm__ __volatile__ ("pld %0, +0(0), 1" : "=r" (rt) ); ++} ++static void test_pld_off8_R1 (void) { ++ __asm__ __volatile__ ("pld %0, +8(0), 1" : "=r" (rt) ); ++} ++static void test_pld_off16_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("pld %0, +16(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_pld_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("pld %0, +32(0), 1" : "=r" (rt) ); ++ PAD_ORI ++} ++static void test_pld_off64_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("pld %0, +64(0), 1" : "=r" (rt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_pstb_off0_R1 (void) { ++ __asm__ __volatile__ ("pstb %0, -0x1f400+0(0), 1" :: "r" (rs) ); ++} ++static void test_pstb_off8_R1 (void) { ++ __asm__ __volatile__ ("pstb %0, -0x1f400+8(0), 1" :: "r" (rs) ); ++} ++static void test_pstb_off16_R1 (void) { ++ __asm__ __volatile__ ("pstb %0, -0x1f400+16(0), 1" :: "r" (rs) ); ++} ++static void test_pstb_off32_R1 (void) { ++ __asm__ __volatile__ ("pstb %0, -0x1f400+32(0), 1" :: "r" (rs) ); ++} ++static void test_psth_off0_R1 (void) { ++ __asm__ __volatile__ ("psth %0, -0x1f400+0(0), 1" :: "r" (rs) ); ++} ++static void test_psth_off8_R1 (void) { ++ __asm__ __volatile__ ("psth %0, -0x1f400+8(0), 1" :: "r" (rs) ); ++} ++static void test_psth_off16_R1 (void) { ++ __asm__ __volatile__ ("psth %0, -0x1f400+16(0), 1" :: "r" (rs) ); ++} ++static void test_psth_off32_R1 (void) { ++ __asm__ __volatile__ ("psth %0, -0x1f400+32(0), 1" :: "r" (rs) ); ++} ++static void test_pstw_off0_R1 (void) { ++ __asm__ __volatile__ ("pstw %0, -0x1f400+0(0), 1" :: "r" (rs) ); ++} ++static void test_pstw_off8_R1 (void) { ++ __asm__ __volatile__ ("pstw %0, -0x1f400+8(0), 1" :: "r" (rs) ); ++} ++static void test_pstw_off16_R1 (void) { ++ __asm__ __volatile__ ("pstw %0, -0x1f400+16(0), 1" :: "r" (rs) ); ++} ++static void 
test_pstw_off32_R1 (void) { ++ __asm__ __volatile__ ("pstw %0, -0x1f400+32(0), 1" :: "r" (rs) ); ++} ++static void test_pstd_off0_R1 (void) { ++ __asm__ __volatile__ ("pstd %0, -0x1f400+0(0), 1" :: "r" (rs) ); ++} ++static void test_pstd_off8_R1 (void) { ++ __asm__ __volatile__ ("pstd %0, -0x1f400+8(0), 1" :: "r" (rs) ); ++} ++static void test_pstd_off16_R1 (void) { ++ __asm__ __volatile__ ("pstd %0, -0x1f400+16(0), 1" :: "r" (rs) ); ++} ++static void test_pstd_off32_R1 (void) { ++ __asm__ __volatile__ ("pstd %0, -0x1f400+32(0), 1" :: "r" (rs) ); ++} ++ /* For the paddi tests; although we can get close to a read/write target ++ due to forcing where the .text and .bss sections are placed, there is ++ still enough codegen variability that having a raw value in the exp ++ file will not be determinative for these instructions. ++ Thus, compromise and just ensure that the generated value is an ++ address that lands within the reloc buffer, and use quasi magic ++ eyecatcher values in the return to indicate success. 
*/ ++static void test_paddi_0_R1 (void) { ++ __asm__ __volatile__ ("paddi %0, 0, 0+0, 1" : "=r" (rt) ); ++ rt = rt - TEXT_BSS_DELTA; ++ if (rt > pcrelative_buff_addr(0) && ++ rt < pcrelative_buff_addr(RELOC_BUFFER_SIZE)) ++ rt = 0xffff0000; ++} ++static void test_paddi_12_R1 (void) { ++ __asm__ __volatile__ ("paddi %0, 0, 0+12, 1" : "=r" (rt) ); ++ rt = rt - TEXT_BSS_DELTA; ++ if (rt > pcrelative_buff_addr(0) && ++ rt < pcrelative_buff_addr(RELOC_BUFFER_SIZE)) ++ rt = 0xffff0012; ++} ++static void test_paddi_48_R1 (void) { ++ __asm__ __volatile__ ("paddi %0, 0, 0+48, 1" : "=r" (rt) ); ++ rt = rt - TEXT_BSS_DELTA; ++ if (rt > pcrelative_buff_addr(0) && ++ rt < pcrelative_buff_addr(RELOC_BUFFER_SIZE)) ++ rt = 0xffff0048; ++} ++static void test_paddi_98_R1 (void) { ++ __asm__ __volatile__ ("paddi %0, 0, 0+98, 1" : "=r" (rt) ); ++ rt = rt - TEXT_BSS_DELTA; ++ if (rt > pcrelative_buff_addr(0) && ++ rt < pcrelative_buff_addr(RELOC_BUFFER_SIZE)) ++ rt = 0xffff0098; ++} ++static void test_plq_off0_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plq 26, +0(0), 1" ); ++ PAD_ORI ++} ++static void test_plq_off8_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plq 26, +8(0), 1" ); ++ PAD_ORI ++} ++static void test_plq_off16_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plq 26, +16(0), 1" ); ++ PAD_ORI ++} ++static void test_plq_off32_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plq 26, +32(0), 1" ); ++ PAD_ORI ++} ++static void test_plq_off48_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plq 26, +48(0), 1" ); ++ PAD_ORI ++} ++static void test_plq_off64_R1 (void) { ++ PAD_ORI ++ __asm__ __volatile__ ("plq 26, +64(0), 1" ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_pstq_off0_R1 (void) { ++ __asm__ __volatile__ ("pstq 24, -0x1f400+0(0), 1" ); ++} ++static void test_pstq_off8_R1 (void) { ++ __asm__ __volatile__ ("pstq 24, -0x1f400+8(0), 1" ); ++} ++static void test_pstq_off16_R1 (void) { ++ __asm__ __volatile__ ("pstq 24, -0x1f400+16(0), 1" ); ++} ++static void 
test_pstq_off32_R1 (void) { ++ __asm__ __volatile__ ("pstq 24, -0x1f400+32(0), 1" ); ++} ++static void test_pstq_off64_R1 (void) { ++ __asm__ __volatile__ ("pstq 24, -0x1f400+64(0), 1" ); ++} ++ ++static test_list_t testgroup_generic[] = { ++ { &test_paddi_0_R1, "paddi 0_R1", "RT,RA,SI,R"}, /* bcwp */ ++ { &test_paddi_12_R1, "paddi 12_R1", "RT,RA,SI,R"}, /* bcwp */ ++ { &test_paddi_48_R1, "paddi 48_R1", "RT,RA,SI,R"}, /* bcwp */ ++ { &test_paddi_98_R1, "paddi 98_R1", "RT,RA,SI,R"}, /* bcwp */ ++ { &test_plbz_off0_R1, "plbz off0_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plbz_off8_R1, "plbz off8_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plbz_off16_R1, "plbz off16_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plbz_off32_R1, "plbz off32_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plbz_off64_R1, "plbz off64_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_pld_off0_R1, "pld off0_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_pld_off8_R1, "pld off8_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_pld_off16_R1, "pld off16_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_pld_off32_R1, "pld off32_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_pld_off64_R1, "pld off64_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plha_off0_R1, "plha off0_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plha_off8_R1, "plha off8_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plha_off16_R1, "plha off16_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plha_off32_R1, "plha off32_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plha_off64_R1, "plha off64_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plhz_off0_R1, "plhz off0_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plhz_off8_R1, "plhz off8_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plhz_off16_R1, "plhz off16_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plhz_off32_R1, "plhz off32_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plhz_off64_R1, "plhz off64_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plq_off0_R1, "plq off0_R1", "RTp,D(RA),R"}, /* bcwp */ ++ { &test_plq_off8_R1, "plq off8_R1", "RTp,D(RA),R"}, /* bcwp */ ++ { 
&test_plq_off16_R1, "plq off16_R1", "RTp,D(RA),R"}, /* bcwp */ ++ { &test_plq_off32_R1, "plq off32_R1", "RTp,D(RA),R"}, /* bcwp */ ++ { &test_plq_off48_R1, "plq off48_R1", "RTp,D(RA),R"}, /* bcwp */ ++ { &test_plq_off64_R1, "plq off64_R1", "RTp,D(RA),R"}, /* bcwp */ ++ { &test_plwa_off0_R1, "plwa off0_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwa_off8_R1, "plwa off8_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwa_off16_R1, "plwa off16_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwa_off32_R1, "plwa off32_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwa_off64_R1, "plwa off64_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwz_off0_R1, "plwz off0_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwz_off8_R1, "plwz off8_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwz_off16_R1, "plwz off16_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwz_off32_R1, "plwz off32_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plwz_off64_R1, "plwz off64_R1", "RT,D(RA),R"}, /* bcwp */ ++ { &test_plxvp_off0_R1, "plxvp off0_R1", "XTp,D(RA),R"}, /* bcwp */ ++ { &test_plxvp_off8_R1, "plxvp off8_R1", "XTp,D(RA),R"}, /* bcwp */ ++ { &test_plxvp_off16_R1, "plxvp off16_R1", "XTp,D(RA),R"}, /* bcwp */ ++ { &test_plxvp_off24_R1, "plxvp off24_R1", "XTp,D(RA),R"}, /* bcwp */ ++ { &test_plxvp_off32_R1, "plxvp off32_R1", "XTp,D(RA),R"}, /* bcwp */ ++ { &test_pstb_off0_R1, "pstb off0_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstb_off8_R1, "pstb off8_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstb_off16_R1, "pstb off16_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstb_off32_R1, "pstb off32_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstd_off0_R1, "pstd off0_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstd_off8_R1, "pstd off8_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstd_off16_R1, "pstd off16_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstd_off32_R1, "pstd off32_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_psth_off0_R1, "psth off0_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_psth_off8_R1, "psth off8_R1", "RS,D(RA),R"}, /* bcwp */ ++ { 
&test_psth_off16_R1, "psth off16_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_psth_off32_R1, "psth off32_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstq_off0_R1, "pstq off0_R1", "RSp,D(RA),R"}, /* bcwp */ ++ { &test_pstq_off8_R1, "pstq off8_R1", "RSp,D(RA),R"}, /* bcwp */ ++ { &test_pstq_off16_R1, "pstq off16_R1", "RSp,D(RA),R"}, /* bcwp */ ++ { &test_pstq_off32_R1, "pstq off32_R1", "RSp,D(RA),R"}, /* bcwp */ ++ { &test_pstq_off64_R1, "pstq off64_R1", "RSp,D(RA),R"}, /* bcwp */ ++ { &test_pstw_off0_R1, "pstw off0_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstw_off8_R1, "pstw off8_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstw_off16_R1, "pstw off16_R1", "RS,D(RA),R"}, /* bcwp */ ++ { &test_pstw_off32_R1, "pstw off32_R1", "RS,D(RA),R"}, /* bcwp */ ++ { NULL, NULL }, ++}; ++ ++/* Allow skipping of tests. */ ++unsigned long test_count=0xffff; ++unsigned long skip_count=0; ++unsigned long setup_only=0; ++ ++/* Set up a setjmp/longjmp to gently handle our SIGILLs and SIGSEGVs. */ ++static jmp_buf mybuf; ++ ++/* This (testfunction_generic) is meant to handle all of the instruction ++ variations. The helpers set up the register and iterator values ++ as is appropriate for the instruction being tested. */ ++static void testfunction_generic (const char* instruction_name, ++ test_func_t test_function, ++ unsigned int ignore_flags, ++ char * cur_form) { ++ ++ identify_form_components (instruction_name , cur_form); ++ debug_show_form (instruction_name, cur_form); ++ set_up_iterators (); ++ debug_show_iter_ranges (); ++ initialize_buffer (0); ++ init_pcrelative_write_target (); ++ debug_dump_buffer (); ++ ++ for (vrai = a_start; vrai < a_iters ; vrai+=a_inc) { ++ for (vrbi = b_start; vrbi < b_iters ; vrbi+=b_inc) { ++ for (vrci = c_start; vrci < c_iters ; vrci+=c_inc) { ++ for (vrmi = m_start; (vrmi < m_iters) ; vrmi+=m_inc) { ++ CHECK_OVERRIDES ++ debug_show_current_iteration (); ++ // Be sure to initialize the target registers first. 
++ initialize_target_registers (); ++ initialize_source_registers (); ++ printf ("%s", instruction_name); ++ print_register_header (); ++ printf( " =>"); fflush (stdout); ++ if (!setup_only) { ++ if (enable_setjmp) { ++ if ( setjmp ( mybuf ) ) { ++ printf("signal tripped. (FIXME)\n"); ++ continue; ++ } ++ } ++ (*test_function) (); ++ } ++ print_register_footer (); ++ print_result_buffer (); ++ print_pcrelative_write_target (); ++ printf ("\n"); ++ } ++ } ++ } ++ } ++} ++ ++void mykillhandler ( int x ) { longjmp (mybuf, 1); } ++void mysegvhandler ( int x ) { longjmp (mybuf, 1); } ++ ++static void do_tests ( void ) ++{ ++ int groupcount; ++ char * cur_form; ++ test_group_t group_function = &testfunction_generic; ++ test_list_t *tests = testgroup_generic; ++ ++ struct sigaction kill_action, segv_action; ++ struct sigaction old_kill_action, old_segv_action; ++ if (enable_setjmp) { ++ kill_action.sa_handler = mykillhandler; ++ segv_action.sa_handler = mysegvhandler; ++ sigemptyset ( &kill_action.sa_mask ); ++ sigemptyset ( &segv_action.sa_mask ); ++ kill_action.sa_flags = SA_NODEFER; ++ segv_action.sa_flags = SA_NODEFER; ++ sigaction ( SIGILL, &kill_action, &old_kill_action); ++ sigaction ( SIGSEGV, &segv_action, &old_segv_action); ++ } ++ ++ for (groupcount = 0; tests[groupcount].name != NULL; groupcount++) { ++ cur_form = strdup(tests[groupcount].form); ++ current_test = tests[groupcount]; ++ identify_instruction_by_func_name (current_test.name); ++ if (groupcount < skip_count) continue; ++ if (verbose) printf("Test #%d ,", groupcount); ++ if (verbose > 1) printf(" instruction %s (v=%d)", current_test.name, verbose); ++ (*group_function) (current_test.name, current_test.func, 0, cur_form ); ++ printf ("\n"); ++ if (groupcount >= (skip_count+test_count)) break; ++ } ++ if (debug_show_labels) printf("\n"); ++ printf ("All done. 
Tested %d different instruction groups\n", groupcount); ++} ++ ++static void usage (void) ++{ ++ fprintf(stderr, ++ "Usage: test_isa_XXX [OPTIONS]\n" ++ "\t-h: display this help and exit\n" ++ "\t-v: increase verbosity\n" ++ "\t-a : limit number of a-iterations to \n" ++ "\t-b : limit number of b-iterations to \n" ++ "\t-c : limit number of c-iterations to \n" ++ "\t-n : limit to this number of tests.\n" ++ "\t-r : run only test # \n" ++ "\t\n" ++ "\t-j :enable setjmp to recover from illegal insns. \n" ++ "\t-m :(dev only?) lock VRM value to zero.\n" ++ "\t-z :(dev only?) lock MC value to zero.\n" ++ "\t-p :(dev only?) disable prefix instructions\n" ++ "\t-s : skip tests \n" ++ "\t-c : stop after running # of tests \n" ++ "\t-f : Do the test setup but do not actually execute the test instruction. \n" ++ ); ++} ++ ++int main (int argc, char **argv) ++{ ++ int c; ++ while ((c = getopt(argc, argv, "dhjvmpfzs:a:b:c:n:r:")) != -1) { ++ switch (c) { ++ case 'h': ++ usage(); ++ return 0; ++ ++ case 'v': ++ verbose++; ++ break; ++ ++ /* Options related to limiting the test iterations. */ ++ case 'a': ++ a_limit=atoi (optarg); ++ printf ("limiting a-iters to %ld.\n", a_limit); ++ break; ++ case 'b': ++ b_limit=atoi (optarg); ++ printf ("limiting b-iters to %ld.\n", b_limit); ++ break; ++ case 'c': ++ c_limit=atoi (optarg); ++ printf ("limiting c-iters to %ld.\n", c_limit); ++ break; ++ case 'n': // run this number of tests. ++ test_count=atoi (optarg); ++ printf ("limiting to %ld tests\n", test_count); ++ break; ++ case 'r': // run just test #. ++ skip_count=atoi (optarg); ++ test_count=0; ++ if (verbose) printf("Running test number %ld\n", skip_count); ++ break; ++ case 's': // skip this number of tests. ++ skip_count=atoi (optarg); ++ printf ("skipping %ld tests\n", skip_count); ++ break; ++ ++ /* debug options. 
*/ ++ case 'd': ++ dump_tables=1; ++ printf("DEBUG:dump_tables.\n"); ++ break; ++ case 'f': ++ setup_only=1; ++ printf("DEBUG:setup_only.\n"); ++ break; ++ case 'j': ++ enable_setjmp=1; ++ printf ("DEBUG:setjmp enabled.\n"); ++ break; ++ case 'm': ++ vrm_override=1; ++ printf ("DEBUG:vrm override enabled.\n"); ++ break; ++ case 'p': ++ prefix_override=1; ++ printf ("DEBUG:prefix override enabled.\n"); ++ break; ++ case 'z': ++ mc_override=1; ++ printf ("DEBUG:MC override enabled.\n"); ++ break; ++ default: ++ usage(); ++ fprintf(stderr, "Unknown argument: '%c'\n", c); ++ } ++ } ++ ++ generic_prologue (); ++ build_vsx_table (); ++ build_args_table (); ++ build_float_vsx_tables (); ++ ++ if (dump_tables) { ++ dump_float_vsx_tables (); ++ dump_vsxargs (); ++ } ++ ++ do_tests (); ++ ++ return 0; ++} ++ ++#else // HAS_ISA_3_1 ++int main (int argc, char **argv) ++{ ++ printf("NO ISA 3.1 SUPPORT\n"); ++ return 0; ++} ++#endif +diff --git a/none/tests/ppc64/test_isa_3_1_R1_RT.stderr.exp b/none/tests/ppc64/test_isa_3_1_R1_RT.stderr.exp +new file mode 100644 +index 000000000..139597f9c +--- /dev/null ++++ b/none/tests/ppc64/test_isa_3_1_R1_RT.stderr.exp +@@ -0,0 +1,2 @@ ++ ++ +diff --git a/none/tests/ppc64/test_isa_3_1_R1_RT.stdout.exp b/none/tests/ppc64/test_isa_3_1_R1_RT.stdout.exp +new file mode 100644 +index 000000000..87594748f +--- /dev/null ++++ b/none/tests/ppc64/test_isa_3_1_R1_RT.stdout.exp +@@ -0,0 +1,138 @@ ++paddi 0_R1 => ffff0000 ++ ++paddi 12_R1 => ffff0012 ++ ++paddi 48_R1 => ffff0048 ++ ++paddi 98_R1 => ffff0098 ++ ++plbz off0_R1 => 1a ++ ++plbz off8_R1 => 1f ++ ++plbz off16_R1 => 1f ++ ++plbz off32_R1 => 1b ++ ++plbz off64_R1 => 1b ++ ++pld off0_R1 => e740000004100000 ++ ++pld off8_R1 => 4e800020 ++ ++pld off16_R1 => 6318001862f7001f ++ ++pld off32_R1 => 639c001c637b001b ++ ++pld off64_R1 => 639c001c637b001b ++ ++plha off0_R1 => 1a ++ ++plha off8_R1 => 1f ++ ++plha off16_R1 => 1f ++ ++plha off32_R1 => 1b ++ ++plha off64_R1 => 1b ++ ++plhz off0_R1 => 1a ++ 
++plhz off8_R1 => 1f ++ ++plhz off16_R1 => 1f ++ ++plhz off32_R1 => 1b ++ ++plhz off64_R1 => 1b ++ ++plq off0_R1 => e34000000410001a 62d6001662b5001f ++ ++plq off8_R1 => 62d6001662b5001f 6318001862f7001f ++ ++plq off16_R1 => 6318001862f7001f 635a001a6339001b ++ ++plq off32_R1 => 639c001c637b001b 4e80003b ++ ++plq off48_R1 => 1a 62d6001662b5001f ++ ++plq off64_R1 => 639c001c637b001b 4e80003b ++ ++plwa off0_R1 => 4100000 ++ ++plwa off8_R1 => 4e800020 ++ ++plwa off16_R1 => 0 ++ ++plwa off32_R1 => 637b001b ++ ++plwa off64_R1 => 637b001b ++ ++plwz off0_R1 => 6100000 ++ ++plwz off8_R1 => 4e800020 ++ ++plwz off16_R1 => 0 ++ ++plwz off32_R1 => 637b001b ++ ++plwz off64_R1 => 637b001b ++ ++plxvp off0_R1 => 6318001862f70017 635a001a63390019 ea80000004100000 62d6001662b50015 ++ ++plxvp off8_R1 => 635a001a63390019 639c001c637b001b 62d6001662b50015 6318001862f70017 ++ ++plxvp off16_R1 => 639c001c637b001b 000000004e800020 6318001862f70017 635a001a63390019 ++ ++plxvp off24_R1 => 000000004e800020 0000000000000000 635a001a63390019 639c001c637b001b ++ ++plxvp off32_R1 => 0000000000000000 62d6001662b50015 639c001c637b001b 000000004e800020 ++ ++pstb off0_R1 102030405060708 => 08 ++ ++pstb off8_R1 102030405060708 => 08 ++ ++pstb off16_R1 102030405060708 => 08 ++ ++pstb off32_R1 102030405060708 => 08 ++ ++pstd off0_R1 102030405060708 => 0102030405060708 ++ ++pstd off8_R1 102030405060708 => 0102030405060708 ++ ++pstd off16_R1 102030405060708 => 0102030405060708 ++ ++pstd off32_R1 102030405060708 => 0102030405060708 ++ ++psth off0_R1 102030405060708 => 0708 ++ ++psth off8_R1 102030405060708 => 0708 ++ ++psth off16_R1 102030405060708 => 0708 ++ ++psth off32_R1 102030405060708 => 0708 ++ ++pstq off0_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918 ++pstq off0_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918 ++ ++pstq off8_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918 ++pstq off8_R1 102030405060708 a5b4c3d2e1f00918 => 
0102030405060708 a5b4c3d2e1f00918 ++ ++pstq off16_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918 ++pstq off16_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918 ++ ++pstq off32_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918 ++pstq off32_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918 ++ ++pstq off64_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918 ++pstq off64_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708 a5b4c3d2e1f00918 ++ ++pstw off0_R1 102030405060708 => 05060708 ++ ++pstw off8_R1 102030405060708 => 05060708 ++ ++pstw off16_R1 102030405060708 => 05060708 ++ ++pstw off32_R1 102030405060708 => 05060708 ++ ++All done. Tested 66 different instruction groups +diff --git a/none/tests/ppc64/test_isa_3_1_R1_RT.vgtest b/none/tests/ppc64/test_isa_3_1_R1_RT.vgtest +new file mode 100644 +index 000000000..61d7f65a1 +--- /dev/null ++++ b/none/tests/ppc64/test_isa_3_1_R1_RT.vgtest +@@ -0,0 +1,2 @@ ++prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1 ++prog: test_isa_3_1_R1_RT +diff --git a/none/tests/ppc64/test_isa_3_1_R1_XT.c b/none/tests/ppc64/test_isa_3_1_R1_XT.c +new file mode 100644 +index 000000000..58885b8d3 +--- /dev/null ++++ b/none/tests/ppc64/test_isa_3_1_R1_XT.c +@@ -0,0 +1,534 @@ ++/* ++ * Valgrind testcase for PowerPC ISA 3.1 ++ * ++ * Copyright (C) 2019-2020 Will Schmidt ++ * ++ * 64bit build: ++ * gcc -Winline -Wall -g -O -mregnames -maltivec -m64 ++ */ ++ ++/* ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation; either version 2 of the ++ * License, or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#include ++#ifdef HAS_ISA_3_1 ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++/* Condition Register fields. ++ These are used to capture the condition register values immediately after ++ the instruction under test is executed. This is done to help prevent other ++ test overhead (switch statements, result compares, etc) from disturbing ++ the test case results. */ ++unsigned long current_cr; ++unsigned long current_fpscr; ++ ++struct test_list_t current_test; ++ ++#include "isa_3_1_helpers.h" ++static void test_pstxvp_off0_R1 (void) { ++ __asm__ __volatile__ ("pstxvp 20, -0x1f400+0(0),1"); ++} ++static void test_pstxvp_off16_R1 (void) { ++ __asm__ __volatile__ ("pstxvp 20, -0x1f400+16(0),1"); ++} ++static void test_pstxvp_off32_R1 (void) { ++ __asm__ __volatile__ ("pstxvp 20, -0x1f400+32(0),1"); ++} ++static void test_pstxvp_off48_R1 (void) { ++ __asm__ __volatile__ ("pstxvp 20, -0x1f400+48(0),1"); ++} ++static void test_plfd_64_R1 (void) { ++ __asm__ __volatile__ ("plfd 28, +64(0), 1"); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_plfd_32_R1 (void) { ++ __asm__ __volatile__ ("plfd 28, +32(0), 1"); ++ PAD_ORI ++} ++static void test_plfd_16_R1 (void) { ++ __asm__ __volatile__ ("plfd 28, +16(0), 1"); ++ PAD_ORI ++} ++static void test_plfd_8_R1 (void) { ++ __asm__ __volatile__ ("plfd 28, +8(0), 1"); ++ PAD_ORI ++} ++static void test_plfd_4_R1 (void) { ++ __asm__ __volatile__ ("plfd 28, +4(0), 1"); ++ PAD_ORI ++} ++static void test_plfd_0_R1 (void) { ++ __asm__ __volatile__ ("plfd 28, +0(0), 1"); ++ PAD_ORI ++} ++static void test_plfs_64_R1 (void) { ++ __asm__ __volatile__ ("plfs 28, +64(0), 1"); ++ PAD_ORI ++ PAD_ORI 
++} ++static void test_plfs_32_R1 (void) { ++ __asm__ __volatile__ ("plfs 28, +32(0), 1"); ++ PAD_ORI ++} ++static void test_plfs_16_R1 (void) { ++ __asm__ __volatile__ ("plfs 28, +16(0), 1"); ++ PAD_ORI ++} ++static void test_plfs_8_R1 (void) { ++ __asm__ __volatile__ ("plfs 28, +8(0), 1"); ++ PAD_ORI ++} ++static void test_plfs_4_R1 (void) { ++ __asm__ __volatile__ ("plfs 28, +4(0), 1"); ++ PAD_ORI ++} ++static void test_plfs_0_R1 (void) { ++ __asm__ __volatile__ ("plfs 28, +0(0), 1"); ++ PAD_ORI ++} ++static void test_pstfd_32_R1 (void) { ++ __asm__ __volatile__ ("pstfd 26, -0x1f400+32(0), 1"); ++} ++static void test_pstfd_16_R1 (void) { ++ __asm__ __volatile__ ("pstfd 26, -0x1f400+16(0), 1"); ++} ++static void test_pstfd_8_R1 (void) { ++ __asm__ __volatile__ ("pstfd 26, -0x1f400+8(0), 1"); ++} ++static void test_pstfd_4_R1 (void) { ++ __asm__ __volatile__ ("pstfd 26, -0x1f400+4(0), 1"); ++} ++static void test_pstfd_0_R1 (void) { ++ __asm__ __volatile__ ("pstfd 26, -0x1f400+0(0), 1"); ++} ++static void test_pstfs_32_R1 (void) { ++ __asm__ __volatile__ ("pstfs 26, -0x1f400+32(0), 1"); ++} ++static void test_pstfs_16_R1 (void) { ++ __asm__ __volatile__ ("pstfs 26, -0x1f400+16(0), 1"); ++} ++static void test_pstfs_8_R1 (void) { ++ __asm__ __volatile__ ("pstfs 26, -0x1f400+8(0), 1"); ++} ++static void test_pstfs_4_R1 (void) { ++ __asm__ __volatile__ ("pstfs 26, -0x1f400+4(0), 1"); ++} ++static void test_pstfs_0_R1 (void) { ++ __asm__ __volatile__ ("pstfs 26, -0x1f400+0(0), 1"); ++} ++static void test_plxsd_64_R1 (void) { ++ __asm__ __volatile__ ("plxsd %0, +64(0), 1" : "=v" (vrt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_plxsd_32_R1 (void) { ++ __asm__ __volatile__ (".align 2 ; plxsd %0, +32(0), 1" : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxsd_16_R1 (void) { ++ __asm__ __volatile__ ("plxsd %0, +16(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxsd_8_R1 (void) { ++ __asm__ __volatile__ ("plxsd %0, +8(0), 1; 
pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxsd_4_R1 (void) { ++ __asm__ __volatile__ ("plxsd %0, +4(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxsd_0_R1 (void) { ++ __asm__ __volatile__ ("plxsd %0, +0(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxssp_64_R1 (void) { ++ __asm__ __volatile__ ("plxssp %0, +64(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++ PAD_ORI ++} ++static void test_plxssp_32_R1 (void) { ++ __asm__ __volatile__ ("plxssp %0, +32(0), 1; pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxssp_16_R1 (void) { ++ __asm__ __volatile__ ("plxssp %0, +16(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxssp_8_R1 (void) { ++ __asm__ __volatile__ ("plxssp %0, +8(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxssp_4_R1 (void) { ++ __asm__ __volatile__ ("plxssp %0, +4(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++static void test_plxssp_0_R1 (void) { ++ __asm__ __volatile__ ("plxssp %0, +0(0), 1; pnop;pnop;pnop; " : "=v" (vrt) ); ++ PAD_ORI ++} ++/* Follow the short-range plxv instructions with nop in order to ++ pad out subsequent instructions. When written there are found ++ to be fluctuations in the instructions to store the result back ++ into the target variable. (pla,pstxv...). 
++ */ ++static void test_plxv_16_R1 (void) { ++ __asm__ __volatile__ ("plxv %x0, +16(0), 1; pnop;pnop;pnop;" : "=wa" (vec_xt) ); ++ PAD_ORI ++} ++static void test_plxv_8_R1 (void) { ++ __asm__ __volatile__ ("plxv %x0, +8(0), 1; pnop;pnop;pnop;" : "=wa" (vec_xt) ); ++ PAD_ORI ++} ++static void test_plxv_4_R1 (void) { ++ __asm__ __volatile__ ("plxv %x0, +4(0), 1; pnop;pnop;pnop;" : "=wa" (vec_xt) ); ++ PAD_ORI ++} ++static void test_plxv_0_R1 (void) { ++ __asm__ __volatile__ ("plxv %x0, +0(0), 1; pnop;pnop;pnop; " : "=wa" (vec_xt) ); ++ PAD_ORI ++} ++static void test_pstxsd_64_R1 (void) { ++ __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+64(0), 1" ); ++} ++static void test_pstxsd_32_R1 (void) { ++ __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+32(0), 1" ); ++} ++static void test_pstxsd_16_R1 (void) { ++ __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+16(0), 1" ); ++} ++static void test_pstxsd_8_R1 (void) { ++ __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+8(0), 1" ); ++} ++static void test_pstxsd_4_R1 (void) { ++ __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+4(0), 1" ); ++} ++static void test_pstxsd_0_R1 (void) { ++ __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+0(0), 1" ); ++} ++static void test_pstxssp_64_R1 (void) { ++ __asm__ __volatile__ ("pstxssp 22, -0x1f400+64(0), 1" ); ++} ++static void test_pstxssp_32_R1 (void) { ++ __asm__ __volatile__ ("pstxssp 22, -0x1f400+32(0), 1"); ++} ++static void test_pstxssp_16_R1 (void) { ++ __asm__ __volatile__ ("pstxssp 22, -0x1f400+16(0), 1"); ++} ++static void test_pstxssp_8_R1 (void) { ++ __asm__ __volatile__ ("pstxssp 22, -0x1f400+8(0), 1"); ++} ++static void test_pstxssp_4_R1 (void) { ++ __asm__ __volatile__ ("pstxssp 22, -0x1f400+4(0), 1"); ++} ++static void test_pstxssp_0_R1 (void) { ++ __asm__ __volatile__ ("pstxssp 22, -0x1f400+0(0), 1"); ++} ++static void test_pstxv_16_R1 (void) { ++ __asm__ __volatile__ ("pstxv %x0, -0x1f400+16(0), 1" :: "wa" (vec_xs)); ++} ++static void 
test_pstxv_8_R1 (void) { ++ __asm__ __volatile__ ("pstxv %x0, -0x1f400+8(0), 1" :: "wa" (vec_xs)); ++} ++static void test_pstxv_4_R1 (void) { ++ __asm__ __volatile__ ("pstxv %x0, -0x1f400+4(0), 1" :: "wa" (vec_xs)); ++} ++static void test_pstxv_0_R1 (void) { ++ __asm__ __volatile__ ("pstxv %x0, -0x1f400+0(0), 1" :: "wa" (vec_xs)); ++} ++ ++static test_list_t testgroup_generic[] = { ++ { &test_plfd_0_R1, "plfd 0_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfd_4_R1, "plfd 4_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfd_8_R1, "plfd 8_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfd_16_R1, "plfd 16_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfd_32_R1, "plfd 32_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfd_64_R1, "plfd 64_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfs_0_R1, "plfs 0_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfs_4_R1, "plfs 4_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfs_8_R1, "plfs 8_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfs_16_R1, "plfs 16_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfs_32_R1, "plfs 32_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plfs_64_R1, "plfs 64_R1", "FRT,D(RA),R"}, /* bcwp */ ++ { &test_plxsd_0_R1, "plxsd 0_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_plxsd_4_R1, "plxsd 4_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_plxsd_8_R1, "plxsd 8_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_plxsd_16_R1, "plxsd 16_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_plxsd_32_R1, "plxsd 32_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_plxsd_64_R1, "plxsd 64_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_plxssp_0_R1, "plxssp 0_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_plxssp_4_R1, "plxssp 4_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_plxssp_8_R1, "plxssp 8_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_plxssp_16_R1, "plxssp 16_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_plxssp_32_R1, "plxssp 32_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */ ++ 
{ &test_plxssp_64_R1, "plxssp 64_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_plxv_0_R1, "plxv 0_R1", "XT,D(RA),R"}, /* bcwp */ ++ { &test_plxv_4_R1, "plxv 4_R1", "XT,D(RA),R"}, /* bcwp */ ++ { &test_plxv_8_R1, "plxv 8_R1", "XT,D(RA),R"}, /* bcwp */ ++ { &test_plxv_16_R1, "plxv 16_R1", "XT,D(RA),R"}, /* bcwp */ ++ { &test_pstfd_0_R1, "pstfd 0_R1", "FRS,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_pstfd_4_R1, "pstfd 4_R1", "FRS,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_pstfd_8_R1, "pstfd 8_R1", "FRS,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_pstfd_16_R1, "pstfd 16_R1", "FRS,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_pstfd_32_R1, "pstfd 32_R1", "FRS,D(RA),R", 0b00110000}, /* bcwp */ ++ { &test_pstfs_0_R1, "pstfs 0_R1", "FRS,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_pstfs_4_R1, "pstfs 4_R1", "FRS,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_pstfs_8_R1, "pstfs 8_R1", "FRS,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_pstfs_16_R1, "pstfs 16_R1", "FRS,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_pstfs_32_R1, "pstfs 32_R1", "FRS,D(RA),R", 0b00001111}, /* bcwp */ ++ { &test_pstxsd_0_R1, "pstxsd 0_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxsd_4_R1, "pstxsd 4_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxsd_8_R1, "pstxsd 8_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxsd_16_R1, "pstxsd 16_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxsd_32_R1, "pstxsd 32_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxsd_64_R1, "pstxsd 64_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxssp_0_R1, "pstxssp 0_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxssp_4_R1, "pstxssp 4_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxssp_8_R1, "pstxssp 8_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxssp_16_R1, "pstxssp 16_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxssp_32_R1, "pstxssp 32_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxssp_64_R1, "pstxssp 64_R1", "VRS,D(RA),R"}, /* bcwp */ ++ { &test_pstxvp_off0_R1, "pstxvp off0_R1", "XSp,D(RA),R"}, /* bcwp */ ++ { 
&test_pstxvp_off16_R1, "pstxvp off16_R1", "XSp,D(RA),R"}, /* bcwp */ ++ { &test_pstxvp_off32_R1, "pstxvp off32_R1", "XSp,D(RA),R"}, /* bcwp */ ++ { &test_pstxvp_off48_R1, "pstxvp off48_R1", "XSp,D(RA),R"}, /* bcwp */ ++ { &test_pstxv_0_R1, "pstxv 0_R1", "XS,D(RA),R"}, /* bcwp */ ++ { &test_pstxv_4_R1, "pstxv 4_R1", "XS,D(RA),R"}, /* bcwp */ ++ { &test_pstxv_8_R1, "pstxv 8_R1", "XS,D(RA),R"}, /* bcwp */ ++ { &test_pstxv_16_R1, "pstxv 16_R1", "XS,D(RA),R"}, /* bcwp */ ++ { NULL, NULL }, ++}; ++ ++/* Allow skipping of tests. */ ++unsigned long test_count=0xffff; ++unsigned long skip_count=0; ++unsigned long setup_only=0; ++ ++/* Set up a setjmp/longjmp to gently handle our SIGILLs and SIGSEGVs. */ ++static jmp_buf mybuf; ++ ++/* This (testfunction_generic) is meant to handle all of the instruction ++ variations. The helpers set up the register and iterator values ++ as is appropriate for the instruction being tested. */ ++static void testfunction_generic (const char* instruction_name, ++ test_func_t test_function, ++ unsigned int ignore_flags, ++ char * cur_form) { ++ ++ identify_form_components (instruction_name , cur_form); ++ debug_show_form (instruction_name, cur_form); ++ set_up_iterators (); ++ debug_show_iter_ranges (); ++ initialize_buffer (0); ++ init_pcrelative_write_target (); ++ debug_dump_buffer (); ++ ++ for (vrai = a_start; vrai < a_iters ; vrai+=a_inc) { ++ for (vrbi = b_start; vrbi < b_iters ; vrbi+=b_inc) { ++ for (vrci = c_start; vrci < c_iters ; vrci+=c_inc) { ++ for (vrmi = m_start; (vrmi < m_iters) ; vrmi+=m_inc) { ++ CHECK_OVERRIDES ++ debug_show_current_iteration (); ++ // Be sure to initialize the target registers first. ++ initialize_target_registers (); ++ initialize_source_registers (); ++ vec_xa[0]=0x1234; ++ vec_xa[1]=0x4567; ++ printf ("%s", instruction_name); ++ print_register_header (); ++ printf( " =>"); fflush (stdout); ++ if (!setup_only) { ++ if (enable_setjmp) { ++ if ( setjmp ( mybuf ) ) { ++ printf("signal tripped. 
(FIXME)\n"); ++ continue; ++ } ++ } ++ (*test_function) (); ++ } ++ print_register_footer (); ++ print_result_buffer (); ++ print_pcrelative_write_target (); ++ printf ("\n"); ++ } ++ } ++ } ++ } ++} ++ ++void mykillhandler ( int x ) { longjmp (mybuf, 1); } ++void mysegvhandler ( int x ) { longjmp (mybuf, 1); } ++ ++static void do_tests ( void ) ++{ ++ int groupcount; ++ char * cur_form; ++ test_group_t group_function = &testfunction_generic; ++ test_list_t *tests = testgroup_generic; ++ ++ struct sigaction kill_action, segv_action; ++ struct sigaction old_kill_action, old_segv_action; ++ if (enable_setjmp) { ++ kill_action.sa_handler = mykillhandler; ++ segv_action.sa_handler = mysegvhandler; ++ sigemptyset ( &kill_action.sa_mask ); ++ sigemptyset ( &segv_action.sa_mask ); ++ kill_action.sa_flags = SA_NODEFER; ++ segv_action.sa_flags = SA_NODEFER; ++ sigaction ( SIGILL, &kill_action, &old_kill_action); ++ sigaction ( SIGSEGV, &segv_action, &old_segv_action); ++ } ++ ++ for (groupcount = 0; tests[groupcount].name != NULL; groupcount++) { ++ cur_form = strdup(tests[groupcount].form); ++ current_test = tests[groupcount]; ++ identify_instruction_by_func_name (current_test.name); ++ if (groupcount < skip_count) continue; ++ if (verbose) printf("Test #%d ,", groupcount); ++ if (verbose > 1) printf(" instruction %s (v=%d)", current_test.name, verbose); ++ (*group_function) (current_test.name, current_test.func, 0, cur_form ); ++ printf ("\n"); ++ if (groupcount >= (skip_count+test_count)) break; ++ } ++ if (debug_show_labels) printf("\n"); ++ printf ("All done. 
Tested %d different instruction groups\n", groupcount); ++} ++ ++static void usage (void) ++{ ++ fprintf(stderr, ++ "Usage: test_isa_XXX [OPTIONS]\n" ++ "\t-h: display this help and exit\n" ++ "\t-v: increase verbosity\n" ++ "\t-a : limit number of a-iterations to \n" ++ "\t-b : limit number of b-iterations to \n" ++ "\t-c : limit number of c-iterations to \n" ++ "\t-n : limit to this number of tests.\n" ++ "\t-r : run only test # \n" ++ "\t\n" ++ "\t-j :enable setjmp to recover from illegal insns. \n" ++ "\t-m :(dev only?) lock VRM value to zero.\n" ++ "\t-z :(dev only?) lock MC value to zero.\n" ++ "\t-p :(dev only?) disable prefix instructions\n" ++ "\t-s : skip tests \n" ++ "\t-c : stop after running # of tests \n" ++ "\t-f : Do the test setup but do not actually execute the test instruction. \n" ++ ); ++} ++ ++int main (int argc, char **argv) ++{ ++ int c; ++ while ((c = getopt(argc, argv, "dhjvmpfzs:a:b:c:n:r:")) != -1) { ++ switch (c) { ++ case 'h': ++ usage(); ++ return 0; ++ ++ case 'v': ++ verbose++; ++ break; ++ ++ /* Options related to limiting the test iterations. */ ++ case 'a': ++ a_limit=atoi (optarg); ++ printf ("limiting a-iters to %ld.\n", a_limit); ++ break; ++ case 'b': ++ b_limit=atoi (optarg); ++ printf ("limiting b-iters to %ld.\n", b_limit); ++ break; ++ case 'c': ++ c_limit=atoi (optarg); ++ printf ("limiting c-iters to %ld.\n", c_limit); ++ break; ++ case 'n': // run this number of tests. ++ test_count=atoi (optarg); ++ printf ("limiting to %ld tests\n", test_count); ++ break; ++ case 'r': // run just test #. ++ skip_count=atoi (optarg); ++ test_count=0; ++ if (verbose) printf("Running test number %ld\n", skip_count); ++ break; ++ case 's': // skip this number of tests. ++ skip_count=atoi (optarg); ++ printf ("skipping %ld tests\n", skip_count); ++ break; ++ ++ /* debug options. 
*/ ++ case 'd': ++ dump_tables=1; ++ printf("DEBUG:dump_tables.\n"); ++ break; ++ case 'f': ++ setup_only=1; ++ printf("DEBUG:setup_only.\n"); ++ break; ++ case 'j': ++ enable_setjmp=1; ++ printf ("DEBUG:setjmp enabled.\n"); ++ break; ++ case 'm': ++ vrm_override=1; ++ printf ("DEBUG:vrm override enabled.\n"); ++ break; ++ case 'p': ++ prefix_override=1; ++ printf ("DEBUG:prefix override enabled.\n"); ++ break; ++ case 'z': ++ mc_override=1; ++ printf ("DEBUG:MC override enabled.\n"); ++ break; ++ default: ++ usage(); ++ fprintf(stderr, "Unknown argument: '%c'\n", c); ++ } ++ } ++ ++ generic_prologue (); ++ build_vsx_table (); ++ build_args_table (); ++ build_float_vsx_tables (); ++ ++ if (dump_tables) { ++ dump_float_vsx_tables (); ++ dump_vsxargs (); ++ } ++ ++ do_tests (); ++ ++ return 0; ++} ++ ++#else // HAS_ISA_3_1 ++int main (int argc, char **argv) ++{ ++ printf("NO ISA 3.1 SUPPORT\n"); ++ return 0; ++} ++#endif +diff --git a/none/tests/ppc64/test_isa_3_1_R1_XT.stderr.exp b/none/tests/ppc64/test_isa_3_1_R1_XT.stderr.exp +new file mode 100644 +index 000000000..139597f9c +--- /dev/null ++++ b/none/tests/ppc64/test_isa_3_1_R1_XT.stderr.exp +@@ -0,0 +1,2 @@ ++ ++ +diff --git a/none/tests/ppc64/test_isa_3_1_R1_XT.stdout.exp b/none/tests/ppc64/test_isa_3_1_R1_XT.stdout.exp +new file mode 100644 +index 000000000..48d591f4d +--- /dev/null ++++ b/none/tests/ppc64/test_isa_3_1_R1_XT.stdout.exp +@@ -0,0 +1,127 @@ ++plfd 0_R1 =>_ -4.903986e+55 _ cb80000006100000, 0 ++ ++plfd 4_R1 =>_ 3.095878e+167 _ 62b50015cb800004, 0 ++ ++plfd 8_R1 =>_ 1.297320e+168 _ 62d6001662b50015, 0 ++ ++plfd 16_R1 =>_ 2.264413e+169 _ 6318001862f70017, 0 ++ ++plfd 32_R1 =>_ 6.763045e+171 _ 639c001c637b001b, 0 ++ ++plfd 64_R1 =>_ 6.763045e+171 _ 639c001c637b001b, 0 ++ ++plfs 0_R1 =>_ 2.708339e-35 _ 38c2000000000000, 0 ++ ++plfs 4_R1 =>_ -2.560001e+02 _ c070000080000000, 0 ++ ++plfs 8_R1 =>_ 1.669433e+21 _ 4456a002a0000000, 0 ++ ++plfs 16_R1 =>_ 2.278176e+21 _ 445ee002e0000000, 0 ++ ++plfs 32_R1 
=>_ 4.630140e+21 _ 446f600360000000, 0 ++ ++plfs 64_R1 =>_ 4.630140e+21 _ 446f600360000000, 0 ++ ++plxsd 0_R1 => a800000004100000,0000000000000000 -5.07588375e-116 +Zero ++ ++plxsd 4_R1 => 7000000a8000004,0000000000000000 5.77662562e-275 +Zero ++ ++plxsd 8_R1 => 700000060000000,0000000000000000 5.77662407e-275 +Zero ++ ++plxsd 16_R1 => 7000000,0000000000000000 +Den +Zero ++ ++plxsd 32_R1 => 6339001963180018,0000000000000000 9.43505226e+169 +Zero ++ ++plxsd 64_R1 => 6339001963180018,0000000000000000 9.43505226e+169 +Zero ++ ++plxssp 0_R1 => 3882000000000000,0000000000000000 6.19888e-05 +Zero +Zero +Zero ++ ++plxssp 4_R1 => bd80000080000000,0000000000000000 -6.25000e-02 -Zero +Zero +Zero ++ ++plxssp 8_R1 => 38e0000000000000,0000000000000000 1.06812e-04 +Zero +Zero +Zero ++ ++plxssp 16_R1 => 38e0000000000000,0000000000000000 1.06812e-04 +Zero +Zero +Zero ++ ++plxssp 32_R1 => 445ac002c0000000,0000000000000000 8.75000e+02 -2.00000e+00 +Zero +Zero ++ ++plxssp 64_R1 => 446b400340000000,0000000000000000 9.41000e+02 2.00000e+00 +Zero +Zero ++ ++plxv 0_R1 => c800000004100000 7000000 ++ ++plxv 4_R1 => 7000000c8000004 700000000000000 ++ ++plxv 8_R1 => 7000000 7000000 ++ ++plxv 16_R1 => 7000000 7000000 ++ ++pstfd 0_R1 43dfe000003fe000 43eff000000ff000 => e000003fe00043df ++pstfd 0_R1 43eff000000ff000 43efefffffcff000 => f000000ff00043ef ++ ++pstfd 4_R1 43dfe000003fe000 43eff000000ff000 => e000003f e00043df ++pstfd 4_R1 43eff000000ff000 43efefffffcff000 => f000000f f00043ef ++ ++pstfd 8_R1 43dfe000003fe000 43eff000000ff000 => e000003fe00043df ++pstfd 8_R1 43eff000000ff000 43efefffffcff000 => f000000ff00043ef ++ ++pstfd 16_R1 43dfe000003fe000 43eff000000ff000 => e000003fe00043df ++pstfd 16_R1 43eff000000ff000 43efefffffcff000 => f000000ff00043ef ++ ++pstfd 32_R1 43dfe000003fe000 43eff000000ff000 => e000003fe00043df ++pstfd 32_R1 43eff000000ff000 43efefffffcff000 => f000000ff00043ef ++ ++pstfs 0_R1 000000005eff0000 000000005f7f8000 => 00005eff ++pstfs 0_R1 000000005f7f8000 
000000005f7f8000 => 80005f7f ++ ++pstfs 4_R1 000000005eff0000 000000005f7f8000 => 00005eff ++pstfs 4_R1 000000005f7f8000 000000005f7f8000 => 80005f7f ++ ++pstfs 8_R1 000000005eff0000 000000005f7f8000 => 00005eff ++pstfs 8_R1 000000005f7f8000 000000005f7f8000 => 80005f7f ++ ++pstfs 16_R1 000000005eff0000 000000005f7f8000 => 00005eff ++pstfs 16_R1 000000005f7f8000 000000005f7f8000 => 80005f7f ++ ++pstfs 32_R1 000000005eff0000 000000005f7f8000 => 00005eff ++pstfs 32_R1 000000005f7f8000 000000005f7f8000 => 80005f7f ++ ++pstxsd 0_R1 => 0000000000000000 ++ ++pstxsd 4_R1 => 00000000 00000000 ++ ++pstxsd 8_R1 => 0000000000000000 ++ ++pstxsd 16_R1 => 0000000000000000 ++ ++pstxsd 32_R1 => 0000000000000000 ++ ++pstxsd 64_R1 => 0000000000000000 ++ ++pstxssp 0_R1 => 00000000 ++ ++pstxssp 4_R1 => 00000000 ++ ++pstxssp 8_R1 => 00000000 ++ ++pstxssp 16_R1 => 00000000 ++ ++pstxssp 32_R1 => 00000000 ++ ++pstxssp 64_R1 => 00000000 ++ ++pstxvp off0_R1 0180055e0180077e 0080000e8080000e ff7ffffe7f7ffffe ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 077e0180055e0180 000e8080000e0080 ++ ++pstxvp off16_R1 0180055e0180077e 0080000e8080000e ff7ffffe7f7ffffe ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 077e0180055e0180 000e8080000e0080 ++ ++pstxvp off32_R1 0180055e0180077e 0080000e8080000e ff7ffffe7f7ffffe ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 077e0180055e0180 000e8080000e0080 ++ ++pstxvp off48_R1 0180055e0180077e 0080000e8080000e ff7ffffe7f7ffffe ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 077e0180055e0180 000e8080000e0080 ++ ++pstxv 0_R1 ff7ffffe7f7ffffe,ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 ++ ++pstxv 4_R1 ff7ffffe7f7ffffe,ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 ++ ++pstxv 8_R1 ff7ffffe7f7ffffe,ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 ++ ++pstxv 16_R1 ff7ffffe7f7ffffe,ff8000007f800000 => fffe7f7f fffeff7f00007f80 0000ff80 ++ ++All done. 
Tested 58 different instruction groups +diff --git a/none/tests/ppc64/test_isa_3_1_R1_XT.vgtest b/none/tests/ppc64/test_isa_3_1_R1_XT.vgtest +new file mode 100644 +index 000000000..7331aafad +--- /dev/null ++++ b/none/tests/ppc64/test_isa_3_1_R1_XT.vgtest +@@ -0,0 +1,2 @@ ++prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1 ++prog: test_isa_3_1_R1_XT +diff --git a/none/tests/ppc64/test_isa_3_1_common.c b/none/tests/ppc64/test_isa_3_1_common.c +index 7c3dc6f00..b3320277b 100644 +--- a/none/tests/ppc64/test_isa_3_1_common.c ++++ b/none/tests/ppc64/test_isa_3_1_common.c +@@ -134,11 +134,13 @@ bool uses_acc_vsrs; + bool uses_pmsk; + bool uses_buffer; // Buffer related. + bool uses_load_buffer, uses_store_buffer, uses_any_buffer; ++bool updates_byte, updates_halfword, updates_word; // output helpers. + bool uses_quad; + unsigned long output_mask; // Output field special handling. + bool instruction_is_sp, instruction_is_sp_estimate; + bool instruction_is_dp, instruction_is_dp_estimate; + bool instruction_is_b16; ++bool instruction_is_relative; + + unsigned long long min (unsigned long long a, unsigned long long b) { + if ( a < b ) +@@ -236,6 +238,18 @@ void identify_form_components (const char *instruction_name, + (strncmp (instruction_name, "pmst", 4) == 0) || + (strncmp (instruction_name, "pst", 3) == 0) || + (strncmp (instruction_name, "st", 2) == 0)); ++ updates_byte = ( ++ (strncmp (instruction_name, "pstb", 4) == 0) ); ++ updates_halfword = ( ++ (strncmp (instruction_name, "psth", 4) == 0) || ++ (strncmp (instruction_name, "pstfs", 4) == 0) || ++ (strncmp (instruction_name, "pstxsd", 4) == 0) || ++ (strncmp (instruction_name, "pstxssp", 4) == 0) || ++ (strncmp (instruction_name, "pstxv", 4) == 0) || ++ (strncmp (instruction_name, "psfs", 4) == 0) ); ++ updates_word = ( ++ (strncmp (instruction_name, "pstw", 4) == 0) ); ++ + uses_any_buffer = (strstr (cur_form, "(RA)") != NULL); + uses_buffer = uses_any_buffer||uses_load_buffer||uses_store_buffer; + +@@ -268,6 
+282,15 @@ void identify_form_components (const char *instruction_name, + instruction_is_b16 = ( current_test.mask & B16_MASK ); + } + ++/* Parse the provided function name to set assorted values. ++ In particular, set an indicator when the instruction test has ++ indicated it will run with R==1 that indicates it is a PC-relative ++ instruction. Those tests should all have "_R1" as part of ++ the function name. */ ++void identify_instruction_by_func_name(const char * function_name) { ++ instruction_is_relative = ( (strstr (function_name, "R1") != NULL)); ++} ++ + void display_form_components (char * cur_form) { + printf (" %s\n", cur_form); + printf ("Instruction form elements: "); +@@ -288,7 +311,7 @@ void display_form_components (char * cur_form) { + if (has_frbp) printf ("frbp "); + if (has_frs) printf ("frs "); + if (has_frsp) printf ("frsp "); +- if (has_frt) printf ("frt "); ++ if (has_frt) printf ("frt%s ",(instruction_is_relative)?"-raw":""); + if (has_frtp) printf ("frtp "); + if (has_xa) printf ("xa "); + if (has_xap) printf ("xap "); +@@ -298,6 +321,7 @@ void display_form_components (char * cur_form) { + if (has_xsp) printf ("xsp "); + if (has_xt) printf ("xt "); + if (has_xtp) printf ("xtp "); ++ if (instruction_is_relative) printf ("R==1 "); + if (uses_acc_src) printf ("AS "); + if (uses_acc_dest) printf ("AT "); + printf ("\n"); +@@ -991,6 +1015,107 @@ if (debug_show_values) printf (" buffer:"); + } + } + ++/* **** Reloc Buffer **************************************** */ ++/* Create a large buffer to be the destination for pc-relative ++ * writes. This test is built with linker hints in order ++ * to ensure our buffer, stored in the .bss section, is at a ++ * mostly known offset from the instructions being exercised, ++ * so a hardcoded offset from the PC (pc-relative) will be ++ * on-target. ++ * If there are significant reworks to the code, the bss or ++ * text sections, or the offsets used may need to change. 
++ * ++ * The linker hints are specifically -Tbss and -Ttext. ++ * gcc foo.c test_isa_3_1_common.c -I../../../ -Wl,-Tbss 0x20000 -Wl,-Ttext 0x40000 ++ */ ++ /* RELOC_BUFFER_SIZE is defined to 0x1000 in isa_3_1_helpers.h */ ++#define RELOC_BUFFER_PATTERN 0x0001000100010001 ++volatile unsigned long long pcrelative_write_target[RELOC_BUFFER_SIZE]; ++ ++/* Initialize the buffer to known values. */ ++void init_pcrelative_write_target() { ++ int i; ++ for (i=0;i %llx\n",i,ref_value,curr_value); ++ if (updates_byte) { ++ for (z=0;z<8;z++) { ++ rshift=z*8; ++ if (verbose) printf("z:%d ",z); ++ init_token = (ref_value>>rshift) & 0xff; ++ curr_token = (curr_value>>rshift) & 0xff; ++ if (verbose) ++ printf("wms byte:: %llx -> %llx \n",init_token,curr_token); ++ if (init_token == curr_token && (updates_byte||updates_halfword||updates_word) ) { ++ printf("%2s"," "); ++ } else { ++ printf("%02llx",curr_token); ++ } ++ } ++ } ++ else if (updates_halfword) { ++ for (z=0;z<4;z++) { ++ rshift=z*16; ++ if (verbose) printf("z:%d ",z); ++ init_token = (ref_value>>rshift) & 0xffff; ++ curr_token = (curr_value>>rshift) & 0xffff; ++ if (verbose) ++ printf("wms half:: %llx -> %llx \n",init_token,curr_token); ++ if (init_token == curr_token) { ++ printf("%2s"," "); ++ } else { ++ printf("%04llx",curr_token); ++ } ++ } ++ } ++ else if (updates_word) { ++ for (z=0;z<2;z++) { ++ rshift=z*32; ++ if (verbose) printf("z:%d ",z); ++ init_token = (ref_value>>rshift) & 0xffffffff; ++ curr_token = (curr_value>>rshift) & 0xffffffff; ++ if (verbose) ++ printf("wms word:: %llx -> %llx \n",init_token,curr_token); ++ if (init_token == curr_token ) { ++ printf("%2s"," "); ++ } else { ++ printf("%08llx",curr_token); ++ } ++ } ++ } ++ else { ++ printf("%016llx ",curr_value); ++ } ++ } ++ } ++} ++ ++/* Helper that returns the address of the pcrelative_write_target buffer. ++ Due to variances in where the sections land in memory, this value is ++ used to normalize the results. (see paddi tests for usage). 
*/ ++unsigned long long pcrelative_buff_addr(int x) { ++ /* Return the base address of the array. The base address will be ++ a function of the code load address. */ ++ return (unsigned long long) &pcrelative_write_target[x]; ++} ++ + void print_undefined () { + if (debug_show_values) + printf (" [Undef]"); +@@ -1339,7 +1464,7 @@ void print_frt () { + /* If the result is a dfp128 value, the dfp128 value is + contained in the frt, frtp values which are split across + a pair of VSRs. */ +- if (uses_dfp128_output) { ++ if (!instruction_is_relative && uses_dfp128_output) { + if (verbose) print_vsr (28); + if (verbose) print_vsr (29); + value1 = get_vsrhd_vs28 (); +@@ -1347,7 +1472,12 @@ void print_frt () { + dissect_dfp128_float (value1, value3); + } else { + if (debug_show_raw_values) generic_print_float_as_hex (frt); +- printf (" %e", frt); ++ if (instruction_is_relative) { ++ printf ("_ %e _ ", frt); ++ print_vsr (28); ++ } else { ++ printf (" %e", frt); ++ } + if (has_frtp) { + if (debug_show_raw_values) generic_print_float_as_hex (frtp); + printf (" %e", frtp); +@@ -1652,7 +1782,15 @@ void print_all() { + void print_register_header () { + post_test = 0; + if (debug_show_all_regs) print_all(); +- if (has_ra) print_ra (); ++ ++ if (has_ra) { ++ /* Suppress the print of RA if the instruction has ++ R==1, since the ra value must be zero for the ++ instruction to be valid. */ ++ if (!instruction_is_relative) ++ print_ra(); ++ } ++ + if (has_rb) print_rb (); + if (has_rc) print_rc (); + if (has_rs) print_rs(); +@@ -1894,6 +2032,11 @@ void set_up_iterators () { + } else { + a_start=0; b_start=0; c_start=0; m_start=0; + } ++ /* Special casing for R==1 tests. 
*/ ++ if (instruction_is_relative) { ++ a_iters = 1; ++ m_start=3; m_iters=4; ++ } + if ((has_vra+has_vrb+has_vrc+has_vrm+has_xa+has_xb+uses_MC > 2) && + (!debug_enable_all_iters)) { + /* Instruction tests using multiple fields will generate a lot of +@@ -2196,15 +2339,12 @@ void initialize_source_registers () { + vrb[0] = vsxargs[ (vrbi ) % isr_modulo]; + vrb[1] = vsxargs[ (vrbi+1) % isr_modulo]; + } +- +- if (has_xa) { +- vec_xa[0] = vsxargs[ (vrai ) % isr_modulo]; +- vec_xa[1] = vsxargs[ (vrai+1) % isr_modulo]; +- } +- if (has_xb) { +- vec_xb[0] = vsxargs[ (vrbi ) % isr_modulo]; +- vec_xb[1] = vsxargs[ (vrbi+1) % isr_modulo]; +- } ++ ++ if (instruction_is_relative) { ++ /* for pstxsd and friends using R=1 */ ++ vec_xa[0] = vsxargs[ (vrai+2 ) % isr_modulo]; ++ vec_xa[1] = vsxargs[ (vrai+3 ) % isr_modulo]; ++ } + + // xap 'shares' with the second half of an xa-pair. + if (has_xap ) { diff --git a/SOURCES/valgrind-3.18.1-ppc-pstq.patch b/SOURCES/valgrind-3.18.1-ppc-pstq.patch new file mode 100644 index 0000000..2e23d18 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-ppc-pstq.patch @@ -0,0 +1,47 @@ +commit ae8c6de01417023e78763de145b1c0e6ddd87277 +Author: Carl Love +Date: Wed Oct 20 20:40:13 2021 +0000 + + Fix for the prefixed stq instruction in PC relative mode. + + The pstq instruction for R=1, was not using the correct effective address. + The EA_hi and EA_lo should have been based on the value of EA as calculated + by the function calculate_prefix_EA. Unfortunately, the EA_hi and EA_lo + addresses were still using the previous code (not PC relative) to calculate + the address from the contents of RA plus the offset.
+ +diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c +index 8afd77490..543fa9574 100644 +--- a/VEX/priv/guest_ppc_toIR.c ++++ b/VEX/priv/guest_ppc_toIR.c +@@ -9838,23 +9838,24 @@ static Bool dis_int_store_ds_prefix ( UInt prefix, + if (host_endness == VexEndnessBE) { + + /* upper 64-bits */ +- assign( EA_hi, ea_rAor0_simm( rA_addr, immediate_val ) ); ++ assign( EA_hi, mkexpr(EA)); + + /* lower 64-bits */ +- assign( EA_lo, ea_rAor0_simm( rA_addr, immediate_val+8 ) ); ++ assign( EA_lo, binop(Iop_Add64, mkexpr(EA), mkU64(8))); ++ + } else { + /* upper 64-bits */ +- assign( EA_hi, ea_rAor0_simm( rA_addr, immediate_val+8 ) ); ++ assign( EA_hi, binop(Iop_Add64, mkexpr(EA), mkU64(8))); + + /* lower 64-bits */ +- assign( EA_lo, ea_rAor0_simm( rA_addr, immediate_val ) ); ++ assign( EA_lo, mkexpr(EA)); + } + } else { + /* upper half of upper 64-bits */ +- assign( EA_hi, ea_rAor0_simm( rA_addr, immediate_val+4 ) ); ++ assign( EA_hi, binop(Iop_Add32, mkexpr(EA), mkU32(4))); + + /* lower half of upper 64-bits */ +- assign( EA_lo, ea_rAor0_simm( rA_addr, immediate_val+12 ) ); ++ assign( EA_lo, binop(Iop_Add32, mkexpr(EA), mkU32(12))); + } + + /* Note, the store order for stq instruction is the same for BE diff --git a/SOURCES/valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch b/SOURCES/valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch new file mode 100644 index 0000000..bb36c80 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch @@ -0,0 +1,60 @@ +commit 6e08ee95f7f1b1c3fd434fa380cc5b2cc3e3f7c7 +Author: Carl Love +Date: Fri Oct 29 16:30:33 2021 -0500 + + Bug 444571 - PPC, fix the lxsibzx and lxsihzx so they only load their respective sized data. + + The lxsibzx was doing a 64-bit load. The result was initializing + additional bytes in the register that should not have been initialized. + The memcheck/tests/linux/dlclose_leak test detected the issue. The + code generation uses lxsibzx and stxsibx with -mcpu=power9. 
Previously + the lbz and stb instructions were generated. + + The same issue was noted and fixed with the lxsihzx instruction. The + memcheck/tests/linux/badrw test now passes as well. + + https://bugs.kde.org/show_bug.cgi?id=444571 + +diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c +index d90d566ed..8afd77490 100644 +--- a/VEX/priv/guest_ppc_toIR.c ++++ b/VEX/priv/guest_ppc_toIR.c +@@ -25359,19 +25359,17 @@ dis_vx_load ( UInt prefix, UInt theInstr ) + + else + irx_addr = mkexpr( EA ); +- +- byte = load( Ity_I64, irx_addr ); ++ /* byte load */ ++ byte = load( Ity_I8, irx_addr ); + putVSReg( XT, binop( Iop_64HLtoV128, +- binop( Iop_And64, +- byte, +- mkU64( 0xFF ) ), ++ unop( Iop_8Uto64, byte ), + mkU64( 0 ) ) ); + break; + } + + case 0x32D: // lxsihzx + { +- IRExpr *byte; ++ IRExpr *hword; + IRExpr* irx_addr; + + DIP("lxsihzx %u,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr); +@@ -25382,11 +25380,10 @@ dis_vx_load ( UInt prefix, UInt theInstr ) + else + irx_addr = mkexpr( EA ); + +- byte = load( Ity_I64, irx_addr ); ++ hword = load( Ity_I16, irx_addr ); + putVSReg( XT, binop( Iop_64HLtoV128, +- binop( Iop_And64, +- byte, +- mkU64( 0xFFFF ) ), ++ unop( Iop_16Uto64, ++ hword ), + mkU64( 0 ) ) ); + break; + } diff --git a/SOURCES/valgrind-3.18.1-rust-v0-demangle.patch b/SOURCES/valgrind-3.18.1-rust-v0-demangle.patch new file mode 100644 index 0000000..e48a106 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-rust-v0-demangle.patch @@ -0,0 +1,137 @@ +commit 4831385c6706b377851284adc4c4545fff4c6564 +Author: Nicholas Nethercote +Date: Tue Nov 9 12:30:07 2021 +1100 + + Fix Rust v0 demangling. + + It's currently broken due to a silly test that prevents the v0 + demangling code from even running. + + The commit also adds a test, to avoid such problems in the future. 
+ +diff --git a/coregrind/m_demangle/demangle.c b/coregrind/m_demangle/demangle.c +index 16161da2a..3fd7cb75f 100644 +--- a/coregrind/m_demangle/demangle.c ++++ b/coregrind/m_demangle/demangle.c +@@ -118,8 +118,13 @@ void VG_(demangle) ( Bool do_cxx_demangling, Bool do_z_demangling, + } + + /* Possibly undo (1) */ ++ // - C++ mangled symbols start with "_Z" (possibly with exceptions?) ++ // - Rust "legacy" mangled symbols start with "_Z". ++ // - Rust "v0" mangled symbols start with "_R". ++ // XXX: the Java/Rust/Ada demangling here probably doesn't work. See ++ // https://bugs.kde.org/show_bug.cgi?id=445235 for details. + if (do_cxx_demangling && VG_(clo_demangle) +- && orig != NULL && orig[0] == '_' && orig[1] == 'Z') { ++ && orig != NULL && orig[0] == '_' && (orig[1] == 'Z' || orig[1] == 'R')) { + /* !!! vvv STATIC vvv !!! */ + static HChar* demangled = NULL; + /* !!! ^^^ STATIC ^^^ !!! */ +diff --git a/memcheck/tests/demangle-rust.c b/memcheck/tests/demangle-rust.c +new file mode 100644 +index 000000000..f2a458b2a +--- /dev/null ++++ b/memcheck/tests/demangle-rust.c +@@ -0,0 +1,31 @@ ++// Valgrind supports demangling Rust symbols (both the "v0" and "legacy" ++// mangling schemes), but we don't want to add a dependency on the Rust ++// compiler for a single test. So this is a C program with function names that ++// are mangled Rust symbols. In the output, they become demangled Rust names. ++// It's a hack, but a useful one. ++ ++#include ++ ++// A v0 symbol that demangles to: ::fold_with:: ++int _RINvYNtNtCs4uGc65yWeeX_12rustc_middle2ty13PredicateKindNtNtB5_4fold12TypeFoldable9fold_withNtNtNtCsgI90OQiJWEs_11rustc_infer5infer7resolve24OpportunisticVarResolverECsdozMG8X9FIu_21rustc_trait_selection(int *p) ++{ ++ return *p ? 
1 : 2; ++} ++ ++// A v0 symbol that demangles to: rustc_expand::mbe::macro_parser::parse_tt ++int _RNvNtNtCsaqSe1lZGvEL_12rustc_expand3mbe12macro_parser8parse_tt(int* p) ++{ ++ return _RINvYNtNtCs4uGc65yWeeX_12rustc_middle2ty13PredicateKindNtNtB5_4fold12TypeFoldable9fold_withNtNtNtCsgI90OQiJWEs_11rustc_infer5infer7resolve24OpportunisticVarResolverECsdozMG8X9FIu_21rustc_trait_selection(p); ++} ++ ++// A legacy symbol that demangles to: core::str::lossy::Utf8Lossy::from_bytes ++int _ZN4core3str5lossy9Utf8Lossy10from_bytes17heb1677c8cb728b0bE(int* p) ++{ ++ return _RNvNtNtCsaqSe1lZGvEL_12rustc_expand3mbe12macro_parser8parse_tt(p); ++} ++ ++int main(void) ++{ ++ return _ZN4core3str5lossy9Utf8Lossy10from_bytes17heb1677c8cb728b0bE(malloc(sizeof(int))); ++} ++ +diff --git a/memcheck/tests/demangle-rust.stderr.exp b/memcheck/tests/demangle-rust.stderr.exp +new file mode 100644 +index 000000000..f04bb625b +--- /dev/null ++++ b/memcheck/tests/demangle-rust.stderr.exp +@@ -0,0 +1,6 @@ ++Conditional jump or move depends on uninitialised value(s) ++ at 0x........: ::fold_with:: (demangle-rust.c:12) ++ by 0x........: rustc_expand::mbe::macro_parser::parse_tt (demangle-rust.c:18) ++ by 0x........: core::str::lossy::Utf8Lossy::from_bytes (demangle-rust.c:24) ++ by 0x........: main (demangle-rust.c:29) ++ +diff --git a/memcheck/tests/demangle-rust.vgtest b/memcheck/tests/demangle-rust.vgtest +new file mode 100644 +index 000000000..d726c6b2e +--- /dev/null ++++ b/memcheck/tests/demangle-rust.vgtest +@@ -0,0 +1,2 @@ ++prog: demangle-rust ++vgopts: -q + +commit c1bfa115f985633722f25922d2996c231e8c9d8d +Author: Mark Wielaard +Date: Wed Nov 10 09:02:36 2021 +0100 + + Add demangle-rust.vgtest demangle-rust.stderr.exp to EXTRA_DIST + +diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am +index 4d0476e2d..7837d87c7 100644 +--- a/memcheck/tests/Makefile.am ++++ b/memcheck/tests/Makefile.am +@@ -281,6 +281,7 @@ EXTRA_DIST = \ + realloc3.stderr.exp realloc3.vgtest \ + 
recursive-merge.stderr.exp recursive-merge.vgtest \ + resvn_stack.stderr.exp resvn_stack.vgtest \ ++ demangle-rust.vgtest demangle-rust.stderr.exp \ + sbfragment.stdout.exp sbfragment.stderr.exp sbfragment.vgtest \ + sem.stderr.exp sem.vgtest \ + sendmsg.stderr.exp sendmsg.stderr.exp-solaris sendmsg.vgtest \ + +commit d151907e5d8ff393f4fef126c8ae445ea8813661 +Author: Mark Wielaard +Date: Thu Nov 11 18:02:09 2021 +0100 + + Add demangle-rust to check_PROGRAMS + + The demangle-rust.vgtest would fail because the demangle-rust binary + wasn't built by default. Add it to check_PROGRAMS and define + demangle_rust_SOURCES to make sure it is always built. + +diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am +index 7837d87c7..449710020 100644 +--- a/memcheck/tests/Makefile.am ++++ b/memcheck/tests/Makefile.am +@@ -392,6 +392,7 @@ check_PROGRAMS = \ + custom_alloc \ + custom-overlap \ + demangle \ ++ demangle-rust \ + big_debuginfo_symbol \ + deep-backtrace \ + describe-block \ +@@ -505,6 +506,7 @@ endif + leak_cpp_interior_SOURCES = leak_cpp_interior.cpp + + demangle_SOURCES = demangle.cpp ++demangle_rust_SOURCES = demangle-rust.c + + # Suppress various gcc warnings which are correct, but for things + # we are actually testing for at runtime. diff --git a/SOURCES/valgrind-3.18.1-s390x-EXRL.patch b/SOURCES/valgrind-3.18.1-s390x-EXRL.patch new file mode 100644 index 0000000..6927cc3 --- /dev/null +++ b/SOURCES/valgrind-3.18.1-s390x-EXRL.patch @@ -0,0 +1,549 @@ +commit b77dbefe72e4a5c7bcf1576a02c909010bd56991 +Author: Andreas Arnez +Date: Fri Oct 22 19:55:12 2021 +0200 + + Bug 444242 - s390x: Sign-extend "relative long" offset in EXRL + + In s390_irgen_EXRL, the offset is zero-extended instead of sign-extended, + typically causing Valgrind to crash when a negative offset occurs. + + Fix this with a new helper function that calculates a "relative long" + address from a 32-bit offset.
Replace other calculations of "relative + long" addresses by invocations of this function as well. And for + consistency, do the same with "relative" (short) addresses. + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 72222ab04..fffc563d4 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -399,6 +399,22 @@ mkF64i(ULong value) + return IRExpr_Const(IRConst_F64i(value)); + } + ++/* Return the 64-bit address with the given 32-bit "relative long" offset from ++ the current guest instruction being translated. */ ++static __inline__ Addr64 ++addr_rel_long(UInt offset) ++{ ++ return guest_IA_curr_instr + ((Addr64)(Long)(Int)offset << 1); ++} ++ ++/* Return the 64-bit address with the given 16-bit "relative" offset from the ++ current guest instruction being translated. */ ++static __inline__ Addr64 ++addr_relative(UShort offset) ++{ ++ return guest_IA_curr_instr + ((Addr64)(Long)(Short)offset << 1); ++} ++ + /* Little helper function for my sanity. 
ITE = if-then-else */ + static IRExpr * + mkite(IRExpr *condition, IRExpr *iftrue, IRExpr *iffalse) +@@ -5516,7 +5532,7 @@ static const HChar * + s390_irgen_BRAS(UChar r1, UShort i2) + { + put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + 4ULL)); +- call_function_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ call_function_and_chase(addr_relative(i2)); + + return "bras"; + } +@@ -5525,7 +5541,7 @@ static const HChar * + s390_irgen_BRASL(UChar r1, UInt i2) + { + put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + 6ULL)); +- call_function_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)); ++ call_function_and_chase(addr_rel_long(i2)); + + return "brasl"; + } +@@ -5538,12 +5554,11 @@ s390_irgen_BRC(UChar r1, UShort i2) + if (r1 == 0) { + } else { + if (r1 == 15) { +- always_goto_and_chase( +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ always_goto_and_chase(addr_relative(i2)); + } else { + assign(cond, s390_call_calculate_cond(r1)); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + } + } +@@ -5561,11 +5576,11 @@ s390_irgen_BRCL(UChar r1, UInt i2) + if (r1 == 0) { + } else { + if (r1 == 15) { +- always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)); ++ always_goto_and_chase(addr_rel_long(i2)); + } else { + assign(cond, s390_call_calculate_cond(r1)); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)); ++ addr_rel_long(i2)); + } + } + if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) +@@ -5579,7 +5594,7 @@ s390_irgen_BRCT(UChar r1, UShort i2) + { + put_gpr_w1(r1, binop(Iop_Sub32, get_gpr_w1(r1), mkU32(1))); + if_condition_goto(binop(Iop_CmpNE32, get_gpr_w1(r1), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + return "brct"; + } +@@ -5589,7 +5604,7 @@ s390_irgen_BRCTH(UChar r1, UInt i2) + { + put_gpr_w0(r1, 
binop(Iop_Sub32, get_gpr_w0(r1), mkU32(1))); + if_condition_goto(binop(Iop_CmpNE32, get_gpr_w0(r1), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + return "brcth"; + } +@@ -5599,7 +5614,7 @@ s390_irgen_BRCTG(UChar r1, UShort i2) + { + put_gpr_dw0(r1, binop(Iop_Sub64, get_gpr_dw0(r1), mkU64(1))); + if_condition_goto(binop(Iop_CmpNE64, get_gpr_dw0(r1), mkU64(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + return "brctg"; + } +@@ -5612,7 +5627,7 @@ s390_irgen_BRXH(UChar r1, UChar r3, UShort i2) + assign(value, get_gpr_w1(r3 | 1)); + put_gpr_w1(r1, binop(Iop_Add32, get_gpr_w1(r1), get_gpr_w1(r3))); + if_condition_goto(binop(Iop_CmpLT32S, mkexpr(value), get_gpr_w1(r1)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + return "brxh"; + } +@@ -5625,7 +5640,7 @@ s390_irgen_BRXHG(UChar r1, UChar r3, UShort i2) + assign(value, get_gpr_dw0(r3 | 1)); + put_gpr_dw0(r1, binop(Iop_Add64, get_gpr_dw0(r1), get_gpr_dw0(r3))); + if_condition_goto(binop(Iop_CmpLT64S, mkexpr(value), get_gpr_dw0(r1)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + return "brxhg"; + } +@@ -5638,7 +5653,7 @@ s390_irgen_BRXLE(UChar r1, UChar r3, UShort i2) + assign(value, get_gpr_w1(r3 | 1)); + put_gpr_w1(r1, binop(Iop_Add32, get_gpr_w1(r1), get_gpr_w1(r3))); + if_condition_goto(binop(Iop_CmpLE32S, get_gpr_w1(r1), mkexpr(value)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + return "brxle"; + } +@@ -5651,7 +5666,7 @@ s390_irgen_BRXLG(UChar r1, UChar r3, UShort i2) + assign(value, get_gpr_dw0(r3 | 1)); + put_gpr_dw0(r1, binop(Iop_Add64, get_gpr_dw0(r1), get_gpr_dw0(r3))); + if_condition_goto(binop(Iop_CmpLE64S, get_gpr_dw0(r1), mkexpr(value)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1)); ++ addr_relative(i2)); + + return "brxlg"; + } +@@ -5782,8 +5797,7 @@ s390_irgen_CRL(UChar r1, UInt i2) + IRTemp 
op2 = newTemp(Ity_I32); + + assign(op1, get_gpr_w1(r1)); +- assign(op2, load(Ity_I32, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int) +- i2 << 1)))); ++ assign(op2, load(Ity_I32, mkU64(addr_rel_long(i2)))); + s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2); + + return "crl"; +@@ -5796,8 +5810,7 @@ s390_irgen_CGRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I64); + + assign(op1, get_gpr_dw0(r1)); +- assign(op2, load(Ity_I64, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int) +- i2 << 1)))); ++ assign(op2, load(Ity_I64, mkU64(addr_rel_long(i2)))); + s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2); + + return "cgrl"; +@@ -5810,8 +5823,7 @@ s390_irgen_CGFRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I64); + + assign(op1, get_gpr_dw0(r1)); +- assign(op2, unop(Iop_32Sto64, load(Ity_I32, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ assign(op2, unop(Iop_32Sto64, load(Ity_I32, mkU64(addr_rel_long(i2))))); + s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2); + + return "cgfrl"; +@@ -5875,15 +5887,14 @@ s390_irgen_CRJ(UChar r1, UChar r2, UShort i4, UChar m3) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase( +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { + assign(op1, get_gpr_w1(r1)); + assign(op2, get_gpr_w1(r2)); + assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_SIGNED_COMPARE, + op1, op2)); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -5901,15 +5912,14 @@ s390_irgen_CGRJ(UChar r1, UChar r2, UShort i4, UChar m3) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase( +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { + assign(op1, get_gpr_dw0(r1)); + assign(op2, get_gpr_dw0(r2)); + assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_SIGNED_COMPARE, 
+ op1, op2)); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -5975,14 +5985,14 @@ s390_irgen_CIJ(UChar r1, UChar m3, UShort i4, UChar i2) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { + assign(op1, get_gpr_w1(r1)); + op2 = (Int)(Char)i2; + assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_SIGNED_COMPARE, op1, + mktemp(Ity_I32, mkU32((UInt)op2)))); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -6000,14 +6010,14 @@ s390_irgen_CGIJ(UChar r1, UChar m3, UShort i4, UChar i2) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { + assign(op1, get_gpr_dw0(r1)); + op2 = (Long)(Char)i2; + assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_SIGNED_COMPARE, op1, + mktemp(Ity_I64, mkU64((ULong)op2)))); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -6131,8 +6141,7 @@ s390_irgen_CHRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I32); + + assign(op1, get_gpr_w1(r1)); +- assign(op2, unop(Iop_16Sto32, load(Ity_I16, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ assign(op2, unop(Iop_16Sto32, load(Ity_I16, mkU64(addr_rel_long(i2))))); + s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2); + + return "chrl"; +@@ -6145,8 +6154,7 @@ s390_irgen_CGHRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I64); + + assign(op1, get_gpr_dw0(r1)); +- assign(op2, unop(Iop_16Sto64, load(Ity_I16, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ assign(op2, unop(Iop_16Sto64, 
load(Ity_I16, mkU64(addr_rel_long(i2))))); + s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2); + + return "cghrl"; +@@ -6401,8 +6409,7 @@ s390_irgen_CLRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I32); + + assign(op1, get_gpr_w1(r1)); +- assign(op2, load(Ity_I32, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int) +- i2 << 1)))); ++ assign(op2, load(Ity_I32, mkU64(addr_rel_long(i2)))); + s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2); + + return "clrl"; +@@ -6415,8 +6422,7 @@ s390_irgen_CLGRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I64); + + assign(op1, get_gpr_dw0(r1)); +- assign(op2, load(Ity_I64, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int) +- i2 << 1)))); ++ assign(op2, load(Ity_I64, mkU64(addr_rel_long(i2)))); + s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2); + + return "clgrl"; +@@ -6429,8 +6435,7 @@ s390_irgen_CLGFRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I64); + + assign(op1, get_gpr_dw0(r1)); +- assign(op2, unop(Iop_32Uto64, load(Ity_I32, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ assign(op2, unop(Iop_32Uto64, load(Ity_I32, mkU64(addr_rel_long(i2))))); + s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2); + + return "clgfrl"; +@@ -6443,8 +6448,7 @@ s390_irgen_CLHRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I32); + + assign(op1, get_gpr_w1(r1)); +- assign(op2, unop(Iop_16Uto32, load(Ity_I16, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ assign(op2, unop(Iop_16Uto32, load(Ity_I16, mkU64(addr_rel_long(i2))))); + s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2); + + return "clhrl"; +@@ -6457,8 +6461,7 @@ s390_irgen_CLGHRL(UChar r1, UInt i2) + IRTemp op2 = newTemp(Ity_I64); + + assign(op1, get_gpr_dw0(r1)); +- assign(op2, unop(Iop_16Uto64, load(Ity_I16, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ assign(op2, unop(Iop_16Uto64, load(Ity_I16, mkU64(addr_rel_long(i2))))); + s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, 
op2); + + return "clghrl"; +@@ -6730,14 +6733,14 @@ s390_irgen_CLRJ(UChar r1, UChar r2, UShort i4, UChar m3) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { + assign(op1, get_gpr_w1(r1)); + assign(op2, get_gpr_w1(r2)); + assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_UNSIGNED_COMPARE, + op1, op2)); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -6755,14 +6758,14 @@ s390_irgen_CLGRJ(UChar r1, UChar r2, UShort i4, UChar m3) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { + assign(op1, get_gpr_dw0(r1)); + assign(op2, get_gpr_dw0(r2)); + assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_UNSIGNED_COMPARE, + op1, op2)); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -6828,14 +6831,14 @@ s390_irgen_CLIJ(UChar r1, UChar m3, UShort i4, UChar i2) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { + assign(op1, get_gpr_w1(r1)); + op2 = (UInt)i2; + assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_UNSIGNED_COMPARE, op1, + mktemp(Ity_I32, mkU32(op2)))); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -6853,14 +6856,14 @@ s390_irgen_CLGIJ(UChar r1, UChar m3, UShort i4, UChar i2) + if (m3 == 0) { + } else { + if (m3 == 14) { +- always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ always_goto_and_chase(addr_relative(i4)); + } else { 
+ assign(op1, get_gpr_dw0(r1)); + op2 = (ULong)i2; + assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_UNSIGNED_COMPARE, op1, + mktemp(Ity_I64, mkU64(op2)))); + if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)), +- guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1)); ++ addr_relative(i4)); + + } + } +@@ -7539,8 +7542,7 @@ s390_irgen_LGFI(UChar r1, UInt i2) + static const HChar * + s390_irgen_LRL(UChar r1, UInt i2) + { +- put_gpr_w1(r1, load(Ity_I32, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int) +- i2 << 1)))); ++ put_gpr_w1(r1, load(Ity_I32, mkU64(addr_rel_long(i2)))); + + return "lrl"; + } +@@ -7548,8 +7550,7 @@ s390_irgen_LRL(UChar r1, UInt i2) + static const HChar * + s390_irgen_LGRL(UChar r1, UInt i2) + { +- put_gpr_dw0(r1, load(Ity_I64, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int) +- i2 << 1)))); ++ put_gpr_dw0(r1, load(Ity_I64, mkU64(addr_rel_long(i2)))); + + return "lgrl"; + } +@@ -7557,8 +7558,7 @@ s390_irgen_LGRL(UChar r1, UInt i2) + static const HChar * + s390_irgen_LGFRL(UChar r1, UInt i2) + { +- put_gpr_dw0(r1, unop(Iop_32Sto64, load(Ity_I32, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ put_gpr_dw0(r1, unop(Iop_32Sto64, load(Ity_I32, mkU64(addr_rel_long(i2))))); + + return "lgfrl"; + } +@@ -7598,7 +7598,7 @@ s390_irgen_LAEY(UChar r1, IRTemp op2addr) + static const HChar * + s390_irgen_LARL(UChar r1, UInt i2) + { +- put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1))); ++ put_gpr_dw0(r1, mkU64(addr_rel_long(i2))); + + return "larl"; + } +@@ -8038,8 +8038,7 @@ s390_irgen_LGHI(UChar r1, UShort i2) + static const HChar * + s390_irgen_LHRL(UChar r1, UInt i2) + { +- put_gpr_w1(r1, unop(Iop_16Sto32, load(Ity_I16, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ put_gpr_w1(r1, unop(Iop_16Sto32, load(Ity_I16, mkU64(addr_rel_long(i2))))); + + return "lhrl"; + } +@@ -8047,8 +8046,7 @@ s390_irgen_LHRL(UChar r1, UInt i2) + static const HChar * + s390_irgen_LGHRL(UChar r1, UInt i2) + { +- 
put_gpr_dw0(r1, unop(Iop_16Sto64, load(Ity_I16, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ put_gpr_dw0(r1, unop(Iop_16Sto64, load(Ity_I16, mkU64(addr_rel_long(i2))))); + + return "lghrl"; + } +@@ -8088,8 +8086,7 @@ s390_irgen_LLGF(UChar r1, IRTemp op2addr) + static const HChar * + s390_irgen_LLGFRL(UChar r1, UInt i2) + { +- put_gpr_dw0(r1, unop(Iop_32Uto64, load(Ity_I32, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ put_gpr_dw0(r1, unop(Iop_32Uto64, load(Ity_I32, mkU64(addr_rel_long(i2))))); + + return "llgfrl"; + } +@@ -8169,8 +8166,7 @@ s390_irgen_LLGH(UChar r1, IRTemp op2addr) + static const HChar * + s390_irgen_LLHRL(UChar r1, UInt i2) + { +- put_gpr_w1(r1, unop(Iop_16Uto32, load(Ity_I16, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ put_gpr_w1(r1, unop(Iop_16Uto32, load(Ity_I16, mkU64(addr_rel_long(i2))))); + + return "llhrl"; + } +@@ -8178,8 +8174,7 @@ s390_irgen_LLHRL(UChar r1, UInt i2) + static const HChar * + s390_irgen_LLGHRL(UChar r1, UInt i2) + { +- put_gpr_dw0(r1, unop(Iop_16Uto64, load(Ity_I16, mkU64(guest_IA_curr_instr + +- ((ULong)(Long)(Int)i2 << 1))))); ++ put_gpr_dw0(r1, unop(Iop_16Uto64, load(Ity_I16, mkU64(addr_rel_long(i2))))); + + return "llghrl"; + } +@@ -10064,8 +10059,7 @@ s390_irgen_STG(UChar r1, IRTemp op2addr) + static const HChar * + s390_irgen_STRL(UChar r1, UInt i2) + { +- store(mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)), +- get_gpr_w1(r1)); ++ store(mkU64(addr_rel_long(i2)), get_gpr_w1(r1)); + + return "strl"; + } +@@ -10073,8 +10067,7 @@ s390_irgen_STRL(UChar r1, UInt i2) + static const HChar * + s390_irgen_STGRL(UChar r1, UInt i2) + { +- store(mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)), +- get_gpr_dw0(r1)); ++ store(mkU64(addr_rel_long(i2)), get_gpr_dw0(r1)); + + return "stgrl"; + } +@@ -10203,8 +10196,7 @@ s390_irgen_STHY(UChar r1, IRTemp op2addr) + static const HChar * + s390_irgen_STHRL(UChar r1, UInt i2) + { +- 
store(mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)), +- get_gpr_hw3(r1)); ++ store(mkU64(addr_rel_long(i2)), get_gpr_hw3(r1)); + + return "sthrl"; + } +@@ -13282,7 +13274,7 @@ static const HChar * + s390_irgen_EXRL(UChar r1, UInt offset) + { + IRTemp addr = newTemp(Ity_I64); +- Addr64 bytes_addr = guest_IA_curr_instr + offset * 2UL; ++ Addr64 bytes_addr = addr_rel_long(offset); + UChar *bytes = (UChar *)(HWord)bytes_addr; + /* we might save one round trip because we know the target */ + if (!last_execute_target) +diff --git a/none/tests/s390x/exrl.c b/none/tests/s390x/exrl.c +index 2c99602d8..e669e484f 100644 +--- a/none/tests/s390x/exrl.c ++++ b/none/tests/s390x/exrl.c +@@ -54,6 +54,17 @@ int main(void) + printf("|\n"); + printf("\n"); + ++ printf("------- EXRL with negative offset\n"); ++ asm volatile( "j 2f\n\t" ++ "1:\n\t" ++ "mvc 2(1,%0),0(%0)\n\t" ++ "2:\n\t" ++ "lghi 1,8\n\t" ++ ".insn ril,0xc60000000000,1,1b\n\t" // exrl 1, 1b ++ : : "a" (target) ++ : "1", "2", "3", "4"); ++ printf(" target = |%s|\n", target); ++ + return 0; + } + +diff --git a/none/tests/s390x/exrl.stdout.exp b/none/tests/s390x/exrl.stdout.exp +index 520919e92..30dcde829 100644 +--- a/none/tests/s390x/exrl.stdout.exp ++++ b/none/tests/s390x/exrl.stdout.exp +@@ -11,3 +11,5 @@ after: target = |0123456789aXXXXX| + ------- EXRL to OR in the syscall number (writes out target) + target = |0123456789aXXXXX| + ++------- EXRL with negative offset ++ target = |01010101010XXXXX| diff --git a/SOURCES/valgrind-3.9.0-helgrind-race-supp.patch b/SOURCES/valgrind-3.9.0-helgrind-race-supp.patch deleted file mode 100644 index 759d151..0000000 --- a/SOURCES/valgrind-3.9.0-helgrind-race-supp.patch +++ /dev/null @@ -1,15 +0,0 @@ ---- valgrind/glibc-2.34567-NPTL-helgrind.supp.jj 2009-08-19 15:37:48.000000000 +0200 -+++ valgrind/glibc-2.34567-NPTL-helgrind.supp 2009-10-21 16:46:31.000000000 +0200 -@@ -88,6 +88,12 @@ - obj:*/lib*/libpthread-2.*so* - } - { -+ helgrind-glibc2X-102a -+ Helgrind:Race -+ 
fun:mythread_wrapper -+ obj:*vgpreload_helgrind*.so -+} -+{ - helgrind-glibc2X-103 - Helgrind:Race - fun:pthread_cond_*@@GLIBC_2.* diff --git a/SPECS/valgrind.spec b/SPECS/valgrind.spec index f312a21..d696689 100644 --- a/SPECS/valgrind.spec +++ b/SPECS/valgrind.spec @@ -2,8 +2,8 @@ Summary: Tool for finding memory management bugs in programs Name: %{?scl_prefix}valgrind -Version: 3.17.0 -Release: 5%{?dist} +Version: 3.18.1 +Release: 7%{?dist} Epoch: 1 License: GPLv2+ URL: http://www.valgrind.org/ @@ -72,14 +72,11 @@ Group: Development/Debuggers # So those will already have their full symbol table. %undefine _include_minidebuginfo -Source0: ftp://sourceware.org/pub/valgrind/valgrind-%{version}.tar.bz2 +Source0: https://sourceware.org/pub/valgrind/valgrind-%{version}.tar.bz2 # Needs investigation and pushing upstream Patch1: valgrind-3.9.0-cachegrind-improvements.patch -# KDE#211352 - helgrind races in helgrind's own mythread_wrapper -Patch2: valgrind-3.9.0-helgrind-race-supp.patch - # Make ld.so supressions slightly less specific. Patch3: valgrind-3.9.0-ldso-supp.patch @@ -89,66 +86,51 @@ Patch4: valgrind-3.16.0-some-stack-protector.patch # Add some -Wl,z,now. 
Patch5: valgrind-3.16.0-some-Wl-z-now.patch -# Upstream commits that provide additional ppc64le ISA 3.1 support -# commit 3cc0232c46a5905b4a6c2fbd302b58bf5f90b3d5 -# PPC64: ISA 3.1 VSX PCV Generate Operations -# commit 078f89e99b6f62e043f6138c6a7ae238befc1f2a -# PPC64: Reduced-Precision bfloat16 Outer Product & Format Conversion Operations -# commit e09fdaf569b975717465ed8043820d0198d4d47d -# PPC64: Reduced-Precision: Missing Integer-based Outer Product Operations -Patch6: valgrind-3.17.0-ppc64-isa-3.1.patch +# KDE#444495 dhat/tests/copy fails on s390x +Patch6: valgrind-3.18.1-dhat-tests-copy.patch -# Upstream commits that provide extra tests for ppc64le ISA 3.1 support -# commit c8fa838be405d7ac43035dcf675bf490800c26ec -# Reduced Precision bfloat16 outer product tests -# commit 4bcc6c8a97c10c4dd41b35bd3b3035ec4037d524 -# VSX Permute Control Vector Generate Operation tests. -# commit c589b652939655090c005a982a71f50c489fb5ce -# Reduced precision Missing Integer based outer tests -Patch7: valgrind-3.17.0-ppc64-isa-3.1-tests.patch +# KDE#444242 s390x: Sign-extend "relative long" offset in EXRL +Patch7: valgrind-3.18.1-s390x-EXRL.patch -# commit 45873298ff2d17accc65654d64758360616aade5 -# s390x: Add missing UNOP insns to s390_insn_as_string -Patch8: valgrind-3.17.0-s390_insn_as_string.patch +# KDE#444571 - PPC, fix lxsibzx and lxsihzx +Patch8: valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch -# KDE#435908 Don't look for separate debuginfo if image already has .debug_info -Patch9: valgrind-3.17.0-debuginfod.patch +# commit ae8c6de01417023e78763de145b1c0e6ddd87277 +# commit 3950c5d661ee09526cddcf24daf5fc22bc83f70c +# Fix for the prefixed stq instruction in PC relative mode. 
+# KDE#444836 pstq instruction for R=1 is not storing to the correct address +Patch9: valgrind-3.18.1-ppc-pstq.patch +Patch10: valgrind-3.18.1-ppc-pstq-tests.patch -# KDE#423963 Only process clone results in the parent thread -Patch10: valgrind-3.17.0-clone-parent-res.patch +# commit 64ab89162906d5b9e2de6c3afe476fec861ef7ec +# gdbserver_tests: Filter out glibc hwcaps libc.so +Patch11: valgrind-3.18.1-gdbserver_tests-hwcap.patch -# commit d74a637206ef5532ccd2ccb2e31ee2762f184e60 -# Bug 433863 - s390x: Remove memcheck test cases for cs, cds, and csg -# commit 18ddcc47c951427efd3b790ba2481159b9bd1598 -# s390x: Support "expensive" comparisons Iop_ExpCmpNE32/64 -# commit 5db3f929c43bf46f4707178706cfe90f43acdd19 -# s390x: Add convenience function mkV128() -# commit e78bd78d3043729033b426218ab8c6dae9c51e96 -# Bug 434296 - s390x: Rework IR conversion of VSTRC, VFAE, and VFEE -# commit 4f17a067c4f8245c05611d6e8aa36e8841bab376 -# Bug 434296 - s390x: Rework IR conversion of VFENE -# commit 9bd78ebd8bb5cd4ebb3f081ceba46836cc485551 -# Bug 434296 - s390x: Rework IR conversion of VISTR -# commit 32312d588b77c5b5b5a0145bb0cc6f795b447790 -# Bug 434296 - s390x: Add memcheck test cases for vector string insns -# commit a0bb049ace14ab52d386bb1d49a399f39eec4986 -# s390x: Improve handling of amodes without base register -# commit fd935e238d907d9c523a311ba795077d95ad6912 -# s390x: Rework insn "v-vdup" and add "v-vrep" -# commit 6c1cb1a0128b00858b973ef9344e12d6ddbaaf57 -# s390x: Add support for emitting "vector or with complement" -# commit 0bd4263326b2d48f782339a9bbe1a069c7de45c7 -# s390x: Fix/optimize Iop_64HLtoV128 -# commit cae5062b05b95e0303b1122a0ea9aadc197e4f0a -# s390x: Add missing stdout.exp for vector string memcheck test -Patch11: valgrind-3.17.0-s390-prep.patch +# KDE#445184 Rust v0 symbol demangling is broken +Patch12: valgrind-3.18.1-rust-v0-demangle.patch -# KDE#432387 - s390x: z15 instructions support -Patch12: valgrind-3.17.0-s390-z15.patch +# KDE#445354 arm64 backend: 
incorrect code emitted for doubleword CAS +Patch13: valgrind-3.18.1-arm64-doubleword-cas.patch -# commit 124ae6cfa303f0cc71ffd685620cb57c4f8f02bb -# s390x: Don't emit "vector or with complement" on z13 -Patch13: valgrind-3.17.0-s390-z13-vec-fix.patch +# KDE#444399 arm64: unhandled instruction LD{,A}XP and ST{,L}XP +Patch14: valgrind-3.18.1-arm64-ldaxp-stlxp.patch + +# KDE#445415 arm64 front end: alignment checks missing for atomic instructions. +Patch15: valgrind-3.18.1-arm64-atomic-align.patch + +# commit 595341b150312d2407bd43304449bf39ec3e1fa8 +# amd64 front end: add more spec rules +Patch16: valgrind-3.18.1-amd64-more-spec-rules.patch + +# KDE#445504 Using C++ condition_variable results in bogus +# "mutex is locked simultaneously by two threads" warning +Patch17: valgrind-3.18.1-condvar.patch + +# KDE#445668 Inline stack frame generation is broken for Rust binaries +Patch18: valgrind-3.18.1-demangle-namespace.patch + +# KDE#449494 arm64: Mismatch detected between RDMA and atomics features +Patch19: valgrind-3.18.1-arm64-atomics-rdm.patch BuildRequires: make BuildRequires: glibc-devel @@ -180,9 +162,6 @@ BuildRequires: autoconf # For make check validating the documentation BuildRequires: docbook-dtds -# configure might use which -BuildRequires: which - # For testing debuginfod-find %if 0%{?fedora} > 29 || 0%{?rhel} > 7 BuildRequires: elfutils-debuginfod-client @@ -285,7 +264,6 @@ Valgrind User Manual for details. %setup -q -n %{?scl:%{pkg_name}}%{!?scl:%{name}}-%{version} %patch1 -p1 -%patch2 -p1 %patch3 -p1 # Old rhel gcc doesn't have -fstack-protector-strong. @@ -296,15 +274,18 @@ Valgrind User Manual for details. 
%patch6 -p1 %patch7 -p1 - %patch8 -p1 %patch9 -p1 %patch10 -p1 - %patch11 -p1 -touch memcheck/tests/s390x/vistr.stdout.exp %patch12 -p1 %patch13 -p1 +%patch14 -p1 +%patch15 -p1 +%patch16 -p1 +%patch17 -p1 +%patch18 -p1 +%patch19 -p1 %build @@ -473,8 +454,11 @@ fi cat diffs echo ===============END TESTING=============== +%{!?_licensedir:%global license %%doc} + %files -%doc COPYING NEWS README_* +%license COPYING COPYING.DOCS +%doc NEWS README_* %doc docs/installed/html docs/installed/*.pdf %{_bindir}/* %dir %{_libexecdir}/valgrind @@ -498,6 +482,7 @@ echo ===============END TESTING=============== %if %{build_tools_devel} %files tools-devel +%license COPYING %{_includedir}/valgrind/config.h %{_includedir}/valgrind/libvex*h %{_includedir}/valgrind/pub_tool_*h @@ -526,6 +511,30 @@ fi %endif %changelog +* Thu Feb 10 2022 Mark Wielaard - 3.18.1-7 +- Add valgrind-3.18.1-arm64-atomics-rdm.patch + +* Tue Nov 30 2021 Mark Wielaard - 3.18.1-6 +- Rebuild against fresh toolchain + +* Wed Nov 24 2021 Mark Wielaard - 3.18.1-5 +- Add valgrind-3.18.1-dhat-tests-copy.patch +- Add valgrind-3.18.1-s390x-EXRL.patch +- Add valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch +- Add valgrind-3.18.1-ppc-pstq.patch +- Add valgrind-3.18.1-ppc-pstq-tests.patch +- Add valgrind-3.18.1-gdbserver_tests-hwcap.patch +- Add valgrind-3.18.1-rust-v0-demangle.patch +- Add valgrind-3.18.1-arm64-doubleword-cas.patch +- Add valgrind-3.18.1-arm64-ldaxp-stlxp.patch +- Add valgrind-3.18.1-arm64-atomic-align.patch +- Add valgrind-3.18.1-amd64-more-spec-rules.patch +- Add valgrind-3.18.1-condvar.patch +- Add valgrind-3.18.1-demangle-namespace.patch + +* Wed Oct 20 2021 Mark Wielaard - 3.18.1-1 +- Update to upstream 3.18.1 final + * Thu Jun 24 2021 Mark Wielaard - 3.17.0-5 - Add valgrind-3.17.0-s390-prep.patch - Add valgrind-3.17.0-s390-z15.patch