206 lines
7.3 KiB
Diff
206 lines
7.3 KiB
Diff
|
From 54d3e58aabf9716f9a07aeb7044d7b7997e28123 Mon Sep 17 00:00:00 2001
|
||
|
From: Paolo Bonzini <pbonzini@redhat.com>
|
||
|
Date: Tue, 31 Jan 2023 09:48:03 +0100
|
||
|
Subject: [PATCH 5/8] target/i386: fix ADOX followed by ADCX
|
||
|
|
||
|
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
||
|
RH-MergeRequest: 154: target/i386: fix bugs in emulation of BMI instructions
|
||
|
RH-Bugzilla: 2173590
|
||
|
RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
|
||
|
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
||
|
RH-Acked-by: Bandan Das <None>
|
||
|
RH-Commit: [5/7] 64dbe4e602f08e4a88fdeacee5a8993ca4383563 (bonzini/rhel-qemu-kvm)
|
||
|
|
||
|
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173590
|
||
|
Upstream-Status: merged
|
||
|
|
||
|
When ADCX is followed by ADOX or vice versa, the second instruction's
|
||
|
carry comes from EFLAGS and the condition codes use the CC_OP_ADCOX
|
||
|
operation. Retrieving the carry from EFLAGS is handled by this bit
|
||
|
of gen_ADCOX:
|
||
|
|
||
|
tcg_gen_extract_tl(carry_in, cpu_cc_src,
|
||
|
ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1);
|
||
|
|
||
|
Unfortunately, in this case cc_op has been overwritten by the previous
|
||
|
"if" statement to CC_OP_ADCOX. This works by chance when the first
|
||
|
instruction is ADCX; however, if the first instruction is ADOX,
|
||
|
ADCX will incorrectly take its carry from OF instead of CF.
|
||
|
|
||
|
Fix by moving the computation of the new cc_op at the end of the function.
|
||
|
The included exhaustive test case fails without this patch and passes
|
||
|
afterwards.
|
||
|
|
||
|
Because ADCX/ADOX need not be invoked through the VEX prefix, this
|
||
|
regression bisects to commit 16fc5726a6e2 ("target/i386: reimplement
|
||
|
0x0f 0x38, add AVX", 2022-10-18). However, the mistake happened a
|
||
|
little earlier, when BMI instructions were rewritten using the new
|
||
|
decoder framework.
|
||
|
|
||
|
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1471
|
||
|
Reported-by: Paul Jolly <https://gitlab.com/myitcv>
|
||
|
Fixes: 1d0b926150e5 ("target/i386: move scalar 0F 38 and 0F 3A instruction to new decoder", 2022-10-18)
|
||
|
Cc: qemu-stable@nongnu.org
|
||
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||
|
(cherry picked from commit 60c7dd22e1383754d5f150bc9f7c2785c662a7b6)
|
||
|
---
|
||
|
target/i386/tcg/emit.c.inc | 20 +++++----
|
||
|
tests/tcg/i386/Makefile.target | 6 ++-
|
||
|
tests/tcg/i386/test-i386-adcox.c | 75 ++++++++++++++++++++++++++++++++
|
||
|
3 files changed, 91 insertions(+), 10 deletions(-)
|
||
|
create mode 100644 tests/tcg/i386/test-i386-adcox.c
|
||
|
|
||
|
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
|
||
|
index 4d7702c106..0d7c6e80ae 100644
|
||
|
--- a/target/i386/tcg/emit.c.inc
|
||
|
+++ b/target/i386/tcg/emit.c.inc
|
||
|
@@ -1015,6 +1015,7 @@ VSIB_AVX(VPGATHERQ, vpgatherq)
|
||
|
|
||
|
static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
|
||
|
{
|
||
|
+ int opposite_cc_op;
|
||
|
TCGv carry_in = NULL;
|
||
|
TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2);
|
||
|
TCGv zero;
|
||
|
@@ -1022,14 +1023,8 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
|
||
|
if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) {
|
||
|
/* Re-use the carry-out from a previous round. */
|
||
|
carry_in = carry_out;
|
||
|
- cc_op = s->cc_op;
|
||
|
- } else if (s->cc_op == CC_OP_ADCX || s->cc_op == CC_OP_ADOX) {
|
||
|
- /* Merge with the carry-out from the opposite instruction. */
|
||
|
- cc_op = CC_OP_ADCOX;
|
||
|
- }
|
||
|
-
|
||
|
- /* If we don't have a carry-in, get it out of EFLAGS. */
|
||
|
- if (!carry_in) {
|
||
|
+ } else {
|
||
|
+ /* We don't have a carry-in, get it out of EFLAGS. */
|
||
|
if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
|
||
|
gen_compute_eflags(s);
|
||
|
}
|
||
|
@@ -1053,7 +1048,14 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
|
||
|
tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero);
|
||
|
break;
|
||
|
}
|
||
|
- set_cc_op(s, cc_op);
|
||
|
+
|
||
|
+ opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX;
|
||
|
+ if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) {
|
||
|
+ /* Merge with the carry-out from the opposite instruction. */
|
||
|
+ set_cc_op(s, CC_OP_ADCOX);
|
||
|
+ } else {
|
||
|
+ set_cc_op(s, cc_op);
|
||
|
+ }
|
||
|
}
|
||
|
|
||
|
static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
|
||
|
diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
|
||
|
index 81831cafbc..bafd8c2180 100644
|
||
|
--- a/tests/tcg/i386/Makefile.target
|
||
|
+++ b/tests/tcg/i386/Makefile.target
|
||
|
@@ -14,7 +14,7 @@ config-cc.mak: Makefile
|
||
|
I386_SRCS=$(notdir $(wildcard $(I386_SRC)/*.c))
|
||
|
ALL_X86_TESTS=$(I386_SRCS:.c=)
|
||
|
SKIP_I386_TESTS=test-i386-ssse3 test-avx test-3dnow test-mmx
|
||
|
-X86_64_TESTS:=$(filter test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
|
||
|
+X86_64_TESTS:=$(filter test-i386-adcox test-i386-bmi2 $(SKIP_I386_TESTS), $(ALL_X86_TESTS))
|
||
|
|
||
|
test-i386-sse-exceptions: CFLAGS += -msse4.1 -mfpmath=sse
|
||
|
run-test-i386-sse-exceptions: QEMU_OPTS += -cpu max
|
||
|
@@ -28,6 +28,10 @@ test-i386-bmi2: CFLAGS=-O2
|
||
|
run-test-i386-bmi2: QEMU_OPTS += -cpu max
|
||
|
run-plugin-test-i386-bmi2-%: QEMU_OPTS += -cpu max
|
||
|
|
||
|
+test-i386-adcox: CFLAGS=-O2
|
||
|
+run-test-i386-adcox: QEMU_OPTS += -cpu max
|
||
|
+run-plugin-test-i386-adcox-%: QEMU_OPTS += -cpu max
|
||
|
+
|
||
|
#
|
||
|
# hello-i386 is a barebones app
|
||
|
#
|
||
|
diff --git a/tests/tcg/i386/test-i386-adcox.c b/tests/tcg/i386/test-i386-adcox.c
|
||
|
new file mode 100644
|
||
|
index 0000000000..16169efff8
|
||
|
--- /dev/null
|
||
|
+++ b/tests/tcg/i386/test-i386-adcox.c
|
||
|
@@ -0,0 +1,75 @@
|
||
|
+/* See if various BMI2 instructions give expected results */
|
||
|
+#include <assert.h>
|
||
|
+#include <stdint.h>
|
||
|
+#include <stdio.h>
|
||
|
+
|
||
|
+#define CC_C 1
|
||
|
+#define CC_O (1 << 11)
|
||
|
+
|
||
|
+#ifdef __x86_64__
|
||
|
+#define REG uint64_t
|
||
|
+#else
|
||
|
+#define REG uint32_t
|
||
|
+#endif
|
||
|
+
|
||
|
+void test_adox_adcx(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand)
|
||
|
+{
|
||
|
+ REG flags;
|
||
|
+ REG out_adcx, out_adox;
|
||
|
+
|
||
|
+ asm("pushf; pop %0" : "=r"(flags));
|
||
|
+ flags &= ~(CC_C | CC_O);
|
||
|
+ flags |= (in_c ? CC_C : 0);
|
||
|
+ flags |= (in_o ? CC_O : 0);
|
||
|
+
|
||
|
+ out_adcx = adcx_operand;
|
||
|
+ out_adox = adox_operand;
|
||
|
+ asm("push %0; popf;"
|
||
|
+ "adox %3, %2;"
|
||
|
+ "adcx %3, %1;"
|
||
|
+ "pushf; pop %0"
|
||
|
+ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
|
||
|
+ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
|
||
|
+
|
||
|
+ assert(out_adcx == in_c + adcx_operand - 1);
|
||
|
+ assert(out_adox == in_o + adox_operand - 1);
|
||
|
+ assert(!!(flags & CC_C) == (in_c || adcx_operand));
|
||
|
+ assert(!!(flags & CC_O) == (in_o || adox_operand));
|
||
|
+}
|
||
|
+
|
||
|
+void test_adcx_adox(uint32_t in_c, uint32_t in_o, REG adcx_operand, REG adox_operand)
|
||
|
+{
|
||
|
+ REG flags;
|
||
|
+ REG out_adcx, out_adox;
|
||
|
+
|
||
|
+ asm("pushf; pop %0" : "=r"(flags));
|
||
|
+ flags &= ~(CC_C | CC_O);
|
||
|
+ flags |= (in_c ? CC_C : 0);
|
||
|
+ flags |= (in_o ? CC_O : 0);
|
||
|
+
|
||
|
+ out_adcx = adcx_operand;
|
||
|
+ out_adox = adox_operand;
|
||
|
+ asm("push %0; popf;"
|
||
|
+ "adcx %3, %1;"
|
||
|
+ "adox %3, %2;"
|
||
|
+ "pushf; pop %0"
|
||
|
+ : "+r" (flags), "+r" (out_adcx), "+r" (out_adox)
|
||
|
+ : "r" ((REG)-1), "0" (flags), "1" (out_adcx), "2" (out_adox));
|
||
|
+
|
||
|
+ assert(out_adcx == in_c + adcx_operand - 1);
|
||
|
+ assert(out_adox == in_o + adox_operand - 1);
|
||
|
+ assert(!!(flags & CC_C) == (in_c || adcx_operand));
|
||
|
+ assert(!!(flags & CC_O) == (in_o || adox_operand));
|
||
|
+}
|
||
|
+
|
||
|
+int main(int argc, char *argv[]) {
|
||
|
+ /* try all combinations of input CF, input OF, CF from op1+op2, OF from op2+op1 */
|
||
|
+ int i;
|
||
|
+ for (i = 0; i <= 15; i++) {
|
||
|
+ printf("%d\n", i);
|
||
|
+ test_adcx_adox(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8));
|
||
|
+ test_adox_adcx(!!(i & 1), !!(i & 2), !!(i & 4), !!(i & 8));
|
||
|
+ }
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
--
|
||
|
2.39.1
|
||
|
|