llvm/SOURCES/0001-DAG-Fix-crash-in-replaceStoreOfInsertLoad.patch

99 lines
4.0 KiB
Diff
Raw Normal View History

From dbc6b9344bde269a2499d47e7f08c172a88f289a Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve@amd.com>
Date: Thu, 3 Aug 2023 10:53:08 +0200
Subject: [PATCH] [DAG] Fix crash in replaceStoreOfInsertLoad
Idx's type can differ from Ptr's type, causing a "Binary operator types must match" assertion failure when emitting the MUL. Zero-extend or truncate Idx to PtrVT before building the MUL.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D156972
(cherry picked from commit 98ccc70b93a39a7ea3e26f7f5b5fe40d39b5a7e5)
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +-
.../AMDGPU/replace-store-of-insert-load.ll | 58 +++++++++++++++++++
2 files changed, 59 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 235f0da86b90..dbc8be3c52b8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20517,7 +20517,7 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
EVT PtrVT = Ptr.getValueType();
SDValue Offset =
- DAG.getNode(ISD::MUL, DL, PtrVT, Idx,
+ DAG.getNode(ISD::MUL, DL, PtrVT, DAG.getZExtOrTrunc(Idx, DL, PtrVT),
DAG.getConstant(EltVT.getSizeInBits() / 8, DL, PtrVT));
SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, Offset);
MachinePointerInfo PointerInfo(ST->getAddressSpace());
diff --git a/llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll b/llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll
new file mode 100644
index 000000000000..35a602af68c0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s
+
+; Regression test for a bug in `DAGCombiner::replaceStoreOfInsertLoad` where
+; the type of Idx could be narrower than PtrVT, causing a MUL to be emitted
+; with mismatched LHS/RHS types.
+
+define void @testcase_0(ptr addrspace(1) %in, float %arg) {
+; CHECK-LABEL: testcase_0:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: global_store_dword v[0:1], v2, off offset:12
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %loaded = load <4 x float>, ptr addrspace(1) %in
+ %modified = insertelement <4 x float> %loaded, float %arg, i64 3
+ store <4 x float> %modified, ptr addrspace(1) %in
+ ret void
+}
+
+define void @testcase_1(ptr addrspace(1) %in, float %arg) {
+; CHECK-LABEL: testcase_1:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: global_store_dword v[0:1], v2, off offset:16
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %loaded = load <6 x float>, ptr addrspace(1) %in
+ %modified = insertelement <6 x float> %loaded, float %arg, i64 4
+ store <6 x float> %modified, ptr addrspace(1) %in
+ ret void
+}
+
+define void @testcase_2(ptr addrspace(1) %in, double %arg) {
+; CHECK-LABEL: testcase_2:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:8
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %loaded = load <4 x double>, ptr addrspace(1) %in
+ %modified = insertelement <4 x double> %loaded, double %arg, i64 1
+ store <4 x double> %modified, ptr addrspace(1) %in
+ ret void
+}
+
+define void @testcase_3(ptr addrspace(1) %in, double %arg) {
+; CHECK-LABEL: testcase_3:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:56
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %loaded = load <8 x double>, ptr addrspace(1) %in
+ %modified = insertelement <8 x double> %loaded, double %arg, i64 7
+ store <8 x double> %modified, ptr addrspace(1) %in
+ ret void
+}
--
2.44.0