Compare commits
No commits in common. "c9s" and "c8-stream-rhel8" have entirely different histories.
c9s
...
c8-stream-
@ -1 +0,0 @@
|
|||||||
1
|
|
13
.gitignore
vendored
13
.gitignore
vendored
@ -1,7 +1,6 @@
|
|||||||
/*.src.rpm
|
SOURCES/cmake-17.0.6.src.tar.xz
|
||||||
/*.src.tar.xz
|
SOURCES/cmake-17.0.6.src.tar.xz.sig
|
||||||
/*.src.tar.xz.sig
|
SOURCES/llvm-17.0.6.src.tar.xz
|
||||||
/cmake/
|
SOURCES/llvm-17.0.6.src.tar.xz.sig
|
||||||
/llvm-*.src/
|
SOURCES/third-party-17.0.6.src.tar.xz
|
||||||
/results_llvm/
|
SOURCES/third-party-17.0.6.src.tar.xz.sig
|
||||||
/third-party/
|
|
||||||
|
6
.llvm.metadata
Normal file
6
.llvm.metadata
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
4b397344260c934e687be7efa0f8456a9dd46f44 SOURCES/cmake-17.0.6.src.tar.xz
|
||||||
|
fa31d348b6780478403484e22139d25f403503d4 SOURCES/cmake-17.0.6.src.tar.xz.sig
|
||||||
|
860a3605f08a0a56a8de4e073e26a259871623a6 SOURCES/llvm-17.0.6.src.tar.xz
|
||||||
|
2ad479ab00a6d5e61ecb953997cfeef6650a687a SOURCES/llvm-17.0.6.src.tar.xz.sig
|
||||||
|
a35dc22cd3d983a556f6e4a63c8dac6a84e01caf SOURCES/third-party-17.0.6.src.tar.xz
|
||||||
|
12128cdab7414aeedd573c61cbc2fa82e75491db SOURCES/third-party-17.0.6.src.tar.xz.sig
|
@ -1,13 +0,0 @@
|
|||||||
diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py
|
|
||||||
index cf8a75980b53..b208ad138e89 100644
|
|
||||||
--- a/llvm/docs/conf.py
|
|
||||||
+++ b/llvm/docs/conf.py
|
|
||||||
@@ -26,7 +26,7 @@ from datetime import date
|
|
||||||
|
|
||||||
# Add any Sphinx extension module names here, as strings. They can be extensions
|
|
||||||
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
|
|
||||||
-extensions = ["myst_parser", "sphinx.ext.intersphinx", "sphinx.ext.todo"]
|
|
||||||
+extensions = ["sphinx.ext.intersphinx", "sphinx.ext.todo"]
|
|
||||||
|
|
||||||
# Automatic anchors for markdown titles
|
|
||||||
from llvm_slug import make_slug
|
|
893
99273.patch
893
99273.patch
@ -1,893 +0,0 @@
|
|||||||
From 91052169960477fbc39169c10f9fae3bec732510 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Carl Ritson <carl.ritson@amd.com>
|
|
||||||
Date: Wed, 17 Jul 2024 15:07:42 +0900
|
|
||||||
Subject: [PATCH 1/3] [AMDGPU] Implement workaround for GFX11.5 export priority
|
|
||||||
|
|
||||||
On GFX11.5 shaders having completed exports need to execute/wait
|
|
||||||
at a lower priority than shaders still executing exports.
|
|
||||||
Add code to maintain normal priority of 2 for shaders that export
|
|
||||||
and drop to priority 0 after exports.
|
|
||||||
---
|
|
||||||
llvm/lib/Target/AMDGPU/AMDGPU.td | 15 +-
|
|
||||||
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 112 ++++++
|
|
||||||
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 +
|
|
||||||
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3 +
|
|
||||||
.../AMDGPU/required-export-priority.ll | 344 ++++++++++++++++++
|
|
||||||
.../AMDGPU/required-export-priority.mir | 293 +++++++++++++++
|
|
||||||
6 files changed, 765 insertions(+), 3 deletions(-)
|
|
||||||
create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
|
||||||
create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.mir
|
|
||||||
|
|
||||||
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
|
|
||||||
index dfc8eaea66f7b..14fcf6a210a78 100644
|
|
||||||
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
|
|
||||||
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
|
|
||||||
@@ -947,6 +947,12 @@ def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
|
|
||||||
"Has restricted SOffset (immediate not supported)."
|
|
||||||
>;
|
|
||||||
|
|
||||||
+def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority",
|
|
||||||
+ "HasRequiredExportPriority",
|
|
||||||
+ "true",
|
|
||||||
+ "Export priority must be explicitly manipulated on GFX11.5"
|
|
||||||
+>;
|
|
||||||
+
|
|
||||||
//===------------------------------------------------------------===//
|
|
||||||
// Subtarget Features (options and debugging)
|
|
||||||
//===------------------------------------------------------------===//
|
|
||||||
@@ -1597,14 +1603,16 @@ def FeatureISAVersion11_5_0 : FeatureSet<
|
|
||||||
!listconcat(FeatureISAVersion11_Common.Features,
|
|
||||||
[FeatureSALUFloatInsts,
|
|
||||||
FeatureDPPSrc1SGPR,
|
|
||||||
- FeatureVGPRSingleUseHintInsts])>;
|
|
||||||
+ FeatureVGPRSingleUseHintInsts,
|
|
||||||
+ FeatureRequiredExportPriority])>;
|
|
||||||
|
|
||||||
def FeatureISAVersion11_5_1 : FeatureSet<
|
|
||||||
!listconcat(FeatureISAVersion11_Common.Features,
|
|
||||||
[FeatureSALUFloatInsts,
|
|
||||||
FeatureDPPSrc1SGPR,
|
|
||||||
FeatureVGPRSingleUseHintInsts,
|
|
||||||
- FeatureGFX11FullVGPRs])>;
|
|
||||||
+ FeatureGFX11FullVGPRs,
|
|
||||||
+ FeatureRequiredExportPriority])>;
|
|
||||||
|
|
||||||
def FeatureISAVersion12 : FeatureSet<
|
|
||||||
[FeatureGFX12,
|
|
||||||
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
|
|
||||||
index a402fc6d7e611..a8b171aa82840 100644
|
|
||||||
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
|
|
||||||
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
|
|
||||||
@@ -14,6 +14,7 @@
|
|
||||||
#include "GCNSubtarget.h"
|
|
||||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
||||||
#include "SIMachineFunctionInfo.h"
|
|
||||||
+#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
|
||||||
#include "llvm/CodeGen/ScheduleDAG.h"
|
|
||||||
#include "llvm/TargetParser/TargetParser.h"
|
|
||||||
@@ -1104,6 +1105,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
|
|
||||||
fixWMMAHazards(MI);
|
|
||||||
fixShift64HighRegBug(MI);
|
|
||||||
fixVALUMaskWriteHazard(MI);
|
|
||||||
+ fixRequiredExportPriority(MI);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
|
|
||||||
@@ -2895,3 +2897,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+static bool ensureEntrySetPrio(MachineFunction *MF, int Priority,
|
|
||||||
+ const SIInstrInfo &TII) {
|
|
||||||
+ MachineBasicBlock &EntryMBB = MF->front();
|
|
||||||
+ if (EntryMBB.begin() != EntryMBB.end()) {
|
|
||||||
+ auto &EntryMI = *EntryMBB.begin();
|
|
||||||
+ if (EntryMI.getOpcode() == AMDGPU::S_SETPRIO &&
|
|
||||||
+ EntryMI.getOperand(0).getImm() >= Priority)
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII.get(AMDGPU::S_SETPRIO))
|
|
||||||
+ .addImm(Priority);
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
|
|
||||||
+ if (!ST.hasRequiredExportPriority())
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ // Assume the following shader types will never have exports,
|
|
||||||
+ // and avoid adding or adjusting S_SETPRIO.
|
|
||||||
+ MachineBasicBlock *MBB = MI->getParent();
|
|
||||||
+ MachineFunction *MF = MBB->getParent();
|
|
||||||
+ auto CC = MF->getFunction().getCallingConv();
|
|
||||||
+ switch (CC) {
|
|
||||||
+ case CallingConv::AMDGPU_CS:
|
|
||||||
+ case CallingConv::AMDGPU_CS_Chain:
|
|
||||||
+ case CallingConv::AMDGPU_CS_ChainPreserve:
|
|
||||||
+ case CallingConv::AMDGPU_KERNEL:
|
|
||||||
+ return false;
|
|
||||||
+ default:
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ const int MaxPriority = 3;
|
|
||||||
+ const int NormalPriority = 2;
|
|
||||||
+ const int PostExportPriority = 0;
|
|
||||||
+
|
|
||||||
+ auto It = MI->getIterator();
|
|
||||||
+ switch (MI->getOpcode()) {
|
|
||||||
+ case AMDGPU::S_ENDPGM:
|
|
||||||
+ case AMDGPU::S_ENDPGM_SAVED:
|
|
||||||
+ case AMDGPU::S_ENDPGM_ORDERED_PS_DONE:
|
|
||||||
+ case AMDGPU::SI_RETURN_TO_EPILOG:
|
|
||||||
+ // Ensure shader with calls raises priority at entry.
|
|
||||||
+ // This ensures correct priority if exports exist in callee.
|
|
||||||
+ if (MF->getFrameInfo().hasCalls())
|
|
||||||
+ return ensureEntrySetPrio(MF, NormalPriority, TII);
|
|
||||||
+ return false;
|
|
||||||
+ case AMDGPU::S_SETPRIO: {
|
|
||||||
+ // Raise minimum priority unless in workaround.
|
|
||||||
+ auto &PrioOp = MI->getOperand(0);
|
|
||||||
+ int Prio = PrioOp.getImm();
|
|
||||||
+ bool InWA = (Prio == PostExportPriority) &&
|
|
||||||
+ (It != MBB->begin() && TII.isEXP(*std::prev(It)));
|
|
||||||
+ if (InWA || Prio >= NormalPriority)
|
|
||||||
+ return false;
|
|
||||||
+ PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority));
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+ default:
|
|
||||||
+ if (!TII.isEXP(*MI))
|
|
||||||
+ return false;
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ // Check entry priority at each export (as there will only be a few).
|
|
||||||
+ // Note: amdgpu_gfx can only be a callee, so defer to caller setprio.
|
|
||||||
+ bool Changed = false;
|
|
||||||
+ if (CC != CallingConv::AMDGPU_Gfx)
|
|
||||||
+ Changed = ensureEntrySetPrio(MF, NormalPriority, TII);
|
|
||||||
+
|
|
||||||
+ auto NextMI = std::next(It);
|
|
||||||
+ bool EndOfShader = false;
|
|
||||||
+ if (NextMI != MBB->end()) {
|
|
||||||
+ // Only need WA at end of sequence of exports.
|
|
||||||
+ if (TII.isEXP(*NextMI))
|
|
||||||
+ return Changed;
|
|
||||||
+ // Assume appropriate S_SETPRIO after export means WA already applied.
|
|
||||||
+ if (NextMI->getOpcode() == AMDGPU::S_SETPRIO &&
|
|
||||||
+ NextMI->getOperand(0).getImm() == PostExportPriority)
|
|
||||||
+ return Changed;
|
|
||||||
+ EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ const DebugLoc &DL = MI->getDebugLoc();
|
|
||||||
+
|
|
||||||
+ // Lower priority.
|
|
||||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
|
|
||||||
+ .addImm(PostExportPriority);
|
|
||||||
+
|
|
||||||
+ if (!EndOfShader) {
|
|
||||||
+ // Wait for exports to complete.
|
|
||||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT))
|
|
||||||
+ .addReg(AMDGPU::SGPR_NULL)
|
|
||||||
+ .addImm(0);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
|
|
||||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
|
|
||||||
+
|
|
||||||
+ if (!EndOfShader) {
|
|
||||||
+ // Return to normal (higher) priority.
|
|
||||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
|
|
||||||
+ .addImm(NormalPriority);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
|
|
||||||
index 3ccca527c626b..f2a64ab48e180 100644
|
|
||||||
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
|
|
||||||
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
|
|
||||||
@@ -107,6 +107,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
|
|
||||||
bool fixWMMAHazards(MachineInstr *MI);
|
|
||||||
bool fixShift64HighRegBug(MachineInstr *MI);
|
|
||||||
bool fixVALUMaskWriteHazard(MachineInstr *MI);
|
|
||||||
+ bool fixRequiredExportPriority(MachineInstr *MI);
|
|
||||||
|
|
||||||
int checkMAIHazards(MachineInstr *MI);
|
|
||||||
int checkMAIHazards908(MachineInstr *MI);
|
|
||||||
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
|
|
||||||
index e5817594a4521..def89c785b855 100644
|
|
||||||
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
|
|
||||||
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
|
|
||||||
@@ -238,6 +238,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
|
|
||||||
bool HasVOPDInsts = false;
|
|
||||||
bool HasVALUTransUseHazard = false;
|
|
||||||
bool HasForceStoreSC0SC1 = false;
|
|
||||||
+ bool HasRequiredExportPriority = false;
|
|
||||||
|
|
||||||
// Dummy feature to use for assembler in tablegen.
|
|
||||||
bool FeatureDisable = false;
|
|
||||||
@@ -1282,6 +1283,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
|
|
||||||
|
|
||||||
bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }
|
|
||||||
|
|
||||||
+ bool hasRequiredExportPriority() const { return HasRequiredExportPriority; }
|
|
||||||
+
|
|
||||||
/// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
|
|
||||||
/// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
|
|
||||||
bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
|
|
||||||
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000000000..377902f3f0d1a
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
|
||||||
@@ -0,0 +1,344 @@
|
|
||||||
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
||||||
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_export_zeroes_f32() #0 {
|
|
||||||
+; GCN-LABEL: test_export_zeroes_f32:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v0, 0
|
|
||||||
+; GCN-NEXT: exp mrt0 off, off, off, off
|
|
||||||
+; GCN-NEXT: exp mrt0 off, off, off, off done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_export_en_src0_f32() #0 {
|
|
||||||
+; GCN-LABEL: test_export_en_src0_f32:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
|
||||||
+; GCN-NEXT: exp mrt0 v3, off, off, off done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_gs void @test_export_gs() #0 {
|
|
||||||
+; GCN-LABEL: test_export_gs:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
|
||||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_hs void @test_export_hs() #0 {
|
|
||||||
+; GCN-LABEL: test_export_hs:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
|
||||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_gfx void @test_export_gfx(float %v) #0 {
|
|
||||||
+; GCN-LABEL: test_export_gfx:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 4.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 0.5
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v3, 2.0
|
|
||||||
+; GCN-NEXT: exp mrt0 off, v3, off, off done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_waitcnt expcnt(0)
|
|
||||||
+; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %v, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_cs void @test_export_cs() #0 {
|
|
||||||
+; GCN-LABEL: test_export_cs:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
|
||||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_kernel void @test_export_kernel() #0 {
|
|
||||||
+; GCN-LABEL: test_export_kernel:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
|
||||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_gfx void @test_no_export_gfx(float %v) #0 {
|
|
||||||
+; GCN-LABEL: test_no_export_gfx:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
||||||
+; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_no_export_ps(float %v) #0 {
|
|
||||||
+; GCN-LABEL: test_no_export_ps:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
|
||||||
+; GCN-LABEL: test_if_export_f32:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
|
||||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
|
||||||
+; GCN-NEXT: s_cbranch_execz .LBB9_2
|
|
||||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
|
||||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: .LBB9_2: ; %end
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ %cc = icmp eq i32 %flag, 0
|
|
||||||
+ br i1 %cc, label %end, label %exp
|
|
||||||
+
|
|
||||||
+exp:
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
|
|
||||||
+ br label %end
|
|
||||||
+
|
|
||||||
+end:
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
|
||||||
+; GCN-LABEL: test_if_export_vm_f32:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
|
||||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
|
||||||
+; GCN-NEXT: s_cbranch_execz .LBB10_2
|
|
||||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
|
||||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: .LBB10_2: ; %end
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ %cc = icmp eq i32 %flag, 0
|
|
||||||
+ br i1 %cc, label %end, label %exp
|
|
||||||
+
|
|
||||||
+exp:
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
|
|
||||||
+ br label %end
|
|
||||||
+
|
|
||||||
+end:
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
|
||||||
+; GCN-LABEL: test_if_export_done_f32:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
|
||||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
|
||||||
+; GCN-NEXT: s_cbranch_execz .LBB11_2
|
|
||||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
|
||||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: .LBB11_2: ; %end
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ %cc = icmp eq i32 %flag, 0
|
|
||||||
+ br i1 %cc, label %end, label %exp
|
|
||||||
+
|
|
||||||
+exp:
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
|
|
||||||
+ br label %end
|
|
||||||
+
|
|
||||||
+end:
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
|
||||||
+; GCN-LABEL: test_if_export_vm_done_f32:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
|
||||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
|
||||||
+; GCN-NEXT: s_cbranch_execz .LBB12_2
|
|
||||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
|
||||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: .LBB12_2: ; %end
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ %cc = icmp eq i32 %flag, 0
|
|
||||||
+ br i1 %cc, label %end, label %exp
|
|
||||||
+
|
|
||||||
+exp:
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
|
|
||||||
+ br label %end
|
|
||||||
+
|
|
||||||
+end:
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 {
|
|
||||||
+; GCN-LABEL: test_export_pos_before_param_across_load:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 1.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v3, 0.5
|
|
||||||
+; GCN-NEXT: s_waitcnt vmcnt(0)
|
|
||||||
+; GCN-NEXT: exp pos0 v1, v1, v1, v0 done
|
|
||||||
+; GCN-NEXT: exp invalid_target_32 v2, v2, v2, v2
|
|
||||||
+; GCN-NEXT: exp invalid_target_33 v2, v2, v2, v3
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
|
|
||||||
+ %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
|
|
||||||
+; GCN-LABEL: test_export_across_store_load:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 24
|
|
||||||
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
|
|
||||||
+; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
|
||||||
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 8, vcc_lo
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 0
|
|
||||||
+; GCN-NEXT: scratch_store_b32 v0, v1, off
|
|
||||||
+; GCN-NEXT: scratch_load_b32 v0, off, off
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 1.0
|
|
||||||
+; GCN-NEXT: exp pos0 v2, v2, v2, v1 done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_waitcnt vmcnt(0)
|
|
||||||
+; GCN-NEXT: exp invalid_target_32 v0, v2, v1, v2
|
|
||||||
+; GCN-NEXT: exp invalid_target_33 v0, v2, v1, v2
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ %data0 = alloca <4 x float>, align 8, addrspace(5)
|
|
||||||
+ %data1 = alloca <4 x float>, align 8, addrspace(5)
|
|
||||||
+ %cmp = icmp eq i32 %idx, 1
|
|
||||||
+ %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
|
|
||||||
+ store float %v, ptr addrspace(5) %data, align 8
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
|
|
||||||
+ %load0 = load float, ptr addrspace(5) %data0, align 8
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_export_in_callee(float %v) #0 {
|
|
||||||
+; GCN-LABEL: test_export_in_callee:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_getpc_b64 s[0:1]
|
|
||||||
+; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
|
|
||||||
+; GCN-NEXT: s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
|
|
||||||
+; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
|
|
||||||
+; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
|
||||||
+; GCN-NEXT: s_mov_b32 s32, 0
|
|
||||||
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
|
||||||
+; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ %x = fadd float %v, 1.0
|
|
||||||
+ call void @test_export_gfx(float %x)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
|
|
||||||
+; GCN-LABEL: test_export_in_callee_prio:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_mov_b32 s32, 0
|
|
||||||
+; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_getpc_b64 s[0:1]
|
|
||||||
+; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
|
|
||||||
+; GCN-NEXT: s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
|
|
||||||
+; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
|
||||||
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
|
||||||
+; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ %x = fadd float %v, 1.0
|
|
||||||
+ call void @llvm.amdgcn.s.setprio(i16 0)
|
|
||||||
+ call void @test_export_gfx(float %x)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
|
|
||||||
+declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
|
|
||||||
+declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
|
|
||||||
+declare void @llvm.amdgcn.s.setprio(i16)
|
|
||||||
+
|
|
||||||
+attributes #0 = { nounwind }
|
|
||||||
+attributes #1 = { nounwind inaccessiblememonly }
|
|
||||||
+attributes #2 = { nounwind readnone }
|
|
||||||
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.mir b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000000000..eee04468036e5
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
|
|
||||||
@@ -0,0 +1,293 @@
|
|
||||||
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
|
||||||
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=post-RA-hazard-rec -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX1150 %s
|
|
||||||
+
|
|
||||||
+--- |
|
|
||||||
+ define amdgpu_ps void @end_of_shader() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+ define amdgpu_ps void @end_of_shader_return_to_epilogue() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+ define amdgpu_ps void @end_of_block() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+ define amdgpu_ps void @start_of_block() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+ define amdgpu_ps void @block_of_exports() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+ define amdgpu_ps void @sparse_exports() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+ define amdgpu_ps void @existing_setprio_1() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+ define amdgpu_ps void @existing_setprio_2() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: end_of_shader
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ ; GFX1150-LABEL: name: end_of_shader
|
|
||||||
+ ; GFX1150: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ S_ENDPGM 0
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: end_of_shader_return_to_epilogue
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ ; GFX1150-LABEL: name: end_of_shader_return_to_epilogue
|
|
||||||
+ ; GFX1150: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ SI_RETURN_TO_EPILOG $vgpr0
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: end_of_block
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ ; GFX1150-LABEL: name: end_of_block
|
|
||||||
+ ; GFX1150: bb.0:
|
|
||||||
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
|
|
||||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: bb.1:
|
|
||||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+
|
|
||||||
+ bb.1:
|
|
||||||
+ S_ENDPGM 0
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: start_of_block
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ ; GFX1150-LABEL: name: start_of_block
|
|
||||||
+ ; GFX1150: bb.0:
|
|
||||||
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
|
|
||||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: bb.1:
|
|
||||||
+ ; GFX1150-NEXT: successors: %bb.2(0x80000000)
|
|
||||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: bb.2:
|
|
||||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+
|
|
||||||
+ bb.1:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+
|
|
||||||
+ bb.2:
|
|
||||||
+ S_ENDPGM 0
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: block_of_exports
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ ; GFX1150-LABEL: name: block_of_exports
|
|
||||||
+ ; GFX1150: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
|
||||||
+ EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ S_ENDPGM 0
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: sparse_exports
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ ; GFX1150-LABEL: name: sparse_exports
|
|
||||||
+ ; GFX1150: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
|
||||||
+ EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
|
||||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ S_ENDPGM 0
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: existing_setprio_1
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ ; GFX1150-LABEL: name: existing_setprio_1
|
|
||||||
+ ; GFX1150: bb.0:
|
|
||||||
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
|
|
||||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: bb.1:
|
|
||||||
+ ; GFX1150-NEXT: successors: %bb.2(0x80000000)
|
|
||||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
|
||||||
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: bb.2:
|
|
||||||
+ ; GFX1150-NEXT: successors: %bb.3(0x80000000)
|
|
||||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
|
||||||
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: bb.3:
|
|
||||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
|
||||||
+
|
|
||||||
+ bb.1:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ S_SETPRIO 3
|
|
||||||
+ $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
|
||||||
+ S_SETPRIO 0
|
|
||||||
+
|
|
||||||
+ bb.2:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ S_SETPRIO 1
|
|
||||||
+ $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
|
|
||||||
+ S_SETPRIO 0
|
|
||||||
+
|
|
||||||
+ bb.3:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ S_ENDPGM 0
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: existing_setprio_2
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ ; GFX1150-LABEL: name: existing_setprio_2
|
|
||||||
+ ; GFX1150: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
|
||||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
|
||||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
|
||||||
+ S_SETPRIO 3
|
|
||||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ S_SETPRIO 3
|
|
||||||
+ S_ENDPGM 0
|
|
||||||
+...
|
|
||||||
|
|
||||||
From 8ea44e65f2c19facff751aeb2ac960f907fb210f Mon Sep 17 00:00:00 2001
|
|
||||||
From: Carl Ritson <carl.ritson@amd.com>
|
|
||||||
Date: Wed, 17 Jul 2024 16:18:02 +0900
|
|
||||||
Subject: [PATCH 2/3] Remove -verify-machineinstrs from test.
|
|
||||||
|
|
||||||
---
|
|
||||||
llvm/test/CodeGen/AMDGPU/required-export-priority.ll | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
|
||||||
index 377902f3f0d1a..ebc209bd4d451 100644
|
|
||||||
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
|
||||||
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
||||||
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
|
||||||
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GCN %s
|
|
||||||
|
|
||||||
define amdgpu_ps void @test_export_zeroes_f32() #0 {
|
|
||||||
; GCN-LABEL: test_export_zeroes_f32:
|
|
46
D156379.diff
46
D156379.diff
@ -1,46 +0,0 @@
|
|||||||
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
|
|
||||||
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
|
|
||||||
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
|
|
||||||
@@ -1152,6 +1152,11 @@
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
+ // Type legalization (via getNumberOfParts) can't handle structs
|
|
||||||
+ if (TLI->getValueType(DL, Src, true) == MVT::Other)
|
|
||||||
+ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
|
|
||||||
+ CostKind);
|
|
||||||
+
|
|
||||||
unsigned NumOps =
|
|
||||||
(Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src));
|
|
||||||
|
|
||||||
diff --git a/llvm/test/Analysis/CostModel/SystemZ/struct-cost-crash.ll b/llvm/test/Analysis/CostModel/SystemZ/struct-cost-crash.ll
|
|
||||||
new file mode 100644
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/llvm/test/Analysis/CostModel/SystemZ/struct-cost-crash.ll
|
|
||||||
@@ -0,0 +1,25 @@
|
|
||||||
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
|
|
||||||
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output < %s | FileCheck %s
|
|
||||||
+;
|
|
||||||
+; Check that SystemZTTIImpl::getMemoryOpCost doesn't try to legalize structs,
|
|
||||||
+; which was failing llvm_unreachable in MVT::getVT.
|
|
||||||
+
|
|
||||||
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
|
|
||||||
+target triple = "s390x-unknown-linux-gnu"
|
|
||||||
+
|
|
||||||
+declare { i64, i32 } @bar()
|
|
||||||
+
|
|
||||||
+define i8 @foo() {
|
|
||||||
+; CHECK-LABEL: 'foo'
|
|
||||||
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %1
|
|
||||||
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call { i64, i32 } @bar()
|
|
||||||
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store { i64, i32 } %2, ptr inttoptr (i64 16 to ptr), align 16
|
|
||||||
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %1
|
|
||||||
+;
|
|
||||||
+ br label %1
|
|
||||||
+
|
|
||||||
+1: ; preds = %1, %0
|
|
||||||
+ %2 = call { i64, i32 } @bar()
|
|
||||||
+ store { i64, i32 } %2, ptr inttoptr (i64 16 to ptr), align 16
|
|
||||||
+ br label %1
|
|
||||||
+}
|
|
||||||
|
|
98
SOURCES/0001-DAG-Fix-crash-in-replaceStoreOfInsertLoad.patch
Normal file
98
SOURCES/0001-DAG-Fix-crash-in-replaceStoreOfInsertLoad.patch
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
From dbc6b9344bde269a2499d47e7f08c172a88f289a Mon Sep 17 00:00:00 2001
|
||||||
|
From: pvanhout <pierre.vanhoutryve@amd.com>
|
||||||
|
Date: Thu, 3 Aug 2023 10:53:08 +0200
|
||||||
|
Subject: [PATCH] [DAG] Fix crash in replaceStoreOfInsertLoad
|
||||||
|
|
||||||
|
Idx's type can be different from Ptr's, causing a "Binary operator types must match" assertion failure when emitting the MUL.
|
||||||
|
|
||||||
|
Reviewed By: arsenm
|
||||||
|
|
||||||
|
Differential Revision: https://reviews.llvm.org/D156972
|
||||||
|
|
||||||
|
(cherry picked from commit 98ccc70b93a39a7ea3e26f7f5b5fe40d39b5a7e5)
|
||||||
|
---
|
||||||
|
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +-
|
||||||
|
.../AMDGPU/replace-store-of-insert-load.ll | 58 +++++++++++++++++++
|
||||||
|
2 files changed, 59 insertions(+), 1 deletion(-)
|
||||||
|
create mode 100644 llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll
|
||||||
|
|
||||||
|
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||||
|
index 235f0da86b90..dbc8be3c52b8 100644
|
||||||
|
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||||
|
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||||
|
@@ -20517,7 +20517,7 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
|
||||||
|
EVT PtrVT = Ptr.getValueType();
|
||||||
|
|
||||||
|
SDValue Offset =
|
||||||
|
- DAG.getNode(ISD::MUL, DL, PtrVT, Idx,
|
||||||
|
+ DAG.getNode(ISD::MUL, DL, PtrVT, DAG.getZExtOrTrunc(Idx, DL, PtrVT),
|
||||||
|
DAG.getConstant(EltVT.getSizeInBits() / 8, DL, PtrVT));
|
||||||
|
SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, Offset);
|
||||||
|
MachinePointerInfo PointerInfo(ST->getAddressSpace());
|
||||||
|
diff --git a/llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll b/llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000000..35a602af68c0
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll
|
||||||
|
@@ -0,0 +1,58 @@
|
||||||
|
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
|
||||||
|
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s
|
||||||
|
+
|
||||||
|
+; Regression test for a bug in `DAGCombiner::replaceStoreOfInsertLoad` where
|
||||||
|
+; Idx could be smaller than PtrVT, causing a MUL to be emitted with inconsistent
|
||||||
|
+; LHS/RHS types.
|
||||||
|
+
|
||||||
|
+define void @testcase_0(ptr addrspace(1) %in, float %arg) {
|
||||||
|
+; CHECK-LABEL: testcase_0:
|
||||||
|
+; CHECK: ; %bb.0:
|
||||||
|
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
|
+; CHECK-NEXT: global_store_dword v[0:1], v2, off offset:12
|
||||||
|
+; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||||
|
+; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
+ %loaded = load <4 x float>, ptr addrspace(1) %in
|
||||||
|
+ %modified = insertelement <4 x float> %loaded, float %arg, i64 3
|
||||||
|
+ store <4 x float> %modified, ptr addrspace(1) %in
|
||||||
|
+ ret void
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+define void @testcase_1(ptr addrspace(1) %in, float %arg) {
|
||||||
|
+; CHECK-LABEL: testcase_1:
|
||||||
|
+; CHECK: ; %bb.0:
|
||||||
|
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
|
+; CHECK-NEXT: global_store_dword v[0:1], v2, off offset:16
|
||||||
|
+; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||||
|
+; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
+ %loaded = load <6 x float>, ptr addrspace(1) %in
|
||||||
|
+ %modified = insertelement <6 x float> %loaded, float %arg, i64 4
|
||||||
|
+ store <6 x float> %modified, ptr addrspace(1) %in
|
||||||
|
+ ret void
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+define void @testcase_2(ptr addrspace(1) %in, double %arg) {
|
||||||
|
+; CHECK-LABEL: testcase_2:
|
||||||
|
+; CHECK: ; %bb.0:
|
||||||
|
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
|
+; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:8
|
||||||
|
+; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||||
|
+; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
+ %loaded = load <4 x double>, ptr addrspace(1) %in
|
||||||
|
+ %modified = insertelement <4 x double> %loaded, double %arg, i64 1
|
||||||
|
+ store <4 x double> %modified, ptr addrspace(1) %in
|
||||||
|
+ ret void
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+define void @testcase_3(ptr addrspace(1) %in, double %arg) {
|
||||||
|
+; CHECK-LABEL: testcase_3:
|
||||||
|
+; CHECK: ; %bb.0:
|
||||||
|
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||||
|
+; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:56
|
||||||
|
+; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||||
|
+; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||||
|
+ %loaded = load <8 x double>, ptr addrspace(1) %in
|
||||||
|
+ %modified = insertelement <8 x double> %loaded, double %arg, i64 7
|
||||||
|
+ store <8 x double> %modified, ptr addrspace(1) %in
|
||||||
|
+ ret void
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.44.0
|
||||||
|
|
74
SOURCES/0001-PEI-Don-t-zero-out-noreg-operands.patch
Normal file
74
SOURCES/0001-PEI-Don-t-zero-out-noreg-operands.patch
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
From 9d1f05a7b8537deb5f626cd1b7b26ef2678f4c8e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Arthur Eubanks <aeubanks@google.com>
|
||||||
|
Date: Thu, 27 Jul 2023 13:27:58 -0700
|
||||||
|
Subject: [PATCH] [PEI] Don't zero out noreg operands
|
||||||
|
|
||||||
|
A tail call may have $noreg operands.
|
||||||
|
|
||||||
|
Fixes a crash.
|
||||||
|
|
||||||
|
Reviewed By: xgupta
|
||||||
|
|
||||||
|
Differential Revision: https://reviews.llvm.org/D156485
|
||||||
|
|
||||||
|
(cherry picked from commit f800c1f3b207e7bcdc8b4c7192928d9a078242a0)
|
||||||
|
---
|
||||||
|
llvm/lib/CodeGen/PrologEpilogInserter.cpp | 9 +++++++--
|
||||||
|
llvm/test/CodeGen/X86/zero-call-used-regs.ll | 14 ++++++++++++++
|
||||||
|
2 files changed, 21 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
|
||||||
|
index e323aaaeefaf..49047719fdaa 100644
|
||||||
|
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
|
||||||
|
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
|
||||||
|
@@ -1285,6 +1285,8 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
|
||||||
|
continue;
|
||||||
|
|
||||||
|
MCRegister Reg = MO.getReg();
|
||||||
|
+ if (!Reg)
|
||||||
|
+ continue;
|
||||||
|
|
||||||
|
// This picks up sibling registers (e.q. %al -> %ah).
|
||||||
|
for (MCRegUnit Unit : TRI.regunits(Reg))
|
||||||
|
@@ -1308,8 +1310,11 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
|
||||||
|
if (!MO.isReg())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
- for (const MCPhysReg &Reg :
|
||||||
|
- TRI.sub_and_superregs_inclusive(MO.getReg()))
|
||||||
|
+ MCRegister Reg = MO.getReg();
|
||||||
|
+ if (!Reg)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ for (const MCPhysReg Reg : TRI.sub_and_superregs_inclusive(Reg))
|
||||||
|
RegsToZero.reset(Reg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs.ll b/llvm/test/CodeGen/X86/zero-call-used-regs.ll
|
||||||
|
index 63d51c916bb9..97ad5ce9c8cb 100644
|
||||||
|
--- a/llvm/test/CodeGen/X86/zero-call-used-regs.ll
|
||||||
|
+++ b/llvm/test/CodeGen/X86/zero-call-used-regs.ll
|
||||||
|
@@ -241,6 +241,20 @@ entry:
|
||||||
|
ret i32 %x
|
||||||
|
}
|
||||||
|
|
||||||
|
+define dso_local void @tailcall(ptr %p) local_unnamed_addr #0 "zero-call-used-regs"="used-gpr" {
|
||||||
|
+; I386-LABEL: tailcall:
|
||||||
|
+; I386: # %bb.0:
|
||||||
|
+; I386-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
|
+; I386-NEXT: jmpl *(%eax) # TAILCALL
|
||||||
|
+;
|
||||||
|
+; X86-64-LABEL: tailcall:
|
||||||
|
+; X86-64: # %bb.0:
|
||||||
|
+; X86-64-NEXT: jmpq *(%rdi) # TAILCALL
|
||||||
|
+ %c = load ptr, ptr %p
|
||||||
|
+ tail call void %c()
|
||||||
|
+ ret void
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
; Don't emit zeroing registers in "main" function.
|
||||||
|
define dso_local i32 @main() local_unnamed_addr #1 {
|
||||||
|
; I386-LABEL: main:
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
26
SOURCES/0101-Deactivate-markdown-doc.patch
Normal file
26
SOURCES/0101-Deactivate-markdown-doc.patch
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
diff -Naur a/llvm/docs/conf.py b/llvm/docs/conf.py
|
||||||
|
--- a/llvm/docs/conf.py 2020-09-15 09:12:24.318287611 +0000
|
||||||
|
+++ b/llvm/docs/conf.py 2020-09-15 15:01:00.025893199 +0000
|
||||||
|
@@ -36,21 +36,7 @@
|
||||||
|
".rst": "restructuredtext",
|
||||||
|
}
|
||||||
|
|
||||||
|
-try:
|
||||||
|
- import recommonmark
|
||||||
|
-except ImportError:
|
||||||
|
- # manpages do not use any .md sources
|
||||||
|
- if not tags.has("builder-man"):
|
||||||
|
- raise
|
||||||
|
-else:
|
||||||
|
- import sphinx
|
||||||
|
-
|
||||||
|
- if sphinx.version_info >= (3, 0):
|
||||||
|
- # This requires 0.5 or later.
|
||||||
|
- extensions.append("recommonmark")
|
||||||
|
- else:
|
||||||
|
- source_parsers = {".md": "recommonmark.parser.CommonMarkParser"}
|
||||||
|
- source_suffix[".md"] = "markdown"
|
||||||
|
+import sphinx
|
||||||
|
|
||||||
|
# The encoding of source files.
|
||||||
|
# source_encoding = 'utf-8-sig'
|
1034
SPECS/llvm.spec
Normal file
1034
SPECS/llvm.spec
Normal file
File diff suppressed because it is too large
Load Diff
20
gating.yaml
20
gating.yaml
@ -1,20 +0,0 @@
|
|||||||
--- !Policy
|
|
||||||
product_versions:
|
|
||||||
- fedora-*
|
|
||||||
decision_context: bodhi_update_push_testing
|
|
||||||
rules:
|
|
||||||
- !PassingTestCaseRule {test_case_name: fedora-ci.koji-build.tier0.functional}
|
|
||||||
--- !Policy
|
|
||||||
product_versions:
|
|
||||||
- fedora-*
|
|
||||||
decision_context: bodhi_update_push_stable
|
|
||||||
rules:
|
|
||||||
- !PassingTestCaseRule {test_case_name: fedora-ci.koji-build.tier0.functional}
|
|
||||||
--- !Policy
|
|
||||||
product_versions:
|
|
||||||
- rhel-9
|
|
||||||
decision_context: osci_compose_gate
|
|
||||||
rules:
|
|
||||||
- !PassingTestCaseRule {test_case_name: baseos-ci.brew-build.tier0-tmt-x86_64-aarch64.functional}
|
|
||||||
- !PassingTestCaseRule {test_case_name: baseos-ci.brew-build.tier0-tmt-s390x-ppc64le.functional}
|
|
||||||
- !PassingTestCaseRule {test_case_name: osci.brew-build.rebuild.validation}
|
|
@ -1,18 +0,0 @@
|
|||||||
# This library has no dependencies.
|
|
||||||
addFilter("llvm-libs.x86_64: E: shared-lib-without-dependency-information /usr/lib64/libRemarks.so.[0-9]+")
|
|
||||||
addFilter("llvm-googletest.x86_64: W: devel-file-in-non-devel-package")
|
|
||||||
|
|
||||||
# same for llvm-test utilities
|
|
||||||
addFilter("llvm-test.x86_64: W: no-manual-page-for-binary")
|
|
||||||
|
|
||||||
# Don't warn about libs in llvm-libs
|
|
||||||
addFilter("llvm-libs.x86_64: W: devel-file-in-non-devel-package /usr/lib64/lib")
|
|
||||||
# These is ok in the llvm gold plugin
|
|
||||||
addFilter("llvm-libs.x86_64: W: shared-lib-calls-exit /usr/lib64/LLVMgold.so")
|
|
||||||
addFilter("llvm-libs.x86_64: W: no-soname /usr/lib64/LLVMgold.so")
|
|
||||||
|
|
||||||
# These are without documentation
|
|
||||||
addFilter("llvm-googletest.x86_64: W: no-documentation")
|
|
||||||
addFilter("llvm-libs.x86_64: W: no-documentation")
|
|
||||||
addFilter("llvm-static.x86_64: W: no-documentation")
|
|
||||||
addFilter("llvm-test.x86_64: W: no-documentation")
|
|
8
sources
8
sources
@ -1,8 +0,0 @@
|
|||||||
SHA512 (cmake-18.1.8.src.tar.xz) = e02243b491f9e688db28d7b53270fcf87debf09d3c95b136a7c7b96e26890de68712c60a1e85f5a448a95ad8c81f2d8ae77047780822443bbe39f1a9e6211007
|
|
||||||
SHA512 (cmake-18.1.8.src.tar.xz.sig) = 99191e95130fe4363a8db8f411a0e61af0549ad182a1280f99f0dd3ee679a321b993d103c6915d535a55d9f8a4d7fea86b7fdcc77605e02150e8edf1e18dee57
|
|
||||||
SHA512 (llvm-18.1.8.src.tar.xz) = 930814730bb2d80cf7f7b2968f0f1f1442009ca62a7ca29992b69d63823270584b059d16aa845bb381411da566e7e4f255fcfbc38acbdf865eb0419b4dfd7459
|
|
||||||
SHA512 (llvm-18.1.8.src.tar.xz.sig) = aab7cb61a6b5dd3776a9b306d91d08763710725b72ba6a4263d3cca5ae5959e3b073b27dbfd95f9a53a78600c6f414e2fd1cc0dbe3176d7cf142996f7af700ca
|
|
||||||
SHA512 (third-party-18.1.8.src.tar.xz) = bedaa5d29ebeaf0ee1c700eb8492d0fef185e7c16528202927c81117d94fadd568829aa0e1873e1217e8e72866f3876a9681bbdb2a6a0a5466fc911f7b3620d4
|
|
||||||
SHA512 (third-party-18.1.8.src.tar.xz.sig) = 32c4d779a56a3908b291a4f0cf1df72ccb86b55439ad66f9cbad1b48a77cb92b129b131806d2914d0e63cb319cde3181a2c03b75856ec36cee5f88120bb58214
|
|
||||||
SHA512 (llvm-17.0.6.src.tar.xz) = bf9b04d0d45c67168b195c550cd8326e3a01176f92776705846aad3956a494bcb7a053b0b0bde19abd68dc0068e5c97ef99dee7eadfdb727bc0d758b2684f3bd
|
|
||||||
SHA512 (llvm-17.0.6.src.tar.xz.sig) = 904066c34ec0adf5b9e789af640329cadc7919b111aca77fa3ce26450696bace20e299e2592251f96ee33fb83da603423cc0ca63a67ad627916fcab0bed59689
|
|
@ -1,6 +0,0 @@
|
|||||||
# Gating testplans for LLVM
|
|
||||||
|
|
||||||
The tests for LLVM are in a separate repo: https://src.fedoraproject.org/tests/llvm
|
|
||||||
This directory should contain only fmf plans (such as build-gating.fmf) which import
|
|
||||||
the tests from the tests repo. This can be done using the "url" parameter of the
|
|
||||||
plan's "discover" step. Reference: https://tmt.readthedocs.io/en/stable/spec/plans.html#fmf
|
|
@ -1,51 +0,0 @@
|
|||||||
#
|
|
||||||
# Build/PR gating tests for *LLVM 13*
|
|
||||||
#
|
|
||||||
# Imports and runs tests provided by Fedora LLVM git for the matching LLVM version.
|
|
||||||
#
|
|
||||||
# NOTE: *always* keep this file in sync with upstream, i.e. Fedora. Since we cannot "discover" a plan,
|
|
||||||
# we must duplicate at least some part of upstream plan setup, like `adjust` or `provision`. Not necessarily
|
|
||||||
# all steps, btu if we do need some of them here, let's focus on making changes in upstream first, to preserve
|
|
||||||
# one source of truth. Once TMT learns to include whole plans, we could drop the copied content from here.
|
|
||||||
#
|
|
||||||
|
|
||||||
summary: LLVM tests for build/PR gating
|
|
||||||
|
|
||||||
adjust:
|
|
||||||
- because: "Plan to be ran when either executed locally, or executed by CI system to gate a build or PR."
|
|
||||||
when: >-
|
|
||||||
trigger is defined
|
|
||||||
and trigger != commit
|
|
||||||
and trigger != build
|
|
||||||
enabled: false
|
|
||||||
|
|
||||||
# Unfortunately, TMT does not support more declarative approach, we need to run commands on our own.
|
|
||||||
- because: "On RHEL, CRB must be enabled to provide rarer packages"
|
|
||||||
when: >-
|
|
||||||
distro == rhel-9
|
|
||||||
or distro == rhel-8
|
|
||||||
prepare+:
|
|
||||||
- name: Enable CRB
|
|
||||||
how: shell
|
|
||||||
script: dnf config-manager --set-enabled rhel-CRB
|
|
||||||
- because: "On CentOS, CRB must be enabled to provide rarer packages"
|
|
||||||
when: >-
|
|
||||||
distro == centos
|
|
||||||
prepare+:
|
|
||||||
- name: Enable CRB
|
|
||||||
how: shell
|
|
||||||
script: dnf config-manager --set-enabled crb
|
|
||||||
|
|
||||||
discover:
|
|
||||||
- name: "Upstream LLVM tests for build/PR gating"
|
|
||||||
how: fmf
|
|
||||||
url: https://src.fedoraproject.org/tests/llvm.git
|
|
||||||
ref: main
|
|
||||||
filter: "tag:-spoils-installation"
|
|
||||||
|
|
||||||
execute:
|
|
||||||
how: tmt
|
|
||||||
|
|
||||||
provision:
|
|
||||||
hardware:
|
|
||||||
memory: ">= 4 GiB"
|
|
Loading…
Reference in New Issue
Block a user