Compare commits
No commits in common. "c8-stream-rhel8" and "a9-beta" have entirely different histories.
c8-stream-
...
a9-beta
12
.gitignore
vendored
12
.gitignore
vendored
@ -1,6 +1,6 @@
|
|||||||
SOURCES/cmake-18.1.8.src.tar.xz
|
SOURCES/cmake-16.0.6.src.tar.xz
|
||||||
SOURCES/cmake-18.1.8.src.tar.xz.sig
|
SOURCES/cmake-16.0.6.src.tar.xz.sig
|
||||||
SOURCES/llvm-18.1.8.src.tar.xz
|
SOURCES/llvm-16.0.6.src.tar.xz
|
||||||
SOURCES/llvm-18.1.8.src.tar.xz.sig
|
SOURCES/llvm-16.0.6.src.tar.xz.sig
|
||||||
SOURCES/third-party-18.1.8.src.tar.xz
|
SOURCES/third-party-16.0.6.src.tar.xz
|
||||||
SOURCES/third-party-18.1.8.src.tar.xz.sig
|
SOURCES/third-party-16.0.6.src.tar.xz.sig
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
1ea03e355b705b4cada3051bd7301a57daa19283 SOURCES/cmake-18.1.8.src.tar.xz
|
0de534cfef38697e115c3ae80634765f05e78e5b SOURCES/cmake-16.0.6.src.tar.xz
|
||||||
33c2f4327abc20c6098be064ab6bbc15536031f2 SOURCES/cmake-18.1.8.src.tar.xz.sig
|
2db5c88fe9277bb0fa85f49b58e946e49ff235c2 SOURCES/cmake-16.0.6.src.tar.xz.sig
|
||||||
f9befa4cbef3f688ab48fca42449e13c5bcb872d SOURCES/llvm-18.1.8.src.tar.xz
|
072d2fb4b10f95d06189de00eb7f7e9b35c54e9a SOURCES/llvm-16.0.6.src.tar.xz
|
||||||
8310ebfda8205233b5ecb6baa7f5272efae31155 SOURCES/llvm-18.1.8.src.tar.xz.sig
|
bfc74b3868c69ce674a583c91e938b6d4cf0fded SOURCES/llvm-16.0.6.src.tar.xz.sig
|
||||||
ada9cf5deaec0a730c751ffd84145acedc6eafeb SOURCES/third-party-18.1.8.src.tar.xz
|
5b1a58de6ed9d154a38edb6386a5749576e0b96a SOURCES/third-party-16.0.6.src.tar.xz
|
||||||
b87b233f778b610a7f8ed1cf9aea4112dfcd7a06 SOURCES/third-party-18.1.8.src.tar.xz.sig
|
51ad6a8ccc5ccd40faff6f1c98a2f33a9b600f88 SOURCES/third-party-16.0.6.src.tar.xz.sig
|
||||||
|
25
SOURCES/0001-Deactivate-markdown-doc.patch
Normal file
25
SOURCES/0001-Deactivate-markdown-doc.patch
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
diff -Naur a/llvm/docs/conf.py b/llvm/docs/conf.py
|
||||||
|
--- a/llvm/docs/conf.py 2020-09-15 09:12:24.318287611 +0000
|
||||||
|
+++ b/llvm/docs/conf.py 2020-09-15 15:01:00.025893199 +0000
|
||||||
|
@@ -36,20 +36,7 @@
|
||||||
|
'.rst': 'restructuredtext',
|
||||||
|
}
|
||||||
|
|
||||||
|
-try:
|
||||||
|
- import recommonmark
|
||||||
|
-except ImportError:
|
||||||
|
- # manpages do not use any .md sources
|
||||||
|
- if not tags.has('builder-man'):
|
||||||
|
- raise
|
||||||
|
-else:
|
||||||
|
- import sphinx
|
||||||
|
- if sphinx.version_info >= (3, 0):
|
||||||
|
- # This requires 0.5 or later.
|
||||||
|
- extensions.append('recommonmark')
|
||||||
|
- else:
|
||||||
|
- source_parsers = {'.md': 'recommonmark.parser.CommonMarkParser'}
|
||||||
|
- source_suffix['.md'] = 'markdown'
|
||||||
|
+import sphinx
|
||||||
|
|
||||||
|
# The encoding of source files.
|
||||||
|
#source_encoding = 'utf-8-sig'
|
@ -0,0 +1,184 @@
|
|||||||
|
From efbaf8bc61f4c0e29a3eaafb11ac0ddda8bd3dff Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
|
||||||
|
Date: Fri, 30 Jun 2023 16:02:56 +0200
|
||||||
|
Subject: [PATCH] [SystemZ] Improve error messages for unsupported relocations
|
||||||
|
|
||||||
|
In the SystemZMCObjectWriter, we currently just abort in case
|
||||||
|
some unsupported relocation is requested. However, as this
|
||||||
|
situation can be triggered by invalid (inline) assembler input,
|
||||||
|
we should really get a regular error message instead.
|
||||||
|
---
|
||||||
|
.../MCTargetDesc/SystemZMCObjectWriter.cpp | 59 +++++++++++--------
|
||||||
|
1 file changed, 35 insertions(+), 24 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
|
||||||
|
index c23463ab9bde..0b11468afc52 100644
|
||||||
|
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
|
||||||
|
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
|
||||||
|
@@ -9,6 +9,7 @@
|
||||||
|
#include "MCTargetDesc/SystemZMCFixups.h"
|
||||||
|
#include "MCTargetDesc/SystemZMCTargetDesc.h"
|
||||||
|
#include "llvm/BinaryFormat/ELF.h"
|
||||||
|
+#include "llvm/MC/MCContext.h"
|
||||||
|
#include "llvm/MC/MCELFObjectWriter.h"
|
||||||
|
#include "llvm/MC/MCExpr.h"
|
||||||
|
#include "llvm/MC/MCFixup.h"
|
||||||
|
@@ -40,7 +41,7 @@ SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI)
|
||||||
|
/*HasRelocationAddend_=*/ true) {}
|
||||||
|
|
||||||
|
// Return the relocation type for an absolute value of MCFixupKind Kind.
|
||||||
|
-static unsigned getAbsoluteReloc(unsigned Kind) {
|
||||||
|
+static unsigned getAbsoluteReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
|
||||||
|
switch (Kind) {
|
||||||
|
case FK_Data_1: return ELF::R_390_8;
|
||||||
|
case FK_Data_2: return ELF::R_390_16;
|
||||||
|
@@ -49,11 +50,12 @@ static unsigned getAbsoluteReloc(unsigned Kind) {
|
||||||
|
case SystemZ::FK_390_12: return ELF::R_390_12;
|
||||||
|
case SystemZ::FK_390_20: return ELF::R_390_20;
|
||||||
|
}
|
||||||
|
- llvm_unreachable("Unsupported absolute address");
|
||||||
|
+ Ctx.reportError(Loc, "Unsupported absolute address");
|
||||||
|
+ return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the relocation type for a PC-relative value of MCFixupKind Kind.
|
||||||
|
-static unsigned getPCRelReloc(unsigned Kind) {
|
||||||
|
+static unsigned getPCRelReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
|
||||||
|
switch (Kind) {
|
||||||
|
case FK_Data_2: return ELF::R_390_PC16;
|
||||||
|
case FK_Data_4: return ELF::R_390_PC32;
|
||||||
|
@@ -63,62 +65,69 @@ static unsigned getPCRelReloc(unsigned Kind) {
|
||||||
|
case SystemZ::FK_390_PC24DBL: return ELF::R_390_PC24DBL;
|
||||||
|
case SystemZ::FK_390_PC32DBL: return ELF::R_390_PC32DBL;
|
||||||
|
}
|
||||||
|
- llvm_unreachable("Unsupported PC-relative address");
|
||||||
|
+ Ctx.reportError(Loc, "Unsupported PC-relative address");
|
||||||
|
+ return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the R_390_TLS_LE* relocation type for MCFixupKind Kind.
|
||||||
|
-static unsigned getTLSLEReloc(unsigned Kind) {
|
||||||
|
+static unsigned getTLSLEReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
|
||||||
|
switch (Kind) {
|
||||||
|
case FK_Data_4: return ELF::R_390_TLS_LE32;
|
||||||
|
case FK_Data_8: return ELF::R_390_TLS_LE64;
|
||||||
|
}
|
||||||
|
- llvm_unreachable("Unsupported absolute address");
|
||||||
|
+ Ctx.reportError(Loc, "Unsupported thread-local address (local-exec)");
|
||||||
|
+ return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the R_390_TLS_LDO* relocation type for MCFixupKind Kind.
|
||||||
|
-static unsigned getTLSLDOReloc(unsigned Kind) {
|
||||||
|
+static unsigned getTLSLDOReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
|
||||||
|
switch (Kind) {
|
||||||
|
case FK_Data_4: return ELF::R_390_TLS_LDO32;
|
||||||
|
case FK_Data_8: return ELF::R_390_TLS_LDO64;
|
||||||
|
}
|
||||||
|
- llvm_unreachable("Unsupported absolute address");
|
||||||
|
+ Ctx.reportError(Loc, "Unsupported thread-local address (local-dynamic)");
|
||||||
|
+ return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the R_390_TLS_LDM* relocation type for MCFixupKind Kind.
|
||||||
|
-static unsigned getTLSLDMReloc(unsigned Kind) {
|
||||||
|
+static unsigned getTLSLDMReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
|
||||||
|
switch (Kind) {
|
||||||
|
case FK_Data_4: return ELF::R_390_TLS_LDM32;
|
||||||
|
case FK_Data_8: return ELF::R_390_TLS_LDM64;
|
||||||
|
case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_LDCALL;
|
||||||
|
}
|
||||||
|
- llvm_unreachable("Unsupported absolute address");
|
||||||
|
+ Ctx.reportError(Loc, "Unsupported thread-local address (local-dynamic)");
|
||||||
|
+ return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the R_390_TLS_GD* relocation type for MCFixupKind Kind.
|
||||||
|
-static unsigned getTLSGDReloc(unsigned Kind) {
|
||||||
|
+static unsigned getTLSGDReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
|
||||||
|
switch (Kind) {
|
||||||
|
case FK_Data_4: return ELF::R_390_TLS_GD32;
|
||||||
|
case FK_Data_8: return ELF::R_390_TLS_GD64;
|
||||||
|
case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_GDCALL;
|
||||||
|
}
|
||||||
|
- llvm_unreachable("Unsupported absolute address");
|
||||||
|
+ Ctx.reportError(Loc, "Unsupported thread-local address (general-dynamic)");
|
||||||
|
+ return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the PLT relocation counterpart of MCFixupKind Kind.
|
||||||
|
-static unsigned getPLTReloc(unsigned Kind) {
|
||||||
|
+static unsigned getPLTReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
|
||||||
|
switch (Kind) {
|
||||||
|
case SystemZ::FK_390_PC12DBL: return ELF::R_390_PLT12DBL;
|
||||||
|
case SystemZ::FK_390_PC16DBL: return ELF::R_390_PLT16DBL;
|
||||||
|
case SystemZ::FK_390_PC24DBL: return ELF::R_390_PLT24DBL;
|
||||||
|
case SystemZ::FK_390_PC32DBL: return ELF::R_390_PLT32DBL;
|
||||||
|
}
|
||||||
|
- llvm_unreachable("Unsupported absolute address");
|
||||||
|
+ Ctx.reportError(Loc, "Unsupported PC-relative PLT address");
|
||||||
|
+ return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx,
|
||||||
|
const MCValue &Target,
|
||||||
|
const MCFixup &Fixup,
|
||||||
|
bool IsPCRel) const {
|
||||||
|
+ SMLoc Loc = Fixup.getLoc();
|
||||||
|
unsigned Kind = Fixup.getKind();
|
||||||
|
if (Kind >= FirstLiteralRelocationKind)
|
||||||
|
return Kind - FirstLiteralRelocationKind;
|
||||||
|
@@ -126,38 +135,40 @@ unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx,
|
||||||
|
switch (Modifier) {
|
||||||
|
case MCSymbolRefExpr::VK_None:
|
||||||
|
if (IsPCRel)
|
||||||
|
- return getPCRelReloc(Kind);
|
||||||
|
- return getAbsoluteReloc(Kind);
|
||||||
|
+ return getPCRelReloc(Ctx, Loc, Kind);
|
||||||
|
+ return getAbsoluteReloc(Ctx, Loc, Kind);
|
||||||
|
|
||||||
|
case MCSymbolRefExpr::VK_NTPOFF:
|
||||||
|
assert(!IsPCRel && "NTPOFF shouldn't be PC-relative");
|
||||||
|
- return getTLSLEReloc(Kind);
|
||||||
|
+ return getTLSLEReloc(Ctx, Loc, Kind);
|
||||||
|
|
||||||
|
case MCSymbolRefExpr::VK_INDNTPOFF:
|
||||||
|
if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
|
||||||
|
return ELF::R_390_TLS_IEENT;
|
||||||
|
- llvm_unreachable("Only PC-relative INDNTPOFF accesses are supported for now");
|
||||||
|
+ Ctx.reportError(Loc, "Only PC-relative INDNTPOFF accesses are supported for now");
|
||||||
|
+ return 0;
|
||||||
|
|
||||||
|
case MCSymbolRefExpr::VK_DTPOFF:
|
||||||
|
assert(!IsPCRel && "DTPOFF shouldn't be PC-relative");
|
||||||
|
- return getTLSLDOReloc(Kind);
|
||||||
|
+ return getTLSLDOReloc(Ctx, Loc, Kind);
|
||||||
|
|
||||||
|
case MCSymbolRefExpr::VK_TLSLDM:
|
||||||
|
assert(!IsPCRel && "TLSLDM shouldn't be PC-relative");
|
||||||
|
- return getTLSLDMReloc(Kind);
|
||||||
|
+ return getTLSLDMReloc(Ctx, Loc, Kind);
|
||||||
|
|
||||||
|
case MCSymbolRefExpr::VK_TLSGD:
|
||||||
|
assert(!IsPCRel && "TLSGD shouldn't be PC-relative");
|
||||||
|
- return getTLSGDReloc(Kind);
|
||||||
|
+ return getTLSGDReloc(Ctx, Loc, Kind);
|
||||||
|
|
||||||
|
case MCSymbolRefExpr::VK_GOT:
|
||||||
|
if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
|
||||||
|
return ELF::R_390_GOTENT;
|
||||||
|
- llvm_unreachable("Only PC-relative GOT accesses are supported for now");
|
||||||
|
+ Ctx.reportError(Loc, "Only PC-relative GOT accesses are supported for now");
|
||||||
|
+ return 0;
|
||||||
|
|
||||||
|
case MCSymbolRefExpr::VK_PLT:
|
||||||
|
- assert(IsPCRel && "@PLT shouldt be PC-relative");
|
||||||
|
- return getPLTReloc(Kind);
|
||||||
|
+ assert(IsPCRel && "@PLT shouldn't be PC-relative");
|
||||||
|
+ return getPLTReloc(Ctx, Loc, Kind);
|
||||||
|
|
||||||
|
default:
|
||||||
|
llvm_unreachable("Modifier not supported");
|
||||||
|
--
|
||||||
|
2.41.0
|
||||||
|
|
32
SOURCES/0001-llvm-Add-install-targets-for-gtest.patch
Normal file
32
SOURCES/0001-llvm-Add-install-targets-for-gtest.patch
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
From 8cc3870f09d728d9017c72eba9520117a4283fee Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Stellard <tstellar@redhat.com>
|
||||||
|
Date: Thu, 17 Nov 2022 09:01:10 +0000
|
||||||
|
Subject: Add install targets for gtest
|
||||||
|
|
||||||
|
Stand-alone builds need an installed version of gtest in order to run
|
||||||
|
the unittests.
|
||||||
|
|
||||||
|
Differential Revision: https://reviews.llvm.org/D137890
|
||||||
|
---
|
||||||
|
llvm/CMakeLists.txt | 5 +++++
|
||||||
|
1 file changed, 5 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
|
||||||
|
index 60e1f29620af..d91338532815 100644
|
||||||
|
--- a/llvm/CMakeLists.txt
|
||||||
|
+++ b/llvm/CMakeLists.txt
|
||||||
|
@@ -693,6 +693,11 @@ option(LLVM_BUILD_TESTS
|
||||||
|
"Build LLVM unit tests. If OFF, just generate build targets." OFF)
|
||||||
|
option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." ON)
|
||||||
|
|
||||||
|
+option(LLVM_INSTALL_GTEST
|
||||||
|
+ "Install the llvm gtest library. This should be on if you want to do
|
||||||
|
+ stand-alone builds of the other projects and run their unit tests." OFF)
|
||||||
|
+
|
||||||
|
+
|
||||||
|
option(LLVM_BUILD_BENCHMARKS "Add LLVM benchmark targets to the list of default
|
||||||
|
targets. If OFF, benchmarks still could be built using Benchmarks target." OFF)
|
||||||
|
option(LLVM_INCLUDE_BENCHMARKS "Generate benchmark targets. If OFF, benchmarks can't be built." ON)
|
||||||
|
--
|
||||||
|
2.34.3
|
||||||
|
|
@ -1,13 +0,0 @@
|
|||||||
diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py
|
|
||||||
index cf8a75980b53..b208ad138e89 100644
|
|
||||||
--- a/llvm/docs/conf.py
|
|
||||||
+++ b/llvm/docs/conf.py
|
|
||||||
@@ -26,7 +26,7 @@ from datetime import date
|
|
||||||
|
|
||||||
# Add any Sphinx extension module names here, as strings. They can be extensions
|
|
||||||
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
|
|
||||||
-extensions = ["myst_parser", "sphinx.ext.intersphinx", "sphinx.ext.todo"]
|
|
||||||
+extensions = ["sphinx.ext.intersphinx", "sphinx.ext.todo"]
|
|
||||||
|
|
||||||
# Automatic anchors for markdown titles
|
|
||||||
from llvm_slug import make_slug
|
|
47
SOURCES/0201-third-party-Add-install-targets-for-gtest.patch
Normal file
47
SOURCES/0201-third-party-Add-install-targets-for-gtest.patch
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
From 8cc3870f09d728d9017c72eba9520117a4283fee Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tom Stellard <tstellar@redhat.com>
|
||||||
|
Date: Thu, 17 Nov 2022 09:01:10 +0000
|
||||||
|
Subject: Add install targets for gtest
|
||||||
|
|
||||||
|
Stand-alone builds need an installed version of gtest in order to run
|
||||||
|
the unittests.
|
||||||
|
|
||||||
|
Differential Revision: https://reviews.llvm.org/D137890
|
||||||
|
---
|
||||||
|
third-party/unittest/CMakeLists.txt | 15 ++++++++++++++-
|
||||||
|
1 file changed, 14 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/third-party/unittest/CMakeLists.txt b/third-party/unittest/CMakeLists.txt
|
||||||
|
index 0e54e0e57c35..1d2a52730d7d 100644
|
||||||
|
--- a/third-party/unittest/CMakeLists.txt
|
||||||
|
+++ b/third-party/unittest/CMakeLists.txt
|
||||||
|
@@ -65,12 +65,25 @@ if (NOT LLVM_ENABLE_THREADS)
|
||||||
|
endif ()
|
||||||
|
|
||||||
|
target_include_directories(llvm_gtest
|
||||||
|
- PUBLIC googletest/include googlemock/include
|
||||||
|
+ PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/googletest/include>
|
||||||
|
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/googlemock/include>
|
||||||
|
+ $<INSTALL_INTERFACE:include/llvm-gtest/>
|
||||||
|
+ $<INSTALL_INTERFACE:include/llvm-gmock/>
|
||||||
|
PRIVATE googletest googlemock
|
||||||
|
)
|
||||||
|
|
||||||
|
add_subdirectory(UnitTestMain)
|
||||||
|
|
||||||
|
+if (LLVM_INSTALL_GTEST)
|
||||||
|
+export(TARGETS llvm_gtest llvm_gtest_main LLVMTestingSupport FILE LLVMGTestConfig.cmake)
|
||||||
|
+install(TARGETS llvm_gtest llvm_gtest_main LLVMTestingSupport EXPORT LLVMGTestConfig
|
||||||
|
+ ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT llvm_gtest)
|
||||||
|
+ install(EXPORT LLVMGTestConfig DESTINATION ${LLVM_INSTALL_PACKAGE_DIR} COMPONENT llvm_gtest)
|
||||||
|
+ add_llvm_install_targets(install-llvm_gtest COMPONENT llvm_gtest DEPENDS llvm_gtest LLVMGTestConfig.cmake)
|
||||||
|
+ install(DIRECTORY googletest/include/gtest/ DESTINATION include/llvm-gtest/gtest/ COMPONENT llvm_gtest)
|
||||||
|
+ install(DIRECTORY googlemock/include/gmock/ DESTINATION include/llvm-gmock/gmock/ COMPONENT llvm_gtest)
|
||||||
|
+endif()
|
||||||
|
+
|
||||||
|
# When LLVM_LINK_LLVM_DYLIB is enabled, libLLVM.so is added to the interface
|
||||||
|
# link libraries for gtest and gtest_main. This means that any target, like
|
||||||
|
# unittests for example, that links against gtest will be forced to link
|
||||||
|
--
|
||||||
|
2.34.3
|
||||||
|
|
@ -1,893 +0,0 @@
|
|||||||
From 91052169960477fbc39169c10f9fae3bec732510 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Carl Ritson <carl.ritson@amd.com>
|
|
||||||
Date: Wed, 17 Jul 2024 15:07:42 +0900
|
|
||||||
Subject: [PATCH 1/3] [AMDGPU] Implement workaround for GFX11.5 export priority
|
|
||||||
|
|
||||||
On GFX11.5 shaders having completed exports need to execute/wait
|
|
||||||
at a lower priority than shaders still executing exports.
|
|
||||||
Add code to maintain normal priority of 2 for shaders that export
|
|
||||||
and drop to priority 0 after exports.
|
|
||||||
---
|
|
||||||
llvm/lib/Target/AMDGPU/AMDGPU.td | 15 +-
|
|
||||||
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 112 ++++++
|
|
||||||
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 +
|
|
||||||
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3 +
|
|
||||||
.../AMDGPU/required-export-priority.ll | 344 ++++++++++++++++++
|
|
||||||
.../AMDGPU/required-export-priority.mir | 293 +++++++++++++++
|
|
||||||
6 files changed, 765 insertions(+), 3 deletions(-)
|
|
||||||
create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
|
||||||
create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.mir
|
|
||||||
|
|
||||||
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
|
|
||||||
index dfc8eaea66f7b..14fcf6a210a78 100644
|
|
||||||
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
|
|
||||||
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
|
|
||||||
@@ -947,6 +947,12 @@ def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
|
|
||||||
"Has restricted SOffset (immediate not supported)."
|
|
||||||
>;
|
|
||||||
|
|
||||||
+def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority",
|
|
||||||
+ "HasRequiredExportPriority",
|
|
||||||
+ "true",
|
|
||||||
+ "Export priority must be explicitly manipulated on GFX11.5"
|
|
||||||
+>;
|
|
||||||
+
|
|
||||||
//===------------------------------------------------------------===//
|
|
||||||
// Subtarget Features (options and debugging)
|
|
||||||
//===------------------------------------------------------------===//
|
|
||||||
@@ -1597,14 +1603,16 @@ def FeatureISAVersion11_5_0 : FeatureSet<
|
|
||||||
!listconcat(FeatureISAVersion11_Common.Features,
|
|
||||||
[FeatureSALUFloatInsts,
|
|
||||||
FeatureDPPSrc1SGPR,
|
|
||||||
- FeatureVGPRSingleUseHintInsts])>;
|
|
||||||
+ FeatureVGPRSingleUseHintInsts,
|
|
||||||
+ FeatureRequiredExportPriority])>;
|
|
||||||
|
|
||||||
def FeatureISAVersion11_5_1 : FeatureSet<
|
|
||||||
!listconcat(FeatureISAVersion11_Common.Features,
|
|
||||||
[FeatureSALUFloatInsts,
|
|
||||||
FeatureDPPSrc1SGPR,
|
|
||||||
FeatureVGPRSingleUseHintInsts,
|
|
||||||
- FeatureGFX11FullVGPRs])>;
|
|
||||||
+ FeatureGFX11FullVGPRs,
|
|
||||||
+ FeatureRequiredExportPriority])>;
|
|
||||||
|
|
||||||
def FeatureISAVersion12 : FeatureSet<
|
|
||||||
[FeatureGFX12,
|
|
||||||
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
|
|
||||||
index a402fc6d7e611..a8b171aa82840 100644
|
|
||||||
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
|
|
||||||
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
|
|
||||||
@@ -14,6 +14,7 @@
|
|
||||||
#include "GCNSubtarget.h"
|
|
||||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
||||||
#include "SIMachineFunctionInfo.h"
|
|
||||||
+#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
|
||||||
#include "llvm/CodeGen/ScheduleDAG.h"
|
|
||||||
#include "llvm/TargetParser/TargetParser.h"
|
|
||||||
@@ -1104,6 +1105,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
|
|
||||||
fixWMMAHazards(MI);
|
|
||||||
fixShift64HighRegBug(MI);
|
|
||||||
fixVALUMaskWriteHazard(MI);
|
|
||||||
+ fixRequiredExportPriority(MI);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
|
|
||||||
@@ -2895,3 +2897,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+static bool ensureEntrySetPrio(MachineFunction *MF, int Priority,
|
|
||||||
+ const SIInstrInfo &TII) {
|
|
||||||
+ MachineBasicBlock &EntryMBB = MF->front();
|
|
||||||
+ if (EntryMBB.begin() != EntryMBB.end()) {
|
|
||||||
+ auto &EntryMI = *EntryMBB.begin();
|
|
||||||
+ if (EntryMI.getOpcode() == AMDGPU::S_SETPRIO &&
|
|
||||||
+ EntryMI.getOperand(0).getImm() >= Priority)
|
|
||||||
+ return false;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII.get(AMDGPU::S_SETPRIO))
|
|
||||||
+ .addImm(Priority);
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
|
|
||||||
+ if (!ST.hasRequiredExportPriority())
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ // Assume the following shader types will never have exports,
|
|
||||||
+ // and avoid adding or adjusting S_SETPRIO.
|
|
||||||
+ MachineBasicBlock *MBB = MI->getParent();
|
|
||||||
+ MachineFunction *MF = MBB->getParent();
|
|
||||||
+ auto CC = MF->getFunction().getCallingConv();
|
|
||||||
+ switch (CC) {
|
|
||||||
+ case CallingConv::AMDGPU_CS:
|
|
||||||
+ case CallingConv::AMDGPU_CS_Chain:
|
|
||||||
+ case CallingConv::AMDGPU_CS_ChainPreserve:
|
|
||||||
+ case CallingConv::AMDGPU_KERNEL:
|
|
||||||
+ return false;
|
|
||||||
+ default:
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ const int MaxPriority = 3;
|
|
||||||
+ const int NormalPriority = 2;
|
|
||||||
+ const int PostExportPriority = 0;
|
|
||||||
+
|
|
||||||
+ auto It = MI->getIterator();
|
|
||||||
+ switch (MI->getOpcode()) {
|
|
||||||
+ case AMDGPU::S_ENDPGM:
|
|
||||||
+ case AMDGPU::S_ENDPGM_SAVED:
|
|
||||||
+ case AMDGPU::S_ENDPGM_ORDERED_PS_DONE:
|
|
||||||
+ case AMDGPU::SI_RETURN_TO_EPILOG:
|
|
||||||
+ // Ensure shader with calls raises priority at entry.
|
|
||||||
+ // This ensures correct priority if exports exist in callee.
|
|
||||||
+ if (MF->getFrameInfo().hasCalls())
|
|
||||||
+ return ensureEntrySetPrio(MF, NormalPriority, TII);
|
|
||||||
+ return false;
|
|
||||||
+ case AMDGPU::S_SETPRIO: {
|
|
||||||
+ // Raise minimum priority unless in workaround.
|
|
||||||
+ auto &PrioOp = MI->getOperand(0);
|
|
||||||
+ int Prio = PrioOp.getImm();
|
|
||||||
+ bool InWA = (Prio == PostExportPriority) &&
|
|
||||||
+ (It != MBB->begin() && TII.isEXP(*std::prev(It)));
|
|
||||||
+ if (InWA || Prio >= NormalPriority)
|
|
||||||
+ return false;
|
|
||||||
+ PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority));
|
|
||||||
+ return true;
|
|
||||||
+ }
|
|
||||||
+ default:
|
|
||||||
+ if (!TII.isEXP(*MI))
|
|
||||||
+ return false;
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ // Check entry priority at each export (as there will only be a few).
|
|
||||||
+ // Note: amdgpu_gfx can only be a callee, so defer to caller setprio.
|
|
||||||
+ bool Changed = false;
|
|
||||||
+ if (CC != CallingConv::AMDGPU_Gfx)
|
|
||||||
+ Changed = ensureEntrySetPrio(MF, NormalPriority, TII);
|
|
||||||
+
|
|
||||||
+ auto NextMI = std::next(It);
|
|
||||||
+ bool EndOfShader = false;
|
|
||||||
+ if (NextMI != MBB->end()) {
|
|
||||||
+ // Only need WA at end of sequence of exports.
|
|
||||||
+ if (TII.isEXP(*NextMI))
|
|
||||||
+ return Changed;
|
|
||||||
+ // Assume appropriate S_SETPRIO after export means WA already applied.
|
|
||||||
+ if (NextMI->getOpcode() == AMDGPU::S_SETPRIO &&
|
|
||||||
+ NextMI->getOperand(0).getImm() == PostExportPriority)
|
|
||||||
+ return Changed;
|
|
||||||
+ EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ const DebugLoc &DL = MI->getDebugLoc();
|
|
||||||
+
|
|
||||||
+ // Lower priority.
|
|
||||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
|
|
||||||
+ .addImm(PostExportPriority);
|
|
||||||
+
|
|
||||||
+ if (!EndOfShader) {
|
|
||||||
+ // Wait for exports to complete.
|
|
||||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT))
|
|
||||||
+ .addReg(AMDGPU::SGPR_NULL)
|
|
||||||
+ .addImm(0);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
|
|
||||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
|
|
||||||
+
|
|
||||||
+ if (!EndOfShader) {
|
|
||||||
+ // Return to normal (higher) priority.
|
|
||||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
|
|
||||||
+ .addImm(NormalPriority);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return true;
|
|
||||||
+}
|
|
||||||
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
|
|
||||||
index 3ccca527c626b..f2a64ab48e180 100644
|
|
||||||
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
|
|
||||||
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
|
|
||||||
@@ -107,6 +107,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
|
|
||||||
bool fixWMMAHazards(MachineInstr *MI);
|
|
||||||
bool fixShift64HighRegBug(MachineInstr *MI);
|
|
||||||
bool fixVALUMaskWriteHazard(MachineInstr *MI);
|
|
||||||
+ bool fixRequiredExportPriority(MachineInstr *MI);
|
|
||||||
|
|
||||||
int checkMAIHazards(MachineInstr *MI);
|
|
||||||
int checkMAIHazards908(MachineInstr *MI);
|
|
||||||
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
|
|
||||||
index e5817594a4521..def89c785b855 100644
|
|
||||||
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
|
|
||||||
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
|
|
||||||
@@ -238,6 +238,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
|
|
||||||
bool HasVOPDInsts = false;
|
|
||||||
bool HasVALUTransUseHazard = false;
|
|
||||||
bool HasForceStoreSC0SC1 = false;
|
|
||||||
+ bool HasRequiredExportPriority = false;
|
|
||||||
|
|
||||||
// Dummy feature to use for assembler in tablegen.
|
|
||||||
bool FeatureDisable = false;
|
|
||||||
@@ -1282,6 +1283,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
|
|
||||||
|
|
||||||
bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }
|
|
||||||
|
|
||||||
+ bool hasRequiredExportPriority() const { return HasRequiredExportPriority; }
|
|
||||||
+
|
|
||||||
/// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
|
|
||||||
/// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
|
|
||||||
bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
|
|
||||||
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000000000..377902f3f0d1a
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
|
||||||
@@ -0,0 +1,344 @@
|
|
||||||
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
||||||
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_export_zeroes_f32() #0 {
|
|
||||||
+; GCN-LABEL: test_export_zeroes_f32:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v0, 0
|
|
||||||
+; GCN-NEXT: exp mrt0 off, off, off, off
|
|
||||||
+; GCN-NEXT: exp mrt0 off, off, off, off done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_export_en_src0_f32() #0 {
|
|
||||||
+; GCN-LABEL: test_export_en_src0_f32:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
|
||||||
+; GCN-NEXT: exp mrt0 v3, off, off, off done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_gs void @test_export_gs() #0 {
|
|
||||||
+; GCN-LABEL: test_export_gs:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
|
||||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_hs void @test_export_hs() #0 {
|
|
||||||
+; GCN-LABEL: test_export_hs:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
|
||||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_gfx void @test_export_gfx(float %v) #0 {
|
|
||||||
+; GCN-LABEL: test_export_gfx:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 4.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 0.5
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v3, 2.0
|
|
||||||
+; GCN-NEXT: exp mrt0 off, v3, off, off done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_waitcnt expcnt(0)
|
|
||||||
+; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %v, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_cs void @test_export_cs() #0 {
|
|
||||||
+; GCN-LABEL: test_export_cs:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
|
||||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_kernel void @test_export_kernel() #0 {
|
|
||||||
+; GCN-LABEL: test_export_kernel:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
|
||||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_gfx void @test_no_export_gfx(float %v) #0 {
|
|
||||||
+; GCN-LABEL: test_no_export_gfx:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
||||||
+; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_no_export_ps(float %v) #0 {
|
|
||||||
+; GCN-LABEL: test_no_export_ps:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
|
||||||
+; GCN-LABEL: test_if_export_f32:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
|
||||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
|
||||||
+; GCN-NEXT: s_cbranch_execz .LBB9_2
|
|
||||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
|
||||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: .LBB9_2: ; %end
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ %cc = icmp eq i32 %flag, 0
|
|
||||||
+ br i1 %cc, label %end, label %exp
|
|
||||||
+
|
|
||||||
+exp:
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
|
|
||||||
+ br label %end
|
|
||||||
+
|
|
||||||
+end:
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
|
||||||
+; GCN-LABEL: test_if_export_vm_f32:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
|
||||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
|
||||||
+; GCN-NEXT: s_cbranch_execz .LBB10_2
|
|
||||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
|
||||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: .LBB10_2: ; %end
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ %cc = icmp eq i32 %flag, 0
|
|
||||||
+ br i1 %cc, label %end, label %exp
|
|
||||||
+
|
|
||||||
+exp:
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
|
|
||||||
+ br label %end
|
|
||||||
+
|
|
||||||
+end:
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
|
||||||
+; GCN-LABEL: test_if_export_done_f32:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
|
||||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
|
||||||
+; GCN-NEXT: s_cbranch_execz .LBB11_2
|
|
||||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
|
||||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: .LBB11_2: ; %end
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ %cc = icmp eq i32 %flag, 0
|
|
||||||
+ br i1 %cc, label %end, label %exp
|
|
||||||
+
|
|
||||||
+exp:
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
|
|
||||||
+ br label %end
|
|
||||||
+
|
|
||||||
+end:
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
|
||||||
+; GCN-LABEL: test_if_export_vm_done_f32:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
|
||||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
|
||||||
+; GCN-NEXT: s_cbranch_execz .LBB12_2
|
|
||||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
|
||||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: .LBB12_2: ; %end
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ %cc = icmp eq i32 %flag, 0
|
|
||||||
+ br i1 %cc, label %end, label %exp
|
|
||||||
+
|
|
||||||
+exp:
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
|
|
||||||
+ br label %end
|
|
||||||
+
|
|
||||||
+end:
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 {
|
|
||||||
+; GCN-LABEL: test_export_pos_before_param_across_load:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 1.0
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v3, 0.5
|
|
||||||
+; GCN-NEXT: s_waitcnt vmcnt(0)
|
|
||||||
+; GCN-NEXT: exp pos0 v1, v1, v1, v0 done
|
|
||||||
+; GCN-NEXT: exp invalid_target_32 v2, v2, v2, v2
|
|
||||||
+; GCN-NEXT: exp invalid_target_33 v2, v2, v2, v3
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
|
|
||||||
+ %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
|
|
||||||
+; GCN-LABEL: test_export_across_store_load:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 24
|
|
||||||
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
|
|
||||||
+; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
|
||||||
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 8, vcc_lo
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v2, 0
|
|
||||||
+; GCN-NEXT: scratch_store_b32 v0, v1, off
|
|
||||||
+; GCN-NEXT: scratch_load_b32 v0, off, off
|
|
||||||
+; GCN-NEXT: v_mov_b32_e32 v1, 1.0
|
|
||||||
+; GCN-NEXT: exp pos0 v2, v2, v2, v1 done
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_waitcnt vmcnt(0)
|
|
||||||
+; GCN-NEXT: exp invalid_target_32 v0, v2, v1, v2
|
|
||||||
+; GCN-NEXT: exp invalid_target_33 v0, v2, v1, v2
|
|
||||||
+; GCN-NEXT: s_setprio 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_nop 0
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ %data0 = alloca <4 x float>, align 8, addrspace(5)
|
|
||||||
+ %data1 = alloca <4 x float>, align 8, addrspace(5)
|
|
||||||
+ %cmp = icmp eq i32 %idx, 1
|
|
||||||
+ %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
|
|
||||||
+ store float %v, ptr addrspace(5) %data, align 8
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
|
|
||||||
+ %load0 = load float, ptr addrspace(5) %data0, align 8
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
|
|
||||||
+ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_export_in_callee(float %v) #0 {
|
|
||||||
+; GCN-LABEL: test_export_in_callee:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_getpc_b64 s[0:1]
|
|
||||||
+; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
|
|
||||||
+; GCN-NEXT: s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
|
|
||||||
+; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
|
|
||||||
+; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
|
||||||
+; GCN-NEXT: s_mov_b32 s32, 0
|
|
||||||
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
|
||||||
+; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ %x = fadd float %v, 1.0
|
|
||||||
+ call void @test_export_gfx(float %x)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
|
|
||||||
+; GCN-LABEL: test_export_in_callee_prio:
|
|
||||||
+; GCN: ; %bb.0:
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_mov_b32 s32, 0
|
|
||||||
+; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
|
|
||||||
+; GCN-NEXT: s_setprio 2
|
|
||||||
+; GCN-NEXT: s_getpc_b64 s[0:1]
|
|
||||||
+; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
|
|
||||||
+; GCN-NEXT: s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
|
|
||||||
+; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
|
||||||
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
|
||||||
+; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
|
||||||
+; GCN-NEXT: s_endpgm
|
|
||||||
+ %x = fadd float %v, 1.0
|
|
||||||
+ call void @llvm.amdgcn.s.setprio(i16 0)
|
|
||||||
+ call void @test_export_gfx(float %x)
|
|
||||||
+ ret void
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
|
|
||||||
+declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
|
|
||||||
+declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
|
|
||||||
+declare void @llvm.amdgcn.s.setprio(i16)
|
|
||||||
+
|
|
||||||
+attributes #0 = { nounwind }
|
|
||||||
+attributes #1 = { nounwind inaccessiblememonly }
|
|
||||||
+attributes #2 = { nounwind readnone }
|
|
||||||
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.mir b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000000000..eee04468036e5
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
|
|
||||||
@@ -0,0 +1,293 @@
|
|
||||||
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
|
||||||
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=post-RA-hazard-rec -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX1150 %s
|
|
||||||
+
|
|
||||||
+--- |
|
|
||||||
+ define amdgpu_ps void @end_of_shader() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+ define amdgpu_ps void @end_of_shader_return_to_epilogue() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+ define amdgpu_ps void @end_of_block() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+ define amdgpu_ps void @start_of_block() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+ define amdgpu_ps void @block_of_exports() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+ define amdgpu_ps void @sparse_exports() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+ define amdgpu_ps void @existing_setprio_1() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+ define amdgpu_ps void @existing_setprio_2() {
|
|
||||||
+ ret void
|
|
||||||
+ }
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: end_of_shader
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ ; GFX1150-LABEL: name: end_of_shader
|
|
||||||
+ ; GFX1150: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ S_ENDPGM 0
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: end_of_shader_return_to_epilogue
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ ; GFX1150-LABEL: name: end_of_shader_return_to_epilogue
|
|
||||||
+ ; GFX1150: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ SI_RETURN_TO_EPILOG $vgpr0
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: end_of_block
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ ; GFX1150-LABEL: name: end_of_block
|
|
||||||
+ ; GFX1150: bb.0:
|
|
||||||
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
|
|
||||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: bb.1:
|
|
||||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+
|
|
||||||
+ bb.1:
|
|
||||||
+ S_ENDPGM 0
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: start_of_block
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ ; GFX1150-LABEL: name: start_of_block
|
|
||||||
+ ; GFX1150: bb.0:
|
|
||||||
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
|
|
||||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: bb.1:
|
|
||||||
+ ; GFX1150-NEXT: successors: %bb.2(0x80000000)
|
|
||||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: bb.2:
|
|
||||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+
|
|
||||||
+ bb.1:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+
|
|
||||||
+ bb.2:
|
|
||||||
+ S_ENDPGM 0
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: block_of_exports
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ ; GFX1150-LABEL: name: block_of_exports
|
|
||||||
+ ; GFX1150: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
|
||||||
+ EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ S_ENDPGM 0
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: sparse_exports
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ ; GFX1150-LABEL: name: sparse_exports
|
|
||||||
+ ; GFX1150: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
|
||||||
+ EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
|
||||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ S_ENDPGM 0
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: existing_setprio_1
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ ; GFX1150-LABEL: name: existing_setprio_1
|
|
||||||
+ ; GFX1150: bb.0:
|
|
||||||
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
|
|
||||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: bb.1:
|
|
||||||
+ ; GFX1150-NEXT: successors: %bb.2(0x80000000)
|
|
||||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
|
||||||
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: bb.2:
|
|
||||||
+ ; GFX1150-NEXT: successors: %bb.3(0x80000000)
|
|
||||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
|
||||||
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: bb.3:
|
|
||||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
|
||||||
+
|
|
||||||
+ bb.1:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ S_SETPRIO 3
|
|
||||||
+ $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
|
||||||
+ S_SETPRIO 0
|
|
||||||
+
|
|
||||||
+ bb.2:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ S_SETPRIO 1
|
|
||||||
+ $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
|
|
||||||
+ S_SETPRIO 0
|
|
||||||
+
|
|
||||||
+ bb.3:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ S_ENDPGM 0
|
|
||||||
+...
|
|
||||||
+
|
|
||||||
+---
|
|
||||||
+name: existing_setprio_2
|
|
||||||
+tracksRegLiveness: true
|
|
||||||
+liveins:
|
|
||||||
+ - { reg: '$vgpr0' }
|
|
||||||
+body: |
|
|
||||||
+ bb.0:
|
|
||||||
+ liveins: $vgpr0
|
|
||||||
+ ; GFX1150-LABEL: name: existing_setprio_2
|
|
||||||
+ ; GFX1150: liveins: $vgpr0
|
|
||||||
+ ; GFX1150-NEXT: {{ $}}
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
|
||||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
|
||||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_NOP 0
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
|
||||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
|
||||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
|
||||||
+ S_SETPRIO 3
|
|
||||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
|
||||||
+ S_SETPRIO 3
|
|
||||||
+ S_ENDPGM 0
|
|
||||||
+...
|
|
||||||
|
|
||||||
From 8ea44e65f2c19facff751aeb2ac960f907fb210f Mon Sep 17 00:00:00 2001
|
|
||||||
From: Carl Ritson <carl.ritson@amd.com>
|
|
||||||
Date: Wed, 17 Jul 2024 16:18:02 +0900
|
|
||||||
Subject: [PATCH 2/3] Remove -verify-machineinstrs from test.
|
|
||||||
|
|
||||||
---
|
|
||||||
llvm/test/CodeGen/AMDGPU/required-export-priority.ll | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
|
||||||
index 377902f3f0d1a..ebc209bd4d451 100644
|
|
||||||
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
|
||||||
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
|
||||||
@@ -1,5 +1,5 @@
|
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
||||||
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
|
||||||
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GCN %s
|
|
||||||
|
|
||||||
define amdgpu_ps void @test_export_zeroes_f32() #0 {
|
|
||||||
; GCN-LABEL: test_export_zeroes_f32:
|
|
46
SOURCES/D156379.diff
Normal file
46
SOURCES/D156379.diff
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
|
||||||
|
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
|
||||||
|
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
|
||||||
|
@@ -1152,6 +1152,11 @@
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+ // Type legalization (via getNumberOfParts) can't handle structs
|
||||||
|
+ if (TLI->getValueType(DL, Src, true) == MVT::Other)
|
||||||
|
+ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
|
||||||
|
+ CostKind);
|
||||||
|
+
|
||||||
|
unsigned NumOps =
|
||||||
|
(Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src));
|
||||||
|
|
||||||
|
diff --git a/llvm/test/Analysis/CostModel/SystemZ/struct-cost-crash.ll b/llvm/test/Analysis/CostModel/SystemZ/struct-cost-crash.ll
|
||||||
|
new file mode 100644
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/llvm/test/Analysis/CostModel/SystemZ/struct-cost-crash.ll
|
||||||
|
@@ -0,0 +1,25 @@
|
||||||
|
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
|
||||||
|
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output < %s | FileCheck %s
|
||||||
|
+;
|
||||||
|
+; Check that SystemZTTIImpl::getMemoryOpCost doesn't try to legalize structs,
|
||||||
|
+; which was failing llvm_unreachable in MVT::getVT.
|
||||||
|
+
|
||||||
|
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
|
||||||
|
+target triple = "s390x-unknown-linux-gnu"
|
||||||
|
+
|
||||||
|
+declare { i64, i32 } @bar()
|
||||||
|
+
|
||||||
|
+define i8 @foo() {
|
||||||
|
+; CHECK-LABEL: 'foo'
|
||||||
|
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %1
|
||||||
|
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call { i64, i32 } @bar()
|
||||||
|
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store { i64, i32 } %2, ptr inttoptr (i64 16 to ptr), align 16
|
||||||
|
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %1
|
||||||
|
+;
|
||||||
|
+ br label %1
|
||||||
|
+
|
||||||
|
+1: ; preds = %1, %0
|
||||||
|
+ %2 = call { i64, i32 } @bar()
|
||||||
|
+ store { i64, i32 } %2, ptr inttoptr (i64 16 to ptr), align 16
|
||||||
|
+ br label %1
|
||||||
|
+}
|
||||||
|
|
1085
SPECS/llvm.spec
1085
SPECS/llvm.spec
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user