Compare commits

...

No commits in common. "c8-stream-rhel8" and "a9-beta" have entirely different histories.

10 changed files with 1018 additions and 1331 deletions

.gitignore

@@ -1,6 +1,6 @@
SOURCES/cmake-18.1.8.src.tar.xz
SOURCES/cmake-18.1.8.src.tar.xz.sig
SOURCES/llvm-18.1.8.src.tar.xz
SOURCES/llvm-18.1.8.src.tar.xz.sig
SOURCES/third-party-18.1.8.src.tar.xz
SOURCES/third-party-18.1.8.src.tar.xz.sig
SOURCES/cmake-16.0.6.src.tar.xz
SOURCES/cmake-16.0.6.src.tar.xz.sig
SOURCES/llvm-16.0.6.src.tar.xz
SOURCES/llvm-16.0.6.src.tar.xz.sig
SOURCES/third-party-16.0.6.src.tar.xz
SOURCES/third-party-16.0.6.src.tar.xz.sig


@@ -1,6 +1,6 @@
1ea03e355b705b4cada3051bd7301a57daa19283 SOURCES/cmake-18.1.8.src.tar.xz
33c2f4327abc20c6098be064ab6bbc15536031f2 SOURCES/cmake-18.1.8.src.tar.xz.sig
f9befa4cbef3f688ab48fca42449e13c5bcb872d SOURCES/llvm-18.1.8.src.tar.xz
8310ebfda8205233b5ecb6baa7f5272efae31155 SOURCES/llvm-18.1.8.src.tar.xz.sig
ada9cf5deaec0a730c751ffd84145acedc6eafeb SOURCES/third-party-18.1.8.src.tar.xz
b87b233f778b610a7f8ed1cf9aea4112dfcd7a06 SOURCES/third-party-18.1.8.src.tar.xz.sig
0de534cfef38697e115c3ae80634765f05e78e5b SOURCES/cmake-16.0.6.src.tar.xz
2db5c88fe9277bb0fa85f49b58e946e49ff235c2 SOURCES/cmake-16.0.6.src.tar.xz.sig
072d2fb4b10f95d06189de00eb7f7e9b35c54e9a SOURCES/llvm-16.0.6.src.tar.xz
bfc74b3868c69ce674a583c91e938b6d4cf0fded SOURCES/llvm-16.0.6.src.tar.xz.sig
5b1a58de6ed9d154a38edb6386a5749576e0b96a SOURCES/third-party-16.0.6.src.tar.xz
51ad6a8ccc5ccd40faff6f1c98a2f33a9b600f88 SOURCES/third-party-16.0.6.src.tar.xz.sig


@@ -0,0 +1,25 @@
diff -Naur a/llvm/docs/conf.py b/llvm/docs/conf.py
--- a/llvm/docs/conf.py 2020-09-15 09:12:24.318287611 +0000
+++ b/llvm/docs/conf.py 2020-09-15 15:01:00.025893199 +0000
@@ -36,20 +36,7 @@
'.rst': 'restructuredtext',
}
-try:
- import recommonmark
-except ImportError:
- # manpages do not use any .md sources
- if not tags.has('builder-man'):
- raise
-else:
- import sphinx
- if sphinx.version_info >= (3, 0):
- # This requires 0.5 or later.
- extensions.append('recommonmark')
- else:
- source_parsers = {'.md': 'recommonmark.parser.CommonMarkParser'}
- source_suffix['.md'] = 'markdown'
+import sphinx
# The encoding of source files.
#source_encoding = 'utf-8-sig'


@@ -0,0 +1,184 @@
From efbaf8bc61f4c0e29a3eaafb11ac0ddda8bd3dff Mon Sep 17 00:00:00 2001
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Date: Fri, 30 Jun 2023 16:02:56 +0200
Subject: [PATCH] [SystemZ] Improve error messages for unsupported relocations
In the SystemZMCObjectWriter, we currently just abort in case
some unsupported relocation is requested. However, as this
situation can be triggered by invalid (inline) assembler input,
we should really get a regular error message instead.
---
.../MCTargetDesc/SystemZMCObjectWriter.cpp | 59 +++++++++++--------
1 file changed, 35 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index c23463ab9bde..0b11468afc52 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/SystemZMCFixups.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
@@ -40,7 +41,7 @@ SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI)
/*HasRelocationAddend_=*/ true) {}
// Return the relocation type for an absolute value of MCFixupKind Kind.
-static unsigned getAbsoluteReloc(unsigned Kind) {
+static unsigned getAbsoluteReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
switch (Kind) {
case FK_Data_1: return ELF::R_390_8;
case FK_Data_2: return ELF::R_390_16;
@@ -49,11 +50,12 @@ static unsigned getAbsoluteReloc(unsigned Kind) {
case SystemZ::FK_390_12: return ELF::R_390_12;
case SystemZ::FK_390_20: return ELF::R_390_20;
}
- llvm_unreachable("Unsupported absolute address");
+ Ctx.reportError(Loc, "Unsupported absolute address");
+ return 0;
}
// Return the relocation type for a PC-relative value of MCFixupKind Kind.
-static unsigned getPCRelReloc(unsigned Kind) {
+static unsigned getPCRelReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
switch (Kind) {
case FK_Data_2: return ELF::R_390_PC16;
case FK_Data_4: return ELF::R_390_PC32;
@@ -63,62 +65,69 @@ static unsigned getPCRelReloc(unsigned Kind) {
case SystemZ::FK_390_PC24DBL: return ELF::R_390_PC24DBL;
case SystemZ::FK_390_PC32DBL: return ELF::R_390_PC32DBL;
}
- llvm_unreachable("Unsupported PC-relative address");
+ Ctx.reportError(Loc, "Unsupported PC-relative address");
+ return 0;
}
// Return the R_390_TLS_LE* relocation type for MCFixupKind Kind.
-static unsigned getTLSLEReloc(unsigned Kind) {
+static unsigned getTLSLEReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
switch (Kind) {
case FK_Data_4: return ELF::R_390_TLS_LE32;
case FK_Data_8: return ELF::R_390_TLS_LE64;
}
- llvm_unreachable("Unsupported absolute address");
+ Ctx.reportError(Loc, "Unsupported thread-local address (local-exec)");
+ return 0;
}
// Return the R_390_TLS_LDO* relocation type for MCFixupKind Kind.
-static unsigned getTLSLDOReloc(unsigned Kind) {
+static unsigned getTLSLDOReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
switch (Kind) {
case FK_Data_4: return ELF::R_390_TLS_LDO32;
case FK_Data_8: return ELF::R_390_TLS_LDO64;
}
- llvm_unreachable("Unsupported absolute address");
+ Ctx.reportError(Loc, "Unsupported thread-local address (local-dynamic)");
+ return 0;
}
// Return the R_390_TLS_LDM* relocation type for MCFixupKind Kind.
-static unsigned getTLSLDMReloc(unsigned Kind) {
+static unsigned getTLSLDMReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
switch (Kind) {
case FK_Data_4: return ELF::R_390_TLS_LDM32;
case FK_Data_8: return ELF::R_390_TLS_LDM64;
case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_LDCALL;
}
- llvm_unreachable("Unsupported absolute address");
+ Ctx.reportError(Loc, "Unsupported thread-local address (local-dynamic)");
+ return 0;
}
// Return the R_390_TLS_GD* relocation type for MCFixupKind Kind.
-static unsigned getTLSGDReloc(unsigned Kind) {
+static unsigned getTLSGDReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
switch (Kind) {
case FK_Data_4: return ELF::R_390_TLS_GD32;
case FK_Data_8: return ELF::R_390_TLS_GD64;
case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_GDCALL;
}
- llvm_unreachable("Unsupported absolute address");
+ Ctx.reportError(Loc, "Unsupported thread-local address (general-dynamic)");
+ return 0;
}
// Return the PLT relocation counterpart of MCFixupKind Kind.
-static unsigned getPLTReloc(unsigned Kind) {
+static unsigned getPLTReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
switch (Kind) {
case SystemZ::FK_390_PC12DBL: return ELF::R_390_PLT12DBL;
case SystemZ::FK_390_PC16DBL: return ELF::R_390_PLT16DBL;
case SystemZ::FK_390_PC24DBL: return ELF::R_390_PLT24DBL;
case SystemZ::FK_390_PC32DBL: return ELF::R_390_PLT32DBL;
}
- llvm_unreachable("Unsupported absolute address");
+ Ctx.reportError(Loc, "Unsupported PC-relative PLT address");
+ return 0;
}
unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
+ SMLoc Loc = Fixup.getLoc();
unsigned Kind = Fixup.getKind();
if (Kind >= FirstLiteralRelocationKind)
return Kind - FirstLiteralRelocationKind;
@@ -126,38 +135,40 @@ unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx,
switch (Modifier) {
case MCSymbolRefExpr::VK_None:
if (IsPCRel)
- return getPCRelReloc(Kind);
- return getAbsoluteReloc(Kind);
+ return getPCRelReloc(Ctx, Loc, Kind);
+ return getAbsoluteReloc(Ctx, Loc, Kind);
case MCSymbolRefExpr::VK_NTPOFF:
assert(!IsPCRel && "NTPOFF shouldn't be PC-relative");
- return getTLSLEReloc(Kind);
+ return getTLSLEReloc(Ctx, Loc, Kind);
case MCSymbolRefExpr::VK_INDNTPOFF:
if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
return ELF::R_390_TLS_IEENT;
- llvm_unreachable("Only PC-relative INDNTPOFF accesses are supported for now");
+ Ctx.reportError(Loc, "Only PC-relative INDNTPOFF accesses are supported for now");
+ return 0;
case MCSymbolRefExpr::VK_DTPOFF:
assert(!IsPCRel && "DTPOFF shouldn't be PC-relative");
- return getTLSLDOReloc(Kind);
+ return getTLSLDOReloc(Ctx, Loc, Kind);
case MCSymbolRefExpr::VK_TLSLDM:
assert(!IsPCRel && "TLSLDM shouldn't be PC-relative");
- return getTLSLDMReloc(Kind);
+ return getTLSLDMReloc(Ctx, Loc, Kind);
case MCSymbolRefExpr::VK_TLSGD:
assert(!IsPCRel && "TLSGD shouldn't be PC-relative");
- return getTLSGDReloc(Kind);
+ return getTLSGDReloc(Ctx, Loc, Kind);
case MCSymbolRefExpr::VK_GOT:
if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
return ELF::R_390_GOTENT;
- llvm_unreachable("Only PC-relative GOT accesses are supported for now");
+ Ctx.reportError(Loc, "Only PC-relative GOT accesses are supported for now");
+ return 0;
case MCSymbolRefExpr::VK_PLT:
- assert(IsPCRel && "@PLT shouldt be PC-relative");
- return getPLTReloc(Kind);
+ assert(IsPCRel && "@PLT shouldn't be PC-relative");
+ return getPLTReloc(Ctx, Loc, Kind);
default:
llvm_unreachable("Modifier not supported");
--
2.41.0
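
The change repeated across every helper above follows one pattern: thread the MCContext and the fixup's SMLoc through, replace llvm_unreachable() with Ctx.reportError(Loc, ...), and return a placeholder relocation type so assembly can finish and emit the diagnostic instead of aborting. A minimal stand-alone sketch of that pattern, with an illustrative Diag type standing in for MCContext (not LLVM's API):

#include <cstdio>
#include <string>

// Stand-in for llvm::MCContext; the real call in the patch is Ctx.reportError(Loc, "...").
struct Diag {
  bool HadError = false;
  void reportError(const std::string &Loc, const std::string &Msg) {
    HadError = true;
    std::fprintf(stderr, "%s: error: %s\n", Loc.c_str(), Msg.c_str());
  }
};

// Before: llvm_unreachable("Unsupported absolute address") on unknown fixup kinds.
// After: a regular diagnostic tied to the fixup location, plus a dummy return value.
static unsigned getAbsoluteRelocSketch(Diag &D, const std::string &Loc, unsigned Kind) {
  switch (Kind) {
  case 1: return 0x01; // stands in for R_390_8
  case 2: return 0x02; // stands in for R_390_16
  }
  D.reportError(Loc, "Unsupported absolute address");
  return 0; // placeholder relocation type; the caller checks D.HadError
}

int main() {
  Diag D;
  unsigned R = getAbsoluteRelocSketch(D, "test.s:3", 99); // unsupported kind
  return D.HadError ? 1 : static_cast<int>(R);
}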


@@ -0,0 +1,32 @@
From 8cc3870f09d728d9017c72eba9520117a4283fee Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Thu, 17 Nov 2022 09:01:10 +0000
Subject: Add install targets for gtest
Stand-alone builds need an installed version of gtest in order to run
the unittests.
Differential Revision: https://reviews.llvm.org/D137890
---
llvm/CMakeLists.txt | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 60e1f29620af..d91338532815 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -693,6 +693,11 @@ option(LLVM_BUILD_TESTS
"Build LLVM unit tests. If OFF, just generate build targets." OFF)
option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." ON)
+option(LLVM_INSTALL_GTEST
+ "Install the llvm gtest library. This should be on if you want to do
+ stand-alone builds of the other projects and run their unit tests." OFF)
+
+
option(LLVM_BUILD_BENCHMARKS "Add LLVM benchmark targets to the list of default
targets. If OFF, benchmarks still could be built using Benchmarks target." OFF)
option(LLVM_INCLUDE_BENCHMARKS "Generate benchmark targets. If OFF, benchmarks can't be built." ON)
--
2.34.3


@@ -1,13 +0,0 @@
diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py
index cf8a75980b53..b208ad138e89 100644
--- a/llvm/docs/conf.py
+++ b/llvm/docs/conf.py
@@ -26,7 +26,7 @@ from datetime import date
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ["myst_parser", "sphinx.ext.intersphinx", "sphinx.ext.todo"]
+extensions = ["sphinx.ext.intersphinx", "sphinx.ext.todo"]
# Automatic anchors for markdown titles
from llvm_slug import make_slug


@@ -0,0 +1,47 @@
From 8cc3870f09d728d9017c72eba9520117a4283fee Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Thu, 17 Nov 2022 09:01:10 +0000
Subject: Add install targets for gtest
Stand-alone builds need an installed version of gtest in order to run
the unittests.
Differential Revision: https://reviews.llvm.org/D137890
---
third-party/unittest/CMakeLists.txt | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/third-party/unittest/CMakeLists.txt b/third-party/unittest/CMakeLists.txt
index 0e54e0e57c35..1d2a52730d7d 100644
--- a/third-party/unittest/CMakeLists.txt
+++ b/third-party/unittest/CMakeLists.txt
@@ -65,12 +65,25 @@ if (NOT LLVM_ENABLE_THREADS)
endif ()
target_include_directories(llvm_gtest
- PUBLIC googletest/include googlemock/include
+ PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/googletest/include>
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/googlemock/include>
+ $<INSTALL_INTERFACE:include/llvm-gtest/>
+ $<INSTALL_INTERFACE:include/llvm-gmock/>
PRIVATE googletest googlemock
)
add_subdirectory(UnitTestMain)
+if (LLVM_INSTALL_GTEST)
+export(TARGETS llvm_gtest llvm_gtest_main LLVMTestingSupport FILE LLVMGTestConfig.cmake)
+install(TARGETS llvm_gtest llvm_gtest_main LLVMTestingSupport EXPORT LLVMGTestConfig
+ ARCHIVE DESTINATION "lib${LLVM_LIBDIR_SUFFIX}" COMPONENT llvm_gtest)
+ install(EXPORT LLVMGTestConfig DESTINATION ${LLVM_INSTALL_PACKAGE_DIR} COMPONENT llvm_gtest)
+ add_llvm_install_targets(install-llvm_gtest COMPONENT llvm_gtest DEPENDS llvm_gtest LLVMGTestConfig.cmake)
+ install(DIRECTORY googletest/include/gtest/ DESTINATION include/llvm-gtest/gtest/ COMPONENT llvm_gtest)
+ install(DIRECTORY googlemock/include/gmock/ DESTINATION include/llvm-gmock/gmock/ COMPONENT llvm_gtest)
+endif()
+
# When LLVM_LINK_LLVM_DYLIB is enabled, libLLVM.so is added to the interface
# link libraries for gtest and gtest_main. This means that any target, like
# unittests for example, that links against gtest will be forced to link
--
2.34.3
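
With the export and install rules above in place, a stand-alone build can compile its unit tests against the installed llvm_gtest instead of a bundled googletest. A minimal illustrative test, assuming the consuming project links the exported llvm_gtest and llvm_gtest_main targets (build wiring not shown):

// The gtest headers are installed under include/llvm-gtest/, which the exported
// target adds to the include path, so the usual gtest include keeps working.
#include "gtest/gtest.h"

TEST(StandaloneBuild, CanUseInstalledGtest) {
  EXPECT_EQ(2 + 2, 4);
}

// No main() is needed here when linking llvm_gtest_main, which provides one.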


@@ -1,893 +0,0 @@
From 91052169960477fbc39169c10f9fae3bec732510 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson@amd.com>
Date: Wed, 17 Jul 2024 15:07:42 +0900
Subject: [PATCH 1/3] [AMDGPU] Implement workaround for GFX11.5 export priority
On GFX11.5 shaders having completed exports need to execute/wait
at a lower priority than shaders still executing exports.
Add code to maintain normal priority of 2 for shaders that export
and drop to priority 0 after exports.
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 15 +-
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 112 ++++++
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 +
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3 +
.../AMDGPU/required-export-priority.ll | 344 ++++++++++++++++++
.../AMDGPU/required-export-priority.mir | 293 +++++++++++++++
6 files changed, 765 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index dfc8eaea66f7b..14fcf6a210a78 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -947,6 +947,12 @@ def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
"Has restricted SOffset (immediate not supported)."
>;
+def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority",
+ "HasRequiredExportPriority",
+ "true",
+ "Export priority must be explicitly manipulated on GFX11.5"
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -1597,14 +1603,16 @@ def FeatureISAVersion11_5_0 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureSALUFloatInsts,
FeatureDPPSrc1SGPR,
- FeatureVGPRSingleUseHintInsts])>;
+ FeatureVGPRSingleUseHintInsts,
+ FeatureRequiredExportPriority])>;
def FeatureISAVersion11_5_1 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureSALUFloatInsts,
FeatureDPPSrc1SGPR,
FeatureVGPRSingleUseHintInsts,
- FeatureGFX11FullVGPRs])>;
+ FeatureGFX11FullVGPRs,
+ FeatureRequiredExportPriority])>;
def FeatureISAVersion12 : FeatureSet<
[FeatureGFX12,
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index a402fc6d7e611..a8b171aa82840 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -14,6 +14,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/TargetParser/TargetParser.h"
@@ -1104,6 +1105,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixWMMAHazards(MI);
fixShift64HighRegBug(MI);
fixVALUMaskWriteHazard(MI);
+ fixRequiredExportPriority(MI);
}
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
@@ -2895,3 +2897,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
return true;
}
+
+static bool ensureEntrySetPrio(MachineFunction *MF, int Priority,
+ const SIInstrInfo &TII) {
+ MachineBasicBlock &EntryMBB = MF->front();
+ if (EntryMBB.begin() != EntryMBB.end()) {
+ auto &EntryMI = *EntryMBB.begin();
+ if (EntryMI.getOpcode() == AMDGPU::S_SETPRIO &&
+ EntryMI.getOperand(0).getImm() >= Priority)
+ return false;
+ }
+
+ BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII.get(AMDGPU::S_SETPRIO))
+ .addImm(Priority);
+ return true;
+}
+
+bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
+ if (!ST.hasRequiredExportPriority())
+ return false;
+
+ // Assume the following shader types will never have exports,
+ // and avoid adding or adjusting S_SETPRIO.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction *MF = MBB->getParent();
+ auto CC = MF->getFunction().getCallingConv();
+ switch (CC) {
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
+ case CallingConv::AMDGPU_KERNEL:
+ return false;
+ default:
+ break;
+ }
+
+ const int MaxPriority = 3;
+ const int NormalPriority = 2;
+ const int PostExportPriority = 0;
+
+ auto It = MI->getIterator();
+ switch (MI->getOpcode()) {
+ case AMDGPU::S_ENDPGM:
+ case AMDGPU::S_ENDPGM_SAVED:
+ case AMDGPU::S_ENDPGM_ORDERED_PS_DONE:
+ case AMDGPU::SI_RETURN_TO_EPILOG:
+ // Ensure shader with calls raises priority at entry.
+ // This ensures correct priority if exports exist in callee.
+ if (MF->getFrameInfo().hasCalls())
+ return ensureEntrySetPrio(MF, NormalPriority, TII);
+ return false;
+ case AMDGPU::S_SETPRIO: {
+ // Raise minimum priority unless in workaround.
+ auto &PrioOp = MI->getOperand(0);
+ int Prio = PrioOp.getImm();
+ bool InWA = (Prio == PostExportPriority) &&
+ (It != MBB->begin() && TII.isEXP(*std::prev(It)));
+ if (InWA || Prio >= NormalPriority)
+ return false;
+ PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority));
+ return true;
+ }
+ default:
+ if (!TII.isEXP(*MI))
+ return false;
+ break;
+ }
+
+ // Check entry priority at each export (as there will only be a few).
+ // Note: amdgpu_gfx can only be a callee, so defer to caller setprio.
+ bool Changed = false;
+ if (CC != CallingConv::AMDGPU_Gfx)
+ Changed = ensureEntrySetPrio(MF, NormalPriority, TII);
+
+ auto NextMI = std::next(It);
+ bool EndOfShader = false;
+ if (NextMI != MBB->end()) {
+ // Only need WA at end of sequence of exports.
+ if (TII.isEXP(*NextMI))
+ return Changed;
+ // Assume appropriate S_SETPRIO after export means WA already applied.
+ if (NextMI->getOpcode() == AMDGPU::S_SETPRIO &&
+ NextMI->getOperand(0).getImm() == PostExportPriority)
+ return Changed;
+ EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM;
+ }
+
+ const DebugLoc &DL = MI->getDebugLoc();
+
+ // Lower priority.
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
+ .addImm(PostExportPriority);
+
+ if (!EndOfShader) {
+ // Wait for exports to complete.
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT))
+ .addReg(AMDGPU::SGPR_NULL)
+ .addImm(0);
+ }
+
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
+
+ if (!EndOfShader) {
+ // Return to normal (higher) priority.
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
+ .addImm(NormalPriority);
+ }
+
+ return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 3ccca527c626b..f2a64ab48e180 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -107,6 +107,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
bool fixWMMAHazards(MachineInstr *MI);
bool fixShift64HighRegBug(MachineInstr *MI);
bool fixVALUMaskWriteHazard(MachineInstr *MI);
+ bool fixRequiredExportPriority(MachineInstr *MI);
int checkMAIHazards(MachineInstr *MI);
int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index e5817594a4521..def89c785b855 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -238,6 +238,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasVOPDInsts = false;
bool HasVALUTransUseHazard = false;
bool HasForceStoreSC0SC1 = false;
+ bool HasRequiredExportPriority = false;
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -1282,6 +1283,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }
+ bool hasRequiredExportPriority() const { return HasRequiredExportPriority; }
+
/// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
/// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
new file mode 100644
index 0000000000000..377902f3f0d1a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -0,0 +1,344 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_ps void @test_export_zeroes_f32() #0 {
+; GCN-LABEL: test_export_zeroes_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: exp mrt0 off, off, off, off
+; GCN-NEXT: exp mrt0 off, off, off, off done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_ps void @test_export_en_src0_f32() #0 {
+; GCN-LABEL: test_export_en_src0_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
+; GCN-NEXT: exp mrt0 v3, off, off, off done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_gs void @test_export_gs() #0 {
+; GCN-LABEL: test_export_gs:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
+; GCN-NEXT: exp mrt0 off, v2, off, off done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_hs void @test_export_hs() #0 {
+; GCN-LABEL: test_export_hs:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
+; GCN-NEXT: exp mrt0 off, v2, off, off done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_gfx void @test_export_gfx(float %v) #0 {
+; GCN-LABEL: test_export_gfx:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v1, 4.0
+; GCN-NEXT: v_mov_b32_e32 v2, 0.5
+; GCN-NEXT: v_mov_b32_e32 v3, 2.0
+; GCN-NEXT: exp mrt0 off, v3, off, off done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_waitcnt expcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %v, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_cs void @test_export_cs() #0 {
+; GCN-LABEL: test_export_cs:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
+; GCN-NEXT: exp mrt0 off, v2, off, off done
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_kernel void @test_export_kernel() #0 {
+; GCN-LABEL: test_export_kernel:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
+; GCN-NEXT: exp mrt0 off, v2, off, off done
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_gfx void @test_no_export_gfx(float %v) #0 {
+; GCN-LABEL: test_no_export_gfx:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ ret void
+}
+
+define amdgpu_ps void @test_no_export_ps(float %v) #0 {
+; GCN-LABEL: test_no_export_ps:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_endpgm
+ ret void
+}
+
+define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+; GCN-LABEL: test_if_export_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_mov_b32 s0, exec_lo
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
+; GCN-NEXT: s_cbranch_execz .LBB9_2
+; GCN-NEXT: ; %bb.1: ; %exp
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: .LBB9_2: ; %end
+; GCN-NEXT: s_endpgm
+ %cc = icmp eq i32 %flag, 0
+ br i1 %cc, label %end, label %exp
+
+exp:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
+ br label %end
+
+end:
+ ret void
+}
+
+define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+; GCN-LABEL: test_if_export_vm_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_mov_b32 s0, exec_lo
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
+; GCN-NEXT: s_cbranch_execz .LBB10_2
+; GCN-NEXT: ; %bb.1: ; %exp
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: .LBB10_2: ; %end
+; GCN-NEXT: s_endpgm
+ %cc = icmp eq i32 %flag, 0
+ br i1 %cc, label %end, label %exp
+
+exp:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
+ br label %end
+
+end:
+ ret void
+}
+
+define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+; GCN-LABEL: test_if_export_done_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_mov_b32 s0, exec_lo
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
+; GCN-NEXT: s_cbranch_execz .LBB11_2
+; GCN-NEXT: ; %bb.1: ; %exp
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: .LBB11_2: ; %end
+; GCN-NEXT: s_endpgm
+ %cc = icmp eq i32 %flag, 0
+ br i1 %cc, label %end, label %exp
+
+exp:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
+ br label %end
+
+end:
+ ret void
+}
+
+define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+; GCN-LABEL: test_if_export_vm_done_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_mov_b32 s0, exec_lo
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
+; GCN-NEXT: s_cbranch_execz .LBB12_2
+; GCN-NEXT: ; %bb.1: ; %exp
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: .LBB12_2: ; %end
+; GCN-NEXT: s_endpgm
+ %cc = icmp eq i32 %flag, 0
+ br i1 %cc, label %end, label %exp
+
+exp:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
+ br label %end
+
+end:
+ ret void
+}
+
+define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 {
+; GCN-LABEL: test_export_pos_before_param_across_load:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: v_mov_b32_e32 v2, 1.0
+; GCN-NEXT: v_mov_b32_e32 v3, 0.5
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: exp pos0 v1, v1, v1, v0 done
+; GCN-NEXT: exp invalid_target_32 v2, v2, v2, v2
+; GCN-NEXT: exp invalid_target_33 v2, v2, v2, v3
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
+ %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
+; GCN-LABEL: test_export_across_store_load:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: v_mov_b32_e32 v2, 24
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 8, vcc_lo
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: scratch_store_b32 v0, v1, off
+; GCN-NEXT: scratch_load_b32 v0, off, off
+; GCN-NEXT: v_mov_b32_e32 v1, 1.0
+; GCN-NEXT: exp pos0 v2, v2, v2, v1 done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: exp invalid_target_32 v0, v2, v1, v2
+; GCN-NEXT: exp invalid_target_33 v0, v2, v1, v2
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_endpgm
+ %data0 = alloca <4 x float>, align 8, addrspace(5)
+ %data1 = alloca <4 x float>, align 8, addrspace(5)
+ %cmp = icmp eq i32 %idx, 1
+ %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
+ store float %v, ptr addrspace(5) %data, align 8
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
+ %load0 = load float, ptr addrspace(5) %data0, align 8
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
+ ret void
+}
+
+define amdgpu_ps void @test_export_in_callee(float %v) #0 {
+; GCN-LABEL: test_export_in_callee:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_getpc_b64 s[0:1]
+; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
+; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GCN-NEXT: s_endpgm
+ %x = fadd float %v, 1.0
+ call void @test_export_gfx(float %x)
+ ret void
+}
+
+define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
+; GCN-LABEL: test_export_in_callee_prio:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_getpc_b64 s[0:1]
+; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
+; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GCN-NEXT: s_endpgm
+ %x = fadd float %v, 1.0
+ call void @llvm.amdgcn.s.setprio(i16 0)
+ call void @test_export_gfx(float %x)
+ ret void
+}
+
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
+declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
+declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
+declare void @llvm.amdgcn.s.setprio(i16)
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind inaccessiblememonly }
+attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.mir b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
new file mode 100644
index 0000000000000..eee04468036e5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
@@ -0,0 +1,293 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=post-RA-hazard-rec -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX1150 %s
+
+--- |
+ define amdgpu_ps void @end_of_shader() {
+ ret void
+ }
+ define amdgpu_ps void @end_of_shader_return_to_epilogue() {
+ ret void
+ }
+ define amdgpu_ps void @end_of_block() {
+ ret void
+ }
+ define amdgpu_ps void @start_of_block() {
+ ret void
+ }
+ define amdgpu_ps void @block_of_exports() {
+ ret void
+ }
+ define amdgpu_ps void @sparse_exports() {
+ ret void
+ }
+ define amdgpu_ps void @existing_setprio_1() {
+ ret void
+ }
+ define amdgpu_ps void @existing_setprio_2() {
+ ret void
+ }
+...
+
+---
+name: end_of_shader
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX1150-LABEL: name: end_of_shader
+ ; GFX1150: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_ENDPGM 0
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: end_of_shader_return_to_epilogue
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX1150-LABEL: name: end_of_shader_return_to_epilogue
+ ; GFX1150: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ SI_RETURN_TO_EPILOG $vgpr0
+...
+
+---
+name: end_of_block
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ ; GFX1150-LABEL: name: end_of_block
+ ; GFX1150: bb.0:
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
+ ; GFX1150-NEXT: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: bb.1:
+ ; GFX1150-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+
+ bb.1:
+ S_ENDPGM 0
+...
+
+---
+name: start_of_block
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ ; GFX1150-LABEL: name: start_of_block
+ ; GFX1150: bb.0:
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
+ ; GFX1150-NEXT: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: bb.1:
+ ; GFX1150-NEXT: successors: %bb.2(0x80000000)
+ ; GFX1150-NEXT: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: bb.2:
+ ; GFX1150-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0
+
+ bb.1:
+ liveins: $vgpr0
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: block_of_exports
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX1150-LABEL: name: block_of_exports
+ ; GFX1150: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_ENDPGM 0
+ EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: sparse_exports
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX1150-LABEL: name: sparse_exports
+ ; GFX1150: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_ENDPGM 0
+ EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: existing_setprio_1
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ ; GFX1150-LABEL: name: existing_setprio_1
+ ; GFX1150: bb.0:
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
+ ; GFX1150-NEXT: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: bb.1:
+ ; GFX1150-NEXT: successors: %bb.2(0x80000000)
+ ; GFX1150-NEXT: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 3
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: bb.2:
+ ; GFX1150-NEXT: successors: %bb.3(0x80000000)
+ ; GFX1150-NEXT: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 3
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: bb.3:
+ ; GFX1150-NEXT: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0
+ $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
+
+ bb.1:
+ liveins: $vgpr0
+ S_SETPRIO 3
+ $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
+ S_SETPRIO 0
+
+ bb.2:
+ liveins: $vgpr0
+ S_SETPRIO 1
+ $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
+ S_SETPRIO 0
+
+ bb.3:
+ liveins: $vgpr0
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: existing_setprio_2
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX1150-LABEL: name: existing_setprio_2
+ ; GFX1150: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 3
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: S_SETPRIO 3
+ ; GFX1150-NEXT: S_ENDPGM 0
+ S_SETPRIO 3
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ S_SETPRIO 3
+ S_ENDPGM 0
+...
From 8ea44e65f2c19facff751aeb2ac960f907fb210f Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson@amd.com>
Date: Wed, 17 Jul 2024 16:18:02 +0900
Subject: [PATCH 2/3] Remove -verify-machineinstrs from test.
---
llvm/test/CodeGen/AMDGPU/required-export-priority.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
index 377902f3f0d1a..ebc209bd4d451 100644
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GCN %s
define amdgpu_ps void @test_export_zeroes_f32() #0 {
; GCN-LABEL: test_export_zeroes_f32:
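
The removed patch above wraps every export sequence in explicit priority changes: raise to priority 2 at shader entry, and after the last export in a run drop to priority 0, wait for outstanding exports, pad with two NOPs, and restore priority 2 unless the program ends right away. A simplified stand-alone model of that rewriting, using plain strings instead of MachineInstrs (a sketch of the idea, not the GCNHazardRecognizer pass itself):

#include <iostream>
#include <string>
#include <vector>

using Program = std::vector<std::string>;

static bool isExport(const std::string &I) { return I.rfind("EXP", 0) == 0; }

// Mirror of the workaround's shape: S_SETPRIO 2 at entry for exporting shaders,
// and the post-export sequence after the last export of each run.
Program applyExportPriorityWA(const Program &In) {
  Program Out;
  bool HasExport = false;
  for (const auto &I : In)
    HasExport = HasExport || isExport(I);
  if (HasExport)
    Out.push_back("S_SETPRIO 2"); // normal priority at entry

  for (size_t i = 0; i < In.size(); ++i) {
    Out.push_back(In[i]);
    // Only act at the end of a run of exports.
    if (!isExport(In[i]) || (i + 1 < In.size() && isExport(In[i + 1])))
      continue;
    bool EndOfShader = (i + 1 < In.size()) && In[i + 1] == "S_ENDPGM";
    Out.push_back("S_SETPRIO 0");              // post-export priority
    if (!EndOfShader)
      Out.push_back("S_WAITCNT_EXPCNT 0");     // wait for exports to complete
    Out.push_back("S_NOP 0");
    Out.push_back("S_NOP 0");
    if (!EndOfShader)
      Out.push_back("S_SETPRIO 2");            // back to normal priority
  }
  return Out;
}

int main() {
  Program P = {"V_MOV v0, 1.0", "EXP mrt0 v0", "EXP_DONE mrt0 v0", "S_ENDPGM"};
  for (const auto &I : applyExportPriorityWA(P))
    std::cout << I << "\n";
}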

SOURCES/D156379.diff

@@ -0,0 +1,46 @@
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -1152,6 +1152,11 @@
}
}
+ // Type legalization (via getNumberOfParts) can't handle structs
+ if (TLI->getValueType(DL, Src, true) == MVT::Other)
+ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind);
+
unsigned NumOps =
(Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src));
diff --git a/llvm/test/Analysis/CostModel/SystemZ/struct-cost-crash.ll b/llvm/test/Analysis/CostModel/SystemZ/struct-cost-crash.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/SystemZ/struct-cost-crash.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output < %s | FileCheck %s
+;
+; Check that SystemZTTIImpl::getMemoryOpCost doesn't try to legalize structs,
+; which was failing llvm_unreachable in MVT::getVT.
+
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
+target triple = "s390x-unknown-linux-gnu"
+
+declare { i64, i32 } @bar()
+
+define i8 @foo() {
+; CHECK-LABEL: 'foo'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %1
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call { i64, i32 } @bar()
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store { i64, i32 } %2, ptr inttoptr (i64 16 to ptr), align 16
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: br label %1
+;
+ br label %1
+
+1: ; preds = %1, %0
+ %2 = call { i64, i32 } @bar()
+ store { i64, i32 } %2, ptr inttoptr (i64 16 to ptr), align 16
+ br label %1
+}
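
The guard added above is a defensive cost-model pattern: when the target-specific hook is handed a type it cannot legalize (an aggregate maps to MVT::Other), it defers to the generic base-class cost instead of asserting deeper in type legalization. A tiny stand-alone model of that shape, with illustrative types rather than LLVM's TargetTransformInfo interfaces:

#include <iostream>

enum class MVTKind { i32, i64, Vector, Other };

struct BaseCostModel {
  // Generic, conservative cost that never needs type legalization.
  virtual unsigned getMemoryOpCost(MVTKind) const { return 4; }
  virtual ~BaseCostModel() = default;
};

struct SystemZLikeCostModel : BaseCostModel {
  unsigned getMemoryOpCost(MVTKind VT) const override {
    // The added check: legalization cannot handle aggregates, so fall back
    // to the base implementation instead of crashing.
    if (VT == MVTKind::Other)
      return BaseCostModel::getMemoryOpCost(VT);
    // Stand-in for the target-specific, getNumberOfParts-based costing.
    return VT == MVTKind::Vector ? 2 : 1;
  }
};

int main() {
  SystemZLikeCostModel TTI;
  std::cout << "struct store cost: " << TTI.getMemoryOpCost(MVTKind::Other) << "\n"; // uses base fallback
  std::cout << "i64 store cost: " << TTI.getMemoryOpCost(MVTKind::i64) << "\n";
}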

File diff suppressed because it is too large.