Update to LLVM 19.1.1

This is an import from Fedora rawhide with the following changes:

 * Replace with RHEL changelog
 * Enable bundle_compat_lib

Resolves: RHEL-57456
2024-10-02 16:47:26 +02:00 · 2024-10-02 16:47:26 +02:00 · 13ecd6f381
commit 13ecd6f381
parent f9583268af
14 changed files with 2193 additions and 1211 deletions
--- a/0001-19-Remove-myst_parser-dependency-for-RHEL.patch
+++ b/0001-19-Remove-myst_parser-dependency-for-RHEL.patch
@ -0,0 +1,41 @@
 From d8742e9b361e5fd6fee2298b8ea0aeb4671ec05a Mon Sep 17 00:00:00 2001
 From: Nikita Popov <npopov@redhat.com>
 Date: Wed, 22 May 2024 09:39:26 +0200
 Subject: [PATCH] Remove myst_parser dependency for RHEL
 ---
 clang/docs/conf.py | 3 +--
 llvm/docs/conf.py  | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)
 diff --git a/clang/docs/conf.py b/clang/docs/conf.py
 index 4cee382a718f..d2e2198e05d4 100644
 --- a/clang/docs/conf.py
 +++ b/clang/docs/conf.py
@@ -43,8 +43,7 @@ try:
     extensions.append("myst_parser")
 except ImportError:
 -    if not tags.has("builder-man"):
 -        raise
 +    pass
 # The encoding of source files.
 diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py
 index 7f2ed5309606..354a41f11280 100644
 --- a/llvm/docs/conf.py
 +++ b/llvm/docs/conf.py
@@ -36,8 +36,7 @@ try:
     extensions.append("myst_parser")
 except ImportError:
 -    if not tags.has("builder-man"):
 -        raise
 +    pass
 # Automatic anchors for markdown titles
 from llvm_slug import make_slug
 -- 
 2.44.0
--- a/0001-20-Remove-myst_parser-dependency-for-RHEL.patch
+++ b/0001-20-Remove-myst_parser-dependency-for-RHEL.patch
@ -0,0 +1,43 @@
 From 50cd36c2156d375a6d50f661908b460fbbd22e78 Mon Sep 17 00:00:00 2001
 From: Nikita Popov <npopov@redhat.com>
 Date: Wed, 22 May 2024 09:39:26 +0200
 Subject: [PATCH] Remove myst_parser dependency for RHEL
 ---
 clang/docs/conf.py | 3 +--
 llvm/docs/conf.py  | 5 +----
 2 files changed, 2 insertions(+), 6 deletions(-)
 diff --git a/clang/docs/conf.py b/clang/docs/conf.py
 index 4cee382a718f..d2e2198e05d4 100644
 --- a/clang/docs/conf.py
 +++ b/clang/docs/conf.py
@@ -43,8 +43,7 @@ try:
     extensions.append("myst_parser")
 except ImportError:
 -    if not tags.has("builder-man"):
 -        raise
 +    pass
 # The encoding of source files.
 diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py
 index d9fa6961032b..e38c009a457d 100644
 --- a/llvm/docs/conf.py
 +++ b/llvm/docs/conf.py
@@ -36,10 +36,7 @@ try:
     extensions.append("myst_parser")
 except ImportError:
 -    if not tags.has("builder-man"):
 -        raise
 -else:
 -    myst_enable_extensions = ["substitution"]
 +    pass
 # Automatic anchors for markdown titles
 myst_heading_anchors = 6
 -- 
 2.46.0
--- a/0001-Always-build-shared-libs-for-LLD.patch
+++ b/0001-Always-build-shared-libs-for-LLD.patch
@ -0,0 +1,30 @@
 From 69faadbc396000bfa60c722f6fb9c0fc3fb2daf0 Mon Sep 17 00:00:00 2001
 From: Nikita Popov <npopov@redhat.com>
 Date: Wed, 8 May 2024 12:30:36 +0900
 Subject: [PATCH] Always build shared libs for LLD
 We don't want to enable BUILD_SHARED_LIBS for the whole build,
 but we do want to build lld libraries.
 ---
 lld/cmake/modules/AddLLD.cmake | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)
 diff --git a/lld/cmake/modules/AddLLD.cmake b/lld/cmake/modules/AddLLD.cmake
 index 9f2684b6f933..743ec87814a2 100644
 --- a/lld/cmake/modules/AddLLD.cmake
 +++ b/lld/cmake/modules/AddLLD.cmake
@@ -7,9 +7,8 @@ macro(add_lld_library name)
     ""
     ""
     ${ARGN})
 -  if(ARG_SHARED)
 -    set(ARG_ENABLE_SHARED SHARED)
 -  endif()
 +  # Always build shared libs for LLD.
 +  set(ARG_ENABLE_SHARED SHARED)
   llvm_add_library(${name} ${ARG_ENABLE_SHARED} ${ARG_UNPARSED_ARGUMENTS})
   if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
 -- 
 2.45.1
--- a/0001-Fix-page-size-constant-on-aarch64-and-ppc64le.patch
+++ b/0001-Fix-page-size-constant-on-aarch64-and-ppc64le.patch
@ -0,0 +1,25 @@
 From 5f73befe5a0df82e455f4b1052e62f34009e98bb Mon Sep 17 00:00:00 2001
 From: Tom Stellard <tstellar@redhat.com>
 Date: Tue, 23 Apr 2024 15:08:34 -0700
 Subject: [PATCH] Fix page size constant on aarch64 and ppc64le
 ---
 compiler-rt/lib/cfi/cfi.cpp | 2 ++
 1 file changed, 2 insertions(+)
 diff --git a/compiler-rt/lib/cfi/cfi.cpp b/compiler-rt/lib/cfi/cfi.cpp
 index ad1c91623514..e7e86e5807a8 100644
 --- a/compiler-rt/lib/cfi/cfi.cpp
 +++ b/compiler-rt/lib/cfi/cfi.cpp
@@ -53,6 +53,8 @@ namespace __cfi {
 #if SANITIZER_LOONGARCH64
 #define kCfiShadowLimitsStorageSize 16384 // 16KiB on loongarch64 per page
 +#elif defined(__aarch64__) || defined(__powerpc64__)
 +#define kCfiShadowLimitsStorageSize 65536 // 1 page
 #else
 #define kCfiShadowLimitsStorageSize 4096 // 1 page
 #endif
 -- 
 2.40.1
--- a/0001-Fix-python3-clang.patch
+++ b/0001-Fix-python3-clang.patch
@ -0,0 +1,45 @@
 From 1c8a88c870a00eea6c80109cc682e0276ff7888d Mon Sep 17 00:00:00 2001
 From: Nikita Popov <npopov@redhat.com>
 Date: Mon, 8 Jul 2024 12:32:57 +0200
 Subject: [PATCH] Fix python3-clang
 Drop confusing `cd ..` so we stay in the root of the LLVM sources
 and the install command succeeds.
 ---
 install.spec.inc | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)
 diff --git a/install.spec.inc b/install.spec.inc
 index 0fc1424..5f4a43b 100644
 --- a/install.spec.inc
 +++ b/install.spec.inc
@@ -98,8 +98,6 @@ touch %{buildroot}%{_bindir}/llvm-config%{exec_suffix}
 mkdir -p %{buildroot}%{pkg_datadir}/llvm/cmake
 cp -Rv cmake/* %{buildroot}%{pkg_datadir}/llvm/cmake
 -cd ..
 -
 #endregion
 #region CLANG installation
@@ -134,7 +132,7 @@ mkdir -p %{buildroot}%{python3_sitelib}/clang/
 # install: omitting directory 'bindings/python/clang/__pycache__'
 # NOTE: this only happens if we include the gdb plugin of libomp.
 # Remove the plugin with command and we're good: rm -rf %{buildroot}/%{_datarootdir}/gdb
 -install -p -m644 clang/bindings/python/clang/* %{buildroot}%{python3_sitelib}/clang/ || true
 +install -p -m644 clang/bindings/python/clang/* %{buildroot}%{python3_sitelib}/clang/
 %py_byte_compile %{__python3} %{buildroot}%{python3_sitelib}/clang
 # install scanbuild-py to python sitelib.
@@ -268,7 +266,7 @@ done
 # https://docs.fedoraproject.org/en-US/packaging-guidelines/Alternatives/
 touch %{buildroot}%{_bindir}/ld
 -install -D -m 644 -t  %{buildroot}%{_mandir}/man1/ %{src_tarball_dir}/lld/docs/ld.lld.1
 +install -D -m 644 -t  %{buildroot}%{_mandir}/man1/ lld/docs/ld.lld.1
 %post -n %{pkg_name_lld}
 %{_sbindir}/update-alternatives --install %{_bindir}/ld ld %{_bindir}/ld.lld 1
 -- 
 2.45.2
--- a/0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch
+++ b/0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch
@ -0,0 +1,27 @@
 From 49f827b09db549de62dcaf8b90b3fcb3e08c0ee5 Mon Sep 17 00:00:00 2001
 From: Serge Guelton <sguelton@redhat.com>
 Date: Mon, 6 Mar 2023 12:37:48 +0100
 Subject: [PATCH] Make -funwind-tables the default on all archs
 ---
 clang/lib/Driver/ToolChains/Gnu.cpp | 4 ++++
 1 file changed, 4 insertions(+)
 diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
 index 24fbdcffc07b..8fed46b49515 100644
 --- a/clang/lib/Driver/ToolChains/Gnu.cpp
 +++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -2904,6 +2904,10 @@ Generic_GCC::getDefaultUnwindTableLevel(const ArgList &Args) const {
   case llvm::Triple::riscv64:
   case llvm::Triple::x86:
   case llvm::Triple::x86_64:
 +  // Enable -funwind-tables on all architectures supported by Fedora:
 +  // rhbz#1655546
 +  case llvm::Triple::systemz:
 +  case llvm::Triple::arm:
     return UnwindTableLevel::Asynchronous;
   default:
     return UnwindTableLevel::None;
 -- 
 2.39.1
--- a/0001-PEI-Don-t-zero-out-noreg-operands.patch
+++ b/0001-PEI-Don-t-zero-out-noreg-operands.patch
@ -1,74 +0,0 @@
 From 9d1f05a7b8537deb5f626cd1b7b26ef2678f4c8e Mon Sep 17 00:00:00 2001
 From: Arthur Eubanks <aeubanks@google.com>
 Date: Thu, 27 Jul 2023 13:27:58 -0700
 Subject: [PATCH] [PEI] Don't zero out noreg operands
 A tail call may have $noreg operands.
 Fixes a crash.
 Reviewed By: xgupta
 Differential Revision: https://reviews.llvm.org/D156485
 (cherry picked from commit f800c1f3b207e7bcdc8b4c7192928d9a078242a0)
 ---
 llvm/lib/CodeGen/PrologEpilogInserter.cpp    |  9 +++++++--
 llvm/test/CodeGen/X86/zero-call-used-regs.ll | 14 ++++++++++++++
 2 files changed, 21 insertions(+), 2 deletions(-)
 diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
 index e323aaaeefaf..49047719fdaa 100644
 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
 +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -1285,6 +1285,8 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
           continue;
         MCRegister Reg = MO.getReg();
 +        if (!Reg)
 +          continue;
         // This picks up sibling registers (e.q. %al -> %ah).
         for (MCRegUnit Unit : TRI.regunits(Reg))
@@ -1308,8 +1310,11 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
         if (!MO.isReg())
           continue;
 -        for (const MCPhysReg &Reg :
 -             TRI.sub_and_superregs_inclusive(MO.getReg()))
 +        MCRegister Reg = MO.getReg();
 +        if (!Reg)
 +          continue;
 +
 +        for (const MCPhysReg Reg : TRI.sub_and_superregs_inclusive(Reg))
           RegsToZero.reset(Reg);
       }
     }
 diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs.ll b/llvm/test/CodeGen/X86/zero-call-used-regs.ll
 index 63d51c916bb9..97ad5ce9c8cb 100644
 --- a/llvm/test/CodeGen/X86/zero-call-used-regs.ll
 +++ b/llvm/test/CodeGen/X86/zero-call-used-regs.ll
@@ -241,6 +241,20 @@ entry:
   ret i32 %x
 }
 +define dso_local void @tailcall(ptr %p) local_unnamed_addr #0 "zero-call-used-regs"="used-gpr" {
 +; I386-LABEL: tailcall:
 +; I386:       # %bb.0:
 +; I386-NEXT:    movl {{[0-9]+}}(%esp), %eax
 +; I386-NEXT:    jmpl *(%eax) # TAILCALL
 +;
 +; X86-64-LABEL: tailcall:
 +; X86-64:       # %bb.0:
 +; X86-64-NEXT:    jmpq *(%rdi) # TAILCALL
 +  %c = load ptr, ptr %p
 +  tail call void %c()
 +  ret void
 +}
 +
 ; Don't emit zeroing registers in "main" function.
 define dso_local i32 @main() local_unnamed_addr #1 {
 ; I386-LABEL: main:
 -- 
 2.43.0
--- a/0001-Workaround-a-bug-in-ORC-on-ppc64le.patch
+++ b/0001-Workaround-a-bug-in-ORC-on-ppc64le.patch
@ -0,0 +1,30 @@
 From a2449cee8c995b56f1892502aab3dfad3d6f3ca1 Mon Sep 17 00:00:00 2001
 From: Tulio Magno Quites Machado Filho <tuliom@redhat.com>
 Date: Fri, 8 Sep 2023 11:45:34 -0300
 Subject: [PATCH] Workaround a bug in ORC on ppc64le
 The Jit code appears to be returning the wrong printf symbol on ppc64le
 after the transition of the default long double to IEEE 128-bit floating
 point.
 ---
 clang/unittests/Interpreter/InterpreterTest.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
 diff --git a/clang/unittests/Interpreter/InterpreterTest.cpp b/clang/unittests/Interpreter/InterpreterTest.cpp
 index abb8e6377aab..7b6697ebc6ed 100644
 --- a/clang/unittests/Interpreter/InterpreterTest.cpp
 +++ b/clang/unittests/Interpreter/InterpreterTest.cpp
@@ -243,7 +243,9 @@ TEST(IncrementalProcessing, FindMangledNameSymbol) {
   EXPECT_FALSE(!Addr);
   // FIXME: Re-enable when we investigate the way we handle dllimports on Win.
 -#ifndef _WIN32
 +  // FIXME: The printf symbol returned from the Jit may not be correct on
 +  //        ppc64le when the default long double is IEEE 128-bit fp.
 +#if !defined _WIN32 && !(defined __PPC64__ && defined __LITTLE_ENDIAN__)
   EXPECT_EQ((uintptr_t)&printf, Addr->getValue());
 #endif // _WIN32
 }
 -- 
 2.41.0
--- a/0003-PATCH-clang-Don-t-install-static-libraries.patch
+++ b/0003-PATCH-clang-Don-t-install-static-libraries.patch
@ -0,0 +1,25 @@
 From 88704fc2eabb9dd19a9c3eb81a9b3dc37d95651c Mon Sep 17 00:00:00 2001
 From: Tom Stellard <tstellar@redhat.com>
 Date: Fri, 31 Jan 2020 11:04:57 -0800
 Subject: [PATCH][clang] Don't install static libraries
 ---
 clang/cmake/modules/AddClang.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/clang/cmake/modules/AddClang.cmake b/clang/cmake/modules/AddClang.cmake
 index 5752f4277444..0f52822d91f0 100644
 --- a/clang/cmake/modules/AddClang.cmake
 +++ b/clang/cmake/modules/AddClang.cmake
@@ -113,7 +113,7 @@ macro(add_clang_library name)
     if(TARGET ${lib})
       target_link_libraries(${lib} INTERFACE ${LLVM_COMMON_LIBS})
 -      if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY OR ARG_INSTALL_WITH_TOOLCHAIN)
 +      if (ARG_SHARED AND (NOT LLVM_INSTALL_TOOLCHAIN_ONLY OR ARG_INSTALL_WITH_TOOLCHAIN))
         get_target_export_arg(${name} Clang export_to_clangtargets UMBRELLA clang-libraries)
         install(TARGETS ${lib}
           COMPONENT ${lib}
 -- 
 2.30.2
--- a/0101-Deactivate-markdown-doc.patch
+++ b/0101-Deactivate-markdown-doc.patch
@ -1,13 +0,0 @@
 diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py
 index cf8a75980b53..b208ad138e89 100644
 --- a/llvm/docs/conf.py
 +++ b/llvm/docs/conf.py
@@ -26,7 +26,7 @@ from datetime import date
 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 -extensions = ["myst_parser", "sphinx.ext.intersphinx", "sphinx.ext.todo"]
 +extensions = ["sphinx.ext.intersphinx", "sphinx.ext.todo"]
 # Automatic anchors for markdown titles
 from llvm_slug import make_slug
--- a/99273.patch
+++ b/99273.patch
@ -1,893 +0,0 @@
 From 91052169960477fbc39169c10f9fae3bec732510 Mon Sep 17 00:00:00 2001
 From: Carl Ritson <carl.ritson@amd.com>
 Date: Wed, 17 Jul 2024 15:07:42 +0900
 Subject: [PATCH 1/3] [AMDGPU] Implement workaround for GFX11.5 export priority
 On GFX11.5 shaders having completed exports need to execute/wait
 at a lower priority than shaders still executing exports.
 Add code to maintain normal priority of 2 for shaders that export
 and drop to priority 0 after exports.
 ---
 llvm/lib/Target/AMDGPU/AMDGPU.td              |  15 +-
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 112 ++++++
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h  |   1 +
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         |   3 +
 .../AMDGPU/required-export-priority.ll        | 344 ++++++++++++++++++
 .../AMDGPU/required-export-priority.mir       | 293 +++++++++++++++
 6 files changed, 765 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.mir
 diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
 index dfc8eaea66f7b..14fcf6a210a78 100644
 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td
 +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -947,6 +947,12 @@ def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
   "Has restricted SOffset (immediate not supported)."
 >;
 +def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority",
 +  "HasRequiredExportPriority",
 +  "true",
 +  "Export priority must be explicitly manipulated on GFX11.5"
 +>;
 +
 //===------------------------------------------------------------===//
 // Subtarget Features (options and debugging)
 //===------------------------------------------------------------===//
@@ -1597,14 +1603,16 @@ def FeatureISAVersion11_5_0 : FeatureSet<
   !listconcat(FeatureISAVersion11_Common.Features,
     [FeatureSALUFloatInsts,
      FeatureDPPSrc1SGPR,
 -     FeatureVGPRSingleUseHintInsts])>;
 +     FeatureVGPRSingleUseHintInsts,
 +     FeatureRequiredExportPriority])>;
 def FeatureISAVersion11_5_1 : FeatureSet<
   !listconcat(FeatureISAVersion11_Common.Features,
     [FeatureSALUFloatInsts,
      FeatureDPPSrc1SGPR,
      FeatureVGPRSingleUseHintInsts,
 -     FeatureGFX11FullVGPRs])>;
 +     FeatureGFX11FullVGPRs,
 +     FeatureRequiredExportPriority])>;
 def FeatureISAVersion12 : FeatureSet<
   [FeatureGFX12,
 diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
 index a402fc6d7e611..a8b171aa82840 100644
 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
 +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -14,6 +14,7 @@
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIMachineFunctionInfo.h"
 +#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/TargetParser/TargetParser.h"
@@ -1104,6 +1105,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
   fixWMMAHazards(MI);
   fixShift64HighRegBug(MI);
   fixVALUMaskWriteHazard(MI);
 +  fixRequiredExportPriority(MI);
 }
 bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
@@ -2895,3 +2897,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
   return true;
 }
 +
 +static bool ensureEntrySetPrio(MachineFunction *MF, int Priority,
 +                               const SIInstrInfo &TII) {
 +  MachineBasicBlock &EntryMBB = MF->front();
 +  if (EntryMBB.begin() != EntryMBB.end()) {
 +    auto &EntryMI = *EntryMBB.begin();
 +    if (EntryMI.getOpcode() == AMDGPU::S_SETPRIO &&
 +        EntryMI.getOperand(0).getImm() >= Priority)
 +      return false;
 +  }
 +
 +  BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII.get(AMDGPU::S_SETPRIO))
 +      .addImm(Priority);
 +  return true;
 +}
 +
 +bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
 +  if (!ST.hasRequiredExportPriority())
 +    return false;
 +
 +  // Assume the following shader types will never have exports,
 +  // and avoid adding or adjusting S_SETPRIO.
 +  MachineBasicBlock *MBB = MI->getParent();
 +  MachineFunction *MF = MBB->getParent();
 +  auto CC = MF->getFunction().getCallingConv();
 +  switch (CC) {
 +  case CallingConv::AMDGPU_CS:
 +  case CallingConv::AMDGPU_CS_Chain:
 +  case CallingConv::AMDGPU_CS_ChainPreserve:
 +  case CallingConv::AMDGPU_KERNEL:
 +    return false;
 +  default:
 +    break;
 +  }
 +
 +  const int MaxPriority = 3;
 +  const int NormalPriority = 2;
 +  const int PostExportPriority = 0;
 +
 +  auto It = MI->getIterator();
 +  switch (MI->getOpcode()) {
 +  case AMDGPU::S_ENDPGM:
 +  case AMDGPU::S_ENDPGM_SAVED:
 +  case AMDGPU::S_ENDPGM_ORDERED_PS_DONE:
 +  case AMDGPU::SI_RETURN_TO_EPILOG:
 +    // Ensure shader with calls raises priority at entry.
 +    // This ensures correct priority if exports exist in callee.
 +    if (MF->getFrameInfo().hasCalls())
 +      return ensureEntrySetPrio(MF, NormalPriority, TII);
 +    return false;
 +  case AMDGPU::S_SETPRIO: {
 +    // Raise minimum priority unless in workaround.
 +    auto &PrioOp = MI->getOperand(0);
 +    int Prio = PrioOp.getImm();
 +    bool InWA = (Prio == PostExportPriority) &&
 +                (It != MBB->begin() && TII.isEXP(*std::prev(It)));
 +    if (InWA || Prio >= NormalPriority)
 +      return false;
 +    PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority));
 +    return true;
 +  }
 +  default:
 +    if (!TII.isEXP(*MI))
 +      return false;
 +    break;
 +  }
 +
 +  // Check entry priority at each export (as there will only be a few).
 +  // Note: amdgpu_gfx can only be a callee, so defer to caller setprio.
 +  bool Changed = false;
 +  if (CC != CallingConv::AMDGPU_Gfx)
 +    Changed = ensureEntrySetPrio(MF, NormalPriority, TII);
 +
 +  auto NextMI = std::next(It);
 +  bool EndOfShader = false;
 +  if (NextMI != MBB->end()) {
 +    // Only need WA at end of sequence of exports.
 +    if (TII.isEXP(*NextMI))
 +      return Changed;
 +    // Assume appropriate S_SETPRIO after export means WA already applied.
 +    if (NextMI->getOpcode() == AMDGPU::S_SETPRIO &&
 +        NextMI->getOperand(0).getImm() == PostExportPriority)
 +      return Changed;
 +    EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM;
 +  }
 +
 +  const DebugLoc &DL = MI->getDebugLoc();
 +
 +  // Lower priority.
 +  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
 +      .addImm(PostExportPriority);
 +
 +  if (!EndOfShader) {
 +    // Wait for exports to complete.
 +    BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT))
 +        .addReg(AMDGPU::SGPR_NULL)
 +        .addImm(0);
 +  }
 +
 +  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
 +  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
 +
 +  if (!EndOfShader) {
 +    // Return to normal (higher) priority.
 +    BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
 +        .addImm(NormalPriority);
 +  }
 +
 +  return true;
 +}
 diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
 index 3ccca527c626b..f2a64ab48e180 100644
 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
 +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -107,6 +107,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   bool fixWMMAHazards(MachineInstr *MI);
   bool fixShift64HighRegBug(MachineInstr *MI);
   bool fixVALUMaskWriteHazard(MachineInstr *MI);
 +  bool fixRequiredExportPriority(MachineInstr *MI);
   int checkMAIHazards(MachineInstr *MI);
   int checkMAIHazards908(MachineInstr *MI);
 diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
 index e5817594a4521..def89c785b855 100644
 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
 +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -238,6 +238,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasVOPDInsts = false;
   bool HasVALUTransUseHazard = false;
   bool HasForceStoreSC0SC1 = false;
 +  bool HasRequiredExportPriority = false;
   // Dummy feature to use for assembler in tablegen.
   bool FeatureDisable = false;
@@ -1282,6 +1283,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }
 +  bool hasRequiredExportPriority() const { return HasRequiredExportPriority; }
 +
   /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
   /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
   bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
 diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
 new file mode 100644
 index 0000000000000..377902f3f0d1a
 --- /dev/null
 +++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -0,0 +1,344 @@
 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 +
 +define amdgpu_ps void @test_export_zeroes_f32() #0 {
 +; GCN-LABEL: test_export_zeroes_f32:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    v_mov_b32_e32 v0, 0
 +; GCN-NEXT:    exp mrt0 off, off, off, off
 +; GCN-NEXT:    exp mrt0 off, off, off, off done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_endpgm
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_export_en_src0_f32() #0 {
 +; GCN-LABEL: test_export_en_src0_f32:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
 +; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
 +; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
 +; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
 +; GCN-NEXT:    exp mrt0 v3, off, off, off done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_endpgm
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_gs void @test_export_gs() #0 {
 +; GCN-LABEL: test_export_gs:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
 +; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
 +; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
 +; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
 +; GCN-NEXT:    exp mrt0 off, v2, off, off done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_endpgm
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_hs void @test_export_hs() #0 {
 +; GCN-LABEL: test_export_hs:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
 +; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
 +; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
 +; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
 +; GCN-NEXT:    exp mrt0 off, v2, off, off done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_endpgm
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_gfx void @test_export_gfx(float %v) #0 {
 +; GCN-LABEL: test_export_gfx:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 +; GCN-NEXT:    v_mov_b32_e32 v1, 4.0
 +; GCN-NEXT:    v_mov_b32_e32 v2, 0.5
 +; GCN-NEXT:    v_mov_b32_e32 v3, 2.0
 +; GCN-NEXT:    exp mrt0 off, v3, off, off done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_waitcnt expcnt(0)
 +; GCN-NEXT:    s_setpc_b64 s[30:31]
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %v, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_cs void @test_export_cs() #0 {
 +; GCN-LABEL: test_export_cs:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
 +; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
 +; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
 +; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
 +; GCN-NEXT:    exp mrt0 off, v2, off, off done
 +; GCN-NEXT:    s_endpgm
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_kernel void @test_export_kernel() #0 {
 +; GCN-LABEL: test_export_kernel:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
 +; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
 +; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
 +; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
 +; GCN-NEXT:    exp mrt0 off, v2, off, off done
 +; GCN-NEXT:    s_endpgm
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_gfx void @test_no_export_gfx(float %v) #0 {
 +; GCN-LABEL: test_no_export_gfx:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 +; GCN-NEXT:    s_setpc_b64 s[30:31]
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_no_export_ps(float %v) #0 {
 +; GCN-LABEL: test_no_export_ps:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_endpgm
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
 +; GCN-LABEL: test_if_export_f32:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_mov_b32 s0, exec_lo
 +; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
 +; GCN-NEXT:    s_cbranch_execz .LBB9_2
 +; GCN-NEXT:  ; %bb.1: ; %exp
 +; GCN-NEXT:    exp mrt0 v1, v2, v3, v4
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:  .LBB9_2: ; %end
 +; GCN-NEXT:    s_endpgm
 +  %cc = icmp eq i32 %flag, 0
 +  br i1 %cc, label %end, label %exp
 +
 +exp:
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
 +  br label %end
 +
 +end:
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
 +; GCN-LABEL: test_if_export_vm_f32:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_mov_b32 s0, exec_lo
 +; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
 +; GCN-NEXT:    s_cbranch_execz .LBB10_2
 +; GCN-NEXT:  ; %bb.1: ; %exp
 +; GCN-NEXT:    exp mrt0 v1, v2, v3, v4
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:  .LBB10_2: ; %end
 +; GCN-NEXT:    s_endpgm
 +  %cc = icmp eq i32 %flag, 0
 +  br i1 %cc, label %end, label %exp
 +
 +exp:
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
 +  br label %end
 +
 +end:
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
 +; GCN-LABEL: test_if_export_done_f32:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_mov_b32 s0, exec_lo
 +; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
 +; GCN-NEXT:    s_cbranch_execz .LBB11_2
 +; GCN-NEXT:  ; %bb.1: ; %exp
 +; GCN-NEXT:    exp mrt0 v1, v2, v3, v4 done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:  .LBB11_2: ; %end
 +; GCN-NEXT:    s_endpgm
 +  %cc = icmp eq i32 %flag, 0
 +  br i1 %cc, label %end, label %exp
 +
 +exp:
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
 +  br label %end
 +
 +end:
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
 +; GCN-LABEL: test_if_export_vm_done_f32:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_mov_b32 s0, exec_lo
 +; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
 +; GCN-NEXT:    s_cbranch_execz .LBB12_2
 +; GCN-NEXT:  ; %bb.1: ; %exp
 +; GCN-NEXT:    exp mrt0 v1, v2, v3, v4 done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:  .LBB12_2: ; %end
 +; GCN-NEXT:    s_endpgm
 +  %cc = icmp eq i32 %flag, 0
 +  br i1 %cc, label %end, label %exp
 +
 +exp:
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
 +  br label %end
 +
 +end:
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 {
 +; GCN-LABEL: test_export_pos_before_param_across_load:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen
 +; GCN-NEXT:    v_mov_b32_e32 v1, 0
 +; GCN-NEXT:    v_mov_b32_e32 v2, 1.0
 +; GCN-NEXT:    v_mov_b32_e32 v3, 0.5
 +; GCN-NEXT:    s_waitcnt vmcnt(0)
 +; GCN-NEXT:    exp pos0 v1, v1, v1, v0 done
 +; GCN-NEXT:    exp invalid_target_32 v2, v2, v2, v2
 +; GCN-NEXT:    exp invalid_target_33 v2, v2, v2, v3
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_endpgm
 +  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
 +  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
 +  %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
 +  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
 +; GCN-LABEL: test_export_across_store_load:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    v_mov_b32_e32 v2, 24
 +; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
 +; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 +; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 8, vcc_lo
 +; GCN-NEXT:    v_mov_b32_e32 v2, 0
 +; GCN-NEXT:    scratch_store_b32 v0, v1, off
 +; GCN-NEXT:    scratch_load_b32 v0, off, off
 +; GCN-NEXT:    v_mov_b32_e32 v1, 1.0
 +; GCN-NEXT:    exp pos0 v2, v2, v2, v1 done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_waitcnt vmcnt(0)
 +; GCN-NEXT:    exp invalid_target_32 v0, v2, v1, v2
 +; GCN-NEXT:    exp invalid_target_33 v0, v2, v1, v2
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_endpgm
 +  %data0 = alloca <4 x float>, align 8, addrspace(5)
 +  %data1 = alloca <4 x float>, align 8, addrspace(5)
 +  %cmp = icmp eq i32 %idx, 1
 +  %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
 +  store float %v, ptr addrspace(5) %data, align 8
 +  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
 +  %load0 = load float, ptr addrspace(5) %data0, align 8
 +  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
 +  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_export_in_callee(float %v) #0 {
 +; GCN-LABEL: test_export_in_callee:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_getpc_b64 s[0:1]
 +; GCN-NEXT:    s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
 +; GCN-NEXT:    s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
 +; GCN-NEXT:    v_add_f32_e32 v0, 1.0, v0
 +; GCN-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
 +; GCN-NEXT:    s_mov_b32 s32, 0
 +; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 +; GCN-NEXT:    s_swappc_b64 s[30:31], s[0:1]
 +; GCN-NEXT:    s_endpgm
 +  %x = fadd float %v, 1.0
 +  call void @test_export_gfx(float %x)
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
 +; GCN-LABEL: test_export_in_callee_prio:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_mov_b32 s32, 0
 +; GCN-NEXT:    v_add_f32_e32 v0, 1.0, v0
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_getpc_b64 s[0:1]
 +; GCN-NEXT:    s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
 +; GCN-NEXT:    s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
 +; GCN-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
 +; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 +; GCN-NEXT:    s_swappc_b64 s[30:31], s[0:1]
 +; GCN-NEXT:    s_endpgm
 +  %x = fadd float %v, 1.0
 +  call void @llvm.amdgcn.s.setprio(i16 0)
 +  call void @test_export_gfx(float %x)
 +  ret void
 +}
 +
 +declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
 +declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
 +declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
 +declare void @llvm.amdgcn.s.setprio(i16)
 +
 +attributes #0 = { nounwind }
 +attributes #1 = { nounwind inaccessiblememonly }
 +attributes #2 = { nounwind readnone }
 diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.mir b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
 new file mode 100644
 index 0000000000000..eee04468036e5
 --- /dev/null
 +++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
@@ -0,0 +1,293 @@
 +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=post-RA-hazard-rec -verify-machineinstrs  %s -o - | FileCheck -check-prefixes=GFX1150 %s
 +
 +--- |
 +  define amdgpu_ps void @end_of_shader() {
 +    ret void
 +  }
 +  define amdgpu_ps void @end_of_shader_return_to_epilogue() {
 +    ret void
 +  }
 +  define amdgpu_ps void @end_of_block() {
 +    ret void
 +  }
 +  define amdgpu_ps void @start_of_block() {
 +    ret void
 +  }
 +  define amdgpu_ps void @block_of_exports() {
 +    ret void
 +  }
 +  define amdgpu_ps void @sparse_exports() {
 +    ret void
 +  }
 +  define amdgpu_ps void @existing_setprio_1() {
 +    ret void
 +  }
 +  define amdgpu_ps void @existing_setprio_2() {
 +    ret void
 +  }
 +...
 +
 +---
 +name: end_of_shader
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  bb.0:
 +    liveins: $vgpr0
 +    ; GFX1150-LABEL: name: end_of_shader
 +    ; GFX1150: liveins: $vgpr0
 +    ; GFX1150-NEXT: {{  $}}
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: S_SETPRIO 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_ENDPGM 0
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    S_ENDPGM 0
 +...
 +
 +---
 +name: end_of_shader_return_to_epilogue
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  bb.0:
 +    liveins: $vgpr0
 +    ; GFX1150-LABEL: name: end_of_shader_return_to_epilogue
 +    ; GFX1150: liveins: $vgpr0
 +    ; GFX1150-NEXT: {{  $}}
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: S_SETPRIO 0
 +    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: SI_RETURN_TO_EPILOG $vgpr0
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    SI_RETURN_TO_EPILOG $vgpr0
 +...
 +
 +---
 +name: end_of_block
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  ; GFX1150-LABEL: name: end_of_block
 +  ; GFX1150: bb.0:
 +  ; GFX1150-NEXT:   successors: %bb.1(0x80000000)
 +  ; GFX1150-NEXT:   liveins: $vgpr0
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT:   S_SETPRIO 2
 +  ; GFX1150-NEXT:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +  ; GFX1150-NEXT:   S_SETPRIO 0
 +  ; GFX1150-NEXT:   S_WAITCNT_EXPCNT $sgpr_null, 0
 +  ; GFX1150-NEXT:   S_NOP 0
 +  ; GFX1150-NEXT:   S_NOP 0
 +  ; GFX1150-NEXT:   S_SETPRIO 2
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT: bb.1:
 +  ; GFX1150-NEXT:   S_ENDPGM 0
 +  bb.0:
 +    liveins: $vgpr0
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +
 +  bb.1:
 +    S_ENDPGM 0
 +...
 +
 +---
 +name: start_of_block
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  ; GFX1150-LABEL: name: start_of_block
 +  ; GFX1150: bb.0:
 +  ; GFX1150-NEXT:   successors: %bb.1(0x80000000)
 +  ; GFX1150-NEXT:   liveins: $vgpr0
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT:   S_SETPRIO 2
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT: bb.1:
 +  ; GFX1150-NEXT:   successors: %bb.2(0x80000000)
 +  ; GFX1150-NEXT:   liveins: $vgpr0
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +  ; GFX1150-NEXT:   S_SETPRIO 0
 +  ; GFX1150-NEXT:   S_WAITCNT_EXPCNT $sgpr_null, 0
 +  ; GFX1150-NEXT:   S_NOP 0
 +  ; GFX1150-NEXT:   S_NOP 0
 +  ; GFX1150-NEXT:   S_SETPRIO 2
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT: bb.2:
 +  ; GFX1150-NEXT:   S_ENDPGM 0
 +  bb.0:
 +    liveins: $vgpr0
 +
 +  bb.1:
 +    liveins: $vgpr0
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +
 +  bb.2:
 +    S_ENDPGM 0
 +...
 +
 +---
 +name: block_of_exports
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  bb.0:
 +    liveins: $vgpr0
 +    ; GFX1150-LABEL: name: block_of_exports
 +    ; GFX1150: liveins: $vgpr0
 +    ; GFX1150-NEXT: {{  $}}
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: S_SETPRIO 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_ENDPGM 0
 +    EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    S_ENDPGM 0
 +...
 +
 +---
 +name: sparse_exports
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  bb.0:
 +    liveins: $vgpr0
 +    ; GFX1150-LABEL: name: sparse_exports
 +    ; GFX1150: liveins: $vgpr0
 +    ; GFX1150-NEXT: {{  $}}
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: S_SETPRIO 0
 +    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
 +    ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: S_SETPRIO 0
 +    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
 +    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: S_SETPRIO 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_ENDPGM 0
 +    EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
 +    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    S_ENDPGM 0
 +...
 +
 +---
 +name: existing_setprio_1
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  ; GFX1150-LABEL: name: existing_setprio_1
 +  ; GFX1150: bb.0:
 +  ; GFX1150-NEXT:   successors: %bb.1(0x80000000)
 +  ; GFX1150-NEXT:   liveins: $vgpr0
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT:   S_SETPRIO 2
 +  ; GFX1150-NEXT:   $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT: bb.1:
 +  ; GFX1150-NEXT:   successors: %bb.2(0x80000000)
 +  ; GFX1150-NEXT:   liveins: $vgpr0
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT:   S_SETPRIO 3
 +  ; GFX1150-NEXT:   $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
 +  ; GFX1150-NEXT:   S_SETPRIO 2
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT: bb.2:
 +  ; GFX1150-NEXT:   successors: %bb.3(0x80000000)
 +  ; GFX1150-NEXT:   liveins: $vgpr0
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT:   S_SETPRIO 3
 +  ; GFX1150-NEXT:   $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
 +  ; GFX1150-NEXT:   S_SETPRIO 2
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT: bb.3:
 +  ; GFX1150-NEXT:   liveins: $vgpr0
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT:   EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +  ; GFX1150-NEXT:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +  ; GFX1150-NEXT:   S_SETPRIO 0
 +  ; GFX1150-NEXT:   S_NOP 0
 +  ; GFX1150-NEXT:   S_NOP 0
 +  ; GFX1150-NEXT:   S_ENDPGM 0
 +  bb.0:
 +    liveins: $vgpr0
 +    $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
 +
 +  bb.1:
 +    liveins: $vgpr0
 +    S_SETPRIO 3
 +    $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
 +    S_SETPRIO 0
 +
 +  bb.2:
 +    liveins: $vgpr0
 +    S_SETPRIO 1
 +    $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
 +    S_SETPRIO 0
 +
 +  bb.3:
 +    liveins: $vgpr0
 +    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    S_ENDPGM 0
 +...
 +
 +---
 +name: existing_setprio_2
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  bb.0:
 +    liveins: $vgpr0
 +    ; GFX1150-LABEL: name: existing_setprio_2
 +    ; GFX1150: liveins: $vgpr0
 +    ; GFX1150-NEXT: {{  $}}
 +    ; GFX1150-NEXT: S_SETPRIO 3
 +    ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: S_SETPRIO 0
 +    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: S_SETPRIO 3
 +    ; GFX1150-NEXT: S_ENDPGM 0
 +    S_SETPRIO 3
 +    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    S_SETPRIO 3
 +    S_ENDPGM 0
 +...
 From 8ea44e65f2c19facff751aeb2ac960f907fb210f Mon Sep 17 00:00:00 2001
 From: Carl Ritson <carl.ritson@amd.com>
 Date: Wed, 17 Jul 2024 16:18:02 +0900
 Subject: [PATCH 2/3] Remove -verify-machineinstrs from test.
 ---
 llvm/test/CodeGen/AMDGPU/required-export-priority.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
 index 377902f3f0d1a..ebc209bd4d451 100644
 --- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
 +++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 -; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GCN %s
 define amdgpu_ps void @test_export_zeroes_f32() #0 {
 ; GCN-LABEL: test_export_zeroes_f32:
--- a/llvm.spec
+++ b/llvm.spec
--- a/macros.clang
+++ b/macros.clang
@ -0,0 +1,11 @@
 %clang_major_version @@CLANG_MAJOR_VERSION@@
 %clang_minor_version @@CLANG_MINOR_VERSION@@
 %clang_patch_version @@CLANG_PATCH_VERSION@@
 %clang_version %{clang_major_version}.%{clang_minor_version}.%{clang_patch_version}
 # This is the path to the clang resource directory that has clang's internal
 # headers and libraries.  This path should be used by packages that need to
 # install files into this directory.  This macro's value changes every time
 # clang's version changes.
 %clang_resource_dir %{_prefix}/lib/clang/%{clang_major_version}
--- a/10
+++ b/10
@ -1,6 +1,4 @@
-SHA512 (llvm-18.1.8.src.tar.xz) = 930814730bb2d80cf7f7b2968f0f1f1442009ca62a7ca29992b69d63823270584b059d16aa845bb381411da566e7e4f255fcfbc38acbdf865eb0419b4dfd7459
+SHA512 (llvm-project-19.1.1.src.tar.xz) = 84adab40ffb9ec236dbf203d86c08a0c2c651f98278a9d0936490c7901159eb26eabd3db9316013886b549426d4acb43b75d866f7dc670ab299bf93ba35b1891
-SHA512 (llvm-18.1.8.src.tar.xz.sig) = aab7cb61a6b5dd3776a9b306d91d08763710725b72ba6a4263d3cca5ae5959e3b073b27dbfd95f9a53a78600c6f414e2fd1cc0dbe3176d7cf142996f7af700ca
+SHA512 (llvm-project-19.1.1.src.tar.xz.sig) = 07bb7bffb2b035417d702ca47be9d5759250f1a2cd57606855027d458ceb972a293b45d3d93bcda195588986acbb5eace60524f4aecdc0da7aeb3a8414c37c31
-SHA512 (cmake-18.1.8.src.tar.xz) = e02243b491f9e688db28d7b53270fcf87debf09d3c95b136a7c7b96e26890de68712c60a1e85f5a448a95ad8c81f2d8ae77047780822443bbe39f1a9e6211007
+SHA512 (llvm-project-18.1.8.src.tar.xz) = 25eeee9984c8b4d0fbc240df90f33cbb000d3b0414baff5c8982beafcc5e59e7ef18f6f85d95b3a5f60cb3d4cd4f877c80487b5768bc21bc833f107698ad93db
-SHA512 (cmake-18.1.8.src.tar.xz.sig) = 99191e95130fe4363a8db8f411a0e61af0549ad182a1280f99f0dd3ee679a321b993d103c6915d535a55d9f8a4d7fea86b7fdcc77605e02150e8edf1e18dee57
+SHA512 (llvm-project-18.1.8.src.tar.xz.sig) = ddfd1e8a06756759af6cbe488c82a6d6a62ba91f3e8a0eb4cece561321824f5d165b08ed91010588790b76e19790931d2651b24dba8567e3b151d3cb43bec25b
 SHA512 (third-party-18.1.8.src.tar.xz) = bedaa5d29ebeaf0ee1c700eb8492d0fef185e7c16528202927c81117d94fadd568829aa0e1873e1217e8e72866f3876a9681bbdb2a6a0a5466fc911f7b3620d4
 SHA512 (third-party-18.1.8.src.tar.xz.sig) = 32c4d779a56a3908b291a4f0cf1df72ccb86b55439ad66f9cbad1b48a77cb92b129b131806d2914d0e63cb319cde3181a2c03b75856ec36cee5f88120bb58214