40 changed files with 2398 additions and 3972 deletions
--- a/.centos-ignore
+++ b/.centos-ignore
@ -1,9 +0,0 @@
 # List of files and directories that are not needed on CentOS/RHEL.
 /centos-sync.sh
 /ci.fmf
 /prepare-copr.sh
 /tests/kernel-ark-build.fmf
 /Makefile
 /.copr
 /.git-blame-ignore-revs
 /.packit.yaml
--- a/.copr/Makefile
+++ b/.copr/Makefile
@ -1,33 +0,0 @@
 # Copr "make srpm" entry point: builds a snapshot source RPM from $(spec)
 # and places it in $(outdir).
 # See https://docs.pagure.org/copr.copr/user_documentation.html#make-srpm
 # For the --setopt option used when enabling the copr repo, see:
 # https://pagure.io/copr/copr/issue/184
 # NOTE(review): COPR_USERNAME and COPR_PROJECT are not referenced by the srpm
 # recipe below. They are left lazily expanded (=) so rpm is only invoked if
 # something actually expands them.
 COPR_USERNAME=$(shell rpm --eval %copr_username)
 COPR_PROJECT=$(shell rpm --eval %copr_projectname)
 # Date stamp handed to snapshot-info.sh; expanded once at parse time.
 YYYYMMDD:=$(shell date +%Y%m%d)
 required_packages=git tar xz curl fedpkg
 # outdir and spec may be overridden on the command line. $(CURDIR) is make's
 # own record of the working directory, so no extra shell is forked for it.
 outdir?=$(CURDIR)
 spec?=llvm.spec
 .PHONY: srpm
 srpm:
 	@echo "Check for required packages needed by snapshot-info.sh: $(required_packages)"
 	rpm -q $(required_packages) || dnf install -y $(required_packages) --setopt=install_weak_deps=False
 	@echo "Fetch information about today's snapshot"
 	YYYYMMDD=$(YYYYMMDD) ./.copr/snapshot-info.sh > version.spec.inc
 	@echo "Get sources"
 	fedpkg --release rawhide sources --outdir "$(CURDIR)"
 	@echo "Remove left-over llvm-project tarball and signature"
 	rm -vf "$(CURDIR)"/llvm-project-*.tar.xz*
 	@echo "Finally build SRPM"
 	rpmbuild \
 		--with=snapshot_build \
 		--define "_srcrpmdir $(outdir)" \
 		--define "_sourcedir $(CURDIR)" \
 		--define "_disable_source_fetch 0" \
 		-bs "$(spec)"
--- a/.fmf/version
+++ b/.fmf/version
@ -1 +0,0 @@
 1
--- a/.gitignore
+++ b/.gitignore
@ -1,14 +1,2 @@
-/*.src.rpm
+SOURCES/llvm-project-19.1.7.src.tar.xz
-/*.src.tar.xz
+SOURCES/llvm-project-19.1.7.src.tar.xz.sig
 /*.src.tar.xz.sig
 /*.tar.gz
 /cmake/
 /llvm-*.src/
 /results_llvm/
 /third-party/
 /llvm-git-revision-*.txt
 /llvm-release-*.txt
 /BUILD
 /BUILDROOT
 /out
 /version.spec.inc
--- a/.llvm.metadata
+++ b/.llvm.metadata
@ -0,0 +1,2 @@
 6e4033d8b76a89e82220b5445bff58cdce64300e SOURCES/llvm-project-19.1.7.src.tar.xz
 48f839c6e47a34a1138862a9db6274c150179532 SOURCES/llvm-project-19.1.7.src.tar.xz.sig
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -1,30 +0,0 @@
 # See https://pre-commit.com for more information
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: check-ast
      - id: check-case-conflict
      - id: check-docstring-first
      - id: check-executables-have-shebangs
      - id: check-merge-conflict
      - id: check-symlinks
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: mixed-line-ending
      - id: fix-byte-order-marker
      - id: detect-private-key
      - id: check-toml
      - id: check-yaml
        args:
          - "--allow-multiple-documents"
          - "--unsafe"
  # See https://tmt.readthedocs.io/en/latest/guide.html#checking-data-validity
  - repo: https://github.com/teemtee/tmt.git
    rev: 1.38.0
    hooks:
      - id: tmt-lint
 exclude: ".*\\.patch"
--- a/0001-20-polly-shared-libs.patch
+++ b/0001-20-polly-shared-libs.patch
@ -1,59 +0,0 @@
 From cecb98f56e7d6619d0427fbdbc2f200ce212f0c6 Mon Sep 17 00:00:00 2001
 From: Konrad Kleine <kkleine@redhat.com>
 Date: Tue, 28 Jan 2025 08:34:09 +0000
 Subject: [PATCH] [polly] shared libs
 ---
 polly/cmake/polly_macros.cmake    | 5 ++++-
 polly/lib/CMakeLists.txt          | 1 +
 polly/lib/External/CMakeLists.txt | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)
 diff --git a/polly/cmake/polly_macros.cmake b/polly/cmake/polly_macros.cmake
 index 9bd7b0b0ea59..fc2c3a76901f 100644
 --- a/polly/cmake/polly_macros.cmake
 +++ b/polly/cmake/polly_macros.cmake
@@ -1,5 +1,5 @@
 macro(add_polly_library name)
 -  cmake_parse_arguments(ARG "" "" "" ${ARGN})
 +  cmake_parse_arguments(ARG "SHARED" "" "" ${ARGN})
   set(srcs ${ARG_UNPARSED_ARGUMENTS})
   if(MSVC_IDE OR XCODE)
     file( GLOB_RECURSE headers *.h *.td *.def)
@@ -17,6 +17,9 @@ macro(add_polly_library name)
   else()
     set(libkind)
   endif()
 +  if (ARG_SHARED)
 +    set(libkind SHARED)
 +  endif()
   add_library( ${name} ${libkind} ${srcs} )
   set_target_properties(${name} PROPERTIES FOLDER "Polly/Libraries")
 diff --git a/polly/lib/CMakeLists.txt b/polly/lib/CMakeLists.txt
 index d91f4ecd37e6..965f635b7ff6 100644
 --- a/polly/lib/CMakeLists.txt
 +++ b/polly/lib/CMakeLists.txt
@@ -41,6 +41,7 @@ set(POLLY_COMPONENTS
 # the sources them to be recompiled for each of them.
 add_llvm_pass_plugin(Polly
   NO_MODULE
 +  SHARED
   SUBPROJECT Polly
   Analysis/DependenceInfo.cpp
   Analysis/PolyhedralInfo.cpp
 diff --git a/polly/lib/External/CMakeLists.txt b/polly/lib/External/CMakeLists.txt
 index 5dd69b7199dc..f065fbd7b942 100644
 --- a/polly/lib/External/CMakeLists.txt
 +++ b/polly/lib/External/CMakeLists.txt
@@ -284,6 +284,7 @@ if (POLLY_BUNDLED_ISL)
     )
   add_polly_library(PollyISL
 +    SHARED
     ${ISL_FILES}
     )
 -- 
 2.46.0
--- a/0001-22-polly-shared-libs.patch
+++ b/0001-22-polly-shared-libs.patch
@ -1,59 +0,0 @@
 From daf5077c8ce848b39239879369679c9fea7041b1 Mon Sep 17 00:00:00 2001
 From: Konrad Kleine <kkleine@redhat.com>
 Date: Tue, 28 Jan 2025 08:34:09 +0000
 Subject: [PATCH] shared libs
 ---
 polly/cmake/polly_macros.cmake    | 5 ++++-
 polly/lib/CMakeLists.txt          | 1 +
 polly/lib/External/CMakeLists.txt | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)
 diff --git a/polly/cmake/polly_macros.cmake b/polly/cmake/polly_macros.cmake
 index 9bd7b0b0ea59..fc2c3a76901f 100644
 --- a/polly/cmake/polly_macros.cmake
 +++ b/polly/cmake/polly_macros.cmake
@@ -1,5 +1,5 @@
 macro(add_polly_library name)
 -  cmake_parse_arguments(ARG "" "" "" ${ARGN})
 +  cmake_parse_arguments(ARG "SHARED" "" "" ${ARGN})
   set(srcs ${ARG_UNPARSED_ARGUMENTS})
   if(MSVC_IDE OR XCODE)
     file( GLOB_RECURSE headers *.h *.td *.def)
@@ -17,6 +17,9 @@ macro(add_polly_library name)
   else()
     set(libkind)
   endif()
 +  if (ARG_SHARED)
 +    set(libkind SHARED)
 +  endif()
   add_library( ${name} ${libkind} ${srcs} )
   set_target_properties(${name} PROPERTIES FOLDER "Polly/Libraries")
 diff --git a/polly/lib/CMakeLists.txt b/polly/lib/CMakeLists.txt
 index 0ed673815ff3..e156dcb31655 100644
 --- a/polly/lib/CMakeLists.txt
 +++ b/polly/lib/CMakeLists.txt
@@ -41,6 +41,7 @@ set(POLLY_COMPONENTS
 # the sources them to be recompiled for each of them.
 add_llvm_pass_plugin(Polly
   NO_MODULE
 +  SHARED
   SUBPROJECT Polly
   Analysis/DependenceInfo.cpp
   Analysis/ScopDetection.cpp
 diff --git a/polly/lib/External/CMakeLists.txt b/polly/lib/External/CMakeLists.txt
 index ab5cba93cdcf..fdfd06864bc7 100644
 --- a/polly/lib/External/CMakeLists.txt
 +++ b/polly/lib/External/CMakeLists.txt
@@ -284,6 +284,7 @@ if (POLLY_BUNDLED_ISL)
     )
   add_polly_library(PollyISL
 +    SHARED
     ${ISL_FILES}
     )
 -- 
 2.50.1
--- a/0001-Add-REQUIRES-asserts-to-test-added-in-145149-because.patch
+++ b/0001-Add-REQUIRES-asserts-to-test-added-in-145149-because.patch
@ -1,26 +0,0 @@
 From ffc7d5ae2d79f98967943fabb2abfbc1b1e047fd Mon Sep 17 00:00:00 2001
 From: Douglas Yung <douglas.yung@sony.com>
 Date: Tue, 24 Jun 2025 04:08:34 +0000
 Subject: [PATCH] Add `REQUIRES: asserts` to test added in #145149 because it
 uses the `-debug-only=` flag.
 This should fix the test failure when building without asserts.
 ---
 llvm/test/CodeGen/PowerPC/pr141642.ll | 1 +
 1 file changed, 1 insertion(+)
 diff --git a/llvm/test/CodeGen/PowerPC/pr141642.ll b/llvm/test/CodeGen/PowerPC/pr141642.ll
 index 38a706574786..61bda4dfaf53 100644
 --- a/llvm/test/CodeGen/PowerPC/pr141642.ll
 +++ b/llvm/test/CodeGen/PowerPC/pr141642.ll
@@ -2,6 +2,7 @@
 ; RUN:  FileCheck %s
 ; CHECK-NOT: lxvdsx
 ; CHECK-NOT: LD_SPLAT
 +; REQUIRES: asserts
 define weak_odr dso_local void @unpack(ptr noalias noundef %packed_in) local_unnamed_addr {
 entry:
 -- 
 2.49.0
--- a/0001-CGP-Bail-out-if-Base-Scaled-Reg-does-not-dominate-in.patch
+++ b/0001-CGP-Bail-out-if-Base-Scaled-Reg-does-not-dominate-in.patch
@ -1,131 +0,0 @@
 From dde30a47313bf52fef02bbcb1de931a8d725659f Mon Sep 17 00:00:00 2001
 From: Florian Hahn <flo@fhahn.com>
 Date: Fri, 6 Jun 2025 12:38:30 +0100
 Subject: [PATCH] [CGP] Bail out if (Base|Scaled)Reg does not dominate insert
 point. (#142949)
 (Base|Scaled)Reg may not dominate the chosen insert point, if there are
 multiple uses of the address. Bail out if that's the case, otherwise we
 will generate invalid IR.
 In some cases, we could probably adjust the insert point or hoist the
 (Base|Scaled)Reg.
 Fixes https://github.com/llvm/llvm-project/issues/142830.
 PR: https://github.com/llvm/llvm-project/pull/142949
 ---
 llvm/lib/CodeGen/CodeGenPrepare.cpp           | 13 +++-
 .../X86/sink-addrmode-reg-does-not-geps.ll    | 76 +++++++++++++++++++
 2 files changed, 87 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll
 diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
 index 822ed6283117..32348a899683 100644
 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
 +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5945,8 +5945,17 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   // The current BB may be optimized multiple times, we can't guarantee the
   // reuse of Addr happens later, call findInsertPos to find an appropriate
   // insert position.
 -  IRBuilder<> Builder(MemoryInst->getParent(),
 -                      findInsertPos(Addr, MemoryInst, SunkAddr));
 +  auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
 +
 +  // TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
 +  if (!SunkAddr) {
 +    auto &DT = getDT(*MemoryInst->getFunction());
 +    if ((AddrMode.BaseReg && !DT.dominates(AddrMode.BaseReg, &*InsertPos)) ||
 +        (AddrMode.ScaledReg && !DT.dominates(AddrMode.ScaledReg, &*InsertPos)))
 +      return Modified;
 +  }
 +
 +  IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
   if (SunkAddr) {
     LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
 diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll
 new file mode 100644
 index 000000000000..1640bafbd0bf
 --- /dev/null
 +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll
@@ -0,0 +1,76 @@
 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 +; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' %s | FileCheck %s
 +
 +target triple = "x86_64-unknown-linux"
 +
 +declare i1 @cond(float)
 +
 +define void @scaled_reg_does_not_dominate_insert_point(ptr %src) {
 +; CHECK-LABEL: define void @scaled_reg_does_not_dominate_insert_point(
 +; CHECK-SAME: ptr [[SRC:%.*]]) {
 +; CHECK-NEXT:  [[BB:.*]]:
 +; CHECK-NEXT:    br label %[[LOOP:.*]]
 +; CHECK:       [[LOOP]]:
 +; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[BB]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
 +; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 +; CHECK-NEXT:    [[SUNKADDR2:%.*]] = mul i64 [[IV_NEXT]], 2
 +; CHECK-NEXT:    [[SUNKADDR3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[SUNKADDR2]]
 +; CHECK-NEXT:    [[SUNKADDR4:%.*]] = getelementptr i8, ptr [[SUNKADDR3]], i64 6
 +; CHECK-NEXT:    [[L_0:%.*]] = load float, ptr [[SUNKADDR4]], align 4
 +; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[IV]], 2
 +; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[SUNKADDR]]
 +; CHECK-NEXT:    [[L_1:%.*]] = load float, ptr [[SUNKADDR1]], align 4
 +; CHECK-NEXT:    [[TMP0:%.*]] = call i1 @cond(float [[L_0]])
 +; CHECK-NEXT:    [[C:%.*]] = call i1 @cond(float [[L_1]])
 +; CHECK-NEXT:    br i1 [[C]], label %[[LOOP]], label %[[EXIT:.*]]
 +; CHECK:       [[EXIT]]:
 +; CHECK-NEXT:    ret void
 +;
 +bb:
 +  %gep.base = getelementptr i8, ptr %src, i64 8
 +  br label %loop
 +
 +loop:
 +  %iv = phi i64 [ 0, %bb ], [ %iv.next, %loop ]
 +  %iv.shl = shl i64 %iv, 1
 +  %gep.shl = getelementptr i8, ptr %gep.base, i64 %iv.shl
 +  %gep.sub = getelementptr i8, ptr %gep.shl, i64 -8
 +  %iv.next = add i64 %iv, 1
 +  %l.0 = load float, ptr %gep.shl, align 4
 +  %l.1 = load float, ptr %gep.sub, align 4
 +  call i1 @cond(float %l.0)
 +  %c = call i1 @cond(float %l.1)
 +  br i1 %c, label %loop, label %exit
 +
 +exit:
 +  ret void
 +}
 +
 +define void @check_dt_after_modifying_cfg(ptr %dst, i64 %x, i8 %y, i8 %z) {
 +; CHECK-LABEL: define void @check_dt_after_modifying_cfg(
 +; CHECK-SAME: ptr [[DST:%.*]], i64 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]]) {
 +; CHECK-NEXT:  [[ENTRY:.*]]:
 +; CHECK-NEXT:    [[OFFSET:%.*]] = lshr i64 [[X]], 2
 +; CHECK-NEXT:    [[SEL_FROZEN:%.*]] = freeze i8 [[Z]]
 +; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[SEL_FROZEN]], 0
 +; CHECK-NEXT:    br i1 [[CMP]], label %[[SELECT_END:.*]], label %[[SELECT_FALSE_SINK:.*]]
 +; CHECK:       [[SELECT_FALSE_SINK]]:
 +; CHECK-NEXT:    [[SMIN:%.*]] = tail call i8 @llvm.smin.i8(i8 [[Y]], i8 0)
 +; CHECK-NEXT:    br label %[[SELECT_END]]
 +; CHECK:       [[SELECT_END]]:
 +; CHECK-NEXT:    [[SEL:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[SMIN]], %[[SELECT_FALSE_SINK]] ]
 +; CHECK-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET]]
 +; CHECK-NEXT:    store i8 [[SEL]], ptr [[SUNKADDR]], align 1
 +; CHECK-NEXT:    ret void
 +;
 +entry:
 +  %offset = lshr i64 %x, 2
 +  %gep.dst = getelementptr i8, ptr %dst, i64 %offset
 +  %smin = tail call i8 @llvm.smin.i8(i8 %y, i8 0)
 +  %cmp = icmp slt i8 %z, 0
 +  %sel = select i1 %cmp, i8 0, i8 %smin
 +  store i8 %sel, ptr %gep.dst, align 1
 +  ret void
 +}
 +
 +declare i8 @llvm.smin.i8(i8, i8) #0
 -- 
 2.50.1
--- a/0001-CodeGenPrepare-Make-sure-instruction-get-from-SunkAd.patch
+++ b/0001-CodeGenPrepare-Make-sure-instruction-get-from-SunkAd.patch
@ -1,143 +0,0 @@
 From c76137f1cfd5758f6889236d49a65f059e6432ff Mon Sep 17 00:00:00 2001
 From: weiguozhi <57237827+weiguozhi@users.noreply.github.com>
 Date: Thu, 15 May 2025 09:27:25 -0700
 Subject: [PATCH] [CodeGenPrepare] Make sure instruction get from SunkAddrs is
 before MemoryInst (#139303)
 Function optimizeBlock may do optimizations on a block for multiple
 times. In the first iteration of the loop, MemoryInst1 may generate a
 sunk instruction and store it into SunkAddrs. In the second iteration of
 the loop, MemoryInst2 may use the same address and then it can reuse the
 sunk instruction stored in SunkAddrs, but MemoryInst2 may be before
 MemoryInst1 and the corresponding sunk instruction. In order to avoid
 use before def error, we need to find appropriate insert position for the
 sunk instruction.
 Fixes #138208.
 (cherry picked from commit 59c6d70ed8120b8864e5f796e2bf3de5518a0ef0)
 ---
 llvm/lib/CodeGen/CodeGenPrepare.cpp           | 41 ++++++++++++++---
 .../CodeGenPrepare/X86/sink-addr-reuse.ll     | 44 +++++++++++++++++++
 2 files changed, 80 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
 diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
 index 088062afab17..f779f4b782ae 100644
 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
 +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5728,6 +5728,35 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
   return false;
 }
 +// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
 +// is the first instruction that will use Addr. So we need to find the first
 +// user of Addr in current BB.
 +static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
 +                                          Value *SunkAddr) {
 +  if (Addr->hasOneUse())
 +    return MemoryInst->getIterator();
 +
 +  // We already have a SunkAddr in current BB, but we may need to insert cast
 +  // instruction after it.
 +  if (SunkAddr) {
 +    if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
 +      return std::next(AddrInst->getIterator());
 +  }
 +
 +  // Find the first user of Addr in current BB.
 +  Instruction *Earliest = MemoryInst;
 +  for (User *U : Addr->users()) {
 +    Instruction *UserInst = dyn_cast<Instruction>(U);
 +    if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
 +      if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
 +        continue;
 +      if (UserInst->comesBefore(Earliest))
 +        Earliest = UserInst;
 +    }
 +  }
 +  return Earliest->getIterator();
 +}
 +
 /// Sink addressing mode computation immediate before MemoryInst if doing so
 /// can be done without increasing register pressure.  The need for the
 /// register pressure constraint means this can end up being an all or nothing
@@ -5852,11 +5881,6 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
     return Modified;
   }
 -  // Insert this computation right after this user.  Since our caller is
 -  // scanning from the top of the BB to the bottom, reuse of the expr are
 -  // guaranteed to happen later.
 -  IRBuilder<> Builder(MemoryInst);
 -
   // Now that we determined the addressing expression we want to use and know
   // that we have to sink it into this block.  Check to see if we have already
   // done this for some other load/store instr in this block.  If so, reuse
@@ -5867,6 +5891,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
   Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
   Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
 +
 +  // The current BB may be optimized multiple times, we can't guarantee the
 +  // reuse of Addr happens later, call findInsertPos to find an appropriate
 +  // insert position.
 +  IRBuilder<> Builder(MemoryInst->getParent(),
 +                      findInsertPos(Addr, MemoryInst, SunkAddr));
 +
   if (SunkAddr) {
     LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
                       << " for " << *MemoryInst << "\n");
 diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
 new file mode 100644
 index 000000000000..019f31140655
 --- /dev/null
 +++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
@@ -0,0 +1,44 @@
 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 +; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s
 +
 +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 +target triple = "x86_64-grtev4-linux-gnu"
 +
 +declare void @g(ptr)
 +
 +; %load and %load5 use the same address, %load5 is optimized first, %load is
 +; optimized later and reuse the same address computation instruction. We must
 +; make sure not to generate use before def error.
 +
 +define void @f(ptr %arg) {
 +; CHECK-LABEL: define void @f(
 +; CHECK-SAME: ptr [[ARG:%.*]]) {
 +; CHECK-NEXT:  [[BB:.*:]]
 +; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
 +; CHECK-NEXT:    call void @g(ptr [[GETELEMENTPTR]])
 +; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
 +; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
 +; CHECK-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56
 +; CHECK-NEXT:    [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8
 +; CHECK-NEXT:    [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
 +; CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0)
 +; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
 +; CHECK-NEXT:    ret void
 +;
 +bb:
 +  %getelementptr = getelementptr i8, ptr %arg, i64 -64
 +  %getelementptr1 = getelementptr i8, ptr %arg, i64 -56
 +  call void @g(ptr %getelementptr)
 +  br label %bb3
 +
 +bb3:
 +  %load = load ptr, ptr %getelementptr, align 8
 +  %load4 = load i32, ptr %getelementptr1, align 8
 +  %load5 = load ptr, ptr %getelementptr, align 8
 +  %add = add i32 1, 0
 +  %icmp = icmp eq i32 %add, 0
 +  br i1 %icmp, label %bb7, label %bb7
 +
 +bb7:
 +  ret void
 +}
 -- 
 2.49.0
--- a/0001-PowerPC-Fix-handling-of-undefs-in-the-PPC-isSplatShu.patch
+++ b/0001-PowerPC-Fix-handling-of-undefs-in-the-PPC-isSplatShu.patch
@ -1,67 +0,0 @@
 From 735d721de451067c3a618b309703d0b8beb9cacc Mon Sep 17 00:00:00 2001
 From: Wael Yehia <wmyehia2001@yahoo.com>
 Date: Mon, 23 Jun 2025 13:22:33 -0400
 Subject: [PATCH] [PowerPC] Fix handling of undefs in the
 PPC::isSplatShuffleMask query (#145149)
 Currently, the query assumes that a single undef byte implies the rest of
 the `EltSize - 1` bytes are undefs, but that's not always true.
 e.g. isSplatShuffleMask(
 <0,1,2,3,4,5,6,7,undef,undef,undef,undef,0,1,2,3>, 8) should return
 false.
 ---------
 Co-authored-by: Wael Yehia <wyehia@ca.ibm.com>
 ---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 13 +++++++++----
 llvm/test/CodeGen/PowerPC/pr141642.ll       | 13 +++++++++++++
 2 files changed, 22 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/pr141642.ll
 diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
 index 421a808de667..88c6fe632d26 100644
 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
 +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2242,10 +2242,15 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
       return false;
   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
 -    if (N->getMaskElt(i) < 0) continue;
 -    for (unsigned j = 0; j != EltSize; ++j)
 -      if (N->getMaskElt(i+j) != N->getMaskElt(j))
 -        return false;
 +    // An UNDEF element is a sequence of UNDEF bytes.
 +    if (N->getMaskElt(i) < 0) {
 +      for (unsigned j = 1; j != EltSize; ++j)
 +        if (N->getMaskElt(i + j) >= 0)
 +          return false;
 +    } else
 +      for (unsigned j = 0; j != EltSize; ++j)
 +        if (N->getMaskElt(i + j) != N->getMaskElt(j))
 +          return false;
   }
   return true;
 }
 diff --git a/llvm/test/CodeGen/PowerPC/pr141642.ll b/llvm/test/CodeGen/PowerPC/pr141642.ll
 new file mode 100644
 index 000000000000..38a706574786
 --- /dev/null
 +++ b/llvm/test/CodeGen/PowerPC/pr141642.ll
@@ -0,0 +1,13 @@
 +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O0 -debug-only=selectiondag -o - < %s 2>&1 | \
 +; RUN:  FileCheck %s
 +; CHECK-NOT: lxvdsx
 +; CHECK-NOT: LD_SPLAT
 +
 +define weak_odr dso_local void @unpack(ptr noalias noundef %packed_in) local_unnamed_addr {
 +entry:
 +  %ld = load <2 x i32>, ptr %packed_in, align 2
 +  %shuf = shufflevector <2 x i32> %ld, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 0>
 +  %ie = insertelement <4 x i32> %shuf, i32 7, i32 2
 +  store <4 x i32> %shuf, ptr %packed_in, align 2
 +  ret void
 +}
 -- 
 2.49.0
--- a/0001-SystemZ-Fix-ICE-with-i128-i64-uaddo-carry-chain.patch
+++ b/0001-SystemZ-Fix-ICE-with-i128-i64-uaddo-carry-chain.patch
@ -1,81 +0,0 @@
 From 6d5697f7cb4e933d2f176c46b7ac05a9cbaeb8b6 Mon Sep 17 00:00:00 2001
 From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
 Date: Thu, 23 Jan 2025 19:11:18 +0100
 Subject: [PATCH] [SystemZ] Fix ICE with i128->i64 uaddo carry chain
 We can only optimize a uaddo_carry via specialized instruction
 if the carry was produced by another uaddo(_carry) instruction;
 there is already a check for that.
 However, i128 uaddo(_carry) use a completely different mechanism;
 they indicate carry in a vector register instead of the CC flag.
 Thus, we must also check that we don't mix those two - that check
 has been missing.
 Fixes: https://github.com/llvm/llvm-project/issues/124001
 ---
 .../Target/SystemZ/SystemZISelLowering.cpp    | 12 ++++++----
 llvm/test/CodeGen/SystemZ/pr124001.ll         | 23 +++++++++++++++++++
 2 files changed, 31 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/SystemZ/pr124001.ll
 diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
 index 4040ab6d4510..1fb31c26e20d 100644
 --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
 +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -4708,15 +4708,19 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
 }
 static bool isAddCarryChain(SDValue Carry) {
 -  while (Carry.getOpcode() == ISD::UADDO_CARRY)
 +  while (Carry.getOpcode() == ISD::UADDO_CARRY &&
 +         Carry->getValueType(0) != MVT::i128)
     Carry = Carry.getOperand(2);
 -  return Carry.getOpcode() == ISD::UADDO;
 +  return Carry.getOpcode() == ISD::UADDO &&
 +         Carry->getValueType(0) != MVT::i128;
 }
 static bool isSubBorrowChain(SDValue Carry) {
 -  while (Carry.getOpcode() == ISD::USUBO_CARRY)
 +  while (Carry.getOpcode() == ISD::USUBO_CARRY &&
 +         Carry->getValueType(0) != MVT::i128)
     Carry = Carry.getOperand(2);
 -  return Carry.getOpcode() == ISD::USUBO;
 +  return Carry.getOpcode() == ISD::USUBO &&
 +         Carry->getValueType(0) != MVT::i128;
 }
 // Lower UADDO_CARRY/USUBO_CARRY nodes.
 diff --git a/llvm/test/CodeGen/SystemZ/pr124001.ll b/llvm/test/CodeGen/SystemZ/pr124001.ll
 new file mode 100644
 index 000000000000..9cf630a55dd6
 --- /dev/null
 +++ b/llvm/test/CodeGen/SystemZ/pr124001.ll
@@ -0,0 +1,23 @@
 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 +
 +define i64 @test(i128 %in) {
 +; CHECK-LABEL: test:
 +; CHECK:       # %bb.0:
 +; CHECK-NEXT:    larl %r1, .LCPI0_0
 +; CHECK-NEXT:    vl %v0, 0(%r2), 3
 +; CHECK-NEXT:    vl %v1, 0(%r1), 3
 +; CHECK-NEXT:    vaccq %v0, %v0, %v1
 +; CHECK-NEXT:    vlgvg %r1, %v0, 1
 +; CHECK-NEXT:    la %r2, 1(%r1)
 +; CHECK-NEXT:    br %r14
 +  %1 = tail call { i128, i1 } @llvm.uadd.with.overflow.i128(i128 %in, i128 1)
 +  %2 = extractvalue { i128, i1 } %1, 1
 +  %3 = zext i1 %2 to i64
 +  %4 = add i64 %3, 1
 +  ret i64 %4
 +}
 +
 +declare { i128, i1 } @llvm.uadd.with.overflow.i128(i128, i128) #0
 +
 +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 -- 
 2.48.1
--- a/0001-clang-Add-a-hack-to-fix-the-offload-build-with-the-m.patch
+++ b/0001-clang-Add-a-hack-to-fix-the-offload-build-with-the-m.patch
@ -1,27 +0,0 @@
 From f028fc042ef2875a13c6abf3828626a313e4a8e6 Mon Sep 17 00:00:00 2001
 From: Tom Stellard <tstellar@redhat.com>
 Date: Fri, 1 Aug 2025 15:38:22 +0000
 Subject: [PATCH] clang: Add a hack to fix the offload build with the
 mtls-dialect option
 ---
 clang/lib/Driver/ToolChains/CommonArgs.cpp | 3 +++
 1 file changed, 3 insertions(+)
 diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
 index 097d186ad8ea..0dc9e60f8428 100644
 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
 +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -920,6 +920,9 @@ bool tools::isTLSDESCEnabled(const ToolChain &TC,
   } else if (Triple.isX86()) {
     SupportedArgument = V == "gnu" || V == "gnu2";
     EnableTLSDESC = V == "gnu2";
 +  } else if( Triple.isGPU()) {
 +    // HACK To fix the offload build.
 +    return false;
   } else {
     Unsupported = true;
   }
 -- 
 2.49.0
--- a/0001-cmake-Resolve-symlink-when-finding-install-prefix.patch
+++ b/0001-cmake-Resolve-symlink-when-finding-install-prefix.patch
@ -1,39 +0,0 @@
 From 06774eb8a7dc0bc36b59e53310c7f5b5d89f6c29 Mon Sep 17 00:00:00 2001
 From: Nikita Popov <npopov@redhat.com>
 Date: Tue, 28 Jan 2025 12:31:49 +0100
 Subject: [PATCH] [cmake] Resolve symlink when finding install prefix
 When determining the install prefix in LLVMConfig.cmake etc resolve
 symlinks in CMAKE_CURRENT_LIST_FILE first. The motivation for this
 is to support symlinks like `/usr/lib64/cmake/llvm` to
 `/usr/lib64/llvm19/lib/cmake/llvm`. This only works correctly if
 the paths are relative to the resolved symlink.
 It's worth noting that this *mostly* already works out of the box,
 because cmake automatically does the symlink resolution when the
 library is found via CMAKE_PREFIX_PATH. It just doesn't happen
 when it's found via the default prefix path.
 ---
 cmake/Modules/FindPrefixFromConfig.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
 diff --git a/cmake/Modules/FindPrefixFromConfig.cmake b/cmake/Modules/FindPrefixFromConfig.cmake
 index 22211e4b72f2..3daff607ff84 100644
 --- a/cmake/Modules/FindPrefixFromConfig.cmake
 +++ b/cmake/Modules/FindPrefixFromConfig.cmake
@@ -39,10 +39,10 @@ function(find_prefix_from_config out_var prefix_var path_to_leave)
     # install prefix, and avoid hard-coding any absolute paths.
     set(config_code
       "# Compute the installation prefix from this LLVMConfig.cmake file location."
 -      "get_filename_component(${prefix_var} \"\${CMAKE_CURRENT_LIST_FILE}\" PATH)")
 +      "get_filename_component(${prefix_var} \"\${CMAKE_CURRENT_LIST_FILE}\" REALPATH)")
     # Construct the proper number of get_filename_component(... PATH)
     # calls to compute the installation prefix.
 -    string(REGEX REPLACE "/" ";" _count "${path_to_leave}")
 +    string(REGEX REPLACE "/" ";" _count "${path_to_leave}/plus_one")
     foreach(p ${_count})
       list(APPEND config_code
         "get_filename_component(${prefix_var} \"\${${prefix_var}}\" PATH)")
 -- 
 2.48.1
--- a/20-131099.patch
+++ b/20-131099.patch
@ -1,28 +0,0 @@
 From e43271ec7438ecb78f99db134aeca274a47f6c28 Mon Sep 17 00:00:00 2001
 From: Konrad Kleine <kkleine@redhat.com>
 Date: Thu, 13 Mar 2025 09:12:24 +0100
 Subject: [PATCH] Filter out configuration file from compile commands
 The commands to run the compilation when printed with `-###` contain
 various irrelevant lines for the perf-training. Most of them are
 filtered out already but when configured with
 `CLANG_CONFIG_FILE_SYSTEM_DIR` a new line like the following is
 added and needs to be filtered out:
 `Configuration file: /etc/clang/x86_64-redhat-linux-gnu-clang.cfg`
 ---
 clang/utils/perf-training/perf-helper.py | 1 +
 1 file changed, 1 insertion(+)
 diff --git a/clang/utils/perf-training/perf-helper.py b/clang/utils/perf-training/perf-helper.py
 index 80c6356d0497c..29904aded5ab0 100644
 --- a/clang/utils/perf-training/perf-helper.py
 +++ b/clang/utils/perf-training/perf-helper.py
@@ -237,6 +237,7 @@ def get_cc1_command_for_args(cmd, env):
             or ln.startswith("InstalledDir:")
             or ln.startswith("LLVM Profile Note")
             or ln.startswith(" (in-process)")
 +            or ln.startswith("Configuration file:")
             or " version " in ln
         ):
             continue
--- a/21-146424.patch
+++ b/21-146424.patch
@ -1,94 +0,0 @@
 From eba58195932f37fb461ae17c69fc517181b99c9a Mon Sep 17 00:00:00 2001
 From: Paul Murphy <paumurph@redhat.com>
 Date: Mon, 30 Jun 2025 10:13:37 -0500
 Subject: [PATCH] [PowerPC] fix lowering of SPILL_CRBIT on pwr9 and pwr10
 If a copy exists between creation of a crbit and a spill, machine-cp
 may delete the copy since it seems unaware of the relation between a cr
 and crbit. A fix was previously made for the generic ppc64 lowering. It
 should be applied to the pwr9 and pwr10 variants too.
 Likewise, relax and extend the pwr8 test to verify pwr9 and pwr10
 codegen too.
 This fixes #143989.
 ---
 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp     | 17 +++++++++++------
 .../PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir |  8 +++++++-
 2 files changed, 18 insertions(+), 7 deletions(-)
 diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
 index 76dca4794e05..78d254a55fd9 100644
 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
 +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1102,13 +1102,20 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
     SpillsKnownBit = true;
     break;
   default:
 +    // When spilling a CR bit, The super register may not be explicitly defined
 +    // (i.e. it can be defined by a CR-logical that only defines the subreg) so
 +    // we state that the CR field is undef. Also, in order to preserve the kill
 +    // flag on the CR bit, we add it as an implicit use.
 +
     // On Power10, we can use SETNBC to spill all CR bits. SETNBC will set all
     // bits (specifically, it produces a -1 if the CR bit is set). Ultimately,
     // the bit that is of importance to us is bit 32 (bit 0 of a 32-bit
     // register), and SETNBC will set this.
     if (Subtarget.isISA3_1()) {
       BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETNBC8 : PPC::SETNBC), Reg)
 -          .addReg(SrcReg, RegState::Undef);
 +          .addReg(SrcReg, RegState::Undef)
 +          .addReg(SrcReg, RegState::Implicit |
 +                              getKillRegState(MI.getOperand(0).isKill()));
       break;
     }
@@ -1122,16 +1129,14 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
           SrcReg == PPC::CR4LT || SrcReg == PPC::CR5LT ||
           SrcReg == PPC::CR6LT || SrcReg == PPC::CR7LT) {
         BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETB8 : PPC::SETB), Reg)
 -          .addReg(getCRFromCRBit(SrcReg), RegState::Undef);
 +            .addReg(getCRFromCRBit(SrcReg), RegState::Undef)
 +            .addReg(SrcReg, RegState::Implicit |
 +                                getKillRegState(MI.getOperand(0).isKill()));
         break;
       }
     }
     // We need to move the CR field that contains the CR bit we are spilling.
 -    // The super register may not be explicitly defined (i.e. it can be defined
 -    // by a CR-logical that only defines the subreg) so we state that the CR
 -    // field is undef. Also, in order to preserve the kill flag on the CR bit,
 -    // we add it as an implicit use.
     BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
       .addReg(getCRFromCRBit(SrcReg), RegState::Undef)
       .addReg(SrcReg,
 diff --git a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir
 index 41e21248a3f0..2796cdb3ae87 100644
 --- a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir
 +++ b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir
@@ -1,6 +1,12 @@
 # RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -start-after \
 # RUN:   virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \
 # RUN:   -o - | FileCheck %s
 +# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -start-after \
 +# RUN:   virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \
 +# RUN:   -o - | FileCheck %s
 +# RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu -start-after \
 +# RUN:   virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \
 +# RUN:   -o - | FileCheck %s
 --- |
   ; ModuleID = 'a.ll'
@@ -30,7 +36,7 @@
   ; Function Attrs: nounwind
   declare void @llvm.stackprotector(ptr, ptr) #1
 -  attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
 +  attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
   attributes #1 = { nounwind }
   !llvm.ident = !{!0}
 -- 
 2.49.0
--- a/206
+++ b/206
@ -1,206 +0,0 @@
 # Running plain "make" prints the help text instead of building anything.
 .DEFAULT_GOAL=help
 # See ~/.config/mock/<CONFIG>.cfg or /etc/mock/<CONFIG>.cfg
 # Tweak this to centos-stream-9-x86_64 to build for CentOS
 MOCK_CHROOT?=fedora-rawhide-x86_64
 # Extra user-supplied options that are appended to every mock build.
 MOCK_OPTS?=
 # Options for release mock builds: keep the chroot around between runs,
 # build without lto_build and pgo, and define debug_package as %{nil}.
 MOCK_OPTS_RELEASE?=--no-clean --no-cleanup-after --without lto_build --without pgo --define "debug_package %{nil}" $(MOCK_OPTS)
 # Snapshot builds reuse the release options and add "--with snapshot_build".
 # NOTE: $(MOCK_OPTS) already arrives through $(MOCK_OPTS_RELEASE), so it is
 # passed to mock twice for snapshot builds.
 MOCK_OPTS_SNAPSHOT?=$(MOCK_OPTS_RELEASE) --with snapshot_build $(MOCK_OPTS)
 # Date stamp handed to .copr/snapshot-info.sh; defaults to today.
 YYYYMMDD?=$(shell date +%Y%m%d)
 # Directory used for sources, spec and build output. Simple assignment so
 # that "pwd" is forked once instead of on every expansion. The space before
 # ":=" is deliberate: the help target's awk pattern treats "name:" at the
 # start of a line as a target.
 SOURCEDIR := $(shell pwd)
 # The spec file that all rpmbuild/spectool invocations operate on.
 SPEC=llvm.spec
 # When nothing is given, this will be determined based on release or snapshot
 # builds.
 SRPM_PATH?=
 # Provide a path to your local llvm-project clone to build a snapshot of that
 # tree.
 GIT_TREE?=
 ######### Get sources
 # Writes version.spec.inc via .copr/snapshot-info.sh and then obtains the
 # snapshot sources. Without GIT_TREE, spectool downloads them into
 # $(SOURCEDIR). With GIT_TREE, HEAD of that clone is archived as
 # <revision>.tar.gz with the prefix llvm-project-<revision>/.
 # The archive is written to $(SOURCEDIR), the directory every other rule
 # passes as _sourcedir, rather than to the PWD environment variable, which
 # is not updated by "make -C" and may be unset.
 # The ifeq is evaluated when the Makefile is parsed, so GIT_TREE has to be
 # set on the command line or in the environment.
 .PHONY: get-sources-snapshot
 ## Downloads all sources we need for a snapshot build.
 get-sources-snapshot:
 	YYYYMMDD=$(YYYYMMDD) GIT_TREE=$(GIT_TREE) ./.copr/snapshot-info.sh > $(SOURCEDIR)/version.spec.inc
 ifeq ($(GIT_TREE),)
 	spectool -g --define "_sourcedir $(SOURCEDIR)" --define "_with_snapshot_build 1" $(SPEC)
 else
 	$(info Creating tarball from git tree: $(GIT_TREE))
 	$(eval llvm_snapshot_git_revision:=$(shell git -C $(GIT_TREE) rev-parse HEAD))
 	git -C $(GIT_TREE) archive --format=tar.gz -o $(SOURCEDIR)/$(llvm_snapshot_git_revision).tar.gz --prefix=llvm-project-$(llvm_snapshot_git_revision)/ HEAD
 endif
 # Release counterpart of get-sources-snapshot: spectool fetches the sources
 # referenced by $(SPEC) into $(SOURCEDIR).
 .PHONY: get-sources-release
 ## Downloads all sources we need for a release build.
 get-sources-release:
 	spectool \
 		--get-files \
 		--define "_sourcedir $(SOURCEDIR)" \
 		$(SPEC)
 ######### Build SRPM
 # Builds the source RPM (-bs) once the release sources are downloaded.
 # All rpm directory macros point at $(SOURCEDIR), so input and output live
 # next to the spec file.
 .PHONY: srpm-release
 ## Builds an SRPM that can be used for a release build.
 srpm-release: get-sources-release
 	rpmbuild -bs \
 		--define "_sourcedir $(SOURCEDIR)" \
 		--define "_specdir $(SOURCEDIR)" \
 		--define "_builddir $(SOURCEDIR)" \
 		--define "_rpmdir $(SOURCEDIR)" \
 		--define "_srcrpmdir $(SOURCEDIR)" \
 		$(SPEC)
 # Same as srpm-release, but depends on the snapshot sources and enables the
 # snapshot_build conditional of the spec file.
 .PHONY: srpm-snapshot
 ## Builds an SRPM that can be used for a snapshot build.
 srpm-snapshot: get-sources-snapshot
 	rpmbuild -bs \
 		--with=snapshot_build \
 		--define "_sourcedir $(SOURCEDIR)" \
 		--define "_specdir $(SOURCEDIR)" \
 		--define "_builddir $(SOURCEDIR)" \
 		--define "_rpmdir $(SOURCEDIR)" \
 		--define "_srcrpmdir $(SOURCEDIR)" \
 		$(SPEC)
 ######### Scrub mock chroot and cache
 # Asks mock to scrub everything it keeps for the chroot named by
 # $(MOCK_CHROOT).
 .PHONY: scrub-chroot
 ## Completely remove the fedora chroot and cache.
 scrub-chroot:
 	mock --root=$(MOCK_CHROOT) --scrub=all
 ######### Do a mock build
 # Builds the release SRPM first, then resolves its file name into
 # $(srpm_path) (see get-srpm-release) and hands it to mock.
 .PHONY: mockbuild-release
 ## Start a mock build of the release SRPM.
 mockbuild-release: srpm-release get-srpm-release
 	mock --root=$(MOCK_CHROOT) \
 		$(MOCK_OPTS_RELEASE) \
 		$(srpm_path)
 # Builds the snapshot SRPM first, then resolves its file name into
 # $(srpm_path) (see get-srpm-snapshot) and hands it to mock.
 .PHONY: mockbuild-snapshot
 ## Start a mock build of the snapshot SRPM.
 mockbuild-snapshot: srpm-snapshot get-srpm-snapshot
 	mock --root=$(MOCK_CHROOT) \
 		$(MOCK_OPTS_SNAPSHOT) \
 		$(srpm_path)
 ######### Edit-last-failing-script
 # Helper for the two targets below. When the recipe is expanded, $(eval)
 # stores the chroot-relative path of the newest entry in the chroot's
 # /var/tmp (first line of "ls -t1") in the make variable last_run_script and
 # $(info) prints it. The only shell command is a silent echo to /dev/null,
 # presumably there so the rule has a recipe and make stays quiet.
 .PHONY: get-last-run-script
 ## Get the file that was last modified in /var/tmp/ within the chroot.
 get-last-run-script:
 	$(eval last_run_script:=/var/tmp/$(shell ls -t1 /var/lib/mock/$(MOCK_CHROOT)/root/var/tmp | head -n1))
 	$(info last_run_script=$(last_run_script))
 	@echo > /dev/null
 # Opens the script found by get-last-run-script through its host-side path
 # below /var/lib/mock/$(MOCK_CHROOT)/root. "$$" passes a literal "$" to the
 # shell so the editor comes from the user's environment. When EDITOR is
 # unset or empty the recipe falls back to vi instead of trying to execute
 # the script path itself as a command.
 .PHONY: edit-last-failing-script
 ## Opens the last failing or running script from mock in your editor
 ## of choice for you to edit it and later re-run it in mock with:
 ## "make mockbuild-rerun-last-script".
 edit-last-failing-script: get-last-run-script
 	$${EDITOR:-vi} /var/lib/mock/$(MOCK_CHROOT)/root$(last_run_script)
 ######### Re-run the last failing script from mock
 # Runs the script located by get-last-run-script inside the chroot with
 # "sh -e", so it stops at the first failing command.
 .PHONY: mockbuild-rerun-last-script
 ## Re-runs the last failing or running script of your release/snapshot mock mockbuild.
 mockbuild-rerun-last-script: get-last-run-script
 	mock -r $(MOCK_CHROOT) --shell 'sh -e $(last_run_script)'
 # Drops into bash inside the chroot named by $(MOCK_CHROOT).
 .PHONY: mock-shell
 ## Run an interactive mock shell with bash
 mock-shell:
 	mock -r $(MOCK_CHROOT) --shell bash
 ######### Help debug inside mock environment
 # Installs the debugging packages into the chroot, downloads the
 # gdb-dashboard .gdbinit into the current directory and copies it to
 # /builddir/.gdbinit inside the chroot.
 # curl runs with --fail (-f) so an HTTP error aborts the recipe instead of
 # saving the error page as .gdbinit, and with -S so the error is shown even
 # though the progress output is silenced.
 .PHONY: mock-install-debugging-tools
 ## This will install gdb, gdb-dashboard, vim, valgrind, lldb and
 ## other tools into your mock environment for you to debug any
 ## problems.
 mock-install-debugging-tools:
 	mock --root=$(MOCK_CHROOT) --install python3-pygments vim gdb lldb python3-rpm valgrind
 	curl -fsSLO https://github.com/cyrus-and/gdb-dashboard/raw/master/.gdbinit
 	mock --root=$(MOCK_CHROOT) --copyin .gdbinit /builddir/.gdbinit
 .PHONY: help
 # Based on https://gist.github.com/rcmachado/af3db315e31383502660
 # The awk program scans $(MAKEFILE_LIST). For every line that starts with
 # "name:" it prints the name together with the "## " comment lines that
 # directly precede it, or "(No documentation)" if there are none. Further
 # "##" markers inside a multi-line comment become line breaks that are
 # indented to the description column.
 # The target name is cut out with substr() starting at index 1: awk strings
 # are 1-based, and a start index of 0 is handled differently by different
 # awk implementations, which can drop the last character of the name.
 # Plain "#" comments like this one must stay above the "##" line, because
 # everything between a "##" line and the target is appended to the help
 # message.
 ## Display this help text.
 help:
 	$(info Available targets)
 	$(info -----------------)
 	@awk '/^[a-zA-Z\-0-9]+:/ { \
 		helpMessage = match(lastLine, /^## (.*)/); \
 		helpCommand = substr($$1, 1, index($$1, ":")-1); \
 		if (helpMessage) { \
 			helpMessage = substr(lastLine, RSTART + 3, RLENGTH); \
 			gsub(/##/, "\n                                       ", helpMessage); \
 		} else { \
 			helpMessage = "(No documentation)"; \
 		} \
 		printf "%-37s - %s\n", helpCommand, helpMessage; \
 		lastLine = "" \
 	} \
 	{ hasComment = match(lastLine, /^## (.*)/); \
 		if (hasComment) { \
 			lastLine = lastLine$$0; \
 		} else { \
 			lastLine = $$0 \
 		} \
 	}' $(MAKEFILE_LIST)
 ######### Deprecated targets
 # Map deprecated targets to new targets
 # The replacement name is derived from $@, the alias currently being built,
 # rather than from $(MAKECMDGOALS). That way the sub-make only receives the
 # one mapped target, even when other goals were given on the command line or
 # the alias was reached as a prerequisite.
 # Both aliases share one rule header on purpose: a line that starts with
 # "name:" would be listed by the help target.
 .PHONY: snapshot-srpm release-srpm
 snapshot-srpm release-srpm:
 	$(eval mapped_target:=$(subst snapshot-srpm,srpm-snapshot,$@))
 	$(eval mapped_target:=$(subst release-srpm,srpm-release,$(mapped_target)))
 	$(info WARNING: "$@" is deprecated. Instead running "$(mapped_target)")
 	$(MAKE) $(mapped_target)
 ######### Version/Release helper targets to build name of SRPM
 # When the recipe is expanded, grep extracts the numbers that follow the
 # "%global maj_ver", "min_ver" and "patch_ver" definitions in $(SPEC) and
 # paste joins them with dots. The result is stored in the make variable
 # llvm_version_release and printed with $(info). The silent echo is the only
 # shell command that runs.
 .PHONY: get-llvm-version-release
 ## Determines the LLVM version given in the llvm.spec file.
 get-llvm-version-release:
 	$(eval llvm_version_release:=$(shell grep -ioP "%global\s+(maj|min|patch)_ver[^0-9]\K[0-9]+" $(SPEC) | paste -sd'.'))
 	$(info LLVM Release Version: $(llvm_version_release))
 	@echo > /dev/null
 # Same extraction as get-llvm-version-release, but it reads version.spec.inc
 # (relative to the current directory) and stores the dotted version in the
 # make variable llvm_version_snapshot.
 .PHONY: get-llvm-version-snapshot
 ## Determines the LLVM version given in the version.spec.inc file.
 get-llvm-version-snapshot:
 	$(eval llvm_version_snapshot:=$(shell grep -ioP "%global\s+(maj|min|patch)_ver[^0-9]\K[0-9]+" version.spec.inc | paste -sd'.'))
 	$(info LLVM Snapshot Version: $(llvm_version_snapshot))
 	@echo > /dev/null
 # Stores the leading digits of the "Release:" tag of $(SPEC) in the make
 # variable spec_file_release; anything after those digits is not captured.
 .PHONY: get-spec-file-release
 ## Parses the spec file for the Release: tag
 get-spec-file-release:
 	$(eval spec_file_release:=$(shell grep -ioP '^Release:\s*\K[0-9]+' $(SPEC)))
 	$(info LLVM Spec file Release: $(spec_file_release))
 	@echo > /dev/null
 # Sets the make variable srpm_path for mockbuild-release. Without SRPM_PATH
 # it is a glob built from the version and release found by the two
 # prerequisites; the "*" is left for the shell to expand when the path is
 # used in a recipe. The ifeq is evaluated when the Makefile is parsed, so
 # SRPM_PATH has to come from the command line or the environment.
 .PHONY: get-srpm-release
 ## Determines the name of the SRPM used for release builds
 ## Can be overridden by giving "make ... SRPM_PATH=foo.src.rpm".
 get-srpm-release: get-llvm-version-release get-spec-file-release
 ifeq ($(SRPM_PATH),)
 	$(eval srpm_path:=llvm-$(llvm_version_release)-$(spec_file_release).*.src.rpm)
 else
 	$(eval srpm_path:=$(SRPM_PATH))
 endif
 	$(info LLVM SRPM Release: $(srpm_path))
 	@echo > /dev/null
 # Sets the make variable srpm_path for mockbuild-snapshot. Without SRPM_PATH
 # it is a glob built from the snapshot version, the snapshot date and short
 # git revision read from version.spec.inc, and the spec file release; the
 # "*" is left for the shell to expand when the path is used in a recipe.
 # The ifeq is evaluated when the Makefile is parsed, so SRPM_PATH has to
 # come from the command line or the environment.
 .PHONY: get-srpm-snapshot
 ## Determines the name of the SRPM used for snapshot builds
 ## Can be overridden by giving "make ... SRPM_PATH=foo.src.rpm".
 get-srpm-snapshot: get-llvm-version-snapshot get-spec-file-release
 ifeq ($(SRPM_PATH),)
 	$(eval yyyymmdd:=$(shell grep -ioP "%global\s+llvm_snapshot_yyyymmdd\s+\K[0-9]+" version.spec.inc))
 	$(eval git_short:=$(shell grep -ioP "%global\s+llvm_snapshot_git_revision_short\s+\K[a-zA-Z0-9]+" version.spec.inc))
 	$(eval srpm_path:=llvm-$(llvm_version_snapshot)~pre$(yyyymmdd).g$(git_short)-$(spec_file_release).*.src.rpm)
 else
 	$(eval srpm_path:=$(SRPM_PATH))
 endif
 	$(info LLVM SRPM Snapshot: $(srpm_path))
 	@echo > /dev/null
--- a/SOURCES/0001-18-Always-build-shared-libs-for-LLD.patch
+++ b/SOURCES/0001-18-Always-build-shared-libs-for-LLD.patch
@ -0,0 +1,29 @@
 From b1c60d7fa322a2d208556087df9e7ef94bfbffb8 Mon Sep 17 00:00:00 2001
 From: Nikita Popov <npopov@redhat.com>
 Date: Wed, 8 May 2024 12:30:36 +0900
 Subject: [PATCH] Always build shared libs for LLD
 We don't want to enable BUILD_SHARED_LIBS for the whole build,
 but we do want to build lld libraries.
 ---
 lld/cmake/modules/AddLLD.cmake | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)
 diff --git a/lld/cmake/modules/AddLLD.cmake b/lld/cmake/modules/AddLLD.cmake
 index 2ee066b41535..270c03f096ac 100644
 --- a/lld/cmake/modules/AddLLD.cmake
 +++ b/lld/cmake/modules/AddLLD.cmake
@@ -7,9 +7,8 @@ macro(add_lld_library name)
     ""
     ""
     ${ARGN})
 -  if(ARG_SHARED)
 -    set(ARG_ENABLE_SHARED SHARED)
 -  endif()
 +  # Always build shared libs for LLD.
 +  set(ARG_ENABLE_SHARED SHARED)
   llvm_add_library(${name} ${ARG_ENABLE_SHARED} ${ARG_UNPARSED_ARGUMENTS})
   set_target_properties(${name} PROPERTIES FOLDER "lld libraries")
 -- 
 2.44.0
--- a/SOURCES/0001-19-Always-build-shared-libs-for-LLD.patch
+++ b/SOURCES/0001-19-Always-build-shared-libs-for-LLD.patch
--- a/SOURCES/0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch
+++ b/SOURCES/0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch
--- a/SOURCES/0001-Fix-page-size-constant-on-aarch64-and-ppc64le.patch
+++ b/SOURCES/0001-Fix-page-size-constant-on-aarch64-and-ppc64le.patch
--- a/SOURCES/0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch
+++ b/SOURCES/0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch
--- a/SOURCES/0001-Workaround-a-bug-in-ORC-on-ppc64le.patch
+++ b/SOURCES/0001-Workaround-a-bug-in-ORC-on-ppc64le.patch
--- a/SOURCES/0001-openmp-Add-option-to-disable-tsan-tests-111548.patch
+++ b/SOURCES/0001-openmp-Add-option-to-disable-tsan-tests-111548.patch
@ -0,0 +1,62 @@
 From b2edeb58b8cb3268acee425cd52b406eb60a8095 Mon Sep 17 00:00:00 2001
 From: Nikita Popov <npopov@redhat.com>
 Date: Wed, 9 Oct 2024 11:29:30 +0200
 Subject: [PATCH] [openmp] Add option to disable tsan tests (#111548)
 This adds an OPENMP_TEST_ENABLE_TSAN option that allows overriding
 whether tests using tsan will be enabled. The option defaults to the
 existing auto-detection.
 The background here is
 https://github.com/llvm/llvm-project/issues/111492, where we have some
 systems where tsan doesn't work, but we do still want to build it and
 run tests that don't use tsan.
 ---
 openmp/cmake/OpenMPTesting.cmake          | 3 +++
 openmp/tools/archer/tests/CMakeLists.txt  | 2 +-
 openmp/tools/archer/tests/lit.site.cfg.in | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)
 diff --git a/openmp/cmake/OpenMPTesting.cmake b/openmp/cmake/OpenMPTesting.cmake
 index c67ad8b1cbd9..14cc5c67d84c 100644
 --- a/openmp/cmake/OpenMPTesting.cmake
 +++ b/openmp/cmake/OpenMPTesting.cmake
@@ -163,6 +163,9 @@ else()
   set(OPENMP_TEST_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS 1)
 endif()
 +set(OPENMP_TEST_ENABLE_TSAN "${OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS}" CACHE BOOL
 +    "Whether to enable tests using tsan")
 +
 # Function to set compiler features for use in lit.
 function(update_test_compiler_features)
   set(FEATURES "[")
 diff --git a/openmp/tools/archer/tests/CMakeLists.txt b/openmp/tools/archer/tests/CMakeLists.txt
 index 5de91148fa4b..412c7d63725e 100644
 --- a/openmp/tools/archer/tests/CMakeLists.txt
 +++ b/openmp/tools/archer/tests/CMakeLists.txt
@@ -28,7 +28,7 @@ macro(pythonize_bool var)
 endmacro()
 pythonize_bool(LIBARCHER_HAVE_LIBATOMIC)
 -pythonize_bool(OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS)
 +pythonize_bool(OPENMP_TEST_ENABLE_TSAN)
 set(ARCHER_TSAN_TEST_DEPENDENCE "")
 if(TARGET tsan)
 diff --git a/openmp/tools/archer/tests/lit.site.cfg.in b/openmp/tools/archer/tests/lit.site.cfg.in
 index 55edfde9738e..ddcb7b8bc3a5 100644
 --- a/openmp/tools/archer/tests/lit.site.cfg.in
 +++ b/openmp/tools/archer/tests/lit.site.cfg.in
@@ -12,7 +12,7 @@ config.omp_library_dir = "@LIBOMP_LIBRARY_DIR@"
 config.omp_header_dir = "@LIBOMP_INCLUDE_DIR@"
 config.operating_system = "@CMAKE_SYSTEM_NAME@"
 config.has_libatomic = @LIBARCHER_HAVE_LIBATOMIC@
 -config.has_tsan = @OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS@
 +config.has_tsan = @OPENMP_TEST_ENABLE_TSAN@
 config.test_archer_flags = "@LIBARCHER_TEST_FLAGS@"
 config.libarcher_obj_root = "@CMAKE_CURRENT_BINARY_DIR@"
 -- 
 2.46.0
--- a/SOURCES/0001-openmp-Use-core_siblings_list-if-physical_package_id.patch
+++ b/SOURCES/0001-openmp-Use-core_siblings_list-if-physical_package_id.patch
@ -0,0 +1,205 @@
 From 5fb4d7f6079a76b2907ccc8c53c7c509c30a3dca Mon Sep 17 00:00:00 2001
 From: Nikita Popov <npopov@redhat.com>
 Date: Thu, 10 Oct 2024 12:47:33 +0000
 Subject: [PATCH] [openmp] Use core_siblings_list if physical_package_id not
 available
 On powerpc, physical_package_id may not be available. Currently,
 this causes openmp to fall back to flat topology and various
 affinity tests fail.
 Fix this by parsing core_siblings_list to determine which cpus
 belong to the same socket. This matches what the testing code
 does. The code to parse the CPU list format thankfully already
 exists.
 Fixes https://github.com/llvm/llvm-project/issues/111809.
 ---
 openmp/runtime/src/kmp_affinity.cpp          | 100 +++++++++++++------
 openmp/runtime/test/affinity/kmp-hw-subset.c |   2 +-
 2 files changed, 72 insertions(+), 30 deletions(-)
 diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
 index cf5cad04eb57..c3d5ecf1345e 100644
 --- a/openmp/runtime/src/kmp_affinity.cpp
 +++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -1589,15 +1589,13 @@ kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
   return buf;
 }
 -// Return (possibly empty) affinity mask representing the offline CPUs
 -// Caller must free the mask
 -kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
 -  kmp_affin_mask_t *offline;
 -  KMP_CPU_ALLOC(offline);
 -  KMP_CPU_ZERO(offline);
 +static kmp_affin_mask_t *__kmp_parse_cpu_list(const char *path) {
 +  kmp_affin_mask_t *mask;
 +  KMP_CPU_ALLOC(mask);
 +  KMP_CPU_ZERO(mask);
 #if KMP_OS_LINUX
   int n, begin_cpu, end_cpu;
 -  kmp_safe_raii_file_t offline_file;
 +  kmp_safe_raii_file_t file;
   auto skip_ws = [](FILE *f) {
     int c;
     do {
@@ -1606,29 +1604,29 @@ kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
     if (c != EOF)
       ungetc(c, f);
   };
 -  // File contains CSV of integer ranges representing the offline CPUs
 +  // File contains CSV of integer ranges representing the CPUs
   // e.g., 1,2,4-7,9,11-15
 -  int status = offline_file.try_open("/sys/devices/system/cpu/offline", "r");
 +  int status = file.try_open(path, "r");
   if (status != 0)
 -    return offline;
 -  while (!feof(offline_file)) {
 -    skip_ws(offline_file);
 -    n = fscanf(offline_file, "%d", &begin_cpu);
 +    return mask;
 +  while (!feof(file)) {
 +    skip_ws(file);
 +    n = fscanf(file, "%d", &begin_cpu);
     if (n != 1)
       break;
 -    skip_ws(offline_file);
 -    int c = fgetc(offline_file);
 +    skip_ws(file);
 +    int c = fgetc(file);
     if (c == EOF || c == ',') {
       // Just single CPU
       end_cpu = begin_cpu;
     } else if (c == '-') {
       // Range of CPUs
 -      skip_ws(offline_file);
 -      n = fscanf(offline_file, "%d", &end_cpu);
 +      skip_ws(file);
 +      n = fscanf(file, "%d", &end_cpu);
       if (n != 1)
         break;
 -      skip_ws(offline_file);
 -      c = fgetc(offline_file); // skip ','
 +      skip_ws(file);
 +      c = fgetc(file); // skip ','
     } else {
       // Syntax problem
       break;
@@ -1638,13 +1636,19 @@ kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
         end_cpu >= __kmp_xproc || begin_cpu > end_cpu) {
       continue;
     }
 -    // Insert [begin_cpu, end_cpu] into offline mask
 +    // Insert [begin_cpu, end_cpu] into mask
     for (int cpu = begin_cpu; cpu <= end_cpu; ++cpu) {
 -      KMP_CPU_SET(cpu, offline);
 +      KMP_CPU_SET(cpu, mask);
     }
   }
 #endif
 -  return offline;
 +  return mask;
 +}
 +
 +// Return (possibly empty) affinity mask representing the offline CPUs
 +// Caller must free the mask
 +kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
 +  return __kmp_parse_cpu_list("/sys/devices/system/cpu/offline");
 }
 // Return the number of available procs
@@ -3175,6 +3179,37 @@ static inline const char *__kmp_cpuinfo_get_envvar() {
   return envvar;
 }
 +static bool __kmp_package_id_from_core_siblings_list(unsigned **threadInfo,
 +                                                     unsigned num_avail,
 +                                                     unsigned idx) {
 +  if (!KMP_AFFINITY_CAPABLE())
 +    return false;
 +
 +  char path[256];
 +  KMP_SNPRINTF(path, sizeof(path),
 +               "/sys/devices/system/cpu/cpu%u/topology/core_siblings_list",
 +               threadInfo[idx][osIdIndex]);
 +  kmp_affin_mask_t *siblings = __kmp_parse_cpu_list(path);
 +  for (unsigned i = 0; i < num_avail; ++i) {
 +    unsigned cpu_id = threadInfo[i][osIdIndex];
 +    KMP_ASSERT(cpu_id < __kmp_affin_mask_size * CHAR_BIT);
 +    if (!KMP_CPU_ISSET(cpu_id, siblings))
 +      continue;
 +    if (threadInfo[i][pkgIdIndex] == UINT_MAX) {
 +      // Arbitrarily pick the first index we encounter, it only matters that
 +      // the value is the same for all siblings.
 +      threadInfo[i][pkgIdIndex] = idx;
 +    } else if (threadInfo[i][pkgIdIndex] != idx) {
 +      // Contradictory sibling lists.
 +      KMP_CPU_FREE(siblings);
 +      return false;
 +    }
 +  }
 +  KMP_ASSERT(threadInfo[idx][pkgIdIndex] != UINT_MAX);
 +  KMP_CPU_FREE(siblings);
 +  return true;
 +}
 +
 // Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
 // affinity map. On AIX, the map is obtained through system SRAD (Scheduler
 // Resource Allocation Domain).
@@ -3550,18 +3585,13 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
         return false;
       }
 -      // Check for missing fields.  The osId field must be there, and we
 -      // currently require that the physical id field is specified, also.
 +      // Check for missing fields.  The osId field must be there. The physical
 +      // id field will be checked later.
       if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
         CLEANUP_THREAD_INFO;
         *msg_id = kmp_i18n_str_MissingProcField;
         return false;
       }
 -      if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
 -        CLEANUP_THREAD_INFO;
 -        *msg_id = kmp_i18n_str_MissingPhysicalIDField;
 -        return false;
 -      }
       // Skip this proc if it is not included in the machine model.
       if (KMP_AFFINITY_CAPABLE() &&
@@ -3591,6 +3621,18 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
   }
   *line = 0;
 +  // At least on powerpc, Linux may return -1 for physical_package_id. Try
 +  // to reconstruct topology from core_siblings_list in that case.
 +  for (i = 0; i < num_avail; ++i) {
 +    if (threadInfo[i][pkgIdIndex] == UINT_MAX) {
 +      if (!__kmp_package_id_from_core_siblings_list(threadInfo, num_avail, i)) {
 +        CLEANUP_THREAD_INFO;
 +        *msg_id = kmp_i18n_str_MissingPhysicalIDField;
 +        return false;
 +      }
 +    }
 +  }
 +
 #if KMP_MIC && REDUCE_TEAM_SIZE
   unsigned teamSize = 0;
 #endif // KMP_MIC && REDUCE_TEAM_SIZE
 diff --git a/openmp/runtime/test/affinity/kmp-hw-subset.c b/openmp/runtime/test/affinity/kmp-hw-subset.c
 index 606fcdfbada9..0b49969bd3b1 100644
 --- a/openmp/runtime/test/affinity/kmp-hw-subset.c
 +++ b/openmp/runtime/test/affinity/kmp-hw-subset.c
@@ -25,7 +25,7 @@ static int compare_hw_subset_places(const place_list_t *openmp_places,
     expected_per_place = nthreads_per_core;
   } else {
     expected_total = nsockets;
 -    expected_per_place = ncores_per_socket;
 +    expected_per_place = ncores_per_socket * nthreads_per_core;
   }
   if (openmp_places->num_places != expected_total) {
     fprintf(stderr, "error: KMP_HW_SUBSET did not half each resource layer!\n");
 -- 
 2.47.0
--- a/SOURCES/0001-profile-Use-base-vaddr-for-__llvm_write_binary_ids-n.patch
+++ b/SOURCES/0001-profile-Use-base-vaddr-for-__llvm_write_binary_ids-n.patch
@ -0,0 +1,86 @@
 From ccc2b792e57d632bc887b226a4e7f0a8189eab8b Mon Sep 17 00:00:00 2001
 From: Josh Stone <jistone@redhat.com>
 Date: Mon, 4 Nov 2024 16:37:49 -0800
 Subject: [PATCH] [profile] Use base+vaddr for `__llvm_write_binary_ids` note
 pointers
 This function is always examining its own ELF headers in memory, but it
 was trying to use conditions between examining files or memory, and it
 wasn't accounting for LOAD offsets at runtime. This is especially bad if
 a loaded segment has additional padding that's not in the file offsets.
 Now we do a first scan of the program headers to figure out the runtime
 base address based on `PT_PHDR` and/or `PT_DYNAMIC` (else assume zero),
 similar to libc's `do_start`. Then each `PT_NOTE` pointer is simply the
 base plus the segment's `p_vaddr`, which includes LOAD offsets.
 Fixes #114605
 ---
 .../lib/profile/InstrProfilingPlatformLinux.c | 40 ++++++++-----------
 1 file changed, 16 insertions(+), 24 deletions(-)
 diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
 index e2c06d51e0c6..c365129a0768 100644
 --- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
 +++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
@@ -194,41 +194,33 @@ static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note,
  */
 COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
   extern const ElfW(Ehdr) __ehdr_start __attribute__((visibility("hidden")));
 +  extern ElfW(Dyn) _DYNAMIC[] __attribute__((weak, visibility("hidden")));
 +
   const ElfW(Ehdr) *ElfHeader = &__ehdr_start;
   const ElfW(Phdr) *ProgramHeader =
       (const ElfW(Phdr) *)((uintptr_t)ElfHeader + ElfHeader->e_phoff);
 +  /* Compute the added base address in case of position-independent code. */
 +  uintptr_t Base = 0;
 +  for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) {
 +    if (ProgramHeader[I].p_type == PT_PHDR)
 +      Base = (uintptr_t)ProgramHeader - ProgramHeader[I].p_vaddr;
 +    if (ProgramHeader[I].p_type == PT_DYNAMIC && _DYNAMIC)
 +      Base = (uintptr_t)_DYNAMIC - ProgramHeader[I].p_vaddr;
 +  }
 +
   int TotalBinaryIdsSize = 0;
 -  uint32_t I;
   /* Iterate through entries in the program header. */
 -  for (I = 0; I < ElfHeader->e_phnum; I++) {
 +  for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) {
     /* Look for the notes segment in program header entries. */
     if (ProgramHeader[I].p_type != PT_NOTE)
       continue;
     /* There can be multiple notes segment, and examine each of them. */
 -    const ElfW(Nhdr) * Note;
 -    const ElfW(Nhdr) * NotesEnd;
 -    /*
 -     * When examining notes in file, use p_offset, which is the offset within
 -     * the elf file, to find the start of notes.
 -     */
 -    if (ProgramHeader[I].p_memsz == 0 ||
 -        ProgramHeader[I].p_memsz == ProgramHeader[I].p_filesz) {
 -      Note = (const ElfW(Nhdr) *)((uintptr_t)ElfHeader +
 -                                  ProgramHeader[I].p_offset);
 -      NotesEnd = (const ElfW(Nhdr) *)((const char *)(Note) +
 -                                      ProgramHeader[I].p_filesz);
 -    } else {
 -      /*
 -       * When examining notes in memory, use p_vaddr, which is the address of
 -       * section after loaded to memory, to find the start of notes.
 -       */
 -      Note =
 -          (const ElfW(Nhdr) *)((uintptr_t)ElfHeader + ProgramHeader[I].p_vaddr);
 -      NotesEnd =
 -          (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz);
 -    }
 +    const ElfW(Nhdr) *Note =
 +        (const ElfW(Nhdr) *)(Base + ProgramHeader[I].p_vaddr);
 +    const ElfW(Nhdr) *NotesEnd =
 +        (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz);
     int BinaryIdsSize = WriteBinaryIds(Writer, Note, NotesEnd);
     if (TotalBinaryIdsSize == -1)
 -- 
 2.47.0
--- a/SOURCES/0003-PATCH-clang-Don-t-install-static-libraries.patch
+++ b/SOURCES/0003-PATCH-clang-Don-t-install-static-libraries.patch
--- a/SOURCES/18-99273.patch
+++ b/SOURCES/18-99273.patch
@ -0,0 +1,893 @@
 From 91052169960477fbc39169c10f9fae3bec732510 Mon Sep 17 00:00:00 2001
 From: Carl Ritson <carl.ritson@amd.com>
 Date: Wed, 17 Jul 2024 15:07:42 +0900
 Subject: [PATCH 1/3] [AMDGPU] Implement workaround for GFX11.5 export priority
 On GFX11.5 shaders having completed exports need to execute/wait
 at a lower priority than shaders still executing exports.
 Add code to maintain normal priority of 2 for shaders that export
 and drop to priority 0 after exports.
 ---
 llvm/lib/Target/AMDGPU/AMDGPU.td              |  15 +-
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 112 ++++++
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h  |   1 +
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         |   3 +
 .../AMDGPU/required-export-priority.ll        | 344 ++++++++++++++++++
 .../AMDGPU/required-export-priority.mir       | 293 +++++++++++++++
 6 files changed, 765 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.mir
 diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
 index dfc8eaea66f7b..14fcf6a210a78 100644
 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td
 +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -947,6 +947,12 @@ def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
   "Has restricted SOffset (immediate not supported)."
 >;
 +def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority",
 +  "HasRequiredExportPriority",
 +  "true",
 +  "Export priority must be explicitly manipulated on GFX11.5"
 +>;
 +
 //===------------------------------------------------------------===//
 // Subtarget Features (options and debugging)
 //===------------------------------------------------------------===//
@@ -1597,14 +1603,16 @@ def FeatureISAVersion11_5_0 : FeatureSet<
   !listconcat(FeatureISAVersion11_Common.Features,
     [FeatureSALUFloatInsts,
      FeatureDPPSrc1SGPR,
 -     FeatureVGPRSingleUseHintInsts])>;
 +     FeatureVGPRSingleUseHintInsts,
 +     FeatureRequiredExportPriority])>;
 def FeatureISAVersion11_5_1 : FeatureSet<
   !listconcat(FeatureISAVersion11_Common.Features,
     [FeatureSALUFloatInsts,
      FeatureDPPSrc1SGPR,
      FeatureVGPRSingleUseHintInsts,
 -     FeatureGFX11FullVGPRs])>;
 +     FeatureGFX11FullVGPRs,
 +     FeatureRequiredExportPriority])>;
 def FeatureISAVersion12 : FeatureSet<
   [FeatureGFX12,
 diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
 index a402fc6d7e611..a8b171aa82840 100644
 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
 +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -14,6 +14,7 @@
 #include "GCNSubtarget.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIMachineFunctionInfo.h"
 +#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/TargetParser/TargetParser.h"
@@ -1104,6 +1105,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
   fixWMMAHazards(MI);
   fixShift64HighRegBug(MI);
   fixVALUMaskWriteHazard(MI);
 +  fixRequiredExportPriority(MI);
 }
 bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
@@ -2895,3 +2897,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
   return true;
 }
 +
 +static bool ensureEntrySetPrio(MachineFunction *MF, int Priority,
 +                               const SIInstrInfo &TII) {
 +  MachineBasicBlock &EntryMBB = MF->front();
 +  if (EntryMBB.begin() != EntryMBB.end()) {
 +    auto &EntryMI = *EntryMBB.begin();
 +    if (EntryMI.getOpcode() == AMDGPU::S_SETPRIO &&
 +        EntryMI.getOperand(0).getImm() >= Priority)
 +      return false;
 +  }
 +
 +  BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII.get(AMDGPU::S_SETPRIO))
 +      .addImm(Priority);
 +  return true;
 +}
 +
 +bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
 +  if (!ST.hasRequiredExportPriority())
 +    return false;
 +
 +  // Assume the following shader types will never have exports,
 +  // and avoid adding or adjusting S_SETPRIO.
 +  MachineBasicBlock *MBB = MI->getParent();
 +  MachineFunction *MF = MBB->getParent();
 +  auto CC = MF->getFunction().getCallingConv();
 +  switch (CC) {
 +  case CallingConv::AMDGPU_CS:
 +  case CallingConv::AMDGPU_CS_Chain:
 +  case CallingConv::AMDGPU_CS_ChainPreserve:
 +  case CallingConv::AMDGPU_KERNEL:
 +    return false;
 +  default:
 +    break;
 +  }
 +
 +  const int MaxPriority = 3;
 +  const int NormalPriority = 2;
 +  const int PostExportPriority = 0;
 +
 +  auto It = MI->getIterator();
 +  switch (MI->getOpcode()) {
 +  case AMDGPU::S_ENDPGM:
 +  case AMDGPU::S_ENDPGM_SAVED:
 +  case AMDGPU::S_ENDPGM_ORDERED_PS_DONE:
 +  case AMDGPU::SI_RETURN_TO_EPILOG:
 +    // Ensure shader with calls raises priority at entry.
 +    // This ensures correct priority if exports exist in callee.
 +    if (MF->getFrameInfo().hasCalls())
 +      return ensureEntrySetPrio(MF, NormalPriority, TII);
 +    return false;
 +  case AMDGPU::S_SETPRIO: {
 +    // Raise minimum priority unless in workaround.
 +    auto &PrioOp = MI->getOperand(0);
 +    int Prio = PrioOp.getImm();
 +    bool InWA = (Prio == PostExportPriority) &&
 +                (It != MBB->begin() && TII.isEXP(*std::prev(It)));
 +    if (InWA || Prio >= NormalPriority)
 +      return false;
 +    PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority));
 +    return true;
 +  }
 +  default:
 +    if (!TII.isEXP(*MI))
 +      return false;
 +    break;
 +  }
 +
 +  // Check entry priority at each export (as there will only be a few).
 +  // Note: amdgpu_gfx can only be a callee, so defer to caller setprio.
 +  bool Changed = false;
 +  if (CC != CallingConv::AMDGPU_Gfx)
 +    Changed = ensureEntrySetPrio(MF, NormalPriority, TII);
 +
 +  auto NextMI = std::next(It);
 +  bool EndOfShader = false;
 +  if (NextMI != MBB->end()) {
 +    // Only need WA at end of sequence of exports.
 +    if (TII.isEXP(*NextMI))
 +      return Changed;
 +    // Assume appropriate S_SETPRIO after export means WA already applied.
 +    if (NextMI->getOpcode() == AMDGPU::S_SETPRIO &&
 +        NextMI->getOperand(0).getImm() == PostExportPriority)
 +      return Changed;
 +    EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM;
 +  }
 +
 +  const DebugLoc &DL = MI->getDebugLoc();
 +
 +  // Lower priority.
 +  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
 +      .addImm(PostExportPriority);
 +
 +  if (!EndOfShader) {
 +    // Wait for exports to complete.
 +    BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT))
 +        .addReg(AMDGPU::SGPR_NULL)
 +        .addImm(0);
 +  }
 +
 +  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
 +  BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
 +
 +  if (!EndOfShader) {
 +    // Return to normal (higher) priority.
 +    BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
 +        .addImm(NormalPriority);
 +  }
 +
 +  return true;
 +}
 diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
 index 3ccca527c626b..f2a64ab48e180 100644
 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
 +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -107,6 +107,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   bool fixWMMAHazards(MachineInstr *MI);
   bool fixShift64HighRegBug(MachineInstr *MI);
   bool fixVALUMaskWriteHazard(MachineInstr *MI);
 +  bool fixRequiredExportPriority(MachineInstr *MI);
   int checkMAIHazards(MachineInstr *MI);
   int checkMAIHazards908(MachineInstr *MI);
 diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
 index e5817594a4521..def89c785b855 100644
 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
 +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -238,6 +238,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasVOPDInsts = false;
   bool HasVALUTransUseHazard = false;
   bool HasForceStoreSC0SC1 = false;
 +  bool HasRequiredExportPriority = false;
   // Dummy feature to use for assembler in tablegen.
   bool FeatureDisable = false;
@@ -1282,6 +1283,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }
 +  bool hasRequiredExportPriority() const { return HasRequiredExportPriority; }
 +
   /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
   /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
   bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
 diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
 new file mode 100644
 index 0000000000000..377902f3f0d1a
 --- /dev/null
 +++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -0,0 +1,344 @@
 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 +
 +define amdgpu_ps void @test_export_zeroes_f32() #0 {
 +; GCN-LABEL: test_export_zeroes_f32:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    v_mov_b32_e32 v0, 0
 +; GCN-NEXT:    exp mrt0 off, off, off, off
 +; GCN-NEXT:    exp mrt0 off, off, off, off done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_endpgm
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_export_en_src0_f32() #0 {
 +; GCN-LABEL: test_export_en_src0_f32:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
 +; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
 +; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
 +; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
 +; GCN-NEXT:    exp mrt0 v3, off, off, off done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_endpgm
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_gs void @test_export_gs() #0 {
 +; GCN-LABEL: test_export_gs:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
 +; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
 +; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
 +; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
 +; GCN-NEXT:    exp mrt0 off, v2, off, off done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_endpgm
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_hs void @test_export_hs() #0 {
 +; GCN-LABEL: test_export_hs:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
 +; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
 +; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
 +; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
 +; GCN-NEXT:    exp mrt0 off, v2, off, off done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_endpgm
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_gfx void @test_export_gfx(float %v) #0 {
 +; GCN-LABEL: test_export_gfx:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 +; GCN-NEXT:    v_mov_b32_e32 v1, 4.0
 +; GCN-NEXT:    v_mov_b32_e32 v2, 0.5
 +; GCN-NEXT:    v_mov_b32_e32 v3, 2.0
 +; GCN-NEXT:    exp mrt0 off, v3, off, off done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_waitcnt expcnt(0)
 +; GCN-NEXT:    s_setpc_b64 s[30:31]
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %v, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_cs void @test_export_cs() #0 {
 +; GCN-LABEL: test_export_cs:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
 +; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
 +; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
 +; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
 +; GCN-NEXT:    exp mrt0 off, v2, off, off done
 +; GCN-NEXT:    s_endpgm
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_kernel void @test_export_kernel() #0 {
 +; GCN-LABEL: test_export_kernel:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
 +; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
 +; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
 +; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
 +; GCN-NEXT:    exp mrt0 off, v2, off, off done
 +; GCN-NEXT:    s_endpgm
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_gfx void @test_no_export_gfx(float %v) #0 {
 +; GCN-LABEL: test_no_export_gfx:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 +; GCN-NEXT:    s_setpc_b64 s[30:31]
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_no_export_ps(float %v) #0 {
 +; GCN-LABEL: test_no_export_ps:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_endpgm
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
 +; GCN-LABEL: test_if_export_f32:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_mov_b32 s0, exec_lo
 +; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
 +; GCN-NEXT:    s_cbranch_execz .LBB9_2
 +; GCN-NEXT:  ; %bb.1: ; %exp
 +; GCN-NEXT:    exp mrt0 v1, v2, v3, v4
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:  .LBB9_2: ; %end
 +; GCN-NEXT:    s_endpgm
 +  %cc = icmp eq i32 %flag, 0
 +  br i1 %cc, label %end, label %exp
 +
 +exp:
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
 +  br label %end
 +
 +end:
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
 +; GCN-LABEL: test_if_export_vm_f32:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_mov_b32 s0, exec_lo
 +; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
 +; GCN-NEXT:    s_cbranch_execz .LBB10_2
 +; GCN-NEXT:  ; %bb.1: ; %exp
 +; GCN-NEXT:    exp mrt0 v1, v2, v3, v4
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:  .LBB10_2: ; %end
 +; GCN-NEXT:    s_endpgm
 +  %cc = icmp eq i32 %flag, 0
 +  br i1 %cc, label %end, label %exp
 +
 +exp:
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
 +  br label %end
 +
 +end:
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
 +; GCN-LABEL: test_if_export_done_f32:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_mov_b32 s0, exec_lo
 +; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
 +; GCN-NEXT:    s_cbranch_execz .LBB11_2
 +; GCN-NEXT:  ; %bb.1: ; %exp
 +; GCN-NEXT:    exp mrt0 v1, v2, v3, v4 done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:  .LBB11_2: ; %end
 +; GCN-NEXT:    s_endpgm
 +  %cc = icmp eq i32 %flag, 0
 +  br i1 %cc, label %end, label %exp
 +
 +exp:
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
 +  br label %end
 +
 +end:
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
 +; GCN-LABEL: test_if_export_vm_done_f32:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_mov_b32 s0, exec_lo
 +; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
 +; GCN-NEXT:    s_cbranch_execz .LBB12_2
 +; GCN-NEXT:  ; %bb.1: ; %exp
 +; GCN-NEXT:    exp mrt0 v1, v2, v3, v4 done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:  .LBB12_2: ; %end
 +; GCN-NEXT:    s_endpgm
 +  %cc = icmp eq i32 %flag, 0
 +  br i1 %cc, label %end, label %exp
 +
 +exp:
 +  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
 +  br label %end
 +
 +end:
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 {
 +; GCN-LABEL: test_export_pos_before_param_across_load:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen
 +; GCN-NEXT:    v_mov_b32_e32 v1, 0
 +; GCN-NEXT:    v_mov_b32_e32 v2, 1.0
 +; GCN-NEXT:    v_mov_b32_e32 v3, 0.5
 +; GCN-NEXT:    s_waitcnt vmcnt(0)
 +; GCN-NEXT:    exp pos0 v1, v1, v1, v0 done
 +; GCN-NEXT:    exp invalid_target_32 v2, v2, v2, v2
 +; GCN-NEXT:    exp invalid_target_33 v2, v2, v2, v3
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_endpgm
 +  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
 +  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
 +  %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
 +  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
 +; GCN-LABEL: test_export_across_store_load:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    v_mov_b32_e32 v2, 24
 +; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
 +; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 +; GCN-NEXT:    v_cndmask_b32_e64 v0, v2, 8, vcc_lo
 +; GCN-NEXT:    v_mov_b32_e32 v2, 0
 +; GCN-NEXT:    scratch_store_b32 v0, v1, off
 +; GCN-NEXT:    scratch_load_b32 v0, off, off
 +; GCN-NEXT:    v_mov_b32_e32 v1, 1.0
 +; GCN-NEXT:    exp pos0 v2, v2, v2, v1 done
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_waitcnt vmcnt(0)
 +; GCN-NEXT:    exp invalid_target_32 v0, v2, v1, v2
 +; GCN-NEXT:    exp invalid_target_33 v0, v2, v1, v2
 +; GCN-NEXT:    s_setprio 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_nop 0
 +; GCN-NEXT:    s_endpgm
 +  %data0 = alloca <4 x float>, align 8, addrspace(5)
 +  %data1 = alloca <4 x float>, align 8, addrspace(5)
 +  %cmp = icmp eq i32 %idx, 1
 +  %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
 +  store float %v, ptr addrspace(5) %data, align 8
 +  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
 +  %load0 = load float, ptr addrspace(5) %data0, align 8
 +  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
 +  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_export_in_callee(float %v) #0 {
 +; GCN-LABEL: test_export_in_callee:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_getpc_b64 s[0:1]
 +; GCN-NEXT:    s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
 +; GCN-NEXT:    s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
 +; GCN-NEXT:    v_add_f32_e32 v0, 1.0, v0
 +; GCN-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
 +; GCN-NEXT:    s_mov_b32 s32, 0
 +; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 +; GCN-NEXT:    s_swappc_b64 s[30:31], s[0:1]
 +; GCN-NEXT:    s_endpgm
 +  %x = fadd float %v, 1.0
 +  call void @test_export_gfx(float %x)
 +  ret void
 +}
 +
 +define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
 +; GCN-LABEL: test_export_in_callee_prio:
 +; GCN:       ; %bb.0:
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_mov_b32 s32, 0
 +; GCN-NEXT:    v_add_f32_e32 v0, 1.0, v0
 +; GCN-NEXT:    s_setprio 2
 +; GCN-NEXT:    s_getpc_b64 s[0:1]
 +; GCN-NEXT:    s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
 +; GCN-NEXT:    s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
 +; GCN-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
 +; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 +; GCN-NEXT:    s_swappc_b64 s[30:31], s[0:1]
 +; GCN-NEXT:    s_endpgm
 +  %x = fadd float %v, 1.0
 +  call void @llvm.amdgcn.s.setprio(i16 0)
 +  call void @test_export_gfx(float %x)
 +  ret void
 +}
 +
 +declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
 +declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
 +declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
 +declare void @llvm.amdgcn.s.setprio(i16)
 +
 +attributes #0 = { nounwind }
 +attributes #1 = { nounwind inaccessiblememonly }
 +attributes #2 = { nounwind readnone }
 diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.mir b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
 new file mode 100644
 index 0000000000000..eee04468036e5
 --- /dev/null
 +++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
@@ -0,0 +1,293 @@
 +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=post-RA-hazard-rec -verify-machineinstrs  %s -o - | FileCheck -check-prefixes=GFX1150 %s
 +
 +--- |
 +  define amdgpu_ps void @end_of_shader() {
 +    ret void
 +  }
 +  define amdgpu_ps void @end_of_shader_return_to_epilogue() {
 +    ret void
 +  }
 +  define amdgpu_ps void @end_of_block() {
 +    ret void
 +  }
 +  define amdgpu_ps void @start_of_block() {
 +    ret void
 +  }
 +  define amdgpu_ps void @block_of_exports() {
 +    ret void
 +  }
 +  define amdgpu_ps void @sparse_exports() {
 +    ret void
 +  }
 +  define amdgpu_ps void @existing_setprio_1() {
 +    ret void
 +  }
 +  define amdgpu_ps void @existing_setprio_2() {
 +    ret void
 +  }
 +...
 +
 +---
 +name: end_of_shader
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  bb.0:
 +    liveins: $vgpr0
 +    ; GFX1150-LABEL: name: end_of_shader
 +    ; GFX1150: liveins: $vgpr0
 +    ; GFX1150-NEXT: {{  $}}
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: S_SETPRIO 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_ENDPGM 0
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    S_ENDPGM 0
 +...
 +
 +---
 +name: end_of_shader_return_to_epilogue
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  bb.0:
 +    liveins: $vgpr0
 +    ; GFX1150-LABEL: name: end_of_shader_return_to_epilogue
 +    ; GFX1150: liveins: $vgpr0
 +    ; GFX1150-NEXT: {{  $}}
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: S_SETPRIO 0
 +    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: SI_RETURN_TO_EPILOG $vgpr0
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    SI_RETURN_TO_EPILOG $vgpr0
 +...
 +
 +---
 +name: end_of_block
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  ; GFX1150-LABEL: name: end_of_block
 +  ; GFX1150: bb.0:
 +  ; GFX1150-NEXT:   successors: %bb.1(0x80000000)
 +  ; GFX1150-NEXT:   liveins: $vgpr0
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT:   S_SETPRIO 2
 +  ; GFX1150-NEXT:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +  ; GFX1150-NEXT:   S_SETPRIO 0
 +  ; GFX1150-NEXT:   S_WAITCNT_EXPCNT $sgpr_null, 0
 +  ; GFX1150-NEXT:   S_NOP 0
 +  ; GFX1150-NEXT:   S_NOP 0
 +  ; GFX1150-NEXT:   S_SETPRIO 2
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT: bb.1:
 +  ; GFX1150-NEXT:   S_ENDPGM 0
 +  bb.0:
 +    liveins: $vgpr0
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +
 +  bb.1:
 +    S_ENDPGM 0
 +...
 +
 +---
 +name: start_of_block
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  ; GFX1150-LABEL: name: start_of_block
 +  ; GFX1150: bb.0:
 +  ; GFX1150-NEXT:   successors: %bb.1(0x80000000)
 +  ; GFX1150-NEXT:   liveins: $vgpr0
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT:   S_SETPRIO 2
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT: bb.1:
 +  ; GFX1150-NEXT:   successors: %bb.2(0x80000000)
 +  ; GFX1150-NEXT:   liveins: $vgpr0
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +  ; GFX1150-NEXT:   S_SETPRIO 0
 +  ; GFX1150-NEXT:   S_WAITCNT_EXPCNT $sgpr_null, 0
 +  ; GFX1150-NEXT:   S_NOP 0
 +  ; GFX1150-NEXT:   S_NOP 0
 +  ; GFX1150-NEXT:   S_SETPRIO 2
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT: bb.2:
 +  ; GFX1150-NEXT:   S_ENDPGM 0
 +  bb.0:
 +    liveins: $vgpr0
 +
 +  bb.1:
 +    liveins: $vgpr0
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +
 +  bb.2:
 +    S_ENDPGM 0
 +...
 +
 +---
 +name: block_of_exports
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  bb.0:
 +    liveins: $vgpr0
 +    ; GFX1150-LABEL: name: block_of_exports
 +    ; GFX1150: liveins: $vgpr0
 +    ; GFX1150-NEXT: {{  $}}
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: S_SETPRIO 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_ENDPGM 0
 +    EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    S_ENDPGM 0
 +...
 +
 +---
 +name: sparse_exports
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  bb.0:
 +    liveins: $vgpr0
 +    ; GFX1150-LABEL: name: sparse_exports
 +    ; GFX1150: liveins: $vgpr0
 +    ; GFX1150-NEXT: {{  $}}
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: S_SETPRIO 0
 +    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
 +    ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: S_SETPRIO 0
 +    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
 +    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: S_SETPRIO 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_ENDPGM 0
 +    EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
 +    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    S_ENDPGM 0
 +...
 +
 +---
 +name: existing_setprio_1
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  ; GFX1150-LABEL: name: existing_setprio_1
 +  ; GFX1150: bb.0:
 +  ; GFX1150-NEXT:   successors: %bb.1(0x80000000)
 +  ; GFX1150-NEXT:   liveins: $vgpr0
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT:   S_SETPRIO 2
 +  ; GFX1150-NEXT:   $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT: bb.1:
 +  ; GFX1150-NEXT:   successors: %bb.2(0x80000000)
 +  ; GFX1150-NEXT:   liveins: $vgpr0
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT:   S_SETPRIO 3
 +  ; GFX1150-NEXT:   $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
 +  ; GFX1150-NEXT:   S_SETPRIO 2
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT: bb.2:
 +  ; GFX1150-NEXT:   successors: %bb.3(0x80000000)
 +  ; GFX1150-NEXT:   liveins: $vgpr0
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT:   S_SETPRIO 3
 +  ; GFX1150-NEXT:   $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
 +  ; GFX1150-NEXT:   S_SETPRIO 2
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT: bb.3:
 +  ; GFX1150-NEXT:   liveins: $vgpr0
 +  ; GFX1150-NEXT: {{  $}}
 +  ; GFX1150-NEXT:   EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +  ; GFX1150-NEXT:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +  ; GFX1150-NEXT:   S_SETPRIO 0
 +  ; GFX1150-NEXT:   S_NOP 0
 +  ; GFX1150-NEXT:   S_NOP 0
 +  ; GFX1150-NEXT:   S_ENDPGM 0
 +  bb.0:
 +    liveins: $vgpr0
 +    $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
 +
 +  bb.1:
 +    liveins: $vgpr0
 +    S_SETPRIO 3
 +    $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
 +    S_SETPRIO 0
 +
 +  bb.2:
 +    liveins: $vgpr0
 +    S_SETPRIO 1
 +    $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
 +    S_SETPRIO 0
 +
 +  bb.3:
 +    liveins: $vgpr0
 +    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    S_ENDPGM 0
 +...
 +
 +---
 +name: existing_setprio_2
 +tracksRegLiveness: true
 +liveins:
 +  - { reg: '$vgpr0' }
 +body: |
 +  bb.0:
 +    liveins: $vgpr0
 +    ; GFX1150-LABEL: name: existing_setprio_2
 +    ; GFX1150: liveins: $vgpr0
 +    ; GFX1150-NEXT: {{  $}}
 +    ; GFX1150-NEXT: S_SETPRIO 3
 +    ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    ; GFX1150-NEXT: S_SETPRIO 0
 +    ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_NOP 0
 +    ; GFX1150-NEXT: S_SETPRIO 2
 +    ; GFX1150-NEXT: S_SETPRIO 3
 +    ; GFX1150-NEXT: S_ENDPGM 0
 +    S_SETPRIO 3
 +    EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
 +    S_SETPRIO 3
 +    S_ENDPGM 0
 +...
 From 8ea44e65f2c19facff751aeb2ac960f907fb210f Mon Sep 17 00:00:00 2001
 From: Carl Ritson <carl.ritson@amd.com>
 Date: Wed, 17 Jul 2024 16:18:02 +0900
 Subject: [PATCH 2/3] Remove -verify-machineinstrs from test.
 ---
 llvm/test/CodeGen/AMDGPU/required-export-priority.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
 index 377902f3f0d1a..ebc209bd4d451 100644
 --- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
 +++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 -; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GCN %s
 define amdgpu_ps void @test_export_zeroes_f32() #0 {
 ; GCN-LABEL: test_export_zeroes_f32:
--- a/SOURCES/macros.clang
+++ b/SOURCES/macros.clang
--- a/SOURCES/release-keys.asc
+++ b/SOURCES/release-keys.asc
@ -102,29 +102,3 @@ yWfeofTJ7PhKzoXM2Y/rRFoM5gNh1RVA19ngLT5Jwiof8fPZvHJ/9ZkHn+O7eMNm
 m5++gYza3pnn2/PoGpGGAKok+sfJiq5Tb7RUefyJTeZiyTZ/XJrA
 =tMzl
 -----END PGP PUBLIC KEY BLOCK-----
 -----BEGIN PGP PUBLIC KEY BLOCK-----
 mDMEaMgtRhYJKwYBBAHaRw8BAQdA4NRjJPhVd56sOM+QmTbZKkRT3bYbgg6+Bxed
 CELeGp+0JUN1bGxlbiBSaG9kZXMgPGN1bGxlbi5yaG9kZXNAYXJtLmNvbT6IkwQT
 FgoAOxYhBHEEbR6cZla91hFxhz6Dur9KT56FBQJoyC1GAhsDBQsJCAcCAiICBhUK
 CQgLAgQWAgMBAh4HAheAAAoJED6Dur9KT56FkVwA/RLNMBHrjXoAKpRm1iIjiC6w
 gLRqGOnj1qAqPqgntMmmAQCQ2lGpw46rvh88ng84IGsRF0JlTAYb6SR/YYNsQyah
 Arg4BGjILUYSCisGAQQBl1UBBQEBB0B48hCLw13kduwibGDGoIax0BIa+f66IUC+
 HhNlucsjbgMBCAeIeAQYFgoAIBYhBHEEbR6cZla91hFxhz6Dur9KT56FBQJoyC1G
 AhsMAAoJED6Dur9KT56FVssBAO1lL/S2cU65XFHgbjc6crwljDrD7PYbxBA7hDpi
 pC4ZAP98rK1hGQ5wxpeiJ0heZ8zhpdUwEeymIDBaIcwgrJRFBQ==
 =HcEB
 -----END PGP PUBLIC KEY BLOCK-----
 -----BEGIN PGP PUBLIC KEY BLOCK-----
 mDMEaMg2hBYJKwYBBAHaRw8BAQdA2J814YnhPQSdsyjwx8VxZ7AitqCnns9lzvkx
 HX9lWMW0JERvdWdsYXMgWXVuZyA8ZG91Z2xhcy55dW5nQHNvbnkuY29tPoiTBBMW
 CgA7FiEE/7M2iYDz5rtXNxRaMWxW0GTKy6UFAmjINoQCGwMFCwkIBwICIgIGFQoJ
 CAsCBBYCAwECHgcCF4AACgkQMWxW0GTKy6XjYgEApJ7p+o7EAeaaOdO2f440KDfg
 t7haaBLaxr5fiaSKjkYA+gLDxWOh39Y84upf23qMmpSTZ3SK5LvJtBTVtV7AEX0B
 uDgEaMg2hBIKKwYBBAGXVQEFAQEHQL3CL6jHZAakhtLLj2Ks34u7ItY/7USl/bYk
 f/+mZTJMAwEIB4h4BBgWCgAgFiEE/7M2iYDz5rtXNxRaMWxW0GTKy6UFAmjINoQC
 GwwACgkQMWxW0GTKy6VTOgEArDn9bg58W7bfZfVfneJJbIeICEf3NN9IovbRbAOB
 ax0A/RxtrG4qowLlo907vb25ITOa1hBoheSV2wNoDaDUhFEF
 =JQ8u
 -----END PGP PUBLIC KEY BLOCK-----
--- a/SPECS/llvm.spec
+++ b/SPECS/llvm.spec
--- a/1013
+++ b/1013
--- a/gating.yaml
+++ b/gating.yaml
@ -1,31 +0,0 @@
 --- !Policy
 product_versions:
  - fedora-*
 decision_contexts:
  - bodhi_update_push_testing
  - bodhi_update_push_stable
  - bodhi_update_push_stable_critpath
 subject_type: koji_build
 rules:
  - !PassingTestCaseRule {test_case_name: fedora-ci.koji-build./tests/build-gating.functional}
  - !PassingTestCaseRule {test_case_name: fedora-ci.koji-build./tests/lld-alternatives.functional}
  - !PassingTestCaseRule {test_case_name: fedora-ci.koji-build.installability.functional}
 --- !Policy
 product_versions:
  # The version number here should match the current rawhide release.
  - fedora-44
 decision_contexts:
  - bodhi_update_push_stable
  - bodhi_update_push_stable_critpath
 rules:
  - !PassingTestCaseRule {test_case_name: fedora-ci.koji-build.rpmdeplint.functional}
  - !PassingTestCaseRule {test_case_name: fedora-ci.koji-build./tests/kernel-ark-build.functional}
 --- !Policy
 product_versions:
  - rhel-9
  - rhel-10
 decision_context: osci_compose_gate
 rules:
  - !PassingTestCaseRule {test_case_name: baseos-ci.brew-build.tier0-tmt-x86_64-aarch64.functional}
  - !PassingTestCaseRule {test_case_name: baseos-ci.brew-build.tier0-tmt-s390x-ppc64le.functional}
  - !PassingTestCaseRule {test_case_name: osci.brew-build.rebuild.validation}
--- a/llvm.rpmlintrc
+++ b/llvm.rpmlintrc
@ -1,49 +0,0 @@
 ## LLVM #############################################################
 # This library has no dependencies.
 addFilter("llvm-libs.x86_64: E: shared-lib-without-dependency-information /usr/lib64/libRemarks.so.[0-9]+")
 addFilter("llvm-googletest.x86_64: W: devel-file-in-non-devel-package")
 # same for llvm-test utilities
 addFilter("llvm-test.x86_64: W: no-manual-page-for-binary")
 # Don't warn about libs in llvm-libs
 addFilter("llvm-libs.x86_64: W: devel-file-in-non-devel-package /usr/lib64/lib")
 # These are OK in the llvm gold plugin
 addFilter("llvm-libs.x86_64: W: shared-lib-calls-exit /usr/lib64/LLVMgold.so")
 addFilter("llvm-libs.x86_64: W: no-soname /usr/lib64/LLVMgold.so")
 # These are without documentation
 addFilter("llvm-googletest.x86_64: W: no-documentation")
 addFilter("llvm-libs.x86_64: W: no-documentation")
 addFilter("llvm-static.x86_64: W: no-documentation")
 addFilter("llvm-test.x86_64: W: no-documentation")
 ## COMPILER-RT ######################################################
 # This is a devel package
 addFilter("W: devel-file-in-non-devel-package")
 # These symlinks are dangling on x86_64
 addFilter("compiler-rt.x86_64: W: dangling-relative-symlink /usr/lib64/clang/[0-9]+.[0-9]+.[0-9]+/")
 addFilter("E: hardcoded-library-path in ../../lib/clang/")
 ## LIBOMP ###########################################################
 ## CLANG ############################################################
 # clang needs libstdc++-devel installed in order to compile c++ programs.
 addFilter("E: devel-dependency libstdc\+\+-devel")
 addFilter("E: explicit-lib-dependency libstdc\+\+-devel")
 # clang installs libear to /usr/lib on all arches, so we have to use
 # a hard-coded /usr/lib path in order to move it to the python3 sitelib.
 addFilter("E: hardcoded-library-path in %{_prefix}/lib/{libear")
--- a/rpminspect.yaml
+++ b/rpminspect.yaml
@ -1,29 +0,0 @@
 ---
 inspections:
    # We need to disable abidiff due to abidiff+testing-farm issues. Ref link:
    # https://docs.testing-farm.io/Testing%20Farm/0.1/errors.html#TFE-1
    abidiff: off
 badfuncs:
    # For compiler-rt, we allow the following forbidden functions:
    # - gethostbyname
    # - gethostbyname2
    # - gethostbyaddr
    # - inet_aton
    # These are never actually used, and are installed just as interceptors.
    allowed:
        /usr/lib*/clang/*/lib/*/libclang_rt.?san.so:
            - gethostbyaddr
            - gethostbyname
            - gethostbyname2
            - inet_aton
        /usr/lib*/clang/*/lib/*/libclang_rt.memprof.so:
            - gethostbyaddr
            - gethostbyname
            - gethostbyname2
            - inet_aton
 unicode:
    ignore:
        # Ignore bidirectional unicode sequence documentation file
        - llvm-project-*.src/clang-tools-extra/docs/clang-tidy/checks/misc/misleading-bidirectional.rst
--- a/4
+++ b/4
@ -1,4 +0,0 @@
 SHA512 (llvm-project-21.1.3.src.tar.xz) = d3058e7c18ada2a6a6192c7e75970406520e0d2ba390dba3b89e99f05959198fd2976d38c200f8e6af37fb569d866b6367bf6e0e249fe4b340dfab74499e5723
 SHA512 (llvm-project-21.1.3.src.tar.xz.sig) = d218a4071451e32a77890dd2e7de7a3b8a310ca85c7e6d90b88d85bad128979cf6866c9d772b880b50da2ec117832e77ba162049478c1deb7b0299cae008151a
 SHA512 (llvm-project-20.1.8.src.tar.xz) = f330e72e6a1da468569049437cc0ba7a41abb816ccece7367189344f7ebfef730f4788ac7af2bef0aa8a49341c15ab1d31e941ffa782f264d11fe0dc05470773
 SHA512 (llvm-project-20.1.8.src.tar.xz.sig) = d74369bdb4d1b82775161ea53c9c5f3a23ce810f4df5ff617123023f9d8ce720e7d6ecc9e17f8ebd39fd9e7a9de79560abdf2ffe73bcb907a43148d43665d619
--- a/tests/README.md
+++ b/tests/README.md
@ -1,9 +0,0 @@
 # Gating testplans for LLVM
 The tests for LLVM are in a separate repo:
 * llvm:  https://gitlab.com/redhat/centos-stream/tests/llvm.git/
 This directory should contain only fmf plans (such as build-gating.fmf) which import
 the tests from the tests repo. This can be done using the "url" parameter of the
 plan's "discover" step. Reference: https://tmt.readthedocs.io/en/stable/spec/plans.html#fmf
--- a/tests/build-gating.fmf
+++ b/tests/build-gating.fmf
@ -1,51 +0,0 @@
 #
 # Build/PR gating tests for *LLVM 19*
 #
 # Compatible with various LLVM 19 distributions:
 #
 #   * Fedora (ursine packages)
 #   * CentOS 10 stream (ursine packages)
 #   * CentOS 9 stream (ursine packages)
 #   * RHEL-10 (ursine packages)
 #   * RHEL-9 (ursine packages)
 #   * RHEL-8 (Red Hat module)
 #
 summary: LLVM tests for build/PR gating
 adjust:
  - because: "Plan to be run when either executed locally, or executed by CI system to gate a build or PR."
    when: >-
      trigger is defined
      and trigger != commit
      and trigger != build
    enabled: false
  # Unfortunately, TMT does not support a more declarative approach, so we need to run the commands on our own.
  - because: "On CentOS, CRB must be enabled to provide rarer packages"
    prepare+:
      - name: Enable CRB
        how: shell
        script: dnf config-manager --set-enabled crb
    when: >-
      distro == centos
  # Unfortunately, TMT does not support a more declarative approach, so we need to run the commands on our own.
  - because: "On RHEL, CRB must be enabled to provide rarer packages"
    prepare+:
      - name: Enable CRB
        how: shell
        script: dnf config-manager --set-enabled rhel-CRB
    when: >-
      distro == rhel-9
      or distro == rhel-8
 discover:
  how: fmf
  url: https://gitlab.com/redhat/centos-stream/tests/llvm.git
  ref: main
  filter: "tag:-spoils-installation & tag:-not-in-default"
 execute:
    how: tmt
 provision:
  hardware:
    memory: ">= 4 GiB"
--- a/tests/lld-alternatives.fmf
+++ b/tests/lld-alternatives.fmf
@ -1,20 +0,0 @@
 summary: LLD tests for build/PR gating, testing alternatives and spoiling the installation
 adjust:
  - because: "Plan to be run when either executed locally, or executed by CI system to gate a build or PR."
    when: >-
      trigger is defined
      and trigger != commit
      and trigger != build
    enabled: false
 discover:
    - name: lld-alternatives
      how: fmf
      url: https://gitlab.com/redhat/centos-stream/tests/llvm.git
      ref: main
      test: ld-alternative
 execute:
    how: tmt
 provision:
  hardware:
    memory: ">= 4 GiB"
		`@ -0,0 +1,2 @@`
							`6e4033d8b76a89e82220b5445bff58cdce64300e SOURCES/llvm-project-19.1.7.src.tar.xz`
							`48f839c6e47a34a1138862a9db6274c150179532 SOURCES/llvm-project-19.1.7.src.tar.xz.sig`