Backport fixes that didn't make 20.1.8

Resolves: RHEL-106426
2025-07-29 14:57:05 -07:00 · 2025-07-29 14:57:05 -07:00 · f610107fb1
commit f610107fb1
parent 9e2cafe0f6
5 changed files with 349 additions and 3 deletions
--- a/0001-Add-REQUIRES-asserts-to-test-added-in-145149-because.patch
+++ b/0001-Add-REQUIRES-asserts-to-test-added-in-145149-because.patch
@ -0,0 +1,26 @@
+From ffc7d5ae2d79f98967943fabb2abfbc1b1e047fd Mon Sep 17 00:00:00 2001
+From: Douglas Yung <douglas.yung@sony.com>
+Date: Tue, 24 Jun 2025 04:08:34 +0000
+Subject: [PATCH] Add `REQUIRES: asserts` to test added in #145149 because it
+ uses the `-debug-only=` flag.
+
+This should fix the test failure when building without asserts.
+---
+ llvm/test/CodeGen/PowerPC/pr141642.ll | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/llvm/test/CodeGen/PowerPC/pr141642.ll b/llvm/test/CodeGen/PowerPC/pr141642.ll
+index 38a706574786..61bda4dfaf53 100644
+--- a/llvm/test/CodeGen/PowerPC/pr141642.ll
+++ b/llvm/test/CodeGen/PowerPC/pr141642.ll
+@@ -2,6 +2,7 @@
+ ; RUN:  FileCheck %s
+ ; CHECK-NOT: lxvdsx
+ ; CHECK-NOT: LD_SPLAT
+; REQUIRES: asserts
+ 
+ define weak_odr dso_local void @unpack(ptr noalias noundef %packed_in) local_unnamed_addr {
+ entry:
+-- 
+2.49.0
+
--- a/0001-CodeGenPrepare-Make-sure-instruction-get-from-SunkAd.patch
+++ b/0001-CodeGenPrepare-Make-sure-instruction-get-from-SunkAd.patch
@ -0,0 +1,143 @@
+From c76137f1cfd5758f6889236d49a65f059e6432ff Mon Sep 17 00:00:00 2001
+From: weiguozhi <57237827+weiguozhi@users.noreply.github.com>
+Date: Thu, 15 May 2025 09:27:25 -0700
+Subject: [PATCH] [CodeGenPrepare] Make sure instruction get from SunkAddrs is
+ before MemoryInst (#139303)
+
+Function optimizeBlock may optimize a block multiple times. In the first
+iteration of the loop, MemoryInst1 may generate a sunk instruction and
+store it into SunkAddrs. In the second iteration of the loop, MemoryInst2
+may use the same address and then reuse the sunk instruction stored in
+SunkAddrs, but MemoryInst2 may be located before MemoryInst1 and the
+corresponding sunk instruction. In order to avoid a use-before-def
+error, we need to find an appropriate insert position for the sunk
+instruction.
+
+Fixes #138208.
+
+(cherry picked from commit 59c6d70ed8120b8864e5f796e2bf3de5518a0ef0)
+---
+ llvm/lib/CodeGen/CodeGenPrepare.cpp           | 41 ++++++++++++++---
+ .../CodeGenPrepare/X86/sink-addr-reuse.ll     | 44 +++++++++++++++++++
+ 2 files changed, 80 insertions(+), 5 deletions(-)
+ create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
+
+diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
+index 088062afab17..f779f4b782ae 100644
+--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
+@@ -5728,6 +5728,35 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
+   return false;
+ }
+ 
+// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
+// is the first instruction that will use Addr. So we need to find the first
+// user of Addr in current BB.
+static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
+                                          Value *SunkAddr) {
+  if (Addr->hasOneUse())
+    return MemoryInst->getIterator();
+
+  // We already have a SunkAddr in current BB, but we may need to insert cast
+  // instruction after it.
+  if (SunkAddr) {
+    if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
+      return std::next(AddrInst->getIterator());
+  }
+
+  // Find the first user of Addr in current BB.
+  Instruction *Earliest = MemoryInst;
+  for (User *U : Addr->users()) {
+    Instruction *UserInst = dyn_cast<Instruction>(U);
+    if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
+      if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
+        continue;
+      if (UserInst->comesBefore(Earliest))
+        Earliest = UserInst;
+    }
+  }
+  return Earliest->getIterator();
+}
+
+ /// Sink addressing mode computation immediate before MemoryInst if doing so
+ /// can be done without increasing register pressure.  The need for the
+ /// register pressure constraint means this can end up being an all or nothing
+@@ -5852,11 +5881,6 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+     return Modified;
+   }
+ 
+-  // Insert this computation right after this user.  Since our caller is
+-  // scanning from the top of the BB to the bottom, reuse of the expr are
+-  // guaranteed to happen later.
+-  IRBuilder<> Builder(MemoryInst);
+-
+   // Now that we determined the addressing expression we want to use and know
+   // that we have to sink it into this block.  Check to see if we have already
+   // done this for some other load/store instr in this block.  If so, reuse
+@@ -5867,6 +5891,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
+ 
+   Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
+   Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
+
+  // The current BB may be optimized multiple times, we can't guarantee the
+  // reuse of Addr happens later, call findInsertPos to find an appropriate
+  // insert position.
+  IRBuilder<> Builder(MemoryInst->getParent(),
+                      findInsertPos(Addr, MemoryInst, SunkAddr));
+
+   if (SunkAddr) {
+     LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
+                       << " for " << *MemoryInst << "\n");
+diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
+new file mode 100644
+index 000000000000..019f31140655
+--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
+@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+declare void @g(ptr)
+
+; %load and %load5 use the same address, %load5 is optimized first, %load is
+; optimized later and reuse the same address computation instruction. We must
+; make sure not to generate use before def error.
+
+define void @f(ptr %arg) {
+; CHECK-LABEL: define void @f(
+; CHECK-SAME: ptr [[ARG:%.*]]) {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
+; CHECK-NEXT:    call void @g(ptr [[GETELEMENTPTR]])
+; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
+; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56
+; CHECK-NEXT:    [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8
+; CHECK-NEXT:    [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0)
+; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
+; CHECK-NEXT:    ret void
+;
+bb:
+  %getelementptr = getelementptr i8, ptr %arg, i64 -64
+  %getelementptr1 = getelementptr i8, ptr %arg, i64 -56
+  call void @g(ptr %getelementptr)
+  br label %bb3
+
+bb3:
+  %load = load ptr, ptr %getelementptr, align 8
+  %load4 = load i32, ptr %getelementptr1, align 8
+  %load5 = load ptr, ptr %getelementptr, align 8
+  %add = add i32 1, 0
+  %icmp = icmp eq i32 %add, 0
+  br i1 %icmp, label %bb7, label %bb7
+
+bb7:
+  ret void
+}
+-- 
+2.49.0
+
--- a/0001-PowerPC-Fix-handling-of-undefs-in-the-PPC-isSplatShu.patch
+++ b/0001-PowerPC-Fix-handling-of-undefs-in-the-PPC-isSplatShu.patch
@ -0,0 +1,67 @@
+From 735d721de451067c3a618b309703d0b8beb9cacc Mon Sep 17 00:00:00 2001
+From: Wael Yehia <wmyehia2001@yahoo.com>
+Date: Mon, 23 Jun 2025 13:22:33 -0400
+Subject: [PATCH] [PowerPC] Fix handling of undefs in the
+ PPC::isSplatShuffleMask query (#145149)
+
+Currently, the query assumes that a single undef byte implies the rest of
+the `EltSize - 1` bytes are undefs, but that's not always true.
+e.g. isSplatShuffleMask(
+<0,1,2,3,4,5,6,7,undef,undef,undef,undef,0,1,2,3>, 8) should return
+false.
+
+---------
+
+Co-authored-by: Wael Yehia <wyehia@ca.ibm.com>
+---
+ llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 13 +++++++++----
+ llvm/test/CodeGen/PowerPC/pr141642.ll       | 13 +++++++++++++
+ 2 files changed, 22 insertions(+), 4 deletions(-)
+ create mode 100644 llvm/test/CodeGen/PowerPC/pr141642.ll
+
+diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+index 421a808de667..88c6fe632d26 100644
+--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+@@ -2242,10 +2242,15 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
+       return false;
+ 
+   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
+-    if (N->getMaskElt(i) < 0) continue;
+-    for (unsigned j = 0; j != EltSize; ++j)
+-      if (N->getMaskElt(i+j) != N->getMaskElt(j))
+-        return false;
+    // An UNDEF element is a sequence of UNDEF bytes.
+    if (N->getMaskElt(i) < 0) {
+      for (unsigned j = 1; j != EltSize; ++j)
+        if (N->getMaskElt(i + j) >= 0)
+          return false;
+    } else
+      for (unsigned j = 0; j != EltSize; ++j)
+        if (N->getMaskElt(i + j) != N->getMaskElt(j))
+          return false;
+   }
+   return true;
+ }
+diff --git a/llvm/test/CodeGen/PowerPC/pr141642.ll b/llvm/test/CodeGen/PowerPC/pr141642.ll
+new file mode 100644
+index 000000000000..38a706574786
+--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr141642.ll
+@@ -0,0 +1,13 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O0 -debug-only=selectiondag -o - < %s 2>&1 | \
+; RUN:  FileCheck %s
+; CHECK-NOT: lxvdsx
+; CHECK-NOT: LD_SPLAT
+
+define weak_odr dso_local void @unpack(ptr noalias noundef %packed_in) local_unnamed_addr {
+entry:
+  %ld = load <2 x i32>, ptr %packed_in, align 2
+  %shuf = shufflevector <2 x i32> %ld, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 0>
+  %ie = insertelement <4 x i32> %shuf, i32 7, i32 2
+  store <4 x i32> %shuf, ptr %packed_in, align 2
+  ret void
+}
+-- 
+2.49.0
+
--- a/21-146424.patch
+++ b/21-146424.patch
@ -0,0 +1,94 @@
+From eba58195932f37fb461ae17c69fc517181b99c9a Mon Sep 17 00:00:00 2001
+From: Paul Murphy <paumurph@redhat.com>
+Date: Mon, 30 Jun 2025 10:13:37 -0500
+Subject: [PATCH] [PowerPC] fix lowering of SPILL_CRBIT on pwr9 and pwr10
+
+If a copy exists between creation of a crbit and a spill, machine-cp
+may delete the copy since it seems unaware of the relation between a cr
+and crbit. A fix was previously made for the generic ppc64 lowering. It
+should be applied to the pwr9 and pwr10 variants too.
+
+Likewise, relax and extend the pwr8 test to verify pwr9 and pwr10
+codegen too.
+
+This fixes #143989.
+---
+ llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp     | 17 +++++++++++------
+ .../PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir |  8 +++++++-
+ 2 files changed, 18 insertions(+), 7 deletions(-)
+
+diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+index 76dca4794e05..78d254a55fd9 100644
+--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+@@ -1102,13 +1102,20 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
+     SpillsKnownBit = true;
+     break;
+   default:
+    // When spilling a CR bit, the super register may not be explicitly defined
+    // (i.e. it can be defined by a CR-logical that only defines the subreg) so
+    // we state that the CR field is undef. Also, in order to preserve the kill
+    // flag on the CR bit, we add it as an implicit use.
+
+     // On Power10, we can use SETNBC to spill all CR bits. SETNBC will set all
+     // bits (specifically, it produces a -1 if the CR bit is set). Ultimately,
+     // the bit that is of importance to us is bit 32 (bit 0 of a 32-bit
+     // register), and SETNBC will set this.
+     if (Subtarget.isISA3_1()) {
+       BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETNBC8 : PPC::SETNBC), Reg)
+-          .addReg(SrcReg, RegState::Undef);
+          .addReg(SrcReg, RegState::Undef)
+          .addReg(SrcReg, RegState::Implicit |
+                              getKillRegState(MI.getOperand(0).isKill()));
+       break;
+     }
+ 
+@@ -1122,16 +1129,14 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
+           SrcReg == PPC::CR4LT || SrcReg == PPC::CR5LT ||
+           SrcReg == PPC::CR6LT || SrcReg == PPC::CR7LT) {
+         BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETB8 : PPC::SETB), Reg)
+-          .addReg(getCRFromCRBit(SrcReg), RegState::Undef);
+            .addReg(getCRFromCRBit(SrcReg), RegState::Undef)
+            .addReg(SrcReg, RegState::Implicit |
+                                getKillRegState(MI.getOperand(0).isKill()));
+         break;
+       }
+     }
+ 
+     // We need to move the CR field that contains the CR bit we are spilling.
+-    // The super register may not be explicitly defined (i.e. it can be defined
+-    // by a CR-logical that only defines the subreg) so we state that the CR
+-    // field is undef. Also, in order to preserve the kill flag on the CR bit,
+-    // we add it as an implicit use.
+     BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
+       .addReg(getCRFromCRBit(SrcReg), RegState::Undef)
+       .addReg(SrcReg,
+diff --git a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir
+index 41e21248a3f0..2796cdb3ae87 100644
+--- a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir
+++ b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir
+@@ -1,6 +1,12 @@
+ # RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -start-after \
+ # RUN:   virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \
+ # RUN:   -o - | FileCheck %s
+# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -start-after \
+# RUN:   virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \
+# RUN:   -o - | FileCheck %s
+# RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu -start-after \
+# RUN:   virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \
+# RUN:   -o - | FileCheck %s
+ 
+ --- |
+   ; ModuleID = 'a.ll'
+@@ -30,7 +36,7 @@
+   ; Function Attrs: nounwind
+   declare void @llvm.stackprotector(ptr, ptr) #1
+   
+-  attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
+  attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+   attributes #1 = { nounwind }
+   
+   !llvm.ident = !{!0}
+-- 
+2.49.0
+
--- a/llvm.spec
+++ b/llvm.spec
@ -316,7 +316,7 @@
 #region main package
 Name:		%{pkg_name_llvm}
 Version:	%{maj_ver}.%{min_ver}.%{patch_ver}%{?rc_ver:~rc%{rc_ver}}%{?llvm_snapshot_version_suffix:~%{llvm_snapshot_version_suffix}}
-Release:	2%{?dist}
+Release:	3%{?dist}
 Summary:	The Low Level Virtual Machine

 License:	Apache-2.0 WITH LLVM-exception OR NCSA
@ -394,8 +394,7 @@ Patch106: 0001-19-Always-build-shared-libs-for-LLD.patch
 #endregion LLD patches

 #region polly patches
-Patch2001: 0001-20-polly-shared-libs.patch
-Patch2101: 0001-20-polly-shared-libs.patch
+Patch107: 0001-20-polly-shared-libs.patch
 #endregion polly patches

 #region RHEL patches
@ -407,6 +406,18 @@ Patch501: 0001-Fix-page-size-constant-on-aarch64-and-ppc64le.patch
 # https://github.com/llvm/llvm-project/issues/124001
 Patch1901: 0001-SystemZ-Fix-ICE-with-i128-i64-uaddo-carry-chain.patch

+# Fix a pgo miscompilation triggered by building Rust 1.87 with pgo on ppc64le.
+# https://github.com/llvm/llvm-project/issues/138208
+Patch2004: 0001-CodeGenPrepare-Make-sure-instruction-get-from-SunkAd.patch
+
+# Fix Power9/Power10 crbit spilling
+# https://github.com/llvm/llvm-project/pull/146424
+Patch108: 21-146424.patch
+
+# Fix for highway package build on ppc64le (https://github.com/llvm/llvm-project/pull/145149)
+Patch2005: 0001-PowerPC-Fix-handling-of-undefs-in-the-PPC-isSplatShu.patch
+Patch2006: 0001-Add-REQUIRES-asserts-to-test-added-in-145149-because.patch
+
 %if 0%{?rhel} == 8
 %global python3_pkgversion 3.12
 %global __python3 /usr/bin/python3.12
@ -3432,6 +3443,11 @@ fi

 #region changelog
 %changelog
+* Tue Jul 29 2025 Tom Stellard <tstellar@redhat.com> - 20.1.8-3
+- Backport fix for pgo optimized rust toolchain on ppc64le (rhbz#2382683)
+- Backport fix for crbit spill miscompile on ppc64le power9 and power10 (rhbz#2383037)
+- Backport fix for build of highway package on ppc64le (rhbz#2383182)
+
 * Wed Jul 09 2025 Nikita Popov <npopov@redhat.com> - 20.1.8-1
 - Update to LLVM 20.1.8