Backport fixes that didn't make 20.1.8

Resolves: RHEL-106426
This commit is contained in:
Tom Stellard 2025-07-29 14:57:05 -07:00
parent 9e2cafe0f6
commit f610107fb1
5 changed files with 349 additions and 3 deletions

View File

@ -0,0 +1,26 @@
From ffc7d5ae2d79f98967943fabb2abfbc1b1e047fd Mon Sep 17 00:00:00 2001
From: Douglas Yung <douglas.yung@sony.com>
Date: Tue, 24 Jun 2025 04:08:34 +0000
Subject: [PATCH] Add `REQUIRES: asserts` to test added in #145149 because it
uses the `-debug-only=` flag.
This should fix the test failure when building without asserts.
---
llvm/test/CodeGen/PowerPC/pr141642.ll | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/test/CodeGen/PowerPC/pr141642.ll b/llvm/test/CodeGen/PowerPC/pr141642.ll
index 38a706574786..61bda4dfaf53 100644
--- a/llvm/test/CodeGen/PowerPC/pr141642.ll
+++ b/llvm/test/CodeGen/PowerPC/pr141642.ll
@@ -2,6 +2,7 @@
; RUN: FileCheck %s
; CHECK-NOT: lxvdsx
; CHECK-NOT: LD_SPLAT
+; REQUIRES: asserts
define weak_odr dso_local void @unpack(ptr noalias noundef %packed_in) local_unnamed_addr {
entry:
--
2.49.0

View File

@ -0,0 +1,143 @@
From c76137f1cfd5758f6889236d49a65f059e6432ff Mon Sep 17 00:00:00 2001
From: weiguozhi <57237827+weiguozhi@users.noreply.github.com>
Date: Thu, 15 May 2025 09:27:25 -0700
Subject: [PATCH] [CodeGenPrepare] Make sure instruction get from SunkAddrs is
before MemoryInst (#139303)
Function optimizeBlock may do optimizations on a block for multiple
times. In the first iteration of the loop, MemoryInst1 may generate a
sunk instruction and store it into SunkAddrs. In the second iteration of
the loop, MemoryInst2 may use the same address and then it can reuse the
sunk instruction stored in SunkAddrs, but MemoryInst2 may be before
MemoryInst1 and the corresponding sunk instruction. In order to avoid
use before def error, we need to find appropriate insert position for the
sunk instruction.
Fixes #138208.
(cherry picked from commit 59c6d70ed8120b8864e5f796e2bf3de5518a0ef0)
---
llvm/lib/CodeGen/CodeGenPrepare.cpp | 41 ++++++++++++++---
.../CodeGenPrepare/X86/sink-addr-reuse.ll | 44 +++++++++++++++++++
2 files changed, 80 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 088062afab17..f779f4b782ae 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -5728,6 +5728,35 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
return false;
}
+// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
+// is the first instruction that will use Addr. So we need to find the first
+// user of Addr in current BB.
+static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
+ Value *SunkAddr) {
+ if (Addr->hasOneUse())
+ return MemoryInst->getIterator();
+
+ // We already have a SunkAddr in current BB, but we may need to insert cast
+ // instruction after it.
+ if (SunkAddr) {
+ if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
+ return std::next(AddrInst->getIterator());
+ }
+
+ // Find the first user of Addr in current BB.
+ Instruction *Earliest = MemoryInst;
+ for (User *U : Addr->users()) {
+ Instruction *UserInst = dyn_cast<Instruction>(U);
+ if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
+ if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
+ continue;
+ if (UserInst->comesBefore(Earliest))
+ Earliest = UserInst;
+ }
+ }
+ return Earliest->getIterator();
+}
+
/// Sink addressing mode computation immediate before MemoryInst if doing so
/// can be done without increasing register pressure. The need for the
/// register pressure constraint means this can end up being an all or nothing
@@ -5852,11 +5881,6 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
return Modified;
}
- // Insert this computation right after this user. Since our caller is
- // scanning from the top of the BB to the bottom, reuse of the expr are
- // guaranteed to happen later.
- IRBuilder<> Builder(MemoryInst);
-
// Now that we determined the addressing expression we want to use and know
// that we have to sink it into this block. Check to see if we have already
// done this for some other load/store instr in this block. If so, reuse
@@ -5867,6 +5891,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
+
+ // The current BB may be optimized multiple times, we can't guarantee the
+ // reuse of Addr happens later, call findInsertPos to find an appropriate
+ // insert position.
+ IRBuilder<> Builder(MemoryInst->getParent(),
+ findInsertPos(Addr, MemoryInst, SunkAddr));
+
if (SunkAddr) {
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
<< " for " << *MemoryInst << "\n");
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
new file mode 100644
index 000000000000..019f31140655
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+declare void @g(ptr)
+
+; %load and %load5 use the same address, %load5 is optimized first, %load is
+; optimized later and reuse the same address computation instruction. We must
+; make sure not to generate use before def error.
+
+define void @f(ptr %arg) {
+; CHECK-LABEL: define void @f(
+; CHECK-SAME: ptr [[ARG:%.*]]) {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
+; CHECK-NEXT: call void @g(ptr [[GETELEMENTPTR]])
+; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
+; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
+; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56
+; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8
+; CHECK-NEXT: [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0)
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
+; CHECK-NEXT: ret void
+;
+bb:
+ %getelementptr = getelementptr i8, ptr %arg, i64 -64
+ %getelementptr1 = getelementptr i8, ptr %arg, i64 -56
+ call void @g(ptr %getelementptr)
+ br label %bb3
+
+bb3:
+ %load = load ptr, ptr %getelementptr, align 8
+ %load4 = load i32, ptr %getelementptr1, align 8
+ %load5 = load ptr, ptr %getelementptr, align 8
+ %add = add i32 1, 0
+ %icmp = icmp eq i32 %add, 0
+ br i1 %icmp, label %bb7, label %bb7
+
+bb7:
+ ret void
+}
--
2.49.0

View File

@ -0,0 +1,67 @@
From 735d721de451067c3a618b309703d0b8beb9cacc Mon Sep 17 00:00:00 2001
From: Wael Yehia <wmyehia2001@yahoo.com>
Date: Mon, 23 Jun 2025 13:22:33 -0400
Subject: [PATCH] [PowerPC] Fix handling of undefs in the
PPC::isSplatShuffleMask query (#145149)
Currently, the query assumes that a single undef byte implies the rest of
the `EltSize - 1` bytes are undefs, but that's not always true.
e.g. isSplatShuffleMask(
<0,1,2,3,4,5,6,7,undef,undef,undef,undef,0,1,2,3>, 8) should return
false.
---------
Co-authored-by: Wael Yehia <wyehia@ca.ibm.com>
---
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 13 +++++++++----
llvm/test/CodeGen/PowerPC/pr141642.ll | 13 +++++++++++++
2 files changed, 22 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/pr141642.ll
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 421a808de667..88c6fe632d26 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2242,10 +2242,15 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
return false;
for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
- if (N->getMaskElt(i) < 0) continue;
- for (unsigned j = 0; j != EltSize; ++j)
- if (N->getMaskElt(i+j) != N->getMaskElt(j))
- return false;
+ // An UNDEF element is a sequence of UNDEF bytes.
+ if (N->getMaskElt(i) < 0) {
+ for (unsigned j = 1; j != EltSize; ++j)
+ if (N->getMaskElt(i + j) >= 0)
+ return false;
+ } else
+ for (unsigned j = 0; j != EltSize; ++j)
+ if (N->getMaskElt(i + j) != N->getMaskElt(j))
+ return false;
}
return true;
}
diff --git a/llvm/test/CodeGen/PowerPC/pr141642.ll b/llvm/test/CodeGen/PowerPC/pr141642.ll
new file mode 100644
index 000000000000..38a706574786
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr141642.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O0 -debug-only=selectiondag -o - < %s 2>&1 | \
+; RUN: FileCheck %s
+; CHECK-NOT: lxvdsx
+; CHECK-NOT: LD_SPLAT
+
+define weak_odr dso_local void @unpack(ptr noalias noundef %packed_in) local_unnamed_addr {
+entry:
+ %ld = load <2 x i32>, ptr %packed_in, align 2
+ %shuf = shufflevector <2 x i32> %ld, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 0>
+ %ie = insertelement <4 x i32> %shuf, i32 7, i32 2
+ store <4 x i32> %shuf, ptr %packed_in, align 2
+ ret void
+}
--
2.49.0

94
21-146424.patch Normal file
View File

@ -0,0 +1,94 @@
From eba58195932f37fb461ae17c69fc517181b99c9a Mon Sep 17 00:00:00 2001
From: Paul Murphy <paumurph@redhat.com>
Date: Mon, 30 Jun 2025 10:13:37 -0500
Subject: [PATCH] [PowerPC] fix lowering of SPILL_CRBIT on pwr9 and pwr10
If a copy exists between creation of a crbit and a spill, machine-cp
may delete the copy since it seems unaware of the relation between a cr
and crbit. A fix was previously made for the generic ppc64 lowering. It
should be applied to the pwr9 and pwr10 variants too.
Likewise, relax and extend the pwr8 test to verify pwr9 and pwr10
codegen too.
This fixes #143989.
---
llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 17 +++++++++++------
.../PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir | 8 +++++++-
2 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 76dca4794e05..78d254a55fd9 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1102,13 +1102,20 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
SpillsKnownBit = true;
break;
default:
+ // When spilling a CR bit, The super register may not be explicitly defined
+ // (i.e. it can be defined by a CR-logical that only defines the subreg) so
+ // we state that the CR field is undef. Also, in order to preserve the kill
+ // flag on the CR bit, we add it as an implicit use.
+
// On Power10, we can use SETNBC to spill all CR bits. SETNBC will set all
// bits (specifically, it produces a -1 if the CR bit is set). Ultimately,
// the bit that is of importance to us is bit 32 (bit 0 of a 32-bit
// register), and SETNBC will set this.
if (Subtarget.isISA3_1()) {
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETNBC8 : PPC::SETNBC), Reg)
- .addReg(SrcReg, RegState::Undef);
+ .addReg(SrcReg, RegState::Undef)
+ .addReg(SrcReg, RegState::Implicit |
+ getKillRegState(MI.getOperand(0).isKill()));
break;
}
@@ -1122,16 +1129,14 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
SrcReg == PPC::CR4LT || SrcReg == PPC::CR5LT ||
SrcReg == PPC::CR6LT || SrcReg == PPC::CR7LT) {
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETB8 : PPC::SETB), Reg)
- .addReg(getCRFromCRBit(SrcReg), RegState::Undef);
+ .addReg(getCRFromCRBit(SrcReg), RegState::Undef)
+ .addReg(SrcReg, RegState::Implicit |
+ getKillRegState(MI.getOperand(0).isKill()));
break;
}
}
// We need to move the CR field that contains the CR bit we are spilling.
- // The super register may not be explicitly defined (i.e. it can be defined
- // by a CR-logical that only defines the subreg) so we state that the CR
- // field is undef. Also, in order to preserve the kill flag on the CR bit,
- // we add it as an implicit use.
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
.addReg(getCRFromCRBit(SrcReg), RegState::Undef)
.addReg(SrcReg,
diff --git a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir
index 41e21248a3f0..2796cdb3ae87 100644
--- a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir
+++ b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir
@@ -1,6 +1,12 @@
# RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -start-after \
# RUN: virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \
# RUN: -o - | FileCheck %s
+# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -start-after \
+# RUN: virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \
+# RUN: -o - | FileCheck %s
+# RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu -start-after \
+# RUN: virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \
+# RUN: -o - | FileCheck %s
--- |
; ModuleID = 'a.ll'
@@ -30,7 +36,7 @@
; Function Attrs: nounwind
declare void @llvm.stackprotector(ptr, ptr) #1
- attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }
!llvm.ident = !{!0}
--
2.49.0

View File

@ -316,7 +316,7 @@
#region main package
Name: %{pkg_name_llvm}
Version: %{maj_ver}.%{min_ver}.%{patch_ver}%{?rc_ver:~rc%{rc_ver}}%{?llvm_snapshot_version_suffix:~%{llvm_snapshot_version_suffix}}
Release: 2%{?dist}
Release: 3%{?dist}
Summary: The Low Level Virtual Machine
License: Apache-2.0 WITH LLVM-exception OR NCSA
@ -394,8 +394,7 @@ Patch106: 0001-19-Always-build-shared-libs-for-LLD.patch
#endregion LLD patches
#region polly patches
Patch2001: 0001-20-polly-shared-libs.patch
Patch2101: 0001-20-polly-shared-libs.patch
Patch107: 0001-20-polly-shared-libs.patch
#endregion polly patches
#region RHEL patches
@ -407,6 +406,18 @@ Patch501: 0001-Fix-page-size-constant-on-aarch64-and-ppc64le.patch
# https://github.com/llvm/llvm-project/issues/124001
Patch1901: 0001-SystemZ-Fix-ICE-with-i128-i64-uaddo-carry-chain.patch
# Fix a pgo miscompilation triggered by building Rust 1.87 with pgo on ppc64le.
# https://github.com/llvm/llvm-project/issues/138208
Patch2004: 0001-CodeGenPrepare-Make-sure-instruction-get-from-SunkAd.patch
# Fix Power9/Power10 crbit spilling
# https://github.com/llvm/llvm-project/pull/146424
Patch108: 21-146424.patch
# Fix for highway package build on ppc64le
Patch2005: 0001-PowerPC-Fix-handling-of-undefs-in-the-PPC-isSplatShu.patch
Patch2006: 0001-Add-REQUIRES-asserts-to-test-added-in-145149-because.patch
%if 0%{?rhel} == 8
%global python3_pkgversion 3.12
%global __python3 /usr/bin/python3.12
@ -3432,6 +3443,11 @@ fi
#region changelog
%changelog
* Tue Jul 29 2025 Tom Stellard <tstellar@redhat.com> - 20.1.8-3
- Backport fix for pgo optimized rust toolchain on ppc64le (rhbz#2382683)
- Backport fix for crbit spill miscompile on ppc64le power9 and power10 (rhbz#2383037)
- Backport fix for build of highway package on ppc64le (rhbz#2383182)
* Wed Jul 09 2025 Nikita Popov <npopov@redhat.com> - 20.1.8-1
- Update to LLVM 20.1.8