diff --git a/0001-Add-REQUIRES-asserts-to-test-added-in-145149-because.patch b/0001-Add-REQUIRES-asserts-to-test-added-in-145149-because.patch new file mode 100644 index 0000000..26f372c --- /dev/null +++ b/0001-Add-REQUIRES-asserts-to-test-added-in-145149-because.patch @@ -0,0 +1,26 @@ +From ffc7d5ae2d79f98967943fabb2abfbc1b1e047fd Mon Sep 17 00:00:00 2001 +From: Douglas Yung +Date: Tue, 24 Jun 2025 04:08:34 +0000 +Subject: [PATCH] Add `REQUIRES: asserts` to test added in #145149 because it + uses the `-debug-only=` flag. + +This should fix the test failure when building without asserts. +--- + llvm/test/CodeGen/PowerPC/pr141642.ll | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/llvm/test/CodeGen/PowerPC/pr141642.ll b/llvm/test/CodeGen/PowerPC/pr141642.ll +index 38a706574786..61bda4dfaf53 100644 +--- a/llvm/test/CodeGen/PowerPC/pr141642.ll ++++ b/llvm/test/CodeGen/PowerPC/pr141642.ll +@@ -2,6 +2,7 @@ + ; RUN: FileCheck %s + ; CHECK-NOT: lxvdsx + ; CHECK-NOT: LD_SPLAT ++; REQUIRES: asserts + + define weak_odr dso_local void @unpack(ptr noalias noundef %packed_in) local_unnamed_addr { + entry: +-- +2.49.0 + diff --git a/0001-CodeGenPrepare-Make-sure-instruction-get-from-SunkAd.patch b/0001-CodeGenPrepare-Make-sure-instruction-get-from-SunkAd.patch new file mode 100644 index 0000000..a195bc5 --- /dev/null +++ b/0001-CodeGenPrepare-Make-sure-instruction-get-from-SunkAd.patch @@ -0,0 +1,143 @@ +From c76137f1cfd5758f6889236d49a65f059e6432ff Mon Sep 17 00:00:00 2001 +From: weiguozhi <57237827+weiguozhi@users.noreply.github.com> +Date: Thu, 15 May 2025 09:27:25 -0700 +Subject: [PATCH] [CodeGenPrepare] Make sure instruction get from SunkAddrs is + before MemoryInst (#139303) + +Function optimizeBlock may do optimizations on a block for multiple +times. In the first iteration of the loop, MemoryInst1 may generate a +sunk instruction and store it into SunkAddrs. 
In the second iteration of +the loop, MemoryInst2 may use the same address and then it can reuse the +sunk instruction stored in SunkAddrs, but MemoryInst2 may be before +MemoryInst1 and the corresponding sunk instruction. In order to avoid +use before def error, we need to find appropriate insert position for the + sunk instruction. + +Fixes #138208. + +(cherry picked from commit 59c6d70ed8120b8864e5f796e2bf3de5518a0ef0) +--- + llvm/lib/CodeGen/CodeGenPrepare.cpp | 41 ++++++++++++++--- + .../CodeGenPrepare/X86/sink-addr-reuse.ll | 44 +++++++++++++++++++ + 2 files changed, 80 insertions(+), 5 deletions(-) + create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll + +diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp +index 088062afab17..f779f4b782ae 100644 +--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp ++++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp +@@ -5728,6 +5728,35 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { + return false; + } + ++// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst ++// is the first instruction that will use Addr. So we need to find the first ++// user of Addr in current BB. ++static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, ++ Value *SunkAddr) { ++ if (Addr->hasOneUse()) ++ return MemoryInst->getIterator(); ++ ++ // We already have a SunkAddr in current BB, but we may need to insert cast ++ // instruction after it. ++ if (SunkAddr) { ++ if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr)) ++ return std::next(AddrInst->getIterator()); ++ } ++ ++ // Find the first user of Addr in current BB.
++ Instruction *Earliest = MemoryInst; ++ for (User *U : Addr->users()) { ++ Instruction *UserInst = dyn_cast<Instruction>(U); ++ if (UserInst && UserInst->getParent() == MemoryInst->getParent()) { ++ if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst()) ++ continue; ++ if (UserInst->comesBefore(Earliest)) ++ Earliest = UserInst; ++ } ++ } ++ return Earliest->getIterator(); ++} ++ + /// Sink addressing mode computation immediate before MemoryInst if doing so + /// can be done without increasing register pressure. The need for the + /// register pressure constraint means this can end up being an all or nothing +@@ -5852,11 +5881,6 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, + return Modified; + } + +- // Insert this computation right after this user. Since our caller is +- // scanning from the top of the BB to the bottom, reuse of the expr are +- // guaranteed to happen later. +- IRBuilder<> Builder(MemoryInst); +- + // Now that we determined the addressing expression we want to use and know + // that we have to sink it into this block. Check to see if we have already + // done this for some other load/store instr in this block. If so, reuse +@@ -5867,6 +5891,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, + + Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; + Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); ++ ++ // The current BB may be optimized multiple times, we can't guarantee the ++ // reuse of Addr happens later, call findInsertPos to find an appropriate ++ // insert position.
++ IRBuilder<> Builder(MemoryInst->getParent(), ++ findInsertPos(Addr, MemoryInst, SunkAddr)); ++ + if (SunkAddr) { + LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode + << " for " << *MemoryInst << "\n"); +diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll +new file mode 100644 +index 000000000000..019f31140655 +--- /dev/null ++++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll +@@ -0,0 +1,44 @@ ++; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ++; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s ++ ++target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" ++target triple = "x86_64-grtev4-linux-gnu" ++ ++declare void @g(ptr) ++ ++; %load and %load5 use the same address, %load5 is optimized first, %load is ++; optimized later and reuse the same address computation instruction. We must ++; make sure not to generate use before def error.
++ ++define void @f(ptr %arg) { ++; CHECK-LABEL: define void @f( ++; CHECK-SAME: ptr [[ARG:%.*]]) { ++; CHECK-NEXT: [[BB:.*:]] ++; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64 ++; CHECK-NEXT: call void @g(ptr [[GETELEMENTPTR]]) ++; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64 ++; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8 ++; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56 ++; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8 ++; CHECK-NEXT: [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8 ++; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0) ++; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0 ++; CHECK-NEXT: ret void ++; ++bb: ++ %getelementptr = getelementptr i8, ptr %arg, i64 -64 ++ %getelementptr1 = getelementptr i8, ptr %arg, i64 -56 ++ call void @g(ptr %getelementptr) ++ br label %bb3 ++ ++bb3: ++ %load = load ptr, ptr %getelementptr, align 8 ++ %load4 = load i32, ptr %getelementptr1, align 8 ++ %load5 = load ptr, ptr %getelementptr, align 8 ++ %add = add i32 1, 0 ++ %icmp = icmp eq i32 %add, 0 ++ br i1 %icmp, label %bb7, label %bb7 ++ ++bb7: ++ ret void ++} +-- +2.49.0 + diff --git a/0001-PowerPC-Fix-handling-of-undefs-in-the-PPC-isSplatShu.patch b/0001-PowerPC-Fix-handling-of-undefs-in-the-PPC-isSplatShu.patch new file mode 100644 index 0000000..e3d6135 --- /dev/null +++ b/0001-PowerPC-Fix-handling-of-undefs-in-the-PPC-isSplatShu.patch @@ -0,0 +1,67 @@ +From 735d721de451067c3a618b309703d0b8beb9cacc Mon Sep 17 00:00:00 2001 +From: Wael Yehia +Date: Mon, 23 Jun 2025 13:22:33 -0400 +Subject: [PATCH] [PowerPC] Fix handling of undefs in the + PPC::isSplatShuffleMask query (#145149) + +Currently, the query assumes that a single undef byte implies the rest of +the `EltSize - 1` bytes are undefs, but that's not always true. +e.g. 
isSplatShuffleMask( +<0,1,2,3,4,5,6,7,undef,undef,undef,undef,0,1,2,3>, 8) should return +false. + +--------- + +Co-authored-by: Wael Yehia +--- + llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 13 +++++++++---- + llvm/test/CodeGen/PowerPC/pr141642.ll | 13 +++++++++++++ + 2 files changed, 22 insertions(+), 4 deletions(-) + create mode 100644 llvm/test/CodeGen/PowerPC/pr141642.ll + +diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +index 421a808de667..88c6fe632d26 100644 +--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp ++++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +@@ -2242,10 +2242,15 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { + return false; + + for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { +- if (N->getMaskElt(i) < 0) continue; +- for (unsigned j = 0; j != EltSize; ++j) +- if (N->getMaskElt(i+j) != N->getMaskElt(j)) +- return false; ++ // An UNDEF element is a sequence of UNDEF bytes. 
++ if (N->getMaskElt(i) < 0) { ++ for (unsigned j = 1; j != EltSize; ++j) ++ if (N->getMaskElt(i + j) >= 0) ++ return false; ++ } else ++ for (unsigned j = 0; j != EltSize; ++j) ++ if (N->getMaskElt(i + j) != N->getMaskElt(j)) ++ return false; + } + return true; + } +diff --git a/llvm/test/CodeGen/PowerPC/pr141642.ll b/llvm/test/CodeGen/PowerPC/pr141642.ll +new file mode 100644 +index 000000000000..38a706574786 +--- /dev/null ++++ b/llvm/test/CodeGen/PowerPC/pr141642.ll +@@ -0,0 +1,13 @@ ++; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O0 -debug-only=selectiondag -o - < %s 2>&1 | \ ++; RUN: FileCheck %s ++; CHECK-NOT: lxvdsx ++; CHECK-NOT: LD_SPLAT ++ ++define weak_odr dso_local void @unpack(ptr noalias noundef %packed_in) local_unnamed_addr { ++entry: ++ %ld = load <2 x i32>, ptr %packed_in, align 2 ++ %shuf = shufflevector <2 x i32> %ld, <2 x i32> poison, <4 x i32> ++ %ie = insertelement <4 x i32> %shuf, i32 7, i32 2 ++ store <4 x i32> %shuf, ptr %packed_in, align 2 ++ ret void ++} +-- +2.49.0 + diff --git a/21-146424.patch b/21-146424.patch new file mode 100644 index 0000000..5b95886 --- /dev/null +++ b/21-146424.patch @@ -0,0 +1,94 @@ +From eba58195932f37fb461ae17c69fc517181b99c9a Mon Sep 17 00:00:00 2001 +From: Paul Murphy +Date: Mon, 30 Jun 2025 10:13:37 -0500 +Subject: [PATCH] [PowerPC] fix lowering of SPILL_CRBIT on pwr9 and pwr10 + +If a copy exists between creation of a crbit and a spill, machine-cp +may delete the copy since it seems unaware of the relation between a cr +and crbit. A fix was previously made for the generic ppc64 lowering. It +should be applied to the pwr9 and pwr10 variants too. + +Likewise, relax and extend the pwr8 test to verify pwr9 and pwr10 +codegen too. + +This fixes #143989. 
+--- + llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 17 +++++++++++------ + .../PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir | 8 +++++++- + 2 files changed, 18 insertions(+), 7 deletions(-) + +diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +index 76dca4794e05..78d254a55fd9 100644 +--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp ++++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +@@ -1102,13 +1102,20 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, + SpillsKnownBit = true; + break; + default: ++ // When spilling a CR bit, The super register may not be explicitly defined ++ // (i.e. it can be defined by a CR-logical that only defines the subreg) so ++ // we state that the CR field is undef. Also, in order to preserve the kill ++ // flag on the CR bit, we add it as an implicit use. ++ + // On Power10, we can use SETNBC to spill all CR bits. SETNBC will set all + // bits (specifically, it produces a -1 if the CR bit is set). Ultimately, + // the bit that is of importance to us is bit 32 (bit 0 of a 32-bit + // register), and SETNBC will set this. + if (Subtarget.isISA3_1()) { + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETNBC8 : PPC::SETNBC), Reg) +- .addReg(SrcReg, RegState::Undef); ++ .addReg(SrcReg, RegState::Undef) ++ .addReg(SrcReg, RegState::Implicit | ++ getKillRegState(MI.getOperand(0).isKill())); + break; + } + +@@ -1122,16 +1129,14 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, + SrcReg == PPC::CR4LT || SrcReg == PPC::CR5LT || + SrcReg == PPC::CR6LT || SrcReg == PPC::CR7LT) { + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETB8 : PPC::SETB), Reg) +- .addReg(getCRFromCRBit(SrcReg), RegState::Undef); ++ .addReg(getCRFromCRBit(SrcReg), RegState::Undef) ++ .addReg(SrcReg, RegState::Implicit | ++ getKillRegState(MI.getOperand(0).isKill())); + break; + } + } + + // We need to move the CR field that contains the CR bit we are spilling. 
+- // The super register may not be explicitly defined (i.e. it can be defined +- // by a CR-logical that only defines the subreg) so we state that the CR +- // field is undef. Also, in order to preserve the kill flag on the CR bit, +- // we add it as an implicit use. + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg) + .addReg(getCRFromCRBit(SrcReg), RegState::Undef) + .addReg(SrcReg, +diff --git a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir +index 41e21248a3f0..2796cdb3ae87 100644 +--- a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir ++++ b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir +@@ -1,6 +1,12 @@ + # RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -start-after \ + # RUN: virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \ + # RUN: -o - | FileCheck %s ++# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -start-after \ ++# RUN: virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \ ++# RUN: -o - | FileCheck %s ++# RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu -start-after \ ++# RUN: virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \ ++# RUN: -o - | FileCheck %s + + --- | + ; ModuleID = 'a.ll' +@@ -30,7 +36,7 @@ + ; Function Attrs: nounwind + declare void @llvm.stackprotector(ptr, ptr) #1 + +- attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" } ++ attributes #0 = { nounwind 
"correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { nounwind } + + !llvm.ident = !{!0} +-- +2.49.0 + diff --git a/llvm.spec b/llvm.spec index 010f0e5..90a1a6e 100644 --- a/llvm.spec +++ b/llvm.spec @@ -316,7 +316,7 @@ #region main package Name: %{pkg_name_llvm} Version: %{maj_ver}.%{min_ver}.%{patch_ver}%{?rc_ver:~rc%{rc_ver}}%{?llvm_snapshot_version_suffix:~%{llvm_snapshot_version_suffix}} -Release: 2%{?dist} +Release: 3%{?dist} Summary: The Low Level Virtual Machine License: Apache-2.0 WITH LLVM-exception OR NCSA @@ -394,8 +394,7 @@ Patch106: 0001-19-Always-build-shared-libs-for-LLD.patch #endregion LLD patches #region polly patches -Patch2001: 0001-20-polly-shared-libs.patch -Patch2101: 0001-20-polly-shared-libs.patch +Patch107: 0001-20-polly-shared-libs.patch #endregion polly patches #region RHEL patches @@ -407,6 +406,18 @@ Patch501: 0001-Fix-page-size-constant-on-aarch64-and-ppc64le.patch # https://github.com/llvm/llvm-project/issues/124001 Patch1901: 0001-SystemZ-Fix-ICE-with-i128-i64-uaddo-carry-chain.patch +# Fix a pgo miscompilation triggered by building Rust 1.87 with pgo on ppc64le. 
+# https://github.com/llvm/llvm-project/issues/138208 +Patch2004: 0001-CodeGenPrepare-Make-sure-instruction-get-from-SunkAd.patch + +# Fix Power9/Power10 crbit spilling +# https://github.com/llvm/llvm-project/pull/146424 +Patch108: 21-146424.patch + +# Fix for highway package build on ppc64le +Patch2005: 0001-PowerPC-Fix-handling-of-undefs-in-the-PPC-isSplatShu.patch +Patch2006: 0001-Add-REQUIRES-asserts-to-test-added-in-145149-because.patch + %if 0%{?rhel} == 8 %global python3_pkgversion 3.12 %global __python3 /usr/bin/python3.12 @@ -3432,6 +3443,11 @@ fi #region changelog %changelog +* Tue Jul 29 2025 Tom Stellard - 20.1.8-3 +- Backport fix for pgo optimized rust toolchain on ppc64le (rhbz#2382683) +- Backport fix for crbit spill miscompile on ppc64le power9 and power10 (rhbz#2383037) +- Backport fix for build of highway package on ppc64le (rhbz#2383182) + * Wed Jul 09 2025 Nikita Popov - 20.1.8-1 - Update to LLVM 20.1.8