diff --git a/.centos-ignore b/.centos-ignore deleted file mode 100644 index 0a86052..0000000 --- a/.centos-ignore +++ /dev/null @@ -1,9 +0,0 @@ -# List of files and directories that are not needed on CentOS/RHEL. -/centos-sync.sh -/ci.fmf -/prepare-copr.sh -/tests/kernel-ark-build.fmf -/Makefile -/.copr -/.git-blame-ignore-revs -/.packit.yaml diff --git a/0001-23-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch b/0001-23-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch new file mode 100644 index 0000000..c79b55d --- /dev/null +++ b/0001-23-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch @@ -0,0 +1,27 @@ +From 49f827b09db549de62dcaf8b90b3fcb3e08c0ee5 Mon Sep 17 00:00:00 2001 +From: Serge Guelton +Date: Mon, 6 Mar 2023 12:37:48 +0100 +Subject: [PATCH] Make -funwind-tables the default on all archs + +--- + clang/lib/Driver/ToolChains/Gnu.cpp | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp +index 24fbdcffc07b..8fed46b49515 100644 +--- a/clang/lib/Driver/ToolChains/Gnu.cpp ++++ b/clang/lib/Driver/ToolChains/Gnu.cpp +@@ -3082,6 +3082,10 @@ Generic_GCC::getDefaultUnwindTableLevel(const ArgList &Args) const { + case llvm::Triple::riscv64be: + case llvm::Triple::x86: + case llvm::Triple::x86_64: ++ // Enable -funwind-tables on all architectures supported by Fedora: ++ // rhbz#1655546 ++ case llvm::Triple::systemz: ++ case llvm::Triple::arm: + return UnwindTableLevel::Asynchronous; + default: + return UnwindTableLevel::None; +-- +2.39.1 + diff --git a/0001-Add-REQUIRES-asserts-to-test-added-in-145149-because.patch b/0001-Add-REQUIRES-asserts-to-test-added-in-145149-because.patch deleted file mode 100644 index 26f372c..0000000 --- a/0001-Add-REQUIRES-asserts-to-test-added-in-145149-because.patch +++ /dev/null @@ -1,26 +0,0 @@ -From ffc7d5ae2d79f98967943fabb2abfbc1b1e047fd Mon Sep 17 00:00:00 2001 -From: Douglas Yung -Date: Tue, 24 Jun 2025 04:08:34 +0000 
-Subject: [PATCH] Add `REQUIRES: asserts` to test added in #145149 because it - uses the `-debug-only=` flag. - -This should fix the test failure when building without asserts. ---- - llvm/test/CodeGen/PowerPC/pr141642.ll | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/llvm/test/CodeGen/PowerPC/pr141642.ll b/llvm/test/CodeGen/PowerPC/pr141642.ll -index 38a706574786..61bda4dfaf53 100644 ---- a/llvm/test/CodeGen/PowerPC/pr141642.ll -+++ b/llvm/test/CodeGen/PowerPC/pr141642.ll -@@ -2,6 +2,7 @@ - ; RUN: FileCheck %s - ; CHECK-NOT: lxvdsx - ; CHECK-NOT: LD_SPLAT -+; REQUIRES: asserts - - define weak_odr dso_local void @unpack(ptr noalias noundef %packed_in) local_unnamed_addr { - entry: --- -2.49.0 - diff --git a/0001-CGP-Bail-out-if-Base-Scaled-Reg-does-not-dominate-in.patch b/0001-CGP-Bail-out-if-Base-Scaled-Reg-does-not-dominate-in.patch deleted file mode 100644 index 0c2d067..0000000 --- a/0001-CGP-Bail-out-if-Base-Scaled-Reg-does-not-dominate-in.patch +++ /dev/null @@ -1,131 +0,0 @@ -From dde30a47313bf52fef02bbcb1de931a8d725659f Mon Sep 17 00:00:00 2001 -From: Florian Hahn -Date: Fri, 6 Jun 2025 12:38:30 +0100 -Subject: [PATCH] [CGP] Bail out if (Base|Scaled)Reg does not dominate insert - point. (#142949) - -(Base|Scaled)Reg may not dominate the chosen insert point, if there are -multiple uses of the address. Bail out if that's the case, otherwise we -will generate invalid IR. - -In some cases, we could probably adjust the insert point or hoist the -(Base|Scaled)Reg. - -Fixes https://github.com/llvm/llvm-project/issues/142830. 
- -PR: https://github.com/llvm/llvm-project/pull/142949 ---- - llvm/lib/CodeGen/CodeGenPrepare.cpp | 13 +++- - .../X86/sink-addrmode-reg-does-not-geps.ll | 76 +++++++++++++++++++ - 2 files changed, 87 insertions(+), 2 deletions(-) - create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll - -diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp -index 822ed6283117..32348a899683 100644 ---- a/llvm/lib/CodeGen/CodeGenPrepare.cpp -+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp -@@ -5945,8 +5945,17 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, - // The current BB may be optimized multiple times, we can't guarantee the - // reuse of Addr happens later, call findInsertPos to find an appropriate - // insert position. -- IRBuilder<> Builder(MemoryInst->getParent(), -- findInsertPos(Addr, MemoryInst, SunkAddr)); -+ auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr); -+ -+ // TODO: Adjust insert point considering (Base|Scaled)Reg if possible. 
-+ if (!SunkAddr) { -+ auto &DT = getDT(*MemoryInst->getFunction()); -+ if ((AddrMode.BaseReg && !DT.dominates(AddrMode.BaseReg, &*InsertPos)) || -+ (AddrMode.ScaledReg && !DT.dominates(AddrMode.ScaledReg, &*InsertPos))) -+ return Modified; -+ } -+ -+ IRBuilder<> Builder(MemoryInst->getParent(), InsertPos); - - if (SunkAddr) { - LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode -diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll -new file mode 100644 -index 000000000000..1640bafbd0bf ---- /dev/null -+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll -@@ -0,0 +1,76 @@ -+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -+; RUN: opt -S -passes='require,function(codegenprepare)' %s | FileCheck %s -+ -+target triple = "x86_64-unknown-linux" -+ -+declare i1 @cond(float) -+ -+define void @scaled_reg_does_not_dominate_insert_point(ptr %src) { -+; CHECK-LABEL: define void @scaled_reg_does_not_dominate_insert_point( -+; CHECK-SAME: ptr [[SRC:%.*]]) { -+; CHECK-NEXT: [[BB:.*]]: -+; CHECK-NEXT: br label %[[LOOP:.*]] -+; CHECK: [[LOOP]]: -+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[BB]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -+; CHECK-NEXT: [[SUNKADDR2:%.*]] = mul i64 [[IV_NEXT]], 2 -+; CHECK-NEXT: [[SUNKADDR3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[SUNKADDR2]] -+; CHECK-NEXT: [[SUNKADDR4:%.*]] = getelementptr i8, ptr [[SUNKADDR3]], i64 6 -+; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[SUNKADDR4]], align 4 -+; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV]], 2 -+; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[SUNKADDR]] -+; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[SUNKADDR1]], align 4 -+; CHECK-NEXT: [[TMP0:%.*]] = call i1 @cond(float [[L_0]]) -+; CHECK-NEXT: [[C:%.*]] = call i1 @cond(float [[L_1]]) 
-+; CHECK-NEXT: br i1 [[C]], label %[[LOOP]], label %[[EXIT:.*]] -+; CHECK: [[EXIT]]: -+; CHECK-NEXT: ret void -+; -+bb: -+ %gep.base = getelementptr i8, ptr %src, i64 8 -+ br label %loop -+ -+loop: -+ %iv = phi i64 [ 0, %bb ], [ %iv.next, %loop ] -+ %iv.shl = shl i64 %iv, 1 -+ %gep.shl = getelementptr i8, ptr %gep.base, i64 %iv.shl -+ %gep.sub = getelementptr i8, ptr %gep.shl, i64 -8 -+ %iv.next = add i64 %iv, 1 -+ %l.0 = load float, ptr %gep.shl, align 4 -+ %l.1 = load float, ptr %gep.sub, align 4 -+ call i1 @cond(float %l.0) -+ %c = call i1 @cond(float %l.1) -+ br i1 %c, label %loop, label %exit -+ -+exit: -+ ret void -+} -+ -+define void @check_dt_after_modifying_cfg(ptr %dst, i64 %x, i8 %y, i8 %z) { -+; CHECK-LABEL: define void @check_dt_after_modifying_cfg( -+; CHECK-SAME: ptr [[DST:%.*]], i64 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]]) { -+; CHECK-NEXT: [[ENTRY:.*]]: -+; CHECK-NEXT: [[OFFSET:%.*]] = lshr i64 [[X]], 2 -+; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i8 [[Z]] -+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SEL_FROZEN]], 0 -+; CHECK-NEXT: br i1 [[CMP]], label %[[SELECT_END:.*]], label %[[SELECT_FALSE_SINK:.*]] -+; CHECK: [[SELECT_FALSE_SINK]]: -+; CHECK-NEXT: [[SMIN:%.*]] = tail call i8 @llvm.smin.i8(i8 [[Y]], i8 0) -+; CHECK-NEXT: br label %[[SELECT_END]] -+; CHECK: [[SELECT_END]]: -+; CHECK-NEXT: [[SEL:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[SMIN]], %[[SELECT_FALSE_SINK]] ] -+; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET]] -+; CHECK-NEXT: store i8 [[SEL]], ptr [[SUNKADDR]], align 1 -+; CHECK-NEXT: ret void -+; -+entry: -+ %offset = lshr i64 %x, 2 -+ %gep.dst = getelementptr i8, ptr %dst, i64 %offset -+ %smin = tail call i8 @llvm.smin.i8(i8 %y, i8 0) -+ %cmp = icmp slt i8 %z, 0 -+ %sel = select i1 %cmp, i8 0, i8 %smin -+ store i8 %sel, ptr %gep.dst, align 1 -+ ret void -+} -+ -+declare i8 @llvm.smin.i8(i8, i8) #0 --- -2.50.1 - diff --git a/0001-CodeGenPrepare-Make-sure-instruction-get-from-SunkAd.patch 
b/0001-CodeGenPrepare-Make-sure-instruction-get-from-SunkAd.patch deleted file mode 100644 index a195bc5..0000000 --- a/0001-CodeGenPrepare-Make-sure-instruction-get-from-SunkAd.patch +++ /dev/null @@ -1,143 +0,0 @@ -From c76137f1cfd5758f6889236d49a65f059e6432ff Mon Sep 17 00:00:00 2001 -From: weiguozhi <57237827+weiguozhi@users.noreply.github.com> -Date: Thu, 15 May 2025 09:27:25 -0700 -Subject: [PATCH] [CodeGenPrepare] Make sure instruction get from SunkAddrs is - before MemoryInst (#139303) - -Function optimizeBlock may do optimizations on a block for multiple -times. In the first iteration of the loop, MemoryInst1 may generate a -sunk instruction and store it into SunkAddrs. In the second iteration of -the loop, MemoryInst2 may use the same address and then it can reuse the -sunk instruction stored in SunkAddrs, but MemoryInst2 may be before -MemoryInst1 and the corresponding sunk instruction. In order to avoid -use before def error, we need to find appropriate insert position for the - sunk instruction. - -Fixes #138208. - -(cherry picked from commit 59c6d70ed8120b8864e5f796e2bf3de5518a0ef0) ---- - llvm/lib/CodeGen/CodeGenPrepare.cpp | 41 ++++++++++++++--- - .../CodeGenPrepare/X86/sink-addr-reuse.ll | 44 +++++++++++++++++++ - 2 files changed, 80 insertions(+), 5 deletions(-) - create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll - -diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp -index 088062afab17..f779f4b782ae 100644 ---- a/llvm/lib/CodeGen/CodeGenPrepare.cpp -+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp -@@ -5728,6 +5728,35 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { - return false; - } - -+// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst -+// is the first instruction that will use Addr. So we need to find the first -+// user of Addr in current BB. 
-+static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, -+ Value *SunkAddr) { -+ if (Addr->hasOneUse()) -+ return MemoryInst->getIterator(); -+ -+ // We already have a SunkAddr in current BB, but we may need to insert cast -+ // instruction after it. -+ if (SunkAddr) { -+ if (Instruction *AddrInst = dyn_cast(SunkAddr)) -+ return std::next(AddrInst->getIterator()); -+ } -+ -+ // Find the first user of Addr in current BB. -+ Instruction *Earliest = MemoryInst; -+ for (User *U : Addr->users()) { -+ Instruction *UserInst = dyn_cast(U); -+ if (UserInst && UserInst->getParent() == MemoryInst->getParent()) { -+ if (isa(UserInst) || UserInst->isDebugOrPseudoInst()) -+ continue; -+ if (UserInst->comesBefore(Earliest)) -+ Earliest = UserInst; -+ } -+ } -+ return Earliest->getIterator(); -+} -+ - /// Sink addressing mode computation immediate before MemoryInst if doing so - /// can be done without increasing register pressure. The need for the - /// register pressure constraint means this can end up being an all or nothing -@@ -5852,11 +5881,6 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, - return Modified; - } - -- // Insert this computation right after this user. Since our caller is -- // scanning from the top of the BB to the bottom, reuse of the expr are -- // guaranteed to happen later. -- IRBuilder<> Builder(MemoryInst); -- - // Now that we determined the addressing expression we want to use and know - // that we have to sink it into this block. Check to see if we have already - // done this for some other load/store instr in this block. If so, reuse -@@ -5867,6 +5891,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, - - Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? 
SunkAddrVH : nullptr; - Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); -+ -+ // The current BB may be optimized multiple times, we can't guarantee the -+ // reuse of Addr happens later, call findInsertPos to find an appropriate -+ // insert position. -+ IRBuilder<> Builder(MemoryInst->getParent(), -+ findInsertPos(Addr, MemoryInst, SunkAddr)); -+ - if (SunkAddr) { - LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode - << " for " << *MemoryInst << "\n"); -diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll -new file mode 100644 -index 000000000000..019f31140655 ---- /dev/null -+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll -@@ -0,0 +1,44 @@ -+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -+; RUN: opt -S -p 'require,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s -+ -+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" -+target triple = "x86_64-grtev4-linux-gnu" -+ -+declare void @g(ptr) -+ -+; %load and %load5 use the same address, %load5 is optimized first, %load is -+; optimized later and reuse the same address computation instruction. We must -+; make sure not to generate use before def error. 
-+ -+define void @f(ptr %arg) { -+; CHECK-LABEL: define void @f( -+; CHECK-SAME: ptr [[ARG:%.*]]) { -+; CHECK-NEXT: [[BB:.*:]] -+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64 -+; CHECK-NEXT: call void @g(ptr [[GETELEMENTPTR]]) -+; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64 -+; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8 -+; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56 -+; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8 -+; CHECK-NEXT: [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8 -+; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0) -+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0 -+; CHECK-NEXT: ret void -+; -+bb: -+ %getelementptr = getelementptr i8, ptr %arg, i64 -64 -+ %getelementptr1 = getelementptr i8, ptr %arg, i64 -56 -+ call void @g(ptr %getelementptr) -+ br label %bb3 -+ -+bb3: -+ %load = load ptr, ptr %getelementptr, align 8 -+ %load4 = load i32, ptr %getelementptr1, align 8 -+ %load5 = load ptr, ptr %getelementptr, align 8 -+ %add = add i32 1, 0 -+ %icmp = icmp eq i32 %add, 0 -+ br i1 %icmp, label %bb7, label %bb7 -+ -+bb7: -+ ret void -+} --- -2.49.0 - diff --git a/0001-PowerPC-Add-check-for-cast-when-shufflevector-172443.patch b/0001-PowerPC-Add-check-for-cast-when-shufflevector-172443.patch new file mode 100644 index 0000000..1f6c0ad --- /dev/null +++ b/0001-PowerPC-Add-check-for-cast-when-shufflevector-172443.patch @@ -0,0 +1,137 @@ +From 98b82f90dfb7865ae4dbfcb5a83a9e817e7894a1 Mon Sep 17 00:00:00 2001 +From: Kevin Per +Date: Thu, 18 Dec 2025 10:14:01 +0100 +Subject: [PATCH] [PowerPC]: Add check for cast when shufflevector (#172443) + +The crash happens because the cast for `Mask = +cast(Res)->getMask();` fails for node `t197: v16i8 += vector_shuffle<16,17,18,19,4,5,6,7,8,9,10,11,u,u,u,u> t196, t196`. 
+However, both `LHS` and `RHS` are the same node, so +`DAG.getCommutedVectorShuffle` doesn't return a `ShuffleVectorSDNode` +and crashes. The fix is to add a check before the cast is performed. + +Closes https://github.com/llvm/llvm-project/issues/172265 +--- + llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 + + .../test/CodeGen/PowerPC/vec_shuffle_le_be.ll | 94 +++++++++++++++++++ + 2 files changed, 98 insertions(+) + create mode 100644 llvm/test/CodeGen/PowerPC/vec_shuffle_le_be.ll + +diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +index 5b1d9f814806..21297b812968 100644 +--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp ++++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +@@ -16886,6 +16886,10 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN, + RHS.getOpcode() != ISD::VECTOR_SHUFFLE) { + std::swap(LHS, RHS); + Res = DAG.getCommutedVectorShuffle(*SVN); ++ ++ if (!isa(Res)) ++ return Res; ++ + Mask = cast(Res)->getMask(); + } + +diff --git a/llvm/test/CodeGen/PowerPC/vec_shuffle_le_be.ll b/llvm/test/CodeGen/PowerPC/vec_shuffle_le_be.ll +new file mode 100644 +index 000000000000..24c1e54dd952 +--- /dev/null ++++ b/llvm/test/CodeGen/PowerPC/vec_shuffle_le_be.ll +@@ -0,0 +1,94 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck -check-prefix=CHECK-LE %s ++; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck -check-prefix=CHECK-BE %s ++ ++define <32 x i32> @issue_172265(<32 x i32> %BS_ARG_1, <3 x i32> %0) { ++; CHECK-LABEL: issue_172265: ++; CHECK: # %bb.0: # %entry ++; CHECK-NEXT: addis 3, 2, .LCPI18_0@toc@ha ++; CHECK-NEXT: vspltw 3, 10, 1 ++; CHECK-NEXT: addi 3, 3, .LCPI18_0@toc@l ++; CHECK-NEXT: vmr 7, 3 ++; CHECK-NEXT: lvx 4, 0, 3 ++; CHECK-NEXT: addis 3, 2, .LCPI18_1@toc@ha ++; CHECK-NEXT: addi 3, 3, .LCPI18_1@toc@l ++; 
CHECK-NEXT: vmr 8, 3 ++; CHECK-NEXT: vmr 9, 3 ++; CHECK-NEXT: vperm 4, 3, 3, 4 ++; CHECK-NEXT: lvx 1, 0, 3 ++; CHECK-NEXT: addis 3, 2, .LCPI18_2@toc@ha ++; CHECK-NEXT: addi 3, 3, .LCPI18_2@toc@l ++; CHECK-NEXT: lvx 5, 0, 3 ++; CHECK-NEXT: addis 3, 2, .LCPI18_3@toc@ha ++; CHECK-NEXT: addi 3, 3, .LCPI18_3@toc@l ++; CHECK-NEXT: lvx 6, 0, 3 ++; CHECK-NEXT: addis 3, 2, .LCPI18_4@toc@ha ++; CHECK-NEXT: addi 3, 3, .LCPI18_4@toc@l ++; CHECK-NEXT: vperm 4, 2, 4, 1 ++; CHECK-NEXT: lvx 2, 0, 3 ++; CHECK-NEXT: vperm 0, 3, 3, 5 ++; CHECK-NEXT: vperm 5, 3, 3, 6 ++; CHECK-NEXT: vperm 6, 3, 3, 2 ++; CHECK-NEXT: vmr 2, 0 ++; CHECK-NEXT: blr ++; CHECK-LE-LABEL: issue_172265: ++; CHECK-LE: # %bb.0: # %entry ++; CHECK-LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha ++; CHECK-LE-NEXT: xxspltw 35, 42, 1 ++; CHECK-LE-NEXT: addi 3, 3, .LCPI0_0@toc@l ++; CHECK-LE-NEXT: vmr 7, 3 ++; CHECK-LE-NEXT: vmr 8, 3 ++; CHECK-LE-NEXT: vmr 9, 3 ++; CHECK-LE-NEXT: lxvd2x 0, 0, 3 ++; CHECK-LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha ++; CHECK-LE-NEXT: addi 3, 3, .LCPI0_1@toc@l ++; CHECK-LE-NEXT: lxvd2x 1, 0, 3 ++; CHECK-LE-NEXT: addis 3, 2, .LCPI0_2@toc@ha ++; CHECK-LE-NEXT: addi 3, 3, .LCPI0_2@toc@l ++; CHECK-LE-NEXT: lxvd2x 2, 0, 3 ++; CHECK-LE-NEXT: addis 3, 2, .LCPI0_3@toc@ha ++; CHECK-LE-NEXT: addi 3, 3, .LCPI0_3@toc@l ++; CHECK-LE-NEXT: xxswapd 36, 0 ++; CHECK-LE-NEXT: lxvd2x 0, 0, 3 ++; CHECK-LE-NEXT: vperm 4, 2, 3, 4 ++; CHECK-LE-NEXT: xxswapd 37, 1 ++; CHECK-LE-NEXT: vperm 2, 3, 3, 5 ++; CHECK-LE-NEXT: xxswapd 32, 2 ++; CHECK-LE-NEXT: vperm 5, 3, 3, 0 ++; CHECK-LE-NEXT: xxswapd 33, 0 ++; CHECK-LE-NEXT: vperm 6, 3, 3, 1 ++; CHECK-LE-NEXT: blr ++; ++; CHECK-BE-LABEL: issue_172265: ++; CHECK-BE: # %bb.0: # %entry ++; CHECK-BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha ++; CHECK-BE-NEXT: vspltw 3, 10, 2 ++; CHECK-BE-NEXT: addi 3, 3, .LCPI0_0@toc@l ++; CHECK-BE-NEXT: vmr 7, 3 ++; CHECK-BE-NEXT: lvx 4, 0, 3 ++; CHECK-BE-NEXT: addis 3, 2, .LCPI0_2@toc@ha ++; CHECK-BE-NEXT: addi 3, 3, .LCPI0_2@toc@l ++; CHECK-BE-NEXT: lvx 5, 0, 
3 ++; CHECK-BE-NEXT: addis 3, 2, .LCPI0_3@toc@ha ++; CHECK-BE-NEXT: addi 3, 3, .LCPI0_3@toc@l ++; CHECK-BE-NEXT: vperm 0, 3, 3, 5 ++; CHECK-BE-NEXT: lvx 5, 0, 3 ++; CHECK-BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha ++; CHECK-BE-NEXT: addi 3, 3, .LCPI0_1@toc@l ++; CHECK-BE-NEXT: lvx 1, 0, 3 ++; CHECK-BE-NEXT: addis 3, 2, .LCPI0_4@toc@ha ++; CHECK-BE-NEXT: addi 3, 3, .LCPI0_4@toc@l ++; CHECK-BE-NEXT: vperm 4, 3, 3, 4 ++; CHECK-BE-NEXT: vperm 4, 4, 2, 1 ++; CHECK-BE-NEXT: lvx 2, 0, 3 ++; CHECK-BE-NEXT: vperm 5, 3, 3, 5 ++; CHECK-BE-NEXT: vperm 6, 3, 3, 2 ++; CHECK-BE-NEXT: vmr 2, 0 ++; CHECK-BE-NEXT: vmr 8, 3 ++; CHECK-BE-NEXT: vmr 9, 3 ++; CHECK-BE-NEXT: blr ++entry: ++ %vecinit37 = shufflevector <3 x i32> %0, <3 x i32> zeroinitializer, <32 x i32> ++ %shuffle56 = shufflevector <32 x i32> %vecinit37, <32 x i32> %BS_ARG_1, <32 x i32> ++ ret <32 x i32> %shuffle56 ++} +-- +2.52.0 + diff --git a/0001-PowerPC-Fix-handling-of-undefs-in-the-PPC-isSplatShu.patch b/0001-PowerPC-Fix-handling-of-undefs-in-the-PPC-isSplatShu.patch deleted file mode 100644 index e3d6135..0000000 --- a/0001-PowerPC-Fix-handling-of-undefs-in-the-PPC-isSplatShu.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 735d721de451067c3a618b309703d0b8beb9cacc Mon Sep 17 00:00:00 2001 -From: Wael Yehia -Date: Mon, 23 Jun 2025 13:22:33 -0400 -Subject: [PATCH] [PowerPC] Fix handling of undefs in the - PPC::isSplatShuffleMask query (#145149) - -Currently, the query assumes that a single undef byte implies the rest of -the `EltSize - 1` bytes are undefs, but that's not always true. -e.g. isSplatShuffleMask( -<0,1,2,3,4,5,6,7,undef,undef,undef,undef,0,1,2,3>, 8) should return -false. 
- ---------- - -Co-authored-by: Wael Yehia ---- - llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 13 +++++++++---- - llvm/test/CodeGen/PowerPC/pr141642.ll | 13 +++++++++++++ - 2 files changed, 22 insertions(+), 4 deletions(-) - create mode 100644 llvm/test/CodeGen/PowerPC/pr141642.ll - -diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp -index 421a808de667..88c6fe632d26 100644 ---- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp -+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp -@@ -2242,10 +2242,15 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { - return false; - - for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { -- if (N->getMaskElt(i) < 0) continue; -- for (unsigned j = 0; j != EltSize; ++j) -- if (N->getMaskElt(i+j) != N->getMaskElt(j)) -- return false; -+ // An UNDEF element is a sequence of UNDEF bytes. -+ if (N->getMaskElt(i) < 0) { -+ for (unsigned j = 1; j != EltSize; ++j) -+ if (N->getMaskElt(i + j) >= 0) -+ return false; -+ } else -+ for (unsigned j = 0; j != EltSize; ++j) -+ if (N->getMaskElt(i + j) != N->getMaskElt(j)) -+ return false; - } - return true; - } -diff --git a/llvm/test/CodeGen/PowerPC/pr141642.ll b/llvm/test/CodeGen/PowerPC/pr141642.ll -new file mode 100644 -index 000000000000..38a706574786 ---- /dev/null -+++ b/llvm/test/CodeGen/PowerPC/pr141642.ll -@@ -0,0 +1,13 @@ -+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O0 -debug-only=selectiondag -o - < %s 2>&1 | \ -+; RUN: FileCheck %s -+; CHECK-NOT: lxvdsx -+; CHECK-NOT: LD_SPLAT -+ -+define weak_odr dso_local void @unpack(ptr noalias noundef %packed_in) local_unnamed_addr { -+entry: -+ %ld = load <2 x i32>, ptr %packed_in, align 2 -+ %shuf = shufflevector <2 x i32> %ld, <2 x i32> poison, <4 x i32> -+ %ie = insertelement <4 x i32> %shuf, i32 7, i32 2 -+ store <4 x i32> %shuf, ptr %packed_in, align 2 -+ ret void -+} --- -2.49.0 - diff --git 
a/0001-SystemZ-Fix-code-in-widening-vector-multiplication-1.patch b/0001-SystemZ-Fix-code-in-widening-vector-multiplication-1.patch new file mode 100644 index 0000000..4de2dc9 --- /dev/null +++ b/0001-SystemZ-Fix-code-in-widening-vector-multiplication-1.patch @@ -0,0 +1,191 @@ +From fc12fc635b96e9fa521a33eb31336c539eed1918 Mon Sep 17 00:00:00 2001 +From: sujianIBM <98488060+sujianIBM@users.noreply.github.com> +Date: Thu, 31 Jul 2025 13:18:23 -0400 +Subject: [PATCH] [SystemZ] Fix code in widening vector multiplication + (#150836) + +Commit cdc7864 has an error which would wrongly fold widening +multiplications into an even/odd widening operation. +This PR fixes it and adds tests to check scenarios which should not be +folded into an even/odd widening operation are actually not. +--- + .../Target/SystemZ/SystemZISelLowering.cpp | 2 +- + llvm/test/CodeGen/SystemZ/vec-mul-07.ll | 114 ++++++++++++++++++ + 2 files changed, 115 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +index e30d7235b81b..fb0a47dc9dc4 100644 +--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp ++++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +@@ -9044,7 +9044,7 @@ static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG, + if (unsigned(ShuffleMask[Elt]) != 2 * Elt) + CanUseEven = false; + if (unsigned(ShuffleMask[Elt]) != 2 * Elt + 1) +- CanUseEven = true; ++ CanUseOdd = false; + } + Op = Op.getOperand(0); + if (CanUseEven) +diff --git a/llvm/test/CodeGen/SystemZ/vec-mul-07.ll b/llvm/test/CodeGen/SystemZ/vec-mul-07.ll +index 73c7a8dec5df..583561625cfc 100644 +--- a/llvm/test/CodeGen/SystemZ/vec-mul-07.ll ++++ b/llvm/test/CodeGen/SystemZ/vec-mul-07.ll +@@ -3,6 +3,23 @@ + ; + ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + ++; Test a v16i8 -> v8i16 unsigned widening multiplication ++; which is not folded into an even/odd widening operation. 
++define <8 x i16> @f1_not(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f1_not: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vuplhb %v0, %v24 ++; CHECK-NEXT: vuplhb %v1, %v26 ++; CHECK-NEXT: vmlhw %v24, %v0, %v1 ++; CHECK-NEXT: br %r14 ++ %shuf1 = shufflevector <16 x i8> %val1, <16 x i8> poison, <8 x i32> ++ %zext1 = zext <8 x i8> %shuf1 to <8 x i16> ++ %shuf2 = shufflevector <16 x i8> %val2, <16 x i8> poison, <8 x i32> ++ %zext2 = zext <8 x i8> %shuf2 to <8 x i16> ++ %ret = mul <8 x i16> %zext1, %zext2 ++ ret <8 x i16> %ret ++} ++ + ; Test a v16i8 (even) -> v8i16 unsigned widening multiplication. + define <8 x i16> @f1(<16 x i8> %val1, <16 x i8> %val2) { + ; CHECK-LABEL: f1: +@@ -31,6 +48,23 @@ define <8 x i16> @f2(<16 x i8> %val1, <16 x i8> %val2) { + ret <8 x i16> %ret + } + ++; Test a v16i8 -> v8i16 signed widening multiplication ++; which is not folded into an even/odd widening operation. ++define <8 x i16> @f3_not(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f3_not: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vuphb %v0, %v26 ++; CHECK-NEXT: vuphb %v1, %v24 ++; CHECK-NEXT: vmlhw %v24, %v1, %v0 ++; CHECK-NEXT: br %r14 ++ %shuf1 = shufflevector <16 x i8> %val1, <16 x i8> poison, <8 x i32> ++ %sext1 = sext <8 x i8> %shuf1 to <8 x i16> ++ %shuf2 = shufflevector <16 x i8> %val2, <16 x i8> poison, <8 x i32> ++ %sext2 = sext <8 x i8> %shuf2 to <8 x i16> ++ %ret = mul <8 x i16> %sext1, %sext2 ++ ret <8 x i16> %ret ++} ++ + ; Test a v16i8 (even) -> v8i16 signed widening multiplication. + define <8 x i16> @f3(<16 x i8> %val1, <16 x i8> %val2) { + ; CHECK-LABEL: f3: +@@ -59,6 +93,23 @@ define <8 x i16> @f4(<16 x i8> %val1, <16 x i8> %val2) { + ret <8 x i16> %ret + } + ++; Test a v8i16 -> v4i32 unsigned widening multiplication ++; which is not folded into an even/odd widening operation. 
++define <4 x i32> @f5_not(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f5_not: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vuplhh %v0, %v24 ++; CHECK-NEXT: vuplhh %v1, %v26 ++; CHECK-NEXT: vmlf %v24, %v0, %v1 ++; CHECK-NEXT: br %r14 ++ %shuf1 = shufflevector <8 x i16> %val1, <8 x i16> poison, <4 x i32> ++ %zext1 = zext <4 x i16> %shuf1 to <4 x i32> ++ %shuf2 = shufflevector <8 x i16> %val2, <8 x i16> poison, <4 x i32> ++ %zext2 = zext <4 x i16> %shuf2 to <4 x i32> ++ %ret = mul <4 x i32> %zext1, %zext2 ++ ret <4 x i32> %ret ++} ++ + ; Test a v8i16 (even) -> v4i32 unsigned widening multiplication. + define <4 x i32> @f5(<8 x i16> %val1, <8 x i16> %val2) { + ; CHECK-LABEL: f5: +@@ -87,6 +138,23 @@ define <4 x i32> @f6(<8 x i16> %val1, <8 x i16> %val2) { + ret <4 x i32> %ret + } + ++; Test a v8i16 -> v4i32 signed widening multiplication ++; which is not folded into an even/odd widening operation. ++define <4 x i32> @f7_not(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f7_not: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vuphh %v0, %v26 ++; CHECK-NEXT: vuphh %v1, %v24 ++; CHECK-NEXT: vmlf %v24, %v1, %v0 ++; CHECK-NEXT: br %r14 ++ %shuf1 = shufflevector <8 x i16> %val1, <8 x i16> poison, <4 x i32> ++ %sext1 = sext <4 x i16> %shuf1 to <4 x i32> ++ %shuf2 = shufflevector <8 x i16> %val2, <8 x i16> poison, <4 x i32> ++ %sext2 = sext <4 x i16> %shuf2 to <4 x i32> ++ %ret = mul <4 x i32> %sext1, %sext2 ++ ret <4 x i32> %ret ++} ++ + ; Test a v8i16 (even) -> v4i32 signed widening multiplication. + define <4 x i32> @f7(<8 x i16> %val1, <8 x i16> %val2) { + ; CHECK-LABEL: f7: +@@ -115,6 +183,29 @@ define <4 x i32> @f8(<8 x i16> %val1, <8 x i16> %val2) { + ret <4 x i32> %ret + } + ++; Test a v4i32 -> v2i64 unsigned widening multiplication ++; which is not folded into an even/odd widening operation. 
++define <2 x i64> @f9_not(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f9_not: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vuplhf %v0, %v24 ++; CHECK-NEXT: vuplhf %v1, %v26 ++; CHECK-NEXT: vlgvg %r0, %v1, 1 ++; CHECK-NEXT: vlgvg %r1, %v0, 1 ++; CHECK-NEXT: msgr %r1, %r0 ++; CHECK-NEXT: vlgvg %r0, %v1, 0 ++; CHECK-NEXT: vlgvg %r2, %v0, 0 ++; CHECK-NEXT: msgr %r2, %r0 ++; CHECK-NEXT: vlvgp %v24, %r2, %r1 ++; CHECK-NEXT: br %r14 ++ %shuf1 = shufflevector <4 x i32> %val1, <4 x i32> poison, <2 x i32> ++ %zext1 = zext <2 x i32> %shuf1 to <2 x i64> ++ %shuf2 = shufflevector <4 x i32> %val2, <4 x i32> poison, <2 x i32> ++ %zext2 = zext <2 x i32> %shuf2 to <2 x i64> ++ %ret = mul <2 x i64> %zext1, %zext2 ++ ret <2 x i64> %ret ++} ++ + ; Test a v4i32 (even) -> v2i64 unsigned widening multiplication. + define <2 x i64> @f9(<4 x i32> %val1, <4 x i32> %val2) { + ; CHECK-LABEL: f9: +@@ -143,6 +234,29 @@ define <2 x i64> @f10(<4 x i32> %val1, <4 x i32> %val2) { + ret <2 x i64> %ret + } + ++; Test a v4i32 -> v2i64 signed widening multiplication ++; which is not folded into an even/odd widening operation. ++define <2 x i64> @f11_not(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f11_not: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vuphf %v0, %v24 ++; CHECK-NEXT: vuphf %v1, %v26 ++; CHECK-NEXT: vlgvg %r0, %v1, 1 ++; CHECK-NEXT: vlgvg %r1, %v0, 1 ++; CHECK-NEXT: msgr %r1, %r0 ++; CHECK-NEXT: vlgvg %r0, %v1, 0 ++; CHECK-NEXT: vlgvg %r2, %v0, 0 ++; CHECK-NEXT: msgr %r2, %r0 ++; CHECK-NEXT: vlvgp %v24, %r2, %r1 ++; CHECK-NEXT: br %r14 ++ %shuf1 = shufflevector <4 x i32> %val1, <4 x i32> poison, <2 x i32> ++ %sext1 = sext <2 x i32> %shuf1 to <2 x i64> ++ %shuf2 = shufflevector <4 x i32> %val2, <4 x i32> poison, <2 x i32> ++ %sext2 = sext <2 x i32> %shuf2 to <2 x i64> ++ %ret = mul <2 x i64> %sext1, %sext2 ++ ret <2 x i64> %ret ++} ++ + ; Test a v4i32 (even) -> v2i64 signed widening multiplication. 
+ define <2 x i64> @f11(<4 x i32> %val1, <4 x i32> %val2) { + ; CHECK-LABEL: f11: +-- +2.52.0 + diff --git a/0001-cmake-Resolve-symlink-when-finding-install-prefix.patch b/0001-cmake-Resolve-symlink-when-finding-install-prefix.patch deleted file mode 100644 index 2b62da0..0000000 --- a/0001-cmake-Resolve-symlink-when-finding-install-prefix.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 06774eb8a7dc0bc36b59e53310c7f5b5d89f6c29 Mon Sep 17 00:00:00 2001 -From: Nikita Popov -Date: Tue, 28 Jan 2025 12:31:49 +0100 -Subject: [PATCH] [cmake] Resolve symlink when finding install prefix - -When determining the install prefix in LLVMConfig.cmake etc resolve -symlinks in CMAKE_CURRENT_LIST_FILE first. The motivation for this -is to support symlinks like `/usr/lib64/cmake/llvm` to -`/usr/lib64/llvm19/lib/cmake/llvm`. This only works correctly if -the paths are relative to the resolved symlink. - -It's worth noting that this *mostly* already works out of the box, -because cmake automatically does the symlink resolution when the -library is found via CMAKE_PREFIX_PATH. It just doesn't happen -when it's found via the default prefix path. ---- - cmake/Modules/FindPrefixFromConfig.cmake | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/cmake/Modules/FindPrefixFromConfig.cmake b/cmake/Modules/FindPrefixFromConfig.cmake -index 22211e4b72f2..3daff607ff84 100644 ---- a/cmake/Modules/FindPrefixFromConfig.cmake -+++ b/cmake/Modules/FindPrefixFromConfig.cmake -@@ -39,10 +39,10 @@ function(find_prefix_from_config out_var prefix_var path_to_leave) - # install prefix, and avoid hard-coding any absolute paths. - set(config_code - "# Compute the installation prefix from this LLVMConfig.cmake file location." -- "get_filename_component(${prefix_var} \"\${CMAKE_CURRENT_LIST_FILE}\" PATH)") -+ "get_filename_component(${prefix_var} \"\${CMAKE_CURRENT_LIST_FILE}\" REALPATH)") - # Construct the proper number of get_filename_component(... 
PATH) - # calls to compute the installation prefix. -- string(REGEX REPLACE "/" ";" _count "${path_to_leave}") -+ string(REGEX REPLACE "/" ";" _count "${path_to_leave}/plus_one") - foreach(p ${_count}) - list(APPEND config_code - "get_filename_component(${prefix_var} \"\${${prefix_var}}\" PATH)") --- -2.48.1 - diff --git a/20-131099.patch b/20-131099.patch deleted file mode 100644 index 0b66256..0000000 --- a/20-131099.patch +++ /dev/null @@ -1,28 +0,0 @@ -From e43271ec7438ecb78f99db134aeca274a47f6c28 Mon Sep 17 00:00:00 2001 -From: Konrad Kleine -Date: Thu, 13 Mar 2025 09:12:24 +0100 -Subject: [PATCH] Filter out configuration file from compile commands - -The commands to run the compilation when printed with `-###` contain -various irrelevant lines for the perf-training. Most of them are -filtered out already but when configured with -`CLANG_CONFIG_FILE_SYSTEM_DIR` a new line like the following is -added and needs to be filtered out: - -`Configuration file: /etc/clang/x86_64-redhat-linux-gnu-clang.cfg` ---- - clang/utils/perf-training/perf-helper.py | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/clang/utils/perf-training/perf-helper.py b/clang/utils/perf-training/perf-helper.py -index 80c6356d0497c..29904aded5ab0 100644 ---- a/clang/utils/perf-training/perf-helper.py -+++ b/clang/utils/perf-training/perf-helper.py -@@ -237,6 +237,7 @@ def get_cc1_command_for_args(cmd, env): - or ln.startswith("InstalledDir:") - or ln.startswith("LLVM Profile Note") - or ln.startswith(" (in-process)") -+ or ln.startswith("Configuration file:") - or " version " in ln - ): - continue diff --git a/21-146424.patch b/21-146424.patch deleted file mode 100644 index 5b95886..0000000 --- a/21-146424.patch +++ /dev/null @@ -1,94 +0,0 @@ -From eba58195932f37fb461ae17c69fc517181b99c9a Mon Sep 17 00:00:00 2001 -From: Paul Murphy -Date: Mon, 30 Jun 2025 10:13:37 -0500 -Subject: [PATCH] [PowerPC] fix lowering of SPILL_CRBIT on pwr9 and pwr10 - -If a copy exists between creation of a crbit 
and a spill, machine-cp -may delete the copy since it seems unaware of the relation between a cr -and crbit. A fix was previously made for the generic ppc64 lowering. It -should be applied to the pwr9 and pwr10 variants too. - -Likewise, relax and extend the pwr8 test to verify pwr9 and pwr10 -codegen too. - -This fixes #143989. ---- - llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 17 +++++++++++------ - .../PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir | 8 +++++++- - 2 files changed, 18 insertions(+), 7 deletions(-) - -diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp -index 76dca4794e05..78d254a55fd9 100644 ---- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp -+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp -@@ -1102,13 +1102,20 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, - SpillsKnownBit = true; - break; - default: -+ // When spilling a CR bit, The super register may not be explicitly defined -+ // (i.e. it can be defined by a CR-logical that only defines the subreg) so -+ // we state that the CR field is undef. Also, in order to preserve the kill -+ // flag on the CR bit, we add it as an implicit use. -+ - // On Power10, we can use SETNBC to spill all CR bits. SETNBC will set all - // bits (specifically, it produces a -1 if the CR bit is set). Ultimately, - // the bit that is of importance to us is bit 32 (bit 0 of a 32-bit - // register), and SETNBC will set this. - if (Subtarget.isISA3_1()) { - BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETNBC8 : PPC::SETNBC), Reg) -- .addReg(SrcReg, RegState::Undef); -+ .addReg(SrcReg, RegState::Undef) -+ .addReg(SrcReg, RegState::Implicit | -+ getKillRegState(MI.getOperand(0).isKill())); - break; - } - -@@ -1122,16 +1129,14 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, - SrcReg == PPC::CR4LT || SrcReg == PPC::CR5LT || - SrcReg == PPC::CR6LT || SrcReg == PPC::CR7LT) { - BuildMI(MBB, II, dl, TII.get(LP64 ? 
PPC::SETB8 : PPC::SETB), Reg) -- .addReg(getCRFromCRBit(SrcReg), RegState::Undef); -+ .addReg(getCRFromCRBit(SrcReg), RegState::Undef) -+ .addReg(SrcReg, RegState::Implicit | -+ getKillRegState(MI.getOperand(0).isKill())); - break; - } - } - - // We need to move the CR field that contains the CR bit we are spilling. -- // The super register may not be explicitly defined (i.e. it can be defined -- // by a CR-logical that only defines the subreg) so we state that the CR -- // field is undef. Also, in order to preserve the kill flag on the CR bit, -- // we add it as an implicit use. - BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg) - .addReg(getCRFromCRBit(SrcReg), RegState::Undef) - .addReg(SrcReg, -diff --git a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir -index 41e21248a3f0..2796cdb3ae87 100644 ---- a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir -+++ b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir -@@ -1,6 +1,12 @@ - # RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -start-after \ - # RUN: virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \ - # RUN: -o - | FileCheck %s -+# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -start-after \ -+# RUN: virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \ -+# RUN: -o - | FileCheck %s -+# RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu -start-after \ -+# RUN: virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \ -+# RUN: -o - | FileCheck %s - - --- | - ; ModuleID = 'a.ll' -@@ -30,7 +36,7 @@ - ; Function Attrs: nounwind - declare void @llvm.stackprotector(ptr, ptr) #1 - -- attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" 
"no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" } -+ attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - attributes #1 = { nounwind } - - !llvm.ident = !{!0} --- -2.49.0 - diff --git a/22-185375.patch b/22-185375.patch new file mode 100644 index 0000000..82c3af9 --- /dev/null +++ b/22-185375.patch @@ -0,0 +1,86 @@ +From f463bef09be73ae9a415fcd3fd49689bd95b0f0a Mon Sep 17 00:00:00 2001 +From: Congcong Cai +Date: Fri, 20 Feb 2026 07:03:27 +0800 +Subject: [PATCH] [SimplifyCFG] process prof data when remove case in umin + (#182261) + +In #164097, we introduce a optimization for umin. But it does not handle +profile data correctly. +This PR remove profile data when remove cases. 
+Fixed: #181837 + +(cherry picked from commit 31e5f86a3cdc960ef7b2f0a533c4a37cf526cacd) +--- + llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2 +- + .../Transforms/SimplifyCFG/switch-umin.ll | 43 +++++++++++++++++++ + 2 files changed, 44 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +index 5f4807242581d..a16f274a4ed5a 100644 +--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp ++++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +@@ -7724,7 +7724,7 @@ static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU) { + BasicBlock *DeadCaseBB = I->getCaseSuccessor(); + DeadCaseBB->removePredecessor(BB); + Updates.push_back({DominatorTree::Delete, BB, DeadCaseBB}); +- I = SIW->removeCase(I); ++ I = SIW.removeCase(I); + E = SIW->case_end(); + } + +diff --git a/llvm/test/Transforms/SimplifyCFG/switch-umin.ll b/llvm/test/Transforms/SimplifyCFG/switch-umin.ll +index 44665365dc222..ff958e4d04147 100644 +--- a/llvm/test/Transforms/SimplifyCFG/switch-umin.ll ++++ b/llvm/test/Transforms/SimplifyCFG/switch-umin.ll +@@ -239,8 +239,51 @@ case4: + + } + ++define void @switch_remove_dead_cases(i32 %x) { ++; CHECK-LABEL: define void @switch_remove_dead_cases( ++; CHECK-SAME: i32 [[X:%.*]]) { ++; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[X]], i32 4) ++; CHECK-NEXT: switch i32 [[X]], label %[[COMMON_RET:.*]] [ ++; CHECK-NEXT: i32 2, label %[[CASE_A:.*]] ++; CHECK-NEXT: i32 3, label %[[CASE_B:.*]] ++; CHECK-NEXT: ], !prof [[PROF1:![0-9]+]] ++; CHECK: [[COMMON_RET]]: ++; CHECK-NEXT: ret void ++; CHECK: [[CASE_A]]: ++; CHECK-NEXT: call void @a() ++; CHECK-NEXT: br label %[[COMMON_RET]] ++; CHECK: [[CASE_B]]: ++; CHECK-NEXT: call void @b() ++; CHECK-NEXT: br label %[[COMMON_RET]] ++; ++ %min = call i32 @llvm.umin.i32(i32 %x, i32 4) ++ switch i32 %min, label %unreachable [ ++ i32 2, label %case_a ++ i32 3, label %case_b ++ i32 4, label %case_ret ++ i32 5, label %case_ret ++ ], !prof !1 ++ 
++case_a: ++ call void @a() ++ ret void ++ ++case_b: ++ call void @b() ++ ret void ++ ++case_ret: ++ ret void ++ ++unreachable: ++ unreachable ++} + + !0 = !{!"branch_weights", i32 1, i32 2, i32 3, i32 99, i32 5} + ;. + ; CHECK: [[PROF0]] = !{!"branch_weights", i32 5, i32 2, i32 3, i32 99} + ;. ++!1 = !{!"branch_weights", i32 11, i32 12, i32 13, i32 14, i32 15} ++;. ++; CHECK: [[PROF1]] = !{!"branch_weights", i32 14, i32 12, i32 13} ++;. diff --git a/22-185922.patch b/22-185922.patch new file mode 100644 index 0000000..4513df1 --- /dev/null +++ b/22-185922.patch @@ -0,0 +1,55 @@ +From ccf0ee68b86f65a6a4e83756f717faad7c779cb1 Mon Sep 17 00:00:00 2001 +From: Nikita Popov +Date: Wed, 11 Mar 2026 18:03:05 +0100 +Subject: [PATCH] [SystemZ] Limit depth of findCCUse() + +The recursion here has potentially exponential complexity. Avoid +this by limiting the depth of recursion. + +An alternative would be to memoize the results. I went with the +simpler depth limit on the assumption that we don't particularly +care about very deep value chains here. +--- + llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +index 2a9cb903f3921..84d66f88a812d 100644 +--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp ++++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +@@ -8692,7 +8692,12 @@ SDValue SystemZTargetLowering::combineSETCC( + return SDValue(); + } + +-static std::pair findCCUse(const SDValue &Val) { ++static std::pair findCCUse(const SDValue &Val, ++ unsigned Depth = 0) { ++ // Limit depth of potentially exponential walk. 
++ if (Depth > 5) ++ return std::make_pair(SDValue(), SystemZ::CCMASK_NONE); ++ + switch (Val.getOpcode()) { + default: + return std::make_pair(SDValue(), SystemZ::CCMASK_NONE); +@@ -8705,7 +8710,7 @@ static std::pair findCCUse(const SDValue &Val) { + SDValue Op4CCReg = Val.getOperand(4); + if (Op4CCReg.getOpcode() == SystemZISD::ICMP || + Op4CCReg.getOpcode() == SystemZISD::TM) { +- auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0)); ++ auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0), Depth + 1); + if (OpCC != SDValue()) + return std::make_pair(OpCC, OpCCValid); + } +@@ -8722,10 +8727,10 @@ static std::pair findCCUse(const SDValue &Val) { + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: +- auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0)); ++ auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0), Depth + 1); + if (Op0CC != SDValue()) + return std::make_pair(Op0CC, Op0CCValid); +- return findCCUse(Val.getOperand(1)); ++ return findCCUse(Val.getOperand(1), Depth + 1); + } + } + diff --git a/22-190701.patch b/22-190701.patch new file mode 100644 index 0000000..51c3246 --- /dev/null +++ b/22-190701.patch @@ -0,0 +1,87 @@ +From 3915d1efcdb1e9d10c8f6966acbe5c359d824ba1 Mon Sep 17 00:00:00 2001 +From: Josh Stone +Date: Mon, 6 Apr 2026 14:08:10 -0700 +Subject: [PATCH] [CodeGen] Preserve big-endian trunc in concat_vectors + +A transform from `concat_vectors(trunc(scalar), undef)` to +`scalar_to_vector(scalar)` is only equivalent for little-endian targets. +On big-endian, that would put the extra upper bytes ahead of the desired +truncated bytes. This problem was seen on Rust s390x in [RHEL-147748]. 
+ +[RHEL-147748]: https://redhat.atlassian.net/browse/RHEL-147748 + +Assisted-by: Claude Code +--- + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +- + llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll | 45 +++++++++++++++++++ + 2 files changed, 48 insertions(+), 1 deletion(-) + create mode 100644 llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll + +diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +index 383e45c5ea3a8..5485ee86251a5 100644 +--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp ++++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +@@ -26513,9 +26513,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { + // If the bitcast type isn't legal, it might be a trunc of a legal type; + // look through the trunc so we can still do the transform: + // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) ++ // However, this is only equivalent on little-endian targets. + if (Scalar->getOpcode() == ISD::TRUNCATE && + !TLI.isTypeLegal(Scalar.getValueType()) && +- TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) ++ TLI.isTypeLegal(Scalar->getOperand(0).getValueType()) && ++ DAG.getDataLayout().isLittleEndian()) + Scalar = Scalar->getOperand(0); + + EVT SclTy = Scalar.getValueType(); +diff --git a/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll +new file mode 100644 +index 0000000000000..42d787d945145 +--- /dev/null ++++ b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll +@@ -0,0 +1,45 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test that truncated scalars use the correct vector insert instruction. ++; On big-endian targets, concat_vectors should not skip truncates when ++; creating scalar_to_vector, as the bytes would be in the wrong position. ++ ++; This truncated i16 should use vlvgh (insert halfword), not vlvgf (insert fullword). 
++define <16 x i8> @test_concat_trunc_i16(i32 %x) { ++; CHECK-LABEL: test_concat_trunc_i16: ++; CHECK: # %bb.0: ++; CHECK-NEXT: vlvgh %v24, %r2, 0 ++; CHECK-NEXT: br %r14 ++ %t = trunc i32 %x to i16 ++ %vec = bitcast i16 %t to <2 x i8> ++ %result = shufflevector <2 x i8> %vec, <2 x i8> poison, <16 x i32> ++ ret <16 x i8> %result ++} ++ ++; Test with a more complex shuffle pattern, reduced from a Rust bug report. ++define fastcc void @test_shuffle_with_trunc() { ++; CHECK-LABEL: test_shuffle_with_trunc: ++; CHECK: # %bb.0: ++; CHECK-NEXT: lh %r1, 0 ++; CHECK-NEXT: l %r0, 0 ++; CHECK-NEXT: vlvgh %v1, %r1, 0 ++; CHECK-NEXT: larl %r1, .LCPI1_0 ++; CHECK-NEXT: vl %v2, 0(%r1), 3 ++; CHECK-NEXT: vlvgf %v0, %r0, 0 ++; CHECK-NEXT: vperm %v0, %v0, %v1, %v2 ++; CHECK-NEXT: vst %v0, 0, 3 ++; CHECK-NEXT: br %r14 ++ %1 = load i32, ptr null, align 8 ++ %2 = load i16, ptr null, align 1 ++ br label %3 ++ ++3: ++ %4 = bitcast i32 %1 to <4 x i8> ++ %5 = shufflevector <4 x i8> %4, <4 x i8> zeroinitializer, <16 x i32> ++ %6 = bitcast i16 %2 to <2 x i8> ++ %7 = shufflevector <2 x i8> %6, <2 x i8> zeroinitializer, <16 x i32> ++ %8 = shufflevector <16 x i8> %5, <16 x i8> %7, <16 x i32> ++ store <16 x i8> %8, ptr null, align 8 ++ ret void ++} diff --git a/43cb4631c1f42dbfce78288b8ae30b5840ed59b3.patch b/43cb4631c1f42dbfce78288b8ae30b5840ed59b3.patch new file mode 100644 index 0000000..47b333b --- /dev/null +++ b/43cb4631c1f42dbfce78288b8ae30b5840ed59b3.patch @@ -0,0 +1,276 @@ +From 43cb4631c1f42dbfce78288b8ae30b5840ed59b3 Mon Sep 17 00:00:00 2001 +From: Ebuka Ezike +Date: Thu, 8 Jan 2026 18:46:03 +0000 +Subject: [PATCH] [lldb] Fix typed commands not shown on the screen (#174216) + +The cause is that in `python3.14`, `fcntl.ioctl` now throws a buffer +overflow error +when the buffer is too small or too large (see +https://github.com/python/cpython/pull/132919). This caused the Python +interpreter to fail terminal detection and not properly echo user +commands back to the screen. 
+ +Fix by dropping the custom terminal size check entirely and using the +built-in `sys.stdin.isatty()` instead. + +Fixes #173302 +--- + .../Python/lldbsuite/test/lldbpexpect.py | 1 + + .../Interpreter/embedded_interpreter.py | 59 +++--------------- + .../python_api/file_handle/TestFileHandle.py | 48 +++++++++++++- + .../API/terminal/TestPythonInterpreterEcho.py | 62 +++++++++++++++++++ + .../Shell/ScriptInterpreter/Python/io.test | 12 ++++ + 5 files changed, 131 insertions(+), 51 deletions(-) + create mode 100644 lldb/test/API/terminal/TestPythonInterpreterEcho.py + create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/io.test + +diff --git a/lldb/packages/Python/lldbsuite/test/lldbpexpect.py b/lldb/packages/Python/lldbsuite/test/lldbpexpect.py +index 3279e1fd39f8c..03b2500fbda52 100644 +--- a/lldb/packages/Python/lldbsuite/test/lldbpexpect.py ++++ b/lldb/packages/Python/lldbsuite/test/lldbpexpect.py +@@ -10,6 +10,7 @@ + + + @skipIfRemote ++@skipIfWindows + @add_test_categories(["pexpect"]) + class PExpectTest(TestBase): + NO_DEBUG_INFO_TESTCASE = True +diff --git a/lldb/source/Interpreter/embedded_interpreter.py b/lldb/source/Interpreter/embedded_interpreter.py +index 42a9ab5fc367a..12c47bd712816 100644 +--- a/lldb/source/Interpreter/embedded_interpreter.py ++++ b/lldb/source/Interpreter/embedded_interpreter.py +@@ -32,18 +32,6 @@ def is_libedit(): + g_run_one_line_str = None + + +-def get_terminal_size(fd): +- try: +- import fcntl +- import termios +- import struct +- +- hw = struct.unpack("hh", fcntl.ioctl(fd, termios.TIOCGWINSZ, "1234")) +- except: +- hw = (0, 0) +- return hw +- +- + class LLDBExit(SystemExit): + pass + +@@ -74,50 +62,21 @@ def readfunc_stdio(prompt): + def run_python_interpreter(local_dict): + # Pass in the dictionary, for continuity from one session to the next. 
+ try: +- fd = sys.stdin.fileno() +- interacted = False +- if get_terminal_size(fd)[1] == 0: +- try: +- import termios +- +- old = termios.tcgetattr(fd) +- if old[3] & termios.ECHO: +- # Need to turn off echoing and restore +- new = termios.tcgetattr(fd) +- new[3] = new[3] & ~termios.ECHO +- try: +- termios.tcsetattr(fd, termios.TCSADRAIN, new) +- interacted = True +- code.interact( +- banner="Python Interactive Interpreter. To exit, type 'quit()', 'exit()'.", +- readfunc=readfunc_stdio, +- local=local_dict, +- ) +- finally: +- termios.tcsetattr(fd, termios.TCSADRAIN, old) +- except: +- pass +- # Don't need to turn off echoing +- if not interacted: +- code.interact( +- banner="Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D.", +- readfunc=readfunc_stdio, +- local=local_dict, +- ) +- else: +- # We have a real interactive terminal +- code.interact( +- banner="Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D.", +- readfunc=readfunc, +- local=local_dict, +- ) ++ banner = "Python Interactive Interpreter. To exit, type 'quit()', 'exit()'." ++ input_func = readfunc_stdio ++ ++ is_atty = sys.stdin.isatty() ++ if is_atty: ++ banner = "Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D." 
++ input_func = readfunc ++ ++ code.interact(banner=banner, readfunc=input_func, local=local_dict) + except LLDBExit: + pass + except SystemExit as e: + if e.code: + print("Script exited with code %s" % e.code) + +- + def run_one_line(local_dict, input_string): + global g_run_one_line_str + try: +diff --git a/lldb/test/API/python_api/file_handle/TestFileHandle.py b/lldb/test/API/python_api/file_handle/TestFileHandle.py +index b38585577f6f6..707044a3afb0f 100644 +--- a/lldb/test/API/python_api/file_handle/TestFileHandle.py ++++ b/lldb/test/API/python_api/file_handle/TestFileHandle.py +@@ -111,10 +111,11 @@ def setUp(self): + super(FileHandleTestCase, self).setUp() + self.out_filename = self.getBuildArtifact("output") + self.in_filename = self.getBuildArtifact("input") ++ self.err_filename = self.getBuildArtifact("error") + + def tearDown(self): + super(FileHandleTestCase, self).tearDown() +- for name in (self.out_filename, self.in_filename): ++ for name in (self.out_filename, self.in_filename, self.err_filename): + if os.path.exists(name): + os.unlink(name) + +@@ -679,6 +680,51 @@ def test_stdout_file(self): + lines = [x for x in f.read().strip().split() if x != "7"] + self.assertEqual(lines, ["foobar"]) + ++ def test_stdout_file_interactive(self): ++ """Ensure when we read stdin from a file, outputs from python goes to the right I/O stream.""" ++ with open(self.in_filename, "w") as f: ++ f.write( ++ "script --language python --\nvalue = 250 + 5\nprint(value)\nprint(vel)" ++ ) ++ ++ with open(self.out_filename, "w") as outf, open( ++ self.in_filename, "r" ++ ) as inf, open(self.err_filename, "w") as errf: ++ status = self.dbg.SetOutputFile(lldb.SBFile(outf)) ++ self.assertSuccess(status) ++ status = self.dbg.SetErrorFile(lldb.SBFile(errf)) ++ self.assertSuccess(status) ++ status = self.dbg.SetInputFile(lldb.SBFile(inf)) ++ self.assertSuccess(status) ++ auto_handle_events = True ++ spawn_thread = False ++ num_errs = 0 ++ quit_requested = False ++ stopped_for_crash = 
False ++ opts = lldb.SBCommandInterpreterRunOptions() ++ self.dbg.RunCommandInterpreter( ++ auto_handle_events, ++ spawn_thread, ++ opts, ++ num_errs, ++ quit_requested, ++ stopped_for_crash, ++ ) ++ self.dbg.GetOutputFile().Flush() ++ expected_out_text = "255" ++ expected_err_text = "NameError" ++ # check stdout ++ with open(self.out_filename, "r") as f: ++ out_text = f.read() ++ self.assertIn(expected_out_text, out_text) ++ self.assertNotIn(expected_err_text, out_text) ++ ++ # check stderr ++ with open(self.err_filename, "r") as f: ++ err_text = f.read() ++ self.assertIn(expected_err_text, err_text) ++ self.assertNotIn(expected_out_text, err_text) ++ + def test_identity(self): + f = io.StringIO() + sbf = lldb.SBFile(f) +diff --git a/lldb/test/API/terminal/TestPythonInterpreterEcho.py b/lldb/test/API/terminal/TestPythonInterpreterEcho.py +new file mode 100644 +index 0000000000000..758a4f9cede5a +--- /dev/null ++++ b/lldb/test/API/terminal/TestPythonInterpreterEcho.py +@@ -0,0 +1,62 @@ ++""" ++Test that typing python expression in the terminal is echoed back to stdout. ++""" ++ ++from lldbsuite.test.decorators import skipIfAsan ++from lldbsuite.test.lldbpexpect import PExpectTest ++ ++ ++@skipIfAsan ++class PythonInterpreterEchoTest(PExpectTest): ++ PYTHON_PROMPT = ">>> " ++ ++ def verify_command_echo( ++ self, command: str, expected_output: str = "", is_regex: bool = False ++ ): ++ assert self.child != None ++ child = self.child ++ self.assertIsNotNone(self.child, "expected a running lldb process.") ++ ++ child.sendline(command) ++ ++ # Build pattern list: match whichever comes first (output or prompt). ++ # This prevents waiting for a timeout if there's no match. 
++ pattern = [] ++ match_expected = expected_output and len(expected_output) > 0 ++ ++ if match_expected: ++ pattern.append(expected_output) ++ pattern.append(self.PYTHON_PROMPT) ++ ++ expect_func = child.expect if is_regex else child.expect_exact ++ match_idx = expect_func(pattern) ++ if match_expected: ++ self.assertEqual( ++ match_idx, 0, "Expected output `{expected_output}` in stdout." ++ ) ++ ++ self.assertIsNotNone(self.child.before, "Expected output before prompt") ++ self.assertIsInstance(self.child.before, bytes) ++ echoed_text: str = self.child.before.decode("ascii").strip() ++ self.assertEqual( ++ command, echoed_text, f"Command '{command}' should be echoed to stdout." ++ ) ++ ++ if match_expected: ++ child.expect_exact(self.PYTHON_PROMPT) ++ ++ def test_python_interpreter_echo(self): ++ """Test that that the user typed commands is echoed to stdout""" ++ ++ self.launch(use_colors=False, dimensions=(100, 100)) ++ ++ # Enter the python interpreter. ++ self.verify_command_echo( ++ "script --language python --", expected_output="Python.*\\.", is_regex=True ++ ) ++ self.child_in_script_interpreter = True ++ ++ self.verify_command_echo("val = 300") ++ self.verify_command_echo( ++ "print('result =', 300)", expected_output="result = 300" ++ ) +diff --git a/lldb/test/Shell/ScriptInterpreter/Python/io.test b/lldb/test/Shell/ScriptInterpreter/Python/io.test +new file mode 100644 +index 0000000000000..25e3de41724e0 +--- /dev/null ++++ b/lldb/test/Shell/ScriptInterpreter/Python/io.test +@@ -0,0 +1,12 @@ ++# RUN: rm -rf %t.stdout %t.stderr ++# RUN: cat %s | %lldb --script-language python > %t.stdout 2> %t.stderr ++# RUN: cat %t.stdout | FileCheck %s --check-prefix STDOUT ++# RUN: cat %t.stderr | FileCheck %s --check-prefix STDERR ++script ++variable = 300 ++print(variable) ++print(not_value) ++quit ++ ++# STDOUT: 300 ++# STDERR: NameError{{.*}}is not defined diff --git a/gating.yaml b/gating.yaml index f0c9afb..97dd90b 100644 --- a/gating.yaml +++ b/gating.yaml @@ 
-13,7 +13,7 @@ rules: --- !Policy product_versions: # The version number here should match the current rawhide release. - - fedora-44 + - fedora-45 decision_contexts: - bodhi_update_push_stable - bodhi_update_push_stable_critpath diff --git a/llvm.spec b/llvm.spec index b6ccc6e..af0ce53 100644 --- a/llvm.spec +++ b/llvm.spec @@ -1,8 +1,8 @@ #region globals #region version -%global maj_ver 21 +%global maj_ver 22 %global min_ver 1 -%global patch_ver 8 +%global patch_ver 3 #global rc_ver rc3 %bcond_with snapshot_build @@ -34,7 +34,6 @@ %define bcond_override_default_libcxx 0 %define bcond_override_default_lto_build 0 %define bcond_override_default_check 0 -%define _find_debuginfo_dwz_opts %{nil} %endif # Build compat packages llvmN instead of main package for the current LLVM @@ -42,11 +41,11 @@ %bcond_with compat_build # Bundle compat libraries for a previous LLVM version, as part of llvm-libs and # clang-libs. Used on RHEL. -%bcond_with bundle_compat_lib +%bcond_without bundle_compat_lib %bcond_without check %if %{with bundle_compat_lib} -%global compat_maj_ver 20 +%global compat_maj_ver 21 %global compat_ver %{compat_maj_ver}.1.8 %endif @@ -78,7 +77,7 @@ # MLIR version 22 started to require nanobind >= 2.9, which is only available # on Fedora >= 44. -%if %{without compat_build} && %{defined fedora} && (%{maj_ver} < 22 || 0%{?fedora} >= 44) +%if %{without compat_build} && %{defined fedora} && 0%{?fedora} >= 44 %ifarch %{ix86} %bcond_with mlir %else @@ -89,7 +88,7 @@ %endif #region flang -%if %{without compat_build} && %{defined fedora} && (%{maj_ver} >= 22 && 0%{?fedora} >= 44) +%if %{without compat_build} && %{defined fedora} && 0%{?fedora} >= 44 # Link error on i686. # s390x is not supported upstream yet. %ifarch i686 s390x @@ -186,16 +185,6 @@ end %endif %endif -# We only want to run the performance comparison on snapshot builds. -# centos-streams/RHEL do not have all the requirements. We tried to use pip, -# but we've seen issues on some architectures. 
We're now restricting this -# to Fedora. -%if %{with pgo} && %{with snapshot_build} && %{defined fedora} -%global run_pgo_perf_comparison 1 -%else -%global run_pgo_perf_comparison %{nil} -%endif - # Sanity checks for PGO and bootstrapping #---------------------------------------- %if %{with pgo} @@ -329,6 +318,12 @@ end %global build_install_prefix %{buildroot}%{install_prefix} +%if %{with compat_build} +%global install_pythondir %{install_prefix}/lib/python%{python3_version}/site-packages +%else +%global install_pythondir %{python3_sitelib}/ +%endif + # Lower memory usage of dwz on s390x %global _dwz_low_mem_die_limit_s390x 1 %global _dwz_max_die_limit_s390x 1000000 @@ -406,12 +401,6 @@ end %global pkg_name_polly polly%{pkg_suffix} #endregion polly globals -#region PGO globals -%if 0%{run_pgo_perf_comparison} -%global llvm_test_suite_dir %{_datadir}/llvm-test-suite -%endif -#endregion PGO globals - #region flang globals %global pkg_name_flang flang%{pkg_suffix} #endregion flang globals @@ -484,9 +473,10 @@ Source1001: changelog # behind the latest packaged LLVM version. #region CLANG patches -Patch101: 0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch +Patch2100: 0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch +Patch2200: 0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch +Patch2300: 0001-23-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch Patch102: 0003-PATCH-clang-Don-t-install-static-libraries.patch -Patch2002: 20-131099.patch # Workaround a bug in ORC on ppc64le. # More info is available here: https://reviews.llvm.org/D159115#4641826 @@ -497,10 +487,6 @@ Patch103: 0001-Workaround-a-bug-in-ORC-on-ppc64le.patch Patch104: 0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch #endregion CLANG patches -# Fix LLVMConfig.cmake when symlinks are used. 
-# (https://github.com/llvm/llvm-project/pull/124743 landed in LLVM 21) -Patch2003: 0001-cmake-Resolve-symlink-when-finding-install-prefix.patch - #region LLD patches Patch106: 0001-19-Always-build-shared-libs-for-LLD.patch Patch2103: 0001-lld-Adjust-compressed-debug-level-test-for-s390x-wit.patch @@ -509,6 +495,7 @@ Patch2103: 0001-lld-Adjust-compressed-debug-level-test-for-s390x-wit.patch #region polly patches Patch2102: 0001-20-polly-shared-libs.patch Patch2202: 0001-22-polly-shared-libs.patch +Patch2302: 0001-22-polly-shared-libs.patch #endregion polly patches #region RHEL patches @@ -523,28 +510,27 @@ Patch503: 0002-BPF-Remove-unused-weak-symbol-__bpf_trap-166003.patch Patch504: 0003-BPF-Remove-dead-code-related-to-__bpf_trap-global-va.patch #endregion RHEL patches -# Fix a pgo miscompilation triggered by building Rust 1.87 with pgo on ppc64le. -# https://github.com/llvm/llvm-project/issues/138208 -Patch2004: 0001-CodeGenPrepare-Make-sure-instruction-get-from-SunkAd.patch -# Related CGP fix for domination, rhbz#2388223 -Patch2008: 0001-CGP-Bail-out-if-Base-Scaled-Reg-does-not-dominate-in.patch - -# Fix Power9/Power10 crbit spilling -# https://github.com/llvm/llvm-project/pull/146424 -Patch2007: 21-146424.patch - -# Fix for highway package build on ppc64le -Patch2005: 0001-PowerPC-Fix-handling-of-undefs-in-the-PPC-isSplatShu.patch -Patch2006: 0001-Add-REQUIRES-asserts-to-test-added-in-145149-because.patch - # Fix for offload builds: The DeviceRTL libraries target device code and # don't support the mtls-dialect flag, so we need to patch the clang driver # to ignore it for these targets. 
Patch2101: 0001-clang-Add-a-hack-to-fix-the-offload-build-with-the-m.patch Patch2201: 0001-clang-Add-a-hack-to-fix-the-offload-build-with-the-m.patch +# Fix segfault compiling plotters rust crate on ppc64le +Patch2104: 0001-PowerPC-Add-check-for-cast-when-shufflevector-172443.patch + +# Fix for lldb python shell with python 3.14 (rhbz#2428608) +Patch2105: 43cb4631c1f42dbfce78288b8ae30b5840ed59b3.patch + +# Fix for s390x vector miscompilation (rhbz#2430017) +Patch2106: 0001-SystemZ-Fix-code-in-widening-vector-multiplication-1.patch + +# Fix for s390x vector miscompilation (RHEL-147748) +Patch2203: 22-190701.patch + %if 0%{?rhel} == 8 %global python3_pkgversion 3.12 +%global python3_version 3.12 %global __python3 /usr/bin/python3.12 %endif @@ -586,20 +572,6 @@ BuildRequires: compiler-rt BuildRequires: llvm %endif -%if 0%{run_pgo_perf_comparison} -BuildRequires: llvm-test-suite -BuildRequires: tcl-devel -BuildRequires: which -# pandas and scipy are needed for running llvm-test-suite/utils/compare.py -# For RHEL we have to install it from pip and for fedora we take the RPM package. -%if 0%{?rhel} -BuildRequires: python3-pip -%else -BuildRequires: python3-pandas -BuildRequires: python3-scipy -%endif -%endif - %else %if %{with use_lld} BuildRequires: lld @@ -646,20 +618,17 @@ BuildRequires: gnupg2 BuildRequires: swig BuildRequires: libxml2-devel -BuildRequires: doxygen # For clang-offload-packager BuildRequires: elfutils-libelf-devel -BuildRequires: perl -BuildRequires: perl-Data-Dumper -BuildRequires: perl-Encode BuildRequires: libffi-devel +# For scan-build +BuildRequires: perl-interpreter BuildRequires: perl-generators -# According to https://fedoraproject.org/wiki/Packaging:Emacs a package -# should BuildRequires: emacs if it packages emacs integration files. -BuildRequires: emacs +# We only need the emacs packaging macros, which are part of emacs-common. 
+BuildRequires: emacs-common BuildRequires: libatomic @@ -684,8 +653,6 @@ BuildRequires: python%{python3_pkgversion}-pyyaml BuildRequires: python%{python3_pkgversion}-nanobind-devel %endif -BuildRequires: graphviz - # This is required because we need "ps" when running LLDB tests BuildRequires: procps-ng @@ -951,21 +918,18 @@ Requires: python%{python3_pkgversion} %description -n git-clang-format%{pkg_suffix} clang-format integration for git. -%if %{without compat_build} -%package -n python%{python3_pkgversion}-clang +%package -n python%{python3_pkgversion}-%{pkg_name_clang} Summary: Python3 bindings for clang Requires: %{pkg_name_clang}-devel%{?_isa} = %{version}-%{release} -Requires: python%{python3_pkgversion} +Requires: python(abi) = %{python3_version} +Provides: python%{python3_pkgversion}-clang(major) = %{maj_ver} %if 0%{?rhel} == 8 # Became python3.12-clang in LLVM 19 Obsoletes: python3-clang < 18.9 %endif -%description -n python%{python3_pkgversion}-clang +%description -n python%{python3_pkgversion}-%{pkg_name_clang} Python3 bindings for clang. - -%endif - #endregion CLANG packages #region COMPILER-RT packages @@ -1327,6 +1291,13 @@ Flang runtime libraries. # automatically apply patches based on LLVM version %autopatch -m%{compat_maj_ver}00 -M%{compat_maj_ver}99 -p1 +%if 0%{?rhel} == 8 && %{compat_maj_ver} < 22 +# The following patches have been backported from LLVM 22. +%patch -p1 -P502 +%patch -p1 -P503 +%patch -p1 -P504 +%endif + %endif # -T : Do Not Perform Default Archive Unpacking (without this, the th source would be unpacked twice) @@ -1345,20 +1316,12 @@ Flang runtime libraries. %if %{defined rhel} && 0%{?rhel} == 8 %patch -p1 -P501 -%if %{maj_ver} < 22 -# The following patches have been backported from LLVM 22. 
-%patch -p1 -P502 -%patch -p1 -P503 -%patch -p1 -P504 -%endif %endif #region LLVM preparation %py3_shebang_fix \ - llvm/test/BugPoint/compile-custom.ll.py \ - llvm/tools/opt-viewer/*.py \ - llvm/utils/update_cc_test_checks.py + llvm/tools/opt-viewer/*.py #endregion LLVM preparation @@ -1490,11 +1453,9 @@ export ASMFLAGS="%{build_cflags}" # We set CLANG_DEFAULT_PIE_ON_LINUX=OFF and PPC_LINUX_DEFAULT_IEEELONGDOUBLE=ON to match the # defaults used by Fedora's GCC. -# Disable dwz on aarch64, because it takes a huge amount of time to decide not to optimize things. -# This is copied from clang. -%ifarch aarch64 +# Disable dwz because it takes a huge amount of time to decide not to +# optimize things. %define _find_debuginfo_dwz_opts %{nil} -%endif cd llvm @@ -1504,10 +1465,6 @@ OLD_LD_LIBRARY_PATH="$LD_LIBRARY_PATH" OLD_CWD="$PWD" %global builddir_instrumented $RPM_BUILD_DIR/instrumented-llvm -%if 0%{run_pgo_perf_comparison} -%global builddir_perf_pgo $RPM_BUILD_DIR/performance-of-pgoed-clang -%global builddir_perf_system $RPM_BUILD_DIR/performance-of-system-clang -%endif #region LLVM lit %if %{with python_lit} @@ -1538,14 +1495,33 @@ popd -DLLVM_BUILD_LLVM_DYLIB=ON \\\ -DLLVM_LINK_LLVM_DYLIB=ON \\\ -DCLANG_LINK_CLANG_DYLIB=ON \\\ - -DLLVM_ENABLE_FFI:BOOL=ON - -%if %{maj_ver} >= 22 -%global cmake_common_args %{cmake_common_args} \\\ + -DLLVM_ENABLE_FFI:BOOL=ON \\\ -DLLVM_ENABLE_EH=OFF -%else + +%if 0%{?rhel} == 8 +# On RHEL 8 we build with gcc, but the runtimes are built with the just built +# clang, so we need to pass clang supported compiler flags to the runtimes +# build. If we pass the gcc flags, some of the cmake feature checks will +# fail, because they use -Werror and emit an error when passed gcc specific +# compiler flags like -specs. +# Specifically, this is required in order to fix the libomptest.so build.
+ +function strip_specs { + echo $1 | sed -e 's/-specs=[^ ]\+//g' +} + +CLANG_CC_CONFIG=$(pwd)/redhat-hardened-clang.cfg +CLANG_LD_CONFIG=$(pwd)/redhat-hardened-clang-ld.cfg +echo "-fPIE" >> $CLANG_CC_CONFIG +echo "-pie" >> $CLANG_LD_CONFIG +CLANG_CCFLAGS_EXTRA=--config=$CLANG_CC_CONFIG +CLANG_LDFLAGS_EXTRA=--config=$CLANG_LD_CONFIG + +CLANG_CXXFLAGS=$(strip_specs "$CXXFLAGS $CLANG_CCFLAGS_EXTRA") +CLANG_CFLAGS=$(strip_specs "$CFLAGS $CLANG_CCFLAGS_EXTRA") +CLANG_LDFLAGS=$(strip_specs "$LDFLAGS $CLANG_LDFLAGS_EXTRA") %global cmake_common_args %{cmake_common_args} \\\ - -DLLVM_ENABLE_EH=ON + -DRUNTIMES_CMAKE_ARGS="-DCMAKE_C_FLAGS=$CLANG_CFLAGS;-DCMAKE_CXX_FLAGS=$CLANG_CXXFLAGS;-DCMAKE_SHARED_LINKER_FLAGS=$CLANG_LDFLAGS" %endif %if %reduce_debuginfo == 1 @@ -1691,7 +1667,7 @@ popd -DOPENMP_INSTALL_LIBDIR=%{unprefixed_libdir} \\\ -DLIBOMP_INSTALL_ALIASES=OFF -%if %{maj_ver} >= 22 && %{with offload} +%if %{with offload} # We reset the cxxflags to "" here because this is compiling for a GPU # target, where our cflags are either questionable or actively wrong.
%global cmake_config_args %{cmake_config_args} \\\ @@ -1859,12 +1835,14 @@ fi -DLLVM_VP_COUNTERS_PER_SITE=8 %if %{defined host_clang_maj_ver} -%global cmake_config_args_instrumented %{cmake_config_args_instrumented} \\\ - -DLLVM_PROFDATA=%{_bindir}/llvm-profdata-%{host_clang_maj_ver} +%global profdata %{_bindir}/llvm-profdata-%{host_clang_maj_ver} +%global cxxfilt %{_bindir}/llvm-cxxfilt-%{host_clang_maj_ver} %else -%global cmake_config_args_instrumented %{cmake_config_args_instrumented} \\\ - -DLLVM_PROFDATA=%{_bindir}/llvm-profdata +%global profdata %{_bindir}/llvm-profdata +%global cxxfilt %{_bindir}/llvm-cxxfilt %endif +%global cmake_config_args_instrumented %{cmake_config_args_instrumented} \\\ + -DLLVM_PROFDATA=%{profdata} # TODO(kkleine): Should we see warnings like: # "function control flow change detected (hash mismatch)" @@ -1883,10 +1861,14 @@ fi %cmake_build --target generate-profdata # Show top 10 functions in the profile -llvm-profdata show --topn=10 %{builddir_instrumented}/tools/clang/utils/perf-training/clang.profdata | llvm-cxxfilt +%{profdata} show --topn=10 %{builddir_instrumented}/tools/clang/utils/perf-training/clang.profdata | %{cxxfilt} cp %{builddir_instrumented}/tools/clang/utils/perf-training/clang.profdata $RPM_BUILD_DIR/result.profdata +# The instrumented files are not needed anymore. +# Remove them in order to free disk space (~10GiB). 
+rm -rf %{builddir_instrumented} + #endregion Perf training %endif @@ -1964,71 +1946,17 @@ cd $OLD_CWD %cmake_build --target runtimes #endregion Final stage -#region Performance comparison -%if 0%{run_pgo_perf_comparison} - -function run_perf_test { - local build_dir=$1 - - cd %{llvm_test_suite_dir} - %__cmake -G Ninja \ - -S "%{llvm_test_suite_dir}" \ - -B "${build_dir}" \ - -DCMAKE_GENERATOR=Ninja \ - -DCMAKE_C_COMPILER=clang \ - -DCMAKE_CXX_COMPILER=clang++ \ - -DTEST_SUITE_BENCHMARKING_ONLY=ON \ - -DTEST_SUITE_COLLECT_STATS=ON \ - -DTEST_SUITE_USE_PERF=OFF \ - -DTEST_SUITE_SUBDIRS=CTMark \ - -DTEST_SUITE_RUN_BENCHMARKS=OFF \ - -DTEST_SUITE_COLLECT_CODE_SIZE=OFF \ - -C%{llvm_test_suite_dir}/cmake/caches/O3.cmake - - # Build the test-suite - %__cmake --build "${build_dir}" -j1 --verbose - - # Run the tests with lit: - %{builddir_instrumented}/bin/llvm-lit -v -o ${build_dir}/results.json ${build_dir} || true - cd $OLD_CWD -} - -# Run performance test for system clang -reset_paths -run_perf_test %{builddir_perf_system} - -# Run performance test for PGOed clang -reset_paths -FINAL_BUILD_DIR=`pwd`/%{_vpath_builddir} -export LD_LIBRARY_PATH="${FINAL_BUILD_DIR}/lib:${FINAL_BUILD_DIR}/lib64:${LD_LIBRARY_PATH}" -export PATH="${FINAL_BUILD_DIR}/bin:${OLD_PATH}" -run_perf_test %{builddir_perf_pgo} - -# Compare the performance of system and PGOed clang -%if 0%{?rhel} -python3 -m venv compare-env -source ./compare-env/bin/activate -pip install "pandas>=2.2.3" -pip install "scipy>=1.13.1" -MY_PYTHON_BIN=./compare-env/bin/python3 +%if %{with lto_build} +# The LTO cache is not needed anymore. +# Remove it in order to free disk space. 
+rm -rfv %{_vpath_builddir}/lto.cache %endif -system_llvm_release=$(/usr/bin/clang --version | grep -Po '[0-9]+\.[0-9]+\.[0-9]' | head -n1) -${MY_PYTHON_BIN} %{llvm_test_suite_dir}/utils/compare.py \ - --metric compile_time \ - --lhs-name ${system_llvm_release} \ - --rhs-name pgo-%{version} \ - %{builddir_perf_system}/results.json vs %{builddir_perf_pgo}/results.json > %{builddir_perf_pgo}/results-system-vs-pgo.txt || true - -echo "Result of Performance comparison between system and PGOed clang" -cat %{builddir_perf_pgo}/results-system-vs-pgo.txt - -%if 0%{?rhel} -# Deactivate virtual python environment created ealier -deactivate -%endif -%endif -#endregion Performance comparison +# Strip debug info from static libraries before the install phase because +# LLVM already consumes a lot of disk space (i.e. > 150GiB). +# The install phase duplicates files on disk, causing errors if the disk is +# too small. +RPM_BUILD_ROOT=$(realpath ..)/%{build_libdir} %__brp_strip_static_archive #region compat lib cd .. @@ -2172,9 +2100,6 @@ ln -s ../share/clang/clang-format-diff.py %{buildroot}%{install_bindir}/clang-fo # Install the PGO profile that was used to build this LLVM into the clang package %if 0%{with pgo} cp -v $RPM_BUILD_DIR/result.profdata %{buildroot}%{install_datadir}/llvm-pgo.profdata -%if 0%{run_pgo_perf_comparison} -cp -v %{builddir_perf_pgo}/results-system-vs-pgo.txt %{buildroot}%{install_datadir}/results-system-vs-pgo.txt -%endif %endif # File in the macros file for other packages to use. We are not doing this @@ -2187,15 +2112,6 @@ sed -i -e "s|@@CLANG_MAJOR_VERSION@@|%{maj_ver}|" \ -e "s|@@CLANG_PATCH_VERSION@@|%{patch_ver}|" \ %{buildroot}%{_rpmmacrodir}/macros.%{pkg_name_clang} -# install clang python bindings -mkdir -p %{buildroot}%{python3_sitelib}/clang/ -# If we don't default to true here, we'll see this error: -# install: omitting directory 'bindings/python/clang/__pycache__' -# NOTE: this only happens if we include the gdb plugin of libomp. 
-# Remove the plugin with command and we're good: rm -rf %{buildroot}/%{_datarootdir}/gdb -install -p -m644 clang/bindings/python/clang/* %{buildroot}%{python3_sitelib}/clang/ -%py_byte_compile %{__python3} %{buildroot}%{python3_sitelib}/clang - # install scanbuild-py to python sitelib. mv %{buildroot}%{install_prefix}/lib/{libear,libscanbuild} %{buildroot}%{python3_sitelib} # Cannot use {libear,libscanbuild} style expansion in py_byte_compile. @@ -2219,6 +2135,15 @@ rm -Rf %{buildroot}%{install_datadir}/clang/*.el %endif +# install clang python bindings +mkdir -p %{buildroot}%{install_pythondir}/clang/ +# If we don't default to true here, we'll see this error: +# install: omitting directory 'bindings/python/clang/__pycache__' +# NOTE: this only happens if we include the gdb plugin of libomp. +# Remove the plugin with command and we're good: rm -rf %{buildroot}/%{_datarootdir}/gdb +install -p -m644 clang/bindings/python/clang/* %{buildroot}%{install_pythondir}/clang/ +%py_byte_compile %{__python3} %{buildroot}%{install_pythondir}/clang/ + # Create manpage symlink for clang++ ln -s clang-%{maj_ver}.1 %{buildroot}%{install_mandir}/man1/clang++.1 @@ -2232,9 +2157,6 @@ chmod a+x %{buildroot}%{install_datadir}/scan-view/{Reporter.py,startfile.py} rm -vf %{buildroot}%{install_datadir}/clang/clang-format-bbedit.applescript rm -vf %{buildroot}%{install_datadir}/clang/clang-format-sublime.py* -# Remove unpackaged files -rm -Rvf %{buildroot}%{install_datadir}/clang-doc - # TODO: What are the Fedora guidelines for packaging bash autocomplete files? 
rm -vf %{buildroot}%{install_datadir}/clang/bash-autocomplete.sh @@ -2380,13 +2302,11 @@ rm -v %{buildroot}%{install_libdir}/libFIRAnalysis.a \ %{buildroot}%{install_libdir}/libHLFIRTransforms.a \ %{buildroot}%{install_libdir}/libCUFAttrs.a \ %{buildroot}%{install_libdir}/libCUFDialect.a \ - %{buildroot}%{install_libdir}/libFortranDecimal.a -%if %{maj_ver} >= 22 -rm -v %{buildroot}%{install_libdir}/libFortranUtils.a \ + %{buildroot}%{install_libdir}/libFortranDecimal.a \ + %{buildroot}%{install_libdir}/libFortranUtils.a \ %{buildroot}%{install_libdir}/libFIROpenACCAnalysis.a \ %{buildroot}%{install_libdir}/libFIROpenACCTransforms.a \ %{buildroot}%{install_libdir}/libMIFDialect.a -%endif find %{buildroot}%{install_includedir}/flang -type f -a ! -iname '*.mod' -delete @@ -2571,6 +2491,9 @@ function reset_test_opts() # Some test (e.g. mlir) require this to be set. unset PYTHONPATH + + # We use them in some cases. + unset LIT_NUM_SHARDS LIT_RUN_SHARD } # Convert array of test names into a regex. @@ -2891,6 +2814,7 @@ test_list_filter_out+=("MLIR :: python/execution_engine.py") # if ! 
LD_SHOW_AUXV=1 /bin/true | grep -q arch_3_00; then test_list_filter_out+=("MLIR :: python/execution_engine.py") test_list_filter_out+=("MLIR :: python/multithreaded_tests.py") +test_list_filter_out+=("MLIR :: python/global_constructors.py") %endif %if %{with flang} @@ -3099,7 +3023,6 @@ fi %license llvm/LICENSE.TXT %{expand_bins %{expand: - bugpoint dsymutil FileCheck llc @@ -3110,6 +3033,7 @@ fi llvm-bcanalyzer llvm-bitcode-strip llvm-c-test + llvm-cas llvm-cat llvm-cfi-verify llvm-cgdata @@ -3133,6 +3057,7 @@ fi llvm-gsymutil llvm-ifs llvm-install-name-tool + llvm-ir2vec llvm-jitlink llvm-jitlink-executor llvm-lib @@ -3150,6 +3075,8 @@ fi llvm-nm llvm-objcopy llvm-objdump + llvm-offload-wrapper + llvm-offload-binary llvm-opt-report llvm-otool llvm-pdbutil @@ -3187,16 +3114,18 @@ fi yaml2obj }} -%if %{maj_ver} >= 22 +%if %{maj_ver} >= 23 %{expand_bins %{expand: - llvm-ir2vec - llvm-offload-wrapper - llvm-offload-binary + llubi + llvm-gpu-loader +}} +%else +%{expand_bins %{expand: + bugpoint }} %endif %{expand_mans %{expand: - bugpoint clang-tblgen dsymutil FileCheck @@ -3221,6 +3150,7 @@ fi llvm-extract llvm-ifs llvm-install-name-tool + llvm-ir2vec llvm-lib llvm-libtool-darwin llvm-link @@ -3231,6 +3161,7 @@ fi llvm-nm llvm-objcopy llvm-objdump + llvm-offload-binary llvm-opt-report llvm-otool llvm-pdbutil @@ -3253,10 +3184,13 @@ fi tblgen }} -%if %{maj_ver} >= 22 +%if %{maj_ver} >= 23 %{expand_mans %{expand: - llvm-ir2vec - llvm-offload-binary + llubi +}} +%else +%{expand_mans %{expand: + bugpoint }} %endif @@ -3329,11 +3263,6 @@ fi llvm-opt-fuzzer llvm-test-mustache-spec }} -%if %{maj_ver} >= 22 -%{expand_bins %{expand: - llvm-cas -}} -%endif %{expand_mans %{expand: llvm-test-mustache-spec }} @@ -3378,9 +3307,6 @@ fi %if 0%{with pgo} %{expand_datas %{expand: llvm-pgo.profdata }} -%if 0%{run_pgo_perf_comparison} -%{expand_datas %{expand: results-system-vs-pgo.txt }} -%endif %endif @@ -3488,6 +3414,13 @@ fi offload-arch }} +%if %{maj_ver} >= 23 +%{expand_bins 
%{expand: + clang-ssaf-format + clang-ssaf-linker +}} +%endif + %if %{without compat_build} %{_emacs_sitestartdir}/clang-format.el %{_emacs_sitestartdir}/clang-include-fixer.el @@ -3499,6 +3432,7 @@ fi clang/clang-include-fixer.py* clang/clang-tidy-diff.py* clang/run-find-all-symbols.py* + clang-doc/* }} %files -n %{pkg_name_clang}-tools-extra-devel @@ -3509,12 +3443,9 @@ fi %license clang/LICENSE.TXT %expand_bins git-clang-format -%if %{without compat_build} -%files -n python%{python3_pkgversion}-clang +%files -n python%{python3_pkgversion}-%{pkg_name_clang} %license clang/LICENSE.TXT -%{python3_sitelib}/clang/ -%endif - +%{install_pythondir}/clang/ #endregion CLANG files #region COMPILER-RT files @@ -3537,14 +3468,9 @@ fi %{_prefix}/lib/clang/%{maj_ver}/lib/%{compiler_rt_triple}/clang_rt.crtbegin.o %{_prefix}/lib/clang/%{maj_ver}/lib/%{compiler_rt_triple}/clang_rt.crtend.o -%ifnarch %{ix86} s390x riscv64 +%ifnarch %{ix86} riscv64 %{_prefix}/lib/clang/%{maj_ver}/lib/%{compiler_rt_triple}/liborc_rt.a %endif -%ifarch s390x -%if %{maj_ver} >= 22 -%{_prefix}/lib/clang/%{maj_ver}/lib/%{compiler_rt_triple}/liborc_rt.a -%endif -%endif # Additional symlink if two triples are in use. %if "%{llvm_triple}" != "%{compiler_rt_triple}" @@ -3648,13 +3574,9 @@ fi lldb-argdumper lldb-dap lldb-instr + lldb-mcp lldb-server }} -%if %{maj_ver} >= 22 -%{expand_bins %{expand: - lldb-mcp -}} -%endif # Usually, *.so symlinks are kept in devel subpackages. However, the python # bindings depend on this symlink at runtime. 
%{expand_libs %{expand: @@ -3669,12 +3591,10 @@ fi %files -n %{pkg_name_lldb}-devel %expand_includes lldb -%if %{maj_ver} >= 22 %{expand_bins %{expand: lldb-tblgen yaml2macho-core }} -%endif %if %{without compat_build} %files -n python%{python3_pkgversion}-lldb @@ -3689,6 +3609,7 @@ fi %files -n %{pkg_name_mlir} %license LICENSE.TXT %{expand_libs %{expand: + libmlir_apfloat_wrappers.so.%{maj_ver}* libmlir_arm_runner_utils.so.%{maj_ver}* libmlir_arm_sme_abi_stubs.so.%{maj_ver}* libmlir_async_runtime.so.%{maj_ver}* @@ -3698,12 +3619,6 @@ fi libMLIR*.so.%{maj_ver}* }} -%if %{maj_ver} >= 22 -%{expand_libs %{expand: - libmlir_apfloat_wrappers.so.%{maj_ver}* -}} -%endif - %files -n %{pkg_name_mlir}-static %expand_libs libMLIR*.a @@ -3726,6 +3641,7 @@ fi %expand_includes mlir mlir-c %{expand_libs %{expand: cmake/mlir + libmlir_apfloat_wrappers.so libmlir_arm_runner_utils.so libmlir_arm_sme_abi_stubs.so libmlir_async_runtime.so @@ -3735,12 +3651,6 @@ fi libMLIR*.so }} -%if %{maj_ver} >= 22 -%{expand_libs %{expand: - libmlir_apfloat_wrappers.so -}} -%endif - %files -n python%{python3_pkgversion}-%{pkg_name_mlir} %{python3_sitearch}/mlir/ %endif @@ -3763,26 +3673,9 @@ fi }} %{install_bindir}/flang-%{maj_ver} %{expand_includes %{expand: - flang/__cuda_builtins.mod - flang/__cuda_device.mod - flang/__fortran_builtins.mod - flang/__fortran_ieee_exceptions.mod - flang/__fortran_type_info.mod - flang/__ppc_intrinsics.mod - flang/__ppc_types.mod - flang/cooperative_groups.mod - flang/ieee_arithmetic.mod - flang/ieee_exceptions.mod - flang/ieee_features.mod - flang/iso_c_binding.mod - flang/iso_fortran_env.mod - flang/mma.mod - flang/cudadevice.mod - flang/iso_fortran_env_impl.mod - flang/omp_lib.mod - flang/omp_lib_kinds.mod - flang/flang_debug.mod + flang/*.mod }} + %{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-flang.cfg %ifarch x86_64 %{_sysconfdir}/%{pkg_name_clang}/i386-redhat-linux-gnu-flang.cfg diff --git a/sources b/sources index 788e136..191f498 100644 --- a/sources 
+++ b/sources @@ -1,4 +1,4 @@ +SHA512 (llvm-project-22.1.3.src.tar.xz) = 3557a955d55471671ae2f7b9c809affd59a29a6fb1e70a2a5d040dc1c6376246deb0635be8ca36cae09112981760e9afb128c822e5554bd722589fb8dee3f0df +SHA512 (llvm-project-22.1.3.src.tar.xz.sig) = 153a0d174492a0facd061b5cfa3e18dbf946cc0c7d1fb50f4d961410d41cea1f355515fd3e892be676b8b34d61a21962c48acb90aa5d310d05cf6452053e52ad SHA512 (llvm-project-21.1.8.src.tar.xz) = cae4c44e7bf678071723da63ad5839491d717a7233e7f4791aa408207f3ea42f52de939ad15189b112c02a0770f1bb8d59bae6ad31ef53417a6eea7770fe52ab SHA512 (llvm-project-21.1.8.src.tar.xz.sig) = 10f58eff58ed6e701d0f123b15e68c82ab8cbdf99b1c86c0d83e3b8553e90ea51055e30327e8e442ded57c8f503e2a2de9ee075e9c28b5ba815a0f8922f8671c -SHA512 (llvm-project-20.1.8.src.tar.xz) = f330e72e6a1da468569049437cc0ba7a41abb816ccece7367189344f7ebfef730f4788ac7af2bef0aa8a49341c15ab1d31e941ffa782f264d11fe0dc05470773 -SHA512 (llvm-project-20.1.8.src.tar.xz.sig) = d74369bdb4d1b82775161ea53c9c5f3a23ce810f4df5ff617123023f9d8ce720e7d6ecc9e17f8ebd39fd9e7a9de79560abdf2ffe73bcb907a43148d43665d619