import UBI llvm-21.1.8-1.module+el8.10.0+23969+c061985f
This commit is contained in:
parent
7705541078
commit
7a1f5e88ae
4
.gitignore
vendored
4
.gitignore
vendored
@ -1,2 +1,2 @@
|
||||
SOURCES/llvm-project-19.1.7.src.tar.xz
|
||||
SOURCES/llvm-project-19.1.7.src.tar.xz.sig
|
||||
SOURCES/llvm-project-21.1.8.src.tar.xz
|
||||
SOURCES/llvm-project-21.1.8.src.tar.xz.sig
|
||||
|
||||
@ -1,2 +1,2 @@
|
||||
6e4033d8b76a89e82220b5445bff58cdce64300e SOURCES/llvm-project-19.1.7.src.tar.xz
|
||||
48f839c6e47a34a1138862a9db6274c150179532 SOURCES/llvm-project-19.1.7.src.tar.xz.sig
|
||||
a02f43a68bf59be15a61d6ddd0d99bd4973244f4 SOURCES/llvm-project-21.1.8.src.tar.xz
|
||||
c10b9d8ebce251f8be51eb71378122300fd37de3 SOURCES/llvm-project-21.1.8.src.tar.xz.sig
|
||||
|
||||
@ -1,29 +0,0 @@
|
||||
From b1c60d7fa322a2d208556087df9e7ef94bfbffb8 Mon Sep 17 00:00:00 2001
|
||||
From: Nikita Popov <npopov@redhat.com>
|
||||
Date: Wed, 8 May 2024 12:30:36 +0900
|
||||
Subject: [PATCH] Always build shared libs for LLD
|
||||
|
||||
We don't want to enable BUILD_SHARED_LIBS for the whole build,
|
||||
but we do want to build lld libraries.
|
||||
---
|
||||
lld/cmake/modules/AddLLD.cmake | 5 ++---
|
||||
1 file changed, 2 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/lld/cmake/modules/AddLLD.cmake b/lld/cmake/modules/AddLLD.cmake
|
||||
index 2ee066b41535..270c03f096ac 100644
|
||||
--- a/lld/cmake/modules/AddLLD.cmake
|
||||
+++ b/lld/cmake/modules/AddLLD.cmake
|
||||
@@ -7,9 +7,8 @@ macro(add_lld_library name)
|
||||
""
|
||||
""
|
||||
${ARGN})
|
||||
- if(ARG_SHARED)
|
||||
- set(ARG_ENABLE_SHARED SHARED)
|
||||
- endif()
|
||||
+ # Always build shared libs for LLD.
|
||||
+ set(ARG_ENABLE_SHARED SHARED)
|
||||
llvm_add_library(${name} ${ARG_ENABLE_SHARED} ${ARG_UNPARSED_ARGUMENTS})
|
||||
set_target_properties(${name} PROPERTIES FOLDER "lld libraries")
|
||||
|
||||
--
|
||||
2.44.0
|
||||
59
SOURCES/0001-20-polly-shared-libs.patch
Normal file
59
SOURCES/0001-20-polly-shared-libs.patch
Normal file
@ -0,0 +1,59 @@
|
||||
From cecb98f56e7d6619d0427fbdbc2f200ce212f0c6 Mon Sep 17 00:00:00 2001
|
||||
From: Konrad Kleine <kkleine@redhat.com>
|
||||
Date: Tue, 28 Jan 2025 08:34:09 +0000
|
||||
Subject: [PATCH] [polly] shared libs
|
||||
|
||||
---
|
||||
polly/cmake/polly_macros.cmake | 5 ++++-
|
||||
polly/lib/CMakeLists.txt | 1 +
|
||||
polly/lib/External/CMakeLists.txt | 1 +
|
||||
3 files changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/polly/cmake/polly_macros.cmake b/polly/cmake/polly_macros.cmake
|
||||
index 9bd7b0b0ea59..fc2c3a76901f 100644
|
||||
--- a/polly/cmake/polly_macros.cmake
|
||||
+++ b/polly/cmake/polly_macros.cmake
|
||||
@@ -1,5 +1,5 @@
|
||||
macro(add_polly_library name)
|
||||
- cmake_parse_arguments(ARG "" "" "" ${ARGN})
|
||||
+ cmake_parse_arguments(ARG "SHARED" "" "" ${ARGN})
|
||||
set(srcs ${ARG_UNPARSED_ARGUMENTS})
|
||||
if(MSVC_IDE OR XCODE)
|
||||
file( GLOB_RECURSE headers *.h *.td *.def)
|
||||
@@ -17,6 +17,9 @@ macro(add_polly_library name)
|
||||
else()
|
||||
set(libkind)
|
||||
endif()
|
||||
+ if (ARG_SHARED)
|
||||
+ set(libkind SHARED)
|
||||
+ endif()
|
||||
add_library( ${name} ${libkind} ${srcs} )
|
||||
set_target_properties(${name} PROPERTIES FOLDER "Polly/Libraries")
|
||||
|
||||
diff --git a/polly/lib/CMakeLists.txt b/polly/lib/CMakeLists.txt
|
||||
index d91f4ecd37e6..965f635b7ff6 100644
|
||||
--- a/polly/lib/CMakeLists.txt
|
||||
+++ b/polly/lib/CMakeLists.txt
|
||||
@@ -41,6 +41,7 @@ set(POLLY_COMPONENTS
|
||||
# the sources them to be recompiled for each of them.
|
||||
add_llvm_pass_plugin(Polly
|
||||
NO_MODULE
|
||||
+ SHARED
|
||||
SUBPROJECT Polly
|
||||
Analysis/DependenceInfo.cpp
|
||||
Analysis/PolyhedralInfo.cpp
|
||||
diff --git a/polly/lib/External/CMakeLists.txt b/polly/lib/External/CMakeLists.txt
|
||||
index 5dd69b7199dc..f065fbd7b942 100644
|
||||
--- a/polly/lib/External/CMakeLists.txt
|
||||
+++ b/polly/lib/External/CMakeLists.txt
|
||||
@@ -284,6 +284,7 @@ if (POLLY_BUNDLED_ISL)
|
||||
)
|
||||
|
||||
add_polly_library(PollyISL
|
||||
+ SHARED
|
||||
${ISL_FILES}
|
||||
)
|
||||
|
||||
--
|
||||
2.46.0
|
||||
|
||||
59
SOURCES/0001-22-polly-shared-libs.patch
Normal file
59
SOURCES/0001-22-polly-shared-libs.patch
Normal file
@ -0,0 +1,59 @@
|
||||
From daf5077c8ce848b39239879369679c9fea7041b1 Mon Sep 17 00:00:00 2001
|
||||
From: Konrad Kleine <kkleine@redhat.com>
|
||||
Date: Tue, 28 Jan 2025 08:34:09 +0000
|
||||
Subject: [PATCH] shared libs
|
||||
|
||||
---
|
||||
polly/cmake/polly_macros.cmake | 5 ++++-
|
||||
polly/lib/CMakeLists.txt | 1 +
|
||||
polly/lib/External/CMakeLists.txt | 1 +
|
||||
3 files changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/polly/cmake/polly_macros.cmake b/polly/cmake/polly_macros.cmake
|
||||
index 9bd7b0b0ea59..fc2c3a76901f 100644
|
||||
--- a/polly/cmake/polly_macros.cmake
|
||||
+++ b/polly/cmake/polly_macros.cmake
|
||||
@@ -1,5 +1,5 @@
|
||||
macro(add_polly_library name)
|
||||
- cmake_parse_arguments(ARG "" "" "" ${ARGN})
|
||||
+ cmake_parse_arguments(ARG "SHARED" "" "" ${ARGN})
|
||||
set(srcs ${ARG_UNPARSED_ARGUMENTS})
|
||||
if(MSVC_IDE OR XCODE)
|
||||
file( GLOB_RECURSE headers *.h *.td *.def)
|
||||
@@ -17,6 +17,9 @@ macro(add_polly_library name)
|
||||
else()
|
||||
set(libkind)
|
||||
endif()
|
||||
+ if (ARG_SHARED)
|
||||
+ set(libkind SHARED)
|
||||
+ endif()
|
||||
add_library( ${name} ${libkind} ${srcs} )
|
||||
set_target_properties(${name} PROPERTIES FOLDER "Polly/Libraries")
|
||||
|
||||
diff --git a/polly/lib/CMakeLists.txt b/polly/lib/CMakeLists.txt
|
||||
index 0ed673815ff3..e156dcb31655 100644
|
||||
--- a/polly/lib/CMakeLists.txt
|
||||
+++ b/polly/lib/CMakeLists.txt
|
||||
@@ -41,6 +41,7 @@ set(POLLY_COMPONENTS
|
||||
# the sources them to be recompiled for each of them.
|
||||
add_llvm_pass_plugin(Polly
|
||||
NO_MODULE
|
||||
+ SHARED
|
||||
SUBPROJECT Polly
|
||||
Analysis/DependenceInfo.cpp
|
||||
Analysis/ScopDetection.cpp
|
||||
diff --git a/polly/lib/External/CMakeLists.txt b/polly/lib/External/CMakeLists.txt
|
||||
index ab5cba93cdcf..fdfd06864bc7 100644
|
||||
--- a/polly/lib/External/CMakeLists.txt
|
||||
+++ b/polly/lib/External/CMakeLists.txt
|
||||
@@ -284,6 +284,7 @@ if (POLLY_BUNDLED_ISL)
|
||||
)
|
||||
|
||||
add_polly_library(PollyISL
|
||||
+ SHARED
|
||||
${ISL_FILES}
|
||||
)
|
||||
|
||||
--
|
||||
2.50.1
|
||||
|
||||
@ -0,0 +1,26 @@
|
||||
From ffc7d5ae2d79f98967943fabb2abfbc1b1e047fd Mon Sep 17 00:00:00 2001
|
||||
From: Douglas Yung <douglas.yung@sony.com>
|
||||
Date: Tue, 24 Jun 2025 04:08:34 +0000
|
||||
Subject: [PATCH] Add `REQUIRES: asserts` to test added in #145149 because it
|
||||
uses the `-debug-only=` flag.
|
||||
|
||||
This should fix the test failure when building without asserts.
|
||||
---
|
||||
llvm/test/CodeGen/PowerPC/pr141642.ll | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/llvm/test/CodeGen/PowerPC/pr141642.ll b/llvm/test/CodeGen/PowerPC/pr141642.ll
|
||||
index 38a706574786..61bda4dfaf53 100644
|
||||
--- a/llvm/test/CodeGen/PowerPC/pr141642.ll
|
||||
+++ b/llvm/test/CodeGen/PowerPC/pr141642.ll
|
||||
@@ -2,6 +2,7 @@
|
||||
; RUN: FileCheck %s
|
||||
; CHECK-NOT: lxvdsx
|
||||
; CHECK-NOT: LD_SPLAT
|
||||
+; REQUIRES: asserts
|
||||
|
||||
define weak_odr dso_local void @unpack(ptr noalias noundef %packed_in) local_unnamed_addr {
|
||||
entry:
|
||||
--
|
||||
2.49.0
|
||||
|
||||
1354
SOURCES/0001-BPF-Support-Jump-Table-149715.patch
Normal file
1354
SOURCES/0001-BPF-Support-Jump-Table-149715.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,131 @@
|
||||
From dde30a47313bf52fef02bbcb1de931a8d725659f Mon Sep 17 00:00:00 2001
|
||||
From: Florian Hahn <flo@fhahn.com>
|
||||
Date: Fri, 6 Jun 2025 12:38:30 +0100
|
||||
Subject: [PATCH] [CGP] Bail out if (Base|Scaled)Reg does not dominate insert
|
||||
point. (#142949)
|
||||
|
||||
(Base|Scaled)Reg may not dominate the chosen insert point, if there are
|
||||
multiple uses of the address. Bail out if that's the case, otherwise we
|
||||
will generate invalid IR.
|
||||
|
||||
In some cases, we could probably adjust the insert point or hoist the
|
||||
(Base|Scaled)Reg.
|
||||
|
||||
Fixes https://github.com/llvm/llvm-project/issues/142830.
|
||||
|
||||
PR: https://github.com/llvm/llvm-project/pull/142949
|
||||
---
|
||||
llvm/lib/CodeGen/CodeGenPrepare.cpp | 13 +++-
|
||||
.../X86/sink-addrmode-reg-does-not-geps.ll | 76 +++++++++++++++++++
|
||||
2 files changed, 87 insertions(+), 2 deletions(-)
|
||||
create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll
|
||||
|
||||
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
|
||||
index 822ed6283117..32348a899683 100644
|
||||
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
|
||||
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
|
||||
@@ -5945,8 +5945,17 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
|
||||
// The current BB may be optimized multiple times, we can't guarantee the
|
||||
// reuse of Addr happens later, call findInsertPos to find an appropriate
|
||||
// insert position.
|
||||
- IRBuilder<> Builder(MemoryInst->getParent(),
|
||||
- findInsertPos(Addr, MemoryInst, SunkAddr));
|
||||
+ auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
|
||||
+
|
||||
+ // TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
|
||||
+ if (!SunkAddr) {
|
||||
+ auto &DT = getDT(*MemoryInst->getFunction());
|
||||
+ if ((AddrMode.BaseReg && !DT.dominates(AddrMode.BaseReg, &*InsertPos)) ||
|
||||
+ (AddrMode.ScaledReg && !DT.dominates(AddrMode.ScaledReg, &*InsertPos)))
|
||||
+ return Modified;
|
||||
+ }
|
||||
+
|
||||
+ IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
|
||||
|
||||
if (SunkAddr) {
|
||||
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
|
||||
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll
|
||||
new file mode 100644
|
||||
index 000000000000..1640bafbd0bf
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrmode-reg-does-not-geps.ll
|
||||
@@ -0,0 +1,76 @@
|
||||
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
||||
+; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' %s | FileCheck %s
|
||||
+
|
||||
+target triple = "x86_64-unknown-linux"
|
||||
+
|
||||
+declare i1 @cond(float)
|
||||
+
|
||||
+define void @scaled_reg_does_not_dominate_insert_point(ptr %src) {
|
||||
+; CHECK-LABEL: define void @scaled_reg_does_not_dominate_insert_point(
|
||||
+; CHECK-SAME: ptr [[SRC:%.*]]) {
|
||||
+; CHECK-NEXT: [[BB:.*]]:
|
||||
+; CHECK-NEXT: br label %[[LOOP:.*]]
|
||||
+; CHECK: [[LOOP]]:
|
||||
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[BB]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
|
||||
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
|
||||
+; CHECK-NEXT: [[SUNKADDR2:%.*]] = mul i64 [[IV_NEXT]], 2
|
||||
+; CHECK-NEXT: [[SUNKADDR3:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[SUNKADDR2]]
|
||||
+; CHECK-NEXT: [[SUNKADDR4:%.*]] = getelementptr i8, ptr [[SUNKADDR3]], i64 6
|
||||
+; CHECK-NEXT: [[L_0:%.*]] = load float, ptr [[SUNKADDR4]], align 4
|
||||
+; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV]], 2
|
||||
+; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[SUNKADDR]]
|
||||
+; CHECK-NEXT: [[L_1:%.*]] = load float, ptr [[SUNKADDR1]], align 4
|
||||
+; CHECK-NEXT: [[TMP0:%.*]] = call i1 @cond(float [[L_0]])
|
||||
+; CHECK-NEXT: [[C:%.*]] = call i1 @cond(float [[L_1]])
|
||||
+; CHECK-NEXT: br i1 [[C]], label %[[LOOP]], label %[[EXIT:.*]]
|
||||
+; CHECK: [[EXIT]]:
|
||||
+; CHECK-NEXT: ret void
|
||||
+;
|
||||
+bb:
|
||||
+ %gep.base = getelementptr i8, ptr %src, i64 8
|
||||
+ br label %loop
|
||||
+
|
||||
+loop:
|
||||
+ %iv = phi i64 [ 0, %bb ], [ %iv.next, %loop ]
|
||||
+ %iv.shl = shl i64 %iv, 1
|
||||
+ %gep.shl = getelementptr i8, ptr %gep.base, i64 %iv.shl
|
||||
+ %gep.sub = getelementptr i8, ptr %gep.shl, i64 -8
|
||||
+ %iv.next = add i64 %iv, 1
|
||||
+ %l.0 = load float, ptr %gep.shl, align 4
|
||||
+ %l.1 = load float, ptr %gep.sub, align 4
|
||||
+ call i1 @cond(float %l.0)
|
||||
+ %c = call i1 @cond(float %l.1)
|
||||
+ br i1 %c, label %loop, label %exit
|
||||
+
|
||||
+exit:
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define void @check_dt_after_modifying_cfg(ptr %dst, i64 %x, i8 %y, i8 %z) {
|
||||
+; CHECK-LABEL: define void @check_dt_after_modifying_cfg(
|
||||
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[X:%.*]], i8 [[Y:%.*]], i8 [[Z:%.*]]) {
|
||||
+; CHECK-NEXT: [[ENTRY:.*]]:
|
||||
+; CHECK-NEXT: [[OFFSET:%.*]] = lshr i64 [[X]], 2
|
||||
+; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i8 [[Z]]
|
||||
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[SEL_FROZEN]], 0
|
||||
+; CHECK-NEXT: br i1 [[CMP]], label %[[SELECT_END:.*]], label %[[SELECT_FALSE_SINK:.*]]
|
||||
+; CHECK: [[SELECT_FALSE_SINK]]:
|
||||
+; CHECK-NEXT: [[SMIN:%.*]] = tail call i8 @llvm.smin.i8(i8 [[Y]], i8 0)
|
||||
+; CHECK-NEXT: br label %[[SELECT_END]]
|
||||
+; CHECK: [[SELECT_END]]:
|
||||
+; CHECK-NEXT: [[SEL:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[SMIN]], %[[SELECT_FALSE_SINK]] ]
|
||||
+; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET]]
|
||||
+; CHECK-NEXT: store i8 [[SEL]], ptr [[SUNKADDR]], align 1
|
||||
+; CHECK-NEXT: ret void
|
||||
+;
|
||||
+entry:
|
||||
+ %offset = lshr i64 %x, 2
|
||||
+ %gep.dst = getelementptr i8, ptr %dst, i64 %offset
|
||||
+ %smin = tail call i8 @llvm.smin.i8(i8 %y, i8 0)
|
||||
+ %cmp = icmp slt i8 %z, 0
|
||||
+ %sel = select i1 %cmp, i8 0, i8 %smin
|
||||
+ store i8 %sel, ptr %gep.dst, align 1
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+declare i8 @llvm.smin.i8(i8, i8) #0
|
||||
--
|
||||
2.50.1
|
||||
|
||||
@ -0,0 +1,143 @@
|
||||
From c76137f1cfd5758f6889236d49a65f059e6432ff Mon Sep 17 00:00:00 2001
|
||||
From: weiguozhi <57237827+weiguozhi@users.noreply.github.com>
|
||||
Date: Thu, 15 May 2025 09:27:25 -0700
|
||||
Subject: [PATCH] [CodeGenPrepare] Make sure instruction get from SunkAddrs is
|
||||
before MemoryInst (#139303)
|
||||
|
||||
Function optimizeBlock may do optimizations on a block for multiple
|
||||
times. In the first iteration of the loop, MemoryInst1 may generate a
|
||||
sunk instruction and store it into SunkAddrs. In the second iteration of
|
||||
the loop, MemoryInst2 may use the same address and then it can reuse the
|
||||
sunk instruction stored in SunkAddrs, but MemoryInst2 may be before
|
||||
MemoryInst1 and the corresponding sunk instruction. In order to avoid
|
||||
use before def error, we need to find appropriate insert position for the
|
||||
sunk instruction.
|
||||
|
||||
Fixes #138208.
|
||||
|
||||
(cherry picked from commit 59c6d70ed8120b8864e5f796e2bf3de5518a0ef0)
|
||||
---
|
||||
llvm/lib/CodeGen/CodeGenPrepare.cpp | 41 ++++++++++++++---
|
||||
.../CodeGenPrepare/X86/sink-addr-reuse.ll | 44 +++++++++++++++++++
|
||||
2 files changed, 80 insertions(+), 5 deletions(-)
|
||||
create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
|
||||
|
||||
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
|
||||
index 088062afab17..f779f4b782ae 100644
|
||||
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
|
||||
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
|
||||
@@ -5728,6 +5728,35 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
|
||||
return false;
|
||||
}
|
||||
|
||||
+// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
|
||||
+// is the first instruction that will use Addr. So we need to find the first
|
||||
+// user of Addr in current BB.
|
||||
+static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
|
||||
+ Value *SunkAddr) {
|
||||
+ if (Addr->hasOneUse())
|
||||
+ return MemoryInst->getIterator();
|
||||
+
|
||||
+ // We already have a SunkAddr in current BB, but we may need to insert cast
|
||||
+ // instruction after it.
|
||||
+ if (SunkAddr) {
|
||||
+ if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
|
||||
+ return std::next(AddrInst->getIterator());
|
||||
+ }
|
||||
+
|
||||
+ // Find the first user of Addr in current BB.
|
||||
+ Instruction *Earliest = MemoryInst;
|
||||
+ for (User *U : Addr->users()) {
|
||||
+ Instruction *UserInst = dyn_cast<Instruction>(U);
|
||||
+ if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
|
||||
+ if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
|
||||
+ continue;
|
||||
+ if (UserInst->comesBefore(Earliest))
|
||||
+ Earliest = UserInst;
|
||||
+ }
|
||||
+ }
|
||||
+ return Earliest->getIterator();
|
||||
+}
|
||||
+
|
||||
/// Sink addressing mode computation immediate before MemoryInst if doing so
|
||||
/// can be done without increasing register pressure. The need for the
|
||||
/// register pressure constraint means this can end up being an all or nothing
|
||||
@@ -5852,11 +5881,6 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
|
||||
return Modified;
|
||||
}
|
||||
|
||||
- // Insert this computation right after this user. Since our caller is
|
||||
- // scanning from the top of the BB to the bottom, reuse of the expr are
|
||||
- // guaranteed to happen later.
|
||||
- IRBuilder<> Builder(MemoryInst);
|
||||
-
|
||||
// Now that we determined the addressing expression we want to use and know
|
||||
// that we have to sink it into this block. Check to see if we have already
|
||||
// done this for some other load/store instr in this block. If so, reuse
|
||||
@@ -5867,6 +5891,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
|
||||
|
||||
Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
|
||||
Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
|
||||
+
|
||||
+ // The current BB may be optimized multiple times, we can't guarantee the
|
||||
+ // reuse of Addr happens later, call findInsertPos to find an appropriate
|
||||
+ // insert position.
|
||||
+ IRBuilder<> Builder(MemoryInst->getParent(),
|
||||
+ findInsertPos(Addr, MemoryInst, SunkAddr));
|
||||
+
|
||||
if (SunkAddr) {
|
||||
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
|
||||
<< " for " << *MemoryInst << "\n");
|
||||
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
|
||||
new file mode 100644
|
||||
index 000000000000..019f31140655
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
|
||||
@@ -0,0 +1,44 @@
|
||||
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
||||
+; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s
|
||||
+
|
||||
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
|
||||
+target triple = "x86_64-grtev4-linux-gnu"
|
||||
+
|
||||
+declare void @g(ptr)
|
||||
+
|
||||
+; %load and %load5 use the same address, %load5 is optimized first, %load is
|
||||
+; optimized later and reuse the same address computation instruction. We must
|
||||
+; make sure not to generate use before def error.
|
||||
+
|
||||
+define void @f(ptr %arg) {
|
||||
+; CHECK-LABEL: define void @f(
|
||||
+; CHECK-SAME: ptr [[ARG:%.*]]) {
|
||||
+; CHECK-NEXT: [[BB:.*:]]
|
||||
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
|
||||
+; CHECK-NEXT: call void @g(ptr [[GETELEMENTPTR]])
|
||||
+; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
|
||||
+; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
|
||||
+; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56
|
||||
+; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8
|
||||
+; CHECK-NEXT: [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
|
||||
+; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0)
|
||||
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
|
||||
+; CHECK-NEXT: ret void
|
||||
+;
|
||||
+bb:
|
||||
+ %getelementptr = getelementptr i8, ptr %arg, i64 -64
|
||||
+ %getelementptr1 = getelementptr i8, ptr %arg, i64 -56
|
||||
+ call void @g(ptr %getelementptr)
|
||||
+ br label %bb3
|
||||
+
|
||||
+bb3:
|
||||
+ %load = load ptr, ptr %getelementptr, align 8
|
||||
+ %load4 = load i32, ptr %getelementptr1, align 8
|
||||
+ %load5 = load ptr, ptr %getelementptr, align 8
|
||||
+ %add = add i32 1, 0
|
||||
+ %icmp = icmp eq i32 %add, 0
|
||||
+ br i1 %icmp, label %bb7, label %bb7
|
||||
+
|
||||
+bb7:
|
||||
+ ret void
|
||||
+}
|
||||
--
|
||||
2.49.0
|
||||
|
||||
@ -0,0 +1,137 @@
|
||||
From 98b82f90dfb7865ae4dbfcb5a83a9e817e7894a1 Mon Sep 17 00:00:00 2001
|
||||
From: Kevin Per <kevin.per@protonmail.com>
|
||||
Date: Thu, 18 Dec 2025 10:14:01 +0100
|
||||
Subject: [PATCH] [PowerPC]: Add check for cast when shufflevector (#172443)
|
||||
|
||||
The crash happens because the cast for `Mask =
|
||||
cast<ShuffleVectorSDNode>(Res)->getMask();` fails for node `t197: v16i8
|
||||
= vector_shuffle<16,17,18,19,4,5,6,7,8,9,10,11,u,u,u,u> t196, t196`.
|
||||
However, both `LHS` and `RHS` are the same node, so
|
||||
`DAG.getCommutedVectorShuffle` doesn't return a `ShuffleVectorSDNode`
|
||||
and crashes. The fix is to add a check before the cast is performed.
|
||||
|
||||
Closes https://github.com/llvm/llvm-project/issues/172265
|
||||
---
|
||||
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 4 +
|
||||
.../test/CodeGen/PowerPC/vec_shuffle_le_be.ll | 94 +++++++++++++++++++
|
||||
2 files changed, 98 insertions(+)
|
||||
create mode 100644 llvm/test/CodeGen/PowerPC/vec_shuffle_le_be.ll
|
||||
|
||||
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
|
||||
index 5b1d9f814806..21297b812968 100644
|
||||
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
|
||||
@@ -16886,6 +16886,10 @@ SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
|
||||
RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
|
||||
std::swap(LHS, RHS);
|
||||
Res = DAG.getCommutedVectorShuffle(*SVN);
|
||||
+
|
||||
+ if (!isa<ShuffleVectorSDNode>(Res))
|
||||
+ return Res;
|
||||
+
|
||||
Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
|
||||
}
|
||||
|
||||
diff --git a/llvm/test/CodeGen/PowerPC/vec_shuffle_le_be.ll b/llvm/test/CodeGen/PowerPC/vec_shuffle_le_be.ll
|
||||
new file mode 100644
|
||||
index 000000000000..24c1e54dd952
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/CodeGen/PowerPC/vec_shuffle_le_be.ll
|
||||
@@ -0,0 +1,94 @@
|
||||
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck -check-prefix=CHECK-LE %s
|
||||
+; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck -check-prefix=CHECK-BE %s
|
||||
+
|
||||
+define <32 x i32> @issue_172265(<32 x i32> %BS_ARG_1, <3 x i32> %0) {
|
||||
+; CHECK-LABEL: issue_172265:
|
||||
+; CHECK: # %bb.0: # %entry
|
||||
+; CHECK-NEXT: addis 3, 2, .LCPI18_0@toc@ha
|
||||
+; CHECK-NEXT: vspltw 3, 10, 1
|
||||
+; CHECK-NEXT: addi 3, 3, .LCPI18_0@toc@l
|
||||
+; CHECK-NEXT: vmr 7, 3
|
||||
+; CHECK-NEXT: lvx 4, 0, 3
|
||||
+; CHECK-NEXT: addis 3, 2, .LCPI18_1@toc@ha
|
||||
+; CHECK-NEXT: addi 3, 3, .LCPI18_1@toc@l
|
||||
+; CHECK-NEXT: vmr 8, 3
|
||||
+; CHECK-NEXT: vmr 9, 3
|
||||
+; CHECK-NEXT: vperm 4, 3, 3, 4
|
||||
+; CHECK-NEXT: lvx 1, 0, 3
|
||||
+; CHECK-NEXT: addis 3, 2, .LCPI18_2@toc@ha
|
||||
+; CHECK-NEXT: addi 3, 3, .LCPI18_2@toc@l
|
||||
+; CHECK-NEXT: lvx 5, 0, 3
|
||||
+; CHECK-NEXT: addis 3, 2, .LCPI18_3@toc@ha
|
||||
+; CHECK-NEXT: addi 3, 3, .LCPI18_3@toc@l
|
||||
+; CHECK-NEXT: lvx 6, 0, 3
|
||||
+; CHECK-NEXT: addis 3, 2, .LCPI18_4@toc@ha
|
||||
+; CHECK-NEXT: addi 3, 3, .LCPI18_4@toc@l
|
||||
+; CHECK-NEXT: vperm 4, 2, 4, 1
|
||||
+; CHECK-NEXT: lvx 2, 0, 3
|
||||
+; CHECK-NEXT: vperm 0, 3, 3, 5
|
||||
+; CHECK-NEXT: vperm 5, 3, 3, 6
|
||||
+; CHECK-NEXT: vperm 6, 3, 3, 2
|
||||
+; CHECK-NEXT: vmr 2, 0
|
||||
+; CHECK-NEXT: blr
|
||||
+; CHECK-LE-LABEL: issue_172265:
|
||||
+; CHECK-LE: # %bb.0: # %entry
|
||||
+; CHECK-LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
||||
+; CHECK-LE-NEXT: xxspltw 35, 42, 1
|
||||
+; CHECK-LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
||||
+; CHECK-LE-NEXT: vmr 7, 3
|
||||
+; CHECK-LE-NEXT: vmr 8, 3
|
||||
+; CHECK-LE-NEXT: vmr 9, 3
|
||||
+; CHECK-LE-NEXT: lxvd2x 0, 0, 3
|
||||
+; CHECK-LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
|
||||
+; CHECK-LE-NEXT: addi 3, 3, .LCPI0_1@toc@l
|
||||
+; CHECK-LE-NEXT: lxvd2x 1, 0, 3
|
||||
+; CHECK-LE-NEXT: addis 3, 2, .LCPI0_2@toc@ha
|
||||
+; CHECK-LE-NEXT: addi 3, 3, .LCPI0_2@toc@l
|
||||
+; CHECK-LE-NEXT: lxvd2x 2, 0, 3
|
||||
+; CHECK-LE-NEXT: addis 3, 2, .LCPI0_3@toc@ha
|
||||
+; CHECK-LE-NEXT: addi 3, 3, .LCPI0_3@toc@l
|
||||
+; CHECK-LE-NEXT: xxswapd 36, 0
|
||||
+; CHECK-LE-NEXT: lxvd2x 0, 0, 3
|
||||
+; CHECK-LE-NEXT: vperm 4, 2, 3, 4
|
||||
+; CHECK-LE-NEXT: xxswapd 37, 1
|
||||
+; CHECK-LE-NEXT: vperm 2, 3, 3, 5
|
||||
+; CHECK-LE-NEXT: xxswapd 32, 2
|
||||
+; CHECK-LE-NEXT: vperm 5, 3, 3, 0
|
||||
+; CHECK-LE-NEXT: xxswapd 33, 0
|
||||
+; CHECK-LE-NEXT: vperm 6, 3, 3, 1
|
||||
+; CHECK-LE-NEXT: blr
|
||||
+;
|
||||
+; CHECK-BE-LABEL: issue_172265:
|
||||
+; CHECK-BE: # %bb.0: # %entry
|
||||
+; CHECK-BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
||||
+; CHECK-BE-NEXT: vspltw 3, 10, 2
|
||||
+; CHECK-BE-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
||||
+; CHECK-BE-NEXT: vmr 7, 3
|
||||
+; CHECK-BE-NEXT: lvx 4, 0, 3
|
||||
+; CHECK-BE-NEXT: addis 3, 2, .LCPI0_2@toc@ha
|
||||
+; CHECK-BE-NEXT: addi 3, 3, .LCPI0_2@toc@l
|
||||
+; CHECK-BE-NEXT: lvx 5, 0, 3
|
||||
+; CHECK-BE-NEXT: addis 3, 2, .LCPI0_3@toc@ha
|
||||
+; CHECK-BE-NEXT: addi 3, 3, .LCPI0_3@toc@l
|
||||
+; CHECK-BE-NEXT: vperm 0, 3, 3, 5
|
||||
+; CHECK-BE-NEXT: lvx 5, 0, 3
|
||||
+; CHECK-BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
|
||||
+; CHECK-BE-NEXT: addi 3, 3, .LCPI0_1@toc@l
|
||||
+; CHECK-BE-NEXT: lvx 1, 0, 3
|
||||
+; CHECK-BE-NEXT: addis 3, 2, .LCPI0_4@toc@ha
|
||||
+; CHECK-BE-NEXT: addi 3, 3, .LCPI0_4@toc@l
|
||||
+; CHECK-BE-NEXT: vperm 4, 3, 3, 4
|
||||
+; CHECK-BE-NEXT: vperm 4, 4, 2, 1
|
||||
+; CHECK-BE-NEXT: lvx 2, 0, 3
|
||||
+; CHECK-BE-NEXT: vperm 5, 3, 3, 5
|
||||
+; CHECK-BE-NEXT: vperm 6, 3, 3, 2
|
||||
+; CHECK-BE-NEXT: vmr 2, 0
|
||||
+; CHECK-BE-NEXT: vmr 8, 3
|
||||
+; CHECK-BE-NEXT: vmr 9, 3
|
||||
+; CHECK-BE-NEXT: blr
|
||||
+entry:
|
||||
+ %vecinit37 = shufflevector <3 x i32> %0, <3 x i32> zeroinitializer, <32 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
+ %shuffle56 = shufflevector <32 x i32> %vecinit37, <32 x i32> %BS_ARG_1, <32 x i32> <i32 4, i32 9, i32 3, i32 3, i32 4, i32 1, i32 1, i32 0, i32 16, i32 5, i32 5, i32 34, i32 3, i32 0, i32 8, i32 2, i32 8, i32 1, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 7, i32 5, i32 3, i32 6, i32 0, i32 3, i32 4, i32 7>
|
||||
+ ret <32 x i32> %shuffle56
|
||||
+}
|
||||
--
|
||||
2.52.0
|
||||
|
||||
@ -0,0 +1,67 @@
|
||||
From 735d721de451067c3a618b309703d0b8beb9cacc Mon Sep 17 00:00:00 2001
|
||||
From: Wael Yehia <wmyehia2001@yahoo.com>
|
||||
Date: Mon, 23 Jun 2025 13:22:33 -0400
|
||||
Subject: [PATCH] [PowerPC] Fix handling of undefs in the
|
||||
PPC::isSplatShuffleMask query (#145149)
|
||||
|
||||
Currently, the query assumes that a single undef byte implies the rest of
|
||||
the `EltSize - 1` bytes are undefs, but that's not always true.
|
||||
e.g. isSplatShuffleMask(
|
||||
<0,1,2,3,4,5,6,7,undef,undef,undef,undef,0,1,2,3>, 8) should return
|
||||
false.
|
||||
|
||||
---------
|
||||
|
||||
Co-authored-by: Wael Yehia <wyehia@ca.ibm.com>
|
||||
---
|
||||
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 13 +++++++++----
|
||||
llvm/test/CodeGen/PowerPC/pr141642.ll | 13 +++++++++++++
|
||||
2 files changed, 22 insertions(+), 4 deletions(-)
|
||||
create mode 100644 llvm/test/CodeGen/PowerPC/pr141642.ll
|
||||
|
||||
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
|
||||
index 421a808de667..88c6fe632d26 100644
|
||||
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
|
||||
@@ -2242,10 +2242,15 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
|
||||
return false;
|
||||
|
||||
for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
|
||||
- if (N->getMaskElt(i) < 0) continue;
|
||||
- for (unsigned j = 0; j != EltSize; ++j)
|
||||
- if (N->getMaskElt(i+j) != N->getMaskElt(j))
|
||||
- return false;
|
||||
+ // An UNDEF element is a sequence of UNDEF bytes.
|
||||
+ if (N->getMaskElt(i) < 0) {
|
||||
+ for (unsigned j = 1; j != EltSize; ++j)
|
||||
+ if (N->getMaskElt(i + j) >= 0)
|
||||
+ return false;
|
||||
+ } else
|
||||
+ for (unsigned j = 0; j != EltSize; ++j)
|
||||
+ if (N->getMaskElt(i + j) != N->getMaskElt(j))
|
||||
+ return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
diff --git a/llvm/test/CodeGen/PowerPC/pr141642.ll b/llvm/test/CodeGen/PowerPC/pr141642.ll
|
||||
new file mode 100644
|
||||
index 000000000000..38a706574786
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/CodeGen/PowerPC/pr141642.ll
|
||||
@@ -0,0 +1,13 @@
|
||||
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O0 -debug-only=selectiondag -o - < %s 2>&1 | \
|
||||
+; RUN: FileCheck %s
|
||||
+; CHECK-NOT: lxvdsx
|
||||
+; CHECK-NOT: LD_SPLAT
|
||||
+
|
||||
+define weak_odr dso_local void @unpack(ptr noalias noundef %packed_in) local_unnamed_addr {
|
||||
+entry:
|
||||
+ %ld = load <2 x i32>, ptr %packed_in, align 2
|
||||
+ %shuf = shufflevector <2 x i32> %ld, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 0>
|
||||
+ %ie = insertelement <4 x i32> %shuf, i32 7, i32 2
|
||||
+ store <4 x i32> %shuf, ptr %packed_in, align 2
|
||||
+ ret void
|
||||
+}
|
||||
--
|
||||
2.49.0
|
||||
|
||||
@ -0,0 +1,191 @@
|
||||
From fc12fc635b96e9fa521a33eb31336c539eed1918 Mon Sep 17 00:00:00 2001
|
||||
From: sujianIBM <98488060+sujianIBM@users.noreply.github.com>
|
||||
Date: Thu, 31 Jul 2025 13:18:23 -0400
|
||||
Subject: [PATCH] [SystemZ] Fix code in widening vector multiplication
|
||||
(#150836)
|
||||
|
||||
Commit cdc7864 has an error which would wrongly fold widening
|
||||
multiplications into an even/odd widening operation.
|
||||
This PR fixes it and adds tests to check scenarios which should not be
|
||||
folded into an even/odd widening operation are actually not.
|
||||
---
|
||||
.../Target/SystemZ/SystemZISelLowering.cpp | 2 +-
|
||||
llvm/test/CodeGen/SystemZ/vec-mul-07.ll | 114 ++++++++++++++++++
|
||||
2 files changed, 115 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
|
||||
index e30d7235b81b..fb0a47dc9dc4 100644
|
||||
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
|
||||
@@ -9044,7 +9044,7 @@ static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG,
|
||||
if (unsigned(ShuffleMask[Elt]) != 2 * Elt)
|
||||
CanUseEven = false;
|
||||
if (unsigned(ShuffleMask[Elt]) != 2 * Elt + 1)
|
||||
- CanUseEven = true;
|
||||
+ CanUseOdd = false;
|
||||
}
|
||||
Op = Op.getOperand(0);
|
||||
if (CanUseEven)
|
||||
diff --git a/llvm/test/CodeGen/SystemZ/vec-mul-07.ll b/llvm/test/CodeGen/SystemZ/vec-mul-07.ll
|
||||
index 73c7a8dec5df..583561625cfc 100644
|
||||
--- a/llvm/test/CodeGen/SystemZ/vec-mul-07.ll
|
||||
+++ b/llvm/test/CodeGen/SystemZ/vec-mul-07.ll
|
||||
@@ -3,6 +3,23 @@
|
||||
;
|
||||
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
|
||||
+; Test a v16i8 -> v8i16 unsigned widening multiplication
|
||||
+; which is not folded into an even/odd widening operation.
|
||||
+define <8 x i16> @f1_not(<16 x i8> %val1, <16 x i8> %val2) {
|
||||
+; CHECK-LABEL: f1_not:
|
||||
+; CHECK: # %bb.0:
|
||||
+; CHECK-NEXT: vuplhb %v0, %v24
|
||||
+; CHECK-NEXT: vuplhb %v1, %v26
|
||||
+; CHECK-NEXT: vmlhw %v24, %v0, %v1
|
||||
+; CHECK-NEXT: br %r14
|
||||
+ %shuf1 = shufflevector <16 x i8> %val1, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
+ %zext1 = zext <8 x i8> %shuf1 to <8 x i16>
|
||||
+ %shuf2 = shufflevector <16 x i8> %val2, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
+ %zext2 = zext <8 x i8> %shuf2 to <8 x i16>
|
||||
+ %ret = mul <8 x i16> %zext1, %zext2
|
||||
+ ret <8 x i16> %ret
|
||||
+}
|
||||
+
|
||||
; Test a v16i8 (even) -> v8i16 unsigned widening multiplication.
|
||||
define <8 x i16> @f1(<16 x i8> %val1, <16 x i8> %val2) {
|
||||
; CHECK-LABEL: f1:
|
||||
@@ -31,6 +48,23 @@ define <8 x i16> @f2(<16 x i8> %val1, <16 x i8> %val2) {
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
+; Test a v16i8 -> v8i16 signed widening multiplication
|
||||
+; which is not folded into an even/odd widening operation.
|
||||
+define <8 x i16> @f3_not(<16 x i8> %val1, <16 x i8> %val2) {
|
||||
+; CHECK-LABEL: f3_not:
|
||||
+; CHECK: # %bb.0:
|
||||
+; CHECK-NEXT: vuphb %v0, %v26
|
||||
+; CHECK-NEXT: vuphb %v1, %v24
|
||||
+; CHECK-NEXT: vmlhw %v24, %v1, %v0
|
||||
+; CHECK-NEXT: br %r14
|
||||
+ %shuf1 = shufflevector <16 x i8> %val1, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
+ %sext1 = sext <8 x i8> %shuf1 to <8 x i16>
|
||||
+ %shuf2 = shufflevector <16 x i8> %val2, <16 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
+ %sext2 = sext <8 x i8> %shuf2 to <8 x i16>
|
||||
+ %ret = mul <8 x i16> %sext1, %sext2
|
||||
+ ret <8 x i16> %ret
|
||||
+}
|
||||
+
|
||||
; Test a v16i8 (even) -> v8i16 signed widening multiplication.
|
||||
define <8 x i16> @f3(<16 x i8> %val1, <16 x i8> %val2) {
|
||||
; CHECK-LABEL: f3:
|
||||
@@ -59,6 +93,23 @@ define <8 x i16> @f4(<16 x i8> %val1, <16 x i8> %val2) {
|
||||
ret <8 x i16> %ret
|
||||
}
|
||||
|
||||
+; Test a v8i16 -> v4i32 unsigned widening multiplication
|
||||
+; which is not folded into an even/odd widening operation.
|
||||
+define <4 x i32> @f5_not(<8 x i16> %val1, <8 x i16> %val2) {
|
||||
+; CHECK-LABEL: f5_not:
|
||||
+; CHECK: # %bb.0:
|
||||
+; CHECK-NEXT: vuplhh %v0, %v24
|
||||
+; CHECK-NEXT: vuplhh %v1, %v26
|
||||
+; CHECK-NEXT: vmlf %v24, %v0, %v1
|
||||
+; CHECK-NEXT: br %r14
|
||||
+ %shuf1 = shufflevector <8 x i16> %val1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
+ %zext1 = zext <4 x i16> %shuf1 to <4 x i32>
|
||||
+ %shuf2 = shufflevector <8 x i16> %val2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
+ %zext2 = zext <4 x i16> %shuf2 to <4 x i32>
|
||||
+ %ret = mul <4 x i32> %zext1, %zext2
|
||||
+ ret <4 x i32> %ret
|
||||
+}
|
||||
+
|
||||
; Test a v8i16 (even) -> v4i32 unsigned widening multiplication.
|
||||
define <4 x i32> @f5(<8 x i16> %val1, <8 x i16> %val2) {
|
||||
; CHECK-LABEL: f5:
|
||||
@@ -87,6 +138,23 @@ define <4 x i32> @f6(<8 x i16> %val1, <8 x i16> %val2) {
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
+; Test a v8i16 -> v4i32 signed widening multiplication
|
||||
+; which is not folded into an even/odd widening operation.
|
||||
+define <4 x i32> @f7_not(<8 x i16> %val1, <8 x i16> %val2) {
|
||||
+; CHECK-LABEL: f7_not:
|
||||
+; CHECK: # %bb.0:
|
||||
+; CHECK-NEXT: vuphh %v0, %v26
|
||||
+; CHECK-NEXT: vuphh %v1, %v24
|
||||
+; CHECK-NEXT: vmlf %v24, %v1, %v0
|
||||
+; CHECK-NEXT: br %r14
|
||||
+ %shuf1 = shufflevector <8 x i16> %val1, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
+ %sext1 = sext <4 x i16> %shuf1 to <4 x i32>
|
||||
+ %shuf2 = shufflevector <8 x i16> %val2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
+ %sext2 = sext <4 x i16> %shuf2 to <4 x i32>
|
||||
+ %ret = mul <4 x i32> %sext1, %sext2
|
||||
+ ret <4 x i32> %ret
|
||||
+}
|
||||
+
|
||||
; Test a v8i16 (even) -> v4i32 signed widening multiplication.
|
||||
define <4 x i32> @f7(<8 x i16> %val1, <8 x i16> %val2) {
|
||||
; CHECK-LABEL: f7:
|
||||
@@ -115,6 +183,29 @@ define <4 x i32> @f8(<8 x i16> %val1, <8 x i16> %val2) {
|
||||
ret <4 x i32> %ret
|
||||
}
|
||||
|
||||
+; Test a v4i32 -> v2i64 unsigned widening multiplication
|
||||
+; which is not folded into an even/odd widening operation.
|
||||
+define <2 x i64> @f9_not(<4 x i32> %val1, <4 x i32> %val2) {
|
||||
+; CHECK-LABEL: f9_not:
|
||||
+; CHECK: # %bb.0:
|
||||
+; CHECK-NEXT: vuplhf %v0, %v24
|
||||
+; CHECK-NEXT: vuplhf %v1, %v26
|
||||
+; CHECK-NEXT: vlgvg %r0, %v1, 1
|
||||
+; CHECK-NEXT: vlgvg %r1, %v0, 1
|
||||
+; CHECK-NEXT: msgr %r1, %r0
|
||||
+; CHECK-NEXT: vlgvg %r0, %v1, 0
|
||||
+; CHECK-NEXT: vlgvg %r2, %v0, 0
|
||||
+; CHECK-NEXT: msgr %r2, %r0
|
||||
+; CHECK-NEXT: vlvgp %v24, %r2, %r1
|
||||
+; CHECK-NEXT: br %r14
|
||||
+ %shuf1 = shufflevector <4 x i32> %val1, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||
+ %zext1 = zext <2 x i32> %shuf1 to <2 x i64>
|
||||
+ %shuf2 = shufflevector <4 x i32> %val2, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||
+ %zext2 = zext <2 x i32> %shuf2 to <2 x i64>
|
||||
+ %ret = mul <2 x i64> %zext1, %zext2
|
||||
+ ret <2 x i64> %ret
|
||||
+}
|
||||
+
|
||||
; Test a v4i32 (even) -> v2i64 unsigned widening multiplication.
|
||||
define <2 x i64> @f9(<4 x i32> %val1, <4 x i32> %val2) {
|
||||
; CHECK-LABEL: f9:
|
||||
@@ -143,6 +234,29 @@ define <2 x i64> @f10(<4 x i32> %val1, <4 x i32> %val2) {
|
||||
ret <2 x i64> %ret
|
||||
}
|
||||
|
||||
+; Test a v4i32 -> v2i64 signed widening multiplication
|
||||
+; which is not folded into an even/odd widening operation.
|
||||
+define <2 x i64> @f11_not(<4 x i32> %val1, <4 x i32> %val2) {
|
||||
+; CHECK-LABEL: f11_not:
|
||||
+; CHECK: # %bb.0:
|
||||
+; CHECK-NEXT: vuphf %v0, %v24
|
||||
+; CHECK-NEXT: vuphf %v1, %v26
|
||||
+; CHECK-NEXT: vlgvg %r0, %v1, 1
|
||||
+; CHECK-NEXT: vlgvg %r1, %v0, 1
|
||||
+; CHECK-NEXT: msgr %r1, %r0
|
||||
+; CHECK-NEXT: vlgvg %r0, %v1, 0
|
||||
+; CHECK-NEXT: vlgvg %r2, %v0, 0
|
||||
+; CHECK-NEXT: msgr %r2, %r0
|
||||
+; CHECK-NEXT: vlvgp %v24, %r2, %r1
|
||||
+; CHECK-NEXT: br %r14
|
||||
+ %shuf1 = shufflevector <4 x i32> %val1, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||
+ %sext1 = sext <2 x i32> %shuf1 to <2 x i64>
|
||||
+ %shuf2 = shufflevector <4 x i32> %val2, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||
+ %sext2 = sext <2 x i32> %shuf2 to <2 x i64>
|
||||
+ %ret = mul <2 x i64> %sext1, %sext2
|
||||
+ ret <2 x i64> %ret
|
||||
+}
|
||||
+
|
||||
; Test a v4i32 (even) -> v2i64 signed widening multiplication.
|
||||
define <2 x i64> @f11(<4 x i32> %val1, <4 x i32> %val2) {
|
||||
; CHECK-LABEL: f11:
|
||||
--
|
||||
2.52.0
|
||||
|
||||
@ -0,0 +1,27 @@
|
||||
From f028fc042ef2875a13c6abf3828626a313e4a8e6 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Stellard <tstellar@redhat.com>
|
||||
Date: Fri, 1 Aug 2025 15:38:22 +0000
|
||||
Subject: [PATCH] clang: Add a hack to fix the offload build with the
|
||||
mtls-dialect option
|
||||
|
||||
---
|
||||
clang/lib/Driver/ToolChains/CommonArgs.cpp | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
|
||||
index 097d186ad8ea..0dc9e60f8428 100644
|
||||
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
|
||||
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
|
||||
@@ -920,6 +920,9 @@ bool tools::isTLSDESCEnabled(const ToolChain &TC,
|
||||
} else if (Triple.isX86()) {
|
||||
SupportedArgument = V == "gnu" || V == "gnu2";
|
||||
EnableTLSDESC = V == "gnu2";
|
||||
+ } else if( Triple.isGPU()) {
|
||||
+ // HACK To fix the offload build.
|
||||
+ return false;
|
||||
} else {
|
||||
Unsupported = true;
|
||||
}
|
||||
--
|
||||
2.49.0
|
||||
|
||||
@ -0,0 +1,39 @@
|
||||
From 06774eb8a7dc0bc36b59e53310c7f5b5d89f6c29 Mon Sep 17 00:00:00 2001
|
||||
From: Nikita Popov <npopov@redhat.com>
|
||||
Date: Tue, 28 Jan 2025 12:31:49 +0100
|
||||
Subject: [PATCH] [cmake] Resolve symlink when finding install prefix
|
||||
|
||||
When determining the install prefix in LLVMConfig.cmake etc resolve
|
||||
symlinks in CMAKE_CURRENT_LIST_FILE first. The motivation for this
|
||||
is to support symlinks like `/usr/lib64/cmake/llvm` to
|
||||
`/usr/lib64/llvm19/lib/cmake/llvm`. This only works correctly if
|
||||
the paths are relative to the resolved symlink.
|
||||
|
||||
It's worth noting that this *mostly* already works out of the box,
|
||||
because cmake automatically does the symlink resolution when the
|
||||
library is found via CMAKE_PREFIX_PATH. It just doesn't happen
|
||||
when it's found via the default prefix path.
|
||||
---
|
||||
cmake/Modules/FindPrefixFromConfig.cmake | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/cmake/Modules/FindPrefixFromConfig.cmake b/cmake/Modules/FindPrefixFromConfig.cmake
|
||||
index 22211e4b72f2..3daff607ff84 100644
|
||||
--- a/cmake/Modules/FindPrefixFromConfig.cmake
|
||||
+++ b/cmake/Modules/FindPrefixFromConfig.cmake
|
||||
@@ -39,10 +39,10 @@ function(find_prefix_from_config out_var prefix_var path_to_leave)
|
||||
# install prefix, and avoid hard-coding any absolute paths.
|
||||
set(config_code
|
||||
"# Compute the installation prefix from this LLVMConfig.cmake file location."
|
||||
- "get_filename_component(${prefix_var} \"\${CMAKE_CURRENT_LIST_FILE}\" PATH)")
|
||||
+ "get_filename_component(${prefix_var} \"\${CMAKE_CURRENT_LIST_FILE}\" REALPATH)")
|
||||
# Construct the proper number of get_filename_component(... PATH)
|
||||
# calls to compute the installation prefix.
|
||||
- string(REGEX REPLACE "/" ";" _count "${path_to_leave}")
|
||||
+ string(REGEX REPLACE "/" ";" _count "${path_to_leave}/plus_one")
|
||||
foreach(p ${_count})
|
||||
list(APPEND config_code
|
||||
"get_filename_component(${prefix_var} \"\${${prefix_var}}\" PATH)")
|
||||
--
|
||||
2.48.1
|
||||
|
||||
@ -0,0 +1,29 @@
|
||||
From 4fbbdb4f6b95158b87e1b072b3a246722ccf3b7d Mon Sep 17 00:00:00 2001
|
||||
From: Nikita Popov <npopov@redhat.com>
|
||||
Date: Fri, 19 Dec 2025 17:02:24 +0100
|
||||
Subject: [PATCH] [lld] Adjust compressed-debug-level test for s390x with
|
||||
DFLTCC (#172972)
|
||||
|
||||
After enabling DFLTCC in zlib-ng for s390x this test starts failing,
|
||||
because slightly better compression is produced at level 1. Add 1c as a
|
||||
permissible output.
|
||||
---
|
||||
lld/test/ELF/compressed-debug-level.test | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/lld/test/ELF/compressed-debug-level.test b/lld/test/ELF/compressed-debug-level.test
|
||||
index 5a4d37e31eca..7d64298e518f 100644
|
||||
--- a/lld/test/ELF/compressed-debug-level.test
|
||||
+++ b/lld/test/ELF/compressed-debug-level.test
|
||||
@@ -18,7 +18,7 @@
|
||||
# RUN: llvm-readelf --sections %t.6 | FileCheck -check-prefixes=HEADER,LEVEL6 %s
|
||||
|
||||
# HEADER: [Nr] Name Type Address Off Size
|
||||
-# LEVEL1: [ 1] .debug_info PROGBITS 00000000 000094 0000{{1[def]|21}}
|
||||
+# LEVEL1: [ 1] .debug_info PROGBITS 00000000 000094 0000{{1[cdef]|21}}
|
||||
# LEVEL6: [ 1] .debug_info PROGBITS 00000000 000094 00001{{[abc]}}
|
||||
|
||||
## A little arbitrary debug section which has a different size after
|
||||
--
|
||||
2.50.1
|
||||
|
||||
@ -1,62 +0,0 @@
|
||||
From b2edeb58b8cb3268acee425cd52b406eb60a8095 Mon Sep 17 00:00:00 2001
|
||||
From: Nikita Popov <npopov@redhat.com>
|
||||
Date: Wed, 9 Oct 2024 11:29:30 +0200
|
||||
Subject: [PATCH] [openmp] Add option to disable tsan tests (#111548)
|
||||
|
||||
This adds an OPENMP_TEST_ENABLE_TSAN option that allows overriding
|
||||
whether tests using tsan will be enabled. The option defaults to the
|
||||
existing auto-detection.
|
||||
|
||||
The background here is
|
||||
https://github.com/llvm/llvm-project/issues/111492, where we have some
|
||||
systems where tsan doesn't work, but we do still want to build it and
|
||||
run tests that don't use tsan.
|
||||
---
|
||||
openmp/cmake/OpenMPTesting.cmake | 3 +++
|
||||
openmp/tools/archer/tests/CMakeLists.txt | 2 +-
|
||||
openmp/tools/archer/tests/lit.site.cfg.in | 2 +-
|
||||
3 files changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/openmp/cmake/OpenMPTesting.cmake b/openmp/cmake/OpenMPTesting.cmake
|
||||
index c67ad8b1cbd9..14cc5c67d84c 100644
|
||||
--- a/openmp/cmake/OpenMPTesting.cmake
|
||||
+++ b/openmp/cmake/OpenMPTesting.cmake
|
||||
@@ -163,6 +163,9 @@ else()
|
||||
set(OPENMP_TEST_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS 1)
|
||||
endif()
|
||||
|
||||
+set(OPENMP_TEST_ENABLE_TSAN "${OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS}" CACHE BOOL
|
||||
+ "Whether to enable tests using tsan")
|
||||
+
|
||||
# Function to set compiler features for use in lit.
|
||||
function(update_test_compiler_features)
|
||||
set(FEATURES "[")
|
||||
diff --git a/openmp/tools/archer/tests/CMakeLists.txt b/openmp/tools/archer/tests/CMakeLists.txt
|
||||
index 5de91148fa4b..412c7d63725e 100644
|
||||
--- a/openmp/tools/archer/tests/CMakeLists.txt
|
||||
+++ b/openmp/tools/archer/tests/CMakeLists.txt
|
||||
@@ -28,7 +28,7 @@ macro(pythonize_bool var)
|
||||
endmacro()
|
||||
|
||||
pythonize_bool(LIBARCHER_HAVE_LIBATOMIC)
|
||||
-pythonize_bool(OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS)
|
||||
+pythonize_bool(OPENMP_TEST_ENABLE_TSAN)
|
||||
|
||||
set(ARCHER_TSAN_TEST_DEPENDENCE "")
|
||||
if(TARGET tsan)
|
||||
diff --git a/openmp/tools/archer/tests/lit.site.cfg.in b/openmp/tools/archer/tests/lit.site.cfg.in
|
||||
index 55edfde9738e..ddcb7b8bc3a5 100644
|
||||
--- a/openmp/tools/archer/tests/lit.site.cfg.in
|
||||
+++ b/openmp/tools/archer/tests/lit.site.cfg.in
|
||||
@@ -12,7 +12,7 @@ config.omp_library_dir = "@LIBOMP_LIBRARY_DIR@"
|
||||
config.omp_header_dir = "@LIBOMP_INCLUDE_DIR@"
|
||||
config.operating_system = "@CMAKE_SYSTEM_NAME@"
|
||||
config.has_libatomic = @LIBARCHER_HAVE_LIBATOMIC@
|
||||
-config.has_tsan = @OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS@
|
||||
+config.has_tsan = @OPENMP_TEST_ENABLE_TSAN@
|
||||
|
||||
config.test_archer_flags = "@LIBARCHER_TEST_FLAGS@"
|
||||
config.libarcher_obj_root = "@CMAKE_CURRENT_BINARY_DIR@"
|
||||
--
|
||||
2.46.0
|
||||
|
||||
@ -1,205 +0,0 @@
|
||||
From 5fb4d7f6079a76b2907ccc8c53c7c509c30a3dca Mon Sep 17 00:00:00 2001
|
||||
From: Nikita Popov <npopov@redhat.com>
|
||||
Date: Thu, 10 Oct 2024 12:47:33 +0000
|
||||
Subject: [PATCH] [openmp] Use core_siblings_list if physical_package_id not
|
||||
available
|
||||
|
||||
On powerpc, physical_package_id may not be available. Currently,
|
||||
this causes openmp to fall back to flat topology and various
|
||||
affinity tests fail.
|
||||
|
||||
Fix this by parsing core_siblings_list to determine which cpus
|
||||
belong to the same socket. This matches what the testing code
|
||||
does. The code to parse the CPU list format thankfully already
|
||||
exists.
|
||||
|
||||
Fixes https://github.com/llvm/llvm-project/issues/111809.
|
||||
---
|
||||
openmp/runtime/src/kmp_affinity.cpp | 100 +++++++++++++------
|
||||
openmp/runtime/test/affinity/kmp-hw-subset.c | 2 +-
|
||||
2 files changed, 72 insertions(+), 30 deletions(-)
|
||||
|
||||
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
|
||||
index cf5cad04eb57..c3d5ecf1345e 100644
|
||||
--- a/openmp/runtime/src/kmp_affinity.cpp
|
||||
+++ b/openmp/runtime/src/kmp_affinity.cpp
|
||||
@@ -1589,15 +1589,13 @@ kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
|
||||
return buf;
|
||||
}
|
||||
|
||||
-// Return (possibly empty) affinity mask representing the offline CPUs
|
||||
-// Caller must free the mask
|
||||
-kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
|
||||
- kmp_affin_mask_t *offline;
|
||||
- KMP_CPU_ALLOC(offline);
|
||||
- KMP_CPU_ZERO(offline);
|
||||
+static kmp_affin_mask_t *__kmp_parse_cpu_list(const char *path) {
|
||||
+ kmp_affin_mask_t *mask;
|
||||
+ KMP_CPU_ALLOC(mask);
|
||||
+ KMP_CPU_ZERO(mask);
|
||||
#if KMP_OS_LINUX
|
||||
int n, begin_cpu, end_cpu;
|
||||
- kmp_safe_raii_file_t offline_file;
|
||||
+ kmp_safe_raii_file_t file;
|
||||
auto skip_ws = [](FILE *f) {
|
||||
int c;
|
||||
do {
|
||||
@@ -1606,29 +1604,29 @@ kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
|
||||
if (c != EOF)
|
||||
ungetc(c, f);
|
||||
};
|
||||
- // File contains CSV of integer ranges representing the offline CPUs
|
||||
+ // File contains CSV of integer ranges representing the CPUs
|
||||
// e.g., 1,2,4-7,9,11-15
|
||||
- int status = offline_file.try_open("/sys/devices/system/cpu/offline", "r");
|
||||
+ int status = file.try_open(path, "r");
|
||||
if (status != 0)
|
||||
- return offline;
|
||||
- while (!feof(offline_file)) {
|
||||
- skip_ws(offline_file);
|
||||
- n = fscanf(offline_file, "%d", &begin_cpu);
|
||||
+ return mask;
|
||||
+ while (!feof(file)) {
|
||||
+ skip_ws(file);
|
||||
+ n = fscanf(file, "%d", &begin_cpu);
|
||||
if (n != 1)
|
||||
break;
|
||||
- skip_ws(offline_file);
|
||||
- int c = fgetc(offline_file);
|
||||
+ skip_ws(file);
|
||||
+ int c = fgetc(file);
|
||||
if (c == EOF || c == ',') {
|
||||
// Just single CPU
|
||||
end_cpu = begin_cpu;
|
||||
} else if (c == '-') {
|
||||
// Range of CPUs
|
||||
- skip_ws(offline_file);
|
||||
- n = fscanf(offline_file, "%d", &end_cpu);
|
||||
+ skip_ws(file);
|
||||
+ n = fscanf(file, "%d", &end_cpu);
|
||||
if (n != 1)
|
||||
break;
|
||||
- skip_ws(offline_file);
|
||||
- c = fgetc(offline_file); // skip ','
|
||||
+ skip_ws(file);
|
||||
+ c = fgetc(file); // skip ','
|
||||
} else {
|
||||
// Syntax problem
|
||||
break;
|
||||
@@ -1638,13 +1636,19 @@ kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
|
||||
end_cpu >= __kmp_xproc || begin_cpu > end_cpu) {
|
||||
continue;
|
||||
}
|
||||
- // Insert [begin_cpu, end_cpu] into offline mask
|
||||
+ // Insert [begin_cpu, end_cpu] into mask
|
||||
for (int cpu = begin_cpu; cpu <= end_cpu; ++cpu) {
|
||||
- KMP_CPU_SET(cpu, offline);
|
||||
+ KMP_CPU_SET(cpu, mask);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
- return offline;
|
||||
+ return mask;
|
||||
+}
|
||||
+
|
||||
+// Return (possibly empty) affinity mask representing the offline CPUs
|
||||
+// Caller must free the mask
|
||||
+kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
|
||||
+ return __kmp_parse_cpu_list("/sys/devices/system/cpu/offline");
|
||||
}
|
||||
|
||||
// Return the number of available procs
|
||||
@@ -3175,6 +3179,37 @@ static inline const char *__kmp_cpuinfo_get_envvar() {
|
||||
return envvar;
|
||||
}
|
||||
|
||||
+static bool __kmp_package_id_from_core_siblings_list(unsigned **threadInfo,
|
||||
+ unsigned num_avail,
|
||||
+ unsigned idx) {
|
||||
+ if (!KMP_AFFINITY_CAPABLE())
|
||||
+ return false;
|
||||
+
|
||||
+ char path[256];
|
||||
+ KMP_SNPRINTF(path, sizeof(path),
|
||||
+ "/sys/devices/system/cpu/cpu%u/topology/core_siblings_list",
|
||||
+ threadInfo[idx][osIdIndex]);
|
||||
+ kmp_affin_mask_t *siblings = __kmp_parse_cpu_list(path);
|
||||
+ for (unsigned i = 0; i < num_avail; ++i) {
|
||||
+ unsigned cpu_id = threadInfo[i][osIdIndex];
|
||||
+ KMP_ASSERT(cpu_id < __kmp_affin_mask_size * CHAR_BIT);
|
||||
+ if (!KMP_CPU_ISSET(cpu_id, siblings))
|
||||
+ continue;
|
||||
+ if (threadInfo[i][pkgIdIndex] == UINT_MAX) {
|
||||
+ // Arbitrarily pick the first index we encounter, it only matters that
|
||||
+ // the value is the same for all siblings.
|
||||
+ threadInfo[i][pkgIdIndex] = idx;
|
||||
+ } else if (threadInfo[i][pkgIdIndex] != idx) {
|
||||
+ // Contradictory sibling lists.
|
||||
+ KMP_CPU_FREE(siblings);
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+ KMP_ASSERT(threadInfo[idx][pkgIdIndex] != UINT_MAX);
|
||||
+ KMP_CPU_FREE(siblings);
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
|
||||
// affinity map. On AIX, the map is obtained through system SRAD (Scheduler
|
||||
// Resource Allocation Domain).
|
||||
@@ -3550,18 +3585,13 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
|
||||
return false;
|
||||
}
|
||||
|
||||
- // Check for missing fields. The osId field must be there, and we
|
||||
- // currently require that the physical id field is specified, also.
|
||||
+ // Check for missing fields. The osId field must be there. The physical
|
||||
+ // id field will be checked later.
|
||||
if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
|
||||
CLEANUP_THREAD_INFO;
|
||||
*msg_id = kmp_i18n_str_MissingProcField;
|
||||
return false;
|
||||
}
|
||||
- if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
|
||||
- CLEANUP_THREAD_INFO;
|
||||
- *msg_id = kmp_i18n_str_MissingPhysicalIDField;
|
||||
- return false;
|
||||
- }
|
||||
|
||||
// Skip this proc if it is not included in the machine model.
|
||||
if (KMP_AFFINITY_CAPABLE() &&
|
||||
@@ -3591,6 +3621,18 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
|
||||
}
|
||||
*line = 0;
|
||||
|
||||
+ // At least on powerpc, Linux may return -1 for physical_package_id. Try
|
||||
+ // to reconstruct topology from core_siblings_list in that case.
|
||||
+ for (i = 0; i < num_avail; ++i) {
|
||||
+ if (threadInfo[i][pkgIdIndex] == UINT_MAX) {
|
||||
+ if (!__kmp_package_id_from_core_siblings_list(threadInfo, num_avail, i)) {
|
||||
+ CLEANUP_THREAD_INFO;
|
||||
+ *msg_id = kmp_i18n_str_MissingPhysicalIDField;
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
#if KMP_MIC && REDUCE_TEAM_SIZE
|
||||
unsigned teamSize = 0;
|
||||
#endif // KMP_MIC && REDUCE_TEAM_SIZE
|
||||
diff --git a/openmp/runtime/test/affinity/kmp-hw-subset.c b/openmp/runtime/test/affinity/kmp-hw-subset.c
|
||||
index 606fcdfbada9..0b49969bd3b1 100644
|
||||
--- a/openmp/runtime/test/affinity/kmp-hw-subset.c
|
||||
+++ b/openmp/runtime/test/affinity/kmp-hw-subset.c
|
||||
@@ -25,7 +25,7 @@ static int compare_hw_subset_places(const place_list_t *openmp_places,
|
||||
expected_per_place = nthreads_per_core;
|
||||
} else {
|
||||
expected_total = nsockets;
|
||||
- expected_per_place = ncores_per_socket;
|
||||
+ expected_per_place = ncores_per_socket * nthreads_per_core;
|
||||
}
|
||||
if (openmp_places->num_places != expected_total) {
|
||||
fprintf(stderr, "error: KMP_HW_SUBSET did not half each resource layer!\n");
|
||||
--
|
||||
2.47.0
|
||||
|
||||
@ -1,86 +0,0 @@
|
||||
From ccc2b792e57d632bc887b226a4e7f0a8189eab8b Mon Sep 17 00:00:00 2001
|
||||
From: Josh Stone <jistone@redhat.com>
|
||||
Date: Mon, 4 Nov 2024 16:37:49 -0800
|
||||
Subject: [PATCH] [profile] Use base+vaddr for `__llvm_write_binary_ids` note
|
||||
pointers
|
||||
|
||||
This function is always examining its own ELF headers in memory, but it
|
||||
was trying to use conditions between examining files or memory, and it
|
||||
wasn't accounting for LOAD offsets at runtime. This is especially bad if
|
||||
a loaded segment has additional padding that's not in the file offsets.
|
||||
|
||||
Now we do a first scan of the program headers to figure out the runtime
|
||||
base address based on `PT_PHDR` and/or `PT_DYNAMIC` (else assume zero),
|
||||
similar to libc's `do_start`. Then each `PT_NOTE` pointer is simply the
|
||||
base plus the segment's `p_vaddr`, which includes LOAD offsets.
|
||||
|
||||
Fixes #114605
|
||||
---
|
||||
.../lib/profile/InstrProfilingPlatformLinux.c | 40 ++++++++-----------
|
||||
1 file changed, 16 insertions(+), 24 deletions(-)
|
||||
|
||||
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
|
||||
index e2c06d51e0c6..c365129a0768 100644
|
||||
--- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
|
||||
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
|
||||
@@ -194,41 +194,33 @@ static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note,
|
||||
*/
|
||||
COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
|
||||
extern const ElfW(Ehdr) __ehdr_start __attribute__((visibility("hidden")));
|
||||
+ extern ElfW(Dyn) _DYNAMIC[] __attribute__((weak, visibility("hidden")));
|
||||
+
|
||||
const ElfW(Ehdr) *ElfHeader = &__ehdr_start;
|
||||
const ElfW(Phdr) *ProgramHeader =
|
||||
(const ElfW(Phdr) *)((uintptr_t)ElfHeader + ElfHeader->e_phoff);
|
||||
|
||||
+ /* Compute the added base address in case of position-independent code. */
|
||||
+ uintptr_t Base = 0;
|
||||
+ for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) {
|
||||
+ if (ProgramHeader[I].p_type == PT_PHDR)
|
||||
+ Base = (uintptr_t)ProgramHeader - ProgramHeader[I].p_vaddr;
|
||||
+ if (ProgramHeader[I].p_type == PT_DYNAMIC && _DYNAMIC)
|
||||
+ Base = (uintptr_t)_DYNAMIC - ProgramHeader[I].p_vaddr;
|
||||
+ }
|
||||
+
|
||||
int TotalBinaryIdsSize = 0;
|
||||
- uint32_t I;
|
||||
/* Iterate through entries in the program header. */
|
||||
- for (I = 0; I < ElfHeader->e_phnum; I++) {
|
||||
+ for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) {
|
||||
/* Look for the notes segment in program header entries. */
|
||||
if (ProgramHeader[I].p_type != PT_NOTE)
|
||||
continue;
|
||||
|
||||
/* There can be multiple notes segment, and examine each of them. */
|
||||
- const ElfW(Nhdr) * Note;
|
||||
- const ElfW(Nhdr) * NotesEnd;
|
||||
- /*
|
||||
- * When examining notes in file, use p_offset, which is the offset within
|
||||
- * the elf file, to find the start of notes.
|
||||
- */
|
||||
- if (ProgramHeader[I].p_memsz == 0 ||
|
||||
- ProgramHeader[I].p_memsz == ProgramHeader[I].p_filesz) {
|
||||
- Note = (const ElfW(Nhdr) *)((uintptr_t)ElfHeader +
|
||||
- ProgramHeader[I].p_offset);
|
||||
- NotesEnd = (const ElfW(Nhdr) *)((const char *)(Note) +
|
||||
- ProgramHeader[I].p_filesz);
|
||||
- } else {
|
||||
- /*
|
||||
- * When examining notes in memory, use p_vaddr, which is the address of
|
||||
- * section after loaded to memory, to find the start of notes.
|
||||
- */
|
||||
- Note =
|
||||
- (const ElfW(Nhdr) *)((uintptr_t)ElfHeader + ProgramHeader[I].p_vaddr);
|
||||
- NotesEnd =
|
||||
- (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz);
|
||||
- }
|
||||
+ const ElfW(Nhdr) *Note =
|
||||
+ (const ElfW(Nhdr) *)(Base + ProgramHeader[I].p_vaddr);
|
||||
+ const ElfW(Nhdr) *NotesEnd =
|
||||
+ (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz);
|
||||
|
||||
int BinaryIdsSize = WriteBinaryIds(Writer, Note, NotesEnd);
|
||||
if (TotalBinaryIdsSize == -1)
|
||||
--
|
||||
2.47.0
|
||||
|
||||
@ -0,0 +1,130 @@
|
||||
From be4fa19ecf95d94d3ef46be183d3d4b4ebb6bb47 Mon Sep 17 00:00:00 2001
|
||||
From: yonghong-song <yhs@fb.com>
|
||||
Date: Mon, 3 Nov 2025 11:11:47 -0800
|
||||
Subject: [PATCH] [BPF] Remove unused weak symbol __bpf_trap (#166003)
|
||||
|
||||
Nikita Popov reported an issue ([1]) where a dangling weak symbol
|
||||
__bpf_trap is in the final binary and this caused libbpf to fail as shown
|
||||
below:
|
||||
|
||||
$ veristat -v ./t.o
|
||||
Processing 't.o'...
|
||||
libbpf: elf: skipping unrecognized data section(4) .eh_frame
|
||||
libbpf: elf: skipping relo section(5) .rel.eh_frame for section(4) .eh_frame
|
||||
libbpf: failed to find BTF for extern '__bpf_trap': -3
|
||||
Failed to open './t.o': -3
|
||||
|
||||
In llvm, the dag selection phase generates __bpf_trap in code. Later the
|
||||
UnreachableBlockElim pass removed __bpf_trap from the code, but
|
||||
__bpf_trap symbol survives in the symbol table.
|
||||
|
||||
Having a dangling __bpf_trap weak symbol is not good for old kernels as
|
||||
seen in the above veristat failure. Although users could use compiler
|
||||
flag `-mllvm -bpf-disable-trap-unreachable` to workaround the issue,
|
||||
this patch fixed the issue by removing the dangling __bpf_trap.
|
||||
|
||||
[1] https://github.com/llvm/llvm-project/issues/165696
|
||||
|
||||
(cherry picked from commit 8fd1bf2f8c9e6e7c4bc5f6915a9d52bb3672601b)
|
||||
---
|
||||
llvm/lib/Target/BPF/BPFAsmPrinter.cpp | 24 ++++++++++++++++++++
|
||||
llvm/lib/Target/BPF/BPFAsmPrinter.h | 1 +
|
||||
llvm/test/CodeGen/BPF/bpf_trap.ll | 32 +++++++++++++++++++++++++++
|
||||
3 files changed, 57 insertions(+)
|
||||
create mode 100644 llvm/test/CodeGen/BPF/bpf_trap.ll
|
||||
|
||||
diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
|
||||
index 77dc4a75a7d6..b2a82040ee82 100644
|
||||
--- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
|
||||
+++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
|
||||
@@ -88,6 +88,16 @@ bool BPFAsmPrinter::doFinalization(Module &M) {
|
||||
}
|
||||
}
|
||||
|
||||
+ for (GlobalObject &GO : M.global_objects()) {
|
||||
+ if (!GO.hasExternalWeakLinkage())
|
||||
+ continue;
|
||||
+
|
||||
+ if (!SawTrapCall && GO.getName() == BPF_TRAP) {
|
||||
+ GO.eraseFromParent();
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
return AsmPrinter::doFinalization(M);
|
||||
}
|
||||
|
||||
@@ -160,6 +170,20 @@ bool BPFAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
|
||||
}
|
||||
|
||||
void BPFAsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||
+ if (MI->isCall()) {
|
||||
+ for (const MachineOperand &Op : MI->operands()) {
|
||||
+ if (Op.isGlobal()) {
|
||||
+ if (const GlobalValue *GV = Op.getGlobal())
|
||||
+ if (GV->getName() == BPF_TRAP)
|
||||
+ SawTrapCall = true;
|
||||
+ } else if (Op.isSymbol()) {
|
||||
+ if (const MCSymbol *Sym = Op.getMCSymbol())
|
||||
+ if (Sym->getName() == BPF_TRAP)
|
||||
+ SawTrapCall = true;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
BPF_MC::verifyInstructionPredicates(MI->getOpcode(),
|
||||
getSubtargetInfo().getFeatureBits());
|
||||
|
||||
diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.h b/llvm/lib/Target/BPF/BPFAsmPrinter.h
|
||||
index 0cfb2839c8ff..60a285ea2b7d 100644
|
||||
--- a/llvm/lib/Target/BPF/BPFAsmPrinter.h
|
||||
+++ b/llvm/lib/Target/BPF/BPFAsmPrinter.h
|
||||
@@ -39,6 +39,7 @@ public:
|
||||
private:
|
||||
BTFDebug *BTF;
|
||||
TargetMachine &TM;
|
||||
+ bool SawTrapCall = false;
|
||||
|
||||
const BPFTargetMachine &getBTM() const;
|
||||
};
|
||||
diff --git a/llvm/test/CodeGen/BPF/bpf_trap.ll b/llvm/test/CodeGen/BPF/bpf_trap.ll
|
||||
new file mode 100644
|
||||
index 000000000000..ab8df5ff7cb0
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/CodeGen/BPF/bpf_trap.ll
|
||||
@@ -0,0 +1,32 @@
|
||||
+; RUN: llc < %s | FileCheck %s
|
||||
+;
|
||||
+target triple = "bpf"
|
||||
+
|
||||
+define i32 @test(i8 %x) {
|
||||
+entry:
|
||||
+ %0 = and i8 %x, 3
|
||||
+ switch i8 %0, label %default.unreachable4 [
|
||||
+ i8 0, label %return
|
||||
+ i8 1, label %sw.bb1
|
||||
+ i8 2, label %sw.bb2
|
||||
+ i8 3, label %sw.bb3
|
||||
+ ]
|
||||
+
|
||||
+sw.bb1: ; preds = %entry
|
||||
+ br label %return
|
||||
+
|
||||
+sw.bb2: ; preds = %entry
|
||||
+ br label %return
|
||||
+
|
||||
+sw.bb3: ; preds = %entry
|
||||
+ br label %return
|
||||
+
|
||||
+default.unreachable4: ; preds = %entry
|
||||
+ unreachable
|
||||
+
|
||||
+return: ; preds = %entry, %sw.bb3, %sw.bb2, %sw.bb1
|
||||
+ %retval.0 = phi i32 [ 12, %sw.bb1 ], [ 43, %sw.bb2 ], [ 54, %sw.bb3 ], [ 32, %entry ]
|
||||
+ ret i32 %retval.0
|
||||
+}
|
||||
+
|
||||
+; CHECK-NOT: __bpf_trap
|
||||
--
|
||||
2.50.1
|
||||
|
||||
@ -0,0 +1,34 @@
|
||||
From ac5b6151976c70c8b676d3bc6ff82895fe0e1d01 Mon Sep 17 00:00:00 2001
|
||||
From: yonghong-song <yhs@fb.com>
|
||||
Date: Tue, 4 Nov 2025 15:15:33 -0800
|
||||
Subject: [PATCH] [BPF] Remove dead code related to __bpf_trap global var
|
||||
(#166440)
|
||||
|
||||
In [1], the symbol __bpf_trap (macro BPF_TRAP) is removed if it is not
|
||||
used in the code. In the discussion in [1], it is found that the branch
|
||||
"if (Op.isSymbol())" is actually always false. Remove it to avoid
|
||||
confusion.
|
||||
|
||||
[1] https://github.com/llvm/llvm-project/pull/166003
|
||||
---
|
||||
llvm/lib/Target/BPF/BPFAsmPrinter.cpp | 4 ----
|
||||
1 file changed, 4 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
|
||||
index 378a72ab27dd..abe081c0c76f 100644
|
||||
--- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
|
||||
+++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
|
||||
@@ -176,10 +176,6 @@ void BPFAsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||
if (const GlobalValue *GV = Op.getGlobal())
|
||||
if (GV->getName() == BPF_TRAP)
|
||||
SawTrapCall = true;
|
||||
- } else if (Op.isSymbol()) {
|
||||
- if (const MCSymbol *Sym = Op.getMCSymbol())
|
||||
- if (Sym->getName() == BPF_TRAP)
|
||||
- SawTrapCall = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
--
|
||||
2.50.1
|
||||
|
||||
@ -1,893 +0,0 @@
|
||||
From 91052169960477fbc39169c10f9fae3bec732510 Mon Sep 17 00:00:00 2001
|
||||
From: Carl Ritson <carl.ritson@amd.com>
|
||||
Date: Wed, 17 Jul 2024 15:07:42 +0900
|
||||
Subject: [PATCH 1/3] [AMDGPU] Implement workaround for GFX11.5 export priority
|
||||
|
||||
On GFX11.5 shaders having completed exports need to execute/wait
|
||||
at a lower priority than shaders still executing exports.
|
||||
Add code to maintain normal priority of 2 for shaders that export
|
||||
and drop to priority 0 after exports.
|
||||
---
|
||||
llvm/lib/Target/AMDGPU/AMDGPU.td | 15 +-
|
||||
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 112 ++++++
|
||||
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 +
|
||||
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3 +
|
||||
.../AMDGPU/required-export-priority.ll | 344 ++++++++++++++++++
|
||||
.../AMDGPU/required-export-priority.mir | 293 +++++++++++++++
|
||||
6 files changed, 765 insertions(+), 3 deletions(-)
|
||||
create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
||||
create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.mir
|
||||
|
||||
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
|
||||
index dfc8eaea66f7b..14fcf6a210a78 100644
|
||||
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
|
||||
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
|
||||
@@ -947,6 +947,12 @@ def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
|
||||
"Has restricted SOffset (immediate not supported)."
|
||||
>;
|
||||
|
||||
+def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority",
|
||||
+ "HasRequiredExportPriority",
|
||||
+ "true",
|
||||
+ "Export priority must be explicitly manipulated on GFX11.5"
|
||||
+>;
|
||||
+
|
||||
//===------------------------------------------------------------===//
|
||||
// Subtarget Features (options and debugging)
|
||||
//===------------------------------------------------------------===//
|
||||
@@ -1597,14 +1603,16 @@ def FeatureISAVersion11_5_0 : FeatureSet<
|
||||
!listconcat(FeatureISAVersion11_Common.Features,
|
||||
[FeatureSALUFloatInsts,
|
||||
FeatureDPPSrc1SGPR,
|
||||
- FeatureVGPRSingleUseHintInsts])>;
|
||||
+ FeatureVGPRSingleUseHintInsts,
|
||||
+ FeatureRequiredExportPriority])>;
|
||||
|
||||
def FeatureISAVersion11_5_1 : FeatureSet<
|
||||
!listconcat(FeatureISAVersion11_Common.Features,
|
||||
[FeatureSALUFloatInsts,
|
||||
FeatureDPPSrc1SGPR,
|
||||
FeatureVGPRSingleUseHintInsts,
|
||||
- FeatureGFX11FullVGPRs])>;
|
||||
+ FeatureGFX11FullVGPRs,
|
||||
+ FeatureRequiredExportPriority])>;
|
||||
|
||||
def FeatureISAVersion12 : FeatureSet<
|
||||
[FeatureGFX12,
|
||||
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
|
||||
index a402fc6d7e611..a8b171aa82840 100644
|
||||
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
|
||||
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "GCNSubtarget.h"
|
||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
+#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/ScheduleDAG.h"
|
||||
#include "llvm/TargetParser/TargetParser.h"
|
||||
@@ -1104,6 +1105,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
|
||||
fixWMMAHazards(MI);
|
||||
fixShift64HighRegBug(MI);
|
||||
fixVALUMaskWriteHazard(MI);
|
||||
+ fixRequiredExportPriority(MI);
|
||||
}
|
||||
|
||||
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
|
||||
@@ -2895,3 +2897,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
|
||||
|
||||
return true;
|
||||
}
|
||||
+
|
||||
+static bool ensureEntrySetPrio(MachineFunction *MF, int Priority,
|
||||
+ const SIInstrInfo &TII) {
|
||||
+ MachineBasicBlock &EntryMBB = MF->front();
|
||||
+ if (EntryMBB.begin() != EntryMBB.end()) {
|
||||
+ auto &EntryMI = *EntryMBB.begin();
|
||||
+ if (EntryMI.getOpcode() == AMDGPU::S_SETPRIO &&
|
||||
+ EntryMI.getOperand(0).getImm() >= Priority)
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII.get(AMDGPU::S_SETPRIO))
|
||||
+ .addImm(Priority);
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
|
||||
+ if (!ST.hasRequiredExportPriority())
|
||||
+ return false;
|
||||
+
|
||||
+ // Assume the following shader types will never have exports,
|
||||
+ // and avoid adding or adjusting S_SETPRIO.
|
||||
+ MachineBasicBlock *MBB = MI->getParent();
|
||||
+ MachineFunction *MF = MBB->getParent();
|
||||
+ auto CC = MF->getFunction().getCallingConv();
|
||||
+ switch (CC) {
|
||||
+ case CallingConv::AMDGPU_CS:
|
||||
+ case CallingConv::AMDGPU_CS_Chain:
|
||||
+ case CallingConv::AMDGPU_CS_ChainPreserve:
|
||||
+ case CallingConv::AMDGPU_KERNEL:
|
||||
+ return false;
|
||||
+ default:
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ const int MaxPriority = 3;
|
||||
+ const int NormalPriority = 2;
|
||||
+ const int PostExportPriority = 0;
|
||||
+
|
||||
+ auto It = MI->getIterator();
|
||||
+ switch (MI->getOpcode()) {
|
||||
+ case AMDGPU::S_ENDPGM:
|
||||
+ case AMDGPU::S_ENDPGM_SAVED:
|
||||
+ case AMDGPU::S_ENDPGM_ORDERED_PS_DONE:
|
||||
+ case AMDGPU::SI_RETURN_TO_EPILOG:
|
||||
+ // Ensure shader with calls raises priority at entry.
|
||||
+ // This ensures correct priority if exports exist in callee.
|
||||
+ if (MF->getFrameInfo().hasCalls())
|
||||
+ return ensureEntrySetPrio(MF, NormalPriority, TII);
|
||||
+ return false;
|
||||
+ case AMDGPU::S_SETPRIO: {
|
||||
+ // Raise minimum priority unless in workaround.
|
||||
+ auto &PrioOp = MI->getOperand(0);
|
||||
+ int Prio = PrioOp.getImm();
|
||||
+ bool InWA = (Prio == PostExportPriority) &&
|
||||
+ (It != MBB->begin() && TII.isEXP(*std::prev(It)));
|
||||
+ if (InWA || Prio >= NormalPriority)
|
||||
+ return false;
|
||||
+ PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority));
|
||||
+ return true;
|
||||
+ }
|
||||
+ default:
|
||||
+ if (!TII.isEXP(*MI))
|
||||
+ return false;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ // Check entry priority at each export (as there will only be a few).
|
||||
+ // Note: amdgpu_gfx can only be a callee, so defer to caller setprio.
|
||||
+ bool Changed = false;
|
||||
+ if (CC != CallingConv::AMDGPU_Gfx)
|
||||
+ Changed = ensureEntrySetPrio(MF, NormalPriority, TII);
|
||||
+
|
||||
+ auto NextMI = std::next(It);
|
||||
+ bool EndOfShader = false;
|
||||
+ if (NextMI != MBB->end()) {
|
||||
+ // Only need WA at end of sequence of exports.
|
||||
+ if (TII.isEXP(*NextMI))
|
||||
+ return Changed;
|
||||
+ // Assume appropriate S_SETPRIO after export means WA already applied.
|
||||
+ if (NextMI->getOpcode() == AMDGPU::S_SETPRIO &&
|
||||
+ NextMI->getOperand(0).getImm() == PostExportPriority)
|
||||
+ return Changed;
|
||||
+ EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM;
|
||||
+ }
|
||||
+
|
||||
+ const DebugLoc &DL = MI->getDebugLoc();
|
||||
+
|
||||
+ // Lower priority.
|
||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
|
||||
+ .addImm(PostExportPriority);
|
||||
+
|
||||
+ if (!EndOfShader) {
|
||||
+ // Wait for exports to complete.
|
||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT))
|
||||
+ .addReg(AMDGPU::SGPR_NULL)
|
||||
+ .addImm(0);
|
||||
+ }
|
||||
+
|
||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
|
||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
|
||||
+
|
||||
+ if (!EndOfShader) {
|
||||
+ // Return to normal (higher) priority.
|
||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
|
||||
+ .addImm(NormalPriority);
|
||||
+ }
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
|
||||
index 3ccca527c626b..f2a64ab48e180 100644
|
||||
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
|
||||
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
|
||||
@@ -107,6 +107,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
|
||||
bool fixWMMAHazards(MachineInstr *MI);
|
||||
bool fixShift64HighRegBug(MachineInstr *MI);
|
||||
bool fixVALUMaskWriteHazard(MachineInstr *MI);
|
||||
+ bool fixRequiredExportPriority(MachineInstr *MI);
|
||||
|
||||
int checkMAIHazards(MachineInstr *MI);
|
||||
int checkMAIHazards908(MachineInstr *MI);
|
||||
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
|
||||
index e5817594a4521..def89c785b855 100644
|
||||
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
|
||||
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
|
||||
@@ -238,6 +238,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
|
||||
bool HasVOPDInsts = false;
|
||||
bool HasVALUTransUseHazard = false;
|
||||
bool HasForceStoreSC0SC1 = false;
|
||||
+ bool HasRequiredExportPriority = false;
|
||||
|
||||
// Dummy feature to use for assembler in tablegen.
|
||||
bool FeatureDisable = false;
|
||||
@@ -1282,6 +1283,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
|
||||
|
||||
bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }
|
||||
|
||||
+ bool hasRequiredExportPriority() const { return HasRequiredExportPriority; }
|
||||
+
|
||||
/// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
|
||||
/// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
|
||||
bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
|
||||
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
||||
new file mode 100644
|
||||
index 0000000000000..377902f3f0d1a
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
||||
@@ -0,0 +1,344 @@
|
||||
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
+
|
||||
+define amdgpu_ps void @test_export_zeroes_f32() #0 {
|
||||
+; GCN-LABEL: test_export_zeroes_f32:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
+; GCN-NEXT: exp mrt0 off, off, off, off
|
||||
+; GCN-NEXT: exp mrt0 off, off, off, off done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_export_en_src0_f32() #0 {
|
||||
+; GCN-LABEL: test_export_en_src0_f32:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
||||
+; GCN-NEXT: exp mrt0 v3, off, off, off done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_gs void @test_export_gs() #0 {
|
||||
+; GCN-LABEL: test_export_gs:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_hs void @test_export_hs() #0 {
|
||||
+; GCN-LABEL: test_export_hs:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_gfx void @test_export_gfx(float %v) #0 {
|
||||
+; GCN-LABEL: test_export_gfx:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 4.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 0.5
|
||||
+; GCN-NEXT: v_mov_b32_e32 v3, 2.0
|
||||
+; GCN-NEXT: exp mrt0 off, v3, off, off done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_waitcnt expcnt(0)
|
||||
+; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %v, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_cs void @test_export_cs() #0 {
|
||||
+; GCN-LABEL: test_export_cs:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_kernel void @test_export_kernel() #0 {
|
||||
+; GCN-LABEL: test_export_kernel:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_gfx void @test_no_export_gfx(float %v) #0 {
|
||||
+; GCN-LABEL: test_no_export_gfx:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
+; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_no_export_ps(float %v) #0 {
|
||||
+; GCN-LABEL: test_no_export_ps:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
||||
+; GCN-LABEL: test_if_export_f32:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
||||
+; GCN-NEXT: s_cbranch_execz .LBB9_2
|
||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: .LBB9_2: ; %end
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ %cc = icmp eq i32 %flag, 0
|
||||
+ br i1 %cc, label %end, label %exp
|
||||
+
|
||||
+exp:
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
|
||||
+ br label %end
|
||||
+
|
||||
+end:
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
||||
+; GCN-LABEL: test_if_export_vm_f32:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
||||
+; GCN-NEXT: s_cbranch_execz .LBB10_2
|
||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: .LBB10_2: ; %end
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ %cc = icmp eq i32 %flag, 0
|
||||
+ br i1 %cc, label %end, label %exp
|
||||
+
|
||||
+exp:
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
|
||||
+ br label %end
|
||||
+
|
||||
+end:
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
||||
+; GCN-LABEL: test_if_export_done_f32:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
||||
+; GCN-NEXT: s_cbranch_execz .LBB11_2
|
||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: .LBB11_2: ; %end
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ %cc = icmp eq i32 %flag, 0
|
||||
+ br i1 %cc, label %end, label %exp
|
||||
+
|
||||
+exp:
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
|
||||
+ br label %end
|
||||
+
|
||||
+end:
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
||||
+; GCN-LABEL: test_if_export_vm_done_f32:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
||||
+; GCN-NEXT: s_cbranch_execz .LBB12_2
|
||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: .LBB12_2: ; %end
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ %cc = icmp eq i32 %flag, 0
|
||||
+ br i1 %cc, label %end, label %exp
|
||||
+
|
||||
+exp:
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
|
||||
+ br label %end
|
||||
+
|
||||
+end:
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 {
|
||||
+; GCN-LABEL: test_export_pos_before_param_across_load:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 1.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v3, 0.5
|
||||
+; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
+; GCN-NEXT: exp pos0 v1, v1, v1, v0 done
|
||||
+; GCN-NEXT: exp invalid_target_32 v2, v2, v2, v2
|
||||
+; GCN-NEXT: exp invalid_target_33 v2, v2, v2, v3
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
|
||||
+ %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
|
||||
+; GCN-LABEL: test_export_across_store_load:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 24
|
||||
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
|
||||
+; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 8, vcc_lo
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 0
|
||||
+; GCN-NEXT: scratch_store_b32 v0, v1, off
|
||||
+; GCN-NEXT: scratch_load_b32 v0, off, off
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 1.0
|
||||
+; GCN-NEXT: exp pos0 v2, v2, v2, v1 done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
+; GCN-NEXT: exp invalid_target_32 v0, v2, v1, v2
|
||||
+; GCN-NEXT: exp invalid_target_33 v0, v2, v1, v2
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ %data0 = alloca <4 x float>, align 8, addrspace(5)
|
||||
+ %data1 = alloca <4 x float>, align 8, addrspace(5)
|
||||
+ %cmp = icmp eq i32 %idx, 1
|
||||
+ %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
|
||||
+ store float %v, ptr addrspace(5) %data, align 8
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
|
||||
+ %load0 = load float, ptr addrspace(5) %data0, align 8
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_export_in_callee(float %v) #0 {
|
||||
+; GCN-LABEL: test_export_in_callee:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_getpc_b64 s[0:1]
|
||||
+; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
|
||||
+; GCN-NEXT: s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
|
||||
+; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
|
||||
+; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
+; GCN-NEXT: s_mov_b32 s32, 0
|
||||
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
+; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ %x = fadd float %v, 1.0
|
||||
+ call void @test_export_gfx(float %x)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
|
||||
+; GCN-LABEL: test_export_in_callee_prio:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_mov_b32 s32, 0
|
||||
+; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_getpc_b64 s[0:1]
|
||||
+; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
|
||||
+; GCN-NEXT: s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
|
||||
+; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
+; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ %x = fadd float %v, 1.0
|
||||
+ call void @llvm.amdgcn.s.setprio(i16 0)
|
||||
+ call void @test_export_gfx(float %x)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
|
||||
+declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
|
||||
+declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
|
||||
+declare void @llvm.amdgcn.s.setprio(i16)
|
||||
+
|
||||
+attributes #0 = { nounwind }
|
||||
+attributes #1 = { nounwind inaccessiblememonly }
|
||||
+attributes #2 = { nounwind readnone }
|
||||
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.mir b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
|
||||
new file mode 100644
|
||||
index 0000000000000..eee04468036e5
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
|
||||
@@ -0,0 +1,293 @@
|
||||
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
||||
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=post-RA-hazard-rec -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX1150 %s
|
||||
+
|
||||
+--- |
|
||||
+ define amdgpu_ps void @end_of_shader() {
|
||||
+ ret void
|
||||
+ }
|
||||
+ define amdgpu_ps void @end_of_shader_return_to_epilogue() {
|
||||
+ ret void
|
||||
+ }
|
||||
+ define amdgpu_ps void @end_of_block() {
|
||||
+ ret void
|
||||
+ }
|
||||
+ define amdgpu_ps void @start_of_block() {
|
||||
+ ret void
|
||||
+ }
|
||||
+ define amdgpu_ps void @block_of_exports() {
|
||||
+ ret void
|
||||
+ }
|
||||
+ define amdgpu_ps void @sparse_exports() {
|
||||
+ ret void
|
||||
+ }
|
||||
+ define amdgpu_ps void @existing_setprio_1() {
|
||||
+ ret void
|
||||
+ }
|
||||
+ define amdgpu_ps void @existing_setprio_2() {
|
||||
+ ret void
|
||||
+ }
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: end_of_shader
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+ ; GFX1150-LABEL: name: end_of_shader
|
||||
+ ; GFX1150: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ S_ENDPGM 0
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: end_of_shader_return_to_epilogue
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+ ; GFX1150-LABEL: name: end_of_shader_return_to_epilogue
|
||||
+ ; GFX1150: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ SI_RETURN_TO_EPILOG $vgpr0
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: end_of_block
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ ; GFX1150-LABEL: name: end_of_block
|
||||
+ ; GFX1150: bb.0:
|
||||
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
|
||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: bb.1:
|
||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+
|
||||
+ bb.1:
|
||||
+ S_ENDPGM 0
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: start_of_block
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ ; GFX1150-LABEL: name: start_of_block
|
||||
+ ; GFX1150: bb.0:
|
||||
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
|
||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: bb.1:
|
||||
+ ; GFX1150-NEXT: successors: %bb.2(0x80000000)
|
||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: bb.2:
|
||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+
|
||||
+ bb.1:
|
||||
+ liveins: $vgpr0
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+
|
||||
+ bb.2:
|
||||
+ S_ENDPGM 0
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: block_of_exports
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+ ; GFX1150-LABEL: name: block_of_exports
|
||||
+ ; GFX1150: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
||||
+ EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ S_ENDPGM 0
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: sparse_exports
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+ ; GFX1150-LABEL: name: sparse_exports
|
||||
+ ; GFX1150: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
||||
+ EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ S_ENDPGM 0
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: existing_setprio_1
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ ; GFX1150-LABEL: name: existing_setprio_1
|
||||
+ ; GFX1150: bb.0:
|
||||
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
|
||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: bb.1:
|
||||
+ ; GFX1150-NEXT: successors: %bb.2(0x80000000)
|
||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
||||
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: bb.2:
|
||||
+ ; GFX1150-NEXT: successors: %bb.3(0x80000000)
|
||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
||||
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: bb.3:
|
||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+ $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
||||
+
|
||||
+ bb.1:
|
||||
+ liveins: $vgpr0
|
||||
+ S_SETPRIO 3
|
||||
+ $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
||||
+ S_SETPRIO 0
|
||||
+
|
||||
+ bb.2:
|
||||
+ liveins: $vgpr0
|
||||
+ S_SETPRIO 1
|
||||
+ $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
|
||||
+ S_SETPRIO 0
|
||||
+
|
||||
+ bb.3:
|
||||
+ liveins: $vgpr0
|
||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ S_ENDPGM 0
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: existing_setprio_2
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+ ; GFX1150-LABEL: name: existing_setprio_2
|
||||
+ ; GFX1150: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
||||
+ S_SETPRIO 3
|
||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ S_SETPRIO 3
|
||||
+ S_ENDPGM 0
|
||||
+...
|
||||
|
||||
From 8ea44e65f2c19facff751aeb2ac960f907fb210f Mon Sep 17 00:00:00 2001
|
||||
From: Carl Ritson <carl.ritson@amd.com>
|
||||
Date: Wed, 17 Jul 2024 16:18:02 +0900
|
||||
Subject: [PATCH 2/3] Remove -verify-machineinstrs from test.
|
||||
|
||||
---
|
||||
llvm/test/CodeGen/AMDGPU/required-export-priority.ll | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
||||
index 377902f3f0d1a..ebc209bd4d451 100644
|
||||
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
||||
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
||||
@@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
define amdgpu_ps void @test_export_zeroes_f32() #0 {
|
||||
; GCN-LABEL: test_export_zeroes_f32:
|
||||
28
SOURCES/20-131099.patch
Normal file
28
SOURCES/20-131099.patch
Normal file
@ -0,0 +1,28 @@
|
||||
From e43271ec7438ecb78f99db134aeca274a47f6c28 Mon Sep 17 00:00:00 2001
|
||||
From: Konrad Kleine <kkleine@redhat.com>
|
||||
Date: Thu, 13 Mar 2025 09:12:24 +0100
|
||||
Subject: [PATCH] Filter out configuration file from compile commands
|
||||
|
||||
The commands to run the compilation when printed with `-###` contain
|
||||
various irrelevant lines for the perf-training. Most of them are
|
||||
filtered out already but when configured with
|
||||
`CLANG_CONFIG_FILE_SYSTEM_DIR` a new line like the following is
|
||||
added and needs to be filtered out:
|
||||
|
||||
`Configuration file: /etc/clang/x86_64-redhat-linux-gnu-clang.cfg`
|
||||
---
|
||||
clang/utils/perf-training/perf-helper.py | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/clang/utils/perf-training/perf-helper.py b/clang/utils/perf-training/perf-helper.py
|
||||
index 80c6356d0497c..29904aded5ab0 100644
|
||||
--- a/clang/utils/perf-training/perf-helper.py
|
||||
+++ b/clang/utils/perf-training/perf-helper.py
|
||||
@@ -237,6 +237,7 @@ def get_cc1_command_for_args(cmd, env):
|
||||
or ln.startswith("InstalledDir:")
|
||||
or ln.startswith("LLVM Profile Note")
|
||||
or ln.startswith(" (in-process)")
|
||||
+ or ln.startswith("Configuration file:")
|
||||
or " version " in ln
|
||||
):
|
||||
continue
|
||||
94
SOURCES/21-146424.patch
Normal file
94
SOURCES/21-146424.patch
Normal file
@ -0,0 +1,94 @@
|
||||
From eba58195932f37fb461ae17c69fc517181b99c9a Mon Sep 17 00:00:00 2001
|
||||
From: Paul Murphy <paumurph@redhat.com>
|
||||
Date: Mon, 30 Jun 2025 10:13:37 -0500
|
||||
Subject: [PATCH] [PowerPC] fix lowering of SPILL_CRBIT on pwr9 and pwr10
|
||||
|
||||
If a copy exists between creation of a crbit and a spill, machine-cp
|
||||
may delete the copy since it seems unaware of the relation between a cr
|
||||
and crbit. A fix was previously made for the generic ppc64 lowering. It
|
||||
should be applied to the pwr9 and pwr10 variants too.
|
||||
|
||||
Likewise, relax and extend the pwr8 test to verify pwr9 and pwr10
|
||||
codegen too.
|
||||
|
||||
This fixes #143989.
|
||||
---
|
||||
llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 17 +++++++++++------
|
||||
.../PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir | 8 +++++++-
|
||||
2 files changed, 18 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
|
||||
index 76dca4794e05..78d254a55fd9 100644
|
||||
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
|
||||
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
|
||||
@@ -1102,13 +1102,20 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
|
||||
SpillsKnownBit = true;
|
||||
break;
|
||||
default:
|
||||
+ // When spilling a CR bit, The super register may not be explicitly defined
|
||||
+ // (i.e. it can be defined by a CR-logical that only defines the subreg) so
|
||||
+ // we state that the CR field is undef. Also, in order to preserve the kill
|
||||
+ // flag on the CR bit, we add it as an implicit use.
|
||||
+
|
||||
// On Power10, we can use SETNBC to spill all CR bits. SETNBC will set all
|
||||
// bits (specifically, it produces a -1 if the CR bit is set). Ultimately,
|
||||
// the bit that is of importance to us is bit 32 (bit 0 of a 32-bit
|
||||
// register), and SETNBC will set this.
|
||||
if (Subtarget.isISA3_1()) {
|
||||
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETNBC8 : PPC::SETNBC), Reg)
|
||||
- .addReg(SrcReg, RegState::Undef);
|
||||
+ .addReg(SrcReg, RegState::Undef)
|
||||
+ .addReg(SrcReg, RegState::Implicit |
|
||||
+ getKillRegState(MI.getOperand(0).isKill()));
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1122,16 +1129,14 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
|
||||
SrcReg == PPC::CR4LT || SrcReg == PPC::CR5LT ||
|
||||
SrcReg == PPC::CR6LT || SrcReg == PPC::CR7LT) {
|
||||
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::SETB8 : PPC::SETB), Reg)
|
||||
- .addReg(getCRFromCRBit(SrcReg), RegState::Undef);
|
||||
+ .addReg(getCRFromCRBit(SrcReg), RegState::Undef)
|
||||
+ .addReg(SrcReg, RegState::Implicit |
|
||||
+ getKillRegState(MI.getOperand(0).isKill()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// We need to move the CR field that contains the CR bit we are spilling.
|
||||
- // The super register may not be explicitly defined (i.e. it can be defined
|
||||
- // by a CR-logical that only defines the subreg) so we state that the CR
|
||||
- // field is undef. Also, in order to preserve the kill flag on the CR bit,
|
||||
- // we add it as an implicit use.
|
||||
BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
|
||||
.addReg(getCRFromCRBit(SrcReg), RegState::Undef)
|
||||
.addReg(SrcReg,
|
||||
diff --git a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir
|
||||
index 41e21248a3f0..2796cdb3ae87 100644
|
||||
--- a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir
|
||||
+++ b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir
|
||||
@@ -1,6 +1,12 @@
|
||||
# RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -start-after \
|
||||
# RUN: virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \
|
||||
# RUN: -o - | FileCheck %s
|
||||
+# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -start-after \
|
||||
+# RUN: virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \
|
||||
+# RUN: -o - | FileCheck %s
|
||||
+# RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu -start-after \
|
||||
+# RUN: virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \
|
||||
+# RUN: -o - | FileCheck %s
|
||||
|
||||
--- |
|
||||
; ModuleID = 'a.ll'
|
||||
@@ -30,7 +36,7 @@
|
||||
; Function Attrs: nounwind
|
||||
declare void @llvm.stackprotector(ptr, ptr) #1
|
||||
|
||||
- attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
+ attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
!llvm.ident = !{!0}
|
||||
--
|
||||
2.49.0
|
||||
|
||||
276
SOURCES/43cb4631c1f42dbfce78288b8ae30b5840ed59b3.patch
Normal file
276
SOURCES/43cb4631c1f42dbfce78288b8ae30b5840ed59b3.patch
Normal file
@ -0,0 +1,276 @@
|
||||
From 43cb4631c1f42dbfce78288b8ae30b5840ed59b3 Mon Sep 17 00:00:00 2001
|
||||
From: Ebuka Ezike <yerimyah1@gmail.com>
|
||||
Date: Thu, 8 Jan 2026 18:46:03 +0000
|
||||
Subject: [PATCH] [lldb] Fix typed commands not shown on the screen (#174216)
|
||||
|
||||
The cause is that in `python3.14`, `fcntl.ioctl` now throws a buffer
|
||||
overflow error
|
||||
when the buffer is too small or too large (see
|
||||
https://github.com/python/cpython/pull/132919). This caused the Python
|
||||
interpreter to fail terminal detection and not properly echo user
|
||||
commands back to the screen.
|
||||
|
||||
Fix by dropping the custom terminal size check entirely and using the
|
||||
built-in `sys.stdin.isatty()` instead.
|
||||
|
||||
Fixes #173302
|
||||
---
|
||||
.../Python/lldbsuite/test/lldbpexpect.py | 1 +
|
||||
.../Interpreter/embedded_interpreter.py | 59 +++---------------
|
||||
.../python_api/file_handle/TestFileHandle.py | 48 +++++++++++++-
|
||||
.../API/terminal/TestPythonInterpreterEcho.py | 62 +++++++++++++++++++
|
||||
.../Shell/ScriptInterpreter/Python/io.test | 12 ++++
|
||||
5 files changed, 131 insertions(+), 51 deletions(-)
|
||||
create mode 100644 lldb/test/API/terminal/TestPythonInterpreterEcho.py
|
||||
create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/io.test
|
||||
|
||||
diff --git a/lldb/packages/Python/lldbsuite/test/lldbpexpect.py b/lldb/packages/Python/lldbsuite/test/lldbpexpect.py
|
||||
index 3279e1fd39f8c..03b2500fbda52 100644
|
||||
--- a/lldb/packages/Python/lldbsuite/test/lldbpexpect.py
|
||||
+++ b/lldb/packages/Python/lldbsuite/test/lldbpexpect.py
|
||||
@@ -10,6 +10,7 @@
|
||||
|
||||
|
||||
@skipIfRemote
|
||||
+@skipIfWindows
|
||||
@add_test_categories(["pexpect"])
|
||||
class PExpectTest(TestBase):
|
||||
NO_DEBUG_INFO_TESTCASE = True
|
||||
diff --git a/lldb/source/Interpreter/embedded_interpreter.py b/lldb/source/Interpreter/embedded_interpreter.py
|
||||
index 42a9ab5fc367a..12c47bd712816 100644
|
||||
--- a/lldb/source/Interpreter/embedded_interpreter.py
|
||||
+++ b/lldb/source/Interpreter/embedded_interpreter.py
|
||||
@@ -32,18 +32,6 @@ def is_libedit():
|
||||
g_run_one_line_str = None
|
||||
|
||||
|
||||
-def get_terminal_size(fd):
|
||||
- try:
|
||||
- import fcntl
|
||||
- import termios
|
||||
- import struct
|
||||
-
|
||||
- hw = struct.unpack("hh", fcntl.ioctl(fd, termios.TIOCGWINSZ, "1234"))
|
||||
- except:
|
||||
- hw = (0, 0)
|
||||
- return hw
|
||||
-
|
||||
-
|
||||
class LLDBExit(SystemExit):
|
||||
pass
|
||||
|
||||
@@ -74,50 +62,21 @@ def readfunc_stdio(prompt):
|
||||
def run_python_interpreter(local_dict):
|
||||
# Pass in the dictionary, for continuity from one session to the next.
|
||||
try:
|
||||
- fd = sys.stdin.fileno()
|
||||
- interacted = False
|
||||
- if get_terminal_size(fd)[1] == 0:
|
||||
- try:
|
||||
- import termios
|
||||
-
|
||||
- old = termios.tcgetattr(fd)
|
||||
- if old[3] & termios.ECHO:
|
||||
- # Need to turn off echoing and restore
|
||||
- new = termios.tcgetattr(fd)
|
||||
- new[3] = new[3] & ~termios.ECHO
|
||||
- try:
|
||||
- termios.tcsetattr(fd, termios.TCSADRAIN, new)
|
||||
- interacted = True
|
||||
- code.interact(
|
||||
- banner="Python Interactive Interpreter. To exit, type 'quit()', 'exit()'.",
|
||||
- readfunc=readfunc_stdio,
|
||||
- local=local_dict,
|
||||
- )
|
||||
- finally:
|
||||
- termios.tcsetattr(fd, termios.TCSADRAIN, old)
|
||||
- except:
|
||||
- pass
|
||||
- # Don't need to turn off echoing
|
||||
- if not interacted:
|
||||
- code.interact(
|
||||
- banner="Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D.",
|
||||
- readfunc=readfunc_stdio,
|
||||
- local=local_dict,
|
||||
- )
|
||||
- else:
|
||||
- # We have a real interactive terminal
|
||||
- code.interact(
|
||||
- banner="Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D.",
|
||||
- readfunc=readfunc,
|
||||
- local=local_dict,
|
||||
- )
|
||||
+ banner = "Python Interactive Interpreter. To exit, type 'quit()', 'exit()'."
|
||||
+ input_func = readfunc_stdio
|
||||
+
|
||||
+ is_atty = sys.stdin.isatty()
|
||||
+ if is_atty:
|
||||
+ banner = "Python Interactive Interpreter. To exit, type 'quit()', 'exit()' or Ctrl-D."
|
||||
+ input_func = readfunc
|
||||
+
|
||||
+ code.interact(banner=banner, readfunc=input_func, local=local_dict)
|
||||
except LLDBExit:
|
||||
pass
|
||||
except SystemExit as e:
|
||||
if e.code:
|
||||
print("Script exited with code %s" % e.code)
|
||||
|
||||
-
|
||||
def run_one_line(local_dict, input_string):
|
||||
global g_run_one_line_str
|
||||
try:
|
||||
diff --git a/lldb/test/API/python_api/file_handle/TestFileHandle.py b/lldb/test/API/python_api/file_handle/TestFileHandle.py
|
||||
index b38585577f6f6..707044a3afb0f 100644
|
||||
--- a/lldb/test/API/python_api/file_handle/TestFileHandle.py
|
||||
+++ b/lldb/test/API/python_api/file_handle/TestFileHandle.py
|
||||
@@ -111,10 +111,11 @@ def setUp(self):
|
||||
super(FileHandleTestCase, self).setUp()
|
||||
self.out_filename = self.getBuildArtifact("output")
|
||||
self.in_filename = self.getBuildArtifact("input")
|
||||
+ self.err_filename = self.getBuildArtifact("error")
|
||||
|
||||
def tearDown(self):
|
||||
super(FileHandleTestCase, self).tearDown()
|
||||
- for name in (self.out_filename, self.in_filename):
|
||||
+ for name in (self.out_filename, self.in_filename, self.err_filename):
|
||||
if os.path.exists(name):
|
||||
os.unlink(name)
|
||||
|
||||
@@ -679,6 +680,51 @@ def test_stdout_file(self):
|
||||
lines = [x for x in f.read().strip().split() if x != "7"]
|
||||
self.assertEqual(lines, ["foobar"])
|
||||
|
||||
+ def test_stdout_file_interactive(self):
|
||||
+ """Ensure when we read stdin from a file, outputs from python goes to the right I/O stream."""
|
||||
+ with open(self.in_filename, "w") as f:
|
||||
+ f.write(
|
||||
+ "script --language python --\nvalue = 250 + 5\nprint(value)\nprint(vel)"
|
||||
+ )
|
||||
+
|
||||
+ with open(self.out_filename, "w") as outf, open(
|
||||
+ self.in_filename, "r"
|
||||
+ ) as inf, open(self.err_filename, "w") as errf:
|
||||
+ status = self.dbg.SetOutputFile(lldb.SBFile(outf))
|
||||
+ self.assertSuccess(status)
|
||||
+ status = self.dbg.SetErrorFile(lldb.SBFile(errf))
|
||||
+ self.assertSuccess(status)
|
||||
+ status = self.dbg.SetInputFile(lldb.SBFile(inf))
|
||||
+ self.assertSuccess(status)
|
||||
+ auto_handle_events = True
|
||||
+ spawn_thread = False
|
||||
+ num_errs = 0
|
||||
+ quit_requested = False
|
||||
+ stopped_for_crash = False
|
||||
+ opts = lldb.SBCommandInterpreterRunOptions()
|
||||
+ self.dbg.RunCommandInterpreter(
|
||||
+ auto_handle_events,
|
||||
+ spawn_thread,
|
||||
+ opts,
|
||||
+ num_errs,
|
||||
+ quit_requested,
|
||||
+ stopped_for_crash,
|
||||
+ )
|
||||
+ self.dbg.GetOutputFile().Flush()
|
||||
+ expected_out_text = "255"
|
||||
+ expected_err_text = "NameError"
|
||||
+ # check stdout
|
||||
+ with open(self.out_filename, "r") as f:
|
||||
+ out_text = f.read()
|
||||
+ self.assertIn(expected_out_text, out_text)
|
||||
+ self.assertNotIn(expected_err_text, out_text)
|
||||
+
|
||||
+ # check stderr
|
||||
+ with open(self.err_filename, "r") as f:
|
||||
+ err_text = f.read()
|
||||
+ self.assertIn(expected_err_text, err_text)
|
||||
+ self.assertNotIn(expected_out_text, err_text)
|
||||
+
|
||||
def test_identity(self):
|
||||
f = io.StringIO()
|
||||
sbf = lldb.SBFile(f)
|
||||
diff --git a/lldb/test/API/terminal/TestPythonInterpreterEcho.py b/lldb/test/API/terminal/TestPythonInterpreterEcho.py
|
||||
new file mode 100644
|
||||
index 0000000000000..758a4f9cede5a
|
||||
--- /dev/null
|
||||
+++ b/lldb/test/API/terminal/TestPythonInterpreterEcho.py
|
||||
@@ -0,0 +1,62 @@
|
||||
+"""
|
||||
+Test that typing python expression in the terminal is echoed back to stdout.
|
||||
+"""
|
||||
+
|
||||
+from lldbsuite.test.decorators import skipIfAsan
|
||||
+from lldbsuite.test.lldbpexpect import PExpectTest
|
||||
+
|
||||
+
|
||||
+@skipIfAsan
|
||||
+class PythonInterpreterEchoTest(PExpectTest):
|
||||
+ PYTHON_PROMPT = ">>> "
|
||||
+
|
||||
+ def verify_command_echo(
|
||||
+ self, command: str, expected_output: str = "", is_regex: bool = False
|
||||
+ ):
|
||||
+ assert self.child != None
|
||||
+ child = self.child
|
||||
+ self.assertIsNotNone(self.child, "expected a running lldb process.")
|
||||
+
|
||||
+ child.sendline(command)
|
||||
+
|
||||
+ # Build pattern list: match whichever comes first (output or prompt).
|
||||
+ # This prevents waiting for a timeout if there's no match.
|
||||
+ pattern = []
|
||||
+ match_expected = expected_output and len(expected_output) > 0
|
||||
+
|
||||
+ if match_expected:
|
||||
+ pattern.append(expected_output)
|
||||
+ pattern.append(self.PYTHON_PROMPT)
|
||||
+
|
||||
+ expect_func = child.expect if is_regex else child.expect_exact
|
||||
+ match_idx = expect_func(pattern)
|
||||
+ if match_expected:
|
||||
+ self.assertEqual(
|
||||
+ match_idx, 0, "Expected output `{expected_output}` in stdout."
|
||||
+ )
|
||||
+
|
||||
+ self.assertIsNotNone(self.child.before, "Expected output before prompt")
|
||||
+ self.assertIsInstance(self.child.before, bytes)
|
||||
+ echoed_text: str = self.child.before.decode("ascii").strip()
|
||||
+ self.assertEqual(
|
||||
+ command, echoed_text, f"Command '{command}' should be echoed to stdout."
|
||||
+ )
|
||||
+
|
||||
+ if match_expected:
|
||||
+ child.expect_exact(self.PYTHON_PROMPT)
|
||||
+
|
||||
+ def test_python_interpreter_echo(self):
|
||||
+ """Test that that the user typed commands is echoed to stdout"""
|
||||
+
|
||||
+ self.launch(use_colors=False, dimensions=(100, 100))
|
||||
+
|
||||
+ # Enter the python interpreter.
|
||||
+ self.verify_command_echo(
|
||||
+ "script --language python --", expected_output="Python.*\\.", is_regex=True
|
||||
+ )
|
||||
+ self.child_in_script_interpreter = True
|
||||
+
|
||||
+ self.verify_command_echo("val = 300")
|
||||
+ self.verify_command_echo(
|
||||
+ "print('result =', 300)", expected_output="result = 300"
|
||||
+ )
|
||||
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/io.test b/lldb/test/Shell/ScriptInterpreter/Python/io.test
|
||||
new file mode 100644
|
||||
index 0000000000000..25e3de41724e0
|
||||
--- /dev/null
|
||||
+++ b/lldb/test/Shell/ScriptInterpreter/Python/io.test
|
||||
@@ -0,0 +1,12 @@
|
||||
+# RUN: rm -rf %t.stdout %t.stderr
|
||||
+# RUN: cat %s | %lldb --script-language python > %t.stdout 2> %t.stderr
|
||||
+# RUN: cat %t.stdout | FileCheck %s --check-prefix STDOUT
|
||||
+# RUN: cat %t.stderr | FileCheck %s --check-prefix STDERR
|
||||
+script
|
||||
+variable = 300
|
||||
+print(variable)
|
||||
+print(not_value)
|
||||
+quit
|
||||
+
|
||||
+# STDOUT: 300
|
||||
+# STDERR: NameError{{.*}}is not defined
|
||||
1013
SOURCES/changelog
Normal file
1013
SOURCES/changelog
Normal file
File diff suppressed because it is too large
Load Diff
@ -102,3 +102,29 @@ yWfeofTJ7PhKzoXM2Y/rRFoM5gNh1RVA19ngLT5Jwiof8fPZvHJ/9ZkHn+O7eMNm
|
||||
m5++gYza3pnn2/PoGpGGAKok+sfJiq5Tb7RUefyJTeZiyTZ/XJrA
|
||||
=tMzl
|
||||
-----END PGP PUBLIC KEY BLOCK-----
|
||||
-----BEGIN PGP PUBLIC KEY BLOCK-----
|
||||
|
||||
mDMEaMgtRhYJKwYBBAHaRw8BAQdA4NRjJPhVd56sOM+QmTbZKkRT3bYbgg6+Bxed
|
||||
CELeGp+0JUN1bGxlbiBSaG9kZXMgPGN1bGxlbi5yaG9kZXNAYXJtLmNvbT6IkwQT
|
||||
FgoAOxYhBHEEbR6cZla91hFxhz6Dur9KT56FBQJoyC1GAhsDBQsJCAcCAiICBhUK
|
||||
CQgLAgQWAgMBAh4HAheAAAoJED6Dur9KT56FkVwA/RLNMBHrjXoAKpRm1iIjiC6w
|
||||
gLRqGOnj1qAqPqgntMmmAQCQ2lGpw46rvh88ng84IGsRF0JlTAYb6SR/YYNsQyah
|
||||
Arg4BGjILUYSCisGAQQBl1UBBQEBB0B48hCLw13kduwibGDGoIax0BIa+f66IUC+
|
||||
HhNlucsjbgMBCAeIeAQYFgoAIBYhBHEEbR6cZla91hFxhz6Dur9KT56FBQJoyC1G
|
||||
AhsMAAoJED6Dur9KT56FVssBAO1lL/S2cU65XFHgbjc6crwljDrD7PYbxBA7hDpi
|
||||
pC4ZAP98rK1hGQ5wxpeiJ0heZ8zhpdUwEeymIDBaIcwgrJRFBQ==
|
||||
=HcEB
|
||||
-----END PGP PUBLIC KEY BLOCK-----
|
||||
-----BEGIN PGP PUBLIC KEY BLOCK-----
|
||||
|
||||
mDMEaMg2hBYJKwYBBAHaRw8BAQdA2J814YnhPQSdsyjwx8VxZ7AitqCnns9lzvkx
|
||||
HX9lWMW0JERvdWdsYXMgWXVuZyA8ZG91Z2xhcy55dW5nQHNvbnkuY29tPoiTBBMW
|
||||
CgA7FiEE/7M2iYDz5rtXNxRaMWxW0GTKy6UFAmjINoQCGwMFCwkIBwICIgIGFQoJ
|
||||
CAsCBBYCAwECHgcCF4AACgkQMWxW0GTKy6XjYgEApJ7p+o7EAeaaOdO2f440KDfg
|
||||
t7haaBLaxr5fiaSKjkYA+gLDxWOh39Y84upf23qMmpSTZ3SK5LvJtBTVtV7AEX0B
|
||||
uDgEaMg2hBIKKwYBBAGXVQEFAQEHQL3CL6jHZAakhtLLj2Ks34u7ItY/7USl/bYk
|
||||
f/+mZTJMAwEIB4h4BBgWCgAgFiEE/7M2iYDz5rtXNxRaMWxW0GTKy6UFAmjINoQC
|
||||
GwwACgkQMWxW0GTKy6VTOgEArDn9bg58W7bfZfVfneJJbIeICEf3NN9IovbRbAOB
|
||||
ax0A/RxtrG4qowLlo907vb25ITOa1hBoheSV2wNoDaDUhFEF
|
||||
=JQ8u
|
||||
-----END PGP PUBLIC KEY BLOCK-----
|
||||
|
||||
3187
SPECS/llvm.spec
3187
SPECS/llvm.spec
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user