LLVM 20.1.0 Release
Import from Fedora rawhide. Resolves: RHEL-81006
This commit is contained in:
parent
ead726baaf
commit
ce9909db5c
7
.gitignore
vendored
7
.gitignore
vendored
@ -1,7 +1,14 @@
|
||||
/*.src.rpm
|
||||
/*.src.tar.xz
|
||||
/*.src.tar.xz.sig
|
||||
/*.tar.gz
|
||||
/cmake/
|
||||
/llvm-*.src/
|
||||
/results_llvm/
|
||||
/third-party/
|
||||
/llvm-git-revision-*.txt
|
||||
/llvm-release-*.txt
|
||||
/BUILD
|
||||
/BUILDROOT
|
||||
/out
|
||||
/version.spec.inc
|
||||
|
@ -1,29 +0,0 @@
|
||||
From b1c60d7fa322a2d208556087df9e7ef94bfbffb8 Mon Sep 17 00:00:00 2001
|
||||
From: Nikita Popov <npopov@redhat.com>
|
||||
Date: Wed, 8 May 2024 12:30:36 +0900
|
||||
Subject: [PATCH] Always build shared libs for LLD
|
||||
|
||||
We don't want to enable BUILD_SHARED_LIBS for the whole build,
|
||||
but we do want to build lld libraries.
|
||||
---
|
||||
lld/cmake/modules/AddLLD.cmake | 5 ++---
|
||||
1 file changed, 2 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/lld/cmake/modules/AddLLD.cmake b/lld/cmake/modules/AddLLD.cmake
|
||||
index 2ee066b41535..270c03f096ac 100644
|
||||
--- a/lld/cmake/modules/AddLLD.cmake
|
||||
+++ b/lld/cmake/modules/AddLLD.cmake
|
||||
@@ -7,9 +7,8 @@ macro(add_lld_library name)
|
||||
""
|
||||
""
|
||||
${ARGN})
|
||||
- if(ARG_SHARED)
|
||||
- set(ARG_ENABLE_SHARED SHARED)
|
||||
- endif()
|
||||
+ # Always build shared libs for LLD.
|
||||
+ set(ARG_ENABLE_SHARED SHARED)
|
||||
llvm_add_library(${name} ${ARG_ENABLE_SHARED} ${ARG_UNPARSED_ARGUMENTS})
|
||||
set_target_properties(${name} PROPERTIES FOLDER "lld libraries")
|
||||
|
||||
--
|
||||
2.44.0
|
@ -1,187 +0,0 @@
|
||||
From ee0f56cff40b324bb06e034e247ec85ae9a846bf Mon Sep 17 00:00:00 2001
|
||||
From: Nikita Popov <npopov@redhat.com>
|
||||
Date: Wed, 8 Jan 2025 08:28:18 +0100
|
||||
Subject: [PATCH] [PATCH] [Bolt][CMake] Don't export bolt libraries in
|
||||
LLVMExports.cmake
|
||||
|
||||
Bolt makes use of add_llvm_library and as such ends up exporting
|
||||
its libraries from LLVMExports.cmake, which is not correct.
|
||||
|
||||
Bolt doesn't have its own exports file, and I assume that there
|
||||
is no desire to have one either -- Bolt libraries are not intended
|
||||
to be consumed as a cmake module, right?
|
||||
|
||||
As such, this PR adds a NO_EXPORT option to simplify exclude these
|
||||
libraries from the exports file.
|
||||
|
||||
---
|
||||
This patch originates from this PR:
|
||||
|
||||
https://patch-diff.githubusercontent.com/raw/llvm/llvm-project/pull/121936.
|
||||
|
||||
The commit was:
|
||||
|
||||
https://github.com/nikic/llvm-project/commit/4333a4dd270b5c046c5469b97846e3dae58fb221.patch
|
||||
|
||||
And then it was rebased onto llvmorg-19.1.6.
|
||||
---
|
||||
bolt/lib/Core/CMakeLists.txt | 1 +
|
||||
bolt/lib/Passes/CMakeLists.txt | 1 +
|
||||
bolt/lib/Profile/CMakeLists.txt | 1 +
|
||||
bolt/lib/Rewrite/CMakeLists.txt | 1 +
|
||||
bolt/lib/RuntimeLibs/CMakeLists.txt | 1 +
|
||||
bolt/lib/Target/AArch64/CMakeLists.txt | 1 +
|
||||
bolt/lib/Target/RISCV/CMakeLists.txt | 1 +
|
||||
bolt/lib/Target/X86/CMakeLists.txt | 1 +
|
||||
bolt/lib/Utils/CMakeLists.txt | 1 +
|
||||
llvm/cmake/modules/AddLLVM.cmake | 12 +++++++++---
|
||||
10 files changed, 18 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt
|
||||
index bb58667066fd..8c1f5d0bb37b 100644
|
||||
--- a/bolt/lib/Core/CMakeLists.txt
|
||||
+++ b/bolt/lib/Core/CMakeLists.txt
|
||||
@@ -35,6 +35,7 @@ add_llvm_library(LLVMBOLTCore
|
||||
ParallelUtilities.cpp
|
||||
Relocation.cpp
|
||||
|
||||
+ NO_EXPORT
|
||||
DISABLE_LLVM_LINK_LLVM_DYLIB
|
||||
LINK_LIBS
|
||||
${LLVM_PTHREAD_LIB}
|
||||
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
|
||||
index 407d8b03f739..7367b541545d 100644
|
||||
--- a/bolt/lib/Passes/CMakeLists.txt
|
||||
+++ b/bolt/lib/Passes/CMakeLists.txt
|
||||
@@ -45,6 +45,7 @@ add_llvm_library(LLVMBOLTPasses
|
||||
VeneerElimination.cpp
|
||||
RetpolineInsertion.cpp
|
||||
|
||||
+ NO_EXPORT
|
||||
DISABLE_LLVM_LINK_LLVM_DYLIB
|
||||
|
||||
LINK_LIBS
|
||||
diff --git a/bolt/lib/Profile/CMakeLists.txt b/bolt/lib/Profile/CMakeLists.txt
|
||||
index 9aa4ba0490b0..a2bb4aa074c7 100644
|
||||
--- a/bolt/lib/Profile/CMakeLists.txt
|
||||
+++ b/bolt/lib/Profile/CMakeLists.txt
|
||||
@@ -7,6 +7,7 @@ add_llvm_library(LLVMBOLTProfile
|
||||
YAMLProfileReader.cpp
|
||||
YAMLProfileWriter.cpp
|
||||
|
||||
+ NO_EXPORT
|
||||
DISABLE_LLVM_LINK_LLVM_DYLIB
|
||||
|
||||
LINK_COMPONENTS
|
||||
diff --git a/bolt/lib/Rewrite/CMakeLists.txt b/bolt/lib/Rewrite/CMakeLists.txt
|
||||
index 34993af2623b..6737e89b8451 100644
|
||||
--- a/bolt/lib/Rewrite/CMakeLists.txt
|
||||
+++ b/bolt/lib/Rewrite/CMakeLists.txt
|
||||
@@ -26,6 +26,7 @@ add_llvm_library(LLVMBOLTRewrite
|
||||
RewriteInstance.cpp
|
||||
SDTRewriter.cpp
|
||||
|
||||
+ NO_EXPORT
|
||||
DISABLE_LLVM_LINK_LLVM_DYLIB
|
||||
|
||||
LINK_LIBS
|
||||
diff --git a/bolt/lib/RuntimeLibs/CMakeLists.txt b/bolt/lib/RuntimeLibs/CMakeLists.txt
|
||||
index d3ac71d3e797..b8db7e4a1553 100644
|
||||
--- a/bolt/lib/RuntimeLibs/CMakeLists.txt
|
||||
+++ b/bolt/lib/RuntimeLibs/CMakeLists.txt
|
||||
@@ -11,6 +11,7 @@ add_llvm_library(LLVMBOLTRuntimeLibs
|
||||
HugifyRuntimeLibrary.cpp
|
||||
InstrumentationRuntimeLibrary.cpp
|
||||
|
||||
+ NO_EXPORT
|
||||
DISABLE_LLVM_LINK_LLVM_DYLIB
|
||||
)
|
||||
|
||||
diff --git a/bolt/lib/Target/AArch64/CMakeLists.txt b/bolt/lib/Target/AArch64/CMakeLists.txt
|
||||
index be03e247aa96..526a9645cb54 100644
|
||||
--- a/bolt/lib/Target/AArch64/CMakeLists.txt
|
||||
+++ b/bolt/lib/Target/AArch64/CMakeLists.txt
|
||||
@@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS
|
||||
add_llvm_library(LLVMBOLTTargetAArch64
|
||||
AArch64MCPlusBuilder.cpp
|
||||
|
||||
+ NO_EXPORT
|
||||
DISABLE_LLVM_LINK_LLVM_DYLIB
|
||||
|
||||
DEPENDS
|
||||
diff --git a/bolt/lib/Target/RISCV/CMakeLists.txt b/bolt/lib/Target/RISCV/CMakeLists.txt
|
||||
index 7f9557606320..3955cfc0f089 100644
|
||||
--- a/bolt/lib/Target/RISCV/CMakeLists.txt
|
||||
+++ b/bolt/lib/Target/RISCV/CMakeLists.txt
|
||||
@@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS
|
||||
add_llvm_library(LLVMBOLTTargetRISCV
|
||||
RISCVMCPlusBuilder.cpp
|
||||
|
||||
+ NO_EXPORT
|
||||
DISABLE_LLVM_LINK_LLVM_DYLIB
|
||||
|
||||
DEPENDS
|
||||
diff --git a/bolt/lib/Target/X86/CMakeLists.txt b/bolt/lib/Target/X86/CMakeLists.txt
|
||||
index 2b769bc7e7f5..00100e9b84c9 100644
|
||||
--- a/bolt/lib/Target/X86/CMakeLists.txt
|
||||
+++ b/bolt/lib/Target/X86/CMakeLists.txt
|
||||
@@ -9,6 +9,7 @@ add_llvm_library(LLVMBOLTTargetX86
|
||||
X86MCPlusBuilder.cpp
|
||||
X86MCSymbolizer.cpp
|
||||
|
||||
+ NO_EXPORT
|
||||
DISABLE_LLVM_LINK_LLVM_DYLIB
|
||||
|
||||
DEPENDS
|
||||
diff --git a/bolt/lib/Utils/CMakeLists.txt b/bolt/lib/Utils/CMakeLists.txt
|
||||
index d1403314274b..ceddcfc8f57a 100644
|
||||
--- a/bolt/lib/Utils/CMakeLists.txt
|
||||
+++ b/bolt/lib/Utils/CMakeLists.txt
|
||||
@@ -2,6 +2,7 @@ add_llvm_library(LLVMBOLTUtils
|
||||
CommandLineOpts.cpp
|
||||
Utils.cpp
|
||||
|
||||
+ NO_EXPORT
|
||||
DISABLE_LLVM_LINK_LLVM_DYLIB
|
||||
|
||||
LINK_LIBS
|
||||
diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
|
||||
index 03f4e1f190fd..addf083b7c71 100644
|
||||
--- a/llvm/cmake/modules/AddLLVM.cmake
|
||||
+++ b/llvm/cmake/modules/AddLLVM.cmake
|
||||
@@ -897,7 +897,7 @@ endfunction()
|
||||
|
||||
macro(add_llvm_library name)
|
||||
cmake_parse_arguments(ARG
|
||||
- "SHARED;BUILDTREE_ONLY;MODULE;INSTALL_WITH_TOOLCHAIN"
|
||||
+ "SHARED;BUILDTREE_ONLY;MODULE;INSTALL_WITH_TOOLCHAIN;NO_EXPORT"
|
||||
""
|
||||
""
|
||||
${ARGN})
|
||||
@@ -932,7 +932,11 @@ macro(add_llvm_library name)
|
||||
set(umbrella)
|
||||
endif()
|
||||
|
||||
- get_target_export_arg(${name} LLVM export_to_llvmexports ${umbrella})
|
||||
+ if(ARG_NO_EXPORT)
|
||||
+ set(export_to_llvmexports)
|
||||
+ else()
|
||||
+ get_target_export_arg(${name} LLVM export_to_llvmexports ${umbrella})
|
||||
+ endif()
|
||||
install(TARGETS ${name}
|
||||
${export_to_llvmexports}
|
||||
LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT ${name}
|
||||
@@ -945,7 +949,9 @@ macro(add_llvm_library name)
|
||||
COMPONENT ${name})
|
||||
endif()
|
||||
endif()
|
||||
- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name})
|
||||
+ if(NOT ARG_NO_EXPORT)
|
||||
+ set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name})
|
||||
+ endif()
|
||||
endif()
|
||||
|
||||
get_subproject_title(subproject_title)
|
||||
--
|
||||
2.47.1
|
||||
|
59
0001-20-polly-shared-libs.patch
Normal file
59
0001-20-polly-shared-libs.patch
Normal file
@ -0,0 +1,59 @@
|
||||
From cecb98f56e7d6619d0427fbdbc2f200ce212f0c6 Mon Sep 17 00:00:00 2001
|
||||
From: Konrad Kleine <kkleine@redhat.com>
|
||||
Date: Tue, 28 Jan 2025 08:34:09 +0000
|
||||
Subject: [PATCH] [polly] shared libs
|
||||
|
||||
---
|
||||
polly/cmake/polly_macros.cmake | 5 ++++-
|
||||
polly/lib/CMakeLists.txt | 1 +
|
||||
polly/lib/External/CMakeLists.txt | 1 +
|
||||
3 files changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/polly/cmake/polly_macros.cmake b/polly/cmake/polly_macros.cmake
|
||||
index 9bd7b0b0ea59..fc2c3a76901f 100644
|
||||
--- a/polly/cmake/polly_macros.cmake
|
||||
+++ b/polly/cmake/polly_macros.cmake
|
||||
@@ -1,5 +1,5 @@
|
||||
macro(add_polly_library name)
|
||||
- cmake_parse_arguments(ARG "" "" "" ${ARGN})
|
||||
+ cmake_parse_arguments(ARG "SHARED" "" "" ${ARGN})
|
||||
set(srcs ${ARG_UNPARSED_ARGUMENTS})
|
||||
if(MSVC_IDE OR XCODE)
|
||||
file( GLOB_RECURSE headers *.h *.td *.def)
|
||||
@@ -17,6 +17,9 @@ macro(add_polly_library name)
|
||||
else()
|
||||
set(libkind)
|
||||
endif()
|
||||
+ if (ARG_SHARED)
|
||||
+ set(libkind SHARED)
|
||||
+ endif()
|
||||
add_library( ${name} ${libkind} ${srcs} )
|
||||
set_target_properties(${name} PROPERTIES FOLDER "Polly/Libraries")
|
||||
|
||||
diff --git a/polly/lib/CMakeLists.txt b/polly/lib/CMakeLists.txt
|
||||
index d91f4ecd37e6..965f635b7ff6 100644
|
||||
--- a/polly/lib/CMakeLists.txt
|
||||
+++ b/polly/lib/CMakeLists.txt
|
||||
@@ -41,6 +41,7 @@ set(POLLY_COMPONENTS
|
||||
# the sources them to be recompiled for each of them.
|
||||
add_llvm_pass_plugin(Polly
|
||||
NO_MODULE
|
||||
+ SHARED
|
||||
SUBPROJECT Polly
|
||||
Analysis/DependenceInfo.cpp
|
||||
Analysis/PolyhedralInfo.cpp
|
||||
diff --git a/polly/lib/External/CMakeLists.txt b/polly/lib/External/CMakeLists.txt
|
||||
index 5dd69b7199dc..f065fbd7b942 100644
|
||||
--- a/polly/lib/External/CMakeLists.txt
|
||||
+++ b/polly/lib/External/CMakeLists.txt
|
||||
@@ -284,6 +284,7 @@ if (POLLY_BUNDLED_ISL)
|
||||
)
|
||||
|
||||
add_polly_library(PollyISL
|
||||
+ SHARED
|
||||
${ISL_FILES}
|
||||
)
|
||||
|
||||
--
|
||||
2.46.0
|
||||
|
@ -1,129 +0,0 @@
|
||||
From 17ff6161b83e6a5e86fcb6a13c5551bba1438405 Mon Sep 17 00:00:00 2001
|
||||
From: Erick Ochoa <github@ceci-nest-pas.me>
|
||||
Date: Thu, 12 Sep 2024 21:16:58 -0400
|
||||
Subject: [PATCH 1/9] [CMake] Add missing dependency (#108461)
|
||||
|
||||
The [`mlir-capi-execution-engine-test` test
|
||||
executable](https://github.com/llvm/llvm-project/blob/main/mlir/test/CAPI/CMakeLists.txt#L26-L34)
|
||||
|
||||
```cmake
|
||||
if(MLIR_ENABLE_EXECUTION_ENGINE)
|
||||
_add_capi_test_executable(mlir-capi-execution-engine-test
|
||||
execution_engine.c
|
||||
LINK_LIBS PRIVATE
|
||||
MLIRCAPIConversion
|
||||
MLIRCAPIExecutionEngine
|
||||
MLIRCAPIRegisterEverything
|
||||
)
|
||||
endif()
|
||||
```
|
||||
|
||||
|
||||
is run by lit tests, but it is not properly listed as a dependency. It
|
||||
is added in places conditionally across the file
|
||||
[`tests/CMakeLists.txt`](https://github.com/llvm/llvm-project/blob/main/mlir/test/CMakeLists.txt#L130-L143)
|
||||
|
||||
```cmake
|
||||
# The native target may not be enabled, in this case we won't
|
||||
# run tests that involves executing on the host: do not build
|
||||
# useless binaries.
|
||||
if(LLVM_ENABLE_PIC AND TARGET ${LLVM_NATIVE_ARCH})
|
||||
list(APPEND MLIR_TEST_DEPENDS
|
||||
mlir-cpu-runner
|
||||
llc
|
||||
mlir_async_runtime
|
||||
mlir-capi-execution-engine-test
|
||||
mlir_c_runner_utils
|
||||
mlir_runner_utils
|
||||
mlir_float16_utils
|
||||
)
|
||||
endif()
|
||||
```
|
||||
|
||||
But this condition is not the same as the one where the test executable
|
||||
is added. [It has been reported on discord that the following error
|
||||
occurred:](https://discord.com/channels/636084430946959380/642426447167881246/1283811636725022730)
|
||||
|
||||
```
|
||||
FAIL: MLIR :: CAPI/execution_engine.c (2 of 2121)
|
||||
******************** TEST 'MLIR :: CAPI/execution_engine.c' FAILED ********************
|
||||
Exit Code: 127
|
||||
Command Output (stdout):
|
||||
--
|
||||
# RUN: at line 10
|
||||
/usr/bin/mlir-capi-execution-engine-test 2>&1 | /usr/bin/FileCheck /builddir/build/BUILD/mlir-19.1.0_rc4-build/mlir-19.1.0-rc4.src/test/CAPI/execution_engine.c
|
||||
# executed command: /usr/bin/mlir-capi-execution-engine-test
|
||||
# .---command stderr------------
|
||||
# | '/usr/bin/mlir-capi-execution-engine-test': command not found
|
||||
# `-----------------------------
|
||||
```
|
||||
|
||||
This error will not be deterministic and is dependent on the order in
|
||||
which tools are built. If by any chance,
|
||||
`mlir-capi-execution-engine-test` is built before the lit tests run,
|
||||
then nothing will happen. But lit tests can be run before
|
||||
`mlir-capi-execution-engine-test` is built.
|
||||
|
||||
This patch adds the `mlir-capi-execution-engine` to the
|
||||
`MLIR_TEST_DEPENDS` list when the `MLIR_ENABLE_EXECUTION_ENGINE` flag is
|
||||
present.
|
||||
|
||||
Happy to make changes like:
|
||||
* removing `mlir-capi-execution-engine-test` from the other place where
|
||||
it is included in the tests
|
||||
* and merge and sort alphabetically these two commands
|
||||
|
||||
```cmake
|
||||
set(MLIR_TEST_DEPENDS
|
||||
FileCheck count not split-file
|
||||
mlir-capi-ir-test
|
||||
mlir-capi-irdl-test
|
||||
mlir-capi-llvm-test
|
||||
mlir-capi-pass-test
|
||||
mlir-capi-quant-test
|
||||
mlir-capi-rewrite-test
|
||||
mlir-capi-sparse-tensor-test
|
||||
mlir-capi-transform-test
|
||||
mlir-capi-transform-interpreter-test
|
||||
mlir-capi-translation-test
|
||||
mlir-linalg-ods-yaml-gen
|
||||
mlir-lsp-server
|
||||
mlir-opt
|
||||
mlir-query
|
||||
mlir-reduce
|
||||
mlir-tblgen
|
||||
mlir-translate
|
||||
tblgen-lsp-server
|
||||
tblgen-to-irdl
|
||||
)
|
||||
|
||||
set(MLIR_TEST_DEPENDS ${MLIR_TEST_DEPENDS}
|
||||
mlir-capi-pdl-test
|
||||
mlir-pdll-lsp-server
|
||||
mlir-pdll
|
||||
)
|
||||
```
|
||||
|
||||
Co-authored-by: Erick Ochoa <erick@ceci-nest-pas.me>
|
||||
---
|
||||
mlir/test/CMakeLists.txt | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt
|
||||
index df95e5db11f1..4d2d738b734e 100644
|
||||
--- a/mlir/test/CMakeLists.txt
|
||||
+++ b/mlir/test/CMakeLists.txt
|
||||
@@ -150,6 +150,10 @@ if(MLIR_ENABLE_CUDA_RUNNER)
|
||||
list(APPEND MLIR_TEST_DEPENDS mlir_cuda_runtime)
|
||||
endif()
|
||||
|
||||
+if(MLIR_ENABLE_EXECUTION_ENGINE)
|
||||
+ list(APPEND MLIR_TEST_DEPENDS mlir-capi-execution-engine-test)
|
||||
+endif()
|
||||
+
|
||||
if(MLIR_ENABLE_ROCM_RUNNER)
|
||||
list(APPEND MLIR_TEST_DEPENDS mlir_rocm_runtime)
|
||||
endif()
|
||||
--
|
||||
2.46.0
|
||||
|
@ -1,38 +0,0 @@
|
||||
From b443e55162861125a50048ae9bc521e98058b273 Mon Sep 17 00:00:00 2001
|
||||
From: Nikita Popov <npopov@redhat.com>
|
||||
Date: Mon, 16 Dec 2024 14:44:43 +0100
|
||||
Subject: [PATCH] [CMake] Use correct exports file for MLIR tools
|
||||
|
||||
llvm_add_tool() currently does not respect the passed project and
|
||||
puts all tools into LLVMExports.cmake. This means that we end up
|
||||
with binaries like mlir-opt in LLVMExports.cmake instead of
|
||||
MLIRTargets.cmake, where they should be.
|
||||
|
||||
Adjust llvm_add_tool() to take the project into account.
|
||||
---
|
||||
llvm/cmake/modules/AddLLVM.cmake | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
|
||||
index 006dfb6de3a199..6cf0ee1a54dbdb 100644
|
||||
--- a/llvm/cmake/modules/AddLLVM.cmake
|
||||
+++ b/llvm/cmake/modules/AddLLVM.cmake
|
||||
@@ -1483,7 +1483,7 @@ macro(llvm_add_tool project name)
|
||||
|
||||
if ( ${name} IN_LIST LLVM_TOOLCHAIN_TOOLS OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
|
||||
if( LLVM_BUILD_TOOLS )
|
||||
- get_target_export_arg(${name} LLVM export_to_llvmexports)
|
||||
+ get_target_export_arg(${name} ${project} export_to_llvmexports)
|
||||
install(TARGETS ${name}
|
||||
${export_to_llvmexports}
|
||||
RUNTIME DESTINATION ${${project}_TOOLS_INSTALL_DIR}
|
||||
@@ -1497,7 +1497,8 @@ macro(llvm_add_tool project name)
|
||||
endif()
|
||||
endif()
|
||||
if( LLVM_BUILD_TOOLS )
|
||||
- set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS ${name})
|
||||
+ string(TOUPPER "${project}" project_upper)
|
||||
+ set_property(GLOBAL APPEND PROPERTY ${project_upper}_EXPORTS ${name})
|
||||
endif()
|
||||
endif()
|
||||
get_subproject_title(subproject_title)
|
81
0001-SystemZ-Fix-ICE-with-i128-i64-uaddo-carry-chain.patch
Normal file
81
0001-SystemZ-Fix-ICE-with-i128-i64-uaddo-carry-chain.patch
Normal file
@ -0,0 +1,81 @@
|
||||
From 6d5697f7cb4e933d2f176c46b7ac05a9cbaeb8b6 Mon Sep 17 00:00:00 2001
|
||||
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
|
||||
Date: Thu, 23 Jan 2025 19:11:18 +0100
|
||||
Subject: [PATCH] [SystemZ] Fix ICE with i128->i64 uaddo carry chain
|
||||
|
||||
We can only optimize a uaddo_carry via specialized instruction
|
||||
if the carry was produced by another uaddo(_carry) instruction;
|
||||
there is already a check for that.
|
||||
|
||||
However, i128 uaddo(_carry) use a completely different mechanism;
|
||||
they indicate carry in a vector register instead of the CC flag.
|
||||
Thus, we must also check that we don't mix those two - that check
|
||||
has been missing.
|
||||
|
||||
Fixes: https://github.com/llvm/llvm-project/issues/124001
|
||||
---
|
||||
.../Target/SystemZ/SystemZISelLowering.cpp | 12 ++++++----
|
||||
llvm/test/CodeGen/SystemZ/pr124001.ll | 23 +++++++++++++++++++
|
||||
2 files changed, 31 insertions(+), 4 deletions(-)
|
||||
create mode 100644 llvm/test/CodeGen/SystemZ/pr124001.ll
|
||||
|
||||
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
|
||||
index 4040ab6d4510..1fb31c26e20d 100644
|
||||
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
|
||||
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
|
||||
@@ -4708,15 +4708,19 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
|
||||
}
|
||||
|
||||
static bool isAddCarryChain(SDValue Carry) {
|
||||
- while (Carry.getOpcode() == ISD::UADDO_CARRY)
|
||||
+ while (Carry.getOpcode() == ISD::UADDO_CARRY &&
|
||||
+ Carry->getValueType(0) != MVT::i128)
|
||||
Carry = Carry.getOperand(2);
|
||||
- return Carry.getOpcode() == ISD::UADDO;
|
||||
+ return Carry.getOpcode() == ISD::UADDO &&
|
||||
+ Carry->getValueType(0) != MVT::i128;
|
||||
}
|
||||
|
||||
static bool isSubBorrowChain(SDValue Carry) {
|
||||
- while (Carry.getOpcode() == ISD::USUBO_CARRY)
|
||||
+ while (Carry.getOpcode() == ISD::USUBO_CARRY &&
|
||||
+ Carry->getValueType(0) != MVT::i128)
|
||||
Carry = Carry.getOperand(2);
|
||||
- return Carry.getOpcode() == ISD::USUBO;
|
||||
+ return Carry.getOpcode() == ISD::USUBO &&
|
||||
+ Carry->getValueType(0) != MVT::i128;
|
||||
}
|
||||
|
||||
// Lower UADDO_CARRY/USUBO_CARRY nodes.
|
||||
diff --git a/llvm/test/CodeGen/SystemZ/pr124001.ll b/llvm/test/CodeGen/SystemZ/pr124001.ll
|
||||
new file mode 100644
|
||||
index 000000000000..9cf630a55dd6
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/CodeGen/SystemZ/pr124001.ll
|
||||
@@ -0,0 +1,23 @@
|
||||
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
|
||||
+
|
||||
+define i64 @test(i128 %in) {
|
||||
+; CHECK-LABEL: test:
|
||||
+; CHECK: # %bb.0:
|
||||
+; CHECK-NEXT: larl %r1, .LCPI0_0
|
||||
+; CHECK-NEXT: vl %v0, 0(%r2), 3
|
||||
+; CHECK-NEXT: vl %v1, 0(%r1), 3
|
||||
+; CHECK-NEXT: vaccq %v0, %v0, %v1
|
||||
+; CHECK-NEXT: vlgvg %r1, %v0, 1
|
||||
+; CHECK-NEXT: la %r2, 1(%r1)
|
||||
+; CHECK-NEXT: br %r14
|
||||
+ %1 = tail call { i128, i1 } @llvm.uadd.with.overflow.i128(i128 %in, i128 1)
|
||||
+ %2 = extractvalue { i128, i1 } %1, 1
|
||||
+ %3 = zext i1 %2 to i64
|
||||
+ %4 = add i64 %3, 1
|
||||
+ ret i64 %4
|
||||
+}
|
||||
+
|
||||
+declare { i128, i1 } @llvm.uadd.with.overflow.i128(i128, i128) #0
|
||||
+
|
||||
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
--
|
||||
2.48.1
|
||||
|
39
0001-cmake-Resolve-symlink-when-finding-install-prefix.patch
Normal file
39
0001-cmake-Resolve-symlink-when-finding-install-prefix.patch
Normal file
@ -0,0 +1,39 @@
|
||||
From 06774eb8a7dc0bc36b59e53310c7f5b5d89f6c29 Mon Sep 17 00:00:00 2001
|
||||
From: Nikita Popov <npopov@redhat.com>
|
||||
Date: Tue, 28 Jan 2025 12:31:49 +0100
|
||||
Subject: [PATCH] [cmake] Resolve symlink when finding install prefix
|
||||
|
||||
When determining the install prefix in LLVMConfig.cmake etc resolve
|
||||
symlinks in CMAKE_CURRENT_LIST_FILE first. The motivation for this
|
||||
is to support symlinks like `/usr/lib64/cmake/llvm` to
|
||||
`/usr/lib64/llvm19/lib/cmake/llvm`. This only works correctly if
|
||||
the paths are relative to the resolved symlink.
|
||||
|
||||
It's worth noting that this *mostly* already works out of the box,
|
||||
because cmake automatically does the symlink resolution when the
|
||||
library is found via CMAKE_PREFIX_PATH. It just doesn't happen
|
||||
when it's found via the default prefix path.
|
||||
---
|
||||
cmake/Modules/FindPrefixFromConfig.cmake | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/cmake/Modules/FindPrefixFromConfig.cmake b/cmake/Modules/FindPrefixFromConfig.cmake
|
||||
index 22211e4b72f2..3daff607ff84 100644
|
||||
--- a/cmake/Modules/FindPrefixFromConfig.cmake
|
||||
+++ b/cmake/Modules/FindPrefixFromConfig.cmake
|
||||
@@ -39,10 +39,10 @@ function(find_prefix_from_config out_var prefix_var path_to_leave)
|
||||
# install prefix, and avoid hard-coding any absolute paths.
|
||||
set(config_code
|
||||
"# Compute the installation prefix from this LLVMConfig.cmake file location."
|
||||
- "get_filename_component(${prefix_var} \"\${CMAKE_CURRENT_LIST_FILE}\" PATH)")
|
||||
+ "get_filename_component(${prefix_var} \"\${CMAKE_CURRENT_LIST_FILE}\" REALPATH)")
|
||||
# Construct the proper number of get_filename_component(... PATH)
|
||||
# calls to compute the installation prefix.
|
||||
- string(REGEX REPLACE "/" ";" _count "${path_to_leave}")
|
||||
+ string(REGEX REPLACE "/" ";" _count "${path_to_leave}/plus_one")
|
||||
foreach(p ${_count})
|
||||
list(APPEND config_code
|
||||
"get_filename_component(${prefix_var} \"\${${prefix_var}}\" PATH)")
|
||||
--
|
||||
2.48.1
|
||||
|
@ -1,80 +0,0 @@
|
||||
From c8f93f1958de1f59222a89dd64a573f91105e135 Mon Sep 17 00:00:00 2001
|
||||
From: Konrad Kleine <kkleine@redhat.com>
|
||||
Date: Tue, 3 Dec 2024 20:53:29 +0100
|
||||
Subject: [PATCH] [mlir] Specify deps via `LLVM_LINK_COMPONENTS`
|
||||
|
||||
This specifies the dependencies to link against with
|
||||
`LLVM_LINK_COMPONENTS` for the
|
||||
`mlir/test/Target/LLVM/MLIRTargetLLVMTests` binary.
|
||||
|
||||
Before, the dependencies where directly added to the
|
||||
`target_link_libraries()` call which caused the problems I describe
|
||||
next:
|
||||
|
||||
When doing a build of LLVM with MLIR I want to link against
|
||||
`libLLVM.so` instead of statically linking `libLLVMSupport.a`.
|
||||
MLIR on the other side seems to statically link against
|
||||
`libLLVMSupport.a` because when I link to the shared library `libLLVM.so` I get:
|
||||
|
||||
```
|
||||
CommandLine Error: Option 'aarch64-ptrauth-auth-checks' registered more than once!
|
||||
```
|
||||
|
||||
This error indicates that the `Support` library is linked twice in the `MLIRTargetLLVMTest` binary.
|
||||
|
||||
Here's the creation of the `MLIRTargetLLVMTest` binary before (Notice the
|
||||
`libLLVMSupport.a`):
|
||||
|
||||
```
|
||||
[6535/6847] : && /usr/bin/clang++ -O2 -flto=thin -ffat-lto-objects -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-U_FORTIFY_SOURCE,-D_FORTIFY_SOURCE=3 -Wp,-D_GLIBCXX_ASSERTIONS --config=/usr/lib/rpm/redhat/redhat-hardened-clang.cfg -fstack-protector-strong -mbranch-protection=standard -fasynchronous-unwind-tables -D_DEFAULT_SOURCE -Dasm=__asm__ -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wc++98-compat-extra-semi -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wsuggest-override -Wstring-conversion -Wmisleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -Wundef -Werror=mismatched-tags -O2 -g -DNDEBUG -Wl,-z,relro -Wl,--as-needed -Wl,-z,pack-relative-relocs -Wl,-z,now --config=/usr/lib/rpm/redhat/redhat-hardened-clang-ld.cfg -flto=thin -ffat-lto-objects -Wl,--build-id=sha1 -Wl,--gc-sections -fno-lto tools/mlir/unittests/Target/LLVM/CMakeFiles/MLIRTargetLLVMTests.dir/SerializeNVVMTarget.cpp.o tools/mlir/unittests/Target/LLVM/CMakeFiles/MLIRTargetLLVMTests.dir/SerializeROCDLTarget.cpp.o tools/mlir/unittests/Target/LLVM/CMakeFiles/MLIRTargetLLVMTests.dir/SerializeToLLVMBitcode.cpp.o -o tools/mlir/unittests/Target/LLVM/MLIRTargetLLVMTests -Wl,-rpath,/builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/redhat-linux-build/lib64 lib64/libllvm_gtest_main.a lib64/libllvm_gtest.a lib64/libMLIRTargetLLVM.a lib64/libMLIRNVVMTarget.a lib64/libMLIRROCDLTarget.a lib64/libMLIRGPUDialect.a lib64/libMLIRNVVMDialect.a lib64/libMLIRLLVMDialect.a lib64/libMLIRLLVMToLLVMIRTranslation.a lib64/libMLIRBuiltinToLLVMIRTranslation.a lib64/libMLIRNVVMToLLVMIRTranslation.a lib64/libMLIRROCDLToLLVMIRTranslation.a lib64/libMLIRGPUToLLVMIRTranslation.a lib64/libLLVMAArch64CodeGen.a lib64/libLLVMAArch64Desc.a lib64/libLLVMAArch64Info.a -lpthread lib64/libMLIRTargetLLVM.a lib64/libMLIRROCDLDialect.a lib64/libMLIRExecutionEngineUtils.a lib64/libMLIRGPUDialect.a lib64/libMLIRMemRefDialect.a lib64/libMLIRArithUtils.a lib64/libMLIRDialectUtils.a lib64/libMLIRComplexDialect.a lib64/libMLIRArithAttrToLLVMConversion.a lib64/libMLIRArithDialect.a lib64/libMLIRCastInterfaces.a lib64/libMLIRDialect.a lib64/libMLIRInferIntRangeCommon.a lib64/libMLIRUBDialect.a lib64/libMLIRShapedOpInterfaces.a lib64/libMLIRTargetLLVMIRExport.a lib64/libMLIRDLTIDialect.a lib64/libMLIRLLVMIRTransforms.a lib64/libMLIRNVVMDialect.a lib64/libMLIRLLVMDialect.a lib64/libMLIRFuncDialect.a lib64/libMLIRTransforms.a lib64/libMLIRMemorySlotInterfaces.a lib64/libMLIRCopyOpInterface.a lib64/libMLIRRuntimeVerifiableOpInterface.a lib64/libMLIRTranslateLib.a lib64/libMLIRParser.a lib64/libMLIRBytecodeReader.a lib64/libMLIRAsmParser.a lib64/libMLIRTransformUtils.a lib64/libMLIRSubsetOpInterface.a lib64/libMLIRValueBoundsOpInterface.a lib64/libMLIRDestinationStyleOpInterface.a lib64/libMLIRRewrite.a lib64/libMLIRRewritePDL.a lib64/libMLIRPDLToPDLInterp.a lib64/libMLIRPass.a lib64/libMLIRAnalysis.a lib64/libMLIRControlFlowInterfaces.a lib64/libMLIRInferIntRangeInterface.a lib64/libMLIRCallInterfaces.a lib64/libMLIRDataLayoutInterfaces.a lib64/libMLIRViewLikeInterface.a lib64/libMLIRLoopLikeInterface.a lib64/libMLIRPresburger.a lib64/libMLIRPDLInterpDialect.a lib64/libMLIRFunctionInterfaces.a lib64/libMLIRPDLDialect.a lib64/libMLIRSideEffectInterfaces.a lib64/libMLIRInferTypeOpInterface.a lib64/libMLIRIR.a lib64/libMLIRSupport.a lib64/libLLVM.so.19.1 lib64/libLLVMAArch64Utils.a lib64/libLLVMAsmPrinter.a lib64/libLLVMCFGuard.a lib64/libLLVMGlobalISel.a lib64/libLLVMSelectionDAG.a lib64/libLLVMCodeGen.a lib64/libLLVMScalarOpts.a lib64/libLLVMAggressiveInstCombine.a lib64/libLLVMInstCombine.a lib64/libLLVMBitWriter.a lib64/libLLVMObjCARCOpts.a lib64/libLLVMCodeGenTypes.a lib64/libLLVMTarget.a lib64/libLLVMVectorize.a lib64/libLLVMTransformUtils.a lib64/libLLVMAnalysis.a lib64/libLLVMProfileData.a lib64/libLLVMSymbolize.a lib64/libLLVMDebugInfoDWARF.a lib64/libLLVMDebugInfoPDB.a lib64/libLLVMObject.a lib64/libLLVMMCParser.a lib64/libLLVMMC.a lib64/libLLVMIRReader.a lib64/libLLVMBitReader.a lib64/libLLVMAsmParser.a lib64/libLLVMTextAPI.a lib64/libLLVMDebugInfoCodeView.a lib64/libLLVMDebugInfoMSF.a lib64/libLLVMDebugInfoBTF.a lib64/libLLVMCore.a lib64/libLLVMBinaryFormat.a lib64/libLLVMRemarks.a lib64/libLLVMBitstreamReader.a lib64/libLLVMTargetParser.a lib64/libLLVMSupport.a lib64/libLLVMDemangle.a -lrt -ldl -lm /usr/lib64/libz.so /usr/lib64/libzstd.so && :
|
||||
```
|
||||
|
||||
Here's the full error:
|
||||
|
||||
```
|
||||
[24/25] cd /builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/redhat-linux-build/tools/mlir/test && /usr/bin/python3 /builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/redhat-linux-build/./bin/llvm-lit -vv /builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/redhat-linux-build/tools/mlir/test
|
||||
: CommandLine Error: Option 'aarch64-ptrauth-auth-checks' registered more than once!
|
||||
LLVM ERROR: inconsistency in registered CommandLine options
|
||||
llvm-lit: /builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/utils/lit/lit/formats/googletest.py:38: warning: unable to discover google-tests in '/builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/redhat-linux-build/tools/mlir/unittests/Target/LLVM/./MLIRTargetLLVMTests': Command '['/builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/redhat-linux-build/tools/mlir/unittests/Target/LLVM/./MLIRTargetLLVMTests', '--gtest_list_tests', '--gtest_filter=-*DISABLED_*']' died with <Signals.SIGABRT: 6>.. Process output: b''
|
||||
error: filter did not match any tests (of 2704 discovered). Use '--allow-empty-runs' to suppress this error.
|
||||
FAILED: tools/mlir/test/CMakeFiles/check-mlir /builddir/build/BUILD/llvm-19.1.3-build/llvm-project-19.1.3.src/llvm/redhat-linux-build/tools/mlir/test/CMakeFiles/check-mlir
|
||||
```
|
||||
|
||||
Here's the CMake invocation:
|
||||
|
||||
```
|
||||
/usr/bin/cmake -S . -B redhat-linux-build -DCMAKE_C_FLAGS_RELEASE:STRING=-DNDEBUG -DCMAKE_CXX_FLAGS_RELEASE:STRING=-DNDEBUG -DCMAKE_Fortran_FLAGS_RELEASE:STRING=-DNDEBUG -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_INSTALL_DO_STRIP:BOOL=OFF -DCMAKE_INSTALL_PREFIX:PATH=/usr -DINCLUDE_INSTALL_DIR:PATH=/usr/include -DLIB_INSTALL_DIR:PATH=/usr/lib64 -DSYSCONF_INSTALL_DIR:PATH=/etc -DSHARE_INSTALL_PREFIX:PATH=/usr/share -DLIB_SUFFIX=64 -DBUILD_SHARED_LIBS:BOOL=ON -G Ninja '' -DCLANG_BUILD_EXAMPLES:BOOL=OFF -DCLANG_CONFIG_FILE_SYSTEM_DIR=/etc/clang/ -DCLANG_DEFAULT_PIE_ON_LINUX=OFF -DCLANG_DEFAULT_UNWINDLIB=libgcc -DCLANG_ENABLE_ARCMT:BOOL=ON -DCLANG_ENABLE_STATIC_ANALYZER:BOOL=ON -DCLANG_INCLUDE_DOCS:BOOL=ON -DCLANG_INCLUDE_TESTS:BOOL=ON -DCLANG_LINK_CLANG_DYLIB=ON -DCLANG_PLUGIN_SUPPORT:BOOL=ON '-DCLANG_REPOSITORY_STRING=Fedora 19.1.3-5.fc42' -DLLVM_EXTERNAL_CLANG_TOOLS_EXTRA_SOURCE_DIR=../clang-tools-extra -DCLANG_RESOURCE_DIR=../lib/clang/19 -DCOMPILER_RT_INCLUDE_TESTS:BOOL=OFF -DCOMPILER_RT_INSTALL_PATH=/usr/lib/clang/19 -DLLVM_ENABLE_DOXYGEN:BOOL=OFF -DLLVM_ENABLE_SPHINX:BOOL=ON -DLLVM_BUILD_DOCS:BOOL=ON -DSPHINX_EXECUTABLE=/usr/bin/sphinx-build-3 -DSPHINX_OUTPUT_HTML:BOOL=OFF -DSPHINX_OUTPUT_MAN:BOOL=ON -DSPHINX_WARNINGS_AS_ERRORS=OFF -DLLDB_DISABLE_CURSES:BOOL=OFF -DLLDB_DISABLE_LIBEDIT:BOOL=OFF -DLLDB_DISABLE_PYTHON:BOOL=OFF -DLLDB_ENFORCE_STRICT_TEST_REQUIREMENTS:BOOL=ON -DLLVM_APPEND_VC_REV:BOOL=OFF -DLLVM_BUILD_EXAMPLES:BOOL=OFF -DLLVM_BUILD_EXTERNAL_COMPILER_RT:BOOL=ON -DLLVM_BUILD_LLVM_DYLIB:BOOL=ON -DLLVM_BUILD_RUNTIME:BOOL=ON -DLLVM_BUILD_TOOLS:BOOL=ON -DLLVM_BUILD_UTILS:BOOL=ON -DLLVM_COMMON_CMAKE_UTILS=/usr/share/llvm/cmake -DLLVM_DEFAULT_TARGET_TRIPLE=aarch64-redhat-linux-gnu -DLLVM_DYLIB_COMPONENTS=all -DLLVM_ENABLE_EH=ON -DLLVM_ENABLE_FFI:BOOL=ON -DLLVM_ENABLE_LIBCXX:BOOL=OFF -DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON '-DLLVM_ENABLE_PROJECTS=clang;clang-tools-extra;lld;lldb;mlir' -DLLVM_ENABLE_RTTI:BOOL=ON '-DLLVM_ENABLE_RUNTIMES=compiler-rt;openmp;offload' -DLLVM_ENABLE_ZLIB:BOOL=ON -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=AVR -DLLVM_INCLUDE_BENCHMARKS=OFF -DLLVM_INCLUDE_EXAMPLES:BOOL=ON -DLLVM_INCLUDE_TOOLS:BOOL=ON -DLLVM_INCLUDE_UTILS:BOOL=ON -DLLVM_INSTALL_TOOLCHAIN_ONLY:BOOL=OFF -DLLVM_INSTALL_UTILS:BOOL=ON -DLLVM_LINK_LLVM_DYLIB:BOOL=ON -DLLVM_PARALLEL_LINK_JOBS=1 -DLLVM_TARGETS_TO_BUILD=all -DLLVM_TOOLS_INSTALL_DIR:PATH=bin -DLLVM_UNREACHABLE_OPTIMIZE:BOOL=OFF -DLLVM_USE_PERF:BOOL=ON -DLLVM_UTILS_INSTALL_DIR:PATH=bin -DMLIR_INCLUDE_DOCS:BOOL=ON -DMLIR_INCLUDE_TESTS:BOOL=ON -DMLIR_INCLUDE_INTEGRATION_TESTS:BOOL=OFF -DMLIR_INSTALL_AGGREGATE_OBJECTS=OFF -DMLIR_BUILD_MLIR_C_DYLIB=ON -DMLIR_ENABLE_BINDINGS_PYTHON:BOOL=ON -DOPENMP_INSTALL_LIBDIR=lib64 -DLIBOMP_INSTALL_ALIASES=OFF -DLLVM_BUILD_TESTS:BOOL=ON -DLLVM_INCLUDE_TESTS:BOOL=ON -DLLVM_INSTALL_GTEST:BOOL=ON -DLLVM_LIT_ARGS=-vv -DLLVM_UNITTEST_LINK_FLAGS=-fno-lto -DBUILD_SHARED_LIBS:BOOL=OFF -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=/usr -DENABLE_LINKER_BUILD_ID:BOOL=ON -DOFFLOAD_INSTALL_LIBDIR=lib64 -DPython3_EXECUTABLE=/usr/bin/python3 -DCMAKE_SKIP_INSTALL_RPATH:BOOL=ON -DPPC_LINUX_DEFAULT_IEEELONGDOUBLE=ON -DLLVM_LIBDIR_SUFFIX=64 -DLLVM_BINUTILS_INCDIR=/usr/include -DLLVM_VERSION_SUFFIX=
|
||||
```
|
||||
---
|
||||
mlir/unittests/Target/LLVM/CMakeLists.txt | 5 ++---
|
||||
1 file changed, 2 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/mlir/unittests/Target/LLVM/CMakeLists.txt b/mlir/unittests/Target/LLVM/CMakeLists.txt
|
||||
index 6d612548a94c..0c61d222dedf 100644
|
||||
--- a/mlir/unittests/Target/LLVM/CMakeLists.txt
|
||||
+++ b/mlir/unittests/Target/LLVM/CMakeLists.txt
|
||||
@@ -1,11 +1,11 @@
|
||||
+set(LLVM_LINK_COMPONENTS nativecodegen)
|
||||
+
|
||||
add_mlir_unittest(MLIRTargetLLVMTests
|
||||
SerializeNVVMTarget.cpp
|
||||
SerializeROCDLTarget.cpp
|
||||
SerializeToLLVMBitcode.cpp
|
||||
)
|
||||
|
||||
-llvm_map_components_to_libnames(llvm_libs nativecodegen)
|
||||
-
|
||||
target_link_libraries(MLIRTargetLLVMTests
|
||||
PRIVATE
|
||||
MLIRTargetLLVM
|
||||
@@ -19,7 +19,6 @@ target_link_libraries(MLIRTargetLLVMTests
|
||||
MLIRNVVMToLLVMIRTranslation
|
||||
MLIRROCDLToLLVMIRTranslation
|
||||
MLIRGPUToLLVMIRTranslation
|
||||
- ${llvm_libs}
|
||||
)
|
||||
|
||||
if (DEFINED LLVM_NATIVE_TARGET)
|
||||
--
|
||||
2.46.0
|
||||
|
@ -1,77 +0,0 @@
|
||||
From 1d043550c7601ca776b0318b9b913e5ecf325baf Mon Sep 17 00:00:00 2001
|
||||
From: Tulio Magno Quites Machado Filho <tuliom@redhat.com>
|
||||
Date: Tue, 5 Sep 2023 10:35:37 -0300
|
||||
Subject: [PATCH] Reuse the library directory
|
||||
|
||||
Prefer to get the path to the shared libraries from config.llvm_shlib_dir.
|
||||
Fallback to the previous path only if config.llvm_shlib_dir is not
|
||||
defined.
|
||||
|
||||
This ensures the test will pass regardless of the build configuration
|
||||
used downstream.
|
||||
---
|
||||
mlir/test/lit.cfg.py | 1 +
|
||||
mlir/test/python/execution_engine.py | 12 +++++++-----
|
||||
2 files changed, 8 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py
|
||||
--- a/mlir/test/lit.cfg.py 2024-08-05 10:40:33.000000000 +0200
|
||||
+++ b/mlir/test/lit.cfg.py 2024-08-14 19:17:55.160470316 +0200
|
||||
@@ -164,6 +164,7 @@
|
||||
ToolSubst("transform-opt-ch2", unresolved="ignore"),
|
||||
ToolSubst("transform-opt-ch3", unresolved="ignore"),
|
||||
ToolSubst("transform-opt-ch4", unresolved="ignore"),
|
||||
+ ToolSubst("%llvm_shlib_dir", config.llvm_shlib_dir, unresolved="ignore"),
|
||||
ToolSubst("mlir-transform-opt", unresolved="ignore"),
|
||||
ToolSubst("%mlir_lib_dir", config.mlir_lib_dir, unresolved="ignore"),
|
||||
ToolSubst("%mlir_src_dir", config.mlir_src_root, unresolved="ignore"),
|
||||
diff -ruN mlir-19.1.0-rc2.src.orig/test/python/execution_engine.py mlir-19.1.0-rc2.src/test/python/execution_engine.py
|
||||
--- a/mlir/test/python/execution_engine.py 2024-08-05 10:40:33.000000000 +0200
|
||||
+++ b/mlir/test/python/execution_engine.py 2024-08-14 19:17:20.822219824 +0200
|
||||
@@ -1,4 +1,4 @@
|
||||
-# RUN: %PYTHON %s 2>&1 | FileCheck %s
|
||||
+# RUN: env LLVM_SHLIB_DIR=%llvm_shlib_dir %PYTHON %s 2>&1 | FileCheck %s
|
||||
# REQUIRES: host-supports-jit
|
||||
import gc, sys, os, tempfile
|
||||
from mlir.ir import *
|
||||
@@ -6,6 +6,9 @@
|
||||
from mlir.execution_engine import *
|
||||
from mlir.runtime import *
|
||||
|
||||
+_DEFAULT_LIB_DIR = "../../../../lib"
|
||||
+LIB_DIR = os.getenv("LLVM_SHLIB_DIR", _DEFAULT_LIB_DIR)
|
||||
+
|
||||
|
||||
# Log everything to stderr and flush so that we have a unified stream to match
|
||||
# errors/info emitted by MLIR to stderr.
|
||||
@@ -613,6 +616,7 @@
|
||||
shared_libs = [
|
||||
"../../../../bin/mlir_runner_utils.dll",
|
||||
"../../../../bin/mlir_c_runner_utils.dll",
|
||||
+
|
||||
]
|
||||
elif sys.platform == "darwin":
|
||||
shared_libs = [
|
||||
@@ -621,8 +625,9 @@
|
||||
]
|
||||
else:
|
||||
shared_libs = [
|
||||
- "../../../../lib/libmlir_runner_utils.so",
|
||||
- "../../../../lib/libmlir_c_runner_utils.so",
|
||||
+ LIB_DIR + "/libmlir_runner_utils.so",
|
||||
+ LIB_DIR + "/libmlir_c_runner_utils.so",
|
||||
+
|
||||
]
|
||||
|
||||
execution_engine = ExecutionEngine(
|
||||
@@ -664,8 +669,8 @@
|
||||
]
|
||||
else:
|
||||
shared_libs = [
|
||||
- "../../../../lib/libmlir_runner_utils.so",
|
||||
- "../../../../lib/libmlir_c_runner_utils.so",
|
||||
+ LIB_DIR + "/libmlir_runner_utils.so",
|
||||
+ LIB_DIR + "/libmlir_c_runner_utils.so",
|
||||
]
|
||||
|
||||
execution_engine = ExecutionEngine(
|
@ -1,62 +0,0 @@
|
||||
From b2edeb58b8cb3268acee425cd52b406eb60a8095 Mon Sep 17 00:00:00 2001
|
||||
From: Nikita Popov <npopov@redhat.com>
|
||||
Date: Wed, 9 Oct 2024 11:29:30 +0200
|
||||
Subject: [PATCH] [openmp] Add option to disable tsan tests (#111548)
|
||||
|
||||
This adds a OPENMP_TEST_ENABLE_TSAN option that allows to override
|
||||
whether tests using tsan will be enabled. The option defaults to the
|
||||
existing auto-detection.
|
||||
|
||||
The background here is
|
||||
https://github.com/llvm/llvm-project/issues/111492, where we have some
|
||||
systems where tsan doesn't work, but we do still want to build it and
|
||||
run tests that don't use tsan.
|
||||
---
|
||||
openmp/cmake/OpenMPTesting.cmake | 3 +++
|
||||
openmp/tools/archer/tests/CMakeLists.txt | 2 +-
|
||||
openmp/tools/archer/tests/lit.site.cfg.in | 2 +-
|
||||
3 files changed, 5 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/openmp/cmake/OpenMPTesting.cmake b/openmp/cmake/OpenMPTesting.cmake
|
||||
index c67ad8b1cbd9..14cc5c67d84c 100644
|
||||
--- a/openmp/cmake/OpenMPTesting.cmake
|
||||
+++ b/openmp/cmake/OpenMPTesting.cmake
|
||||
@@ -163,6 +163,9 @@ else()
|
||||
set(OPENMP_TEST_COMPILER_HAS_OMIT_FRAME_POINTER_FLAGS 1)
|
||||
endif()
|
||||
|
||||
+set(OPENMP_TEST_ENABLE_TSAN "${OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS}" CACHE BOOL
|
||||
+ "Whether to enable tests using tsan")
|
||||
+
|
||||
# Function to set compiler features for use in lit.
|
||||
function(update_test_compiler_features)
|
||||
set(FEATURES "[")
|
||||
diff --git a/openmp/tools/archer/tests/CMakeLists.txt b/openmp/tools/archer/tests/CMakeLists.txt
|
||||
index 5de91148fa4b..412c7d63725e 100644
|
||||
--- a/openmp/tools/archer/tests/CMakeLists.txt
|
||||
+++ b/openmp/tools/archer/tests/CMakeLists.txt
|
||||
@@ -28,7 +28,7 @@ macro(pythonize_bool var)
|
||||
endmacro()
|
||||
|
||||
pythonize_bool(LIBARCHER_HAVE_LIBATOMIC)
|
||||
-pythonize_bool(OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS)
|
||||
+pythonize_bool(OPENMP_TEST_ENABLE_TSAN)
|
||||
|
||||
set(ARCHER_TSAN_TEST_DEPENDENCE "")
|
||||
if(TARGET tsan)
|
||||
diff --git a/openmp/tools/archer/tests/lit.site.cfg.in b/openmp/tools/archer/tests/lit.site.cfg.in
|
||||
index 55edfde9738e..ddcb7b8bc3a5 100644
|
||||
--- a/openmp/tools/archer/tests/lit.site.cfg.in
|
||||
+++ b/openmp/tools/archer/tests/lit.site.cfg.in
|
||||
@@ -12,7 +12,7 @@ config.omp_library_dir = "@LIBOMP_LIBRARY_DIR@"
|
||||
config.omp_header_dir = "@LIBOMP_INCLUDE_DIR@"
|
||||
config.operating_system = "@CMAKE_SYSTEM_NAME@"
|
||||
config.has_libatomic = @LIBARCHER_HAVE_LIBATOMIC@
|
||||
-config.has_tsan = @OPENMP_TEST_COMPILER_HAS_TSAN_FLAGS@
|
||||
+config.has_tsan = @OPENMP_TEST_ENABLE_TSAN@
|
||||
|
||||
config.test_archer_flags = "@LIBARCHER_TEST_FLAGS@"
|
||||
config.libarcher_obj_root = "@CMAKE_CURRENT_BINARY_DIR@"
|
||||
--
|
||||
2.46.0
|
||||
|
@ -1,51 +0,0 @@
|
||||
From 90a05f32166c4a45224a5eedbec9c5c7e21d2dbf Mon Sep 17 00:00:00 2001
|
||||
From: Nikita Popov <npopov@redhat.com>
|
||||
Date: Fri, 17 Jan 2025 09:26:49 +0100
|
||||
Subject: [PATCH] [openmp] Support CET in z_Linux_asm.S (#123213)
|
||||
|
||||
When libomp is built with -cf-protection, add endbr instructions to the
|
||||
start of functions for Intel CET support.
|
||||
---
|
||||
openmp/runtime/src/z_Linux_asm.S | 12 ++++++++++++
|
||||
1 file changed, 12 insertions(+)
|
||||
|
||||
diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S
|
||||
index cc5344cdd124..0bf9f07a13f1 100644
|
||||
--- a/openmp/runtime/src/z_Linux_asm.S
|
||||
+++ b/openmp/runtime/src/z_Linux_asm.S
|
||||
@@ -19,6 +19,16 @@
|
||||
|
||||
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
|
||||
|
||||
+# if defined(__ELF__) && defined(__CET__) && defined(__has_include)
|
||||
+# if __has_include(<cet.h>)
|
||||
+# include <cet.h>
|
||||
+# endif
|
||||
+# endif
|
||||
+
|
||||
+# if !defined(_CET_ENDBR)
|
||||
+# define _CET_ENDBR
|
||||
+# endif
|
||||
+
|
||||
# if KMP_MIC
|
||||
// the 'delay r16/r32/r64' should be used instead of the 'pause'.
|
||||
// The delay operation has the effect of removing the current thread from
|
||||
@@ -66,6 +76,7 @@
|
||||
ALIGN 4
|
||||
.globl KMP_PREFIX_UNDERSCORE($0)
|
||||
KMP_PREFIX_UNDERSCORE($0):
|
||||
+ _CET_ENDBR
|
||||
.endmacro
|
||||
# else // KMP_OS_DARWIN
|
||||
# define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols
|
||||
@@ -92,6 +103,7 @@ KMP_PREFIX_UNDERSCORE($0):
|
||||
.globl KMP_PREFIX_UNDERSCORE(\proc)
|
||||
KMP_PREFIX_UNDERSCORE(\proc):
|
||||
.cfi_startproc
|
||||
+ _CET_ENDBR
|
||||
.endm
|
||||
.macro KMP_CFI_DEF_OFFSET sz
|
||||
.cfi_def_cfa_offset \sz
|
||||
--
|
||||
2.47.1
|
||||
|
@ -1,205 +0,0 @@
|
||||
From 5fb4d7f6079a76b2907ccc8c53c7c509c30a3dca Mon Sep 17 00:00:00 2001
|
||||
From: Nikita Popov <npopov@redhat.com>
|
||||
Date: Thu, 10 Oct 2024 12:47:33 +0000
|
||||
Subject: [PATCH] [openmp] Use core_siblings_list if physical_package_id not
|
||||
available
|
||||
|
||||
On powerpc, physical_package_id may not be available. Currently,
|
||||
this causes openmp to fall back to flat topology and various
|
||||
affinity tests fail.
|
||||
|
||||
Fix this by parsing core_siblings_list to deterimine which cpus
|
||||
belong to the same socket. This matches what the testing code
|
||||
does. The code to parse the CPU list format thankfully already
|
||||
exists.
|
||||
|
||||
Fixes https://github.com/llvm/llvm-project/issues/111809.
|
||||
---
|
||||
openmp/runtime/src/kmp_affinity.cpp | 100 +++++++++++++------
|
||||
openmp/runtime/test/affinity/kmp-hw-subset.c | 2 +-
|
||||
2 files changed, 72 insertions(+), 30 deletions(-)
|
||||
|
||||
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
|
||||
index cf5cad04eb57..c3d5ecf1345e 100644
|
||||
--- a/openmp/runtime/src/kmp_affinity.cpp
|
||||
+++ b/openmp/runtime/src/kmp_affinity.cpp
|
||||
@@ -1589,15 +1589,13 @@ kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
|
||||
return buf;
|
||||
}
|
||||
|
||||
-// Return (possibly empty) affinity mask representing the offline CPUs
|
||||
-// Caller must free the mask
|
||||
-kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
|
||||
- kmp_affin_mask_t *offline;
|
||||
- KMP_CPU_ALLOC(offline);
|
||||
- KMP_CPU_ZERO(offline);
|
||||
+static kmp_affin_mask_t *__kmp_parse_cpu_list(const char *path) {
|
||||
+ kmp_affin_mask_t *mask;
|
||||
+ KMP_CPU_ALLOC(mask);
|
||||
+ KMP_CPU_ZERO(mask);
|
||||
#if KMP_OS_LINUX
|
||||
int n, begin_cpu, end_cpu;
|
||||
- kmp_safe_raii_file_t offline_file;
|
||||
+ kmp_safe_raii_file_t file;
|
||||
auto skip_ws = [](FILE *f) {
|
||||
int c;
|
||||
do {
|
||||
@@ -1606,29 +1604,29 @@ kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
|
||||
if (c != EOF)
|
||||
ungetc(c, f);
|
||||
};
|
||||
- // File contains CSV of integer ranges representing the offline CPUs
|
||||
+ // File contains CSV of integer ranges representing the CPUs
|
||||
// e.g., 1,2,4-7,9,11-15
|
||||
- int status = offline_file.try_open("/sys/devices/system/cpu/offline", "r");
|
||||
+ int status = file.try_open(path, "r");
|
||||
if (status != 0)
|
||||
- return offline;
|
||||
- while (!feof(offline_file)) {
|
||||
- skip_ws(offline_file);
|
||||
- n = fscanf(offline_file, "%d", &begin_cpu);
|
||||
+ return mask;
|
||||
+ while (!feof(file)) {
|
||||
+ skip_ws(file);
|
||||
+ n = fscanf(file, "%d", &begin_cpu);
|
||||
if (n != 1)
|
||||
break;
|
||||
- skip_ws(offline_file);
|
||||
- int c = fgetc(offline_file);
|
||||
+ skip_ws(file);
|
||||
+ int c = fgetc(file);
|
||||
if (c == EOF || c == ',') {
|
||||
// Just single CPU
|
||||
end_cpu = begin_cpu;
|
||||
} else if (c == '-') {
|
||||
// Range of CPUs
|
||||
- skip_ws(offline_file);
|
||||
- n = fscanf(offline_file, "%d", &end_cpu);
|
||||
+ skip_ws(file);
|
||||
+ n = fscanf(file, "%d", &end_cpu);
|
||||
if (n != 1)
|
||||
break;
|
||||
- skip_ws(offline_file);
|
||||
- c = fgetc(offline_file); // skip ','
|
||||
+ skip_ws(file);
|
||||
+ c = fgetc(file); // skip ','
|
||||
} else {
|
||||
// Syntax problem
|
||||
break;
|
||||
@@ -1638,13 +1636,19 @@ kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
|
||||
end_cpu >= __kmp_xproc || begin_cpu > end_cpu) {
|
||||
continue;
|
||||
}
|
||||
- // Insert [begin_cpu, end_cpu] into offline mask
|
||||
+ // Insert [begin_cpu, end_cpu] into mask
|
||||
for (int cpu = begin_cpu; cpu <= end_cpu; ++cpu) {
|
||||
- KMP_CPU_SET(cpu, offline);
|
||||
+ KMP_CPU_SET(cpu, mask);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
- return offline;
|
||||
+ return mask;
|
||||
+}
|
||||
+
|
||||
+// Return (possibly empty) affinity mask representing the offline CPUs
|
||||
+// Caller must free the mask
|
||||
+kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
|
||||
+ return __kmp_parse_cpu_list("/sys/devices/system/cpu/offline");
|
||||
}
|
||||
|
||||
// Return the number of available procs
|
||||
@@ -3175,6 +3179,37 @@ static inline const char *__kmp_cpuinfo_get_envvar() {
|
||||
return envvar;
|
||||
}
|
||||
|
||||
+static bool __kmp_package_id_from_core_siblings_list(unsigned **threadInfo,
|
||||
+ unsigned num_avail,
|
||||
+ unsigned idx) {
|
||||
+ if (!KMP_AFFINITY_CAPABLE())
|
||||
+ return false;
|
||||
+
|
||||
+ char path[256];
|
||||
+ KMP_SNPRINTF(path, sizeof(path),
|
||||
+ "/sys/devices/system/cpu/cpu%u/topology/core_siblings_list",
|
||||
+ threadInfo[idx][osIdIndex]);
|
||||
+ kmp_affin_mask_t *siblings = __kmp_parse_cpu_list(path);
|
||||
+ for (unsigned i = 0; i < num_avail; ++i) {
|
||||
+ unsigned cpu_id = threadInfo[i][osIdIndex];
|
||||
+ KMP_ASSERT(cpu_id < __kmp_affin_mask_size * CHAR_BIT);
|
||||
+ if (!KMP_CPU_ISSET(cpu_id, siblings))
|
||||
+ continue;
|
||||
+ if (threadInfo[i][pkgIdIndex] == UINT_MAX) {
|
||||
+ // Arbitrarily pick the first index we encounter, it only matters that
|
||||
+ // the value is the same for all siblings.
|
||||
+ threadInfo[i][pkgIdIndex] = idx;
|
||||
+ } else if (threadInfo[i][pkgIdIndex] != idx) {
|
||||
+ // Contradictory sibling lists.
|
||||
+ KMP_CPU_FREE(siblings);
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+ KMP_ASSERT(threadInfo[idx][pkgIdIndex] != UINT_MAX);
|
||||
+ KMP_CPU_FREE(siblings);
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
|
||||
// affinity map. On AIX, the map is obtained through system SRAD (Scheduler
|
||||
// Resource Allocation Domain).
|
||||
@@ -3550,18 +3585,13 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
|
||||
return false;
|
||||
}
|
||||
|
||||
- // Check for missing fields. The osId field must be there, and we
|
||||
- // currently require that the physical id field is specified, also.
|
||||
+ // Check for missing fields. The osId field must be there. The physical
|
||||
+ // id field will be checked later.
|
||||
if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
|
||||
CLEANUP_THREAD_INFO;
|
||||
*msg_id = kmp_i18n_str_MissingProcField;
|
||||
return false;
|
||||
}
|
||||
- if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
|
||||
- CLEANUP_THREAD_INFO;
|
||||
- *msg_id = kmp_i18n_str_MissingPhysicalIDField;
|
||||
- return false;
|
||||
- }
|
||||
|
||||
// Skip this proc if it is not included in the machine model.
|
||||
if (KMP_AFFINITY_CAPABLE() &&
|
||||
@@ -3591,6 +3621,18 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
|
||||
}
|
||||
*line = 0;
|
||||
|
||||
+ // At least on powerpc, Linux may return -1 for physical_package_id. Try
|
||||
+ // to reconstruct topology from core_siblings_list in that case.
|
||||
+ for (i = 0; i < num_avail; ++i) {
|
||||
+ if (threadInfo[i][pkgIdIndex] == UINT_MAX) {
|
||||
+ if (!__kmp_package_id_from_core_siblings_list(threadInfo, num_avail, i)) {
|
||||
+ CLEANUP_THREAD_INFO;
|
||||
+ *msg_id = kmp_i18n_str_MissingPhysicalIDField;
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
#if KMP_MIC && REDUCE_TEAM_SIZE
|
||||
unsigned teamSize = 0;
|
||||
#endif // KMP_MIC && REDUCE_TEAM_SIZE
|
||||
diff --git a/openmp/runtime/test/affinity/kmp-hw-subset.c b/openmp/runtime/test/affinity/kmp-hw-subset.c
|
||||
index 606fcdfbada9..0b49969bd3b1 100644
|
||||
--- a/openmp/runtime/test/affinity/kmp-hw-subset.c
|
||||
+++ b/openmp/runtime/test/affinity/kmp-hw-subset.c
|
||||
@@ -25,7 +25,7 @@ static int compare_hw_subset_places(const place_list_t *openmp_places,
|
||||
expected_per_place = nthreads_per_core;
|
||||
} else {
|
||||
expected_total = nsockets;
|
||||
- expected_per_place = ncores_per_socket;
|
||||
+ expected_per_place = ncores_per_socket * nthreads_per_core;
|
||||
}
|
||||
if (openmp_places->num_places != expected_total) {
|
||||
fprintf(stderr, "error: KMP_HW_SUBSET did not half each resource layer!\n");
|
||||
--
|
||||
2.47.0
|
||||
|
@ -1,86 +0,0 @@
|
||||
From ccc2b792e57d632bc887b226a4e7f0a8189eab8b Mon Sep 17 00:00:00 2001
|
||||
From: Josh Stone <jistone@redhat.com>
|
||||
Date: Mon, 4 Nov 2024 16:37:49 -0800
|
||||
Subject: [PATCH] [profile] Use base+vaddr for `__llvm_write_binary_ids` note
|
||||
pointers
|
||||
|
||||
This function is always examining its own ELF headers in memory, but it
|
||||
was trying to use conditions between examining files or memory, and it
|
||||
wasn't accounting for LOAD offsets at runtime. This is especially bad if
|
||||
a loaded segment has additional padding that's not in the file offsets.
|
||||
|
||||
Now we do a first scan of the program headers to figure out the runtime
|
||||
base address based on `PT_PHDR` and/or `PT_DYNAMIC` (else assume zero),
|
||||
similar to libc's `do_start`. Then each `PT_NOTE` pointer is simply the
|
||||
base plus the segments's `pt_vaddr`, which includes LOAD offsets.
|
||||
|
||||
Fixes #114605
|
||||
---
|
||||
.../lib/profile/InstrProfilingPlatformLinux.c | 40 ++++++++-----------
|
||||
1 file changed, 16 insertions(+), 24 deletions(-)
|
||||
|
||||
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
|
||||
index e2c06d51e0c6..c365129a0768 100644
|
||||
--- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
|
||||
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
|
||||
@@ -194,41 +194,33 @@ static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note,
|
||||
*/
|
||||
COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
|
||||
extern const ElfW(Ehdr) __ehdr_start __attribute__((visibility("hidden")));
|
||||
+ extern ElfW(Dyn) _DYNAMIC[] __attribute__((weak, visibility("hidden")));
|
||||
+
|
||||
const ElfW(Ehdr) *ElfHeader = &__ehdr_start;
|
||||
const ElfW(Phdr) *ProgramHeader =
|
||||
(const ElfW(Phdr) *)((uintptr_t)ElfHeader + ElfHeader->e_phoff);
|
||||
|
||||
+ /* Compute the added base address in case of position-independent code. */
|
||||
+ uintptr_t Base = 0;
|
||||
+ for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) {
|
||||
+ if (ProgramHeader[I].p_type == PT_PHDR)
|
||||
+ Base = (uintptr_t)ProgramHeader - ProgramHeader[I].p_vaddr;
|
||||
+ if (ProgramHeader[I].p_type == PT_DYNAMIC && _DYNAMIC)
|
||||
+ Base = (uintptr_t)_DYNAMIC - ProgramHeader[I].p_vaddr;
|
||||
+ }
|
||||
+
|
||||
int TotalBinaryIdsSize = 0;
|
||||
- uint32_t I;
|
||||
/* Iterate through entries in the program header. */
|
||||
- for (I = 0; I < ElfHeader->e_phnum; I++) {
|
||||
+ for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) {
|
||||
/* Look for the notes segment in program header entries. */
|
||||
if (ProgramHeader[I].p_type != PT_NOTE)
|
||||
continue;
|
||||
|
||||
/* There can be multiple notes segment, and examine each of them. */
|
||||
- const ElfW(Nhdr) * Note;
|
||||
- const ElfW(Nhdr) * NotesEnd;
|
||||
- /*
|
||||
- * When examining notes in file, use p_offset, which is the offset within
|
||||
- * the elf file, to find the start of notes.
|
||||
- */
|
||||
- if (ProgramHeader[I].p_memsz == 0 ||
|
||||
- ProgramHeader[I].p_memsz == ProgramHeader[I].p_filesz) {
|
||||
- Note = (const ElfW(Nhdr) *)((uintptr_t)ElfHeader +
|
||||
- ProgramHeader[I].p_offset);
|
||||
- NotesEnd = (const ElfW(Nhdr) *)((const char *)(Note) +
|
||||
- ProgramHeader[I].p_filesz);
|
||||
- } else {
|
||||
- /*
|
||||
- * When examining notes in memory, use p_vaddr, which is the address of
|
||||
- * section after loaded to memory, to find the start of notes.
|
||||
- */
|
||||
- Note =
|
||||
- (const ElfW(Nhdr) *)((uintptr_t)ElfHeader + ProgramHeader[I].p_vaddr);
|
||||
- NotesEnd =
|
||||
- (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz);
|
||||
- }
|
||||
+ const ElfW(Nhdr) *Note =
|
||||
+ (const ElfW(Nhdr) *)(Base + ProgramHeader[I].p_vaddr);
|
||||
+ const ElfW(Nhdr) *NotesEnd =
|
||||
+ (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz);
|
||||
|
||||
int BinaryIdsSize = WriteBinaryIds(Writer, Note, NotesEnd);
|
||||
if (TotalBinaryIdsSize == -1)
|
||||
--
|
||||
2.47.0
|
||||
|
893
18-99273.patch
893
18-99273.patch
@ -1,893 +0,0 @@
|
||||
From 91052169960477fbc39169c10f9fae3bec732510 Mon Sep 17 00:00:00 2001
|
||||
From: Carl Ritson <carl.ritson@amd.com>
|
||||
Date: Wed, 17 Jul 2024 15:07:42 +0900
|
||||
Subject: [PATCH 1/3] [AMDGPU] Implement workaround for GFX11.5 export priority
|
||||
|
||||
On GFX11.5 shaders having completed exports need to execute/wait
|
||||
at a lower priority than shaders still executing exports.
|
||||
Add code to maintain normal priority of 2 for shaders that export
|
||||
and drop to priority 0 after exports.
|
||||
---
|
||||
llvm/lib/Target/AMDGPU/AMDGPU.td | 15 +-
|
||||
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 112 ++++++
|
||||
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 +
|
||||
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3 +
|
||||
.../AMDGPU/required-export-priority.ll | 344 ++++++++++++++++++
|
||||
.../AMDGPU/required-export-priority.mir | 293 +++++++++++++++
|
||||
6 files changed, 765 insertions(+), 3 deletions(-)
|
||||
create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
||||
create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.mir
|
||||
|
||||
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
|
||||
index dfc8eaea66f7b..14fcf6a210a78 100644
|
||||
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
|
||||
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
|
||||
@@ -947,6 +947,12 @@ def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
|
||||
"Has restricted SOffset (immediate not supported)."
|
||||
>;
|
||||
|
||||
+def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority",
|
||||
+ "HasRequiredExportPriority",
|
||||
+ "true",
|
||||
+ "Export priority must be explicitly manipulated on GFX11.5"
|
||||
+>;
|
||||
+
|
||||
//===------------------------------------------------------------===//
|
||||
// Subtarget Features (options and debugging)
|
||||
//===------------------------------------------------------------===//
|
||||
@@ -1597,14 +1603,16 @@ def FeatureISAVersion11_5_0 : FeatureSet<
|
||||
!listconcat(FeatureISAVersion11_Common.Features,
|
||||
[FeatureSALUFloatInsts,
|
||||
FeatureDPPSrc1SGPR,
|
||||
- FeatureVGPRSingleUseHintInsts])>;
|
||||
+ FeatureVGPRSingleUseHintInsts,
|
||||
+ FeatureRequiredExportPriority])>;
|
||||
|
||||
def FeatureISAVersion11_5_1 : FeatureSet<
|
||||
!listconcat(FeatureISAVersion11_Common.Features,
|
||||
[FeatureSALUFloatInsts,
|
||||
FeatureDPPSrc1SGPR,
|
||||
FeatureVGPRSingleUseHintInsts,
|
||||
- FeatureGFX11FullVGPRs])>;
|
||||
+ FeatureGFX11FullVGPRs,
|
||||
+ FeatureRequiredExportPriority])>;
|
||||
|
||||
def FeatureISAVersion12 : FeatureSet<
|
||||
[FeatureGFX12,
|
||||
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
|
||||
index a402fc6d7e611..a8b171aa82840 100644
|
||||
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
|
||||
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "GCNSubtarget.h"
|
||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
+#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/ScheduleDAG.h"
|
||||
#include "llvm/TargetParser/TargetParser.h"
|
||||
@@ -1104,6 +1105,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
|
||||
fixWMMAHazards(MI);
|
||||
fixShift64HighRegBug(MI);
|
||||
fixVALUMaskWriteHazard(MI);
|
||||
+ fixRequiredExportPriority(MI);
|
||||
}
|
||||
|
||||
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
|
||||
@@ -2895,3 +2897,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
|
||||
|
||||
return true;
|
||||
}
|
||||
+
|
||||
+static bool ensureEntrySetPrio(MachineFunction *MF, int Priority,
|
||||
+ const SIInstrInfo &TII) {
|
||||
+ MachineBasicBlock &EntryMBB = MF->front();
|
||||
+ if (EntryMBB.begin() != EntryMBB.end()) {
|
||||
+ auto &EntryMI = *EntryMBB.begin();
|
||||
+ if (EntryMI.getOpcode() == AMDGPU::S_SETPRIO &&
|
||||
+ EntryMI.getOperand(0).getImm() >= Priority)
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII.get(AMDGPU::S_SETPRIO))
|
||||
+ .addImm(Priority);
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
|
||||
+ if (!ST.hasRequiredExportPriority())
|
||||
+ return false;
|
||||
+
|
||||
+ // Assume the following shader types will never have exports,
|
||||
+ // and avoid adding or adjusting S_SETPRIO.
|
||||
+ MachineBasicBlock *MBB = MI->getParent();
|
||||
+ MachineFunction *MF = MBB->getParent();
|
||||
+ auto CC = MF->getFunction().getCallingConv();
|
||||
+ switch (CC) {
|
||||
+ case CallingConv::AMDGPU_CS:
|
||||
+ case CallingConv::AMDGPU_CS_Chain:
|
||||
+ case CallingConv::AMDGPU_CS_ChainPreserve:
|
||||
+ case CallingConv::AMDGPU_KERNEL:
|
||||
+ return false;
|
||||
+ default:
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ const int MaxPriority = 3;
|
||||
+ const int NormalPriority = 2;
|
||||
+ const int PostExportPriority = 0;
|
||||
+
|
||||
+ auto It = MI->getIterator();
|
||||
+ switch (MI->getOpcode()) {
|
||||
+ case AMDGPU::S_ENDPGM:
|
||||
+ case AMDGPU::S_ENDPGM_SAVED:
|
||||
+ case AMDGPU::S_ENDPGM_ORDERED_PS_DONE:
|
||||
+ case AMDGPU::SI_RETURN_TO_EPILOG:
|
||||
+ // Ensure shader with calls raises priority at entry.
|
||||
+ // This ensures correct priority if exports exist in callee.
|
||||
+ if (MF->getFrameInfo().hasCalls())
|
||||
+ return ensureEntrySetPrio(MF, NormalPriority, TII);
|
||||
+ return false;
|
||||
+ case AMDGPU::S_SETPRIO: {
|
||||
+ // Raise minimum priority unless in workaround.
|
||||
+ auto &PrioOp = MI->getOperand(0);
|
||||
+ int Prio = PrioOp.getImm();
|
||||
+ bool InWA = (Prio == PostExportPriority) &&
|
||||
+ (It != MBB->begin() && TII.isEXP(*std::prev(It)));
|
||||
+ if (InWA || Prio >= NormalPriority)
|
||||
+ return false;
|
||||
+ PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority));
|
||||
+ return true;
|
||||
+ }
|
||||
+ default:
|
||||
+ if (!TII.isEXP(*MI))
|
||||
+ return false;
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ // Check entry priority at each export (as there will only be a few).
|
||||
+ // Note: amdgpu_gfx can only be a callee, so defer to caller setprio.
|
||||
+ bool Changed = false;
|
||||
+ if (CC != CallingConv::AMDGPU_Gfx)
|
||||
+ Changed = ensureEntrySetPrio(MF, NormalPriority, TII);
|
||||
+
|
||||
+ auto NextMI = std::next(It);
|
||||
+ bool EndOfShader = false;
|
||||
+ if (NextMI != MBB->end()) {
|
||||
+ // Only need WA at end of sequence of exports.
|
||||
+ if (TII.isEXP(*NextMI))
|
||||
+ return Changed;
|
||||
+ // Assume appropriate S_SETPRIO after export means WA already applied.
|
||||
+ if (NextMI->getOpcode() == AMDGPU::S_SETPRIO &&
|
||||
+ NextMI->getOperand(0).getImm() == PostExportPriority)
|
||||
+ return Changed;
|
||||
+ EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM;
|
||||
+ }
|
||||
+
|
||||
+ const DebugLoc &DL = MI->getDebugLoc();
|
||||
+
|
||||
+ // Lower priority.
|
||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
|
||||
+ .addImm(PostExportPriority);
|
||||
+
|
||||
+ if (!EndOfShader) {
|
||||
+ // Wait for exports to complete.
|
||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT))
|
||||
+ .addReg(AMDGPU::SGPR_NULL)
|
||||
+ .addImm(0);
|
||||
+ }
|
||||
+
|
||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
|
||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
|
||||
+
|
||||
+ if (!EndOfShader) {
|
||||
+ // Return to normal (higher) priority.
|
||||
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
|
||||
+ .addImm(NormalPriority);
|
||||
+ }
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
|
||||
index 3ccca527c626b..f2a64ab48e180 100644
|
||||
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
|
||||
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
|
||||
@@ -107,6 +107,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
|
||||
bool fixWMMAHazards(MachineInstr *MI);
|
||||
bool fixShift64HighRegBug(MachineInstr *MI);
|
||||
bool fixVALUMaskWriteHazard(MachineInstr *MI);
|
||||
+ bool fixRequiredExportPriority(MachineInstr *MI);
|
||||
|
||||
int checkMAIHazards(MachineInstr *MI);
|
||||
int checkMAIHazards908(MachineInstr *MI);
|
||||
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
|
||||
index e5817594a4521..def89c785b855 100644
|
||||
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
|
||||
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
|
||||
@@ -238,6 +238,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
|
||||
bool HasVOPDInsts = false;
|
||||
bool HasVALUTransUseHazard = false;
|
||||
bool HasForceStoreSC0SC1 = false;
|
||||
+ bool HasRequiredExportPriority = false;
|
||||
|
||||
// Dummy feature to use for assembler in tablegen.
|
||||
bool FeatureDisable = false;
|
||||
@@ -1282,6 +1283,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
|
||||
|
||||
bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }
|
||||
|
||||
+ bool hasRequiredExportPriority() const { return HasRequiredExportPriority; }
|
||||
+
|
||||
/// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
|
||||
/// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
|
||||
bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
|
||||
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
||||
new file mode 100644
|
||||
index 0000000000000..377902f3f0d1a
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
||||
@@ -0,0 +1,344 @@
|
||||
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
+
|
||||
+define amdgpu_ps void @test_export_zeroes_f32() #0 {
|
||||
+; GCN-LABEL: test_export_zeroes_f32:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
+; GCN-NEXT: exp mrt0 off, off, off, off
|
||||
+; GCN-NEXT: exp mrt0 off, off, off, off done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_export_en_src0_f32() #0 {
|
||||
+; GCN-LABEL: test_export_en_src0_f32:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
||||
+; GCN-NEXT: exp mrt0 v3, off, off, off done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_gs void @test_export_gs() #0 {
|
||||
+; GCN-LABEL: test_export_gs:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_hs void @test_export_hs() #0 {
|
||||
+; GCN-LABEL: test_export_hs:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_gfx void @test_export_gfx(float %v) #0 {
|
||||
+; GCN-LABEL: test_export_gfx:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 4.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 0.5
|
||||
+; GCN-NEXT: v_mov_b32_e32 v3, 2.0
|
||||
+; GCN-NEXT: exp mrt0 off, v3, off, off done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_waitcnt expcnt(0)
|
||||
+; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %v, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_cs void @test_export_cs() #0 {
|
||||
+; GCN-LABEL: test_export_cs:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_kernel void @test_export_kernel() #0 {
|
||||
+; GCN-LABEL: test_export_kernel:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
|
||||
+; GCN-NEXT: exp mrt0 off, v2, off, off done
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_gfx void @test_no_export_gfx(float %v) #0 {
|
||||
+; GCN-LABEL: test_no_export_gfx:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
+; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_no_export_ps(float %v) #0 {
|
||||
+; GCN-LABEL: test_no_export_ps:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
||||
+; GCN-LABEL: test_if_export_f32:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
||||
+; GCN-NEXT: s_cbranch_execz .LBB9_2
|
||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: .LBB9_2: ; %end
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ %cc = icmp eq i32 %flag, 0
|
||||
+ br i1 %cc, label %end, label %exp
|
||||
+
|
||||
+exp:
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
|
||||
+ br label %end
|
||||
+
|
||||
+end:
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
||||
+; GCN-LABEL: test_if_export_vm_f32:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
||||
+; GCN-NEXT: s_cbranch_execz .LBB10_2
|
||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: .LBB10_2: ; %end
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ %cc = icmp eq i32 %flag, 0
|
||||
+ br i1 %cc, label %end, label %exp
|
||||
+
|
||||
+exp:
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
|
||||
+ br label %end
|
||||
+
|
||||
+end:
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
||||
+; GCN-LABEL: test_if_export_done_f32:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
||||
+; GCN-NEXT: s_cbranch_execz .LBB11_2
|
||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: .LBB11_2: ; %end
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ %cc = icmp eq i32 %flag, 0
|
||||
+ br i1 %cc, label %end, label %exp
|
||||
+
|
||||
+exp:
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
|
||||
+ br label %end
|
||||
+
|
||||
+end:
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
|
||||
+; GCN-LABEL: test_if_export_vm_done_f32:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_mov_b32 s0, exec_lo
|
||||
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
|
||||
+; GCN-NEXT: s_cbranch_execz .LBB12_2
|
||||
+; GCN-NEXT: ; %bb.1: ; %exp
|
||||
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: .LBB12_2: ; %end
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ %cc = icmp eq i32 %flag, 0
|
||||
+ br i1 %cc, label %end, label %exp
|
||||
+
|
||||
+exp:
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
|
||||
+ br label %end
|
||||
+
|
||||
+end:
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 {
|
||||
+; GCN-LABEL: test_export_pos_before_param_across_load:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 1.0
|
||||
+; GCN-NEXT: v_mov_b32_e32 v3, 0.5
|
||||
+; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
+; GCN-NEXT: exp pos0 v1, v1, v1, v0 done
|
||||
+; GCN-NEXT: exp invalid_target_32 v2, v2, v2, v2
|
||||
+; GCN-NEXT: exp invalid_target_33 v2, v2, v2, v3
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
|
||||
+ %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
|
||||
+; GCN-LABEL: test_export_across_store_load:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 24
|
||||
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
|
||||
+; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2)
|
||||
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 8, vcc_lo
|
||||
+; GCN-NEXT: v_mov_b32_e32 v2, 0
|
||||
+; GCN-NEXT: scratch_store_b32 v0, v1, off
|
||||
+; GCN-NEXT: scratch_load_b32 v0, off, off
|
||||
+; GCN-NEXT: v_mov_b32_e32 v1, 1.0
|
||||
+; GCN-NEXT: exp pos0 v2, v2, v2, v1 done
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_waitcnt vmcnt(0)
|
||||
+; GCN-NEXT: exp invalid_target_32 v0, v2, v1, v2
|
||||
+; GCN-NEXT: exp invalid_target_33 v0, v2, v1, v2
|
||||
+; GCN-NEXT: s_setprio 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_nop 0
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ %data0 = alloca <4 x float>, align 8, addrspace(5)
|
||||
+ %data1 = alloca <4 x float>, align 8, addrspace(5)
|
||||
+ %cmp = icmp eq i32 %idx, 1
|
||||
+ %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
|
||||
+ store float %v, ptr addrspace(5) %data, align 8
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
|
||||
+ %load0 = load float, ptr addrspace(5) %data0, align 8
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
|
||||
+ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_export_in_callee(float %v) #0 {
|
||||
+; GCN-LABEL: test_export_in_callee:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_getpc_b64 s[0:1]
|
||||
+; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
|
||||
+; GCN-NEXT: s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
|
||||
+; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
|
||||
+; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
+; GCN-NEXT: s_mov_b32 s32, 0
|
||||
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
+; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ %x = fadd float %v, 1.0
|
||||
+ call void @test_export_gfx(float %x)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
|
||||
+; GCN-LABEL: test_export_in_callee_prio:
|
||||
+; GCN: ; %bb.0:
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_mov_b32 s32, 0
|
||||
+; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
|
||||
+; GCN-NEXT: s_setprio 2
|
||||
+; GCN-NEXT: s_getpc_b64 s[0:1]
|
||||
+; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
|
||||
+; GCN-NEXT: s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
|
||||
+; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
|
||||
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
+; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
|
||||
+; GCN-NEXT: s_endpgm
|
||||
+ %x = fadd float %v, 1.0
|
||||
+ call void @llvm.amdgcn.s.setprio(i16 0)
|
||||
+ call void @test_export_gfx(float %x)
|
||||
+ ret void
|
||||
+}
|
||||
+
|
||||
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
|
||||
+declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
|
||||
+declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
|
||||
+declare void @llvm.amdgcn.s.setprio(i16)
|
||||
+
|
||||
+attributes #0 = { nounwind }
|
||||
+attributes #1 = { nounwind inaccessiblememonly }
|
||||
+attributes #2 = { nounwind readnone }
|
||||
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.mir b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
|
||||
new file mode 100644
|
||||
index 0000000000000..eee04468036e5
|
||||
--- /dev/null
|
||||
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
|
||||
@@ -0,0 +1,293 @@
|
||||
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
|
||||
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=post-RA-hazard-rec -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX1150 %s
|
||||
+
|
||||
+--- |
|
||||
+ define amdgpu_ps void @end_of_shader() {
|
||||
+ ret void
|
||||
+ }
|
||||
+ define amdgpu_ps void @end_of_shader_return_to_epilogue() {
|
||||
+ ret void
|
||||
+ }
|
||||
+ define amdgpu_ps void @end_of_block() {
|
||||
+ ret void
|
||||
+ }
|
||||
+ define amdgpu_ps void @start_of_block() {
|
||||
+ ret void
|
||||
+ }
|
||||
+ define amdgpu_ps void @block_of_exports() {
|
||||
+ ret void
|
||||
+ }
|
||||
+ define amdgpu_ps void @sparse_exports() {
|
||||
+ ret void
|
||||
+ }
|
||||
+ define amdgpu_ps void @existing_setprio_1() {
|
||||
+ ret void
|
||||
+ }
|
||||
+ define amdgpu_ps void @existing_setprio_2() {
|
||||
+ ret void
|
||||
+ }
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: end_of_shader
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+ ; GFX1150-LABEL: name: end_of_shader
|
||||
+ ; GFX1150: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ S_ENDPGM 0
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: end_of_shader_return_to_epilogue
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+ ; GFX1150-LABEL: name: end_of_shader_return_to_epilogue
|
||||
+ ; GFX1150: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: SI_RETURN_TO_EPILOG $vgpr0
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ SI_RETURN_TO_EPILOG $vgpr0
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: end_of_block
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ ; GFX1150-LABEL: name: end_of_block
|
||||
+ ; GFX1150: bb.0:
|
||||
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
|
||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: bb.1:
|
||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+
|
||||
+ bb.1:
|
||||
+ S_ENDPGM 0
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: start_of_block
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ ; GFX1150-LABEL: name: start_of_block
|
||||
+ ; GFX1150: bb.0:
|
||||
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
|
||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: bb.1:
|
||||
+ ; GFX1150-NEXT: successors: %bb.2(0x80000000)
|
||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: bb.2:
|
||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+
|
||||
+ bb.1:
|
||||
+ liveins: $vgpr0
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+
|
||||
+ bb.2:
|
||||
+ S_ENDPGM 0
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: block_of_exports
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+ ; GFX1150-LABEL: name: block_of_exports
|
||||
+ ; GFX1150: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
||||
+ EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ S_ENDPGM 0
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: sparse_exports
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+ ; GFX1150-LABEL: name: sparse_exports
|
||||
+ ; GFX1150: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
||||
+ EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ S_ENDPGM 0
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: existing_setprio_1
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ ; GFX1150-LABEL: name: existing_setprio_1
|
||||
+ ; GFX1150: bb.0:
|
||||
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
|
||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: bb.1:
|
||||
+ ; GFX1150-NEXT: successors: %bb.2(0x80000000)
|
||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
||||
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: bb.2:
|
||||
+ ; GFX1150-NEXT: successors: %bb.3(0x80000000)
|
||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
||||
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: bb.3:
|
||||
+ ; GFX1150-NEXT: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+ $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
|
||||
+
|
||||
+ bb.1:
|
||||
+ liveins: $vgpr0
|
||||
+ S_SETPRIO 3
|
||||
+ $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
|
||||
+ S_SETPRIO 0
|
||||
+
|
||||
+ bb.2:
|
||||
+ liveins: $vgpr0
|
||||
+ S_SETPRIO 1
|
||||
+ $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
|
||||
+ S_SETPRIO 0
|
||||
+
|
||||
+ bb.3:
|
||||
+ liveins: $vgpr0
|
||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ S_ENDPGM 0
|
||||
+...
|
||||
+
|
||||
+---
|
||||
+name: existing_setprio_2
|
||||
+tracksRegLiveness: true
|
||||
+liveins:
|
||||
+ - { reg: '$vgpr0' }
|
||||
+body: |
|
||||
+ bb.0:
|
||||
+ liveins: $vgpr0
|
||||
+ ; GFX1150-LABEL: name: existing_setprio_2
|
||||
+ ; GFX1150: liveins: $vgpr0
|
||||
+ ; GFX1150-NEXT: {{ $}}
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
||||
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 0
|
||||
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_NOP 0
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 2
|
||||
+ ; GFX1150-NEXT: S_SETPRIO 3
|
||||
+ ; GFX1150-NEXT: S_ENDPGM 0
|
||||
+ S_SETPRIO 3
|
||||
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
|
||||
+ S_SETPRIO 3
|
||||
+ S_ENDPGM 0
|
||||
+...
|
||||
|
||||
From 8ea44e65f2c19facff751aeb2ac960f907fb210f Mon Sep 17 00:00:00 2001
|
||||
From: Carl Ritson <carl.ritson@amd.com>
|
||||
Date: Wed, 17 Jul 2024 16:18:02 +0900
|
||||
Subject: [PATCH 2/3] Remove -verify-machineinstrs from test.
|
||||
|
||||
---
|
||||
llvm/test/CodeGen/AMDGPU/required-export-priority.ll | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
||||
index 377902f3f0d1a..ebc209bd4d451 100644
|
||||
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
||||
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
|
||||
@@ -1,5 +1,5 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
define amdgpu_ps void @test_export_zeroes_f32() #0 {
|
||||
; GCN-LABEL: test_export_zeroes_f32:
|
2
Makefile
2
Makefile
@ -4,7 +4,7 @@
|
||||
# Tweak this to centos-stream-9-x86_64 to build for CentOS
|
||||
MOCK_CHROOT?=fedora-rawhide-x86_64
|
||||
MOCK_OPTS?=
|
||||
MOCK_OPTS_RELEASE?=--no-clean --no-cleanup-after --without lto_build $(MOCK_OPTS)
|
||||
MOCK_OPTS_RELEASE?=--no-clean --no-cleanup-after --without lto_build --define "debug_package %{nil}" $(MOCK_OPTS)
|
||||
MOCK_OPTS_SNAPSHOT?=$(MOCK_OPTS_RELEASE) --with snapshot_build $(MOCK_OPTS)
|
||||
YYYYMMDD?=$(shell date +%Y%m%d)
|
||||
SOURCEDIR=$(shell pwd)
|
||||
|
@ -1,24 +0,0 @@
|
||||
diff -ruN llvm-19.1.7-build.orig/llvm-project-19.1.7.src/mlir/include/mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h llvm-19.1.7-build/llvm-project-19.1.7.src/mlir/include/mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h
|
||||
--- a/mlir/include/mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h 2025-01-14 10:41:02.000000000 +0100
|
||||
+++ b/mlir/include/mlir/Dialect/Affine/IR/ValueBoundsOpInterfaceImpl.h 2025-01-17 08:31:49.581864809 +0100
|
||||
@@ -9,6 +9,7 @@
|
||||
#ifndef MLIR_DIALECT_AFFINE_IR_VALUEBOUNDSOPINTERFACEIMPL_H
|
||||
#define MLIR_DIALECT_AFFINE_IR_VALUEBOUNDSOPINTERFACEIMPL_H
|
||||
|
||||
+#include <cstdint>
|
||||
#include "mlir/Support/LLVM.h"
|
||||
|
||||
namespace mlir {
|
||||
diff -ruN llvm-19.1.7-build.orig/llvm-project-19.1.7.src/mlir/include/mlir/Support/LLVM.h llvm-19.1.7-build/llvm-project-19.1.7.src/mlir/include/mlir/Support/LLVM.h
|
||||
--- a/mlir/include/mlir/Support/LLVM.h 2025-01-14 10:41:02.000000000 +0100
|
||||
+++ b/mlir/include/mlir/Support/LLVM.h 2025-01-17 10:20:19.356337873 +0100
|
||||
@@ -18,6 +18,9 @@
|
||||
#ifndef MLIR_SUPPORT_LLVM_H
|
||||
#define MLIR_SUPPORT_LLVM_H
|
||||
|
||||
+
|
||||
+
|
||||
+#include <cstdint>
|
||||
// We include this header because it cannot be practically forward
|
||||
// declared, and are effectively language features.
|
||||
#include "llvm/Support/Casting.h"
|
28
gating.yaml
28
gating.yaml
@ -1,20 +1,22 @@
|
||||
--- !Policy
|
||||
product_versions:
|
||||
- fedora-*
|
||||
decision_context: bodhi_update_push_testing
|
||||
decision_contexts:
|
||||
- bodhi_update_push_testing
|
||||
- bodhi_update_push_stable
|
||||
- bodhi_update_push_stable_critpath
|
||||
subject_type: koji_build
|
||||
rules:
|
||||
- !PassingTestCaseRule {test_case_name: fedora-ci.koji-build.tier0.functional}
|
||||
- !PassingTestCaseRule {test_case_name: fedora-ci.koji-build./tests/build-gating.functional}
|
||||
- !PassingTestCaseRule {test_case_name: fedora-ci.koji-build./tests/lld-alternatives.functional}
|
||||
- !PassingTestCaseRule {test_case_name: fedora-ci.koji-build.installability.functional}
|
||||
--- !Policy
|
||||
product_versions:
|
||||
- fedora-*
|
||||
decision_context: bodhi_update_push_stable
|
||||
# The version number here should match the current rawhide release.
|
||||
- fedora-43
|
||||
decision_contexts:
|
||||
- bodhi_update_push_stable
|
||||
- bodhi_update_push_stable_critpath
|
||||
rules:
|
||||
- !PassingTestCaseRule {test_case_name: fedora-ci.koji-build.tier0.functional}
|
||||
--- !Policy
|
||||
product_versions:
|
||||
- rhel-9
|
||||
decision_context: osci_compose_gate
|
||||
rules:
|
||||
- !PassingTestCaseRule {test_case_name: baseos-ci.brew-build.tier0-tmt-x86_64-aarch64.functional}
|
||||
- !PassingTestCaseRule {test_case_name: baseos-ci.brew-build.tier0-tmt-s390x-ppc64le.functional}
|
||||
- !PassingTestCaseRule {test_case_name: osci.brew-build.rebuild.validation}
|
||||
- !PassingTestCaseRule {test_case_name: fedora-ci.koji-build.rpmdeplint.functional}
|
||||
- !PassingTestCaseRule {test_case_name: fedora-ci.koji-build./tests/kernel-ark-build.functional}
|
||||
|
@ -18,3 +18,7 @@ badfuncs:
|
||||
- gethostbyname2
|
||||
- inet_aton
|
||||
|
||||
unicode:
|
||||
ignore:
|
||||
# Ignore bidirectional unicode sequence documentation file
|
||||
- llvm-project-*.src/clang-tools-extra/docs/clang-tidy/checks/misc/misleading-bidirectional.rst
|
||||
|
2
sources
2
sources
@ -1,2 +1,4 @@
|
||||
SHA512 (llvm-project-20.1.0.src.tar.xz) = c90fbc43c40e148357912adc40d799a79cce3d3a929136c137421f6342ed7338659e651f1dedfa8e10b94b9f2897e25b2962e17709add02857f89f10615a2397
|
||||
SHA512 (llvm-project-20.1.0.src.tar.xz.sig) = 68da1c2a13aaa48a67bbb9e716f9f030cf372edc651bca2861453378a6a9172a207ff59e29b175001f042888bf1f9c118fbf938f757e1f4c966943baa7e2696a
|
||||
SHA512 (llvm-project-19.1.7.src.tar.xz) = c7d63286d662707a9cd54758c9e3aaf52794a91900c484c4a6efa62d90bc719d5e7a345e4192feeb0c9fd11c82570d64677c781e5be1d645556b6aa018e47ec8
|
||||
SHA512 (llvm-project-19.1.7.src.tar.xz.sig) = 195797b06ac80a742e0ccbc03a50dc06dd2e04377d783d5474e3e72c5a75203b60292b047929312a411d22b137a239943fba414a4d136a2be14cbff978eb6bda
|
||||
|
Loading…
Reference in New Issue
Block a user