From d9ecda1f54c9a6d9c01cc08a139afbca87c93e5c Mon Sep 17 00:00:00 2001 From: Edjunior Machado Date: Fri, 29 Nov 2024 14:47:00 +0000 Subject: [PATCH 1/3] build-gating.fmf: Update repo --- tests/build-gating.fmf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/build-gating.fmf b/tests/build-gating.fmf index 8c19a1a..be6b730 100644 --- a/tests/build-gating.fmf +++ b/tests/build-gating.fmf @@ -50,7 +50,7 @@ adjust: discover: - name: llvm-tests how: fmf - url: https://src.fedoraproject.org/tests/llvm.git + url: https://gitlab.com/redhat/centos-stream/tests/llvm.git ref: main execute: how: tmt From 80aa4ee5a6a71a6c2acc034edc251f455afe872f Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 15 Jan 2025 14:49:56 +0100 Subject: [PATCH 2/3] Update to LLVM 19.1.7 Resolves: RHEL-57456 Resolves: RHEL-57457 Resolves: RHEL-58900 Resolves: RHEL-70325 --- ...-18-Always-build-shared-libs-for-LLD.patch | 29 + ...-19-Always-build-shared-libs-for-LLD.patch | 0 ...move-myst_parser-dependency-for-RHEL.patch | 41 - ...move-myst_parser-dependency-for-RHEL.patch | 43 - ...oolset-path-precedence-over-Installe.patch | 41 + 0001-Fix-python3-clang.patch | 45 - ...-vaddr-for-__llvm_write_binary_ids-n.patch | 86 ++ 18-99273.patch | 893 ++++++++++++++++++ llvm.spec | 289 ++++-- sources | 6 +- 10 files changed, 1266 insertions(+), 207 deletions(-) create mode 100644 0001-18-Always-build-shared-libs-for-LLD.patch rename 0001-Always-build-shared-libs-for-LLD.patch => 0001-19-Always-build-shared-libs-for-LLD.patch (100%) delete mode 100644 0001-19-Remove-myst_parser-dependency-for-RHEL.patch delete mode 100644 0001-20-Remove-myst_parser-dependency-for-RHEL.patch create mode 100644 0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch delete mode 100644 0001-Fix-python3-clang.patch create mode 100644 0001-profile-Use-base-vaddr-for-__llvm_write_binary_ids-n.patch create mode 100644 18-99273.patch diff --git a/0001-18-Always-build-shared-libs-for-LLD.patch 
b/0001-18-Always-build-shared-libs-for-LLD.patch new file mode 100644 index 0000000..1659800 --- /dev/null +++ b/0001-18-Always-build-shared-libs-for-LLD.patch @@ -0,0 +1,29 @@ +From b1c60d7fa322a2d208556087df9e7ef94bfbffb8 Mon Sep 17 00:00:00 2001 +From: Nikita Popov +Date: Wed, 8 May 2024 12:30:36 +0900 +Subject: [PATCH] Always build shared libs for LLD + +We don't want to enable BUILD_SHARED_LIBS for the whole build, +but we do want to build lld libraries. +--- + lld/cmake/modules/AddLLD.cmake | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/lld/cmake/modules/AddLLD.cmake b/lld/cmake/modules/AddLLD.cmake +index 2ee066b41535..270c03f096ac 100644 +--- a/lld/cmake/modules/AddLLD.cmake ++++ b/lld/cmake/modules/AddLLD.cmake +@@ -7,9 +7,8 @@ macro(add_lld_library name) + "" + "" + ${ARGN}) +- if(ARG_SHARED) +- set(ARG_ENABLE_SHARED SHARED) +- endif() ++ # Always build shared libs for LLD. ++ set(ARG_ENABLE_SHARED SHARED) + llvm_add_library(${name} ${ARG_ENABLE_SHARED} ${ARG_UNPARSED_ARGUMENTS}) + set_target_properties(${name} PROPERTIES FOLDER "lld libraries") + +-- +2.44.0 \ No newline at end of file diff --git a/0001-Always-build-shared-libs-for-LLD.patch b/0001-19-Always-build-shared-libs-for-LLD.patch similarity index 100% rename from 0001-Always-build-shared-libs-for-LLD.patch rename to 0001-19-Always-build-shared-libs-for-LLD.patch diff --git a/0001-19-Remove-myst_parser-dependency-for-RHEL.patch b/0001-19-Remove-myst_parser-dependency-for-RHEL.patch deleted file mode 100644 index d1df528..0000000 --- a/0001-19-Remove-myst_parser-dependency-for-RHEL.patch +++ /dev/null @@ -1,41 +0,0 @@ -From d8742e9b361e5fd6fee2298b8ea0aeb4671ec05a Mon Sep 17 00:00:00 2001 -From: Nikita Popov -Date: Wed, 22 May 2024 09:39:26 +0200 -Subject: [PATCH] Remove myst_parser dependency for RHEL - ---- - clang/docs/conf.py | 3 +-- - llvm/docs/conf.py | 3 +-- - 2 files changed, 2 insertions(+), 4 deletions(-) - -diff --git a/clang/docs/conf.py 
b/clang/docs/conf.py -index 4cee382a718f..d2e2198e05d4 100644 ---- a/clang/docs/conf.py -+++ b/clang/docs/conf.py -@@ -43,8 +43,7 @@ try: - - extensions.append("myst_parser") - except ImportError: -- if not tags.has("builder-man"): -- raise -+ pass - - - # The encoding of source files. -diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py -index 7f2ed5309606..354a41f11280 100644 ---- a/llvm/docs/conf.py -+++ b/llvm/docs/conf.py -@@ -36,8 +36,7 @@ try: - - extensions.append("myst_parser") - except ImportError: -- if not tags.has("builder-man"): -- raise -+ pass - - # Automatic anchors for markdown titles - from llvm_slug import make_slug --- -2.44.0 - diff --git a/0001-20-Remove-myst_parser-dependency-for-RHEL.patch b/0001-20-Remove-myst_parser-dependency-for-RHEL.patch deleted file mode 100644 index 0697d1e..0000000 --- a/0001-20-Remove-myst_parser-dependency-for-RHEL.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 50cd36c2156d375a6d50f661908b460fbbd22e78 Mon Sep 17 00:00:00 2001 -From: Nikita Popov -Date: Wed, 22 May 2024 09:39:26 +0200 -Subject: [PATCH] Remove myst_parser dependency for RHEL - ---- - clang/docs/conf.py | 3 +-- - llvm/docs/conf.py | 5 +---- - 2 files changed, 2 insertions(+), 6 deletions(-) - -diff --git a/clang/docs/conf.py b/clang/docs/conf.py -index 4cee382a718f..d2e2198e05d4 100644 ---- a/clang/docs/conf.py -+++ b/clang/docs/conf.py -@@ -43,8 +43,7 @@ try: - - extensions.append("myst_parser") - except ImportError: -- if not tags.has("builder-man"): -- raise -+ pass - - - # The encoding of source files. 
-diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py -index d9fa6961032b..e38c009a457d 100644 ---- a/llvm/docs/conf.py -+++ b/llvm/docs/conf.py -@@ -36,10 +36,7 @@ try: - - extensions.append("myst_parser") - except ImportError: -- if not tags.has("builder-man"): -- raise --else: -- myst_enable_extensions = ["substitution"] -+ pass - - # Automatic anchors for markdown titles - myst_heading_anchors = 6 --- -2.46.0 - diff --git a/0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch b/0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch new file mode 100644 index 0000000..92ab81c --- /dev/null +++ b/0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch @@ -0,0 +1,41 @@ +From 73d3b4047d757ef35850e2cef38285b96be82f0f Mon Sep 17 00:00:00 2001 +From: Nikita Popov +Date: Tue, 23 May 2023 12:17:29 +0200 +Subject: [PATCH] [Driver] Give devtoolset path precedence over InstalledDir + +This is a followup to the change from c5fe10f365247c3dd9416b7ec8bad73a60b5946e. +While that commit correctly adds the bindir from devtoolset to the +path, the driver dir / install dir still comes first. This means +we'll still end up picking /usr/bin/ld rather than the one from +devtoolset. + +Unfortunately, I don't see any way to test this. In the environment +the tests are run, this would only result in a behavior difference +if there is an ld binary present in the LLVM build directory, which +isn't the case. 
+ +Differential Revision: https://reviews.llvm.org/D151203 +--- + clang/lib/Driver/ToolChains/Linux.cpp | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp +index 853ff99d9fe5..aecabb46d4b9 100644 +--- a/clang/lib/Driver/ToolChains/Linux.cpp ++++ b/clang/lib/Driver/ToolChains/Linux.cpp +@@ -244,9 +244,9 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) + // With devtoolset on RHEL, we want to add a bin directory that is relative + // to the detected gcc install, because if we are using devtoolset gcc then + // we want to use other tools from devtoolset (e.g. ld) instead of the +- // standard system tools. +- PPaths.push_back(Twine(GCCInstallation.getParentLibPath() + +- "/../bin").str()); ++ // standard system tools. This should take precedence over InstalledDir. ++ PPaths.insert(PPaths.begin(), ++ Twine(GCCInstallation.getParentLibPath() + "/../bin").str()); + + if (Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb) + ExtraOpts.push_back("-X"); +-- +2.40.1 + diff --git a/0001-Fix-python3-clang.patch b/0001-Fix-python3-clang.patch deleted file mode 100644 index 1053a0b..0000000 --- a/0001-Fix-python3-clang.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 1c8a88c870a00eea6c80109cc682e0276ff7888d Mon Sep 17 00:00:00 2001 -From: Nikita Popov -Date: Mon, 8 Jul 2024 12:32:57 +0200 -Subject: [PATCH] Fix python3-clang - -Drop confusing `cd ..` so we stay in the root of the LLVM sources -and the install command succeeds. ---- - install.spec.inc | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - -diff --git a/install.spec.inc b/install.spec.inc -index 0fc1424..5f4a43b 100644 ---- a/install.spec.inc -+++ b/install.spec.inc -@@ -98,8 +98,6 @@ touch %{buildroot}%{_bindir}/llvm-config%{exec_suffix} - mkdir -p %{buildroot}%{pkg_datadir}/llvm/cmake - cp -Rv cmake/* %{buildroot}%{pkg_datadir}/llvm/cmake - --cd .. 
-- - #endregion - - #region CLANG installation -@@ -134,7 +132,7 @@ mkdir -p %{buildroot}%{python3_sitelib}/clang/ - # install: omitting directory 'bindings/python/clang/__pycache__' - # NOTE: this only happens if we include the gdb plugin of libomp. - # Remove the plugin with command and we're good: rm -rf %{buildroot}/%{_datarootdir}/gdb --install -p -m644 clang/bindings/python/clang/* %{buildroot}%{python3_sitelib}/clang/ || true -+install -p -m644 clang/bindings/python/clang/* %{buildroot}%{python3_sitelib}/clang/ - %py_byte_compile %{__python3} %{buildroot}%{python3_sitelib}/clang - - # install scanbuild-py to python sitelib. -@@ -268,7 +266,7 @@ done - # https://docs.fedoraproject.org/en-US/packaging-guidelines/Alternatives/ - touch %{buildroot}%{_bindir}/ld - --install -D -m 644 -t %{buildroot}%{_mandir}/man1/ %{src_tarball_dir}/lld/docs/ld.lld.1 -+install -D -m 644 -t %{buildroot}%{_mandir}/man1/ lld/docs/ld.lld.1 - - %post -n %{pkg_name_lld} - %{_sbindir}/update-alternatives --install %{_bindir}/ld ld %{_bindir}/ld.lld 1 --- -2.45.2 - diff --git a/0001-profile-Use-base-vaddr-for-__llvm_write_binary_ids-n.patch b/0001-profile-Use-base-vaddr-for-__llvm_write_binary_ids-n.patch new file mode 100644 index 0000000..7f0a7cf --- /dev/null +++ b/0001-profile-Use-base-vaddr-for-__llvm_write_binary_ids-n.patch @@ -0,0 +1,86 @@ +From ccc2b792e57d632bc887b226a4e7f0a8189eab8b Mon Sep 17 00:00:00 2001 +From: Josh Stone +Date: Mon, 4 Nov 2024 16:37:49 -0800 +Subject: [PATCH] [profile] Use base+vaddr for `__llvm_write_binary_ids` note + pointers + +This function is always examining its own ELF headers in memory, but it +was trying to use conditions between examining files or memory, and it +wasn't accounting for LOAD offsets at runtime. This is especially bad if +a loaded segment has additional padding that's not in the file offsets. 
+ +Now we do a first scan of the program headers to figure out the runtime +base address based on `PT_PHDR` and/or `PT_DYNAMIC` (else assume zero), +similar to libc's `do_start`. Then each `PT_NOTE` pointer is simply the +base plus the segments's `pt_vaddr`, which includes LOAD offsets. + +Fixes #114605 +--- + .../lib/profile/InstrProfilingPlatformLinux.c | 40 ++++++++----------- + 1 file changed, 16 insertions(+), 24 deletions(-) + +diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +index e2c06d51e0c6..c365129a0768 100644 +--- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c ++++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c +@@ -194,41 +194,33 @@ static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note, + */ + COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) { + extern const ElfW(Ehdr) __ehdr_start __attribute__((visibility("hidden"))); ++ extern ElfW(Dyn) _DYNAMIC[] __attribute__((weak, visibility("hidden"))); ++ + const ElfW(Ehdr) *ElfHeader = &__ehdr_start; + const ElfW(Phdr) *ProgramHeader = + (const ElfW(Phdr) *)((uintptr_t)ElfHeader + ElfHeader->e_phoff); + ++ /* Compute the added base address in case of position-independent code. */ ++ uintptr_t Base = 0; ++ for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) { ++ if (ProgramHeader[I].p_type == PT_PHDR) ++ Base = (uintptr_t)ProgramHeader - ProgramHeader[I].p_vaddr; ++ if (ProgramHeader[I].p_type == PT_DYNAMIC && _DYNAMIC) ++ Base = (uintptr_t)_DYNAMIC - ProgramHeader[I].p_vaddr; ++ } ++ + int TotalBinaryIdsSize = 0; +- uint32_t I; + /* Iterate through entries in the program header. */ +- for (I = 0; I < ElfHeader->e_phnum; I++) { ++ for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) { + /* Look for the notes segment in program header entries. */ + if (ProgramHeader[I].p_type != PT_NOTE) + continue; + + /* There can be multiple notes segment, and examine each of them. 
*/ +- const ElfW(Nhdr) * Note; +- const ElfW(Nhdr) * NotesEnd; +- /* +- * When examining notes in file, use p_offset, which is the offset within +- * the elf file, to find the start of notes. +- */ +- if (ProgramHeader[I].p_memsz == 0 || +- ProgramHeader[I].p_memsz == ProgramHeader[I].p_filesz) { +- Note = (const ElfW(Nhdr) *)((uintptr_t)ElfHeader + +- ProgramHeader[I].p_offset); +- NotesEnd = (const ElfW(Nhdr) *)((const char *)(Note) + +- ProgramHeader[I].p_filesz); +- } else { +- /* +- * When examining notes in memory, use p_vaddr, which is the address of +- * section after loaded to memory, to find the start of notes. +- */ +- Note = +- (const ElfW(Nhdr) *)((uintptr_t)ElfHeader + ProgramHeader[I].p_vaddr); +- NotesEnd = +- (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz); +- } ++ const ElfW(Nhdr) *Note = ++ (const ElfW(Nhdr) *)(Base + ProgramHeader[I].p_vaddr); ++ const ElfW(Nhdr) *NotesEnd = ++ (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz); + + int BinaryIdsSize = WriteBinaryIds(Writer, Note, NotesEnd); + if (TotalBinaryIdsSize == -1) +-- +2.47.0 + diff --git a/18-99273.patch b/18-99273.patch new file mode 100644 index 0000000..bacb46b --- /dev/null +++ b/18-99273.patch @@ -0,0 +1,893 @@ +From 91052169960477fbc39169c10f9fae3bec732510 Mon Sep 17 00:00:00 2001 +From: Carl Ritson +Date: Wed, 17 Jul 2024 15:07:42 +0900 +Subject: [PATCH 1/3] [AMDGPU] Implement workaround for GFX11.5 export priority + +On GFX11.5 shaders having completed exports need to execute/wait +at a lower priority than shaders still executing exports. +Add code to maintain normal priority of 2 for shaders that export +and drop to priority 0 after exports. 
+--- + llvm/lib/Target/AMDGPU/AMDGPU.td | 15 +- + .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 112 ++++++ + llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 + + llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3 + + .../AMDGPU/required-export-priority.ll | 344 ++++++++++++++++++ + .../AMDGPU/required-export-priority.mir | 293 +++++++++++++++ + 6 files changed, 765 insertions(+), 3 deletions(-) + create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.ll + create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.mir + +diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td +index dfc8eaea66f7b..14fcf6a210a78 100644 +--- a/llvm/lib/Target/AMDGPU/AMDGPU.td ++++ b/llvm/lib/Target/AMDGPU/AMDGPU.td +@@ -947,6 +947,12 @@ def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset", + "Has restricted SOffset (immediate not supported)." + >; + ++def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority", ++ "HasRequiredExportPriority", ++ "true", ++ "Export priority must be explicitly manipulated on GFX11.5" ++>; ++ + //===------------------------------------------------------------===// + // Subtarget Features (options and debugging) + //===------------------------------------------------------------===// +@@ -1597,14 +1603,16 @@ def FeatureISAVersion11_5_0 : FeatureSet< + !listconcat(FeatureISAVersion11_Common.Features, + [FeatureSALUFloatInsts, + FeatureDPPSrc1SGPR, +- FeatureVGPRSingleUseHintInsts])>; ++ FeatureVGPRSingleUseHintInsts, ++ FeatureRequiredExportPriority])>; + + def FeatureISAVersion11_5_1 : FeatureSet< + !listconcat(FeatureISAVersion11_Common.Features, + [FeatureSALUFloatInsts, + FeatureDPPSrc1SGPR, + FeatureVGPRSingleUseHintInsts, +- FeatureGFX11FullVGPRs])>; ++ FeatureGFX11FullVGPRs, ++ FeatureRequiredExportPriority])>; + + def FeatureISAVersion12 : FeatureSet< + [FeatureGFX12, +diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp 
b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +index a402fc6d7e611..a8b171aa82840 100644 +--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp ++++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +@@ -14,6 +14,7 @@ + #include "GCNSubtarget.h" + #include "MCTargetDesc/AMDGPUMCTargetDesc.h" + #include "SIMachineFunctionInfo.h" ++#include "llvm/CodeGen/MachineFrameInfo.h" + #include "llvm/CodeGen/MachineFunction.h" + #include "llvm/CodeGen/ScheduleDAG.h" + #include "llvm/TargetParser/TargetParser.h" +@@ -1104,6 +1105,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { + fixWMMAHazards(MI); + fixShift64HighRegBug(MI); + fixVALUMaskWriteHazard(MI); ++ fixRequiredExportPriority(MI); + } + + bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) { +@@ -2895,3 +2897,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) { + + return true; + } ++ ++static bool ensureEntrySetPrio(MachineFunction *MF, int Priority, ++ const SIInstrInfo &TII) { ++ MachineBasicBlock &EntryMBB = MF->front(); ++ if (EntryMBB.begin() != EntryMBB.end()) { ++ auto &EntryMI = *EntryMBB.begin(); ++ if (EntryMI.getOpcode() == AMDGPU::S_SETPRIO && ++ EntryMI.getOperand(0).getImm() >= Priority) ++ return false; ++ } ++ ++ BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII.get(AMDGPU::S_SETPRIO)) ++ .addImm(Priority); ++ return true; ++} ++ ++bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) { ++ if (!ST.hasRequiredExportPriority()) ++ return false; ++ ++ // Assume the following shader types will never have exports, ++ // and avoid adding or adjusting S_SETPRIO. 
++ MachineBasicBlock *MBB = MI->getParent(); ++ MachineFunction *MF = MBB->getParent(); ++ auto CC = MF->getFunction().getCallingConv(); ++ switch (CC) { ++ case CallingConv::AMDGPU_CS: ++ case CallingConv::AMDGPU_CS_Chain: ++ case CallingConv::AMDGPU_CS_ChainPreserve: ++ case CallingConv::AMDGPU_KERNEL: ++ return false; ++ default: ++ break; ++ } ++ ++ const int MaxPriority = 3; ++ const int NormalPriority = 2; ++ const int PostExportPriority = 0; ++ ++ auto It = MI->getIterator(); ++ switch (MI->getOpcode()) { ++ case AMDGPU::S_ENDPGM: ++ case AMDGPU::S_ENDPGM_SAVED: ++ case AMDGPU::S_ENDPGM_ORDERED_PS_DONE: ++ case AMDGPU::SI_RETURN_TO_EPILOG: ++ // Ensure shader with calls raises priority at entry. ++ // This ensures correct priority if exports exist in callee. ++ if (MF->getFrameInfo().hasCalls()) ++ return ensureEntrySetPrio(MF, NormalPriority, TII); ++ return false; ++ case AMDGPU::S_SETPRIO: { ++ // Raise minimum priority unless in workaround. ++ auto &PrioOp = MI->getOperand(0); ++ int Prio = PrioOp.getImm(); ++ bool InWA = (Prio == PostExportPriority) && ++ (It != MBB->begin() && TII.isEXP(*std::prev(It))); ++ if (InWA || Prio >= NormalPriority) ++ return false; ++ PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority)); ++ return true; ++ } ++ default: ++ if (!TII.isEXP(*MI)) ++ return false; ++ break; ++ } ++ ++ // Check entry priority at each export (as there will only be a few). ++ // Note: amdgpu_gfx can only be a callee, so defer to caller setprio. ++ bool Changed = false; ++ if (CC != CallingConv::AMDGPU_Gfx) ++ Changed = ensureEntrySetPrio(MF, NormalPriority, TII); ++ ++ auto NextMI = std::next(It); ++ bool EndOfShader = false; ++ if (NextMI != MBB->end()) { ++ // Only need WA at end of sequence of exports. ++ if (TII.isEXP(*NextMI)) ++ return Changed; ++ // Assume appropriate S_SETPRIO after export means WA already applied. 
++ if (NextMI->getOpcode() == AMDGPU::S_SETPRIO && ++ NextMI->getOperand(0).getImm() == PostExportPriority) ++ return Changed; ++ EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM; ++ } ++ ++ const DebugLoc &DL = MI->getDebugLoc(); ++ ++ // Lower priority. ++ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO)) ++ .addImm(PostExportPriority); ++ ++ if (!EndOfShader) { ++ // Wait for exports to complete. ++ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT)) ++ .addReg(AMDGPU::SGPR_NULL) ++ .addImm(0); ++ } ++ ++ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0); ++ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0); ++ ++ if (!EndOfShader) { ++ // Return to normal (higher) priority. ++ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO)) ++ .addImm(NormalPriority); ++ } ++ ++ return true; ++} +diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +index 3ccca527c626b..f2a64ab48e180 100644 +--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h ++++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +@@ -107,6 +107,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer { + bool fixWMMAHazards(MachineInstr *MI); + bool fixShift64HighRegBug(MachineInstr *MI); + bool fixVALUMaskWriteHazard(MachineInstr *MI); ++ bool fixRequiredExportPriority(MachineInstr *MI); + + int checkMAIHazards(MachineInstr *MI); + int checkMAIHazards908(MachineInstr *MI); +diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h +index e5817594a4521..def89c785b855 100644 +--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h ++++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h +@@ -238,6 +238,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, + bool HasVOPDInsts = false; + bool HasVALUTransUseHazard = false; + bool HasForceStoreSC0SC1 = false; ++ bool HasRequiredExportPriority = false; + + // Dummy feature to use for assembler in tablegen. 
+ bool FeatureDisable = false; +@@ -1282,6 +1283,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, + + bool hasRestrictedSOffset() const { return HasRestrictedSOffset; } + ++ bool hasRequiredExportPriority() const { return HasRequiredExportPriority; } ++ + /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt + /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively. + bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; } +diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll +new file mode 100644 +index 0000000000000..377902f3f0d1a +--- /dev/null ++++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll +@@ -0,0 +1,344 @@ ++; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ++; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ++ ++define amdgpu_ps void @test_export_zeroes_f32() #0 { ++; GCN-LABEL: test_export_zeroes_f32: ++; GCN: ; %bb.0: ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: v_mov_b32_e32 v0, 0 ++; GCN-NEXT: exp mrt0 off, off, off, off ++; GCN-NEXT: exp mrt0 off, off, off, off done ++; GCN-NEXT: s_setprio 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_endpgm ++ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false) ++ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false) ++ ret void ++} ++ ++define amdgpu_ps void @test_export_en_src0_f32() #0 { ++; GCN-LABEL: test_export_en_src0_f32: ++; GCN: ; %bb.0: ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: v_mov_b32_e32 v0, 4.0 ++; GCN-NEXT: v_mov_b32_e32 v1, 0.5 ++; GCN-NEXT: v_mov_b32_e32 v2, 2.0 ++; GCN-NEXT: v_mov_b32_e32 v3, 1.0 ++; GCN-NEXT: exp mrt0 v3, off, off, off done ++; GCN-NEXT: s_setprio 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_nop 0 ++; 
GCN-NEXT: s_endpgm ++ call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) ++ ret void ++} ++ ++define amdgpu_gs void @test_export_gs() #0 { ++; GCN-LABEL: test_export_gs: ++; GCN: ; %bb.0: ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: v_mov_b32_e32 v0, 4.0 ++; GCN-NEXT: v_mov_b32_e32 v1, 0.5 ++; GCN-NEXT: v_mov_b32_e32 v2, 2.0 ++; GCN-NEXT: v_mov_b32_e32 v3, 1.0 ++; GCN-NEXT: exp mrt0 off, v2, off, off done ++; GCN-NEXT: s_setprio 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_endpgm ++ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) ++ ret void ++} ++ ++define amdgpu_hs void @test_export_hs() #0 { ++; GCN-LABEL: test_export_hs: ++; GCN: ; %bb.0: ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: v_mov_b32_e32 v0, 4.0 ++; GCN-NEXT: v_mov_b32_e32 v1, 0.5 ++; GCN-NEXT: v_mov_b32_e32 v2, 2.0 ++; GCN-NEXT: v_mov_b32_e32 v3, 1.0 ++; GCN-NEXT: exp mrt0 off, v2, off, off done ++; GCN-NEXT: s_setprio 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_endpgm ++ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) ++ ret void ++} ++ ++define amdgpu_gfx void @test_export_gfx(float %v) #0 { ++; GCN-LABEL: test_export_gfx: ++; GCN: ; %bb.0: ++; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ++; GCN-NEXT: v_mov_b32_e32 v1, 4.0 ++; GCN-NEXT: v_mov_b32_e32 v2, 0.5 ++; GCN-NEXT: v_mov_b32_e32 v3, 2.0 ++; GCN-NEXT: exp mrt0 off, v3, off, off done ++; GCN-NEXT: s_setprio 0 ++; GCN-NEXT: s_waitcnt_expcnt null, 0x0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: s_waitcnt expcnt(0) ++; GCN-NEXT: s_setpc_b64 s[30:31] ++ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %v, float 2.0, float 0.5, float 4.0, i1 true, i1 false) ++ ret void ++} ++ ++define amdgpu_cs void @test_export_cs() #0 { ++; GCN-LABEL: test_export_cs: ++; GCN: ; %bb.0: ++; GCN-NEXT: v_mov_b32_e32 v0, 
4.0 ++; GCN-NEXT: v_mov_b32_e32 v1, 0.5 ++; GCN-NEXT: v_mov_b32_e32 v2, 2.0 ++; GCN-NEXT: v_mov_b32_e32 v3, 1.0 ++; GCN-NEXT: exp mrt0 off, v2, off, off done ++; GCN-NEXT: s_endpgm ++ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) ++ ret void ++} ++ ++define amdgpu_kernel void @test_export_kernel() #0 { ++; GCN-LABEL: test_export_kernel: ++; GCN: ; %bb.0: ++; GCN-NEXT: v_mov_b32_e32 v0, 4.0 ++; GCN-NEXT: v_mov_b32_e32 v1, 0.5 ++; GCN-NEXT: v_mov_b32_e32 v2, 2.0 ++; GCN-NEXT: v_mov_b32_e32 v3, 1.0 ++; GCN-NEXT: exp mrt0 off, v2, off, off done ++; GCN-NEXT: s_endpgm ++ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false) ++ ret void ++} ++ ++define amdgpu_gfx void @test_no_export_gfx(float %v) #0 { ++; GCN-LABEL: test_no_export_gfx: ++; GCN: ; %bb.0: ++; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ++; GCN-NEXT: s_setpc_b64 s[30:31] ++ ret void ++} ++ ++define amdgpu_ps void @test_no_export_ps(float %v) #0 { ++; GCN-LABEL: test_no_export_ps: ++; GCN: ; %bb.0: ++; GCN-NEXT: s_endpgm ++ ret void ++} ++ ++define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 { ++; GCN-LABEL: test_if_export_f32: ++; GCN: ; %bb.0: ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: s_mov_b32 s0, exec_lo ++; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0 ++; GCN-NEXT: s_cbranch_execz .LBB9_2 ++; GCN-NEXT: ; %bb.1: ; %exp ++; GCN-NEXT: exp mrt0 v1, v2, v3, v4 ++; GCN-NEXT: s_setprio 0 ++; GCN-NEXT: s_waitcnt_expcnt null, 0x0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: .LBB9_2: ; %end ++; GCN-NEXT: s_endpgm ++ %cc = icmp eq i32 %flag, 0 ++ br i1 %cc, label %end, label %exp ++ ++exp: ++ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false) ++ br label %end ++ ++end: ++ ret void ++} ++ ++define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float 
%z, float %w) #0 { ++; GCN-LABEL: test_if_export_vm_f32: ++; GCN: ; %bb.0: ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: s_mov_b32 s0, exec_lo ++; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0 ++; GCN-NEXT: s_cbranch_execz .LBB10_2 ++; GCN-NEXT: ; %bb.1: ; %exp ++; GCN-NEXT: exp mrt0 v1, v2, v3, v4 ++; GCN-NEXT: s_setprio 0 ++; GCN-NEXT: s_waitcnt_expcnt null, 0x0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: .LBB10_2: ; %end ++; GCN-NEXT: s_endpgm ++ %cc = icmp eq i32 %flag, 0 ++ br i1 %cc, label %end, label %exp ++ ++exp: ++ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true) ++ br label %end ++ ++end: ++ ret void ++} ++ ++define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 { ++; GCN-LABEL: test_if_export_done_f32: ++; GCN: ; %bb.0: ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: s_mov_b32 s0, exec_lo ++; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0 ++; GCN-NEXT: s_cbranch_execz .LBB11_2 ++; GCN-NEXT: ; %bb.1: ; %exp ++; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done ++; GCN-NEXT: s_setprio 0 ++; GCN-NEXT: s_waitcnt_expcnt null, 0x0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: .LBB11_2: ; %end ++; GCN-NEXT: s_endpgm ++ %cc = icmp eq i32 %flag, 0 ++ br i1 %cc, label %end, label %exp ++ ++exp: ++ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false) ++ br label %end ++ ++end: ++ ret void ++} ++ ++define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 { ++; GCN-LABEL: test_if_export_vm_done_f32: ++; GCN: ; %bb.0: ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: s_mov_b32 s0, exec_lo ++; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0 ++; GCN-NEXT: s_cbranch_execz .LBB12_2 ++; GCN-NEXT: ; %bb.1: ; %exp ++; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done ++; GCN-NEXT: s_setprio 0 ++; GCN-NEXT: s_waitcnt_expcnt null, 0x0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_nop 0 ++; 
GCN-NEXT: s_setprio 2 ++; GCN-NEXT: .LBB12_2: ; %end ++; GCN-NEXT: s_endpgm ++ %cc = icmp eq i32 %flag, 0 ++ br i1 %cc, label %end, label %exp ++ ++exp: ++ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true) ++ br label %end ++ ++end: ++ ret void ++} ++ ++define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 { ++; GCN-LABEL: test_export_pos_before_param_across_load: ++; GCN: ; %bb.0: ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen ++; GCN-NEXT: v_mov_b32_e32 v1, 0 ++; GCN-NEXT: v_mov_b32_e32 v2, 1.0 ++; GCN-NEXT: v_mov_b32_e32 v3, 0.5 ++; GCN-NEXT: s_waitcnt vmcnt(0) ++; GCN-NEXT: exp pos0 v1, v1, v1, v0 done ++; GCN-NEXT: exp invalid_target_32 v2, v2, v2, v2 ++; GCN-NEXT: exp invalid_target_33 v2, v2, v2, v3 ++; GCN-NEXT: s_setprio 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_endpgm ++ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false) ++ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false) ++ %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0) ++ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false) ++ ret void ++} ++ ++define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 { ++; GCN-LABEL: test_export_across_store_load: ++; GCN: ; %bb.0: ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: v_mov_b32_e32 v2, 24 ++; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 ++; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2) ++; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 8, vcc_lo ++; GCN-NEXT: v_mov_b32_e32 v2, 0 ++; GCN-NEXT: scratch_store_b32 v0, v1, off ++; GCN-NEXT: scratch_load_b32 v0, off, off ++; GCN-NEXT: v_mov_b32_e32 v1, 1.0 ++; GCN-NEXT: exp pos0 v2, v2, v2, v1 done ++; GCN-NEXT: s_setprio 0 ++; GCN-NEXT: s_waitcnt_expcnt null, 
0x0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: s_waitcnt vmcnt(0) ++; GCN-NEXT: exp invalid_target_32 v0, v2, v1, v2 ++; GCN-NEXT: exp invalid_target_33 v0, v2, v1, v2 ++; GCN-NEXT: s_setprio 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_nop 0 ++; GCN-NEXT: s_endpgm ++ %data0 = alloca <4 x float>, align 8, addrspace(5) ++ %data1 = alloca <4 x float>, align 8, addrspace(5) ++ %cmp = icmp eq i32 %idx, 1 ++ %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1 ++ store float %v, ptr addrspace(5) %data, align 8 ++ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false) ++ %load0 = load float, ptr addrspace(5) %data0, align 8 ++ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false) ++ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false) ++ ret void ++} ++ ++define amdgpu_ps void @test_export_in_callee(float %v) #0 { ++; GCN-LABEL: test_export_in_callee: ++; GCN: ; %bb.0: ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: s_getpc_b64 s[0:1] ++; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4 ++; GCN-NEXT: s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12 ++; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0 ++; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ++; GCN-NEXT: s_mov_b32 s32, 0 ++; GCN-NEXT: s_waitcnt lgkmcnt(0) ++; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1] ++; GCN-NEXT: s_endpgm ++ %x = fadd float %v, 1.0 ++ call void @test_export_gfx(float %x) ++ ret void ++} ++ ++define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 { ++; GCN-LABEL: test_export_in_callee_prio: ++; GCN: ; %bb.0: ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: s_mov_b32 s32, 0 ++; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0 ++; GCN-NEXT: s_setprio 2 ++; GCN-NEXT: s_getpc_b64 s[0:1] ++; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4 ++; GCN-NEXT: s_addc_u32 s1, s1, 
test_export_gfx@gotpcrel32@hi+12 ++; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ++; GCN-NEXT: s_waitcnt lgkmcnt(0) ++; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1] ++; GCN-NEXT: s_endpgm ++ %x = fadd float %v, 1.0 ++ call void @llvm.amdgcn.s.setprio(i16 0) ++ call void @test_export_gfx(float %x) ++ ret void ++} ++ ++declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1 ++declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1 ++declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2 ++declare void @llvm.amdgcn.s.setprio(i16) ++ ++attributes #0 = { nounwind } ++attributes #1 = { nounwind inaccessiblememonly } ++attributes #2 = { nounwind readnone } +diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.mir b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir +new file mode 100644 +index 0000000000000..eee04468036e5 +--- /dev/null ++++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir +@@ -0,0 +1,293 @@ ++# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 ++# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=post-RA-hazard-rec -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX1150 %s ++ ++--- | ++ define amdgpu_ps void @end_of_shader() { ++ ret void ++ } ++ define amdgpu_ps void @end_of_shader_return_to_epilogue() { ++ ret void ++ } ++ define amdgpu_ps void @end_of_block() { ++ ret void ++ } ++ define amdgpu_ps void @start_of_block() { ++ ret void ++ } ++ define amdgpu_ps void @block_of_exports() { ++ ret void ++ } ++ define amdgpu_ps void @sparse_exports() { ++ ret void ++ } ++ define amdgpu_ps void @existing_setprio_1() { ++ ret void ++ } ++ define amdgpu_ps void @existing_setprio_2() { ++ ret void ++ } ++... 
++ ++--- ++name: end_of_shader ++tracksRegLiveness: true ++liveins: ++ - { reg: '$vgpr0' } ++body: | ++ bb.0: ++ liveins: $vgpr0 ++ ; GFX1150-LABEL: name: end_of_shader ++ ; GFX1150: liveins: $vgpr0 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: S_SETPRIO 2 ++ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ; GFX1150-NEXT: S_SETPRIO 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_ENDPGM 0 ++ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ S_ENDPGM 0 ++... ++ ++--- ++name: end_of_shader_return_to_epilogue ++tracksRegLiveness: true ++liveins: ++ - { reg: '$vgpr0' } ++body: | ++ bb.0: ++ liveins: $vgpr0 ++ ; GFX1150-LABEL: name: end_of_shader_return_to_epilogue ++ ; GFX1150: liveins: $vgpr0 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: S_SETPRIO 2 ++ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ; GFX1150-NEXT: S_SETPRIO 0 ++ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_SETPRIO 2 ++ ; GFX1150-NEXT: SI_RETURN_TO_EPILOG $vgpr0 ++ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ SI_RETURN_TO_EPILOG $vgpr0 ++... 
++ ++--- ++name: end_of_block ++tracksRegLiveness: true ++liveins: ++ - { reg: '$vgpr0' } ++body: | ++ ; GFX1150-LABEL: name: end_of_block ++ ; GFX1150: bb.0: ++ ; GFX1150-NEXT: successors: %bb.1(0x80000000) ++ ; GFX1150-NEXT: liveins: $vgpr0 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: S_SETPRIO 2 ++ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ; GFX1150-NEXT: S_SETPRIO 0 ++ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_SETPRIO 2 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: bb.1: ++ ; GFX1150-NEXT: S_ENDPGM 0 ++ bb.0: ++ liveins: $vgpr0 ++ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ++ bb.1: ++ S_ENDPGM 0 ++... ++ ++--- ++name: start_of_block ++tracksRegLiveness: true ++liveins: ++ - { reg: '$vgpr0' } ++body: | ++ ; GFX1150-LABEL: name: start_of_block ++ ; GFX1150: bb.0: ++ ; GFX1150-NEXT: successors: %bb.1(0x80000000) ++ ; GFX1150-NEXT: liveins: $vgpr0 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: S_SETPRIO 2 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: bb.1: ++ ; GFX1150-NEXT: successors: %bb.2(0x80000000) ++ ; GFX1150-NEXT: liveins: $vgpr0 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ; GFX1150-NEXT: S_SETPRIO 0 ++ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_SETPRIO 2 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: bb.2: ++ ; GFX1150-NEXT: S_ENDPGM 0 ++ bb.0: ++ liveins: $vgpr0 ++ ++ bb.1: ++ liveins: $vgpr0 ++ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ++ bb.2: ++ S_ENDPGM 0 ++... 
++ ++--- ++name: block_of_exports ++tracksRegLiveness: true ++liveins: ++ - { reg: '$vgpr0' } ++body: | ++ bb.0: ++ liveins: $vgpr0 ++ ; GFX1150-LABEL: name: block_of_exports ++ ; GFX1150: liveins: $vgpr0 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: S_SETPRIO 2 ++ ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ; GFX1150-NEXT: S_SETPRIO 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_ENDPGM 0 ++ EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ S_ENDPGM 0 ++... ++ ++--- ++name: sparse_exports ++tracksRegLiveness: true ++liveins: ++ - { reg: '$vgpr0' } ++body: | ++ bb.0: ++ liveins: $vgpr0 ++ ; GFX1150-LABEL: name: sparse_exports ++ ; GFX1150: liveins: $vgpr0 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: S_SETPRIO 2 ++ ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ; GFX1150-NEXT: S_SETPRIO 0 ++ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_SETPRIO 2 ++ ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec ++ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ; GFX1150-NEXT: S_SETPRIO 0 ++ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_SETPRIO 2 ++ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec ++ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ; GFX1150-NEXT: S_SETPRIO 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_ENDPGM 0 ++ EXP 2, $vgpr0, $vgpr0, 
$vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec ++ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec ++ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ S_ENDPGM 0 ++... ++ ++--- ++name: existing_setprio_1 ++tracksRegLiveness: true ++liveins: ++ - { reg: '$vgpr0' } ++body: | ++ ; GFX1150-LABEL: name: existing_setprio_1 ++ ; GFX1150: bb.0: ++ ; GFX1150-NEXT: successors: %bb.1(0x80000000) ++ ; GFX1150-NEXT: liveins: $vgpr0 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: S_SETPRIO 2 ++ ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: bb.1: ++ ; GFX1150-NEXT: successors: %bb.2(0x80000000) ++ ; GFX1150-NEXT: liveins: $vgpr0 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: S_SETPRIO 3 ++ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec ++ ; GFX1150-NEXT: S_SETPRIO 2 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: bb.2: ++ ; GFX1150-NEXT: successors: %bb.3(0x80000000) ++ ; GFX1150-NEXT: liveins: $vgpr0 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: S_SETPRIO 3 ++ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec ++ ; GFX1150-NEXT: S_SETPRIO 2 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: bb.3: ++ ; GFX1150-NEXT: liveins: $vgpr0 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ; GFX1150-NEXT: S_SETPRIO 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_ENDPGM 0 ++ bb.0: ++ liveins: $vgpr0 ++ $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec ++ ++ bb.1: ++ liveins: $vgpr0 ++ S_SETPRIO 3 ++ $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec ++ S_SETPRIO 0 ++ ++ bb.2: ++ liveins: $vgpr0 ++ S_SETPRIO 1 ++ $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec ++ S_SETPRIO 0 ++ ++ bb.3: ++ liveins: 
$vgpr0 ++ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ S_ENDPGM 0 ++... ++ ++--- ++name: existing_setprio_2 ++tracksRegLiveness: true ++liveins: ++ - { reg: '$vgpr0' } ++body: | ++ bb.0: ++ liveins: $vgpr0 ++ ; GFX1150-LABEL: name: existing_setprio_2 ++ ; GFX1150: liveins: $vgpr0 ++ ; GFX1150-NEXT: {{ $}} ++ ; GFX1150-NEXT: S_SETPRIO 3 ++ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ ; GFX1150-NEXT: S_SETPRIO 0 ++ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_NOP 0 ++ ; GFX1150-NEXT: S_SETPRIO 2 ++ ; GFX1150-NEXT: S_SETPRIO 3 ++ ; GFX1150-NEXT: S_ENDPGM 0 ++ S_SETPRIO 3 ++ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec ++ S_SETPRIO 3 ++ S_ENDPGM 0 ++... + +From 8ea44e65f2c19facff751aeb2ac960f907fb210f Mon Sep 17 00:00:00 2001 +From: Carl Ritson +Date: Wed, 17 Jul 2024 16:18:02 +0900 +Subject: [PATCH 2/3] Remove -verify-machineinstrs from test. 
+ +--- + llvm/test/CodeGen/AMDGPU/required-export-priority.ll | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll +index 377902f3f0d1a..ebc209bd4d451 100644 +--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll ++++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll +@@ -1,5 +1,5 @@ + ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +-; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ++; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GCN %s + + define amdgpu_ps void @test_export_zeroes_f32() #0 { + ; GCN-LABEL: test_export_zeroes_f32: diff --git a/llvm.spec b/llvm.spec index ebfafa2..6f25bdf 100644 --- a/llvm.spec +++ b/llvm.spec @@ -2,7 +2,7 @@ #region version %global maj_ver 19 %global min_ver 1 -%global patch_ver 1 +%global patch_ver 7 #global rc_ver 4 %bcond_with snapshot_build @@ -24,7 +24,7 @@ %bcond_with compat_build # Bundle compat libraries for a previous LLVM version, as part of llvm-libs and # clang-libs. Used on RHEL. 
-%bcond_without bundle_compat_lib +%bcond_with bundle_compat_lib %bcond_without check %if %{with bundle_compat_lib} @@ -56,6 +56,11 @@ # See https://docs.fedoraproject.org/en-US/packaging-guidelines/#_compiler_macros %global toolchain clang + +%if %{defined rhel} && 0%{?rhel} < 10 +%global gts_version 14 +%endif + # Opt out of https://fedoraproject.org/wiki/Changes/fno-omit-frame-pointer # https://bugzilla.redhat.com/show_bug.cgi?id=2158587 %undefine _include_frame_pointers @@ -177,7 +182,7 @@ #region main package Name: %{pkg_name_llvm} Version: %{maj_ver}.%{min_ver}.%{patch_ver}%{?rc_ver:~rc%{rc_ver}}%{?llvm_snapshot_version_suffix:~%{llvm_snapshot_version_suffix}} -Release: 5%{?dist} +Release: 1%{?dist} Summary: The Low Level Virtual Machine License: Apache-2.0 WITH LLVM-exception OR NCSA @@ -206,43 +211,79 @@ Source3001: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{com Source1000: version.spec.inc %endif +# We've established the habit of numbering patches the following way: +# +# 0-499: All patches that are unconditionally applied +# 500-1000: Patches applied under certain conditions (e.g. only on RHEL8) +# 1500-1599: Patches for LLVM 15 +# 1600-1699: Patches for LLVM 16 +# 1700-1799: Patches for LLVM 17 +# ... +# 2000-2099: Patches for LLVM 20 +# +# The idea behind this is that the last range of patch numbers (e.g. 2000-2099) allow +# us to "deprecate" a patch instead of deleting it right away. +# Suppose llvm upstream in git is at version 20 and there's a patch living +# in some PR that has not been merged yet. You can copy that patch and put it +# in a line like: +# +# Patch2011: upstream.patch +# +# As time goes by, llvm moves on to LLVM 21 and meanwhile the patch has landed. +# There's no need for you to remove the "Patch2011:" line. In fact, we encourage you +# to not remove it for some time. For compat libraries and compat packages we might +# still need this patch and so we're applying it automatically for you in those +# situations. 
Remember that a compat library is always at least one major version +# behind the latest packaged LLVM version. + #region OpenMP patches -%if %{maj_ver} < 20 -Patch1001: 0001-openmp-Add-option-to-disable-tsan-tests-111548.patch -Patch1002: 0001-openmp-Use-core_siblings_list-if-physical_package_id.patch -%endif +Patch1900: 0001-openmp-Add-option-to-disable-tsan-tests-111548.patch +Patch1901: 0001-openmp-Use-core_siblings_list-if-physical_package_id.patch #endregion OpenMP patches #region CLANG patches -Patch2001: 0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch -Patch2002: 0003-PATCH-clang-Don-t-install-static-libraries.patch +Patch101: 0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch +Patch102: 0003-PATCH-clang-Don-t-install-static-libraries.patch #endregion CLANG patches # Workaround a bug in ORC on ppc64le. # More info is available here: https://reviews.llvm.org/D159115#4641826 -Patch2005: 0001-Workaround-a-bug-in-ORC-on-ppc64le.patch +Patch103: 0001-Workaround-a-bug-in-ORC-on-ppc64le.patch + +# With the introduction of --gcc-include-dir in the clang config file, +# this might no longer be needed. +Patch104: 0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch #region LLD patches -Patch3002: 0001-Always-build-shared-libs-for-LLD.patch +Patch1800: 0001-18-Always-build-shared-libs-for-LLD.patch +Patch1902: 0001-19-Always-build-shared-libs-for-LLD.patch +Patch2000: 0001-19-Always-build-shared-libs-for-LLD.patch #endregion LLD patches #region RHEL patches -# All RHEL -%if %{maj_ver} >= 20 -Patch9001: 0001-20-Remove-myst_parser-dependency-for-RHEL.patch -%else -Patch9001: 0001-19-Remove-myst_parser-dependency-for-RHEL.patch -%endif - # RHEL 8 only -Patch9002: 0001-Fix-page-size-constant-on-aarch64-and-ppc64le.patch +Patch501: 0001-Fix-page-size-constant-on-aarch64-and-ppc64le.patch #endregion RHEL patches +# Backport with modifications from +# https://github.com/llvm/llvm-project/pull/99273 +# Fixes RHEL-49517. 
+Patch1801: 18-99273.patch + +# Fix profiling after a binutils NOTE change. +# https://github.com/llvm/llvm-project/pull/114907 +Patch1802: 0001-profile-Use-base-vaddr-for-__llvm_write_binary_ids-n.patch +Patch1903: 0001-profile-Use-base-vaddr-for-__llvm_write_binary_ids-n.patch + %if 0%{?rhel} == 8 %global python3_pkgversion 3.12 %global __python3 /usr/bin/python3.12 %endif +%if %{defined gts_version} +# Required for 64-bit atomics on i686. +BuildRequires: gcc-toolset-%{gts_version}-libatomic-devel +%endif BuildRequires: gcc BuildRequires: gcc-c++ BuildRequires: clang @@ -250,6 +291,7 @@ BuildRequires: cmake BuildRequires: chrpath BuildRequires: ninja-build BuildRequires: zlib-devel +BuildRequires: libzstd-devel BuildRequires: libffi-devel BuildRequires: ncurses-devel # This intentionally does not use python3_pkgversion. RHEL 8 does not have @@ -268,6 +310,9 @@ BuildRequires: python%{python3_pkgversion}-myst-parser BuildRequires: multilib-rpm-config %if %{with gold} BuildRequires: binutils-devel +%if %{undefined rhel} || 0%{?rhel} > 8 +BuildRequires: binutils-gold +%endif %endif %ifarch %{valgrind_arches} # Enable extra functionality when run the LLVM JIT under valgrind. @@ -326,7 +371,7 @@ BuildRequires: procps-ng # For reproducible pyc file generation # See https://docs.fedoraproject.org/en-US/packaging-guidelines/Python_Appendix/#_byte_compilation_reproducibility # Since Fedora 41 this happens automatically, and RHEL 8 does not support this. -%if (%{defined fedora} && 0%{?fedora} < 41) || 0%{?rhel} == 9 || 0%{?rhel} == 10 +%if %{without compat_build} && ((%{defined fedora} && 0%{?fedora} < 41) || 0%{?rhel} == 9 || 0%{?rhel} == 10) BuildRequires: /usr/bin/marshalparser %global py_reproducible_pyc_path %{buildroot}%{python3_sitelib} %endif @@ -482,6 +527,9 @@ libomp-devel to enable -fopenmp. 
%package -n %{pkg_name_clang}-libs Summary: Runtime library for clang Requires: %{pkg_name_clang}-resource-filesystem%{?_isa} = %{version}-%{release} +%if %{defined gts_version} +Requires: gcc-toolset-%{gts_version}-gcc-c++ +%endif Recommends: %{pkg_name_compiler_rt}%{?_isa} = %{version}-%{release} Requires: %{pkg_name_llvm}-libs = %{version}-%{release} # atomic support is not part of compiler-rt @@ -600,6 +648,7 @@ Summary: OpenMP runtime for clang URL: http://openmp.llvm.org +Requires: %{pkg_name_llvm}-libs%{?_isa} = %{version}-%{release} Requires: elfutils-libelf%{?_isa} Provides: libomp(major) = %{maj_ver} @@ -632,6 +681,7 @@ Requires(post): %{_sbindir}/update-alternatives Requires(preun): %{_sbindir}/update-alternatives Requires: %{pkg_name_lld}-libs = %{version}-%{release} +Provides: lld(major) = %{maj_ver} %description -n %{pkg_name_lld} The LLVM project linker. @@ -723,6 +773,14 @@ The package contains the LLDB Python module. %if %{with bundle_compat_lib} %{gpgverify} --keyring='%{SOURCE6}' --signature='%{SOURCE3001}' --data='%{SOURCE3000}' %setup -T -q -b 3000 -n llvm-project-%{compat_ver}.src + +# Apply all patches with number < 500 (unconditionally) +# See https://rpm-software-management.github.io/rpm/manual/autosetup.html +%autopatch -M499 -p1 + +# automatically apply patches based on LLVM version +%autopatch -m%{compat_maj_ver}00 -M%{compat_maj_ver}99 -p1 + %endif # -T : Do Not Perform Default Archive Unpacking (without this, the th source would be unpacked twice) @@ -732,16 +790,15 @@ The package contains the LLDB Python module. 
# see http://ftp.rpm.org/max-rpm/s1-rpm-inside-macros.html %autosetup -N -T -b 0 -n %{src_tarball_dir} -# Apply all patches with number <= 9000 +# Apply all patches with number < 500 (unconditionally) # See https://rpm-software-management.github.io/rpm/manual/autosetup.html -%autopatch -M9000 -p1 +%autopatch -M499 -p1 -%if %{defined rhel} -%patch -p1 -P9001 +# automatically apply patches based on LLVM version +%autopatch -m%{maj_ver}00 -M%{maj_ver}99 -p1 -%if %{rhel} == 8 -%patch -p1 -P9002 -%endif +%if %{defined rhel} && 0%{?rhel} == 8 +%patch -p1 -P501 %endif #region LLVM preparation @@ -777,10 +834,6 @@ The package contains the LLDB Python module. #endregion COMPILER-RT preparation -#region LLDB preparation -# Empty lldb/docs/CMakeLists.txt because we cannot build it -echo "" > lldb/docs/CMakeLists.txt -#endregion LLDB preparation #endregion prep #region build @@ -796,6 +849,7 @@ echo "" > lldb/docs/CMakeLists.txt %endif %if %reduce_debuginfo == 1 +# Decrease debuginfo verbosity to reduce memory consumption during final library linking %global optflags %(echo %{optflags} | sed 's/-g /-g1 /') %endif @@ -831,7 +885,18 @@ popd %endif #region cmake options -%global cmake_config_args "" + +# Common cmake arguments used by both the normal build and bundle_compat_lib. +# Any ABI-affecting flags should be in here. 
+%global cmake_common_args \\\ + -DLLVM_ENABLE_EH=ON \\\ + -DLLVM_ENABLE_RTTI=ON \\\ + -DLLVM_USE_PERF=ON \\\ + -DLLVM_TARGETS_TO_BUILD=%{targets_to_build} \\\ + -DBUILD_SHARED_LIBS=OFF \\\ + -DLLVM_BUILD_LLVM_DYLIB=ON + +%global cmake_config_args %{cmake_common_args} #region clang options %global cmake_config_args %{cmake_config_args} \\\ @@ -861,14 +926,20 @@ popd #endregion compiler-rt options #region docs options + +# Add all *enabled* documentation targets (no doxygen but sphinx) %global cmake_config_args %{cmake_config_args} \\\ - -DLLVM_BUILD_DOCS:BOOL=ON \\\ - -DLLVM_ENABLE_SPHINX:BOOL=ON \\\ - -DSPHINX_EXECUTABLE=%{_bindir}/sphinx-build-3 \\\ - -DSPHINX_WARNINGS_AS_ERRORS=OFF \\\ -DLLVM_ENABLE_DOXYGEN:BOOL=OFF \\\ - -DLLVM_INCLUDE_DOCS:BOOL=ON \\\ - -DLLVM_INSTALL_SPHINX_HTML_DIR=%{_pkgdocdir}/html + -DLLVM_ENABLE_SPHINX:BOOL=ON \\\ + -DLLVM_BUILD_DOCS:BOOL=ON + +# Configure sphinx: +# Build man-pages but no HTML docs using sphinx +%global cmake_config_args %{cmake_config_args} \\\ + -DSPHINX_EXECUTABLE=%{_bindir}/sphinx-build-3 \\\ + -DSPHINX_OUTPUT_HTML:BOOL=OFF \\\ + -DSPHINX_OUTPUT_MAN:BOOL=ON \\\ + -DSPHINX_WARNINGS_AS_ERRORS=OFF #endregion docs options #region lldb options @@ -892,21 +963,19 @@ popd -DLLVM_APPEND_VC_REV:BOOL=OFF \\\ -DLLVM_BUILD_EXAMPLES:BOOL=OFF \\\ -DLLVM_BUILD_EXTERNAL_COMPILER_RT:BOOL=ON \\\ - -DLLVM_BUILD_LLVM_DYLIB:BOOL=ON \\\ -DLLVM_BUILD_RUNTIME:BOOL=ON \\\ -DLLVM_BUILD_TOOLS:BOOL=ON \\\ -DLLVM_BUILD_UTILS:BOOL=ON \\\ -DLLVM_COMMON_CMAKE_UTILS=%{install_datadir}/llvm/cmake \\\ -DLLVM_DEFAULT_TARGET_TRIPLE=%{llvm_triple} \\\ -DLLVM_DYLIB_COMPONENTS="all" \\\ - -DLLVM_ENABLE_EH=ON \\\ -DLLVM_ENABLE_FFI:BOOL=ON \\\ -DLLVM_ENABLE_LIBCXX:BOOL=OFF \\\ -DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON \\\ -DLLVM_ENABLE_PROJECTS="%{projects}" \\\ - -DLLVM_ENABLE_RTTI:BOOL=ON \\\ -DLLVM_ENABLE_RUNTIMES="compiler-rt;openmp;offload" \\\ - -DLLVM_ENABLE_ZLIB:BOOL=ON \\\ + -DLLVM_ENABLE_ZLIB:BOOL=FORCE_ON \\\ + -DLLVM_ENABLE_ZSTD:BOOL=FORCE_ON \\\ 
-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=%{experimental_targets_to_build} \\\ -DLLVM_INCLUDE_BENCHMARKS=OFF \\\ -DLLVM_INCLUDE_EXAMPLES:BOOL=ON \\\ @@ -916,10 +985,8 @@ popd -DLLVM_INSTALL_UTILS:BOOL=ON \\\ -DLLVM_LINK_LLVM_DYLIB:BOOL=ON \\\ -DLLVM_PARALLEL_LINK_JOBS=1 \\\ - -DLLVM_TARGETS_TO_BUILD=%{targets_to_build} \\\ -DLLVM_TOOLS_INSTALL_DIR:PATH=bin \\\ -DLLVM_UNREACHABLE_OPTIMIZE:BOOL=OFF \\\ - -DLLVM_USE_PERF:BOOL=ON \\\ -DLLVM_UTILS_INSTALL_DIR:PATH=bin #endregion llvm options @@ -947,7 +1014,6 @@ popd #region misc options %global cmake_config_args %{cmake_config_args} \\\ - -DBUILD_SHARED_LIBS:BOOL=OFF \\\ -DCMAKE_BUILD_TYPE=RelWithDebInfo \\\ -DCMAKE_INSTALL_PREFIX=%{install_prefix} \\\ -DENABLE_LINKER_BUILD_ID:BOOL=ON \\\ @@ -1022,7 +1088,7 @@ fi %cmake_build -# If we don't build the runtimes target here, we'll have to wait for the %check +# If we don't build the runtimes target here, we'll have to wait for the %%check # section until these files are available but they need to be installed. # # /usr/lib64/libomptarget.devicertl.a @@ -1039,18 +1105,15 @@ cd .. 
-DCMAKE_INSTALL_PREFIX=%{buildroot}%{_libdir}/llvm%{compat_maj_ver}/ \ -DCMAKE_SKIP_RPATH=ON \ -DCMAKE_BUILD_TYPE=Release \ - -DBUILD_SHARED_LIBS=OFF \ - -DLLVM_BUILD_LLVM_DYLIB=ON \ - -DLLVM_ENABLE_EH=ON \ - -DLLVM_ENABLE_RTTI=ON \ - -DLLVM_ENABLE_PROJECTS=clang \ - -DLLVM_TARGETS_TO_BUILD=%{targets_to_build} \ + -DLLVM_ENABLE_PROJECTS="clang;lldb" \ -DLLVM_INCLUDE_BENCHMARKS=OFF \ - -DLLVM_INCLUDE_TESTS=OFF + -DLLVM_INCLUDE_TESTS=OFF \ + %{cmake_common_args} %ninja_build -C ../llvm-compat-libs LLVM %ninja_build -C ../llvm-compat-libs libclang.so %ninja_build -C ../llvm-compat-libs libclang-cpp.so +%ninja_build -C ../llvm-compat-libs liblldb.so %endif #endregion compat lib @@ -1124,6 +1187,35 @@ done mkdir -p %{buildroot}%{pkg_datadir}/llvm/cmake cp -Rv cmake/* %{buildroot}%{pkg_datadir}/llvm/cmake +# Install a placeholder to redirect users of the formerly shipped +# HTML documentation to the upstream HTML documentation. +mkdir -pv %{buildroot}%{_pkgdocdir}/html +cat <<EOF > %{buildroot}%{_pkgdocdir}/html/index.html + + + + LLVM %{maj_ver}.%{min_ver} documentation + + +

+ LLVM %{maj_ver}.%{min_ver} Documentation +

+ + + +EOF + #endregion LLVM installation #region CLANG installation @@ -1180,7 +1272,7 @@ rm -Rf %{buildroot}%{install_libdir}/{libear,libscanbuild} rm -Rf %{buildroot}%{install_datadir}/clang/*.el # Add clang++-{version} symlink -ln -s ../../%{install_bindir}/clang++ %{buildroot}%{install_bindir}/clang++-%{maj_ver} +ln -s clang++ %{buildroot}%{install_bindir}/clang++-%{maj_ver} %endif @@ -1201,9 +1293,7 @@ chmod a+x %{buildroot}%{install_datadir}/scan-view/{Reporter.py,startfile.py} rm -vf %{buildroot}%{install_datadir}/clang/clang-format-bbedit.applescript rm -vf %{buildroot}%{install_datadir}/clang/clang-format-sublime.py* -# TODO: Package html docs -rm -Rvf %{buildroot}%{install_docdir}/LLVM/clang/html -rm -Rvf %{buildroot}%{install_docdir}/LLVM/clang-tools/html +# Remove unpackaged files rm -Rvf %{buildroot}%{install_datadir}/clang-doc/clang-doc-default-stylesheet.css rm -Rvf %{buildroot}%{install_datadir}/clang-doc/index.js @@ -1220,11 +1310,28 @@ echo "%%clang%{maj_ver}_resource_dir %%{_prefix}/lib/clang/%{maj_ver}" >> %{buil # Install config file for clang %if %{maj_ver} >=18 -mkdir -p %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/ -echo "--gcc-triple=%{_target_cpu}-redhat-linux" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang.cfg -echo "--gcc-triple=%{_target_cpu}-redhat-linux" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang++.cfg +%global cfg_file_content --gcc-triple=%{_target_cpu}-redhat-linux + +# We want to use DWARF-5 on all snapshot builds. 
+%if %{without snapshot_build} && %{defined rhel} && 0%{?rhel} < 10 +%global cfg_file_content %{cfg_file_content} -gdwarf-4 -g0 %endif +%if %{defined gts_version} +%global cfg_file_content %{cfg_file_content} --gcc-install-dir=/opt/rh/gcc-toolset-%{gts_version}/root/%{_exec_prefix}/lib/gcc/%{_target_cpu}-redhat-linux/%{gts_version} +%endif + +mkdir -p %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/ +echo " %{cfg_file_content}" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang.cfg +echo " %{cfg_file_content}" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang++.cfg +%ifarch x86_64 +# On x86_64, install an additional set of config files so -m32 works. +echo " %{cfg_file_content}" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/i386-redhat-linux-gnu-clang.cfg +echo " %{cfg_file_content}" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/i386-redhat-linux-gnu-clang++.cfg +%endif +%endif + + #endregion CLANG installation #region COMPILER-RT installation @@ -1275,9 +1382,6 @@ rm %{buildroot}%{install_bindir}/llvm-omp-kernel-replay #region LLD installation -# Remove LLD's HTML documentation files -rm -Rvf %{buildroot}%{install_docdir}/LLVM/lld/html - %if %{without compat_build} # Required when using update-alternatives: # https://docs.fedoraproject.org/en-US/packaging-guidelines/Alternatives/ @@ -1341,6 +1445,7 @@ touch %{buildroot}%{_bindir}/llvm-config%{exec_suffix} install -m 0755 ../llvm-compat-libs/lib/libLLVM.so.%{compat_maj_ver}* %{buildroot}%{_libdir} install -m 0755 ../llvm-compat-libs/lib/libclang.so.%{compat_maj_ver}* %{buildroot}%{_libdir} install -m 0755 ../llvm-compat-libs/lib/libclang-cpp.so.%{compat_maj_ver}* %{buildroot}%{_libdir} +install -m 0755 ../llvm-compat-libs/lib/liblldb.so.%{compat_maj_ver}* %{buildroot}%{_libdir} %endif #endregion install @@ -1371,11 +1476,11 @@ function reset_test_opts() # See https://llvm.org/docs/CommandGuide/lit.html#general-options export LIT_OPTS="-vv --time-tests" - + # Set to 
mark tests as expected to fail. # See https://llvm.org/docs/CommandGuide/lit.html#cmdoption-lit-xfail unset LIT_XFAIL - + # Set to mark tests to not even run. # See https://llvm.org/docs/CommandGuide/lit.html#cmdoption-lit-filter-out # Unfortunately LIT_FILTER_OUT is not accepting a list but a regular expression. @@ -1387,7 +1492,7 @@ function reset_test_opts() unset LIT_FILTER_OUT # Set for filtering out unit tests. - # See http://google.github.io/googletest/advanced.html#running-a-subset-of-the-tests + # See http://google.github.io/googletest/advanced.html#running-a-subset-of-the-tests unset GTEST_FILTER } @@ -1404,7 +1509,7 @@ function reset_test_opts() # Then $LIT_FILTER_OUT should evaluate to: (foo|bar) function test_list_to_regex() { - local -n arr=$1 + local -n arr=$1 # Prepare LIT_FILTER_OUT regex from index bash array # Join each element with a pipe symbol (regex for "or") arr=$(printf "|%s" "${arr[@]}") @@ -1427,7 +1532,7 @@ reset_test_opts reset_test_opts # Xfail testing of update utility tools export LIT_XFAIL="tools/UpdateTestChecks" -%cmake_build --target check-llvm +%cmake_build --target check-llvm #endregion Test LLVM #region Test CLANG @@ -1498,6 +1603,14 @@ test_list_filter_out+=("libomp :: worksharing/for/omp_collapse_one_int.c") %ifarch s390x test_list_filter_out+=("libomp :: flush/omp_flush.c") +test_list_filter_out+=("libomp :: worksharing/for/omp_for_schedule_guided.c") +%endif + +%ifarch aarch64 s390x +# The following test has been failing intermittently on aarch64 and s390x. +# Re-enable it after https://github.com/llvm/llvm-project/issues/117773 +# gets fixed.
+test_list_filter_out+=("libarcher :: races/taskwait-depend.c") %endif # The following tests seem pass on ppc64le and x86_64 and aarch64 only: @@ -1601,7 +1714,14 @@ export LIT_XFAIL="$LIT_XFAIL;offloading/thread_state_2.c" export LIT_FILTER_OUT=$(test_list_to_regex test_list_filter_out) +%if 0%{?rhel} +# libomp tests are often very slow on s390x brew builders +%ifnarch s390x %cmake_build --target check-openmp +%endif +%else +%cmake_build --target check-openmp +%endif #endregion Test OPENMP %if %{with lldb} @@ -1612,12 +1732,12 @@ export LIT_FILTER_OUT=$(test_list_to_regex test_list_filter_out) ## reset_test_opts ## %%cmake_build --target check-lldb-unit ## #endregion LLDB unit tests -## +## ## #region LLDB SB API tests ## reset_test_opts ## %%cmake_build --target check-lldb-api ## #endregion LLDB SB API tests -## +## ## #region LLDB shell tests ## reset_test_opts ## %%cmake_build --target check-lldb-shell @@ -1674,7 +1794,7 @@ fi # alternative must be removed in order to give priority to a newly installed # compat package. if [[ $1 -eq 0 - || "x$(%{_bindir}/llvm-config-%{maj_ver} --version | awk -F . '{ print $1 }')" != "x%{maj_ver}" ]]; then + || "x$(%{_bindir}/llvm-config%{exec_suffix} --version | awk -F . 
'{ print $1 }')" != "x%{maj_ver}" ]]; then %{_sbindir}/update-alternatives --remove llvm-config-%{maj_ver} %{install_bindir}/llvm-config%{exec_suffix}-%{__isa_bits} fi %endif @@ -1962,14 +2082,14 @@ fi %files -n %{pkg_name_llvm}-libs %license llvm/LICENSE.TXT -%{install_libdir}/libLLVM-%{maj_ver}%{?llvm_snapshot_version_suffix:%{llvm_snapshot_version_suffix}}.so +%{install_libdir}/libLLVM-%{maj_ver}%{?llvm_snapshot_version_suffix}.so %if %{with gold} %{install_libdir}/LLVMgold.so %if %{without compat_build} %{_libdir}/bfd-plugins/LLVMgold.so %endif %endif -%{install_libdir}/libLLVM.so.%{maj_ver}.%{min_ver}%{?llvm_snapshot_version_suffix:%{llvm_snapshot_version_suffix}} +%{install_libdir}/libLLVM.so.%{maj_ver}.%{min_ver}%{?llvm_snapshot_version_suffix} %{install_libdir}/libLTO.so* %{install_libdir}/libRemarks.so* %if %{with compat_build} @@ -1999,7 +2119,7 @@ fi %files -n %{pkg_name_llvm}-doc %license llvm/LICENSE.TXT -%doc %{_pkgdocdir}/html +%doc %{_pkgdocdir}/html/index.html %files -n %{pkg_name_llvm}-static %license llvm/LICENSE.TXT @@ -2056,6 +2176,10 @@ fi %{install_bindir}/clang-cpp %{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang.cfg %{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang++.cfg +%ifarch x86_64 +%{_sysconfdir}/%{pkg_name_clang}/i386-redhat-linux-gnu-clang.cfg +%{_sysconfdir}/%{pkg_name_clang}/i386-redhat-linux-gnu-clang++.cfg +%endif %{_mandir}/man1/clang-%{maj_ver}.1.gz %{_mandir}/man1/clang++-%{maj_ver}.1.gz %if %{without compat_build} @@ -2152,6 +2276,9 @@ fi %{install_bindir}/clang-reorder-fields %{install_bindir}/clang-repl %{install_bindir}/clang-scan-deps +%if %{maj_ver} >= 20 +%{install_bindir}/clang-sycl-linker +%endif %{install_bindir}/clang-tidy %{install_bindir}/clangd %{install_bindir}/diagtool @@ -2188,6 +2315,9 @@ fi %{_bindir}/clang-reorder-fields-%{maj_ver} %{_bindir}/clang-repl-%{maj_ver} %{_bindir}/clang-scan-deps-%{maj_ver} +%if %{maj_ver} >= 20 +%{_bindir}/clang-sycl-linker-%{maj_ver} +%endif 
%{_bindir}/clang-tidy-%{maj_ver} %{_bindir}/clangd-%{maj_ver} %{_bindir}/diagtool-%{maj_ver} @@ -2366,6 +2496,11 @@ fi %{install_libdir}/liblldb*.so %{install_libdir}/liblldb.so.* %{install_libdir}/liblldbIntelFeatures.so.* +%{_mandir}/man1/lldb-server%{exec_suffix}.1.gz +%{_mandir}/man1/lldb%{exec_suffix}.1.gz +%if %{with bundle_compat_lib} +%{_libdir}/liblldb.so.%{compat_maj_ver}* +%endif %files -n %{pkg_name_lldb}-devel %{install_includedir}/lldb @@ -2378,6 +2513,12 @@ fi #region changelog %changelog +* Wed Jan 15 2025 Nikita Popov - 19.1.7-1 +- Update to LLVM 19.1.7 (RHEL-57456) +- Remove llvm18 compat package (RHEL-57457) +- Remove generated html content from llvm-doc subpackage (RHEL-58900) +- Enable LLVM_ENABLE_ZSTD=ON (RHEL-70325) + * Mon Oct 14 2024 Nikita Popov - 19.1.1-5 - Add missing requires diff --git a/sources b/sources index 6f85d0c..d5e33a6 100644 --- a/sources +++ b/sources @@ -1,4 +1,2 @@ -SHA512 (llvm-project-19.1.1.src.tar.xz) = 84adab40ffb9ec236dbf203d86c08a0c2c651f98278a9d0936490c7901159eb26eabd3db9316013886b549426d4acb43b75d866f7dc670ab299bf93ba35b1891 -SHA512 (llvm-project-19.1.1.src.tar.xz.sig) = 07bb7bffb2b035417d702ca47be9d5759250f1a2cd57606855027d458ceb972a293b45d3d93bcda195588986acbb5eace60524f4aecdc0da7aeb3a8414c37c31 -SHA512 (llvm-project-18.1.8.src.tar.xz) = 25eeee9984c8b4d0fbc240df90f33cbb000d3b0414baff5c8982beafcc5e59e7ef18f6f85d95b3a5f60cb3d4cd4f877c80487b5768bc21bc833f107698ad93db -SHA512 (llvm-project-18.1.8.src.tar.xz.sig) = ddfd1e8a06756759af6cbe488c82a6d6a62ba91f3e8a0eb4cece561321824f5d165b08ed91010588790b76e19790931d2651b24dba8567e3b151d3cb43bec25b +SHA512 (llvm-project-19.1.7.src.tar.xz) = c7d63286d662707a9cd54758c9e3aaf52794a91900c484c4a6efa62d90bc719d5e7a345e4192feeb0c9fd11c82570d64677c781e5be1d645556b6aa018e47ec8 +SHA512 (llvm-project-19.1.7.src.tar.xz.sig) = 195797b06ac80a742e0ccbc03a50dc06dd2e04377d783d5474e3e72c5a75203b60292b047929312a411d22b137a239943fba414a4d136a2be14cbff978eb6bda From 
e4396ca0dacd83e330beffbe1392b2fcd52ff32c Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 17 Jan 2025 09:58:00 +0100 Subject: [PATCH 3/3] Fix CET support in libomp.so Resolves: RHEL-74346 --- ...-Support-CET-in-z_Linux_asm.S-123213.patch | 51 +++++++++++++++++++ llvm.spec | 6 ++- 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 0001-openmp-Support-CET-in-z_Linux_asm.S-123213.patch diff --git a/0001-openmp-Support-CET-in-z_Linux_asm.S-123213.patch b/0001-openmp-Support-CET-in-z_Linux_asm.S-123213.patch new file mode 100644 index 0000000..3fd4cd1 --- /dev/null +++ b/0001-openmp-Support-CET-in-z_Linux_asm.S-123213.patch @@ -0,0 +1,51 @@ +From 90a05f32166c4a45224a5eedbec9c5c7e21d2dbf Mon Sep 17 00:00:00 2001 +From: Nikita Popov +Date: Fri, 17 Jan 2025 09:26:49 +0100 +Subject: [PATCH] [openmp] Support CET in z_Linux_asm.S (#123213) + +When libomp is built with -cf-protection, add endbr instructions to the +start of functions for Intel CET support. +--- + openmp/runtime/src/z_Linux_asm.S | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S +index cc5344cdd124..0bf9f07a13f1 100644 +--- a/openmp/runtime/src/z_Linux_asm.S ++++ b/openmp/runtime/src/z_Linux_asm.S +@@ -19,6 +19,16 @@ + + #if KMP_ARCH_X86 || KMP_ARCH_X86_64 + ++# if defined(__ELF__) && defined(__CET__) && defined(__has_include) ++# if __has_include(<cet.h>) ++# include <cet.h> ++# endif ++# endif ++ ++# if !defined(_CET_ENDBR) ++# define _CET_ENDBR ++# endif ++ + # if KMP_MIC + // the 'delay r16/r32/r64' should be used instead of the 'pause'. 
+ // The delay operation has the effect of removing the current thread from +@@ -66,6 +76,7 @@ + ALIGN 4 + .globl KMP_PREFIX_UNDERSCORE($0) + KMP_PREFIX_UNDERSCORE($0): ++ _CET_ENDBR + .endmacro + # else // KMP_OS_DARWIN + # define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols +@@ -92,6 +103,7 @@ KMP_PREFIX_UNDERSCORE($0): + .globl KMP_PREFIX_UNDERSCORE(\proc) + KMP_PREFIX_UNDERSCORE(\proc): + .cfi_startproc ++ _CET_ENDBR + .endm + .macro KMP_CFI_DEF_OFFSET sz + .cfi_def_cfa_offset \sz +-- +2.47.1 + diff --git a/llvm.spec b/llvm.spec index 6f25bdf..a1e8aa4 100644 --- a/llvm.spec +++ b/llvm.spec @@ -182,7 +182,7 @@ #region main package Name: %{pkg_name_llvm} Version: %{maj_ver}.%{min_ver}.%{patch_ver}%{?rc_ver:~rc%{rc_ver}}%{?llvm_snapshot_version_suffix:~%{llvm_snapshot_version_suffix}} -Release: 1%{?dist} +Release: 2%{?dist} Summary: The Low Level Virtual Machine License: Apache-2.0 WITH LLVM-exception OR NCSA @@ -239,6 +239,7 @@ Source1000: version.spec.inc #region OpenMP patches Patch1900: 0001-openmp-Add-option-to-disable-tsan-tests-111548.patch Patch1901: 0001-openmp-Use-core_siblings_list-if-physical_package_id.patch +Patch1910: 0001-openmp-Support-CET-in-z_Linux_asm.S-123213.patch #endregion OpenMP patches #region CLANG patches @@ -2513,6 +2514,9 @@ fi #region changelog %changelog +* Fri Jan 17 2025 Nikita Popov - 19.1.7-2 +- Support CET in libomp.so (RHEL-74346) + * Wed Jan 15 2025 Nikita Popov - 19.1.7-1 - Update to LLVM 19.1.7 (RHEL-57456) - Remove llvm18 compat package (RHEL-57457)