Merge branch 'c10s' into a10s

This commit is contained in:
eabdullin 2025-01-24 13:43:14 +03:00
commit 26b8067a9b
12 changed files with 1324 additions and 211 deletions

View File

@ -0,0 +1,29 @@
From b1c60d7fa322a2d208556087df9e7ef94bfbffb8 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 8 May 2024 12:30:36 +0900
Subject: [PATCH] Always build shared libs for LLD
We don't want to enable BUILD_SHARED_LIBS for the whole build,
but we do want to build lld libraries.
---
lld/cmake/modules/AddLLD.cmake | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/lld/cmake/modules/AddLLD.cmake b/lld/cmake/modules/AddLLD.cmake
index 2ee066b41535..270c03f096ac 100644
--- a/lld/cmake/modules/AddLLD.cmake
+++ b/lld/cmake/modules/AddLLD.cmake
@@ -7,9 +7,8 @@ macro(add_lld_library name)
""
""
${ARGN})
- if(ARG_SHARED)
- set(ARG_ENABLE_SHARED SHARED)
- endif()
+ # Always build shared libs for LLD.
+ set(ARG_ENABLE_SHARED SHARED)
llvm_add_library(${name} ${ARG_ENABLE_SHARED} ${ARG_UNPARSED_ARGUMENTS})
set_target_properties(${name} PROPERTIES FOLDER "lld libraries")
--
2.44.0

View File

@ -1,41 +0,0 @@
From d8742e9b361e5fd6fee2298b8ea0aeb4671ec05a Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 22 May 2024 09:39:26 +0200
Subject: [PATCH] Remove myst_parser dependency for RHEL
---
clang/docs/conf.py | 3 +--
llvm/docs/conf.py | 3 +--
2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/clang/docs/conf.py b/clang/docs/conf.py
index 4cee382a718f..d2e2198e05d4 100644
--- a/clang/docs/conf.py
+++ b/clang/docs/conf.py
@@ -43,8 +43,7 @@ try:
extensions.append("myst_parser")
except ImportError:
- if not tags.has("builder-man"):
- raise
+ pass
# The encoding of source files.
diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py
index 7f2ed5309606..354a41f11280 100644
--- a/llvm/docs/conf.py
+++ b/llvm/docs/conf.py
@@ -36,8 +36,7 @@ try:
extensions.append("myst_parser")
except ImportError:
- if not tags.has("builder-man"):
- raise
+ pass
# Automatic anchors for markdown titles
from llvm_slug import make_slug
--
2.44.0

View File

@ -1,43 +0,0 @@
From 50cd36c2156d375a6d50f661908b460fbbd22e78 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 22 May 2024 09:39:26 +0200
Subject: [PATCH] Remove myst_parser dependency for RHEL
---
clang/docs/conf.py | 3 +--
llvm/docs/conf.py | 5 +----
2 files changed, 2 insertions(+), 6 deletions(-)
diff --git a/clang/docs/conf.py b/clang/docs/conf.py
index 4cee382a718f..d2e2198e05d4 100644
--- a/clang/docs/conf.py
+++ b/clang/docs/conf.py
@@ -43,8 +43,7 @@ try:
extensions.append("myst_parser")
except ImportError:
- if not tags.has("builder-man"):
- raise
+ pass
# The encoding of source files.
diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py
index d9fa6961032b..e38c009a457d 100644
--- a/llvm/docs/conf.py
+++ b/llvm/docs/conf.py
@@ -36,10 +36,7 @@ try:
extensions.append("myst_parser")
except ImportError:
- if not tags.has("builder-man"):
- raise
-else:
- myst_enable_extensions = ["substitution"]
+ pass
# Automatic anchors for markdown titles
myst_heading_anchors = 6
--
2.46.0

View File

@ -0,0 +1,41 @@
From 73d3b4047d757ef35850e2cef38285b96be82f0f Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 23 May 2023 12:17:29 +0200
Subject: [PATCH] [Driver] Give devtoolset path precedence over InstalledDir
This is a followup to the change from c5fe10f365247c3dd9416b7ec8bad73a60b5946e.
While that commit correctly adds the bindir from devtoolset to the
path, the driver dir / install dir still comes first. This means
we'll still end up picking /usr/bin/ld rather than the one from
devtoolset.
Unfortunately, I don't see any way to test this. In the environment
the tests are run, this would only result in a behavior difference
if there is an ld binary present in the LLVM build directory, which
isn't the case.
Differential Revision: https://reviews.llvm.org/D151203
---
clang/lib/Driver/ToolChains/Linux.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
index 853ff99d9fe5..aecabb46d4b9 100644
--- a/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/clang/lib/Driver/ToolChains/Linux.cpp
@@ -244,9 +244,9 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
// With devtoolset on RHEL, we want to add a bin directory that is relative
// to the detected gcc install, because if we are using devtoolset gcc then
// we want to use other tools from devtoolset (e.g. ld) instead of the
- // standard system tools.
- PPaths.push_back(Twine(GCCInstallation.getParentLibPath() +
- "/../bin").str());
+ // standard system tools. This should take precedence over InstalledDir.
+ PPaths.insert(PPaths.begin(),
+ Twine(GCCInstallation.getParentLibPath() + "/../bin").str());
if (Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb)
ExtraOpts.push_back("-X");
--
2.40.1

View File

@ -1,45 +0,0 @@
From 1c8a88c870a00eea6c80109cc682e0276ff7888d Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Mon, 8 Jul 2024 12:32:57 +0200
Subject: [PATCH] Fix python3-clang
Drop confusing `cd ..` so we stay in the root of the LLVM sources
and the install command succeeds.
---
install.spec.inc | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/install.spec.inc b/install.spec.inc
index 0fc1424..5f4a43b 100644
--- a/install.spec.inc
+++ b/install.spec.inc
@@ -98,8 +98,6 @@ touch %{buildroot}%{_bindir}/llvm-config%{exec_suffix}
mkdir -p %{buildroot}%{pkg_datadir}/llvm/cmake
cp -Rv cmake/* %{buildroot}%{pkg_datadir}/llvm/cmake
-cd ..
-
#endregion
#region CLANG installation
@@ -134,7 +132,7 @@ mkdir -p %{buildroot}%{python3_sitelib}/clang/
# install: omitting directory 'bindings/python/clang/__pycache__'
# NOTE: this only happens if we include the gdb plugin of libomp.
# Remove the plugin with command and we're good: rm -rf %{buildroot}/%{_datarootdir}/gdb
-install -p -m644 clang/bindings/python/clang/* %{buildroot}%{python3_sitelib}/clang/ || true
+install -p -m644 clang/bindings/python/clang/* %{buildroot}%{python3_sitelib}/clang/
%py_byte_compile %{__python3} %{buildroot}%{python3_sitelib}/clang
# install scanbuild-py to python sitelib.
@@ -268,7 +266,7 @@ done
# https://docs.fedoraproject.org/en-US/packaging-guidelines/Alternatives/
touch %{buildroot}%{_bindir}/ld
-install -D -m 644 -t %{buildroot}%{_mandir}/man1/ %{src_tarball_dir}/lld/docs/ld.lld.1
+install -D -m 644 -t %{buildroot}%{_mandir}/man1/ lld/docs/ld.lld.1
%post -n %{pkg_name_lld}
%{_sbindir}/update-alternatives --install %{_bindir}/ld ld %{_bindir}/ld.lld 1
--
2.45.2

View File

@ -0,0 +1,51 @@
From 90a05f32166c4a45224a5eedbec9c5c7e21d2dbf Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 17 Jan 2025 09:26:49 +0100
Subject: [PATCH] [openmp] Support CET in z_Linux_asm.S (#123213)
When libomp is built with -cf-protection, add endbr instructions to the
start of functions for Intel CET support.
---
openmp/runtime/src/z_Linux_asm.S | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S
index cc5344cdd124..0bf9f07a13f1 100644
--- a/openmp/runtime/src/z_Linux_asm.S
+++ b/openmp/runtime/src/z_Linux_asm.S
@@ -19,6 +19,16 @@
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+# if defined(__ELF__) && defined(__CET__) && defined(__has_include)
+# if __has_include(<cet.h>)
+# include <cet.h>
+# endif
+# endif
+
+# if !defined(_CET_ENDBR)
+# define _CET_ENDBR
+# endif
+
# if KMP_MIC
// the 'delay r16/r32/r64' should be used instead of the 'pause'.
// The delay operation has the effect of removing the current thread from
@@ -66,6 +76,7 @@
ALIGN 4
.globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
+ _CET_ENDBR
.endmacro
# else // KMP_OS_DARWIN
# define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols
@@ -92,6 +103,7 @@ KMP_PREFIX_UNDERSCORE($0):
.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
.cfi_startproc
+ _CET_ENDBR
.endm
.macro KMP_CFI_DEF_OFFSET sz
.cfi_def_cfa_offset \sz
--
2.47.1

View File

@ -0,0 +1,86 @@
From ccc2b792e57d632bc887b226a4e7f0a8189eab8b Mon Sep 17 00:00:00 2001
From: Josh Stone <jistone@redhat.com>
Date: Mon, 4 Nov 2024 16:37:49 -0800
Subject: [PATCH] [profile] Use base+vaddr for `__llvm_write_binary_ids` note
pointers
This function is always examining its own ELF headers in memory, but it
was trying to use conditions between examining files or memory, and it
wasn't accounting for LOAD offsets at runtime. This is especially bad if
a loaded segment has additional padding that's not in the file offsets.
Now we do a first scan of the program headers to figure out the runtime
base address based on `PT_PHDR` and/or `PT_DYNAMIC` (else assume zero),
similar to libc's `do_start`. Then each `PT_NOTE` pointer is simply the
base plus the segments's `pt_vaddr`, which includes LOAD offsets.
Fixes #114605
---
.../lib/profile/InstrProfilingPlatformLinux.c | 40 ++++++++-----------
1 file changed, 16 insertions(+), 24 deletions(-)
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
index e2c06d51e0c6..c365129a0768 100644
--- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
@@ -194,41 +194,33 @@ static int WriteBinaryIds(ProfDataWriter *Writer, const ElfW(Nhdr) * Note,
*/
COMPILER_RT_VISIBILITY int __llvm_write_binary_ids(ProfDataWriter *Writer) {
extern const ElfW(Ehdr) __ehdr_start __attribute__((visibility("hidden")));
+ extern ElfW(Dyn) _DYNAMIC[] __attribute__((weak, visibility("hidden")));
+
const ElfW(Ehdr) *ElfHeader = &__ehdr_start;
const ElfW(Phdr) *ProgramHeader =
(const ElfW(Phdr) *)((uintptr_t)ElfHeader + ElfHeader->e_phoff);
+ /* Compute the added base address in case of position-independent code. */
+ uintptr_t Base = 0;
+ for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) {
+ if (ProgramHeader[I].p_type == PT_PHDR)
+ Base = (uintptr_t)ProgramHeader - ProgramHeader[I].p_vaddr;
+ if (ProgramHeader[I].p_type == PT_DYNAMIC && _DYNAMIC)
+ Base = (uintptr_t)_DYNAMIC - ProgramHeader[I].p_vaddr;
+ }
+
int TotalBinaryIdsSize = 0;
- uint32_t I;
/* Iterate through entries in the program header. */
- for (I = 0; I < ElfHeader->e_phnum; I++) {
+ for (uint32_t I = 0; I < ElfHeader->e_phnum; I++) {
/* Look for the notes segment in program header entries. */
if (ProgramHeader[I].p_type != PT_NOTE)
continue;
/* There can be multiple notes segment, and examine each of them. */
- const ElfW(Nhdr) * Note;
- const ElfW(Nhdr) * NotesEnd;
- /*
- * When examining notes in file, use p_offset, which is the offset within
- * the elf file, to find the start of notes.
- */
- if (ProgramHeader[I].p_memsz == 0 ||
- ProgramHeader[I].p_memsz == ProgramHeader[I].p_filesz) {
- Note = (const ElfW(Nhdr) *)((uintptr_t)ElfHeader +
- ProgramHeader[I].p_offset);
- NotesEnd = (const ElfW(Nhdr) *)((const char *)(Note) +
- ProgramHeader[I].p_filesz);
- } else {
- /*
- * When examining notes in memory, use p_vaddr, which is the address of
- * section after loaded to memory, to find the start of notes.
- */
- Note =
- (const ElfW(Nhdr) *)((uintptr_t)ElfHeader + ProgramHeader[I].p_vaddr);
- NotesEnd =
- (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz);
- }
+ const ElfW(Nhdr) *Note =
+ (const ElfW(Nhdr) *)(Base + ProgramHeader[I].p_vaddr);
+ const ElfW(Nhdr) *NotesEnd =
+ (const ElfW(Nhdr) *)((const char *)(Note) + ProgramHeader[I].p_memsz);
int BinaryIdsSize = WriteBinaryIds(Writer, Note, NotesEnd);
if (TotalBinaryIdsSize == -1)
--
2.47.0

893
18-99273.patch Normal file
View File

@ -0,0 +1,893 @@
From 91052169960477fbc39169c10f9fae3bec732510 Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson@amd.com>
Date: Wed, 17 Jul 2024 15:07:42 +0900
Subject: [PATCH 1/3] [AMDGPU] Implement workaround for GFX11.5 export priority
On GFX11.5 shaders having completed exports need to execute/wait
at a lower priority than shaders still executing exports.
Add code to maintain normal priority of 2 for shaders that export
and drop to priority 0 after exports.
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 15 +-
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 112 ++++++
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 +
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3 +
.../AMDGPU/required-export-priority.ll | 344 ++++++++++++++++++
.../AMDGPU/required-export-priority.mir | 293 +++++++++++++++
6 files changed, 765 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/required-export-priority.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index dfc8eaea66f7b..14fcf6a210a78 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -947,6 +947,12 @@ def FeatureHasRestrictedSOffset : SubtargetFeature<"restricted-soffset",
"Has restricted SOffset (immediate not supported)."
>;
+def FeatureRequiredExportPriority : SubtargetFeature<"required-export-priority",
+ "HasRequiredExportPriority",
+ "true",
+ "Export priority must be explicitly manipulated on GFX11.5"
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -1597,14 +1603,16 @@ def FeatureISAVersion11_5_0 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureSALUFloatInsts,
FeatureDPPSrc1SGPR,
- FeatureVGPRSingleUseHintInsts])>;
+ FeatureVGPRSingleUseHintInsts,
+ FeatureRequiredExportPriority])>;
def FeatureISAVersion11_5_1 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureSALUFloatInsts,
FeatureDPPSrc1SGPR,
FeatureVGPRSingleUseHintInsts,
- FeatureGFX11FullVGPRs])>;
+ FeatureGFX11FullVGPRs,
+ FeatureRequiredExportPriority])>;
def FeatureISAVersion12 : FeatureSet<
[FeatureGFX12,
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index a402fc6d7e611..a8b171aa82840 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -14,6 +14,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/TargetParser/TargetParser.h"
@@ -1104,6 +1105,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixWMMAHazards(MI);
fixShift64HighRegBug(MI);
fixVALUMaskWriteHazard(MI);
+ fixRequiredExportPriority(MI);
}
bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) {
@@ -2895,3 +2897,113 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
return true;
}
+
+static bool ensureEntrySetPrio(MachineFunction *MF, int Priority,
+ const SIInstrInfo &TII) {
+ MachineBasicBlock &EntryMBB = MF->front();
+ if (EntryMBB.begin() != EntryMBB.end()) {
+ auto &EntryMI = *EntryMBB.begin();
+ if (EntryMI.getOpcode() == AMDGPU::S_SETPRIO &&
+ EntryMI.getOperand(0).getImm() >= Priority)
+ return false;
+ }
+
+ BuildMI(EntryMBB, EntryMBB.begin(), DebugLoc(), TII.get(AMDGPU::S_SETPRIO))
+ .addImm(Priority);
+ return true;
+}
+
+bool GCNHazardRecognizer::fixRequiredExportPriority(MachineInstr *MI) {
+ if (!ST.hasRequiredExportPriority())
+ return false;
+
+ // Assume the following shader types will never have exports,
+ // and avoid adding or adjusting S_SETPRIO.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction *MF = MBB->getParent();
+ auto CC = MF->getFunction().getCallingConv();
+ switch (CC) {
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
+ case CallingConv::AMDGPU_KERNEL:
+ return false;
+ default:
+ break;
+ }
+
+ const int MaxPriority = 3;
+ const int NormalPriority = 2;
+ const int PostExportPriority = 0;
+
+ auto It = MI->getIterator();
+ switch (MI->getOpcode()) {
+ case AMDGPU::S_ENDPGM:
+ case AMDGPU::S_ENDPGM_SAVED:
+ case AMDGPU::S_ENDPGM_ORDERED_PS_DONE:
+ case AMDGPU::SI_RETURN_TO_EPILOG:
+ // Ensure shader with calls raises priority at entry.
+ // This ensures correct priority if exports exist in callee.
+ if (MF->getFrameInfo().hasCalls())
+ return ensureEntrySetPrio(MF, NormalPriority, TII);
+ return false;
+ case AMDGPU::S_SETPRIO: {
+ // Raise minimum priority unless in workaround.
+ auto &PrioOp = MI->getOperand(0);
+ int Prio = PrioOp.getImm();
+ bool InWA = (Prio == PostExportPriority) &&
+ (It != MBB->begin() && TII.isEXP(*std::prev(It)));
+ if (InWA || Prio >= NormalPriority)
+ return false;
+ PrioOp.setImm(std::min(Prio + NormalPriority, MaxPriority));
+ return true;
+ }
+ default:
+ if (!TII.isEXP(*MI))
+ return false;
+ break;
+ }
+
+ // Check entry priority at each export (as there will only be a few).
+ // Note: amdgpu_gfx can only be a callee, so defer to caller setprio.
+ bool Changed = false;
+ if (CC != CallingConv::AMDGPU_Gfx)
+ Changed = ensureEntrySetPrio(MF, NormalPriority, TII);
+
+ auto NextMI = std::next(It);
+ bool EndOfShader = false;
+ if (NextMI != MBB->end()) {
+ // Only need WA at end of sequence of exports.
+ if (TII.isEXP(*NextMI))
+ return Changed;
+ // Assume appropriate S_SETPRIO after export means WA already applied.
+ if (NextMI->getOpcode() == AMDGPU::S_SETPRIO &&
+ NextMI->getOperand(0).getImm() == PostExportPriority)
+ return Changed;
+ EndOfShader = NextMI->getOpcode() == AMDGPU::S_ENDPGM;
+ }
+
+ const DebugLoc &DL = MI->getDebugLoc();
+
+ // Lower priority.
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
+ .addImm(PostExportPriority);
+
+ if (!EndOfShader) {
+ // Wait for exports to complete.
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_WAITCNT_EXPCNT))
+ .addReg(AMDGPU::SGPR_NULL)
+ .addImm(0);
+ }
+
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_NOP)).addImm(0);
+
+ if (!EndOfShader) {
+ // Return to normal (higher) priority.
+ BuildMI(*MBB, NextMI, DL, TII.get(AMDGPU::S_SETPRIO))
+ .addImm(NormalPriority);
+ }
+
+ return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 3ccca527c626b..f2a64ab48e180 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -107,6 +107,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
bool fixWMMAHazards(MachineInstr *MI);
bool fixShift64HighRegBug(MachineInstr *MI);
bool fixVALUMaskWriteHazard(MachineInstr *MI);
+ bool fixRequiredExportPriority(MachineInstr *MI);
int checkMAIHazards(MachineInstr *MI);
int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index e5817594a4521..def89c785b855 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -238,6 +238,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasVOPDInsts = false;
bool HasVALUTransUseHazard = false;
bool HasForceStoreSC0SC1 = false;
+ bool HasRequiredExportPriority = false;
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -1282,6 +1283,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }
+ bool hasRequiredExportPriority() const { return HasRequiredExportPriority; }
+
/// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
/// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
new file mode 100644
index 0000000000000..377902f3f0d1a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -0,0 +1,344 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+define amdgpu_ps void @test_export_zeroes_f32() #0 {
+; GCN-LABEL: test_export_zeroes_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: exp mrt0 off, off, off, off
+; GCN-NEXT: exp mrt0 off, off, off, off done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_ps void @test_export_en_src0_f32() #0 {
+; GCN-LABEL: test_export_en_src0_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
+; GCN-NEXT: exp mrt0 v3, off, off, off done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_gs void @test_export_gs() #0 {
+; GCN-LABEL: test_export_gs:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
+; GCN-NEXT: exp mrt0 off, v2, off, off done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_hs void @test_export_hs() #0 {
+; GCN-LABEL: test_export_hs:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
+; GCN-NEXT: exp mrt0 off, v2, off, off done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_gfx void @test_export_gfx(float %v) #0 {
+; GCN-LABEL: test_export_gfx:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v1, 4.0
+; GCN-NEXT: v_mov_b32_e32 v2, 0.5
+; GCN-NEXT: v_mov_b32_e32 v3, 2.0
+; GCN-NEXT: exp mrt0 off, v3, off, off done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_waitcnt expcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %v, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_cs void @test_export_cs() #0 {
+; GCN-LABEL: test_export_cs:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
+; GCN-NEXT: exp mrt0 off, v2, off, off done
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_kernel void @test_export_kernel() #0 {
+; GCN-LABEL: test_export_kernel:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_mov_b32_e32 v0, 4.0
+; GCN-NEXT: v_mov_b32_e32 v1, 0.5
+; GCN-NEXT: v_mov_b32_e32 v2, 2.0
+; GCN-NEXT: v_mov_b32_e32 v3, 1.0
+; GCN-NEXT: exp mrt0 off, v2, off, off done
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_gfx void @test_no_export_gfx(float %v) #0 {
+; GCN-LABEL: test_no_export_gfx:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ ret void
+}
+
+define amdgpu_ps void @test_no_export_ps(float %v) #0 {
+; GCN-LABEL: test_no_export_ps:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_endpgm
+ ret void
+}
+
+define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+; GCN-LABEL: test_if_export_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_mov_b32 s0, exec_lo
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
+; GCN-NEXT: s_cbranch_execz .LBB9_2
+; GCN-NEXT: ; %bb.1: ; %exp
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: .LBB9_2: ; %end
+; GCN-NEXT: s_endpgm
+ %cc = icmp eq i32 %flag, 0
+ br i1 %cc, label %end, label %exp
+
+exp:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
+ br label %end
+
+end:
+ ret void
+}
+
+define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+; GCN-LABEL: test_if_export_vm_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_mov_b32 s0, exec_lo
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
+; GCN-NEXT: s_cbranch_execz .LBB10_2
+; GCN-NEXT: ; %bb.1: ; %exp
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: .LBB10_2: ; %end
+; GCN-NEXT: s_endpgm
+ %cc = icmp eq i32 %flag, 0
+ br i1 %cc, label %end, label %exp
+
+exp:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
+ br label %end
+
+end:
+ ret void
+}
+
+define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+; GCN-LABEL: test_if_export_done_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_mov_b32 s0, exec_lo
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
+; GCN-NEXT: s_cbranch_execz .LBB11_2
+; GCN-NEXT: ; %bb.1: ; %exp
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: .LBB11_2: ; %end
+; GCN-NEXT: s_endpgm
+ %cc = icmp eq i32 %flag, 0
+ br i1 %cc, label %end, label %exp
+
+exp:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
+ br label %end
+
+end:
+ ret void
+}
+
+define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
+; GCN-LABEL: test_if_export_vm_done_f32:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_mov_b32 s0, exec_lo
+; GCN-NEXT: v_cmpx_ne_u32_e32 0, v0
+; GCN-NEXT: s_cbranch_execz .LBB12_2
+; GCN-NEXT: ; %bb.1: ; %exp
+; GCN-NEXT: exp mrt0 v1, v2, v3, v4 done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: .LBB12_2: ; %end
+; GCN-NEXT: s_endpgm
+ %cc = icmp eq i32 %flag, 0
+ br i1 %cc, label %end, label %exp
+
+exp:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
+ br label %end
+
+end:
+ ret void
+}
+
+define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 {
+; GCN-LABEL: test_export_pos_before_param_across_load:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen
+; GCN-NEXT: v_mov_b32_e32 v1, 0
+; GCN-NEXT: v_mov_b32_e32 v2, 1.0
+; GCN-NEXT: v_mov_b32_e32 v3, 0.5
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: exp pos0 v1, v1, v1, v0 done
+; GCN-NEXT: exp invalid_target_32 v2, v2, v2, v2
+; GCN-NEXT: exp invalid_target_33 v2, v2, v2, v3
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
+ %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
+ ret void
+}
+
+define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
+; GCN-LABEL: test_export_across_store_load:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: v_mov_b32_e32 v2, 24
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GCN-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 8, vcc_lo
+; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: scratch_store_b32 v0, v1, off
+; GCN-NEXT: scratch_load_b32 v0, off, off
+; GCN-NEXT: v_mov_b32_e32 v1, 1.0
+; GCN-NEXT: exp pos0 v2, v2, v2, v1 done
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_waitcnt_expcnt null, 0x0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: exp invalid_target_32 v0, v2, v1, v2
+; GCN-NEXT: exp invalid_target_33 v0, v2, v1, v2
+; GCN-NEXT: s_setprio 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_nop 0
+; GCN-NEXT: s_endpgm
+ %data0 = alloca <4 x float>, align 8, addrspace(5)
+ %data1 = alloca <4 x float>, align 8, addrspace(5)
+ %cmp = icmp eq i32 %idx, 1
+ %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
+ store float %v, ptr addrspace(5) %data, align 8
+ call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
+ %load0 = load float, ptr addrspace(5) %data0, align 8
+ call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
+ call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
+ ret void
+}
+
+define amdgpu_ps void @test_export_in_callee(float %v) #0 {
+; GCN-LABEL: test_export_in_callee:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_getpc_b64 s[0:1]
+; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
+; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GCN-NEXT: s_endpgm
+ %x = fadd float %v, 1.0
+ call void @test_export_gfx(float %x)
+ ret void
+}
+
+define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
+; GCN-LABEL: test_export_in_callee_prio:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_mov_b32 s32, 0
+; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GCN-NEXT: s_setprio 2
+; GCN-NEXT: s_getpc_b64 s[0:1]
+; GCN-NEXT: s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
+; GCN-NEXT: s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
+; GCN-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_swappc_b64 s[30:31], s[0:1]
+; GCN-NEXT: s_endpgm
+ %x = fadd float %v, 1.0
+ call void @llvm.amdgcn.s.setprio(i16 0)
+ call void @test_export_gfx(float %x)
+ ret void
+}
+
+declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
+declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
+declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
+declare void @llvm.amdgcn.s.setprio(i16)
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind inaccessiblememonly }
+attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.mir b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
new file mode 100644
index 0000000000000..eee04468036e5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.mir
@@ -0,0 +1,293 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=post-RA-hazard-rec -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX1150 %s
+
+--- |
+ define amdgpu_ps void @end_of_shader() {
+ ret void
+ }
+ define amdgpu_ps void @end_of_shader_return_to_epilogue() {
+ ret void
+ }
+ define amdgpu_ps void @end_of_block() {
+ ret void
+ }
+ define amdgpu_ps void @start_of_block() {
+ ret void
+ }
+ define amdgpu_ps void @block_of_exports() {
+ ret void
+ }
+ define amdgpu_ps void @sparse_exports() {
+ ret void
+ }
+ define amdgpu_ps void @existing_setprio_1() {
+ ret void
+ }
+ define amdgpu_ps void @existing_setprio_2() {
+ ret void
+ }
+...
+
+---
+name: end_of_shader
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX1150-LABEL: name: end_of_shader
+ ; GFX1150: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_ENDPGM 0
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: end_of_shader_return_to_epilogue
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX1150-LABEL: name: end_of_shader_return_to_epilogue
+ ; GFX1150: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ SI_RETURN_TO_EPILOG $vgpr0
+...
+
+---
+name: end_of_block
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ ; GFX1150-LABEL: name: end_of_block
+ ; GFX1150: bb.0:
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
+ ; GFX1150-NEXT: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: bb.1:
+ ; GFX1150-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+
+ bb.1:
+ S_ENDPGM 0
+...
+
+---
+name: start_of_block
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ ; GFX1150-LABEL: name: start_of_block
+ ; GFX1150: bb.0:
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
+ ; GFX1150-NEXT: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: bb.1:
+ ; GFX1150-NEXT: successors: %bb.2(0x80000000)
+ ; GFX1150-NEXT: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: bb.2:
+ ; GFX1150-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0
+
+ bb.1:
+ liveins: $vgpr0
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: block_of_exports
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX1150-LABEL: name: block_of_exports
+ ; GFX1150: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_ENDPGM 0
+ EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: sparse_exports
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX1150-LABEL: name: sparse_exports
+ ; GFX1150: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_ENDPGM 0
+ EXP 2, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: existing_setprio_1
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ ; GFX1150-LABEL: name: existing_setprio_1
+ ; GFX1150: bb.0:
+ ; GFX1150-NEXT: successors: %bb.1(0x80000000)
+ ; GFX1150-NEXT: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: bb.1:
+ ; GFX1150-NEXT: successors: %bb.2(0x80000000)
+ ; GFX1150-NEXT: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 3
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: bb.2:
+ ; GFX1150-NEXT: successors: %bb.3(0x80000000)
+ ; GFX1150-NEXT: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 3
+ ; GFX1150-NEXT: $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: bb.3:
+ ; GFX1150-NEXT: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_ENDPGM 0
+ bb.0:
+ liveins: $vgpr0
+ $vgpr0 = V_AND_B32_e32 1, $vgpr0, implicit $exec
+
+ bb.1:
+ liveins: $vgpr0
+ S_SETPRIO 3
+ $vgpr0 = V_OR_B32_e32 2, $vgpr0, implicit $exec
+ S_SETPRIO 0
+
+ bb.2:
+ liveins: $vgpr0
+ S_SETPRIO 1
+ $vgpr0 = V_OR_B32_e32 3, $vgpr0, implicit $exec
+ S_SETPRIO 0
+
+ bb.3:
+ liveins: $vgpr0
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: existing_setprio_2
+tracksRegLiveness: true
+liveins:
+ - { reg: '$vgpr0' }
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; GFX1150-LABEL: name: existing_setprio_2
+ ; GFX1150: liveins: $vgpr0
+ ; GFX1150-NEXT: {{ $}}
+ ; GFX1150-NEXT: S_SETPRIO 3
+ ; GFX1150-NEXT: EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GFX1150-NEXT: S_SETPRIO 0
+ ; GFX1150-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_NOP 0
+ ; GFX1150-NEXT: S_SETPRIO 2
+ ; GFX1150-NEXT: S_SETPRIO 3
+ ; GFX1150-NEXT: S_ENDPGM 0
+ S_SETPRIO 3
+ EXP 1, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ S_SETPRIO 3
+ S_ENDPGM 0
+...
From 8ea44e65f2c19facff751aeb2ac960f907fb210f Mon Sep 17 00:00:00 2001
From: Carl Ritson <carl.ritson@amd.com>
Date: Wed, 17 Jul 2024 16:18:02 +0900
Subject: [PATCH 2/3] Remove -verify-machineinstrs from test.
---
llvm/test/CodeGen/AMDGPU/required-export-priority.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
index 377902f3f0d1a..ebc209bd4d451 100644
--- a/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
+++ b/llvm/test/CodeGen/AMDGPU/required-export-priority.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GCN %s
define amdgpu_ps void @test_export_zeroes_f32() #0 {
; GCN-LABEL: test_export_zeroes_f32:

298
llvm.spec
View File

@ -2,7 +2,7 @@
#region version
%global maj_ver 19
%global min_ver 1
%global patch_ver 1
%global patch_ver 7
#global rc_ver 4
%bcond_with snapshot_build
@ -24,7 +24,7 @@
%bcond_with compat_build
# Bundle compat libraries for a previous LLVM version, as part of llvm-libs and
# clang-libs. Used on RHEL.
%bcond_without bundle_compat_lib
%bcond_with bundle_compat_lib
%bcond_without check
%if %{with bundle_compat_lib}
@ -56,6 +56,11 @@
# See https://docs.fedoraproject.org/en-US/packaging-guidelines/#_compiler_macros
%global toolchain clang
%if %{defined rhel} && 0%{?rhel} < 10
%global gts_version 14
%endif
# Opt out of https://fedoraproject.org/wiki/Changes/fno-omit-frame-pointer
# https://bugzilla.redhat.com/show_bug.cgi?id=2158587
%undefine _include_frame_pointers
@ -177,7 +182,7 @@
#region main package
Name: %{pkg_name_llvm}
Version: %{maj_ver}.%{min_ver}.%{patch_ver}%{?rc_ver:~rc%{rc_ver}}%{?llvm_snapshot_version_suffix:~%{llvm_snapshot_version_suffix}}
Release: 5%{?dist}.alma.1
Release: 2%{?dist}.alma.1
Summary: The Low Level Virtual Machine
License: Apache-2.0 WITH LLVM-exception OR NCSA
@ -206,43 +211,80 @@ Source3001: https://github.com/llvm/llvm-project/releases/download/llvmorg-%{com
Source1000: version.spec.inc
%endif
# We've established the habit of numbering patches the following way:
#
# 0-499: All patches that are unconditionally applied
# 500-1000: Patches applied under certain conditions (e.g. only on RHEL8)
# 1500-1599: Patches for LLVM 15
# 1600-1699: Patches for LLVM 16
# 1700-1799: Patches for LLVM 17
# ...
# 2000-2099: Patches for LLVM 20
#
# The idea behind this is that the last range of patch numbers (e.g. 2000-2099) allow
# us to "deprecate" a patch instead of deleting it right away.
# Suppose llvm upstream in git is at version 20 and there's a patch living
# in some PR that has not been merged yet. You can copy that patch and put it
# in a line like:
#
# Patch2011: upstream.patch
#
# As time goes by, llvm moves on to LLVM 21 and meanwhile the patch has landed.
# There's no need for you to remove the "Patch2011:" line. In fact, we encourage you
# to not remove it for some time. For compat libraries and compat packages we might
# still need this patch and so we're applying it automatically for you in those
# situations. Remember that a compat library is always at least one major version
# behind the latest packaged LLVM version.
#region OpenMP patches
%if %{maj_ver} < 20
Patch1001: 0001-openmp-Add-option-to-disable-tsan-tests-111548.patch
Patch1002: 0001-openmp-Use-core_siblings_list-if-physical_package_id.patch
%endif
Patch1900: 0001-openmp-Add-option-to-disable-tsan-tests-111548.patch
Patch1901: 0001-openmp-Use-core_siblings_list-if-physical_package_id.patch
Patch1910: 0001-openmp-Support-CET-in-z_Linux_asm.S-123213.patch
#endregion OpenMP patches
#region CLANG patches
Patch2001: 0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch
Patch2002: 0003-PATCH-clang-Don-t-install-static-libraries.patch
Patch101: 0001-PATCH-clang-Make-funwind-tables-the-default-on-all-a.patch
Patch102: 0003-PATCH-clang-Don-t-install-static-libraries.patch
#endregion CLANG patches
# Workaround a bug in ORC on ppc64le.
# More info is available here: https://reviews.llvm.org/D159115#4641826
Patch2005: 0001-Workaround-a-bug-in-ORC-on-ppc64le.patch
Patch103: 0001-Workaround-a-bug-in-ORC-on-ppc64le.patch
# With the introduction of --gcc-include-dir in the clang config file,
# this might no longer be needed.
Patch104: 0001-Driver-Give-devtoolset-path-precedence-over-Installe.patch
#region LLD patches
Patch3002: 0001-Always-build-shared-libs-for-LLD.patch
Patch1800: 0001-18-Always-build-shared-libs-for-LLD.patch
Patch1902: 0001-19-Always-build-shared-libs-for-LLD.patch
Patch2000: 0001-19-Always-build-shared-libs-for-LLD.patch
#endregion LLD patches
#region RHEL patches
# All RHEL
%if %{maj_ver} >= 20
Patch9001: 0001-20-Remove-myst_parser-dependency-for-RHEL.patch
%else
Patch9001: 0001-19-Remove-myst_parser-dependency-for-RHEL.patch
%endif
# RHEL 8 only
Patch9002: 0001-Fix-page-size-constant-on-aarch64-and-ppc64le.patch
Patch501: 0001-Fix-page-size-constant-on-aarch64-and-ppc64le.patch
#endregion RHEL patches
# Backport with modifications from
# https://github.com/llvm/llvm-project/pull/99273
# Fixes RHEL-49517.
Patch1801: 18-99273.patch
# Fix profiling after a binutils NOTE change.
# https://github.com/llvm/llvm-project/pull/114907
Patch1802: 0001-profile-Use-base-vaddr-for-__llvm_write_binary_ids-n.patch
Patch1903: 0001-profile-Use-base-vaddr-for-__llvm_write_binary_ids-n.patch
%if 0%{?rhel} == 8
%global python3_pkgversion 3.12
%global __python3 /usr/bin/python3.12
%endif
%if %{defined gts_version}
# Required for 64-bit atomics on i686.
BuildRequires: gcc-toolset-%{gts_version}-libatomic-devel
%endif
BuildRequires: gcc
BuildRequires: gcc-c++
BuildRequires: clang
@ -250,6 +292,7 @@ BuildRequires: cmake
BuildRequires: chrpath
BuildRequires: ninja-build
BuildRequires: zlib-devel
BuildRequires: libzstd-devel
BuildRequires: libffi-devel
BuildRequires: ncurses-devel
# This intentionally does not use python3_pkgversion. RHEL 8 does not have
@ -268,6 +311,9 @@ BuildRequires: python%{python3_pkgversion}-myst-parser
BuildRequires: multilib-rpm-config
%if %{with gold}
BuildRequires: binutils-devel
%if %{undefined rhel} || 0%{?rhel} > 8
BuildRequires: binutils-gold
%endif
%endif
%ifarch %{valgrind_arches}
# Enable extra functionality when run the LLVM JIT under valgrind.
@ -326,7 +372,7 @@ BuildRequires: procps-ng
# For reproducible pyc file generation
# See https://docs.fedoraproject.org/en-US/packaging-guidelines/Python_Appendix/#_byte_compilation_reproducibility
# Since Fedora 41 this happens automatically, and RHEL 8 does not support this.
%if (%{defined fedora} && 0%{?fedora} < 41) || 0%{?rhel} == 9 || 0%{?rhel} == 10
%if %{without compat_build} && ((%{defined fedora} && 0%{?fedora} < 41) || 0%{?rhel} == 9 || 0%{?rhel} == 10)
BuildRequires: /usr/bin/marshalparser
%global py_reproducible_pyc_path %{buildroot}%{python3_sitelib}
%endif
@ -482,6 +528,9 @@ libomp-devel to enable -fopenmp.
%package -n %{pkg_name_clang}-libs
Summary: Runtime library for clang
Requires: %{pkg_name_clang}-resource-filesystem%{?_isa} = %{version}-%{release}
%if %{defined gts_version}
Requires: gcc-toolset-%{gts_version}-gcc-c++
%endif
Recommends: %{pkg_name_compiler_rt}%{?_isa} = %{version}-%{release}
Requires: %{pkg_name_llvm}-libs = %{version}-%{release}
# atomic support is not part of compiler-rt
@ -600,6 +649,7 @@ Summary: OpenMP runtime for clang
URL: http://openmp.llvm.org
Requires: %{pkg_name_llvm}-libs%{?_isa} = %{version}-%{release}
Requires: elfutils-libelf%{?_isa}
Provides: libomp(major) = %{maj_ver}
@ -632,6 +682,7 @@ Requires(post): %{_sbindir}/update-alternatives
Requires(preun): %{_sbindir}/update-alternatives
Requires: %{pkg_name_lld}-libs = %{version}-%{release}
Provides: lld(major) = %{maj_ver}
%description -n %{pkg_name_lld}
The LLVM project linker.
@ -723,6 +774,14 @@ The package contains the LLDB Python module.
%if %{with bundle_compat_lib}
%{gpgverify} --keyring='%{SOURCE6}' --signature='%{SOURCE3001}' --data='%{SOURCE3000}'
%setup -T -q -b 3000 -n llvm-project-%{compat_ver}.src
# Apply all patches with number < 500 (unconditionally)
# See https://rpm-software-management.github.io/rpm/manual/autosetup.html
%autopatch -M499 -p1
# automatically apply patches based on LLVM version
%autopatch -m%{compat_maj_ver}00 -M%{compat_maj_ver}99 -p1
%endif
# -T : Do Not Perform Default Archive Unpacking (without this, the <n>th source would be unpacked twice)
@ -732,16 +791,15 @@ The package contains the LLDB Python module.
# see http://ftp.rpm.org/max-rpm/s1-rpm-inside-macros.html
%autosetup -N -T -b 0 -n %{src_tarball_dir}
# Apply all patches with number <= 9000
# Apply all patches with number < 500 (unconditionally)
# See https://rpm-software-management.github.io/rpm/manual/autosetup.html
%autopatch -M9000 -p1
%autopatch -M499 -p1
%if %{defined rhel}
%patch -p1 -P9001
# automatically apply patches based on LLVM version
%autopatch -m%{maj_ver}00 -M%{maj_ver}99 -p1
%if %{rhel} == 8
%patch -p1 -P9002
%endif
%if %{defined rhel} && 0%{?rhel} == 8
%patch -p1 -P501
%endif
#region LLVM preparation
@ -777,10 +835,6 @@ The package contains the LLDB Python module.
#endregion COMPILER-RT preparation
#region LLDB preparation
# Empty lldb/docs/CMakeLists.txt because we cannot build it
echo "" > lldb/docs/CMakeLists.txt
#endregion LLDB preparation
#endregion prep
#region build
@ -796,6 +850,7 @@ echo "" > lldb/docs/CMakeLists.txt
%endif
%if %reduce_debuginfo == 1
# Decrease debuginfo verbosity to reduce memory consumption during final library linking
%global optflags %(echo %{optflags} | sed 's/-g /-g1 /')
%endif
@ -831,7 +886,18 @@ popd
%endif
#region cmake options
%global cmake_config_args ""
# Common cmake arguments used by both the normal build and bundle_compat_lib.
# Any ABI-affecting flags should be in here.
%global cmake_common_args \\\
-DLLVM_ENABLE_EH=ON \\\
-DLLVM_ENABLE_RTTI=ON \\\
-DLLVM_USE_PERF=ON \\\
-DLLVM_TARGETS_TO_BUILD=%{targets_to_build} \\\
-DBUILD_SHARED_LIBS=OFF \\\
-DLLVM_BUILD_LLVM_DYLIB=ON
%global cmake_config_args %{cmake_common_args}
#region clang options
%global cmake_config_args %{cmake_config_args} \\\
@ -861,14 +927,20 @@ popd
#endregion compiler-rt options
#region docs options
# Add all *enabled* documentation targets (no doxygen but sphinx)
%global cmake_config_args %{cmake_config_args} \\\
-DLLVM_BUILD_DOCS:BOOL=ON \\\
-DLLVM_ENABLE_SPHINX:BOOL=ON \\\
-DSPHINX_EXECUTABLE=%{_bindir}/sphinx-build-3 \\\
-DSPHINX_WARNINGS_AS_ERRORS=OFF \\\
-DLLVM_ENABLE_DOXYGEN:BOOL=OFF \\\
-DLLVM_INCLUDE_DOCS:BOOL=ON \\\
-DLLVM_INSTALL_SPHINX_HTML_DIR=%{_pkgdocdir}/html
-DLLVM_ENABLE_SPHINX:BOOL=ON \\\
-DLLVM_BUILD_DOCS:BOOL=ON
# Configure sphinx:
# Build man-pages but no HTML docs using sphinx
%global cmake_config_args %{cmake_config_args} \\\
-DSPHINX_EXECUTABLE=%{_bindir}/sphinx-build-3 \\\
-DSPHINX_OUTPUT_HTML:BOOL=OFF \\\
-DSPHINX_OUTPUT_MAN:BOOL=ON \\\
-DSPHINX_WARNINGS_AS_ERRORS=OFF
#endregion docs options
#region lldb options
@ -892,21 +964,19 @@ popd
-DLLVM_APPEND_VC_REV:BOOL=OFF \\\
-DLLVM_BUILD_EXAMPLES:BOOL=OFF \\\
-DLLVM_BUILD_EXTERNAL_COMPILER_RT:BOOL=ON \\\
-DLLVM_BUILD_LLVM_DYLIB:BOOL=ON \\\
-DLLVM_BUILD_RUNTIME:BOOL=ON \\\
-DLLVM_BUILD_TOOLS:BOOL=ON \\\
-DLLVM_BUILD_UTILS:BOOL=ON \\\
-DLLVM_COMMON_CMAKE_UTILS=%{install_datadir}/llvm/cmake \\\
-DLLVM_DEFAULT_TARGET_TRIPLE=%{llvm_triple} \\\
-DLLVM_DYLIB_COMPONENTS="all" \\\
-DLLVM_ENABLE_EH=ON \\\
-DLLVM_ENABLE_FFI:BOOL=ON \\\
-DLLVM_ENABLE_LIBCXX:BOOL=OFF \\\
-DLLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON \\\
-DLLVM_ENABLE_PROJECTS="%{projects}" \\\
-DLLVM_ENABLE_RTTI:BOOL=ON \\\
-DLLVM_ENABLE_RUNTIMES="compiler-rt;openmp;offload" \\\
-DLLVM_ENABLE_ZLIB:BOOL=ON \\\
-DLLVM_ENABLE_ZLIB:BOOL=FORCE_ON \\\
-DLLVM_ENABLE_ZSTD:BOOL=FORCE_ON \\\
-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=%{experimental_targets_to_build} \\\
-DLLVM_INCLUDE_BENCHMARKS=OFF \\\
-DLLVM_INCLUDE_EXAMPLES:BOOL=ON \\\
@ -916,10 +986,8 @@ popd
-DLLVM_INSTALL_UTILS:BOOL=ON \\\
-DLLVM_LINK_LLVM_DYLIB:BOOL=ON \\\
-DLLVM_PARALLEL_LINK_JOBS=1 \\\
-DLLVM_TARGETS_TO_BUILD=%{targets_to_build} \\\
-DLLVM_TOOLS_INSTALL_DIR:PATH=bin \\\
-DLLVM_UNREACHABLE_OPTIMIZE:BOOL=OFF \\\
-DLLVM_USE_PERF:BOOL=ON \\\
-DLLVM_UTILS_INSTALL_DIR:PATH=bin
#endregion llvm options
@ -947,7 +1015,6 @@ popd
#region misc options
%global cmake_config_args %{cmake_config_args} \\\
-DBUILD_SHARED_LIBS:BOOL=OFF \\\
-DCMAKE_BUILD_TYPE=RelWithDebInfo \\\
-DCMAKE_INSTALL_PREFIX=%{install_prefix} \\\
-DENABLE_LINKER_BUILD_ID:BOOL=ON \\\
@ -1022,7 +1089,7 @@ fi
%cmake_build
# If we don't build the runtimes target here, we'll have to wait for the %check
# If we don't build the runtimes target here, we'll have to wait for the %%check
# section until these files are available but they need to be installed.
#
# /usr/lib64/libomptarget.devicertl.a
@ -1039,18 +1106,15 @@ cd ..
-DCMAKE_INSTALL_PREFIX=%{buildroot}%{_libdir}/llvm%{compat_maj_ver}/ \
-DCMAKE_SKIP_RPATH=ON \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF \
-DLLVM_BUILD_LLVM_DYLIB=ON \
-DLLVM_ENABLE_EH=ON \
-DLLVM_ENABLE_RTTI=ON \
-DLLVM_ENABLE_PROJECTS=clang \
-DLLVM_TARGETS_TO_BUILD=%{targets_to_build} \
-DLLVM_ENABLE_PROJECTS="clang;lldb" \
-DLLVM_INCLUDE_BENCHMARKS=OFF \
-DLLVM_INCLUDE_TESTS=OFF
-DLLVM_INCLUDE_TESTS=OFF \
%{cmake_common_args}
%ninja_build -C ../llvm-compat-libs LLVM
%ninja_build -C ../llvm-compat-libs libclang.so
%ninja_build -C ../llvm-compat-libs libclang-cpp.so
%ninja_build -C ../llvm-compat-libs liblldb.so
%endif
#endregion compat lib
@ -1124,6 +1188,35 @@ done
mkdir -p %{buildroot}%{pkg_datadir}/llvm/cmake
cp -Rv cmake/* %{buildroot}%{pkg_datadir}/llvm/cmake
# Install a placeholder to redirect users of the formerly shipped
# HTML documentation to the upstream HTML documentation.
mkdir -pv %{buildroot}%{_pkgdocdir}/html
cat <<EOF > %{buildroot}%{_pkgdocdir}/html/index.html
<!doctype html>
<html lang=en>
<head>
<title>LLVM %{maj_ver}.%{min_ver} documentation</title>
</head>
<body>
<h1>
LLVM %{maj_ver}.%{min_ver} Documentation
</h1>
<ul>
<li>
<a href="https://releases.llvm.org/%{maj_ver}.%{min_ver}.0/docs/index.html">
Click here for the upstream documentation of LLVM %{maj_ver}.%{min_ver}.
</a>
</li>
<li>
<a href="https://llvm.org/docs/">
Click here for the latest upstream documentation of LLVM.
</a>
</li>
</ul>
</body>
</html>
EOF
#endregion LLVM installation
#region CLANG installation
@ -1180,7 +1273,7 @@ rm -Rf %{buildroot}%{install_libdir}/{libear,libscanbuild}
rm -Rf %{buildroot}%{install_datadir}/clang/*.el
# Add clang++-{version} symlink
ln -s ../../%{install_bindir}/clang++ %{buildroot}%{install_bindir}/clang++-%{maj_ver}
ln -s clang++ %{buildroot}%{install_bindir}/clang++-%{maj_ver}
%endif
@ -1201,9 +1294,7 @@ chmod a+x %{buildroot}%{install_datadir}/scan-view/{Reporter.py,startfile.py}
rm -vf %{buildroot}%{install_datadir}/clang/clang-format-bbedit.applescript
rm -vf %{buildroot}%{install_datadir}/clang/clang-format-sublime.py*
# TODO: Package html docs
rm -Rvf %{buildroot}%{install_docdir}/LLVM/clang/html
rm -Rvf %{buildroot}%{install_docdir}/LLVM/clang-tools/html
# Remove unpackaged files
rm -Rvf %{buildroot}%{install_datadir}/clang-doc/clang-doc-default-stylesheet.css
rm -Rvf %{buildroot}%{install_datadir}/clang-doc/index.js
@ -1220,16 +1311,32 @@ echo "%%clang%{maj_ver}_resource_dir %%{_prefix}/lib/clang/%{maj_ver}" >> %{buil
# Install config file for clang
%if %{maj_ver} >=18
mkdir -p %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/
%ifarch x86_64_v2
echo "--gcc-triple=x86_64-redhat-linux" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang.cfg
echo "--gcc-triple=x86_64-redhat-linux" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang++.cfg
%global cfg_file_content --gcc-triple=x86_64-redhat-linux
%else
echo "--gcc-triple=%{_target_cpu}-redhat-linux" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang.cfg
echo "--gcc-triple=%{_target_cpu}-redhat-linux" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang++.cfg
%global cfg_file_content --gcc-triple=%{_target_cpu}-redhat-linux
%endif
# We want to use DWARF-5 on all snapshot builds.
%if %{without snapshot_build} && %{defined rhel} && 0%{?rhel} < 10
%global cfg_file_content %{cfg_file_content} -gdwarf-4 -g0
%endif
%if %{defined gts_version}
%global cfg_file_content %{cfg_file_content} --gcc-install-dir=/opt/rh/gcc-toolset-%{gts_version}/root/%{_exec_prefix}/lib/gcc/%{_target_cpu}-redhat-linux/%{gts_version}
%endif
mkdir -p %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/
echo " %{cfg_file_content}" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang.cfg
echo " %{cfg_file_content}" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang++.cfg
%ifarch x86_64
# On x86_64, install an additional set of config files so -m32 works.
echo " %{cfg_file_content}" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/i386-redhat-linux-gnu-clang.cfg
echo " %{cfg_file_content}" >> %{buildroot}%{_sysconfdir}/%{pkg_name_clang}/i386-redhat-linux-gnu-clang++.cfg
%endif
%endif
#endregion CLANG installation
#region COMPILER-RT installation
@ -1280,9 +1387,6 @@ rm %{buildroot}%{install_bindir}/llvm-omp-kernel-replay
#region LLD installation
# Remove LLD's HTML documentation files
rm -Rvf %{buildroot}%{install_docdir}/LLVM/lld/html
%if %{without compat_build}
# Required when using update-alternatives:
# https://docs.fedoraproject.org/en-US/packaging-guidelines/Alternatives/
@ -1346,6 +1450,7 @@ touch %{buildroot}%{_bindir}/llvm-config%{exec_suffix}
install -m 0755 ../llvm-compat-libs/lib/libLLVM.so.%{compat_maj_ver}* %{buildroot}%{_libdir}
install -m 0755 ../llvm-compat-libs/lib/libclang.so.%{compat_maj_ver}* %{buildroot}%{_libdir}
install -m 0755 ../llvm-compat-libs/lib/libclang-cpp.so.%{compat_maj_ver}* %{buildroot}%{_libdir}
install -m 0755 ../llvm-compat-libs/lib/liblldb.so.%{compat_maj_ver}* %{buildroot}%{_libdir}
%endif
#endregion install
@ -1376,11 +1481,11 @@ function reset_test_opts()
# See https://llvm.org/docs/CommandGuide/lit.html#general-options
export LIT_OPTS="-vv --time-tests"
# Set to mark tests as expected to fail.
# See https://llvm.org/docs/CommandGuide/lit.html#cmdoption-lit-xfail
unset LIT_XFAIL
# Set to mark tests to not even run.
# See https://llvm.org/docs/CommandGuide/lit.html#cmdoption-lit-filter-out
# Unfortunately LIT_FILTER_OUT is not accepting a list but a regular expression.
@ -1392,7 +1497,7 @@ function reset_test_opts()
unset LIT_FILTER_OUT
# Set for filtering out unit tests.
# See http://google.github.io/googletest/advanced.html#running-a-subset-of-the-tests
# See http://google.github.io/googletest/advanced.html#running-a-subset-of-the-tests
unset GTEST_FILTER
}
@ -1409,7 +1514,7 @@ function reset_test_opts()
# Then $LIT_FILTER_OUT should evaluate to: (foo|bar)
function test_list_to_regex()
{
local -n arr=$1
local -n arr=$1
# Prepare LIT_FILTER_OUT regex from index bash array
# Join each element with a pipe symbol (regex for "or")
arr=$(printf "|%s" "${arr[@]}")
@ -1432,7 +1537,7 @@ reset_test_opts
reset_test_opts
# Xfail testing of update utility tools
export LIT_XFAIL="tools/UpdateTestChecks"
%cmake_build --target check-llvm
%cmake_build --target check-llvm
#endregion Test LLVM
#region Test CLANG
@ -1503,6 +1608,14 @@ test_list_filter_out+=("libomp :: worksharing/for/omp_collapse_one_int.c")
%ifarch s390x
test_list_filter_out+=("libomp :: flush/omp_flush.c")
test_list_filter_out+=("libomp :: worksharing/for/omp_for_schedule_guided.c")
%endif
%ifarch aarch64 s390x
# The following test has been failling intermittently on aarch64 and s390x.
# Re-enable it after https://github.com/llvm/llvm-project/issues/117773
# gets fixed.
test_list_filter_out+=("libarcher :: races/taskwait-depend.c")
%endif
# The following tests seem pass on ppc64le and x86_64 and aarch64 only:
@ -1606,7 +1719,14 @@ export LIT_XFAIL="$LIT_XFAIL;offloading/thread_state_2.c"
export LIT_FILTER_OUT=$(test_list_to_regex test_list_filter_out)
%if 0%{?rhel}
# libomp tests are often very slow on s390x brew builders
%ifnarch s390x
%cmake_build --target check-openmp
%endif
%else
%cmake_build --target check-openmp
%endif
#endregion Test OPENMP
%if %{with lldb}
@ -1617,12 +1737,12 @@ export LIT_FILTER_OUT=$(test_list_to_regex test_list_filter_out)
## reset_test_opts
## %%cmake_build --target check-lldb-unit
## #endregion LLDB unit tests
##
##
## #region LLDB SB API tests
## reset_test_opts
## %%cmake_build --target check-lldb-api
## #endregion LLDB SB API tests
##
##
## #region LLDB shell tests
## reset_test_opts
## %%cmake_build --target check-lldb-shell
@ -1679,7 +1799,7 @@ fi
# alternative must be removed in order to give priority to a newly installed
# compat package.
if [[ $1 -eq 0
|| "x$(%{_bindir}/llvm-config-%{maj_ver} --version | awk -F . '{ print $1 }')" != "x%{maj_ver}" ]]; then
|| "x$(%{_bindir}/llvm-config%{exec_suffix} --version | awk -F . '{ print $1 }')" != "x%{maj_ver}" ]]; then
%{_sbindir}/update-alternatives --remove llvm-config-%{maj_ver} %{install_bindir}/llvm-config%{exec_suffix}-%{__isa_bits}
fi
%endif
@ -1967,14 +2087,14 @@ fi
%files -n %{pkg_name_llvm}-libs
%license llvm/LICENSE.TXT
%{install_libdir}/libLLVM-%{maj_ver}%{?llvm_snapshot_version_suffix:%{llvm_snapshot_version_suffix}}.so
%{install_libdir}/libLLVM-%{maj_ver}%{?llvm_snapshot_version_suffix}.so
%if %{with gold}
%{install_libdir}/LLVMgold.so
%if %{without compat_build}
%{_libdir}/bfd-plugins/LLVMgold.so
%endif
%endif
%{install_libdir}/libLLVM.so.%{maj_ver}.%{min_ver}%{?llvm_snapshot_version_suffix:%{llvm_snapshot_version_suffix}}
%{install_libdir}/libLLVM.so.%{maj_ver}.%{min_ver}%{?llvm_snapshot_version_suffix}
%{install_libdir}/libLTO.so*
%{install_libdir}/libRemarks.so*
%if %{with compat_build}
@ -2004,7 +2124,7 @@ fi
%files -n %{pkg_name_llvm}-doc
%license llvm/LICENSE.TXT
%doc %{_pkgdocdir}/html
%doc %{_pkgdocdir}/html/index.html
%files -n %{pkg_name_llvm}-static
%license llvm/LICENSE.TXT
@ -2061,6 +2181,10 @@ fi
%{install_bindir}/clang-cpp
%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang.cfg
%{_sysconfdir}/%{pkg_name_clang}/%{_target_platform}-clang++.cfg
%ifarch x86_64
%{_sysconfdir}/%{pkg_name_clang}/i386-redhat-linux-gnu-clang.cfg
%{_sysconfdir}/%{pkg_name_clang}/i386-redhat-linux-gnu-clang++.cfg
%endif
%{_mandir}/man1/clang-%{maj_ver}.1.gz
%{_mandir}/man1/clang++-%{maj_ver}.1.gz
%if %{without compat_build}
@ -2157,6 +2281,9 @@ fi
%{install_bindir}/clang-reorder-fields
%{install_bindir}/clang-repl
%{install_bindir}/clang-scan-deps
%if %{maj_ver} >= 20
%{install_bindir}/clang-sycl-linker
%endif
%{install_bindir}/clang-tidy
%{install_bindir}/clangd
%{install_bindir}/diagtool
@ -2193,6 +2320,9 @@ fi
%{_bindir}/clang-reorder-fields-%{maj_ver}
%{_bindir}/clang-repl-%{maj_ver}
%{_bindir}/clang-scan-deps-%{maj_ver}
%if %{maj_ver} >= 20
%{_bindir}/clang-sycl-linker-%{maj_ver}
%endif
%{_bindir}/clang-tidy-%{maj_ver}
%{_bindir}/clangd-%{maj_ver}
%{_bindir}/diagtool-%{maj_ver}
@ -2371,6 +2501,11 @@ fi
%{install_libdir}/liblldb*.so
%{install_libdir}/liblldb.so.*
%{install_libdir}/liblldbIntelFeatures.so.*
%{_mandir}/man1/lldb-server%{exec_suffix}.1.gz
%{_mandir}/man1/lldb%{exec_suffix}.1.gz
%if %{with bundle_compat_lib}
%{_libdir}/liblldb.so.%{compat_maj_ver}*
%endif
%files -n %{pkg_name_lldb}-devel
%{install_includedir}/lldb
@ -2383,9 +2518,18 @@ fi
#region changelog
%changelog
* Fri Dec 06 2024 Eduard Abdullin <eabdullin@almalinux.org> - 19.1.1-5
* Fri Jan 24 2025 Eduard Abdullin <eabdullin@almalinux.org> - 19.1.7-2.alma.1
- Use x86_64-redhat-linux as default gcc triple for x86_64_v2
* Fri Jan 17 2025 Nikita Popov <npopov@redhat.com> - 19.1.7-2
- Support CET in libomp.so (RHEL-74346)
* Wed Jan 15 2025 Nikita Popov <npopov@redhat.com> - 19.1.7-1
- Update to LLVM 19.1.7 (RHEL-57456)
- Remove llvm18 compat package (RHEL-57457)
- Remove generated html content from llvm-doc subpackage (RHEL-58900)
- Enable LLVM_ENABLE_ZSTD=ON (RHEL-70325)
* Mon Oct 14 2024 Nikita Popov <npopov@redhat.com> - 19.1.1-5
- Add missing requires

View File

@ -1,4 +1,2 @@
SHA512 (llvm-project-19.1.1.src.tar.xz) = 84adab40ffb9ec236dbf203d86c08a0c2c651f98278a9d0936490c7901159eb26eabd3db9316013886b549426d4acb43b75d866f7dc670ab299bf93ba35b1891
SHA512 (llvm-project-19.1.1.src.tar.xz.sig) = 07bb7bffb2b035417d702ca47be9d5759250f1a2cd57606855027d458ceb972a293b45d3d93bcda195588986acbb5eace60524f4aecdc0da7aeb3a8414c37c31
SHA512 (llvm-project-18.1.8.src.tar.xz) = 25eeee9984c8b4d0fbc240df90f33cbb000d3b0414baff5c8982beafcc5e59e7ef18f6f85d95b3a5f60cb3d4cd4f877c80487b5768bc21bc833f107698ad93db
SHA512 (llvm-project-18.1.8.src.tar.xz.sig) = ddfd1e8a06756759af6cbe488c82a6d6a62ba91f3e8a0eb4cece561321824f5d165b08ed91010588790b76e19790931d2651b24dba8567e3b151d3cb43bec25b
SHA512 (llvm-project-19.1.7.src.tar.xz) = c7d63286d662707a9cd54758c9e3aaf52794a91900c484c4a6efa62d90bc719d5e7a345e4192feeb0c9fd11c82570d64677c781e5be1d645556b6aa018e47ec8
SHA512 (llvm-project-19.1.7.src.tar.xz.sig) = 195797b06ac80a742e0ccbc03a50dc06dd2e04377d783d5474e3e72c5a75203b60292b047929312a411d22b137a239943fba414a4d136a2be14cbff978eb6bda

View File

@ -50,7 +50,7 @@ adjust:
discover:
- name: llvm-tests
how: fmf
url: https://src.fedoraproject.org/tests/llvm.git
url: https://gitlab.com/redhat/centos-stream/tests/llvm.git
ref: main
execute:
how: tmt