144 lines
6.1 KiB
Diff
144 lines
6.1 KiB
Diff
From c76137f1cfd5758f6889236d49a65f059e6432ff Mon Sep 17 00:00:00 2001
|
|
From: weiguozhi <57237827+weiguozhi@users.noreply.github.com>
|
|
Date: Thu, 15 May 2025 09:27:25 -0700
|
|
Subject: [PATCH] [CodeGenPrepare] Make sure instruction get from SunkAddrs is
|
|
before MemoryInst (#139303)
|
|
|
|
Function optimizeBlock may optimize a block multiple
|
|
times. In the first iteration of the loop, MemoryInst1 may generate a
|
|
sunk instruction and store it into SunkAddrs. In the second iteration of
|
|
the loop, MemoryInst2 may use the same address and then it can reuse the
|
|
sunk instruction stored in SunkAddrs, but MemoryInst2 may be before
|
|
MemoryInst1 and the corresponding sunk instruction. In order to avoid
|
|
a use-before-def error, we need to find an appropriate insert position for the
|
|
sunk instruction.
|
|
|
|
Fixes #138208.
|
|
|
|
(cherry picked from commit 59c6d70ed8120b8864e5f796e2bf3de5518a0ef0)
|
|
---
|
|
llvm/lib/CodeGen/CodeGenPrepare.cpp | 41 ++++++++++++++---
|
|
.../CodeGenPrepare/X86/sink-addr-reuse.ll | 44 +++++++++++++++++++
|
|
2 files changed, 80 insertions(+), 5 deletions(-)
|
|
create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
|
|
|
|
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
|
|
index 088062afab17..f779f4b782ae 100644
|
|
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
|
|
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
|
|
@@ -5728,6 +5728,35 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
|
|
return false;
|
|
}
|
|
|
|
+// Find an insert position of Addr for MemoryInst. We can't guarantee that
|
|
+// MemoryInst is the first instruction that will use Addr, so we need to find
|
|
+// the first user of Addr in the current BB.
|
|
+static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
|
|
+ Value *SunkAddr) {
|
|
+ if (Addr->hasOneUse())
|
|
+ return MemoryInst->getIterator();
|
|
+
|
|
+ // We already have a SunkAddr in the current BB, but we may need to insert a
|
|
+ // cast instruction after it.
|
|
+ if (SunkAddr) {
|
|
+ if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
|
|
+ return std::next(AddrInst->getIterator());
|
|
+ }
|
|
+
|
|
+ // Find the first non-PHI, non-debug/pseudo user of Addr in the current BB.
|
|
+ Instruction *Earliest = MemoryInst;
|
|
+ for (User *U : Addr->users()) {
|
|
+ Instruction *UserInst = dyn_cast<Instruction>(U);
|
|
+ if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
|
|
+ if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
|
|
+ continue;
|
|
+ if (UserInst->comesBefore(Earliest))
|
|
+ Earliest = UserInst;
|
|
+ }
|
|
+ }
|
|
+ return Earliest->getIterator();
|
|
+}
|
|
+
|
|
/// Sink addressing mode computation immediate before MemoryInst if doing so
|
|
/// can be done without increasing register pressure. The need for the
|
|
/// register pressure constraint means this can end up being an all or nothing
|
|
@@ -5852,11 +5881,6 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
|
|
return Modified;
|
|
}
|
|
|
|
- // Insert this computation right after this user. Since our caller is
|
|
- // scanning from the top of the BB to the bottom, reuse of the expr are
|
|
- // guaranteed to happen later.
|
|
- IRBuilder<> Builder(MemoryInst);
|
|
-
|
|
// Now that we determined the addressing expression we want to use and know
|
|
// that we have to sink it into this block. Check to see if we have already
|
|
// done this for some other load/store instr in this block. If so, reuse
|
|
@@ -5867,6 +5891,13 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
|
|
|
|
Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
|
|
Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
|
|
+
|
|
+ // The current BB may be optimized multiple times, we can't guarantee the
|
|
+ // reuse of Addr happens later, call findInsertPos to find an appropriate
|
|
+ // insert position.
|
|
+ IRBuilder<> Builder(MemoryInst->getParent(),
|
|
+ findInsertPos(Addr, MemoryInst, SunkAddr));
|
|
+
|
|
if (SunkAddr) {
|
|
LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
|
|
<< " for " << *MemoryInst << "\n");
|
|
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
|
|
new file mode 100644
|
|
index 000000000000..019f31140655
|
|
--- /dev/null
|
|
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addr-reuse.ll
|
|
@@ -0,0 +1,44 @@
|
|
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
|
+; RUN: opt -S -p 'require<profile-summary>,codegenprepare' -cgpp-huge-func=0 < %s | FileCheck %s
|
|
+
|
|
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
|
|
+target triple = "x86_64-grtev4-linux-gnu"
|
|
+
|
|
+declare void @g(ptr)
|
|
+
|
|
+; %load and %load5 use the same address, %load5 is optimized first, %load is
|
|
+; optimized later and reuses the same address computation instruction. We must
|
|
+; make sure not to generate a use-before-def error.
|
|
+
|
|
+define void @f(ptr %arg) {
|
|
+; CHECK-LABEL: define void @f(
|
|
+; CHECK-SAME: ptr [[ARG:%.*]]) {
|
|
+; CHECK-NEXT: [[BB:.*:]]
|
|
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
|
|
+; CHECK-NEXT: call void @g(ptr [[GETELEMENTPTR]])
|
|
+; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[ARG]], i64 -64
|
|
+; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
|
|
+; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[ARG]], i64 -56
|
|
+; CHECK-NEXT: [[LOAD4:%.*]] = load i32, ptr [[SUNKADDR]], align 8
|
|
+; CHECK-NEXT: [[LOAD5:%.*]] = load ptr, ptr [[SUNKADDR1]], align 8
|
|
+; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 1, i32 0)
|
|
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0
|
|
+; CHECK-NEXT: ret void
|
|
+;
|
|
+bb:
|
|
+ %getelementptr = getelementptr i8, ptr %arg, i64 -64
|
|
+ %getelementptr1 = getelementptr i8, ptr %arg, i64 -56
|
|
+ call void @g(ptr %getelementptr)
|
|
+ br label %bb3
|
|
+
|
|
+bb3:
|
|
+ %load = load ptr, ptr %getelementptr, align 8
|
|
+ %load4 = load i32, ptr %getelementptr1, align 8
|
|
+ %load5 = load ptr, ptr %getelementptr, align 8
|
|
+ %add = add i32 1, 0
|
|
+ %icmp = icmp eq i32 %add, 0
|
|
+ br i1 %icmp, label %bb7, label %bb7
|
|
+
|
|
+bb7:
|
|
+ ret void
|
|
+}
|
|
--
|
|
2.49.0
|
|
|