127 lines
4.3 KiB
Diff
127 lines
4.3 KiB
Diff
|
From ffecacb1a1eeca8ca8567057c3c886161837d2a6 Mon Sep 17 00:00:00 2001
|
||
|
From: Tom Stellard <thomas.stellard@amd.com>
|
||
|
Date: Thu, 15 May 2014 20:55:58 +0000
|
||
|
Subject: [PATCH 3/5] Merging r208501:
|
||
|
|
||
|
------------------------------------------------------------------------
|
||
|
r208501 | hfinkel | 2014-05-11 12:23:29 -0400 (Sun, 11 May 2014) | 9 lines
|
||
|
|
||
|
[PowerPC] On PPC32, 128-bit shifts might be runtime calls
|
||
|
|
||
|
The counter-loops formation pass needs to know what operations might be
|
||
|
function calls (because they can't appear in counter-based loops). On PPC32,
|
||
|
128-bit shifts might be runtime calls (even though you can't use __int128 on
|
||
|
PPC32, it seems that SROA might form them).
|
||
|
|
||
|
Fixes PR19709.
|
||
|
|
||
|
------------------------------------------------------------------------
|
||
|
|
||
|
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_34@208916 91177308-0d34-0410-b5e6-96231b3b80d8
|
||
|
---
|
||
|
lib/Target/PowerPC/PPCCTRLoops.cpp | 8 +++++
|
||
|
test/CodeGen/PowerPC/ctrloop-sh.ll | 72 ++++++++++++++++++++++++++++++++++++++
|
||
|
2 files changed, 80 insertions(+)
|
||
|
create mode 100644 test/CodeGen/PowerPC/ctrloop-sh.ll
|
||
|
|
||
|
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
|
||
|
index e419b9b..819635c 100644
|
||
|
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
|
||
|
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
|
||
|
@@ -369,6 +369,14 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
|
||
|
J->getOpcode() == Instruction::URem ||
|
||
|
J->getOpcode() == Instruction::SRem)) {
|
||
|
return true;
|
||
|
+ } else if (TT.isArch32Bit() &&
|
||
|
+ isLargeIntegerTy(false, J->getType()->getScalarType()) &&
|
||
|
+ (J->getOpcode() == Instruction::Shl ||
|
||
|
+ J->getOpcode() == Instruction::AShr ||
|
||
|
+ J->getOpcode() == Instruction::LShr)) {
|
||
|
+ // Only on PPC32, for 128-bit integers (specifically not 64-bit
|
||
|
+ // integers), these might be runtime calls.
|
||
|
+ return true;
|
||
|
} else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
|
||
|
// On PowerPC, indirect jumps use the counter register.
|
||
|
return true;
|
||
|
diff --git a/test/CodeGen/PowerPC/ctrloop-sh.ll b/test/CodeGen/PowerPC/ctrloop-sh.ll
|
||
|
new file mode 100644
|
||
|
index 0000000..d8e6fc7
|
||
|
--- /dev/null
|
||
|
+++ b/test/CodeGen/PowerPC/ctrloop-sh.ll
|
||
|
@@ -0,0 +1,72 @@
|
||
|
+; RUN: llc < %s | FileCheck %s
|
||
|
+target datalayout = "E-m:e-p:32:32-i128:64-n32"
|
||
|
+target triple = "powerpc-ellcc-linux"
|
||
|
+
|
||
|
+; Function Attrs: nounwind
|
||
|
+define void @foo1(i128* %a, i128* readonly %b, i128* readonly %c) #0 {
|
||
|
+entry:
|
||
|
+ br label %for.body
|
||
|
+
|
||
|
+for.body: ; preds = %for.body, %entry
|
||
|
+ %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||
|
+ %0 = load i128* %b, align 16
|
||
|
+ %1 = load i128* %c, align 16
|
||
|
+ %shl = shl i128 %0, %1
|
||
|
+ store i128 %shl, i128* %a, align 16
|
||
|
+ %inc = add nsw i32 %i.02, 1
|
||
|
+ %exitcond = icmp eq i32 %inc, 2048
|
||
|
+ br i1 %exitcond, label %for.end, label %for.body
|
||
|
+
|
||
|
+for.end: ; preds = %for.body
|
||
|
+ ret void
|
||
|
+
|
||
|
+; CHECK-LABEL: @foo1
|
||
|
+; CHECK-NOT: mtctr
|
||
|
+}
|
||
|
+
|
||
|
+; Function Attrs: nounwind
|
||
|
+define void @foo2(i128* %a, i128* readonly %b, i128* readonly %c) #0 {
|
||
|
+entry:
|
||
|
+ br label %for.body
|
||
|
+
|
||
|
+for.body: ; preds = %for.body, %entry
|
||
|
+ %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||
|
+ %0 = load i128* %b, align 16
|
||
|
+ %1 = load i128* %c, align 16
|
||
|
+ %shl = ashr i128 %0, %1
|
||
|
+ store i128 %shl, i128* %a, align 16
|
||
|
+ %inc = add nsw i32 %i.02, 1
|
||
|
+ %exitcond = icmp eq i32 %inc, 2048
|
||
|
+ br i1 %exitcond, label %for.end, label %for.body
|
||
|
+
|
||
|
+for.end: ; preds = %for.body
|
||
|
+ ret void
|
||
|
+
|
||
|
+; CHECK-LABEL: @foo2
|
||
|
+; CHECK-NOT: mtctr
|
||
|
+}
|
||
|
+
|
||
|
+; Function Attrs: nounwind
|
||
|
+define void @foo3(i128* %a, i128* readonly %b, i128* readonly %c) #0 {
|
||
|
+entry:
|
||
|
+ br label %for.body
|
||
|
+
|
||
|
+for.body: ; preds = %for.body, %entry
|
||
|
+ %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||
|
+ %0 = load i128* %b, align 16
|
||
|
+ %1 = load i128* %c, align 16
|
||
|
+ %shl = lshr i128 %0, %1
|
||
|
+ store i128 %shl, i128* %a, align 16
|
||
|
+ %inc = add nsw i32 %i.02, 1
|
||
|
+ %exitcond = icmp eq i32 %inc, 2048
|
||
|
+ br i1 %exitcond, label %for.end, label %for.body
|
||
|
+
|
||
|
+for.end: ; preds = %for.body
|
||
|
+ ret void
|
||
|
+
|
||
|
+; CHECK-LABEL: @foo3
|
||
|
+; CHECK-NOT: mtctr
|
||
|
+}
|
||
|
+
|
||
|
+attributes #0 = { nounwind }
|
||
|
+
|
||
|
--
|
||
|
1.9.3
|
||
|
|