7.0.0-rc1 Release
- Reduce the number of enabled targets based on the architecture. - Drop s390 detection patch, LLVM does not support s390 codegen.
This commit is contained in:
parent
93d2074b7b
commit
7a93d34863
@ -1,39 +0,0 @@
|
|||||||
From 4d613a84ce271c6225068bef67d727ae02b2e3b1 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Pavel Labath <labath@google.com>
|
|
||||||
Date: Wed, 14 Mar 2018 09:28:38 +0000
|
|
||||||
Subject: [PATCH] Export LLVM_DYLIB_COMPONENTS in LLVMConfig.cmake
|
|
||||||
|
|
||||||
Summary:
|
|
||||||
This is needed so that external projects (e.g. a standalone build of
|
|
||||||
lldb) can link to the LLVM shared library via the USE_SHARED argument of
|
|
||||||
llvm_config. Without this, llvm_config would add LLVM to the link list,
|
|
||||||
but then also add the constituent static libraries, resulting in
|
|
||||||
multiply defined symbols.
|
|
||||||
|
|
||||||
Reviewers: beanz, mgorny
|
|
||||||
|
|
||||||
Subscribers: llvm-commits
|
|
||||||
|
|
||||||
Differential Revision: https://reviews.llvm.org/D44391
|
|
||||||
|
|
||||||
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@327484 91177308-0d34-0410-b5e6-96231b3b80d8
|
|
||||||
---
|
|
||||||
cmake/modules/LLVMConfig.cmake.in | 2 ++
|
|
||||||
1 file changed, 2 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/cmake/modules/LLVMConfig.cmake.in b/cmake/modules/LLVMConfig.cmake.in
|
|
||||||
index fe4df52..e700186 100644
|
|
||||||
--- a/cmake/modules/LLVMConfig.cmake.in
|
|
||||||
+++ b/cmake/modules/LLVMConfig.cmake.in
|
|
||||||
@@ -13,6 +13,8 @@ set(LLVM_COMMON_DEPENDS @LLVM_COMMON_DEPENDS@)
|
|
||||||
|
|
||||||
set(LLVM_AVAILABLE_LIBS @LLVM_AVAILABLE_LIBS@)
|
|
||||||
|
|
||||||
+set(LLVM_DYLIB_COMPONENTS @LLVM_DYLIB_COMPONENTS@)
|
|
||||||
+
|
|
||||||
set(LLVM_ALL_TARGETS @LLVM_ALL_TARGETS@)
|
|
||||||
|
|
||||||
set(LLVM_TARGETS_TO_BUILD @LLVM_TARGETS_TO_BUILD@)
|
|
||||||
--
|
|
||||||
1.8.3.1
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
|||||||
From acdb4ab5aa8076469fa551cc79e6bc7bbe8c06a5 Mon Sep 17 00:00:00 2001
|
From 5f7fd92155db77c7608e3a07e5dcfad1ec7bd4e4 Mon Sep 17 00:00:00 2001
|
||||||
From: Tom Stellard <tstellar@redhat.com>
|
From: Tom Stellard <tstellar@redhat.com>
|
||||||
Date: Fri, 16 Mar 2018 07:52:33 -0700
|
Date: Fri, 16 Mar 2018 07:52:33 -0700
|
||||||
Subject: [PATCH] Filter out cxxflags not supported by clang
|
Subject: [PATCH] Filter out cxxflags not supported by clang
|
||||||
@ -8,10 +8,10 @@ Subject: [PATCH] Filter out cxxflags not supported by clang
|
|||||||
1 file changed, 4 insertions(+)
|
1 file changed, 4 insertions(+)
|
||||||
|
|
||||||
diff --git a/tools/llvm-config/CMakeLists.txt b/tools/llvm-config/CMakeLists.txt
|
diff --git a/tools/llvm-config/CMakeLists.txt b/tools/llvm-config/CMakeLists.txt
|
||||||
index 25f99ce..922d4c5 100644
|
index a0bd36c..4193b0e 100644
|
||||||
--- a/tools/llvm-config/CMakeLists.txt
|
--- a/tools/llvm-config/CMakeLists.txt
|
||||||
+++ b/tools/llvm-config/CMakeLists.txt
|
+++ b/tools/llvm-config/CMakeLists.txt
|
||||||
@@ -33,7 +33,11 @@ set(LLVM_SRC_ROOT ${LLVM_MAIN_SRC_DIR})
|
@@ -34,7 +34,11 @@ set(LLVM_SRC_ROOT ${LLVM_MAIN_SRC_DIR})
|
||||||
set(LLVM_OBJ_ROOT ${LLVM_BINARY_DIR})
|
set(LLVM_OBJ_ROOT ${LLVM_BINARY_DIR})
|
||||||
set(LLVM_CPPFLAGS "${CMAKE_CPP_FLAGS} ${CMAKE_CPP_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
|
set(LLVM_CPPFLAGS "${CMAKE_CPP_FLAGS} ${CMAKE_CPP_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
|
||||||
set(LLVM_CFLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
|
set(LLVM_CFLAGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
|
||||||
@ -22,7 +22,7 @@ index 25f99ce..922d4c5 100644
|
|||||||
+STRING(REGEX REPLACE "-fcf-protection" "" LLVM_CXXFLAGS ${LLVM_CXXFLAGS})
|
+STRING(REGEX REPLACE "-fcf-protection" "" LLVM_CXXFLAGS ${LLVM_CXXFLAGS})
|
||||||
set(LLVM_BUILD_SYSTEM cmake)
|
set(LLVM_BUILD_SYSTEM cmake)
|
||||||
set(LLVM_HAS_RTTI ${LLVM_CONFIG_HAS_RTTI})
|
set(LLVM_HAS_RTTI ${LLVM_CONFIG_HAS_RTTI})
|
||||||
set(LLVM_DYLIB_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}${LLVM_VERSION_SUFFIX}")
|
set(LLVM_DYLIB_VERSION "${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX}")
|
||||||
--
|
--
|
||||||
1.8.3.1
|
1.8.3.1
|
||||||
|
|
||||||
|
@ -1,919 +0,0 @@
|
|||||||
From 88ad713b81c2f51dd8405b251f9825b0bca6e57d Mon Sep 17 00:00:00 2001
|
|
||||||
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
|
|
||||||
Date: Thu, 2 Aug 2018 00:03:22 +0000
|
|
||||||
Subject: [PATCH] [PowerPC] Do not round values prior to converting to integer
|
|
||||||
|
|
||||||
Adding the FP_ROUND nodes when combining FP_TO_[SU]INT of elements
|
|
||||||
feeding a BUILD_VECTOR into an FP_TO_[SU]INT of the built vector
|
|
||||||
loses precision. This patch removes the code that adds these nodes
|
|
||||||
to true f64 operands. It also adds patterns required to ensure
|
|
||||||
the code is still vectorized rather than converting individual
|
|
||||||
elements and inserting into a vector.
|
|
||||||
|
|
||||||
Fixes https://bugs.llvm.org/show_bug.cgi?id=38342
|
|
||||||
|
|
||||||
Differential Revision: https://reviews.llvm.org/D50121
|
|
||||||
|
|
||||||
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338658 91177308-0d34-0410-b5e6-96231b3b80d8
|
|
||||||
---
|
|
||||||
lib/Target/PowerPC/PPCISelLowering.cpp | 22 +-
|
|
||||||
lib/Target/PowerPC/PPCInstrVSX.td | 86 +++++++
|
|
||||||
test/CodeGen/PowerPC/build-vector-tests.ll | 357 +++++++++++++----------------
|
|
||||||
3 files changed, 258 insertions(+), 207 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
|
|
||||||
index f622b05..527ec5a 100644
|
|
||||||
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
|
|
||||||
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
|
|
||||||
@@ -11560,6 +11560,14 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
|
|
||||||
ShiftCst);
|
|
||||||
}
|
|
||||||
|
|
||||||
+// Is this an extending load from an f32 to an f64?
|
|
||||||
+static bool isFPExtLoad(SDValue Op) {
|
|
||||||
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
|
|
||||||
+ return LD->getExtensionType() == ISD::EXTLOAD &&
|
|
||||||
+ Op.getValueType() == MVT::f64;
|
|
||||||
+ return false;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
/// \brief Reduces the number of fp-to-int conversion when building a vector.
|
|
||||||
///
|
|
||||||
/// If this vector is built out of floating to integer conversions,
|
|
||||||
@@ -11594,11 +11602,18 @@ combineElementTruncationToVectorTruncation(SDNode *N,
|
|
||||||
SmallVector<SDValue, 4> Ops;
|
|
||||||
EVT TargetVT = N->getValueType(0);
|
|
||||||
for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
|
|
||||||
- if (N->getOperand(i).getOpcode() != PPCISD::MFVSR)
|
|
||||||
+ SDValue NextOp = N->getOperand(i);
|
|
||||||
+ if (NextOp.getOpcode() != PPCISD::MFVSR)
|
|
||||||
return SDValue();
|
|
||||||
- unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode();
|
|
||||||
+ unsigned NextConversion = NextOp.getOperand(0).getOpcode();
|
|
||||||
if (NextConversion != FirstConversion)
|
|
||||||
return SDValue();
|
|
||||||
+ // If we are converting to 32-bit integers, we need to add an FP_ROUND.
|
|
||||||
+ // This is not valid if the input was originally double precision. It is
|
|
||||||
+ // also not profitable to do unless this is an extending load in which
|
|
||||||
+ // case doing this combine will allow us to combine consecutive loads.
|
|
||||||
+ if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
|
|
||||||
+ return SDValue();
|
|
||||||
if (N->getOperand(i) != FirstInput)
|
|
||||||
IsSplat = false;
|
|
||||||
}
|
|
||||||
@@ -11612,8 +11627,9 @@ combineElementTruncationToVectorTruncation(SDNode *N,
|
|
||||||
// Now that we know we have the right type of node, get its operands
|
|
||||||
for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
|
|
||||||
SDValue In = N->getOperand(i).getOperand(0);
|
|
||||||
- // For 32-bit values, we need to add an FP_ROUND node.
|
|
||||||
if (Is32Bit) {
|
|
||||||
+ // For 32-bit values, we need to add an FP_ROUND node (if we made it
|
|
||||||
+ // here, we know that all inputs are extending loads so this is safe).
|
|
||||||
if (In.isUndef())
|
|
||||||
Ops.push_back(DAG.getUNDEF(SrcVT));
|
|
||||||
else {
|
|
||||||
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
|
|
||||||
index 6f71978..1f48473 100644
|
|
||||||
--- a/lib/Target/PowerPC/PPCInstrVSX.td
|
|
||||||
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
|
|
||||||
@@ -3100,6 +3100,17 @@ def DblToFlt {
|
|
||||||
dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
|
|
||||||
}
|
|
||||||
|
|
||||||
+def ExtDbl {
|
|
||||||
+ dag A0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 0))))));
|
|
||||||
+ dag A1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$A, 1))))));
|
|
||||||
+ dag B0S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 0))))));
|
|
||||||
+ dag B1S = (i32 (PPCmfvsr (f64 (PPCfctiwz (f64 (extractelt v2f64:$B, 1))))));
|
|
||||||
+ dag A0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 0))))));
|
|
||||||
+ dag A1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$A, 1))))));
|
|
||||||
+ dag B0U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 0))))));
|
|
||||||
+ dag B1U = (i32 (PPCmfvsr (f64 (PPCfctiwuz (f64 (extractelt v2f64:$B, 1))))));
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
def ByteToWord {
|
|
||||||
dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8));
|
|
||||||
dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8));
|
|
||||||
@@ -3177,9 +3188,15 @@ def FltToULong {
|
|
||||||
}
|
|
||||||
def DblToInt {
|
|
||||||
dag A = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$A))));
|
|
||||||
+ dag B = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$B))));
|
|
||||||
+ dag C = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$C))));
|
|
||||||
+ dag D = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$D))));
|
|
||||||
}
|
|
||||||
def DblToUInt {
|
|
||||||
dag A = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$A))));
|
|
||||||
+ dag B = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$B))));
|
|
||||||
+ dag C = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$C))));
|
|
||||||
+ dag D = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$D))));
|
|
||||||
}
|
|
||||||
def DblToLong {
|
|
||||||
dag A = (i64 (PPCmfvsr (f64 (PPCfctidz f64:$A))));
|
|
||||||
@@ -3218,6 +3235,47 @@ def MrgFP {
|
|
||||||
dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3));
|
|
||||||
}
|
|
||||||
|
|
||||||
+// Word-element merge dags - conversions from f64 to i32 merged into vectors.
|
|
||||||
+def MrgWords {
|
|
||||||
+ // For big endian, we merge low and hi doublewords (A, B).
|
|
||||||
+ dag A0B0 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 0));
|
|
||||||
+ dag A1B1 = (v2f64 (XXPERMDI v2f64:$A, v2f64:$B, 3));
|
|
||||||
+ dag CVA1B1S = (v4i32 (XVCVDPSXWS A1B1));
|
|
||||||
+ dag CVA0B0S = (v4i32 (XVCVDPSXWS A0B0));
|
|
||||||
+ dag CVA1B1U = (v4i32 (XVCVDPUXWS A1B1));
|
|
||||||
+ dag CVA0B0U = (v4i32 (XVCVDPUXWS A0B0));
|
|
||||||
+
|
|
||||||
+ // For little endian, we merge low and hi doublewords (B, A).
|
|
||||||
+ dag B1A1 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 0));
|
|
||||||
+ dag B0A0 = (v2f64 (XXPERMDI v2f64:$B, v2f64:$A, 3));
|
|
||||||
+ dag CVB1A1S = (v4i32 (XVCVDPSXWS B1A1));
|
|
||||||
+ dag CVB0A0S = (v4i32 (XVCVDPSXWS B0A0));
|
|
||||||
+ dag CVB1A1U = (v4i32 (XVCVDPUXWS B1A1));
|
|
||||||
+ dag CVB0A0U = (v4i32 (XVCVDPUXWS B0A0));
|
|
||||||
+
|
|
||||||
+ // For big endian, we merge hi doublewords of (A, C) and (B, D), convert
|
|
||||||
+ // then merge.
|
|
||||||
+ dag AC = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$A, VSRC),
|
|
||||||
+ (COPY_TO_REGCLASS f64:$C, VSRC), 0));
|
|
||||||
+ dag BD = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$B, VSRC),
|
|
||||||
+ (COPY_TO_REGCLASS f64:$D, VSRC), 0));
|
|
||||||
+ dag CVACS = (v4i32 (XVCVDPSXWS AC));
|
|
||||||
+ dag CVBDS = (v4i32 (XVCVDPSXWS BD));
|
|
||||||
+ dag CVACU = (v4i32 (XVCVDPUXWS AC));
|
|
||||||
+ dag CVBDU = (v4i32 (XVCVDPUXWS BD));
|
|
||||||
+
|
|
||||||
+ // For little endian, we merge hi doublewords of (D, B) and (C, A), convert
|
|
||||||
+ // then merge.
|
|
||||||
+ dag DB = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$D, VSRC),
|
|
||||||
+ (COPY_TO_REGCLASS f64:$B, VSRC), 0));
|
|
||||||
+ dag CA = (v2f64 (XXPERMDI (COPY_TO_REGCLASS f64:$C, VSRC),
|
|
||||||
+ (COPY_TO_REGCLASS f64:$A, VSRC), 0));
|
|
||||||
+ dag CVDBS = (v4i32 (XVCVDPSXWS DB));
|
|
||||||
+ dag CVCAS = (v4i32 (XVCVDPSXWS CA));
|
|
||||||
+ dag CVDBU = (v4i32 (XVCVDPUXWS DB));
|
|
||||||
+ dag CVCAU = (v4i32 (XVCVDPUXWS CA));
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// Patterns for BUILD_VECTOR nodes.
|
|
||||||
def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">;
|
|
||||||
let AddedComplexity = 400 in {
|
|
||||||
@@ -3286,6 +3344,20 @@ let AddedComplexity = 400 in {
|
|
||||||
def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
|
|
||||||
DblToFlt.B0, DblToFlt.B1)),
|
|
||||||
(v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>;
|
|
||||||
+
|
|
||||||
+ // Convert 4 doubles to a vector of ints.
|
|
||||||
+ def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
|
|
||||||
+ DblToInt.C, DblToInt.D)),
|
|
||||||
+ (v4i32 (VMRGEW MrgWords.CVACS, MrgWords.CVBDS))>;
|
|
||||||
+ def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
|
|
||||||
+ DblToUInt.C, DblToUInt.D)),
|
|
||||||
+ (v4i32 (VMRGEW MrgWords.CVACU, MrgWords.CVBDU))>;
|
|
||||||
+ def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
|
|
||||||
+ ExtDbl.B0S, ExtDbl.B1S)),
|
|
||||||
+ (v4i32 (VMRGEW MrgWords.CVA0B0S, MrgWords.CVA1B1S))>;
|
|
||||||
+ def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
|
|
||||||
+ ExtDbl.B0U, ExtDbl.B1U)),
|
|
||||||
+ (v4i32 (VMRGEW MrgWords.CVA0B0U, MrgWords.CVA1B1U))>;
|
|
||||||
}
|
|
||||||
|
|
||||||
let Predicates = [IsLittleEndian, HasVSX] in {
|
|
||||||
@@ -3300,6 +3372,20 @@ let AddedComplexity = 400 in {
|
|
||||||
def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
|
|
||||||
DblToFlt.B0, DblToFlt.B1)),
|
|
||||||
(v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>;
|
|
||||||
+
|
|
||||||
+ // Convert 4 doubles to a vector of ints.
|
|
||||||
+ def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.B,
|
|
||||||
+ DblToInt.C, DblToInt.D)),
|
|
||||||
+ (v4i32 (VMRGEW MrgWords.CVDBS, MrgWords.CVCAS))>;
|
|
||||||
+ def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.B,
|
|
||||||
+ DblToUInt.C, DblToUInt.D)),
|
|
||||||
+ (v4i32 (VMRGEW MrgWords.CVDBU, MrgWords.CVCAU))>;
|
|
||||||
+ def : Pat<(v4i32 (build_vector ExtDbl.A0S, ExtDbl.A1S,
|
|
||||||
+ ExtDbl.B0S, ExtDbl.B1S)),
|
|
||||||
+ (v4i32 (VMRGEW MrgWords.CVB1A1S, MrgWords.CVB0A0S))>;
|
|
||||||
+ def : Pat<(v4i32 (build_vector ExtDbl.A0U, ExtDbl.A1U,
|
|
||||||
+ ExtDbl.B0U, ExtDbl.B1U)),
|
|
||||||
+ (v4i32 (VMRGEW MrgWords.CVB1A1U, MrgWords.CVB0A0U))>;
|
|
||||||
}
|
|
||||||
|
|
||||||
let Predicates = [HasDirectMove] in {
|
|
||||||
diff --git a/test/CodeGen/PowerPC/build-vector-tests.ll b/test/CodeGen/PowerPC/build-vector-tests.ll
|
|
||||||
index 16b562b..3785b2a 100644
|
|
||||||
--- a/test/CodeGen/PowerPC/build-vector-tests.ll
|
|
||||||
+++ b/test/CodeGen/PowerPC/build-vector-tests.ll
|
|
||||||
@@ -119,8 +119,8 @@
|
|
||||||
;vector int spltCnstConvftoi() { //
|
|
||||||
; return (vector int) 4.74f; //
|
|
||||||
;} //
|
|
||||||
-;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
|
|
||||||
-;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvdpsxws //
|
|
||||||
+;// P8: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
+;// P9: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
;vector int fromRegsConvftoi(float a, float b, float c, float d) { //
|
|
||||||
; return (vector int) { a, b, c, d }; //
|
|
||||||
;} //
|
|
||||||
@@ -139,15 +139,15 @@
|
|
||||||
;vector int fromDiffMemConsDConvftoi(float *ptr) { //
|
|
||||||
; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
|
|
||||||
;} //
|
|
||||||
-;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
|
|
||||||
-;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
|
|
||||||
+;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
+;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
;// Note: if the consecutive loads learns to handle pre-inc, this can be: //
|
|
||||||
;// sldi 2, load, xvcvspuxws //
|
|
||||||
;vector int fromDiffMemVarAConvftoi(float *arr, int elem) { //
|
|
||||||
; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; //
|
|
||||||
;} //
|
|
||||||
-;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
|
|
||||||
-;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
|
|
||||||
+;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
+;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
;// Note: if the consecutive loads learns to handle pre-inc, this can be: //
|
|
||||||
;// sldi 2, 2 x load, vperm, xvcvspuxws //
|
|
||||||
;vector int fromDiffMemVarDConvftoi(float *arr, int elem) { //
|
|
||||||
@@ -168,8 +168,8 @@
|
|
||||||
;vector int spltCnstConvdtoi() { //
|
|
||||||
; return (vector int) 4.74; //
|
|
||||||
;} //
|
|
||||||
-;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
|
|
||||||
-;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
|
|
||||||
+;// P8: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
+;// P9: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
;vector int fromRegsConvdtoi(double a, double b, double c, double d) { //
|
|
||||||
; return (vector int) { a, b, c, d }; //
|
|
||||||
;} //
|
|
||||||
@@ -178,25 +178,23 @@
|
|
||||||
;vector int fromDiffConstsConvdtoi() { //
|
|
||||||
; return (vector int) { 24.46, 234., 988.19, 422.39 }; //
|
|
||||||
;} //
|
|
||||||
-;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, //
|
|
||||||
-;// xvcvspsxws //
|
|
||||||
-;// P9: 2 x lxvx, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, //
|
|
||||||
-;// xvcvspsxws //
|
|
||||||
+;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
+;// P9: 2 x lxvx, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
;vector int fromDiffMemConsAConvdtoi(double *ptr) { //
|
|
||||||
; return (vector int) { ptr[0], ptr[1], ptr[2], ptr[3] }; //
|
|
||||||
;} //
|
|
||||||
-;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
|
|
||||||
-;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
|
|
||||||
+;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
+;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
;vector int fromDiffMemConsDConvdtoi(double *ptr) { //
|
|
||||||
; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
|
|
||||||
;} //
|
|
||||||
-;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
|
|
||||||
-;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
|
|
||||||
+;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
+;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
;vector int fromDiffMemVarAConvdtoi(double *arr, int elem) { //
|
|
||||||
; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; //
|
|
||||||
;} //
|
|
||||||
-;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
|
|
||||||
-;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
|
|
||||||
+;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
+;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
|
|
||||||
;vector int fromDiffMemVarDConvdtoi(double *arr, int elem) { //
|
|
||||||
; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; //
|
|
||||||
;} //
|
|
||||||
@@ -296,8 +294,8 @@
|
|
||||||
;vector unsigned int spltCnstConvftoui() { //
|
|
||||||
; return (vector unsigned int) 4.74f; //
|
|
||||||
;} //
|
|
||||||
-;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
|
|
||||||
-;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
|
|
||||||
+;// P8: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
+;// P9: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
;vector unsigned int fromRegsConvftoui(float a, float b, float c, float d) { //
|
|
||||||
; return (vector unsigned int) { a, b, c, d }; //
|
|
||||||
;} //
|
|
||||||
@@ -316,16 +314,16 @@
|
|
||||||
;vector unsigned int fromDiffMemConsDConvftoui(float *ptr) { //
|
|
||||||
; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
|
|
||||||
;} //
|
|
||||||
-;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
|
|
||||||
-;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
|
|
||||||
+;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
+;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
;// Note: if the consecutive loads learns to handle pre-inc, this can be: //
|
|
||||||
;// sldi 2, load, xvcvspuxws //
|
|
||||||
;vector unsigned int fromDiffMemVarAConvftoui(float *arr, int elem) { //
|
|
||||||
; return (vector unsigned int) { arr[elem], arr[elem+1], //
|
|
||||||
; arr[elem+2], arr[elem+3] }; //
|
|
||||||
;} //
|
|
||||||
-;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
|
|
||||||
-;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
|
|
||||||
+;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
+;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
;// Note: if the consecutive loads learns to handle pre-inc, this can be: //
|
|
||||||
;// sldi 2, 2 x load, vperm, xvcvspuxws //
|
|
||||||
;vector unsigned int fromDiffMemVarDConvftoui(float *arr, int elem) { //
|
|
||||||
@@ -347,8 +345,8 @@
|
|
||||||
;vector unsigned int spltCnstConvdtoui() { //
|
|
||||||
; return (vector unsigned int) 4.74; //
|
|
||||||
;} //
|
|
||||||
-;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
|
|
||||||
-;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
|
|
||||||
+;// P8: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
+;// P9: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
;vector unsigned int fromRegsConvdtoui(double a, double b, //
|
|
||||||
; double c, double d) { //
|
|
||||||
; return (vector unsigned int) { a, b, c, d }; //
|
|
||||||
@@ -358,25 +356,24 @@
|
|
||||||
;vector unsigned int fromDiffConstsConvdtoui() { //
|
|
||||||
; return (vector unsigned int) { 24.46, 234., 988.19, 422.39 }; //
|
|
||||||
;} //
|
|
||||||
-;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, //
|
|
||||||
-;// xvcvspuxws //
|
|
||||||
-;// P9: 2 x lxvx, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
|
|
||||||
+;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
+;// P9: 2 x lxvx, xxmrgld, xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
;vector unsigned int fromDiffMemConsAConvdtoui(double *ptr) { //
|
|
||||||
; return (vector unsigned int) { ptr[0], ptr[1], ptr[2], ptr[3] }; //
|
|
||||||
;} //
|
|
||||||
-;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
|
|
||||||
-;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
|
|
||||||
+;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
+;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
;vector unsigned int fromDiffMemConsDConvdtoui(double *ptr) { //
|
|
||||||
; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
|
|
||||||
;} //
|
|
||||||
-;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
|
|
||||||
-;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
|
|
||||||
+;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
+;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
;vector unsigned int fromDiffMemVarAConvdtoui(double *arr, int elem) { //
|
|
||||||
; return (vector unsigned int) { arr[elem], arr[elem+1], //
|
|
||||||
; arr[elem+2], arr[elem+3] }; //
|
|
||||||
;} //
|
|
||||||
-;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
|
|
||||||
-;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
|
|
||||||
+;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
+;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
|
|
||||||
;vector unsigned int fromDiffMemVarDConvdtoui(double *arr, int elem) { //
|
|
||||||
; return (vector unsigned int) { arr[elem], arr[elem-1], //
|
|
||||||
; arr[elem-2], arr[elem-3] }; //
|
|
||||||
@@ -1253,28 +1250,24 @@ entry:
|
|
||||||
; P8LE-LABEL: fromRegsConvftoi
|
|
||||||
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
|
|
||||||
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
|
|
||||||
-; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P9BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P9BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P9BE: vmrgew v2, [[REG3]], [[REG4]]
|
|
||||||
-; P9BE: xvcvspsxws v2, v2
|
|
||||||
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
|
|
||||||
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
|
|
||||||
-; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P9LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P9LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P9LE: vmrgew v2, [[REG4]], [[REG3]]
|
|
||||||
-; P9LE: xvcvspsxws v2, v2
|
|
||||||
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
|
|
||||||
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
|
|
||||||
-; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P8BE: vmrgew v2, [[REG3]], [[REG4]]
|
|
||||||
-; P8BE: xvcvspsxws v2, v2
|
|
||||||
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
|
|
||||||
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
|
|
||||||
-; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P8LE: vmrgew v2, [[REG4]], [[REG3]]
|
|
||||||
-; P8LE: xvcvspsxws v2, v2
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: norecurse nounwind readnone
|
|
||||||
@@ -1529,28 +1522,24 @@ entry:
|
|
||||||
; P8LE-LABEL: fromRegsConvdtoi
|
|
||||||
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
|
|
||||||
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
|
|
||||||
-; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P9BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P9BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P9BE: vmrgew v2, [[REG3]], [[REG4]]
|
|
||||||
-; P9BE: xvcvspsxws v2, v2
|
|
||||||
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
|
|
||||||
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
|
|
||||||
-; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P9LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P9LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P9LE: vmrgew v2, [[REG4]], [[REG3]]
|
|
||||||
-; P9LE: xvcvspsxws v2, v2
|
|
||||||
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
|
|
||||||
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
|
|
||||||
-; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P8BE: vmrgew v2, [[REG3]], [[REG4]]
|
|
||||||
-; P8BE: xvcvspsxws v2, v2
|
|
||||||
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
|
|
||||||
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
|
|
||||||
-; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P8LE: vmrgew v2, [[REG4]], [[REG3]]
|
|
||||||
-; P8LE: xvcvspsxws v2, v2
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: norecurse nounwind readnone
|
|
||||||
@@ -1592,36 +1581,32 @@ entry:
|
|
||||||
; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
|
|
||||||
; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
|
|
||||||
; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
|
|
||||||
-; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
|
|
||||||
-; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
|
|
||||||
+; P9BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]]
|
|
||||||
+; P9BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]]
|
|
||||||
; P9BE: vmrgew v2, [[REG6]], [[REG5]]
|
|
||||||
-; P9BE: xvcvspsxws v2, v2
|
|
||||||
; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
|
|
||||||
; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
|
|
||||||
; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
|
|
||||||
; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]]
|
|
||||||
-; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
|
|
||||||
-; P9LE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
|
|
||||||
+; P9LE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]]
|
|
||||||
+; P9LE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]]
|
|
||||||
; P9LE: vmrgew v2, [[REG6]], [[REG5]]
|
|
||||||
-; P9LE: xvcvspsxws v2, v2
|
|
||||||
; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
|
|
||||||
; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
|
|
||||||
; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
|
|
||||||
; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
|
|
||||||
-; P8BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
|
|
||||||
-; P8BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
|
|
||||||
+; P8BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]]
|
|
||||||
+; P8BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]]
|
|
||||||
; P8BE: vmrgew v2, [[REG6]], [[REG5]]
|
|
||||||
-; P8BE: xvcvspsxws v2, v2
|
|
||||||
; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
|
|
||||||
; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
|
|
||||||
; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]]
|
|
||||||
; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]]
|
|
||||||
; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]]
|
|
||||||
; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]]
|
|
||||||
-; P8LE-DAG: xvcvdpsp [[REG7:[vs0-9]+]], [[REG5]]
|
|
||||||
-; P8LE-DAG: xvcvdpsp [[REG8:[vs0-9]+]], [[REG6]]
|
|
||||||
+; P8LE-DAG: xvcvdpsxws [[REG7:[vs0-9]+]], [[REG5]]
|
|
||||||
+; P8LE-DAG: xvcvdpsxws [[REG8:[vs0-9]+]], [[REG6]]
|
|
||||||
; P8LE: vmrgew v2, [[REG8]], [[REG7]]
|
|
||||||
-; P8LE: xvcvspsxws v2, v2
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: norecurse nounwind readonly
|
|
||||||
@@ -1653,40 +1638,36 @@ entry:
|
|
||||||
; P9BE: lfd
|
|
||||||
; P9BE: xxmrghd
|
|
||||||
; P9BE: xxmrghd
|
|
||||||
-; P9BE: xvcvdpsp
|
|
||||||
-; P9BE: xvcvdpsp
|
|
||||||
-; P9BE: vmrgew
|
|
||||||
-; P9BE: xvcvspsxws v2
|
|
||||||
+; P9BE: xvcvdpsxws
|
|
||||||
+; P9BE: xvcvdpsxws
|
|
||||||
+; P9BE: vmrgew v2
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: xxmrghd
|
|
||||||
; P9LE: xxmrghd
|
|
||||||
-; P9LE: xvcvdpsp
|
|
||||||
-; P9LE: xvcvdpsp
|
|
||||||
-; P9LE: vmrgew
|
|
||||||
-; P9LE: xvcvspsxws v2
|
|
||||||
+; P9LE: xvcvdpsxws
|
|
||||||
+; P9LE: xvcvdpsxws
|
|
||||||
+; P9LE: vmrgew v2
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: xxmrghd
|
|
||||||
; P8BE: xxmrghd
|
|
||||||
-; P8BE: xvcvdpsp
|
|
||||||
-; P8BE: xvcvdpsp
|
|
||||||
-; P8BE: vmrgew
|
|
||||||
-; P8BE: xvcvspsxws v2
|
|
||||||
+; P8BE: xvcvdpsxws
|
|
||||||
+; P8BE: xvcvdpsxws
|
|
||||||
+; P8BE: vmrgew v2
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: xxmrghd
|
|
||||||
; P8LE: xxmrghd
|
|
||||||
-; P8LE: xvcvdpsp
|
|
||||||
-; P8LE: xvcvdpsp
|
|
||||||
-; P8LE: vmrgew
|
|
||||||
-; P8LE: xvcvspsxws v2
|
|
||||||
+; P8LE: xvcvdpsxws
|
|
||||||
+; P8LE: xvcvdpsxws
|
|
||||||
+; P8LE: vmrgew v2
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: norecurse nounwind readonly
|
|
||||||
@@ -1726,40 +1707,36 @@ entry:
|
|
||||||
; P9BE: lfd
|
|
||||||
; P9BE: xxmrghd
|
|
||||||
; P9BE: xxmrghd
|
|
||||||
-; P9BE: xvcvdpsp
|
|
||||||
-; P9BE: xvcvdpsp
|
|
||||||
-; P9BE: vmrgew
|
|
||||||
-; P9BE: xvcvspsxws v2
|
|
||||||
+; P9BE: xvcvdpsxws
|
|
||||||
+; P9BE: xvcvdpsxws
|
|
||||||
+; P9BE: vmrgew v2
|
|
||||||
; P9LE: lfdux
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: xxmrghd
|
|
||||||
; P9LE: xxmrghd
|
|
||||||
-; P9LE: xvcvdpsp
|
|
||||||
-; P9LE: xvcvdpsp
|
|
||||||
-; P9LE: vmrgew
|
|
||||||
-; P9LE: xvcvspsxws v2
|
|
||||||
+; P9LE: xvcvdpsxws
|
|
||||||
+; P9LE: xvcvdpsxws
|
|
||||||
+; P9LE: vmrgew v2
|
|
||||||
; P8BE: lfdux
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: xxmrghd
|
|
||||||
; P8BE: xxmrghd
|
|
||||||
-; P8BE: xvcvdpsp
|
|
||||||
-; P8BE: xvcvdpsp
|
|
||||||
-; P8BE: vmrgew
|
|
||||||
-; P8BE: xvcvspsxws v2
|
|
||||||
+; P8BE: xvcvdpsxws
|
|
||||||
+; P8BE: xvcvdpsxws
|
|
||||||
+; P8BE: vmrgew v2
|
|
||||||
; P8LE: lfdux
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: xxmrghd
|
|
||||||
; P8LE: xxmrghd
|
|
||||||
-; P8LE: xvcvdpsp
|
|
||||||
-; P8LE: xvcvdpsp
|
|
||||||
-; P8LE: vmrgew
|
|
||||||
-; P8LE: xvcvspsxws v2
|
|
||||||
+; P8LE: xvcvdpsxws
|
|
||||||
+; P8LE: xvcvdpsxws
|
|
||||||
+; P8LE: vmrgew v2
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: norecurse nounwind readonly
|
|
||||||
@@ -1799,40 +1776,36 @@ entry:
|
|
||||||
; P9BE: lfd
|
|
||||||
; P9BE: xxmrghd
|
|
||||||
; P9BE: xxmrghd
|
|
||||||
-; P9BE: xvcvdpsp
|
|
||||||
-; P9BE: xvcvdpsp
|
|
||||||
-; P9BE: vmrgew
|
|
||||||
-; P9BE: xvcvspsxws v2
|
|
||||||
+; P9BE: xvcvdpsxws
|
|
||||||
+; P9BE: xvcvdpsxws
|
|
||||||
+; P9BE: vmrgew v2
|
|
||||||
; P9LE: lfdux
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: xxmrghd
|
|
||||||
; P9LE: xxmrghd
|
|
||||||
-; P9LE: xvcvdpsp
|
|
||||||
-; P9LE: xvcvdpsp
|
|
||||||
-; P9LE: vmrgew
|
|
||||||
-; P9LE: xvcvspsxws v2
|
|
||||||
+; P9LE: xvcvdpsxws
|
|
||||||
+; P9LE: xvcvdpsxws
|
|
||||||
+; P9LE: vmrgew v2
|
|
||||||
; P8BE: lfdux
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: xxmrghd
|
|
||||||
; P8BE: xxmrghd
|
|
||||||
-; P8BE: xvcvdpsp
|
|
||||||
-; P8BE: xvcvdpsp
|
|
||||||
-; P8BE: vmrgew
|
|
||||||
-; P8BE: xvcvspsxws v2
|
|
||||||
+; P8BE: xvcvdpsxws
|
|
||||||
+; P8BE: xvcvdpsxws
|
|
||||||
+; P8BE: vmrgew v2
|
|
||||||
; P8LE: lfdux
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: xxmrghd
|
|
||||||
; P8LE: xxmrghd
|
|
||||||
-; P8LE: xvcvdpsp
|
|
||||||
-; P8LE: xvcvdpsp
|
|
||||||
-; P8LE: vmrgew
|
|
||||||
-; P8LE: xvcvspsxws v2
|
|
||||||
+; P8LE: xvcvdpsxws
|
|
||||||
+; P8LE: xvcvdpsxws
|
|
||||||
+; P8LE: vmrgew v2
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: norecurse nounwind readnone
|
|
||||||
@@ -2413,28 +2386,24 @@ entry:
|
|
||||||
; P8LE-LABEL: fromRegsConvftoui
|
|
||||||
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
|
|
||||||
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
|
|
||||||
-; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P9BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P9BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P9BE: vmrgew v2, [[REG3]], [[REG4]]
|
|
||||||
-; P9BE: xvcvspuxws v2, v2
|
|
||||||
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
|
|
||||||
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
|
|
||||||
-; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P9LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P9LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P9LE: vmrgew v2, [[REG4]], [[REG3]]
|
|
||||||
-; P9LE: xvcvspuxws v2, v2
|
|
||||||
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
|
|
||||||
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
|
|
||||||
-; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P8BE: vmrgew v2, [[REG3]], [[REG4]]
|
|
||||||
-; P8BE: xvcvspuxws v2, v2
|
|
||||||
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
|
|
||||||
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
|
|
||||||
-; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P8LE: vmrgew v2, [[REG4]], [[REG3]]
|
|
||||||
-; P8LE: xvcvspuxws v2, v2
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: norecurse nounwind readnone
|
|
||||||
@@ -2689,28 +2658,24 @@ entry:
|
|
||||||
; P8LE-LABEL: fromRegsConvdtoui
|
|
||||||
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
|
|
||||||
; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
|
|
||||||
-; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P9BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P9BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P9BE: vmrgew v2, [[REG3]], [[REG4]]
|
|
||||||
-; P9BE: xvcvspuxws v2, v2
|
|
||||||
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
|
|
||||||
; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
|
|
||||||
-; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P9LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P9LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P9LE: vmrgew v2, [[REG4]], [[REG3]]
|
|
||||||
-; P9LE: xvcvspuxws v2, v2
|
|
||||||
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
|
|
||||||
; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
|
|
||||||
-; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P8BE: vmrgew v2, [[REG3]], [[REG4]]
|
|
||||||
-; P8BE: xvcvspuxws v2, v2
|
|
||||||
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
|
|
||||||
; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
|
|
||||||
-; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
-; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
+; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
|
|
||||||
+; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
|
|
||||||
; P8LE: vmrgew v2, [[REG4]], [[REG3]]
|
|
||||||
-; P8LE: xvcvspuxws v2, v2
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: norecurse nounwind readnone
|
|
||||||
@@ -2752,36 +2717,32 @@ entry:
|
|
||||||
; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
|
|
||||||
; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
|
|
||||||
; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
|
|
||||||
-; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
|
|
||||||
-; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
|
|
||||||
+; P9BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]]
|
|
||||||
+; P9BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]]
|
|
||||||
; P9BE: vmrgew v2, [[REG6]], [[REG5]]
|
|
||||||
-; P9BE: xvcvspuxws v2, v2
|
|
||||||
; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
|
|
||||||
; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
|
|
||||||
-; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
|
|
||||||
; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]]
|
|
||||||
-; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
|
|
||||||
-; P9LE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
|
|
||||||
+; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
|
|
||||||
+; P9LE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]]
|
|
||||||
+; P9LE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]]
|
|
||||||
; P9LE: vmrgew v2, [[REG6]], [[REG5]]
|
|
||||||
-; P9LE: xvcvspuxws v2, v2
|
|
||||||
; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
|
|
||||||
; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
|
|
||||||
; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
|
|
||||||
; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
|
|
||||||
-; P8BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
|
|
||||||
-; P8BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
|
|
||||||
+; P8BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]]
|
|
||||||
+; P8BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]]
|
|
||||||
; P8BE: vmrgew v2, [[REG6]], [[REG5]]
|
|
||||||
-; P8BE: xvcvspuxws v2, v2
|
|
||||||
; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
|
|
||||||
; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
|
|
||||||
; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]]
|
|
||||||
; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]]
|
|
||||||
; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]]
|
|
||||||
; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]]
|
|
||||||
-; P8LE-DAG: xvcvdpsp [[REG7:[vs0-9]+]], [[REG5]]
|
|
||||||
-; P8LE-DAG: xvcvdpsp [[REG8:[vs0-9]+]], [[REG6]]
|
|
||||||
+; P8LE-DAG: xvcvdpuxws [[REG7:[vs0-9]+]], [[REG5]]
|
|
||||||
+; P8LE-DAG: xvcvdpuxws [[REG8:[vs0-9]+]], [[REG6]]
|
|
||||||
; P8LE: vmrgew v2, [[REG8]], [[REG7]]
|
|
||||||
-; P8LE: xvcvspuxws v2, v2
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: norecurse nounwind readonly
|
|
||||||
@@ -2813,40 +2774,36 @@ entry:
|
|
||||||
; P9BE: lfd
|
|
||||||
; P9BE: xxmrghd
|
|
||||||
; P9BE: xxmrghd
|
|
||||||
-; P9BE: xvcvdpsp
|
|
||||||
-; P9BE: xvcvdpsp
|
|
||||||
-; P9BE: vmrgew
|
|
||||||
-; P9BE: xvcvspuxws v2
|
|
||||||
+; P9BE: xvcvdpuxws
|
|
||||||
+; P9BE: xvcvdpuxws
|
|
||||||
+; P9BE: vmrgew v2
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: xxmrghd
|
|
||||||
; P9LE: xxmrghd
|
|
||||||
-; P9LE: xvcvdpsp
|
|
||||||
-; P9LE: xvcvdpsp
|
|
||||||
-; P9LE: vmrgew
|
|
||||||
-; P9LE: xvcvspuxws v2
|
|
||||||
+; P9LE: xvcvdpuxws
|
|
||||||
+; P9LE: xvcvdpuxws
|
|
||||||
+; P9LE: vmrgew v2
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: xxmrghd
|
|
||||||
; P8BE: xxmrghd
|
|
||||||
-; P8BE: xvcvdpsp
|
|
||||||
-; P8BE: xvcvdpsp
|
|
||||||
-; P8BE: vmrgew
|
|
||||||
-; P8BE: xvcvspuxws v2
|
|
||||||
+; P8BE: xvcvdpuxws
|
|
||||||
+; P8BE: xvcvdpuxws
|
|
||||||
+; P8BE: vmrgew v2
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: xxmrghd
|
|
||||||
; P8LE: xxmrghd
|
|
||||||
-; P8LE: xvcvdpsp
|
|
||||||
-; P8LE: xvcvdpsp
|
|
||||||
-; P8LE: vmrgew
|
|
||||||
-; P8LE: xvcvspuxws v2
|
|
||||||
+; P8LE: xvcvdpuxws
|
|
||||||
+; P8LE: xvcvdpuxws
|
|
||||||
+; P8LE: vmrgew v2
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: norecurse nounwind readonly
|
|
||||||
@@ -2886,40 +2843,36 @@ entry:
|
|
||||||
; P9BE: lfd
|
|
||||||
; P9BE: xxmrghd
|
|
||||||
; P9BE: xxmrghd
|
|
||||||
-; P9BE: xvcvdpsp
|
|
||||||
-; P9BE: xvcvdpsp
|
|
||||||
-; P9BE: vmrgew
|
|
||||||
-; P9BE: xvcvspuxws v2
|
|
||||||
+; P9BE: xvcvdpuxws
|
|
||||||
+; P9BE: xvcvdpuxws
|
|
||||||
+; P9BE: vmrgew v2
|
|
||||||
; P9LE: lfdux
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: xxmrghd
|
|
||||||
; P9LE: xxmrghd
|
|
||||||
-; P9LE: xvcvdpsp
|
|
||||||
-; P9LE: xvcvdpsp
|
|
||||||
-; P9LE: vmrgew
|
|
||||||
-; P9LE: xvcvspuxws v2
|
|
||||||
+; P9LE: xvcvdpuxws
|
|
||||||
+; P9LE: xvcvdpuxws
|
|
||||||
+; P9LE: vmrgew v2
|
|
||||||
; P8BE: lfdux
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: xxmrghd
|
|
||||||
; P8BE: xxmrghd
|
|
||||||
-; P8BE: xvcvdpsp
|
|
||||||
-; P8BE: xvcvdpsp
|
|
||||||
-; P8BE: vmrgew
|
|
||||||
-; P8BE: xvcvspuxws v2
|
|
||||||
+; P8BE: xvcvdpuxws
|
|
||||||
+; P8BE: xvcvdpuxws
|
|
||||||
+; P8BE: vmrgew v2
|
|
||||||
; P8LE: lfdux
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: xxmrghd
|
|
||||||
; P8LE: xxmrghd
|
|
||||||
-; P8LE: xvcvdpsp
|
|
||||||
-; P8LE: xvcvdpsp
|
|
||||||
-; P8LE: vmrgew
|
|
||||||
-; P8LE: xvcvspuxws v2
|
|
||||||
+; P8LE: xvcvdpuxws
|
|
||||||
+; P8LE: xvcvdpuxws
|
|
||||||
+; P8LE: vmrgew v2
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: norecurse nounwind readonly
|
|
||||||
@@ -2959,40 +2912,36 @@ entry:
|
|
||||||
; P9BE: lfd
|
|
||||||
; P9BE: xxmrghd
|
|
||||||
; P9BE: xxmrghd
|
|
||||||
-; P9BE: xvcvdpsp
|
|
||||||
-; P9BE: xvcvdpsp
|
|
||||||
-; P9BE: vmrgew
|
|
||||||
-; P9BE: xvcvspuxws v2
|
|
||||||
+; P9BE: xvcvdpuxws
|
|
||||||
+; P9BE: xvcvdpuxws
|
|
||||||
+; P9BE: vmrgew v2
|
|
||||||
; P9LE: lfdux
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: lfd
|
|
||||||
; P9LE: xxmrghd
|
|
||||||
; P9LE: xxmrghd
|
|
||||||
-; P9LE: xvcvdpsp
|
|
||||||
-; P9LE: xvcvdpsp
|
|
||||||
-; P9LE: vmrgew
|
|
||||||
-; P9LE: xvcvspuxws v2
|
|
||||||
+; P9LE: xvcvdpuxws
|
|
||||||
+; P9LE: xvcvdpuxws
|
|
||||||
+; P9LE: vmrgew v2
|
|
||||||
; P8BE: lfdux
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: lxsdx
|
|
||||||
; P8BE: xxmrghd
|
|
||||||
; P8BE: xxmrghd
|
|
||||||
-; P8BE: xvcvdpsp
|
|
||||||
-; P8BE: xvcvdpsp
|
|
||||||
-; P8BE: vmrgew
|
|
||||||
-; P8BE: xvcvspuxws v2
|
|
||||||
+; P8BE: xvcvdpuxws
|
|
||||||
+; P8BE: xvcvdpuxws
|
|
||||||
+; P8BE: vmrgew v2
|
|
||||||
; P8LE: lfdux
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: lxsdx
|
|
||||||
; P8LE: xxmrghd
|
|
||||||
; P8LE: xxmrghd
|
|
||||||
-; P8LE: xvcvdpsp
|
|
||||||
-; P8LE: xvcvdpsp
|
|
||||||
-; P8LE: vmrgew
|
|
||||||
-; P8LE: xvcvspuxws v2
|
|
||||||
+; P8LE: xvcvdpuxws
|
|
||||||
+; P8LE: xvcvdpuxws
|
|
||||||
+; P8LE: vmrgew v2
|
|
||||||
}
|
|
||||||
|
|
||||||
; Function Attrs: norecurse nounwind readnone
|
|
||||||
--
|
|
||||||
1.8.3.1
|
|
||||||
|
|
@ -1,360 +0,0 @@
|
|||||||
From 2ac90db51fc323d183aabe744e57f4feca6d3008 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
|
|
||||||
Date: Wed, 1 Aug 2018 11:57:58 +0000
|
|
||||||
Subject: [PATCH] [SystemZ, TableGen] Fix shift count handling
|
|
||||||
|
|
||||||
*Backport of this patch from trunk without the TableGen fix and modified
|
|
||||||
to work with LLVM 6.0 TableGen. *
|
|
||||||
|
|
||||||
The DAG combiner logic to simplify AND masks in shift counts is invalid.
|
|
||||||
While it is true that the SystemZ shift instructions ignore all but the
|
|
||||||
low 6 bits of the shift count, it is still invalid to simplify the AND
|
|
||||||
masks while the DAG still uses the standard shift operators (which are
|
|
||||||
*not* defined to match the SystemZ instruction behavior).
|
|
||||||
|
|
||||||
Instead, this patch performs equivalent operations during instruction
|
|
||||||
selection. For completely removing the AND, this now happens via
|
|
||||||
additional DAG match patterns implemented by a multi-alternative
|
|
||||||
PatFrags. For simplifying a 32-bit AND to a 16-bit AND, the existing DAG
|
|
||||||
patterns were already mostly OK, they just needed an output XForm to
|
|
||||||
actually truncate the immediate value.
|
|
||||||
|
|
||||||
Unfortunately, the latter change also exposed a bug in TableGen: it
|
|
||||||
seems XForms are currently only handled correctly for direct operands of
|
|
||||||
the outermost operation node. This patch also fixes that bug by simply
|
|
||||||
recurring through the whole pattern. This should be NFC for all other
|
|
||||||
targets.
|
|
||||||
|
|
||||||
Differential Revision: https://reviews.llvm.org/D50096
|
|
||||||
|
|
||||||
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@338521 91177308-0d34-0410-b5e6-96231b3b80d8
|
|
||||||
---
|
|
||||||
lib/Target/SystemZ/SystemZISelLowering.cpp | 78 ------------------------------
|
|
||||||
lib/Target/SystemZ/SystemZISelLowering.h | 1 -
|
|
||||||
lib/Target/SystemZ/SystemZInstrInfo.td | 49 +++++++++++++------
|
|
||||||
lib/Target/SystemZ/SystemZOperands.td | 1 +
|
|
||||||
lib/Target/SystemZ/SystemZOperators.td | 6 +++
|
|
||||||
test/CodeGen/SystemZ/shift-12.ll | 12 +++++
|
|
||||||
utils/TableGen/CodeGenDAGPatterns.cpp | 39 ++++++++-------
|
|
||||||
7 files changed, 71 insertions(+), 115 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
|
|
||||||
index adf3683..505b143 100644
|
|
||||||
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
|
|
||||||
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
|
|
||||||
@@ -522,10 +522,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
|
|
||||||
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
|
|
||||||
setTargetDAGCombine(ISD::FP_ROUND);
|
|
||||||
setTargetDAGCombine(ISD::BSWAP);
|
|
||||||
- setTargetDAGCombine(ISD::SHL);
|
|
||||||
- setTargetDAGCombine(ISD::SRA);
|
|
||||||
- setTargetDAGCombine(ISD::SRL);
|
|
||||||
- setTargetDAGCombine(ISD::ROTL);
|
|
||||||
|
|
||||||
// Handle intrinsics.
|
|
||||||
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
|
|
||||||
@@ -5405,76 +5401,6 @@ SDValue SystemZTargetLowering::combineBSWAP(
|
|
||||||
return SDValue();
|
|
||||||
}
|
|
||||||
|
|
||||||
-SDValue SystemZTargetLowering::combineSHIFTROT(
|
|
||||||
- SDNode *N, DAGCombinerInfo &DCI) const {
|
|
||||||
-
|
|
||||||
- SelectionDAG &DAG = DCI.DAG;
|
|
||||||
-
|
|
||||||
- // Shift/rotate instructions only use the last 6 bits of the second operand
|
|
||||||
- // register. If the second operand is the result of an AND with an immediate
|
|
||||||
- // value that has its last 6 bits set, we can safely remove the AND operation.
|
|
||||||
- //
|
|
||||||
- // If the AND operation doesn't have the last 6 bits set, we can't remove it
|
|
||||||
- // entirely, but we can still truncate it to a 16-bit value. This prevents
|
|
||||||
- // us from ending up with a NILL with a signed operand, which will cause the
|
|
||||||
- // instruction printer to abort.
|
|
||||||
- SDValue N1 = N->getOperand(1);
|
|
||||||
- if (N1.getOpcode() == ISD::AND) {
|
|
||||||
- SDValue AndMaskOp = N1->getOperand(1);
|
|
||||||
- auto *AndMask = dyn_cast<ConstantSDNode>(AndMaskOp);
|
|
||||||
-
|
|
||||||
- // The AND mask is constant
|
|
||||||
- if (AndMask) {
|
|
||||||
- auto AmtVal = AndMask->getZExtValue();
|
|
||||||
-
|
|
||||||
- // Bottom 6 bits are set
|
|
||||||
- if ((AmtVal & 0x3f) == 0x3f) {
|
|
||||||
- SDValue AndOp = N1->getOperand(0);
|
|
||||||
-
|
|
||||||
- // This is the only use, so remove the node
|
|
||||||
- if (N1.hasOneUse()) {
|
|
||||||
- // Combine the AND away
|
|
||||||
- DCI.CombineTo(N1.getNode(), AndOp);
|
|
||||||
-
|
|
||||||
- // Return N so it isn't rechecked
|
|
||||||
- return SDValue(N, 0);
|
|
||||||
-
|
|
||||||
- // The node will be reused, so create a new node for this one use
|
|
||||||
- } else {
|
|
||||||
- SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
|
|
||||||
- N->getValueType(0), N->getOperand(0),
|
|
||||||
- AndOp);
|
|
||||||
- DCI.AddToWorklist(Replace.getNode());
|
|
||||||
-
|
|
||||||
- return Replace;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- // We can't remove the AND, but we can use NILL here (normally we would
|
|
||||||
- // use NILF). Only keep the last 16 bits of the mask. The actual
|
|
||||||
- // transformation will be handled by .td definitions.
|
|
||||||
- } else if (AmtVal >> 16 != 0) {
|
|
||||||
- SDValue AndOp = N1->getOperand(0);
|
|
||||||
-
|
|
||||||
- auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff,
|
|
||||||
- SDLoc(AndMaskOp),
|
|
||||||
- AndMaskOp.getValueType());
|
|
||||||
-
|
|
||||||
- auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(),
|
|
||||||
- AndOp, NewMask);
|
|
||||||
-
|
|
||||||
- SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
|
|
||||||
- N->getValueType(0), N->getOperand(0),
|
|
||||||
- NewAnd);
|
|
||||||
- DCI.AddToWorklist(Replace.getNode());
|
|
||||||
-
|
|
||||||
- return Replace;
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- return SDValue();
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
|
|
||||||
DAGCombinerInfo &DCI) const {
|
|
||||||
switch(N->getOpcode()) {
|
|
||||||
@@ -5487,10 +5413,6 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
|
|
||||||
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
|
|
||||||
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
|
|
||||||
case ISD::BSWAP: return combineBSWAP(N, DCI);
|
|
||||||
- case ISD::SHL:
|
|
||||||
- case ISD::SRA:
|
|
||||||
- case ISD::SRL:
|
|
||||||
- case ISD::ROTL: return combineSHIFTROT(N, DCI);
|
|
||||||
}
|
|
||||||
|
|
||||||
return SDValue();
|
|
||||||
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
|
|
||||||
index 2cdc88d..1918d45 100644
|
|
||||||
--- a/lib/Target/SystemZ/SystemZISelLowering.h
|
|
||||||
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
|
|
||||||
@@ -570,7 +570,6 @@ private:
|
|
||||||
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
|
|
||||||
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
|
|
||||||
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
|
|
||||||
- SDValue combineSHIFTROT(SDNode *N, DAGCombinerInfo &DCI) const;
|
|
||||||
|
|
||||||
// If the last instruction before MBBI in MBB was some form of COMPARE,
|
|
||||||
// try to replace it with a COMPARE AND BRANCH just before MBBI.
|
|
||||||
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
|
|
||||||
index abb8045..fb40cb4 100644
|
|
||||||
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
|
|
||||||
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
|
|
||||||
@@ -1318,9 +1318,20 @@ def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))),
|
|
||||||
// Shifts
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
+// Complexity is 8 so we match it before the NILL paterns below.
|
|
||||||
+let AddedComplexity = 8 in {
|
|
||||||
+
|
|
||||||
+class ShiftAndPat <SDNode node, Instruction inst, ValueType vt> : Pat <
|
|
||||||
+ (node vt:$val, (and i32:$count, imm32bottom6set)),
|
|
||||||
+ (inst vt:$val, i32:$count, 0)
|
|
||||||
+>;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
// Logical shift left.
|
|
||||||
defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
|
|
||||||
+def : ShiftAndPat <shl, SLL, i32>;
|
|
||||||
def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
|
|
||||||
+def : ShiftAndPat <shl, SLLG, i64>;
|
|
||||||
def SLDL : BinaryRS<"sldl", 0x8D, null_frag, GR128>;
|
|
||||||
|
|
||||||
// Arithmetic shift left.
|
|
||||||
@@ -1332,7 +1343,9 @@ let Defs = [CC] in {
|
|
||||||
|
|
||||||
// Logical shift right.
|
|
||||||
defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
|
|
||||||
+def : ShiftAndPat <srl, SRL, i32>;
|
|
||||||
def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
|
|
||||||
+def : ShiftAndPat <srl, SRLG, i64>;
|
|
||||||
def SRDL : BinaryRS<"srdl", 0x8C, null_frag, GR128>;
|
|
||||||
|
|
||||||
// Arithmetic shift right.
|
|
||||||
@@ -1341,10 +1354,14 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
|
|
||||||
def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>;
|
|
||||||
def SRDA : BinaryRS<"srda", 0x8E, null_frag, GR128>;
|
|
||||||
}
|
|
||||||
+def : ShiftAndPat <sra, SRA, i32>;
|
|
||||||
+def : ShiftAndPat <sra, SRAG, i64>;
|
|
||||||
|
|
||||||
// Rotate left.
|
|
||||||
def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>;
|
|
||||||
+def : ShiftAndPat <rotl, RLL, i32>;
|
|
||||||
def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
|
|
||||||
+def : ShiftAndPat <rotl, RLLG, i64>;
|
|
||||||
|
|
||||||
// Rotate second operand left and inserted selected bits into first operand.
|
|
||||||
// These can act like 32-bit operands provided that the constant start and
|
|
||||||
@@ -2154,29 +2171,29 @@ def : Pat<(and (xor GR64:$x, (i64 -1)), GR64:$y),
|
|
||||||
// Complexity is added so that we match this before we match NILF on the AND
|
|
||||||
// operation alone.
|
|
||||||
let AddedComplexity = 4 in {
|
|
||||||
- def : Pat<(shl GR32:$val, (and GR32:$shift, uimm32:$imm)),
|
|
||||||
- (SLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
|
|
||||||
+ def : Pat<(shl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
|
|
||||||
+ (SLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
|
|
||||||
|
|
||||||
- def : Pat<(sra GR32:$val, (and GR32:$shift, uimm32:$imm)),
|
|
||||||
- (SRA GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
|
|
||||||
+ def : Pat<(sra GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
|
|
||||||
+ (SRA GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
|
|
||||||
|
|
||||||
- def : Pat<(srl GR32:$val, (and GR32:$shift, uimm32:$imm)),
|
|
||||||
- (SRL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
|
|
||||||
+ def : Pat<(srl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
|
|
||||||
+ (SRL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
|
|
||||||
|
|
||||||
- def : Pat<(shl GR64:$val, (and GR32:$shift, uimm32:$imm)),
|
|
||||||
- (SLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
|
|
||||||
+ def : Pat<(shl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
|
|
||||||
+ (SLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
|
|
||||||
|
|
||||||
- def : Pat<(sra GR64:$val, (and GR32:$shift, uimm32:$imm)),
|
|
||||||
- (SRAG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
|
|
||||||
+ def : Pat<(sra GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
|
|
||||||
+ (SRAG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
|
|
||||||
|
|
||||||
- def : Pat<(srl GR64:$val, (and GR32:$shift, uimm32:$imm)),
|
|
||||||
- (SRLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
|
|
||||||
+ def : Pat<(srl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
|
|
||||||
+ (SRLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
|
|
||||||
|
|
||||||
- def : Pat<(rotl GR32:$val, (and GR32:$shift, uimm32:$imm)),
|
|
||||||
- (RLL GR32:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
|
|
||||||
+ def : Pat<(rotl GR32:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
|
|
||||||
+ (RLL GR32:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
|
|
||||||
|
|
||||||
- def : Pat<(rotl GR64:$val, (and GR32:$shift, uimm32:$imm)),
|
|
||||||
- (RLLG GR64:$val, (NILL GR32:$shift, uimm32:$imm), 0)>;
|
|
||||||
+ def : Pat<(rotl GR64:$val, (and GR32:$shift, imm32zx16trunc:$imm)),
|
|
||||||
+ (RLLG GR64:$val, (NILL GR32:$shift, imm32zx16trunc:$imm), 0)>;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Peepholes for turning scalar operations into block operations.
|
|
||||||
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
|
|
||||||
index 7136121..61a1124 100644
|
|
||||||
--- a/lib/Target/SystemZ/SystemZOperands.td
|
|
||||||
+++ b/lib/Target/SystemZ/SystemZOperands.td
|
|
||||||
@@ -341,6 +341,7 @@ def imm32zx16 : Immediate<i32, [{
|
|
||||||
}], UIMM16, "U16Imm">;
|
|
||||||
|
|
||||||
def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
|
|
||||||
+def imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
|
|
||||||
|
|
||||||
// Full 32-bit immediates. we need both signed and unsigned versions
|
|
||||||
// because the assembler is picky. E.g. AFI requires signed operands
|
|
||||||
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
|
|
||||||
index d067f33..269c3d0 100644
|
|
||||||
--- a/lib/Target/SystemZ/SystemZOperators.td
|
|
||||||
+++ b/lib/Target/SystemZ/SystemZOperators.td
|
|
||||||
@@ -611,6 +611,12 @@ class storei<SDPatternOperator operator, SDPatternOperator store = store>
|
|
||||||
: PatFrag<(ops node:$addr),
|
|
||||||
(store (operator), node:$addr)>;
|
|
||||||
|
|
||||||
+// Create a shift operator that optionally ignores an AND of the
|
|
||||||
+// shift count with an immediate if the bottom 6 bits are all set.
|
|
||||||
+def imm32bottom6set : PatLeaf<(i32 imm), [{
|
|
||||||
+ return (N->getZExtValue() & 0x3f) == 0x3f;
|
|
||||||
+}]>;
|
|
||||||
+
|
|
||||||
// Vector representation of all-zeros and all-ones.
|
|
||||||
def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
|
|
||||||
def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
|
|
||||||
diff --git a/test/CodeGen/SystemZ/shift-12.ll b/test/CodeGen/SystemZ/shift-12.ll
|
|
||||||
index 4ebc42b..53d3d53 100644
|
|
||||||
--- a/test/CodeGen/SystemZ/shift-12.ll
|
|
||||||
+++ b/test/CodeGen/SystemZ/shift-12.ll
|
|
||||||
@@ -104,3 +104,15 @@ define i32 @f10(i32 %a, i32 %sh) {
|
|
||||||
%reuse = add i32 %and, %shift
|
|
||||||
ret i32 %reuse
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+; Test that AND is not removed for i128 (which calls __ashlti3)
|
|
||||||
+define i128 @f11(i128 %a, i32 %sh) {
|
|
||||||
+; CHECK-LABEL: f11:
|
|
||||||
+; CHECK: risbg %r4, %r4, 57, 191, 0
|
|
||||||
+; CHECK: brasl %r14, __ashlti3@PLT
|
|
||||||
+ %and = and i32 %sh, 127
|
|
||||||
+ %ext = zext i32 %and to i128
|
|
||||||
+ %shift = shl i128 %a, %ext
|
|
||||||
+ ret i128 %shift
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
|
|
||||||
index 493066e..74af62b 100644
|
|
||||||
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
|
|
||||||
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
|
|
||||||
@@ -3919,6 +3919,24 @@ static bool ForceArbitraryInstResultType(TreePatternNode *N, TreePattern &TP) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
+// Promote xform function to be an explicit node wherever set.
|
|
||||||
+static TreePatternNode* PromoteXForms(TreePatternNode* N) {
|
|
||||||
+ if (Record *Xform = N->getTransformFn()) {
|
|
||||||
+ N->setTransformFn(nullptr);
|
|
||||||
+ std::vector<TreePatternNode*> Children;
|
|
||||||
+ Children.push_back(PromoteXForms(N));
|
|
||||||
+ return new TreePatternNode(Xform, std::move(Children),
|
|
||||||
+ N->getNumTypes());
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (!N->isLeaf())
|
|
||||||
+ for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) {
|
|
||||||
+ TreePatternNode* Child = N->getChild(i);
|
|
||||||
+ N->setChild(i, std::move(PromoteXForms(Child)));
|
|
||||||
+ }
|
|
||||||
+ return N;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
void CodeGenDAGPatterns::ParsePatterns() {
|
|
||||||
std::vector<Record*> Patterns = Records.getAllDerivedDefinitions("Pattern");
|
|
||||||
|
|
||||||
@@ -4009,26 +4027,7 @@ void CodeGenDAGPatterns::ParsePatterns() {
|
|
||||||
InstImpResults);
|
|
||||||
|
|
||||||
// Promote the xform function to be an explicit node if set.
|
|
||||||
- TreePatternNode *DstPattern = Result.getOnlyTree();
|
|
||||||
- std::vector<TreePatternNode*> ResultNodeOperands;
|
|
||||||
- for (unsigned ii = 0, ee = DstPattern->getNumChildren(); ii != ee; ++ii) {
|
|
||||||
- TreePatternNode *OpNode = DstPattern->getChild(ii);
|
|
||||||
- if (Record *Xform = OpNode->getTransformFn()) {
|
|
||||||
- OpNode->setTransformFn(nullptr);
|
|
||||||
- std::vector<TreePatternNode*> Children;
|
|
||||||
- Children.push_back(OpNode);
|
|
||||||
- OpNode = new TreePatternNode(Xform, Children, OpNode->getNumTypes());
|
|
||||||
- }
|
|
||||||
- ResultNodeOperands.push_back(OpNode);
|
|
||||||
- }
|
|
||||||
- DstPattern = Result.getOnlyTree();
|
|
||||||
- if (!DstPattern->isLeaf())
|
|
||||||
- DstPattern = new TreePatternNode(DstPattern->getOperator(),
|
|
||||||
- ResultNodeOperands,
|
|
||||||
- DstPattern->getNumTypes());
|
|
||||||
-
|
|
||||||
- for (unsigned i = 0, e = Result.getOnlyTree()->getNumTypes(); i != e; ++i)
|
|
||||||
- DstPattern->setType(i, Result.getOnlyTree()->getExtType(i));
|
|
||||||
+ TreePatternNode* DstPattern = PromoteXForms(Result.getOnlyTree());
|
|
||||||
|
|
||||||
TreePattern Temp(Result.getRecord(), DstPattern, false, *this);
|
|
||||||
Temp.InferAllTypes();
|
|
||||||
--
|
|
||||||
1.8.3.1
|
|
||||||
|
|
@ -1,12 +0,0 @@
|
|||||||
diff -up llvm-3.7.1.src/cmake/config-ix.cmake.s390 llvm-3.7.1.src/cmake/config-ix.cmake
|
|
||||||
--- llvm-3.7.1.src/cmake/config-ix.cmake.s390 2016-02-16 12:27:36.000000000 +0100
|
|
||||||
+++ llvm-3.7.1.src/cmake/config-ix.cmake 2016-02-16 12:27:52.000000000 +0100
|
|
||||||
@@ -356,6 +356,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "msp430
|
|
||||||
set(LLVM_NATIVE_ARCH MSP430)
|
|
||||||
elseif (LLVM_NATIVE_ARCH MATCHES "hexagon")
|
|
||||||
set(LLVM_NATIVE_ARCH Hexagon)
|
|
||||||
+elseif (LLVM_NATIVE_ARCH MATCHES "s390")
|
|
||||||
+ set(LLVM_NATIVE_ARCH SystemZ)
|
|
||||||
elseif (LLVM_NATIVE_ARCH MATCHES "s390x")
|
|
||||||
set(LLVM_NATIVE_ARCH SystemZ)
|
|
||||||
elseif (LLVM_NATIVE_ARCH MATCHES "wasm32")
|
|
42
llvm.spec
42
llvm.spec
@ -8,9 +8,26 @@
|
|||||||
%global compat_build 0
|
%global compat_build 0
|
||||||
|
|
||||||
%global llvm_bindir %{_libdir}/%{name}
|
%global llvm_bindir %{_libdir}/%{name}
|
||||||
%global maj_ver 6
|
%global maj_ver 7
|
||||||
%global min_ver 0
|
%global min_ver 0
|
||||||
%global patch_ver 1
|
%global patch_ver 0
|
||||||
|
%global rc_ver 1
|
||||||
|
|
||||||
|
%ifarch s390x
|
||||||
|
%global llvm_targets SystemZ;BPF
|
||||||
|
%endif
|
||||||
|
%ifarch ppc64 ppc64le
|
||||||
|
%global llvm_targets PowerPC;AMDGPU;BPF
|
||||||
|
%endif
|
||||||
|
%ifarch %ix86 x86_64
|
||||||
|
%global llvm_targets X86;AMDGPU;NVPTX;BPF
|
||||||
|
%endif
|
||||||
|
%ifarch aarch64
|
||||||
|
%global llvm_targets AArch64;AMDGPU;BPF
|
||||||
|
%endif
|
||||||
|
%ifarch %{arm}
|
||||||
|
%global llvm_targets ARM;BPF
|
||||||
|
%endif
|
||||||
|
|
||||||
%if 0%{?compat_build}
|
%if 0%{?compat_build}
|
||||||
%global pkg_name llvm%{maj_ver}.%{min_ver}
|
%global pkg_name llvm%{maj_ver}.%{min_ver}
|
||||||
@ -26,26 +43,24 @@
|
|||||||
%else
|
%else
|
||||||
%global pkg_name llvm
|
%global pkg_name llvm
|
||||||
%global install_prefix /usr
|
%global install_prefix /usr
|
||||||
|
%global install_libdir %{_libdir}
|
||||||
|
%global pkg_libdir %{install_libdir}
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
Name: %{pkg_name}
|
Name: %{pkg_name}
|
||||||
Version: %{maj_ver}.%{min_ver}.%{patch_ver}
|
Version: %{maj_ver}.%{min_ver}.%{patch_ver}
|
||||||
Release: 6%{?dist}
|
Release: 0.1.rc%{rc_ver}%{?dist}
|
||||||
Summary: The Low Level Virtual Machine
|
Summary: The Low Level Virtual Machine
|
||||||
|
|
||||||
License: NCSA
|
License: NCSA
|
||||||
URL: http://llvm.org
|
URL: http://llvm.org
|
||||||
Source0: http://llvm.org/releases/%{version}/llvm-%{version}%{?rc_ver:rc%{rc_ver}}.src.tar.xz
|
Source0: http://%{?rc_ver:pre}releases.llvm.org/%{version}/%{?rc_ver:rc%{rc_ver}}/llvm-%{version}%{?rc_ver:rc%{rc_ver}}.src.tar.xz
|
||||||
|
|
||||||
# recognize s390 as SystemZ when configuring build
|
# recognize s390 as SystemZ when configuring build
|
||||||
Patch0: llvm-3.7.1-cmake-s390.patch
|
|
||||||
Patch3: 0001-CMake-Split-static-library-exports-into-their-own-ex.patch
|
Patch3: 0001-CMake-Split-static-library-exports-into-their-own-ex.patch
|
||||||
Patch7: 0001-Filter-out-cxxflags-not-supported-by-clang.patch
|
Patch7: 0001-Filter-out-cxxflags-not-supported-by-clang.patch
|
||||||
Patch9: 0001-Export-LLVM_DYLIB_COMPONENTS-in-LLVMConfig.cmake.patch
|
|
||||||
|
|
||||||
Patch10: 0001-Don-t-run-BV-DAG-Combine-before-legalization-if-it-a.patch
|
Patch10: 0001-Don-t-run-BV-DAG-Combine-before-legalization-if-it-a.patch
|
||||||
Patch11: 0001-PowerPC-Do-not-round-values-prior-to-converting-to-i.patch
|
|
||||||
Patch12: 0001-SystemZ-TableGen-Fix-shift-count-handling.patch
|
|
||||||
|
|
||||||
BuildRequires: gcc
|
BuildRequires: gcc
|
||||||
BuildRequires: gcc-c++
|
BuildRequires: gcc-c++
|
||||||
@ -135,7 +150,7 @@ cd _build
|
|||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
\
|
\
|
||||||
-DLLVM_TARGETS_TO_BUILD="X86;AMDGPU;PowerPC;NVPTX;SystemZ;AArch64;ARM;Mips;BPF" \
|
-DLLVM_TARGETS_TO_BUILD="%{llvm_targets}" \
|
||||||
-DLLVM_ENABLE_LIBCXX:BOOL=OFF \
|
-DLLVM_ENABLE_LIBCXX:BOOL=OFF \
|
||||||
-DLLVM_ENABLE_ZLIB:BOOL=ON \
|
-DLLVM_ENABLE_ZLIB:BOOL=ON \
|
||||||
-DLLVM_ENABLE_FFI:BOOL=ON \
|
-DLLVM_ENABLE_FFI:BOOL=ON \
|
||||||
@ -185,6 +200,9 @@ ninja -v
|
|||||||
cd _build
|
cd _build
|
||||||
ninja -v install
|
ninja -v install
|
||||||
|
|
||||||
|
# FIXME: Patch upstream to not install this
|
||||||
|
rm %{buildroot}%{install_libdir}/TestPlugin.so
|
||||||
|
|
||||||
%if !0%{?compat_build}
|
%if !0%{?compat_build}
|
||||||
# fix multi-lib
|
# fix multi-lib
|
||||||
mv -v %{buildroot}%{_bindir}/llvm-config{,-%{__isa_bits}}
|
mv -v %{buildroot}%{_bindir}/llvm-config{,-%{__isa_bits}}
|
||||||
@ -261,6 +279,7 @@ fi
|
|||||||
%endif
|
%endif
|
||||||
|
|
||||||
%files libs
|
%files libs
|
||||||
|
%{pkg_libdir}/libLLVM-%{maj_ver}.so
|
||||||
%if !0%{?compat_build}
|
%if !0%{?compat_build}
|
||||||
%{_libdir}/BugpointPasses.so
|
%{_libdir}/BugpointPasses.so
|
||||||
%{_libdir}/LLVMHello.so
|
%{_libdir}/LLVMHello.so
|
||||||
@ -314,6 +333,11 @@ fi
|
|||||||
%endif
|
%endif
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Fri Aug 10 2018 Tom Stellard <tstellar@redhat.com> - 7.0.0-0.1.rc1
|
||||||
|
- 7.0.0-rc1 Release
|
||||||
|
- Reduce number of enabled targets on all arches.
|
||||||
|
- Drop s390 detection patch, LLVM does not support s390 codegen.
|
||||||
|
|
||||||
* Mon Aug 06 2018 Tom Stellard <tstellar@redhat.com> - 6.0.1-6
|
* Mon Aug 06 2018 Tom Stellard <tstellar@redhat.com> - 6.0.1-6
|
||||||
- Backport some fixes needed by mesa and rust
|
- Backport some fixes needed by mesa and rust
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user