//===-- PPCTargetTransformInfo.cpp - PPC specific TTI ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "PPCTargetTransformInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "ppctti"

static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",
    cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);

static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
    cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);

static cl::opt<bool>
EnablePPCColdCC("ppc-enable-coldcc", cl::Hidden, cl::init(false),
                cl::desc("Enable using coldcc calling conv for cold "
                         "internal functions"));

static cl::opt<bool>
LsrNoInsnsCost("ppc-lsr-no-insns-cost", cl::Hidden, cl::init(false),
               cl::desc("Do not add instruction count to lsr cost model"));

// The latency of mtctr is only justified if there are more than 4
// comparisons that will be removed as a result.
static cl::opt<unsigned>
SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
                      cl::desc("Loops with a constant trip count smaller than "
                               "this value will not use the count register."));

//===----------------------------------------------------------------------===//
//
// PPC cost model.
//
//===----------------------------------------------------------------------===//

TargetTransformInfo::PopcntSupportKind
PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  if (ST->hasPOPCNTD() != PPCSubtarget::POPCNTD_Unavailable && TyWidth <= 64)
    return ST->hasPOPCNTD() == PPCSubtarget::POPCNTD_Slow ?
             TTI::PSK_SlowHardware : TTI::PSK_FastHardware;
  return TTI::PSK_Software;
}

std::optional<Instruction *>
PPCTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  default:
    break;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
    // Turn PPC lvx -> load if the pointer is known aligned.
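    // Illustrative IR sketch (hypothetical values, not from this file):
    //   %v = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %p) ; %p 16B-aligned
    // becomes
    //   %v = load <4 x i32>, ptr %p, align 16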
    if (getOrEnforceKnownAlignment(
            II.getArgOperand(0), Align(16), IC.getDataLayout(), &II,
            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
      Value *Ptr = II.getArgOperand(0);
      return new LoadInst(II.getType(), Ptr, "", false, Align(16));
    }
    break;
  case Intrinsic::ppc_vsx_lxvw4x:
  case Intrinsic::ppc_vsx_lxvd2x: {
    // Turn PPC VSX loads into normal loads.
    Value *Ptr = II.getArgOperand(0);
    return new LoadInst(II.getType(), Ptr, Twine(""), false, Align(1));
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(
            II.getArgOperand(1), Align(16), IC.getDataLayout(), &II,
            &IC.getAssumptionCache(), &IC.getDominatorTree()) >= 16) {
      Value *Ptr = II.getArgOperand(1);
      return new StoreInst(II.getArgOperand(0), Ptr, false, Align(16));
    }
    break;
  case Intrinsic::ppc_vsx_stxvw4x:
  case Intrinsic::ppc_vsx_stxvd2x: {
    // Turn PPC VSX stores into normal stores.
    Value *Ptr = II.getArgOperand(1);
    return new StoreInst(II.getArgOperand(0), Ptr, false, Align(1));
  }
  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
    // Note that ppc_altivec_vperm has a big-endian bias, so when creating
    // a shufflevector for little endian, we must undo the transformation
    // performed on vec_perm in altivec.h. That is, we must complement
    // the permutation mask with respect to 31 and reverse the order of
    // V1 and V2.
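    // For example (illustrative): a big-endian mask byte of 5 selects byte 5
    // of the concatenated pair V1:V2; on little endian the same semantics
    // require index 31 - 5 = 26 into the swapped pair V2:V1, which is what
    // the loop below computes.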
    if (Constant *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {
      assert(cast<FixedVectorType>(Mask->getType())->getNumElements() == 16 &&
             "Bad type for intrinsic!");

      // Check that all of the elements are integer constants or undefs.
      bool AllEltsOk = true;
      for (unsigned i = 0; i != 16; ++i) {
        Constant *Elt = Mask->getAggregateElement(i);
        if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
          AllEltsOk = false;
          break;
        }
      }

      if (AllEltsOk) {
        // Cast the input vectors to byte vectors.
        Value *Op0 =
            IC.Builder.CreateBitCast(II.getArgOperand(0), Mask->getType());
        Value *Op1 =
            IC.Builder.CreateBitCast(II.getArgOperand(1), Mask->getType());
        Value *Result = UndefValue::get(Op0->getType());

        // Only extract each element once.
        Value *ExtractedElts[32];
        memset(ExtractedElts, 0, sizeof(ExtractedElts));

        for (unsigned i = 0; i != 16; ++i) {
          if (isa<UndefValue>(Mask->getAggregateElement(i)))
            continue;
          unsigned Idx =
              cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
          Idx &= 31; // Match the hardware behavior.
          if (DL.isLittleEndian())
            Idx = 31 - Idx;

          if (!ExtractedElts[Idx]) {
            Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
            Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
            ExtractedElts[Idx] = IC.Builder.CreateExtractElement(
                Idx < 16 ? Op0ToUse : Op1ToUse, IC.Builder.getInt32(Idx & 15));
          }

          // Insert this value into the result vector.
          Result = IC.Builder.CreateInsertElement(Result, ExtractedElts[Idx],
                                                  IC.Builder.getInt32(i));
        }
        return CastInst::Create(Instruction::BitCast, Result, II.getType());
      }
    }
    break;
  }
  return std::nullopt;
}

InstructionCost PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                                          TTI::TargetCostKind CostKind) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCost(Imm, Ty, CostKind);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  if (Imm == 0)
    return TTI::TCC_Free;

  if (Imm.getBitWidth() <= 64) {
    if (isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Basic;

    if (isInt<32>(Imm.getSExtValue())) {
      // A constant that can be materialized using lis.
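      // For example (illustrative), 0x12340000 is a single
      //   lis rN, 0x1234
      // while a 32-bit value with a nonzero low half needs lis + ori,
      // hence the 2 * TCC_Basic below.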
      if ((Imm.getZExtValue() & 0xFFFF) == 0)
        return TTI::TCC_Basic;

      return 2 * TTI::TCC_Basic;
    }
  }

  return 4 * TTI::TCC_Basic;
}

InstructionCost PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                                const APInt &Imm, Type *Ty,
                                                TTI::TargetCostKind CostKind) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  switch (IID) {
  default:
    return TTI::TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
    if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_stackmap:
    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint:
    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TTI::TCC_Free;
    break;
  }
  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}

InstructionCost PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                              const APInt &Imm, Type *Ty,
                                              TTI::TargetCostKind CostKind,
                                              Instruction *Inst) {
  if (DisablePPCConstHoist)
    return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind, Inst);

  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0)
    return ~0U;

  unsigned ImmIdx = ~0U;
  bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
       ZeroFree = false;
  switch (Opcode) {
  default:
    return TTI::TCC_Free;
  case Instruction::GetElementPtr:
    // Always hoist the base address of a GetElementPtr. This prevents the
    // creation of new constants for every base constant that gets constant
    // folded with the offset.
    if (Idx == 0)
      return 2 * TTI::TCC_Basic;
    return TTI::TCC_Free;
  case Instruction::And:
    RunFree = true; // (for the rotate-and-mask instructions)
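    // Illustrative example: an AND with 0x00FFFF00 (a contiguous run of set
    // bits) can be encoded directly in rlwinm's mask operand, so such
    // immediates are treated as free in the shifted-mask check further below.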
    [[fallthrough]];
  case Instruction::Add:
  case Instruction::Or:
  case Instruction::Xor:
    ShiftedFree = true;
    [[fallthrough]];
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    ImmIdx = 1;
    break;
  case Instruction::ICmp:
    UnsignedFree = true;
    ImmIdx = 1;
    // Zero comparisons can use record-form instructions.
    [[fallthrough]];
  case Instruction::Select:
    ZeroFree = true;
    break;
  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Ret:
  case Instruction::Load:
  case Instruction::Store:
    break;
  }

  if (ZeroFree && Imm == 0)
    return TTI::TCC_Free;

  if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
    if (isInt<16>(Imm.getSExtValue()))
      return TTI::TCC_Free;

    if (RunFree) {
      if (Imm.getBitWidth() <= 32 &&
          (isShiftedMask_32(Imm.getZExtValue()) ||
           isShiftedMask_32(~Imm.getZExtValue())))
        return TTI::TCC_Free;

      if (ST->isPPC64() &&
          (isShiftedMask_64(Imm.getZExtValue()) ||
           isShiftedMask_64(~Imm.getZExtValue())))
        return TTI::TCC_Free;
    }

    if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
      return TTI::TCC_Free;

    if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
      return TTI::TCC_Free;
  }

  return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}

// Check if the given Type is an MMA vector type. The valid MMA types are
// v256i1 and v512i1.
static bool isMMAType(Type *Ty) {
  return Ty->isVectorTy() && (Ty->getScalarSizeInBits() == 1) &&
         (Ty->getPrimitiveSizeInBits() > 128);
}

InstructionCost PPCTTIImpl::getInstructionCost(const User *U,
                                               ArrayRef<const Value *> Operands,
                                               TTI::TargetCostKind CostKind) {
  // We already implement getCastInstrCost and getMemoryOpCost, which perform
  // the vector adjustment themselves.
  if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
    return BaseT::getInstructionCost(U, Operands, CostKind);

  if (U->getType()->isVectorTy()) {
    // Instructions that need to be split should cost more.
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(U->getType());
    return LT.first * BaseT::getInstructionCost(U, Operands, CostKind);
  }

  return BaseT::getInstructionCost(U, Operands, CostKind);
}

bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                          AssumptionCache &AC,
                                          TargetLibraryInfo *LibInfo,
                                          HardwareLoopInfo &HWLoopInfo) {
  const PPCTargetMachine &TM = ST->getTargetMachine();
  TargetSchedModel SchedModel;
  SchedModel.init(ST);

  // Do not convert small short loops to CTR loops.
  unsigned ConstTripCount = SE.getSmallConstantTripCount(L);
  if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
    SmallPtrSet<const Value *, 32> EphValues;
    CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
    CodeMetrics Metrics;
    for (BasicBlock *BB : L->blocks())
      Metrics.analyzeBasicBlock(BB, *this, EphValues);
    // 6 is an approximate latency for the mtctr instruction.
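    // For example (illustrative numbers): with an issue width of 4, a
    // small-trip-count loop body of 24 instructions or fewer is left as a
    // compare-and-branch loop rather than paying mtctr's latency.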
    if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
      return false;
  }

  // Check that there are no hardware-loop-related intrinsics in the loop.
  for (auto *BB : L->getBlocks())
    for (auto &I : *BB)
      if (auto *Call = dyn_cast<IntrinsicInst>(&I))
        if (Call->getIntrinsicID() == Intrinsic::set_loop_iterations ||
            Call->getIntrinsicID() == Intrinsic::loop_decrement)
          return false;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // If there is an exit edge known to be frequently taken,
  // we should not transform this loop.
  for (auto &BB : ExitingBlocks) {
    Instruction *TI = BB->getTerminator();
    if (!TI) continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      uint64_t TrueWeight = 0, FalseWeight = 0;
      if (!BI->isConditional() ||
          !extractBranchWeights(*BI, TrueWeight, FalseWeight))
        continue;

      // If the exit path is more frequent than the loop path,
      // we return here without further analysis for this loop.
      bool TrueIsExit = !L->contains(BI->getSuccessor(0));
      if (( TrueIsExit && FalseWeight < TrueWeight) ||
          (!TrueIsExit && FalseWeight > TrueWeight))
        return false;
    }
  }

  LLVMContext &C = L->getHeader()->getContext();
  HWLoopInfo.CountType = TM.isPPC64() ?
    Type::getInt64Ty(C) : Type::getInt32Ty(C);
  HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
  return true;
}

void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::UnrollingPreferences &UP,
                                         OptimizationRemarkEmitter *ORE) {
  if (ST->getCPUDirective() == PPC::DIR_A2) {
    // The A2 is in-order with a deep pipeline, and concatenation unrolling
    // helps expose latency-hiding opportunities to the instruction scheduler.
    UP.Partial = UP.Runtime = true;

    // We unroll a lot on the A2 (hundreds of instructions), and the benefits
    // often outweigh the cost of a division to compute the trip count.
    UP.AllowExpensiveTripCount = true;
  }

  BaseT::getUnrollingPreferences(L, SE, UP, ORE);
}

void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                       TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
}

// This function returns true to allow using the coldcc calling convention.
// Returning true results in coldcc being used for functions which are cold at
// all call sites when the callers of the functions are not calling any other
// non-coldcc functions.
bool PPCTTIImpl::useColdCCForColdCall(Function &F) {
  return EnablePPCColdCC;
}

bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
  // On the A2, always unroll aggressively.
  if (ST->getCPUDirective() == PPC::DIR_A2)
    return true;

  return LoopHasReductions;
}

PPCTTIImpl::TTI::MemCmpExpansionOptions
PPCTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  TTI::MemCmpExpansionOptions Options;
  Options.LoadSizes = {8, 4, 2, 1};
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  return Options;
}

bool PPCTTIImpl::enableInterleavedAccessVectorization() {
  return true;
}

unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  assert(ClassID == GPRRC || ClassID == FPRRC ||
         ClassID == VRRC || ClassID == VSXRC);
  if (ST->hasVSX()) {
    assert(ClassID == GPRRC || ClassID == VSXRC || ClassID == VRRC);
    return ClassID == VSXRC ? 64 : 32;
  }
  assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);
  return 32;
}

unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const {
  if (Vector)
    return ST->hasVSX() ? VSXRC : VRRC;
  if (Ty && (Ty->getScalarType()->isFloatTy() ||
             Ty->getScalarType()->isDoubleTy()))
    return ST->hasVSX() ? VSXRC : FPRRC;
  if (Ty && (Ty->getScalarType()->isFP128Ty() ||
             Ty->getScalarType()->isPPC_FP128Ty()))
    return VRRC;
  if (Ty && Ty->getScalarType()->isHalfTy())
    return VSXRC;
  return GPRRC;
}

const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {
  switch (ClassID) {
  default:
    llvm_unreachable("unknown register class");
    return "PPC::unknown register class";
  case GPRRC: return "PPC::GPRRC";
  case FPRRC: return "PPC::FPRRC";
  case VRRC:  return "PPC::VRRC";
  case VSXRC: return "PPC::VSXRC";
  }
}

TypeSize
PPCTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(ST->isPPC64() ? 64 : 32);
  case TargetTransformInfo::RGK_FixedWidthVector:
    return TypeSize::getFixed(ST->hasAltivec() ? 128 : 0);
  case TargetTransformInfo::RGK_ScalableVector:
    return TypeSize::getScalable(0);
  }

  llvm_unreachable("Unsupported register kind");
}

unsigned PPCTTIImpl::getCacheLineSize() const {
  // Starting with P7 we have a cache line size of 128 bytes.
  unsigned Directive = ST->getCPUDirective();
  // Assume that Future CPU has the same cache line size as the others.
  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
      Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
      Directive == PPC::DIR_PWR11 || Directive == PPC::DIR_PWR_FUTURE)
    return 128;

  // On other processors return a default of 64 bytes.
  return 64;
}

unsigned PPCTTIImpl::getPrefetchDistance() const {
  return 300;
}

unsigned PPCTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
  unsigned Directive = ST->getCPUDirective();
  // The 440 has no SIMD support, but floating-point instructions
  // have a 5-cycle latency, so unroll by 5x for latency hiding.
  if (Directive == PPC::DIR_440)
    return 5;

  // The A2 has no SIMD support, but floating-point instructions
  // have a 6-cycle latency, so unroll by 6x for latency hiding.
  if (Directive == PPC::DIR_A2)
    return 6;

  // FIXME: For lack of any better information, do no harm...
  if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
    return 1;

  // For P7 and P8, floating-point instructions have a 6-cycle latency and
  // there are two execution units, so unroll by 12x for latency hiding.
  // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
  // FIXME: the same for P10 as previous gen until POWER10 scheduling is ready
  // Assume that Future is the same as the others.
  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
      Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
      Directive == PPC::DIR_PWR11 || Directive == PPC::DIR_PWR_FUTURE)
    return 12;

  // For most things, modern systems have two execution units (and
  // out-of-order execution).
  return 2;
}

// Returns a cost adjustment factor to adjust the cost of vector instructions
// on targets on which the vector and scalar units overlap, thereby reducing
// the overall throughput of vector code relative to scalar code.
// An invalid instruction cost is returned if the type is an MMA vector type.
InstructionCost PPCTTIImpl::vectorCostAdjustmentFactor(unsigned Opcode,
                                                       Type *Ty1, Type *Ty2) {
  // If the vector type is of an MMA type (v256i1, v512i1), an invalid
  // instruction cost is returned. This signals to other cost-computing
  // functions to return the maximum instruction cost, preventing any
  // opportunities for the optimizer to produce MMA types within the IR.
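  // Illustrative effect (hypothetical subtarget state): where
  // vectorsUseTwoUnits() is true, a legal v4i32 operation gets factor 2,
  // doubling its reported cost in the callers below; on other subtargets
  // the factor stays 1.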
  if (isMMAType(Ty1))
    return InstructionCost::getInvalid();

  if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
    return InstructionCost(1);

  std::pair<InstructionCost, MVT> LT1 = getTypeLegalizationCost(Ty1);
  // If type legalization involves splitting the vector, we don't want to
  // double the cost at every step - only the last step.
  if (LT1.first != 1 || !LT1.second.isVector())
    return InstructionCost(1);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  if (TLI->isOperationExpand(ISD, LT1.second))
    return InstructionCost(1);

  if (Ty2) {
    std::pair<InstructionCost, MVT> LT2 = getTypeLegalizationCost(Ty2);
    if (LT2.first != 1 || !LT2.second.isVector())
      return InstructionCost(1);
  }

  return InstructionCost(2);
}

InstructionCost PPCTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args,
    const Instruction *CxtI) {
  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Ty, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  // TODO: Handle more cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                         Op2Info, Args, CxtI);

  // Fallback to the default implementation.
  InstructionCost Cost = BaseT::getArithmeticInstrCost(
      Opcode, Ty, CostKind, Op1Info, Op2Info);
  return Cost * CostFactor;
}

InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
                                           ArrayRef<int> Mask,
                                           TTI::TargetCostKind CostKind,
                                           int Index, Type *SubTp,
                                           ArrayRef<const Value *> Args,
                                           const Instruction *CxtI) {
  InstructionCost CostFactor =
      vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);

  // PPC, for both Altivec and VSX, supports cheap arbitrary permutations
  // (at least in the sense that there need only be one non-loop-invariant
  // instruction). We need one such shuffle instruction for each actual
  // register (this is not true for arbitrary shuffles, but is true for the
  // structured types of shuffles covered by TTI::ShuffleKind).
  return LT.first * CostFactor;
}

InstructionCost PPCTTIImpl::getCFInstrCost(unsigned Opcode,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) {
  if (CostKind != TTI::TCK_RecipThroughput)
    return Opcode == Instruction::PHI ? 0 : 1;
  // Branches are assumed to be predicted.
  return 0;
}

InstructionCost PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                             Type *Src,
                                             TTI::CastContextHint CCH,
                                             TTI::TargetCostKind CostKind,
                                             const Instruction *I) {
  assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Dst, Src);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  InstructionCost Cost =
      BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  Cost *= CostFactor;
  // TODO: Allow non-throughput costs that aren't binary.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost == 0 ? 0 : 1;
  return Cost;
}

InstructionCost PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                               Type *CondTy,
                                               CmpInst::Predicate VecPred,
                                               TTI::TargetCostKind CostKind,
                                               const Instruction *I) {
  InstructionCost CostFactor =
      vectorCostAdjustmentFactor(Opcode, ValTy, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  InstructionCost Cost =
      BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost;
  return Cost * CostFactor;
}

InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                               TTI::TargetCostKind CostKind,
                                               unsigned Index, Value *Op0,
                                               Value *Op1) {
  assert(Val->isVectorTy() && "This must be a vector type");

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Val, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  InstructionCost Cost =
      BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
  Cost *= CostFactor;

  if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
    // Double-precision scalars are already located in index #0 (or #1 if LE).
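    // For example (illustrative IR): on little endian,
    //   extractelement <2 x double> %v, i64 1
    // already sits in the scalar position of the VSR, so it is free.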
    if (ISD == ISD::EXTRACT_VECTOR_ELT &&
        Index == (ST->isLittleEndian() ? 1 : 0))
      return 0;

    return Cost;

  } else if (Val->getScalarType()->isIntegerTy()) {
    unsigned EltSize = Val->getScalarSizeInBits();
    // Computing on 1-bit values requires extra mask or compare operations.
    unsigned MaskCostForOneBitSize = (VecMaskCost && EltSize == 1) ? 1 : 0;
    // Computing on a non-constant index requires extra mask or compare
    // operations.
    unsigned MaskCostForIdx = (Index != -1U) ? 0 : 1;
    if (ST->hasP9Altivec()) {
      // P10 has vxform insert which can handle a non-constant index. The
      // MaskCostForIdx is for masking the index.
      // P9 has insert for a constant index. A move-to VSR and a
      // permute/insert. Assume vector operation cost for both (the cost
      // will be 2x on P9).
      if (ISD == ISD::INSERT_VECTOR_ELT) {
        if (ST->hasP10Vector())
          return CostFactor + MaskCostForIdx;
        if (Index != -1U)
          return 2 * CostFactor;
      } else if (ISD == ISD::EXTRACT_VECTOR_ELT) {
        // It's an extract. Maybe we can do a cheap move-from VSR.
        // P9 has both mfvsrd and mfvsrld for 64-bit integers.
        if (EltSize == 64 && Index != -1U)
          return 1;
        if (EltSize == 32) {
          unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
          if (Index == MfvsrwzIndex)
            return 1;

          // For other indices, such as non-constant ones, P9 has a vxform
          // extract. The MaskCostForIdx is for masking the index.
          return CostFactor + MaskCostForIdx;
        }

        // We need a vector extract (or mfvsrld). Assume vector operation
        // cost. The cost of the load constant for a vector extract is
        // disregarded (invariant, easily schedulable).
        return CostFactor + MaskCostForOneBitSize + MaskCostForIdx;
      }
    } else if (ST->hasDirectMove() && Index != -1U) {
      // Assume permute has standard cost.
      // Assume move-to/move-from VSR have 2x standard cost.
      if (ISD == ISD::INSERT_VECTOR_ELT)
        return 3;
      return 3 + MaskCostForOneBitSize;
    }
  }

  // Estimated cost of a load-hit-store delay. This was obtained
  // experimentally as a minimum needed to prevent unprofitable
  // vectorization for the paq8p benchmark. It may need to be
  // raised further if other unprofitable cases remain.
  unsigned LHSPenalty = 2;
  if (ISD == ISD::INSERT_VECTOR_ELT)
    LHSPenalty += 7;

  // Vector element insert/extract with Altivec is very expensive,
  // because they require store and reload with the attendant
  // processor stall for load-hit-store. Until VSX is available,
  // these need to be estimated as very costly.
  if (ISD == ISD::EXTRACT_VECTOR_ELT ||
      ISD == ISD::INSERT_VECTOR_ELT)
    return LHSPenalty + Cost;

  return Cost;
}

InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                            MaybeAlign Alignment,
                                            unsigned AddressSpace,
                                            TTI::TargetCostKind CostKind,
                                            TTI::OperandValueInfo OpInfo,
                                            const Instruction *I) {
  InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Src, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  if (TLI->getValueType(DL, Src, true) == MVT::Other)
    return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind);
  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
         "Invalid Opcode");

  InstructionCost Cost =
      BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
  // TODO: Handle other cost kinds.
  if (CostKind != TTI::TCK_RecipThroughput)
    return Cost;

  Cost *= CostFactor;

  bool IsAltivecType = ST->hasAltivec() &&
                       (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
                        LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
  bool IsVSXType = ST->hasVSX() &&
                   (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);

  // VSX has 32b/64b load instructions. Legalization can handle loading of
  // 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
  // PPCTargetLowering can't compute the cost appropriately. So here we
  // explicitly check this case. There are also corresponding store
  // instructions.
  unsigned MemBits = Src->getPrimitiveSizeInBits();
  if (ST->hasVSX() && IsAltivecType &&
      (MemBits == 64 || (ST->hasP8Vector() && MemBits == 32)))
    return 1;

  // Aligned loads and stores are easy.
  unsigned SrcBytes = LT.second.getStoreSize();
  if (!SrcBytes || !Alignment || *Alignment >= SrcBytes)
    return Cost;

  // If we can use the permutation-based load sequence, then this is also
  // relatively cheap (not counting loop-invariant instructions): one load plus
  // one permute (the last load in a series has extra cost, but we're
  // neglecting that here). Note that on the P7, we could do unaligned loads
  // for Altivec types using the VSX instructions, but that's more expensive
  // than using the permutation-based load sequence. On the P8, that's no
  // longer true.
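  // (Illustrative sketch of the pre-P8 sequence, register names hypothetical:
  //    lvx v2, 0, r3; lvx v3, r4, r3; lvsl v4, 0, r3; vperm v2, v2, v3, v4.)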
  if (Opcode == Instruction::Load && (!ST->hasP8Vector() && IsAltivecType) &&
      *Alignment >= LT.second.getScalarType().getStoreSize())
    return Cost + LT.first; // Add the cost of the permutations.

  // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
  // P7, unaligned vector loads are more expensive than the permutation-based
  // load sequence, so that might be used instead, but regardless, the net cost
  // is about the same (not counting loop-invariant instructions).
  if (IsVSXType || (ST->hasVSX() && IsAltivecType))
    return Cost;

  // Newer PPC supports unaligned memory access.
  if (TLI->allowsMisalignedMemoryAccesses(LT.second, 0))
    return Cost;

  // PPC in general does not support unaligned loads and stores. They'll need
  // to be decomposed based on the alignment factor.

  // Add the cost of each scalar load or store.
  assert(Alignment);
  Cost += LT.first * ((SrcBytes / Alignment->value()) - 1);
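
  // For example (illustrative numbers): a 16-byte vector with 4-byte
  // alignment decomposes into 4 accesses, i.e. 3 extra operations per
  // legalized register beyond the one already counted.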

  // For a vector type, there is also scalarization overhead (only for
  // stores, loads are expanded using the vector-load + permutation sequence,
  // which is much less expensive).
  if (Src->isVectorTy() && Opcode == Instruction::Store)
    for (int i = 0, e = cast<FixedVectorType>(Src)->getNumElements(); i < e;
         ++i)
      Cost += getVectorInstrCost(Instruction::ExtractElement, Src, CostKind, i,
                                 nullptr, nullptr);

  return Cost;
}

InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) {
  InstructionCost CostFactor =
      vectorCostAdjustmentFactor(Opcode, VecTy, nullptr);
  if (!CostFactor.isValid())
    return InstructionCost::getMax();

  if (UseMaskForCond || UseMaskForGaps)
    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);

  assert(isa<VectorType>(VecTy) &&
         "Expect a vector type for interleaved memory op");

  // Legalize the type.
  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(VecTy);

  // First, the cost of the load/store operation.
  InstructionCost Cost = getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment),
                                         AddressSpace, CostKind);

  // PPC, for both Altivec and VSX, supports cheap arbitrary permutations
  // (at least in the sense that there need only be one non-loop-invariant
  // instruction). For each result vector, we need one shuffle per incoming
  // vector (except that the first shuffle can take two incoming vectors
  // because it does not need to take itself).
  Cost += Factor*(LT.first-1);
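  // For example (illustrative numbers): a stride-2 access whose legalized
  // type spans 2 registers (Factor = 2, LT.first = 2) adds
  // 2 * (2 - 1) = 2 shuffles on top of the memory cost.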

  return Cost;
}

InstructionCost
PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                  TTI::TargetCostKind CostKind) {
  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}

bool PPCTTIImpl::areTypesABICompatible(const Function *Caller,
                                       const Function *Callee,
                                       const ArrayRef<Type *> &Types) const {

  // We need to ensure that argument promotion does not
  // attempt to promote pointers to MMA types (__vector_pair
  // and __vector_quad) since these types explicitly cannot be
  // passed as arguments. Both of these types are larger than
  // the 128-bit Altivec vectors and have a scalar size of 1 bit.
9040eae32dcSDimitry Andric if (!BaseT::areTypesABICompatible(Caller, Callee, Types))
90523408297SDimitry Andric return false;
90623408297SDimitry Andric
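// Reject any sized type matching the MMA signature described above: an
// i1-element vector wider than 128 bits.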
9070eae32dcSDimitry Andric return llvm::none_of(Types, [](Type *Ty) {
9080eae32dcSDimitry Andric if (Ty->isSized())
9090eae32dcSDimitry Andric return Ty->isIntOrIntVectorTy(1) && Ty->getPrimitiveSizeInBits() > 128;
91023408297SDimitry Andric return false;
91123408297SDimitry Andric });
91223408297SDimitry Andric }
91323408297SDimitry Andric
9140b57cec5SDimitry Andric bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
9150b57cec5SDimitry Andric LoopInfo *LI, DominatorTree *DT,
9160b57cec5SDimitry Andric AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
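// Determine whether this loop's trip-count comparison can be folded into a
// CTR-based hardware loop; if so, report the exit branch via *BI.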
9170b57cec5SDimitry Andric // Process nested loops first.
91804eeddc0SDimitry Andric for (Loop *I : *L)
91904eeddc0SDimitry Andric if (canSaveCmp(I, BI, SE, LI, DT, AC, LibInfo))
9200b57cec5SDimitry Andric return false; // Stop search.
9210b57cec5SDimitry Andric
9220b57cec5SDimitry Andric HardwareLoopInfo HWLoopInfo(L);
9230b57cec5SDimitry Andric
9240b57cec5SDimitry Andric if (!HWLoopInfo.canAnalyze(*LI))
9250b57cec5SDimitry Andric return false;
9260b57cec5SDimitry Andric
9270b57cec5SDimitry Andric if (!isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo))
9280b57cec5SDimitry Andric return false;
9290b57cec5SDimitry Andric
9300b57cec5SDimitry Andric if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT))
9310b57cec5SDimitry Andric return false;
9320b57cec5SDimitry Andric
9330b57cec5SDimitry Andric *BI = HWLoopInfo.ExitBranch;
9340b57cec5SDimitry Andric return true;
9350b57cec5SDimitry Andric }
9365ffd83dbSDimitry Andric
93781ad6265SDimitry Andric bool PPCTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
93881ad6265SDimitry Andric const TargetTransformInfo::LSRCost &C2) {
9395ffd83dbSDimitry Andric // The PowerPC default is to treat instruction count as the highest priority.
9405ffd83dbSDimitry Andric // If LsrNoInsnsCost is set, fall back to the default implementation.
9415ffd83dbSDimitry Andric if (!LsrNoInsnsCost)
9425ffd83dbSDimitry Andric return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
9435ffd83dbSDimitry Andric C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
9445ffd83dbSDimitry Andric std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
9455ffd83dbSDimitry Andric C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost);
9465ffd83dbSDimitry Andric else
9475ffd83dbSDimitry Andric return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
9485ffd83dbSDimitry Andric }
949e8d8bef9SDimitry Andric
950e8d8bef9SDimitry Andric bool PPCTTIImpl::isNumRegsMajorCostOfLSR() {
951e8d8bef9SDimitry Andric return false;
952e8d8bef9SDimitry Andric }
953e8d8bef9SDimitry Andric
954fe6060f1SDimitry Andric bool PPCTTIImpl::shouldBuildRelLookupTables() const {
955fe6060f1SDimitry Andric const PPCTargetMachine &TM = ST->getTargetMachine();
956fe6060f1SDimitry Andric // XCOFF hasn't implemented lowerRelativeReference; disable non-ELF targets for now.
957fe6060f1SDimitry Andric if (!TM.isELFv2ABI())
958fe6060f1SDimitry Andric return false;
959fe6060f1SDimitry Andric return BaseT::shouldBuildRelLookupTables();
960fe6060f1SDimitry Andric }
961fe6060f1SDimitry Andric
962e8d8bef9SDimitry Andric bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
963e8d8bef9SDimitry Andric MemIntrinsicInfo &Info) {
964e8d8bef9SDimitry Andric switch (Inst->getIntrinsicID()) {
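// Vector load intrinsics: the pointer is the first argument.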
965e8d8bef9SDimitry Andric case Intrinsic::ppc_altivec_lvx:
966e8d8bef9SDimitry Andric case Intrinsic::ppc_altivec_lvxl:
967e8d8bef9SDimitry Andric case Intrinsic::ppc_altivec_lvebx:
968e8d8bef9SDimitry Andric case Intrinsic::ppc_altivec_lvehx:
969e8d8bef9SDimitry Andric case Intrinsic::ppc_altivec_lvewx:
970e8d8bef9SDimitry Andric case Intrinsic::ppc_vsx_lxvd2x:
971e8d8bef9SDimitry Andric case Intrinsic::ppc_vsx_lxvw4x:
972e8d8bef9SDimitry Andric case Intrinsic::ppc_vsx_lxvd2x_be:
973e8d8bef9SDimitry Andric case Intrinsic::ppc_vsx_lxvw4x_be:
974e8d8bef9SDimitry Andric case Intrinsic::ppc_vsx_lxvl:
975e8d8bef9SDimitry Andric case Intrinsic::ppc_vsx_lxvll:
976e8d8bef9SDimitry Andric case Intrinsic::ppc_vsx_lxvp: {
977e8d8bef9SDimitry Andric Info.PtrVal = Inst->getArgOperand(0);
978e8d8bef9SDimitry Andric Info.ReadMem = true;
979e8d8bef9SDimitry Andric Info.WriteMem = false;
980e8d8bef9SDimitry Andric return true;
981e8d8bef9SDimitry Andric }
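// Vector store intrinsics: the pointer is the second argument.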
982e8d8bef9SDimitry Andric case Intrinsic::ppc_altivec_stvx:
983e8d8bef9SDimitry Andric case Intrinsic::ppc_altivec_stvxl:
984e8d8bef9SDimitry Andric case Intrinsic::ppc_altivec_stvebx:
985e8d8bef9SDimitry Andric case Intrinsic::ppc_altivec_stvehx:
986e8d8bef9SDimitry Andric case Intrinsic::ppc_altivec_stvewx:
987e8d8bef9SDimitry Andric case Intrinsic::ppc_vsx_stxvd2x:
988e8d8bef9SDimitry Andric case Intrinsic::ppc_vsx_stxvw4x:
989e8d8bef9SDimitry Andric case Intrinsic::ppc_vsx_stxvd2x_be:
990e8d8bef9SDimitry Andric case Intrinsic::ppc_vsx_stxvw4x_be:
991e8d8bef9SDimitry Andric case Intrinsic::ppc_vsx_stxvl:
992e8d8bef9SDimitry Andric case Intrinsic::ppc_vsx_stxvll:
993e8d8bef9SDimitry Andric case Intrinsic::ppc_vsx_stxvp: {
994e8d8bef9SDimitry Andric Info.PtrVal = Inst->getArgOperand(1);
995e8d8bef9SDimitry Andric Info.ReadMem = false;
996e8d8bef9SDimitry Andric Info.WriteMem = true;
997e8d8bef9SDimitry Andric return true;
998e8d8bef9SDimitry Andric }
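// Store-conditional intrinsics: the pointer is the first argument.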
999bdd1243dSDimitry Andric case Intrinsic::ppc_stbcx:
1000bdd1243dSDimitry Andric case Intrinsic::ppc_sthcx:
1001bdd1243dSDimitry Andric case Intrinsic::ppc_stdcx:
1002bdd1243dSDimitry Andric case Intrinsic::ppc_stwcx: {
1003bdd1243dSDimitry Andric Info.PtrVal = Inst->getArgOperand(0);
1004bdd1243dSDimitry Andric Info.ReadMem = false;
1005bdd1243dSDimitry Andric Info.WriteMem = true;
1006bdd1243dSDimitry Andric return true;
1007bdd1243dSDimitry Andric }
1008e8d8bef9SDimitry Andric default:
1009e8d8bef9SDimitry Andric break;
1010e8d8bef9SDimitry Andric }
1011e8d8bef9SDimitry Andric
1012e8d8bef9SDimitry Andric return false;
1013e8d8bef9SDimitry Andric }
10140eae32dcSDimitry Andric
10150eae32dcSDimitry Andric bool PPCTTIImpl::hasActiveVectorLength(unsigned Opcode, Type *DataType,
10160eae32dcSDimitry Andric Align Alignment) const {
10170eae32dcSDimitry Andric // Only load and store instructions can have a variable vector length on Power.
10180eae32dcSDimitry Andric if (Opcode != Instruction::Load && Opcode != Instruction::Store)
10190eae32dcSDimitry Andric return false;
10200eae32dcSDimitry Andric // Loads/stores with length instructions use bits 0-7 of the GPR operand and
10210eae32dcSDimitry Andric // therefore cannot be used in 32-bit mode.
10220eae32dcSDimitry Andric if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
10230eae32dcSDimitry Andric return false;
10240eae32dcSDimitry Andric if (isa<FixedVectorType>(DataType)) {
10250eae32dcSDimitry Andric unsigned VecWidth = DataType->getPrimitiveSizeInBits();
10260eae32dcSDimitry Andric return VecWidth == 128;
10270eae32dcSDimitry Andric }
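// Otherwise, qualify only scalar element types with native load/store
// support.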
10280eae32dcSDimitry Andric Type *ScalarTy = DataType->getScalarType();
10290eae32dcSDimitry Andric
10300eae32dcSDimitry Andric if (ScalarTy->isPointerTy())
10310eae32dcSDimitry Andric return true;
10320eae32dcSDimitry Andric
10330eae32dcSDimitry Andric if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
10340eae32dcSDimitry Andric return true;
10350eae32dcSDimitry Andric
10360eae32dcSDimitry Andric if (!ScalarTy->isIntegerTy())
10370eae32dcSDimitry Andric return false;
10380eae32dcSDimitry Andric
10390eae32dcSDimitry Andric unsigned IntWidth = ScalarTy->getIntegerBitWidth();
10400eae32dcSDimitry Andric return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64;
10410eae32dcSDimitry Andric }
10420eae32dcSDimitry Andric
10430eae32dcSDimitry Andric InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
10440eae32dcSDimitry Andric Align Alignment,
10450eae32dcSDimitry Andric unsigned AddressSpace,
10460eae32dcSDimitry Andric TTI::TargetCostKind CostKind,
10470eae32dcSDimitry Andric const Instruction *I) {
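// Start from the generic VP cost and refine it below for the cases the
// target handles natively.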
10480eae32dcSDimitry Andric InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment,
10490eae32dcSDimitry Andric AddressSpace, CostKind, I);
10500eae32dcSDimitry Andric if (TLI->getValueType(DL, Src, true) == MVT::Other)
10510eae32dcSDimitry Andric return Cost;
10520eae32dcSDimitry Andric // TODO: Handle other cost kinds.
10530eae32dcSDimitry Andric if (CostKind != TTI::TCK_RecipThroughput)
10540eae32dcSDimitry Andric return Cost;
10550eae32dcSDimitry Andric
10560eae32dcSDimitry Andric assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
10570eae32dcSDimitry Andric "Invalid Opcode");
10580eae32dcSDimitry Andric
10590eae32dcSDimitry Andric auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
10600eae32dcSDimitry Andric assert(SrcVTy && "Expected a vector type for VP memory operations");
10610eae32dcSDimitry Andric
10620eae32dcSDimitry Andric if (hasActiveVectorLength(Opcode, Src, Alignment)) {
1063bdd1243dSDimitry Andric std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(SrcVTy);
10640eae32dcSDimitry Andric
10650eae32dcSDimitry Andric InstructionCost CostFactor =
10660eae32dcSDimitry Andric vectorCostAdjustmentFactor(Opcode, Src, nullptr);
10670eae32dcSDimitry Andric if (!CostFactor.isValid())
10680eae32dcSDimitry Andric return InstructionCost::getMax();
10690eae32dcSDimitry Andric
10700eae32dcSDimitry Andric InstructionCost Cost = LT.first * CostFactor;
10710eae32dcSDimitry Andric assert(Cost.isValid() && "Expected valid cost");
10720eae32dcSDimitry Andric
10730eae32dcSDimitry Andric // On P9 (but not on P10), a misaligned op causes a pipeline flush.
10740eae32dcSDimitry Andric // Otherwise, the VSX masked memops cost the same as unmasked
10750eae32dcSDimitry Andric // ones.
10760eae32dcSDimitry Andric const Align DesiredAlignment(16);
10770eae32dcSDimitry Andric if (Alignment >= DesiredAlignment || ST->getCPUDirective() != PPC::DIR_PWR9)
10780eae32dcSDimitry Andric return Cost;
10790eae32dcSDimitry Andric
10800eae32dcSDimitry Andric // Since the alignment may be underestimated, we try to compute the probability
10810eae32dcSDimitry Andric // that the actual address is aligned to the desired boundary. For example
10820eae32dcSDimitry Andric // an 8-byte aligned load is assumed to be actually 16-byte aligned half the
10830eae32dcSDimitry Andric // time, while a 4-byte aligned load has a 25% chance of being 16-byte
10840eae32dcSDimitry Andric // aligned.
10850eae32dcSDimitry Andric float AlignmentProb = ((float)Alignment.value()) / DesiredAlignment.value();
10860eae32dcSDimitry Andric float MisalignmentProb = 1.0f - AlignmentProb;
10870eae32dcSDimitry Andric return (MisalignmentProb * P9PipelineFlushEstimate) +
10880eae32dcSDimitry Andric (AlignmentProb * *Cost.getValue());
10890eae32dcSDimitry Andric }
10900eae32dcSDimitry Andric
10910eae32dcSDimitry Andric // We should rarely get here, but the following is an attempt to model the
10920eae32dcSDimitry Andric // cost of legalization. Currently we can only lower intrinsics with an evl
10930eae32dcSDimitry Andric // but no mask, on Power 9/10; otherwise we must scalarize.
10940eae32dcSDimitry Andric return getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
10950eae32dcSDimitry Andric }
1096bdd1243dSDimitry Andric
1097bdd1243dSDimitry Andric bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
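// Tail-call eligibility is decided entirely by the target lowering.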
109806c3fb27SDimitry Andric return TLI->supportsTailCallFor(CB);
1099bdd1243dSDimitry Andric }