//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include <optional>
#include <utility>

namespace llvm {

class Function;

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {

protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  virtual ~TargetTransformInfoImplBase();

  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  virtual const DataLayout &getDataLayout() const { return DL; }

  // FIXME: It looks like this implementation is dead. All clients appear to
  // use the (non-const) version from `TargetTransformInfoImplCRTPBase`.
  virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                                     ArrayRef<const Value *> Operands,
                                     Type *AccessType,
                                     TTI::TargetCostKind CostKind) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
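    // For example, `getelementptr i32, ptr %p, i64 4` has an all-constant
    // index list, so this model treats it as free.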
    for (const Value *Operand : Operands)
      if (!isa<Constant>(Operand))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  virtual InstructionCost
  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
                       const TTI::PointersChainInfo &Info, Type *AccessTy,
                       TTI::TargetCostKind CostKind) const {
    llvm_unreachable("Not implemented");
  }

  virtual unsigned
  getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
                                   ProfileSummaryInfo *PSI,
                                   BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  virtual InstructionCost
  getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
                     TTI::TargetCostKind CostKind) const {
    llvm_unreachable("Not implemented");
  }

  virtual unsigned getInliningThresholdMultiplier() const { return 1; }
  virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const {
    return 8;
  }
  virtual unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
    return 8;
  }
  virtual int getInliningLastCallToStaticBonus() const {
    // This is the value of InlineConstants::LastCallToStaticBonus before it
    // was removed along with the introduction of this function.
    return 15000;
  }
  virtual unsigned adjustInliningThreshold(const CallBase *CB) const {
    return 0;
  }
  virtual unsigned getCallerAllocaCost(const CallBase *CB,
                                       const AllocaInst *AI) const {
    return 0;
  };

  virtual int getInlinerVectorBonusPercent() const { return 150; }

  virtual InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const { return 64; }

  // Although this default value is arbitrary, it is not random. It is assumed
  // that a condition that evaluates the same way by a higher percentage than
  // this is best represented as control flow. Therefore, the default value N
  // should be set such that the win from N% correct executions is greater than
  // the loss from (100 - N)% mispredicted executions for the majority of
  // intended targets.
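  // With the 99/100 default below, clients of this threshold treat a branch
  // as predictable only when one side is taken at least 99% of the time.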
  virtual BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }

  virtual InstructionCost getBranchMispredictPenalty() const { return 0; }

  virtual bool hasBranchDivergence(const Function *F = nullptr) const {
    return false;
  }

  virtual bool isSourceOfDivergence(const Value *V) const { return false; }

  virtual bool isAlwaysUniform(const Value *V) const { return false; }

  virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return false;
  }

  virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
    return true;
  }

  virtual unsigned getFlatAddressSpace() const { return -1; }

  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                          Intrinsic::ID IID) const {
    return false;
  }

  virtual bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }

  virtual bool
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS == 0;
  };

  virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  virtual bool isSingleThreaded() const { return false; }

  virtual std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
  }

  virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                                  Value *OldV,
                                                  Value *NewV) const {
    return nullptr;
  }

  virtual bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
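    // (For instance, a call to fabs is typically selected to an ISD::FABS
    // node rather than a libcall on most targets.)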
    // clang-format off
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs"   || Name == "fabsf"   || Name == "fabsl"   ||
        Name == "fmin"   || Name == "fminf"   || Name == "fminl"   ||
        Name == "fmax"   || Name == "fmaxf"   || Name == "fmaxl"   ||
        Name == "sin"    || Name == "sinf"    || Name == "sinl"    ||
        Name == "cos"    || Name == "cosf"    || Name == "cosl"    ||
        Name == "tan"    || Name == "tanf"    || Name == "tanl"    ||
        Name == "asin"   || Name == "asinf"   || Name == "asinl"   ||
        Name == "acos"   || Name == "acosf"   || Name == "acosl"   ||
        Name == "atan"   || Name == "atanf"   || Name == "atanl"   ||
        Name == "atan2"  || Name == "atan2f"  || Name == "atan2l"  ||
        Name == "sinh"   || Name == "sinhf"   || Name == "sinhl"   ||
        Name == "cosh"   || Name == "coshf"   || Name == "coshl"   ||
        Name == "tanh"   || Name == "tanhf"   || Name == "tanhl"   ||
        Name == "sqrt"   || Name == "sqrtf"   || Name == "sqrtl"   ||
        Name == "exp10"  || Name == "exp10l"  || Name == "exp10f")
      return false;
    // clang-format on
    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                        AssumptionCache &AC,
                                        TargetLibraryInfo *LibInfo,
                                        HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  virtual unsigned getEpilogueVectorizationMinVF() const { return 16; }

  virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const {
    return false;
  }

  virtual TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
    return TailFoldingStyle::DataWithoutLaneMask;
  }

  virtual std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
    return std::nullopt;
  }

  virtual std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return std::nullopt;
  }

  virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return std::nullopt;
  }
  virtual void getUnrollingPreferences(Loop *, ScalarEvolution &,
                                       TTI::UnrollingPreferences &,
                                       OptimizationRemarkEmitter *) const {}

  virtual void getPeelingPreferences(Loop *, ScalarEvolution &,
                                     TTI::PeelingPreferences &) const {}

  virtual bool isLegalAddImmediate(int64_t Imm) const { return false; }

  virtual bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }

  virtual bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale, unsigned AddrSpace,
                                     Instruction *I = nullptr,
                                     int64_t ScalableOffset = 0) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  virtual bool isLSRCostLess(const TTI::LSRCost &C1,
                             const TTI::LSRCost &C2) const {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  virtual bool isNumRegsMajorCostOfLSR() const { return true; }

  virtual bool shouldDropLSRSolutionIfLessProfitable() const { return false; }

  virtual bool isProfitableLSRChainElement(Instruction *I) const {
    return false;
  }

  virtual bool canMacroFuseCmp() const { return false; }

  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                          LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
                          TargetLibraryInfo *LibInfo) const {
    return false;
  }

  virtual TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
    return TTI::AMK_None;
  }

  virtual bool isLegalMaskedStore(Type *DataType, Align Alignment,
                                  unsigned AddressSpace) const {
    return false;
  }

  virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment,
                                 unsigned AddressSpace) const {
    return false;
  }

  virtual bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
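    // e.g. a 16-byte <4 x float> load with 16-byte alignment satisfies both
    // conditions below.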
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  virtual bool isLegalBroadcastLoad(Type *ElementTy,
                                    ElementCount NumElements) const {
    return false;
  }

  virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  virtual bool forceScalarizeMaskedGather(VectorType *DataType,
                                          Align Alignment) const {
    return false;
  }

  virtual bool forceScalarizeMaskedScatter(VectorType *DataType,
                                           Align Alignment) const {
    return false;
  }

  virtual bool isLegalMaskedCompressStore(Type *DataType,
                                          Align Alignment) const {
    return false;
  }

  virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
                               unsigned Opcode1,
                               const SmallBitVector &OpcodeMask) const {
    return false;
  }

  virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const {
    return false;
  }

  virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                            Align Alignment,
                                            unsigned AddrSpace) const {
    return false;
  }

  virtual bool isLegalMaskedVectorHistogram(Type *AddrType,
                                            Type *DataType) const {
    return false;
  }

  virtual bool enableOrderedReductions() const { return false; }

  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) const {
    return false;
  }

  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  virtual bool prefersVectorizedAddressing() const { return true; }

  virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                               StackOffset BaseOffset,
                                               bool HasBaseReg, int64_t Scale,
                                               unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset.getFixed(), HasBaseReg,
                              Scale, AddrSpace, /*I=*/nullptr,
                              BaseOffset.getScalable()))
      return 0;
    return InstructionCost::getInvalid();
  }

  virtual bool LSRWithInstrQueries() const { return false; }

  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  virtual bool isProfitableToHoist(Instruction *I) const { return true; }

  virtual bool useAA() const { return false; }

  virtual bool isTypeLegal(Type *Ty) const { return false; }

  virtual unsigned getRegUsageForType(Type *Ty) const { return 1; }

  virtual bool shouldBuildLookupTables() const { return true; }

  virtual bool shouldBuildLookupTablesForConstant(Constant *C) const {
    return true;
  }

  virtual bool shouldBuildRelLookupTables() const { return false; }

  virtual bool useColdCCForColdCall(Function &F) const { return false; }

  virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
    return false;
  }

  virtual bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                                  unsigned ScalarOpdIdx) const {
    return false;
  }

  virtual bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
                                                      int OpdIdx) const {
    return OpdIdx == -1;
  }

  virtual bool
  isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
                                                   int RetIdx) const {
    return RetIdx == 0;
  }

  virtual InstructionCost getScalarizationOverhead(
      VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
      TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
      ArrayRef<Value *> VL = {}) const {
    return 0;
  }

  virtual InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 0;
  }

  virtual bool supportsEfficientVectorElementLoadStore() const {
    return false;
  }

  virtual bool supportsTailCalls() const { return true; }

  virtual bool supportsTailCallFor(const CallBase *CB) const {
    llvm_unreachable("Not implemented");
  }

  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  virtual TTI::MemCmpExpansionOptions
  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
    return {};
  }

  virtual bool enableSelectOptimize() const { return true; }

  virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) const {
    // A select with two constant operands will usually be better left as a
    // select.
    using namespace llvm::PatternMatch;
    if (match(I, m_Select(m_Value(), m_Constant(), m_Constant())))
      return false;
    // If the select is a logical-and/logical-or then it is better treated as
    // an and/or by the backend.
    return isa<SelectInst>(I) &&
           !match(I, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
                                 m_LogicalOr(m_Value(), m_Value())));
  }

  virtual bool enableInterleavedAccessVectorization() const { return false; }

  virtual bool enableMaskedInterleavedAccessVectorization() const {
    return false;
  }

  virtual bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                              unsigned BitWidth,
                                              unsigned AddressSpace,
                                              Align Alignment,
                                              unsigned *Fast) const {
    return false;
  }

  virtual TTI::PopcntSupportKind
  getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  virtual bool haveFastSqrt(Type *Ty) const { return false; }

  virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) const {
    return true;
  }

  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  virtual InstructionCost getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  virtual InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                                const APInt &Imm,
                                                Type *Ty) const {
    return 0;
  }

  virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                        TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  virtual InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                            const APInt &Imm, Type *Ty,
                                            TTI::TargetCostKind CostKind,
                                            Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  virtual InstructionCost
  getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                      Type *Ty, TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  virtual bool preferToKeepConstantsAttached(const Instruction &Inst,
                                             const Function &Fn) const {
    return false;
  }

  virtual unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  virtual bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const {
    return false;
  }
  virtual unsigned getRegisterClassForType(bool Vector,
                                           Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }

  virtual const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  virtual TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  virtual unsigned getMinVectorRegisterBitWidth() const { return 128; }

  virtual std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
  virtual std::optional<unsigned> getVScaleForTuning() const {
    return std::nullopt;
  }
  virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; }

  virtual bool
  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
    return false;
  }

  virtual ElementCount getMinimumVF(unsigned ElemWidth,
                                    bool IsScalable) const {
    return ElementCount::get(0, IsScalable);
  }

  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
    return 0;
  }

  virtual unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const {
    return VF;
  }

  virtual bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  virtual unsigned getCacheLineSize() const { return 0; }

  virtual std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  virtual std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  virtual std::optional<unsigned> getMinPageSize() const { return {}; }

  virtual unsigned getPrefetchDistance() const { return 0; }

  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                        unsigned NumStridedMemAccesses,
                                        unsigned NumPrefetches,
                                        bool HasCall) const {
    return 1;
  }

  virtual unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }

  virtual bool enableWritePrefetching() const { return false; }
  virtual bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }

  virtual InstructionCost getPartialReductionCost(
      unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
      ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
      TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
      TTI::TargetCostKind CostKind) const {
    return InstructionCost::getInvalid();
  }

  virtual unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }

  virtual InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) const {
    // Widenable conditions will eventually lower into constants, so some
    // operations with them will be trivially optimized away.
    auto IsWidenableCondition = [](const Value *V) {
      if (auto *II = dyn_cast<IntrinsicInst>(V))
        if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
          return true;
      return false;
    };
    // FIXME: A number of transformation tests seem to require these values
    // which seems a little odd for how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    case Instruction::And:
    case Instruction::Or:
      if (any_of(Args, IsWidenableCondition))
        return TTI::TCC_Free;
      break;
    }

    // Assume a 3cy latency for fp arithmetic ops.
    if (CostKind == TTI::TCK_Latency)
      if (Ty->getScalarType()->isFloatingPointTy())
        return 3;

    return 1;
  }

  virtual InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                          unsigned Opcode1,
                                          const SmallBitVector &OpcodeMask,
                                          TTI::TargetCostKind CostKind) const {
    return InstructionCost::getInvalid();
  }

  virtual InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const {
    return 1;
  }

  virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
                                           Type *Src, TTI::CastContextHint CCH,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
        return 0;
      break;
    }
    }
    return 1;
  }

  virtual InstructionCost
  getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                           unsigned Index, TTI::TargetCostKind CostKind) const {
    return 1;
  }

  virtual InstructionCost getCFInstrCost(unsigned Opcode,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  virtual InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
      TTI::OperandValueInfo Op2Info, const Instruction *I) const {
    return 1;
  }

  virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                             TTI::TargetCostKind CostKind,
                                             unsigned Index, const Value *Op0,
                                             const Value *Op1) const {
    return 1;
  }

  /// \param ScalarUserAndIdx encodes the information about extracts from a
  /// vector with 'Scalar' being the value being extracted, 'User' being the
  /// user of the extract (nullptr if the user is not known before
  /// vectorization) and 'Idx' being the extract lane.
  virtual InstructionCost getVectorInstrCost(
      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
      Value *Scalar,
      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
    return 1;
  }

  virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                             TTI::TargetCostKind CostKind,
                                             unsigned Index) const {
    return 1;
  }

  virtual InstructionCost
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
                            TTI::TargetCostKind CostKind) const {
    return 1;
  }

  virtual InstructionCost
  getInsertExtractValueCost(unsigned Opcode,
                            TTI::TargetCostKind CostKind) const {
    // Note: The `insertvalue` cost here is chosen to match the default case of
    // getInstructionCost() -- as prior to adding this helper `insertvalue` was
    // not handled.
    if (Opcode == Instruction::InsertValue &&
        CostKind != TTI::TCK_RecipThroughput)
      return TTI::TCC_Basic;
    return TTI::TCC_Free;
  }

  virtual InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo, const Instruction *I) const {
    return 1;
  }

  virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
                                            Align Alignment,
                                            unsigned AddressSpace,
                                            TTI::TargetCostKind CostKind,
                                            const Instruction *I) const {
    return 1;
  }

  virtual InstructionCost
  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                        unsigned AddressSpace,
                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

  virtual InstructionCost
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr) const {
    return 1;
  }

  virtual InstructionCost getExpandCompressMemoryOpCost(
      unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
      TTI::TargetCostKind CostKind, const Instruction *I = nullptr) const {
    return 1;
  }

  virtual InstructionCost
  getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr) const {
    return InstructionCost::getInvalid();
  }

  virtual InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  virtual InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::experimental_vector_histogram_add:
      // For now, we want explicit support from the target for histograms.
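      // Returning an invalid cost signals to clients that this default
      // implementation does not support the operation.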
      return InstructionCost::getInvalid();
    case Intrinsic::allow_runtime_check:
    case Intrinsic::allow_ubsan_check:
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_assign:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_begin_custom_abi:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
    case Intrinsic::threadlocal_address:
    case Intrinsic::experimental_widenable_condition:
    case Intrinsic::ssa_copy:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                           ArrayRef<Type *> Tys,
                                           TTI::TargetCostKind CostKind) const {
    return 1;
  }

  // Assume that we have a register of the right size for the type.
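  // e.g. under this assumption a <4 x i32> value occupies a single register
  // rather than being split, so the default is a single part.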
  virtual unsigned getNumberOfParts(Type *Tp) const { return 1; }

  virtual InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                                    const SCEV *) const {
    return 0;
  }

  virtual InstructionCost
  getArithmeticReductionCost(unsigned, VectorType *,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind) const {
    return 1;
  }

  virtual InstructionCost getMinMaxReductionCost(Intrinsic::ID IID,
                                                 VectorType *, FastMathFlags,
                                                 TTI::TargetCostKind) const {
    return 1;
  }

  virtual InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
                           VectorType *Ty, std::optional<FastMathFlags> FMF,
                           TTI::TargetCostKind CostKind) const {
    return 1;
  }

  virtual InstructionCost
  getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
                         TTI::TargetCostKind CostKind) const {
    return 1;
  }

  virtual InstructionCost
  getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                  MemIntrinsicInfo &Info) const {
    return false;
  }

  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }

  virtual Value *
  getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
                                    bool CanCreate = true) const {
    return nullptr;
  }
  virtual Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
                            Align SrcAlign, Align DestAlign,
                            std::optional<uint32_t> AtomicElementSize) const {
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }

  virtual void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicCpySize) const {
    unsigned OpSizeInBytes = AtomicCpySize.value_or(1);
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
      OpsOut.push_back(OpType);
  }

  virtual bool areInlineCompatible(const Function *Caller,
                                   const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                        unsigned DefaultCallPenalty) const {
    return DefaultCallPenalty;
  }

  virtual bool areTypesABICompatible(const Function *Caller,
                                     const Function *Callee,
                                     const ArrayRef<Type *> &Types) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  virtual bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty) const {
    return false;
  }

  virtual bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty) const {
    return false;
  }

  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
    return 128;
  }

  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  virtual bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                           Align Alignment,
                                           unsigned AddrSpace) const {
    return true;
  }

  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                            Align Alignment,
                                            unsigned AddrSpace) const {
    return true;
  }

  virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                           ElementCount VF) const {
    return true;
  }

  virtual bool isElementTypeLegalForScalableVector(Type *Ty) const {
    return true;
  }
  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                       unsigned ChainSizeInBytes,
                                       VectorType *VecTy) const {
    return VF;
  }

  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                        unsigned ChainSizeInBytes,
                                        VectorType *VecTy) const {
    return VF;
  }

  virtual bool preferFixedOverScalableIfEqualCost() const { return false; }

  virtual bool preferInLoopReduction(RecurKind Kind, Type *Ty) const {
    return false;
  }

  virtual bool preferAlternateOpcodeVectorization() const { return true; }

  virtual bool preferPredicatedReductionSelect() const { return false; }

  virtual bool preferEpilogueVectorization() const { return true; }

  virtual bool shouldExpandReduction(const IntrinsicInst *II) const {
    return true;
  }

  virtual TTI::ReductionShuffle
  getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const {
    return TTI::ReductionShuffle::SplitHalf;
  }

  virtual unsigned getGISelRematGlobalCost() const { return 1; }

  virtual unsigned getMinTripCountTailFoldingThreshold() const { return 0; }

  virtual bool supportsScalableVectors() const { return false; }

  virtual bool enableScalableVectorization() const { return false; }

  virtual bool hasActiveVectorLength() const { return false; }

  virtual bool isProfitableToSinkOperands(Instruction *I,
                                          SmallVectorImpl<Use *> &Ops) const {
    return false;
  }

  virtual bool isVectorShiftByScalarCheap(Type *Ty) const { return false; }

  virtual TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }

  virtual bool hasArmWideBranch(bool) const { return false; }

  virtual uint64_t getFeatureMask(const Function &F) const { return 0; }

  virtual bool isMultiversionedFunction(const Function &F) const {
    return false;
  }

  virtual unsigned getMaxNumArgs() const { return UINT_MAX; }

  virtual unsigned getNumBytesToPadGlobalArray(unsigned Size,
                                               Type *ArrayType) const {
    return 0;
  }

  virtual void collectKernelLaunchBounds(
      const Function &F,
      SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In case of a vector we need to pick the max of the min required
      // sizes of the elements.
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the size of the vector element type.
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getSignificantBits() - 1;
          // In case one element is signed then all the vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an int constant element.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getSignificantBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take the absolute value for the negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
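/// Derived classes pass themselves as the template parameter (e.g. a
/// hypothetical `class MyTTIImpl : public
/// TargetTransformInfoImplCRTPBase<MyTTIImpl>`), which lets the helpers
/// below dispatch to target overrides via `static_cast<const T *>(this)`.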
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands, Type *AccessType,
                             TTI::TargetCostKind CostKind) const override {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand,
    // the base pointer, and therefore TargetType remains nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of a scalar GEP with a constant index and the
      // cost of a vector GEP with a splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always a splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: Make isLegalAddressingMode TypeSize aware.
        if (TargetType->isScalableTy())
          return TTI::TCC_Basic;
        int64_t ElementSize =
            GTI.getSequentialElementStride(DL).getFixedValue();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    // If we haven't been provided a hint, use the target type for now.
    //
    // TODO: Take a look at potentially removing this: This is *slightly* wrong
    // as it's possible to have a GEP with a foldable target type but a memory
    // access that isn't foldable. For example, this load isn't foldable on
    // RISC-V:
    //
    //    %p = getelementptr i32, ptr %base, i32 42
    //    %x = load <2 x i32>, ptr %p
    if (!AccessType)
      AccessType = TargetType;

    // If the final address of the GEP is a legal addressing mode for the given
    // access type, then we can fold it into its users.
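    // For example, on targets with a reg+imm addressing mode, a small
    // accumulated constant offset typically folds into the eventual load or
    // store, making the GEP itself free.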
    if (static_cast<const T *>(this)->isLegalAddressingMode(
            AccessType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;

    // TODO: Instead of returning TCC_Basic here, we should use
    // getArithmeticInstrCost. Or better yet, provide a hook to let the target
    // model it.
    return TTI::TCC_Basic;
  }

  InstructionCost
  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
                       const TTI::PointersChainInfo &Info, Type *AccessTy,
                       TTI::TargetCostKind CostKind) const override {
    InstructionCost Cost = TTI::TCC_Free;
    // In the basic model we only account for GEP instructions (although an
    // alloca, a plain value, constants and/or constant expressions, PHIs,
    // bitcasts, or anything else usable as a pointer may appear here).
    // Typically, if Base is not a GEP instruction and all the pointers are
    // relative to the same base address, the rest are GEP instructions, PHIs,
    // bitcasts or constants. When the pointers share a base, we cost each
    // non-Base GEP as a single ADD operation if any of its indices is
    // non-constant. If there are no known dependencies between the pointers,
    // the cost is the sum of the costs of the individual GEP instructions.
    for (const Value *V : Ptrs) {
      const auto *GEP = dyn_cast<GetElementPtrInst>(V);
      if (!GEP)
        continue;
      if (Info.isSameBase() && V != Base) {
        if (GEP->hasAllConstantIndices())
          continue;
        Cost += static_cast<const T *>(this)->getArithmeticInstrCost(
            Instruction::Add, GEP->getType(), CostKind,
            {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
            {});
      } else {
        SmallVector<const Value *> Indices(GEP->indices());
        Cost += static_cast<const T *>(this)->getGEPCost(
            GEP->getSourceElementType(), GEP->getPointerOperand(), Indices,
            AccessTy, CostKind);
      }
    }
    return Cost;
  }

  InstructionCost
  getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
                     TTI::TargetCostKind CostKind) const override {
    using namespace llvm::PatternMatch;

    auto *TargetTTI = static_cast<const T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it won't be lowered
                                 // to a real call.

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
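      // Illustrative costs under this scheme (a sketch, assuming the default
      // TCC_Basic value of 1):
      //   call void @f(i32 %a, i32 %b)  ; direct, lowered to a real call:
      //                                 ; 1 * (2 + 1) = 3
      //   call void %fp(i32 %a)         ; indirect: 1 * (1 + 1) = 2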
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::ExtractValue:
    case Instruction::InsertValue:
      return TargetTTI->getInsertExtractValueCost(Opcode, CostKind);
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      Type *AccessType = nullptr;
      // For now, only provide the AccessType in the simple case where the GEP
      // only has one user.
      if (GEP->hasOneUser() && I)
        AccessType = I->user_back()->getAccessType();

      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   Operands.front(), Operands.drop_front(),
                                   AccessType, CostKind);
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
      TTI::OperandValueInfo Op2Info;
      if (Opcode != Instruction::FNeg)
        Op2Info = TTI::getOperandInfo(Operands[1]);
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                               Op2Info, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      Type *OpTy = Operands[0]->getType();
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    }
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = Operands[0]->getType();
      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]);
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(), CostKind,
                                        OpInfo, I);
    }
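    // Illustrative operand info (a sketch): for `store i32 0, ptr %p` the
    // stored value is a uniform constant, so OpInfo above carries
    // OK_UniformConstantValue, letting a target's getMemoryOpCost discount
    // stores of easy-to-materialize constants.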
    case Instruction::Load: {
      // FIXME: Arbitrary cost which could come from the backend.
      if (CostKind == TTI::TCK_Latency)
        return 4;
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      // If the loaded type does not fit in a register, the cost estimate may
      // expand the load into several instructions filling multiple registers
      // on the target. But if the only use of the load is a trunc instruction
      // to a register-sized type, the instruction selector can combine the
      // two into a single load. So, in this case, we use the destination type
      // of the trunc instruction rather than the load type to more accurately
      // estimate the cost of this load instruction.
      if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
          !LoadType->isVectorTy()) {
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
          LoadType = TI->getDestTy();
      }
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(), CostKind,
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
        // select x, y, false --> x & y
        // select x, true, y --> x | y
        const auto Op1Info = TTI::getOperandInfo(Op0);
        const auto Op2Info = TTI::getOperandInfo(Op1);
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
               Op1->getType()->getScalarSizeInBits() == 1);

        SmallVector<const Value *, 2> Operands{Op0, Op1};
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1Info, Op2Info, Operands, I);
      }
      const auto Op1Info = TTI::getOperandInfo(Operands[1]);
      const auto Op2Info = TTI::getOperandInfo(Operands[2]);
      Type *CondTy = Operands[0]->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, Op1Info, Op2Info, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      const auto Op1Info = TTI::getOperandInfo(Operands[0]);
      const auto Op2Info = TTI::getOperandInfo(Operands[1]);
      Type *ValTy = Operands[0]->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, Op1Info, Op2Info, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME

      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
      ArrayRef<int> Mask = Shuffle->getShuffleMask();
      int NumSubElts, SubIndex;

      // Treat undef/poison mask as free (no matter the length).
      if (all_of(Mask, [](int M) { return M < 0; }))
        return TTI::TCC_Free;

      // TODO: move more of this inside improveShuffleKindFromMask.
      if (Shuffle->changesLength()) {
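        // Example (a sketch): with <4 x i32> sources, mask <0, 1> narrows the
        // result, while mask <0, 1, 2, 3, poison, poison, poison, poison>
        // widens it to 8 lanes as an identity-with-padding shuffle, which is
        // treated as free below.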
        // Treat a 'subvector widening' as a free shuffle.
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return TTI::TCC_Free;

        if (Shuffle->isExtractSubvectorMask(SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
                                           VecSrcTy, Mask, CostKind, SubIndex,
                                           VecTy, Operands, Shuffle);

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(
              TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind,
              SubIndex,
              FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
              Operands, Shuffle);

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          APInt DemandedDstElts = APInt::getZero(Mask.size());
          for (auto I : enumerate(Mask)) {
            if (I.value() != PoisonMaskElem)
              DemandedDstElts.setBit(I.index());
          }
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }

        bool IsUnary = isa<UndefValue>(Operands[1]);
        NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
        SmallVector<int, 16> AdjustMask(Mask);

        // Widening shuffle - widen the source(s) to the new length (treated
        // as free - see above), then perform the adjusted shuffle at that
        // width.
        if (Shuffle->increasesLength()) {
          for (int &M : AdjustMask)
            M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;

          return TargetTTI->getShuffleCost(
              IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
              VecTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
        }

        // Narrowing shuffle - perform the shuffle at the original wider width
        // and then extract the lower elements.
        // FIXME: This can assume widening, which is not true of all vector
        // architectures (and is not even the default).
        AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);
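        // Example (a sketch): shuffling two <8 x i32> sources down to
        // <4 x i32> is modelled as an 8-lane permute over the poison-padded
        // mask, followed by extracting the low 4 elements.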
        InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
            IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
            VecSrcTy, VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands,
            Shuffle);

        SmallVector<int, 16> ExtractMask(Mask.size());
        std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
        return ShuffleCost + TargetTTI->getShuffleCost(
                                 TTI::SK_ExtractSubvector, VecTy, VecSrcTy,
                                 ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
      }

      if (Shuffle->isIdentity())
        return TTI::TCC_Free;

      if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, VecSrcTy, Mask,
                                         CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, VecSrcTy,
                                         Mask, CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, VecSrcTy,
                                         Mask, CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
                                         VecSrcTy, Mask, CostKind, 0, nullptr,
                                         Operands, Shuffle);

      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(
            TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind, SubIndex,
            FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
            Shuffle);

      if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, VecSrcTy, Mask,
                                         CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isSplice(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, VecSrcTy, Mask,
                                         CostKind, SubIndex, nullptr, Operands,
                                         Shuffle);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, VecSrcTy,
                                       Mask, CostKind, 0, nullptr, Operands,
                                       Shuffle);
    }
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = Operands[0]->getType();
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
    }
    }

    // By default, just classify everything remaining as 'basic'.
    return TTI::TCC_Basic;
  }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) const override {
    auto *TargetTTI = static_cast<const T *>(this);
    SmallVector<const Value *, 4> Ops(I->operand_values());
    InstructionCost Cost = TargetTTI->getInstructionCost(
        I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
    return Cost >= TargetTransformInfo::TCC_Expensive;
  }

  bool supportsTailCallFor(const CallBase *CB) const override {
    return static_cast<const T *>(this)->supportsTailCalls();
  }
};
} // namespace llvm

#endif