//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include <optional>
#include <utility>

namespace llvm {

class Function;

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {

protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands, Type *AccessType,
                             TTI::TargetCostKind CostKind) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (const Value *Operand : Operands)
      if (!isa<Constant>(Operand))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const { return 8; }
  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
    return 8;
  }
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
  unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
    return 0;
  };

  int getInlinerVectorBonusPercent() const { return 150; }

  InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
    return 64;
  }

  // Although this default value is arbitrary, it is not random. It is assumed
  // that a condition that evaluates the same way by a higher percentage than
  // this is best represented as control flow. Therefore, the default value N
  // should be set such that the win from N% correct executions is greater than
  // the loss from (100 - N)% mispredicted executions for the majority of
  // intended targets.
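  //
  // As a purely illustrative example (the numbers are not derived from any
  // particular target): with a ~1-cycle cost for a correctly predicted branch
  // and a ~20-cycle misprediction penalty, a branch that goes the same way 99%
  // of the time averages roughly 0.99 * 1 + 0.01 * 20 = 1.19 cycles, which
  // typically beats the data dependency a branchless select would introduce.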
  BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }

  InstructionCost getBranchMispredictPenalty() const { return 0; }

  bool hasBranchDivergence(const Function *F = nullptr) const { return false; }

  bool isSourceOfDivergence(const Value *V) const { return false; }

  bool isAlwaysUniform(const Value *V) const { return false; }

  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return false;
  }

  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
    return true;
  }

  unsigned getFlatAddressSpace() const { return -1; }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS == 0;
  };

  unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  bool isSingleThreaded() const { return false; }

  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    // clang-format off
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sin" || Name == "sinf" || Name == "sinl" ||
        Name == "cos" || Name == "cosf" || Name == "cosl" ||
        Name == "tan" || Name == "tanf" || Name == "tanl" ||
        Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;
    // clang-format on
    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const { return false; }

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
    return TailFoldingStyle::DataWithoutLaneMask;
  }

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const {
    return std::nullopt;
  }

  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return std::nullopt;
  }

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return std::nullopt;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &,
                               OptimizationRemarkEmitter *) const {}

  void getPeelingPreferences(Loop *, ScalarEvolution &,
                             TTI::PeelingPreferences &) const {}

  bool isLegalAddImmediate(int64_t Imm) const { return false; }

  bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }

  bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr,
                             int64_t ScalableOffset = 0) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
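    // Read as an illustration of the check below: addresses of the form [reg]
    // or [reg + reg] are treated as legal, while anything with a global base
    // or a nonzero constant offset (e.g. [reg + 42] or [@gv + reg]) is not.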
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool isNumRegsMajorCostOfLSR() const { return true; }

  bool shouldFoldTerminatingConditionAfterLSR() const { return false; }

  bool shouldDropLSRSolutionIfLessProfitable() const { return false; }

  bool isProfitableLSRChainElement(Instruction *I) const { return false; }

  bool canMacroFuseCmp() const { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const {
    return false;
  }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
    return TTI::AMK_None;
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    return false;
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) const {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const {
    return false;
  }

  bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const {
    return false;
  }

  bool enableOrderedReductions() const { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  bool prefersVectorizedAddressing() const { return true; }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset.getFixed(), HasBaseReg,
                              Scale, AddrSpace, /*I=*/nullptr,
                              BaseOffset.getScalable()))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() const { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  bool isProfitableToHoist(Instruction *I) const { return true; }

  bool useAA() const { return false; }

  bool isTypeLegal(Type *Ty) const { return false; }

  unsigned getRegUsageForType(Type *Ty) const { return 1; }

  bool shouldBuildLookupTables() const { return true; }

  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }

  bool shouldBuildRelLookupTables() const { return false; }

  bool useColdCCForColdCall(Function &F) const { return false; }

  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind) const {
    return 0;
  }

  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 0;
  }

  bool supportsEfficientVectorElementLoadStore() const { return false; }

  bool supportsTailCalls() const { return true; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableSelectOptimize() const { return true; }

  bool shouldTreatInstructionLikeSelect(const Instruction *I) {
    // If the select is a logical-and/logical-or then it is better treated as
    // an and/or by the backend.
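    // For reference, the select forms rejected by the pattern match below are:
    //   %r = select i1 %a, i1 %b, i1 false   ; logical and
    //   %r = select i1 %a, i1 true, i1 %b    ; logical or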
    using namespace llvm::PatternMatch;
    return isa<SelectInst>(I) &&
           !match(I, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
                                 m_LogicalOr(m_Value(), m_Value())));
  }

  bool enableInterleavedAccessVectorization() const { return false; }

  bool enableMaskedInterleavedAccessVectorization() const { return false; }

  bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) const {
    return false;
  }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) const { return false; }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  InstructionCost getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty) const {
    return 0;
  }

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  bool preferToKeepConstantsAttached(const Instruction &Inst,
                                     const Function &Fn) const {
    return false;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
  bool hasConditionalLoadStoreForType(Type *Ty) const { return false; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  };

  const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  unsigned getMinVectorRegisterBitWidth() const { return 128; }

  std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
  std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
  bool isVScaleKnownToBeAPowerOfTwo() const { return false; }

  bool
  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
    return false;
  }

  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
    return ElementCount::get(0, IsScalable);
  }

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
  unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }
  std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  std::optional<unsigned> getMinPageSize() const { return {}; }

  unsigned getPrefetchDistance() const { return 0; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  bool enableWritePrefetching() const { return false; }
  bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }

  unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) const {
    // Widenable conditions will eventually lower into constants, so some
    // operations with them will be trivially optimized away.
    auto IsWidenableCondition = [](const Value *V) {
      if (auto *II = dyn_cast<IntrinsicInst>(V))
        if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
          return true;
      return false;
    };
    // FIXME: A number of transformation tests seem to require these values,
    // which seems a little odd given how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    case Instruction::And:
    case Instruction::Or:
      if (any_of(Args, IsWidenableCondition))
        return TTI::TCC_Free;
      break;
    }

    // Assume a 3cy latency for fp arithmetic ops.
    if (CostKind == TTI::TCK_Latency)
      if (Ty->getScalarType()->isFloatingPointTy())
        return 3;

    return 1;
  }

  InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                  unsigned Opcode1,
                                  const SmallBitVector &OpcodeMask,
                                  TTI::TargetCostKind CostKind) const {
    return InstructionCost::getInvalid();
  }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt,
                                 const Instruction *CxtI = nullptr) const {
    return 1;
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
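      // For example, on a target whose DataLayout marks i32 as a legal integer
      // type, 'trunc i64 %x to i32' is costed as free by the check below.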
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
        return 0;
      break;
    }
    }
    return 1;
  }

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) const {
    return 1;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index) const {
    return 1;
  }

  unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                                     const APInt &DemandedDstElts,
                                     TTI::TargetCostKind CostKind) {
    return 1;
  }

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  TTI::OperandValueInfo OpInfo,
                                  const Instruction *I) const {
    return 1;
  }

  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I) const {
    return 1;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return 1;
  }

  InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return InstructionCost::getInvalid();
  }

  unsigned getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::experimental_vector_histogram_add:
      // For now, we want explicit support from the target for histograms.
      return InstructionCost::getInvalid();
    case Intrinsic::allow_runtime_check:
    case Intrinsic::allow_ubsan_check:
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_assign:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
    case Intrinsic::threadlocal_address:
    case Intrinsic::experimental_widenable_condition:
    case Intrinsic::ssa_copy:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 1;
  }

  // Assume that we have a register of the right size for the type.
  unsigned getNumberOfParts(Type *Tp) const { return 1; }

  InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                            const SCEV *) const {
    return 0;
  }

  InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *,
                                         FastMathFlags,
                                         TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *Ty,
                                           FastMathFlags FMF,
                                           TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
                                         VectorType *Ty,
                                         TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }

  Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
                            unsigned SrcAlign, unsigned DestAlign,
                            std::optional<uint32_t> AtomicElementSize) const {
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const {
    unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
      OpsOut.push_back(OpType);
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const {
    return DefaultCallPenalty;
  }

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const {
    return true;
  }

  bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool preferFixedOverScalableIfEqualCost() const { return false; }

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferEpilogueVectorization() const {
    return true;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }

  TTI::ReductionShuffle
  getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const {
    return TTI::ReductionShuffle::SplitHalf;
  }

  unsigned getGISelRematGlobalCost() const { return 1; }

  unsigned getMinTripCountTailFoldingThreshold() const { return 0; }

  bool supportsScalableVectors() const { return false; }

  bool enableScalableVectorization() const { return false; }

  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const {
    return false;
  }

  TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }

  bool hasArmWideBranch(bool) const { return false; }

  unsigned getMaxNumArgs() const { return UINT_MAX; }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In the case of a vector it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In the case of a vector we need to pick the max of the min
      // required sizes of the elements.
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the size of the vector element type.
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getSignificantBits() - 1;
          // If one element is signed then the whole vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // not an int constant element
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getSignificantBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getGEPCost;

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands, Type *AccessType,
                             TTI::TargetCostKind CostKind) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand,
    // the basis, therefore TargetType is a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
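      // E.g. a vector GEP whose index is a <4 x i64> splat of 3 is costed the
      // same as a scalar GEP with a constant index of 3 by the code below.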
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always splat or scalar constant
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: Make isLegalAddressingMode TypeSize aware.
        if (TargetType->isScalableTy())
          return TTI::TCC_Basic;
        int64_t ElementSize =
            GTI.getSequentialElementStride(DL).getFixedValue();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    // If we haven't been provided a hint, use the target type for now.
    //
    // TODO: Take a look at potentially removing this: This is *slightly* wrong
    // as it's possible to have a GEP with a foldable target type but a memory
    // access that isn't foldable. For example, this load isn't foldable on
    // RISC-V:
    //
    // %p = getelementptr i32, ptr %base, i32 42
    // %x = load <2 x i32>, ptr %p
    if (!AccessType)
      AccessType = TargetType;

    // If the final address of the GEP is a legal addressing mode for the given
    // access type, then we can fold it into its users.
    if (static_cast<T *>(this)->isLegalAddressingMode(
            AccessType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;

    // TODO: Instead of returning TCC_Basic here, we should use
    // getArithmeticInstrCost. Or better yet, provide a hook to let the target
    // model it.
    return TTI::TCC_Basic;
  }

  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                       const Value *Base,
                                       const TTI::PointersChainInfo &Info,
                                       Type *AccessTy,
                                       TTI::TargetCostKind CostKind) {
    InstructionCost Cost = TTI::TCC_Free;
    // In the basic model we take into account GEP instructions only (although
    // an alloca instruction, a plain value, constants and/or constant
    // expressions, PHIs, bitcasts ... anything allowed to be used as a pointer
    // may appear here). Typically, if Base is not a GEP instruction and all
    // the pointers are relative to the same base address, all the rest are
    // either GEP instructions, PHIs, bitcasts or constants. When the base is
    // the same, we simply cost each non-Base GEP as an ADD operation if any of
    // its indices is non-constant.
    // If there are no known dependencies between the pointers, the cost is
    // calculated as the sum of the costs of the GEP instructions.
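    //
    // For example, with a shared base %b, a GEP such as
    //   getelementptr i8, ptr %b, i64 %i
    // is costed as a single Add below, while one with all-constant indices,
    //   getelementptr i8, ptr %b, i64 16
    // is considered free.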
    for (const Value *V : Ptrs) {
      const auto *GEP = dyn_cast<GetElementPtrInst>(V);
      if (!GEP)
        continue;
      if (Info.isSameBase() && V != Base) {
        if (GEP->hasAllConstantIndices())
          continue;
        Cost += static_cast<T *>(this)->getArithmeticInstrCost(
            Instruction::Add, GEP->getType(), CostKind,
            {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
            std::nullopt);
      } else {
        SmallVector<const Value *> Indices(GEP->indices());
        Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                   GEP->getPointerOperand(),
                                                   Indices, AccessTy, CostKind);
      }
    }
    return Cost;
  }

  InstructionCost getInstructionCost(const User *U,
                                     ArrayRef<const Value *> Operands,
                                     TTI::TargetCostKind CostKind) {
    using namespace llvm::PatternMatch;

    auto *TargetTTI = static_cast<T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      Type *AccessType = nullptr;
      // For now, only provide the AccessType in the simple case where the GEP
      // only has one user.
      if (GEP->hasOneUser() && I)
        AccessType = I->user_back()->getAccessType();

      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   Operands.front(), Operands.drop_front(),
                                   AccessType, CostKind);
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
      TTI::OperandValueInfo Op2Info;
      if (Opcode != Instruction::FNeg)
        Op2Info = TTI::getOperandInfo(Operands[1]);
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                               Op2Info, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      Type *OpTy = Operands[0]->getType();
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    }
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = Operands[0]->getType();
      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]);
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(), CostKind,
                                        OpInfo, I);
    }
    case Instruction::Load: {
      // FIXME: Arbitrary cost which could come from the backend.
      if (CostKind == TTI::TCK_Latency)
        return 4;
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      // If there is a non-register sized type, the cost estimation may expand
      // it to be several instructions to load into multiple registers on the
      // target. But, if the only use of the load is a trunc instruction to a
      // register sized type, the instruction selector can combine these
      // instructions to be a single load. So, in this case, we use the
      // destination type of the trunc instruction rather than the load to
      // accurately estimate the cost of this load instruction.
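      //
      // For example, at CodeSize a pattern like
      //   %w = load i64, ptr %p
      //   %t = trunc i64 %w to i32
      // is costed below as a single i32-sized load.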
      if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
          !LoadType->isVectorTy()) {
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
          LoadType = TI->getDestTy();
      }
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(), CostKind,
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
        // select x, y, false --> x & y
        // select x, true, y --> x | y
        const auto Op1Info = TTI::getOperandInfo(Op0);
        const auto Op2Info = TTI::getOperandInfo(Op1);
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
               Op1->getType()->getScalarSizeInBits() == 1);

        SmallVector<const Value *, 2> Operands{Op0, Op1};
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1Info, Op2Info, Operands, I);
      }
      Type *CondTy = Operands[0]->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = Operands[0]->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME

      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
      ArrayRef<int> Mask = Shuffle->getShuffleMask();
      int NumSubElts, SubIndex;

      // TODO: move more of this inside improveShuffleKindFromMask.
      if (Shuffle->changesLength()) {
        // Treat a 'subvector widening' as a free shuffle.
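        // E.g. widening <2 x i32> to <4 x i32> with mask <0, 1, poison, poison>
        // keeps every element in place, so no instructions are needed.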
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return 0;

        if (Shuffle->isExtractSubvectorMask(SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                           Mask, CostKind, SubIndex, VecTy,
                                           Operands, Shuffle);

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(
              TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
              FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
              Operands, Shuffle);

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          APInt DemandedDstElts = APInt::getZero(Mask.size());
          for (auto I : enumerate(Mask)) {
            if (I.value() != PoisonMaskElem)
              DemandedDstElts.setBit(I.index());
          }
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }

        bool IsUnary = isa<UndefValue>(Operands[1]);
        NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
        SmallVector<int, 16> AdjustMask(Mask.begin(), Mask.end());

        // Widening shuffle - widening the source(s) to the new length
        // (treated as free - see above), and then perform the adjusted
        // shuffle at that width.
        if (Shuffle->increasesLength()) {
          for (int &M : AdjustMask)
            M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;

          return TargetTTI->getShuffleCost(
              IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
              AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
        }

        // Narrowing shuffle - perform shuffle at original wider width and
        // then extract the lower elements.
        AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);

        InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
            IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
            VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);

        SmallVector<int, 16> ExtractMask(Mask.size());
        std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
        return ShuffleCost + TargetTTI->getShuffleCost(
                                 TTI::SK_ExtractSubvector, VecSrcTy,
                                 ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
      }

      if (Shuffle->isIdentity())
        return 0;

      if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind,
                                         0, nullptr, Operands, Shuffle);

      if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind,
                                         0, nullptr, Operands, Shuffle);

      if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask,
                                         CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask,
                                         CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask,
                                         CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(
            TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
            FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
            Shuffle);

      if (Shuffle->isSplice(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
                                         SubIndex, nullptr, Operands, Shuffle);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask,
                                       CostKind, 0, nullptr, Operands, Shuffle);
    }
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = Operands[0]->getType();
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
    }
    }

    // By default, just classify everything as 'basic' or -1 to represent that
    // we don't know the throughput cost.
    return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
  }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) {
    auto *TargetTTI = static_cast<T *>(this);
    SmallVector<const Value *, 4> Ops(I->operand_values());
    InstructionCost Cost = TargetTTI->getInstructionCost(
        I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
    return Cost >= TargetTransformInfo::TCC_Expensive;
  }

  bool supportsTailCallFor(const CallBase *CB) const {
    return static_cast<const T *>(this)->supportsTailCalls();
  }
};
} // namespace llvm

#endif