//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include <optional>
#include <utility>

namespace llvm {

class Function;

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
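///
/// A minimal usage sketch (the target class name here is hypothetical):
/// concrete implementations usually derive from a CRTP helper such as
/// BasicTTIImplBase, which in turn derives from this base, and override only
/// the hooks they care about, e.g.
///
///   class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
///     unsigned getCacheLineSize() const override { return 64; }
///   };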
class TargetTransformInfoImplBase {

protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  virtual ~TargetTransformInfoImplBase();

  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  virtual const DataLayout &getDataLayout() const { return DL; }

  // FIXME: It looks like this implementation is dead. All clients appear to
  //  use the (non-const) version from `TargetTransformInfoImplCRTPBase`.
  virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                                     ArrayRef<const Value *> Operands,
                                     Type *AccessType,
                                     TTI::TargetCostKind CostKind) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (const Value *Operand : Operands)
      if (!isa<Constant>(Operand))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }
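  // For example, a GEP whose index operands are all constants is assumed to
  // fold into its users' addressing modes and is costed as TTI::TCC_Free,
  // while a GEP with any variable index is costed as TTI::TCC_Basic.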

  virtual InstructionCost
  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
                       const TTI::PointersChainInfo &Info, Type *AccessTy,
                       TTI::TargetCostKind CostKind) const {
    llvm_unreachable("Not implemented");
  }

  virtual unsigned
  getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
                                   ProfileSummaryInfo *PSI,
                                   BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  virtual InstructionCost
  getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
                     TTI::TargetCostKind CostKind) const {
    llvm_unreachable("Not implemented");
  }

  virtual unsigned getInliningThresholdMultiplier() const { return 1; }
  virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const {
    return 8;
  }
  virtual unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
    return 8;
  }
  virtual int getInliningLastCallToStaticBonus() const {
    // This is the value of InlineConstants::LastCallToStaticBonus before it was
    // removed along with the introduction of this function.
    return 15000;
  }
  virtual unsigned adjustInliningThreshold(const CallBase *CB) const {
    return 0;
  }
  virtual unsigned getCallerAllocaCost(const CallBase *CB,
                                       const AllocaInst *AI) const {
    return 0;
  }

  virtual int getInlinerVectorBonusPercent() const { return 150; }

  virtual InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const { return 64; }

  // Although this default value is arbitrary, it is not random. It is assumed
  // that a condition that evaluates the same way by a higher percentage than
  // this is best represented as control flow. Therefore, the default value N
  // should be set such that the win from N% correct executions is greater than
  // the loss from (100 - N)% mispredicted executions for the majority of
  // intended targets.
  virtual BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }
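  // For example, with the default threshold of 99/100, a branch must take the
  // same direction in at least 99% of executions before it is considered
  // predictable enough to be left as control flow.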

  virtual InstructionCost getBranchMispredictPenalty() const { return 0; }

  virtual bool hasBranchDivergence(const Function *F = nullptr) const {
    return false;
  }

  virtual bool isSourceOfDivergence(const Value *V) const { return false; }

  virtual bool isAlwaysUniform(const Value *V) const { return false; }

  virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return false;
  }

  virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
    return true;
  }

  virtual unsigned getFlatAddressSpace() const { return -1; }

  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                          Intrinsic::ID IID) const {
    return false;
  }

  virtual bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
  virtual bool
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS == 0;
  }

  virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  virtual bool isSingleThreaded() const { return false; }

  virtual std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
  }

  virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                                  Value *OldV,
                                                  Value *NewV) const {
    return nullptr;
  }

  virtual bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    // clang-format off
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs"  || Name == "fabsf"  || Name == "fabsl" ||
        Name == "fmin"  || Name == "fminf"  || Name == "fminl" ||
        Name == "fmax"  || Name == "fmaxf"  || Name == "fmaxl" ||
        Name == "sin"   || Name == "sinf"   || Name == "sinl"  ||
        Name == "cos"   || Name == "cosf"   || Name == "cosl"  ||
        Name == "tan"   || Name == "tanf"   || Name == "tanl"  ||
        Name == "asin"  || Name == "asinf"  || Name == "asinl" ||
        Name == "acos"  || Name == "acosf"  || Name == "acosl" ||
        Name == "atan"  || Name == "atanf"  || Name == "atanl" ||
        Name == "atan2" || Name == "atan2f" || Name == "atan2l"||
        Name == "sinh"  || Name == "sinhf"  || Name == "sinhl" ||
        Name == "cosh"  || Name == "coshf"  || Name == "coshl" ||
        Name == "tanh"  || Name == "tanhf"  || Name == "tanhl" ||
        Name == "sqrt"  || Name == "sqrtf"  || Name == "sqrtl" ||
        Name == "exp10" || Name == "exp10l" || Name == "exp10f")
      return false;
    // clang-format on
    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }
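  // For example, a call to `sqrtf` is assumed above to lower to a single
  // selection DAG node rather than a real call, so it is not treated as
  // lowered to a call for cost purposes.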

  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                        AssumptionCache &AC,
                                        TargetLibraryInfo *LibInfo,
                                        HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  virtual unsigned getEpilogueVectorizationMinVF() const { return 16; }

  virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const {
    return false;
  }

  virtual TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
    return TailFoldingStyle::DataWithoutLaneMask;
  }

  virtual std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
    return std::nullopt;
  }

  virtual std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return std::nullopt;
  }

  virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return std::nullopt;
  }

  virtual void getUnrollingPreferences(Loop *, ScalarEvolution &,
                                       TTI::UnrollingPreferences &,
                                       OptimizationRemarkEmitter *) const {}

  virtual void getPeelingPreferences(Loop *, ScalarEvolution &,
                                     TTI::PeelingPreferences &) const {}

  virtual bool isLegalAddImmediate(int64_t Imm) const { return false; }

  virtual bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }

  virtual bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale, unsigned AddrSpace,
                                     Instruction *I = nullptr,
                                     int64_t ScalableOffset = 0) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  virtual bool isLSRCostLess(const TTI::LSRCost &C1,
                             const TTI::LSRCost &C2) const {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }
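  // Note: std::tie yields a lexicographic comparison, so NumRegs dominates
  // the ordering, AddRecCost breaks ties, and so on down to SetupCost.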

  virtual bool isNumRegsMajorCostOfLSR() const { return true; }

  virtual bool shouldDropLSRSolutionIfLessProfitable() const { return false; }

  virtual bool isProfitableLSRChainElement(Instruction *I) const {
    return false;
  }

  virtual bool canMacroFuseCmp() const { return false; }

  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                          LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
                          TargetLibraryInfo *LibInfo) const {
    return false;
  }

  virtual TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
    return TTI::AMK_None;
  }

  virtual bool isLegalMaskedStore(Type *DataType, Align Alignment,
                                  unsigned AddressSpace) const {
    return false;
  }

  virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment,
                                 unsigned AddressSpace) const {
    return false;
  }

  virtual bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }
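  // For example, a <4 x i32> access (16 bytes) with alignment >= 16 counts as
  // a legal nontemporal load or store under these defaults, while a 12-byte
  // access never does (12 is not a power of 2).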

  virtual bool isLegalBroadcastLoad(Type *ElementTy,
                                    ElementCount NumElements) const {
    return false;
  }

  virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  virtual bool forceScalarizeMaskedGather(VectorType *DataType,
                                          Align Alignment) const {
    return false;
  }

  virtual bool forceScalarizeMaskedScatter(VectorType *DataType,
                                           Align Alignment) const {
    return false;
  }

  virtual bool isLegalMaskedCompressStore(Type *DataType,
                                          Align Alignment) const {
    return false;
  }

  virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
                               unsigned Opcode1,
                               const SmallBitVector &OpcodeMask) const {
    return false;
  }

  virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const {
    return false;
  }

  virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                            Align Alignment,
                                            unsigned AddrSpace) const {
    return false;
  }

  virtual bool isLegalMaskedVectorHistogram(Type *AddrType,
                                            Type *DataType) const {
    return false;
  }

  virtual bool enableOrderedReductions() const { return false; }

  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) const {
    return false;
  }

  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  virtual bool prefersVectorizedAddressing() const { return true; }

  virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                               StackOffset BaseOffset,
                                               bool HasBaseReg, int64_t Scale,
                                               unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset.getFixed(), HasBaseReg,
                              Scale, AddrSpace, /*I=*/nullptr,
                              BaseOffset.getScalable()))
      return 0;
    return InstructionCost::getInvalid();
  }

  virtual bool LSRWithInstrQueries() const { return false; }

  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  virtual bool isProfitableToHoist(Instruction *I) const { return true; }

  virtual bool useAA() const { return false; }

  virtual bool isTypeLegal(Type *Ty) const { return false; }

  virtual unsigned getRegUsageForType(Type *Ty) const { return 1; }

  virtual bool shouldBuildLookupTables() const { return true; }

  virtual bool shouldBuildLookupTablesForConstant(Constant *C) const {
    return true;
  }

  virtual bool shouldBuildRelLookupTables() const { return false; }

  virtual bool useColdCCForColdCall(Function &F) const { return false; }

  virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
    return false;
  }

  virtual bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                                  unsigned ScalarOpdIdx) const {
    return false;
  }

  virtual bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
                                                      int OpdIdx) const {
    return OpdIdx == -1;
  }

  virtual bool
  isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
                                                   int RetIdx) const {
    return RetIdx == 0;
  }

  virtual InstructionCost getScalarizationOverhead(
      VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
      TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
      ArrayRef<Value *> VL = {}) const {
    return 0;
  }

  virtual InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 0;
  }

  virtual bool supportsEfficientVectorElementLoadStore() const { return false; }

  virtual bool supportsTailCalls() const { return true; }

  virtual bool supportsTailCallFor(const CallBase *CB) const {
    llvm_unreachable("Not implemented");
  }

  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  virtual TTI::MemCmpExpansionOptions
  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
    return {};
  }

  virtual bool enableSelectOptimize() const { return true; }

  virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) const {
    // A select with two constant operands will usually be better left as a
    // select.
    using namespace llvm::PatternMatch;
    if (match(I, m_Select(m_Value(), m_Constant(), m_Constant())))
      return false;
    // If the select is a logical-and/logical-or then it is better treated as
    // an and/or by the backend.
    return isa<SelectInst>(I) &&
           !match(I, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
                                 m_LogicalOr(m_Value(), m_Value())));
  }
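  // For example, `select i1 %a, i1 %b, i1 false` is a logical-and and
  // `select i1 %a, i1 true, i1 %b` is a logical-or; both are excluded above
  // because the backend lowers them better as plain and/or.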

  virtual bool enableInterleavedAccessVectorization() const { return false; }

  virtual bool enableMaskedInterleavedAccessVectorization() const {
    return false;
  }

  virtual bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                              unsigned BitWidth,
                                              unsigned AddressSpace,
                                              Align Alignment,
                                              unsigned *Fast) const {
    return false;
  }

  virtual TTI::PopcntSupportKind
  getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  virtual bool haveFastSqrt(Type *Ty) const { return false; }

  virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) const {
    return true;
  }

  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  virtual InstructionCost getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  virtual InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                                const APInt &Imm,
                                                Type *Ty) const {
    return 0;
  }

  virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                        TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  virtual InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                            const APInt &Imm, Type *Ty,
                                            TTI::TargetCostKind CostKind,
                                            Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  virtual InstructionCost
  getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                      Type *Ty, TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  virtual bool preferToKeepConstantsAttached(const Instruction &Inst,
                                             const Function &Fn) const {
    return false;
  }

  virtual unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
  virtual bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const {
    return false;
  }

  virtual unsigned getRegisterClassForType(bool Vector,
                                           Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }

  virtual const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  virtual TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  virtual unsigned getMinVectorRegisterBitWidth() const { return 128; }

  virtual std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
  virtual std::optional<unsigned> getVScaleForTuning() const {
    return std::nullopt;
  }
  virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; }

  virtual bool
  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
    return false;
  }

  virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
    return ElementCount::get(0, IsScalable);
  }

  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
    return 0;
  }
  virtual unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const {
    return VF;
  }

  virtual bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  virtual unsigned getCacheLineSize() const { return 0; }
  virtual std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  virtual std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  virtual std::optional<unsigned> getMinPageSize() const { return {}; }

  virtual unsigned getPrefetchDistance() const { return 0; }
  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                        unsigned NumStridedMemAccesses,
                                        unsigned NumPrefetches,
                                        bool HasCall) const {
    return 1;
  }
  virtual unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  virtual bool enableWritePrefetching() const { return false; }
  virtual bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }

  virtual InstructionCost getPartialReductionCost(
      unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
      ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
      TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
      TTI::TargetCostKind CostKind) const {
    return InstructionCost::getInvalid();
  }

  virtual unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }

  virtual InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) const {
    // Widenable conditions will eventually lower into constants, so some
    // operations with them will be trivially optimized away.
    auto IsWidenableCondition = [](const Value *V) {
      if (auto *II = dyn_cast<IntrinsicInst>(V))
        if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
          return true;
      return false;
    };
    // FIXME: A number of transformation tests seem to require these values,
    // which seems a little odd for how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    case Instruction::And:
    case Instruction::Or:
      if (any_of(Args, IsWidenableCondition))
        return TTI::TCC_Free;
      break;
    }

    // Assume a 3cy latency for fp arithmetic ops.
    if (CostKind == TTI::TCK_Latency)
      if (Ty->getScalarType()->isFloatingPointTy())
        return 3;

    return 1;
  }
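  // For example, `or i1 %g, %wc` where `%wc` is produced by a call to
  // @llvm.experimental.widenable.condition() is costed as TTI::TCC_Free
  // above, since the widenable condition is expected to fold to a constant.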

  virtual InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
                                          unsigned Opcode1,
                                          const SmallBitVector &OpcodeMask,
                                          TTI::TargetCostKind CostKind) const {
    return InstructionCost::getInvalid();
  }

  virtual InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const {
    return 1;
  }

  virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
                                           Type *Src, TTI::CastContextHint CCH,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
        return 0;
      break;
    }
    }
    return 1;
  }
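  // For example, on a target whose DataLayout declares i32 a legal integer
  // type and uses 32-bit pointers, both `trunc i64 %x to i32` and
  // `ptrtoint ptr %p to i32` are modeled as free above.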

  virtual InstructionCost
  getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                           unsigned Index, TTI::TargetCostKind CostKind) const {
    return 1;
  }

  virtual InstructionCost getCFInstrCost(unsigned Opcode,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  virtual InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
      TTI::OperandValueInfo Op2Info, const Instruction *I) const {
    return 1;
  }

  virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                             TTI::TargetCostKind CostKind,
                                             unsigned Index, const Value *Op0,
                                             const Value *Op1) const {
    return 1;
  }

  /// \param ScalarUserAndIdx encodes the information about extracts from a
  /// vector, with 'Scalar' being the value being extracted, 'User' being the
  /// user of the extract (nullptr if the user is not known before
  /// vectorization), and 'Idx' being the extract lane.
  virtual InstructionCost getVectorInstrCost(
      unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
      Value *Scalar,
      ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx) const {
    return 1;
  }

  virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                             TTI::TargetCostKind CostKind,
                                             unsigned Index) const {
    return 1;
  }

  virtual InstructionCost
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
                            TTI::TargetCostKind CostKind) const {
    return 1;
  }

  virtual InstructionCost
  getInsertExtractValueCost(unsigned Opcode,
                            TTI::TargetCostKind CostKind) const {
    // Note: The `insertvalue` cost here is chosen to match the default case of
    // getInstructionCost() -- as prior to adding this helper `insertvalue` was
    // not handled.
    if (Opcode == Instruction::InsertValue &&
        CostKind != TTI::TCK_RecipThroughput)
      return TTI::TCC_Basic;
    return TTI::TCC_Free;
  }

  virtual InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo, const Instruction *I) const {
    return 1;
  }

  virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
                                            Align Alignment,
                                            unsigned AddressSpace,
                                            TTI::TargetCostKind CostKind,
                                            const Instruction *I) const {
    return 1;
  }

  virtual InstructionCost
  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                        unsigned AddressSpace,
                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

  virtual InstructionCost
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr) const {
    return 1;
  }

  virtual InstructionCost getExpandCompressMemoryOpCost(
      unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
      TTI::TargetCostKind CostKind, const Instruction *I = nullptr) const {
    return 1;
  }

  virtual InstructionCost
  getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr) const {
    return InstructionCost::getInvalid();
  }

  virtual InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  virtual InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::experimental_vector_histogram_add:
      // For now, we want explicit support from the target for histograms.
      return InstructionCost::getInvalid();
    case Intrinsic::allow_runtime_check:
    case Intrinsic::allow_ubsan_check:
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_assign:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_begin_custom_abi:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
    case Intrinsic::threadlocal_address:
    case Intrinsic::experimental_widenable_condition:
    case Intrinsic::ssa_copy:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                           ArrayRef<Type *> Tys,
                                           TTI::TargetCostKind CostKind) const {
    return 1;
  }

  // Assume that we have a register of the right size for the type.
  virtual unsigned getNumberOfParts(Type *Tp) const { return 1; }

  virtual InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                                    const SCEV *) const {
    return 0;
  }

  virtual InstructionCost
  getArithmeticReductionCost(unsigned, VectorType *,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind) const {
    return 1;
  }

  virtual InstructionCost getMinMaxReductionCost(Intrinsic::ID IID,
                                                 VectorType *, FastMathFlags,
                                                 TTI::TargetCostKind) const {
    return 1;
  }

  virtual InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
                           VectorType *Ty, std::optional<FastMathFlags> FMF,
                           TTI::TargetCostKind CostKind) const {
    return 1;
  }

  virtual InstructionCost
  getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
                         TTI::TargetCostKind CostKind) const {
    return 1;
  }

  virtual InstructionCost
  getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                  MemIntrinsicInfo &Info) const {
    return false;
  }

  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h.
    return 0;
  }

  virtual Value *
  getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
                                    bool CanCreate = true) const {
    return nullptr;
  }

  virtual Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
                            Align SrcAlign, Align DestAlign,
                            std::optional<uint32_t> AtomicElementSize) const {
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }
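  // For example, with AtomicElementSize == 4 the memcpy loop above is lowered
  // with i32 element accesses; without an atomic element size it falls back
  // to byte-wide (i8) accesses.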

  virtual void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      Align SrcAlign, Align DestAlign,
      std::optional<uint32_t> AtomicCpySize) const {
    unsigned OpSizeInBytes = AtomicCpySize.value_or(1);
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
      OpsOut.push_back(OpType);
  }
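  // Note: the loop above assumes RemainingBytes is a multiple of the chosen
  // operation size. For example, RemainingBytes == 3 with the default 1-byte
  // operations emits three i8 ops.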

  virtual bool areInlineCompatible(const Function *Caller,
                                   const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }
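  // Note: this default requires exact equality of the attribute strings, so
  // functions whose "target-features" strings differ are not considered
  // inline compatible even when one feature set is a strict superset of the
  // other. Targets can override this with a more permissive subset check.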
1019 
getInlineCallPenalty(const Function * F,const CallBase & Call,unsigned DefaultCallPenalty)1020   virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
1021                                         unsigned DefaultCallPenalty) const {
1022     return DefaultCallPenalty;
1023   }
1024 
areTypesABICompatible(const Function * Caller,const Function * Callee,const ArrayRef<Type * > & Types)1025   virtual bool areTypesABICompatible(const Function *Caller,
1026                                      const Function *Callee,
1027                                      const ArrayRef<Type *> &Types) const {
1028     return (Caller->getFnAttribute("target-cpu") ==
1029             Callee->getFnAttribute("target-cpu")) &&
1030            (Caller->getFnAttribute("target-features") ==
1031             Callee->getFnAttribute("target-features"));
1032   }
1033 
isIndexedLoadLegal(TTI::MemIndexedMode Mode,Type * Ty)1034   virtual bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty) const {
1035     return false;
1036   }
1037 
isIndexedStoreLegal(TTI::MemIndexedMode Mode,Type * Ty)1038   virtual bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty) const {
1039     return false;
1040   }
1041 
getLoadStoreVecRegBitWidth(unsigned AddrSpace)1042   virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
1043     return 128;
1044   }
1045 
isLegalToVectorizeLoad(LoadInst * LI)1046   virtual bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
1047 
  virtual bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                           Align Alignment,
                                           unsigned AddrSpace) const {
    return true;
  }

  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                            Align Alignment,
                                            unsigned AddrSpace) const {
    return true;
  }

  virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                           ElementCount VF) const {
    return true;
  }

  virtual bool isElementTypeLegalForScalableVector(Type *Ty) const {
    return true;
  }

  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                       unsigned ChainSizeInBytes,
                                       VectorType *VecTy) const {
    return VF;
  }

  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                        unsigned ChainSizeInBytes,
                                        VectorType *VecTy) const {
    return VF;
  }

  virtual bool preferFixedOverScalableIfEqualCost() const { return false; }

  virtual bool preferInLoopReduction(RecurKind Kind, Type *Ty) const {
    return false;
  }

  virtual bool preferAlternateOpcodeVectorization() const { return true; }

  virtual bool preferPredicatedReductionSelect() const { return false; }

  virtual bool preferEpilogueVectorization() const { return true; }

  virtual bool shouldExpandReduction(const IntrinsicInst *II) const {
    return true;
  }

  virtual TTI::ReductionShuffle
  getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const {
    return TTI::ReductionShuffle::SplitHalf;
  }

  virtual unsigned getGISelRematGlobalCost() const { return 1; }

  virtual unsigned getMinTripCountTailFoldingThreshold() const { return 0; }

  virtual bool supportsScalableVectors() const { return false; }

  virtual bool enableScalableVectorization() const { return false; }

  virtual bool hasActiveVectorLength() const { return false; }

  virtual bool isProfitableToSinkOperands(Instruction *I,
                                          SmallVectorImpl<Use *> &Ops) const {
    return false;
  }

  virtual bool isVectorShiftByScalarCheap(Type *Ty) const { return false; }

  virtual TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }
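  // A rough sketch of what the defaults above request from the
  // ExpandVectorPredication pass: ignore the explicit vector length parameter
  // where that is safe (Discard), and rewrite the VP operator as an
  // equivalent unpredicated operation (Convert). For example (illustrative
  // IR):
  //   %v = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %a, <4 x i32> %b,
  //                                          <4 x i1> %m, i32 %evl)
  // ends up as a plain 'add' on the active lanes.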

  virtual bool hasArmWideBranch(bool) const { return false; }

  virtual uint64_t getFeatureMask(const Function &F) const { return 0; }

  virtual bool isMultiversionedFunction(const Function &F) const {
    return false;
  }

  virtual unsigned getMaxNumArgs() const { return UINT_MAX; }

  virtual unsigned getNumBytesToPadGlobalArray(unsigned Size,
                                               Type *ArrayType) const {
    return 0;
  }

  virtual void collectKernelLaunchBounds(
      const Function &F,
      SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // For a vector we need to pick the largest of the minimum sizes
      // required by its elements.
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the size of the vector element type.
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element's min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getSignificantBits() - 1;
          // If any element is signed, the whole vector is treated as signed.
          isSigned |= signedElement;
          // Keep the maximum required bit size across all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an integer constant element; fall back to the full element
          // size.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getSignificantBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }
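  // For example: a ConstantInt with value 7 has getSignificantBits() == 4, so
  // this reports 3 bits with isSigned == false; a ConstantInt with value -8
  // also reports 3 bits, but with isSigned == true.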

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
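  // Illustration: for a pointer SCEV of the form {%base,+,4}<%loop>, the step
  // recurrence is the SCEVConstant 4, so isConstantStridedAccessLessThan
  // returns true exactly when MergeDistance is greater than 4.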
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands, Type *AccessType,
                             TTI::TargetCostKind CostKind) const override {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP has only the base pointer and no index
    // operands; TargetType then remains a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of a scalar GEP with a constant index and the
      // cost of a vector GEP with a splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures, the index is always a splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: Make isLegalAddressingMode TypeSize aware.
        if (TargetType->isScalableTy())
          return TTI::TCC_Basic;
        int64_t ElementSize =
            GTI.getSequentialElementStride(DL).getFixedValue();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs a scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    // If we haven't been provided a hint, use the target type for now.
    //
    // TODO: Take a look at potentially removing this: This is *slightly* wrong
    // as it's possible to have a GEP with a foldable target type but a memory
    // access that isn't foldable. For example, this load isn't foldable on
    // RISC-V:
    //
    // %p = getelementptr i32, ptr %base, i32 42
    // %x = load <2 x i32>, ptr %p
    if (!AccessType)
      AccessType = TargetType;

    // If the final address of the GEP is a legal addressing mode for the given
    // access type, then we can fold it into its users.
    if (static_cast<const T *>(this)->isLegalAddressingMode(
            AccessType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;

    // TODO: Instead of returning TCC_Basic here, we should use
    // getArithmeticInstrCost. Or better yet, provide a hook to let the target
    // model it.
    return TTI::TCC_Basic;
  }
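  // As a worked example (assuming a target whose isLegalAddressingMode
  // accepts reg+imm addressing): for
  //   %p = getelementptr inbounds i32, ptr %base, i64 3
  // BaseOffset becomes 12 and Scale stays 0, so the GEP is TCC_Free. With two
  // variable indices over non-struct types, the second index would need a
  // second scale register, which no addressing mode provides, so the cost
  // falls back to TCC_Basic.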

  InstructionCost
  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
                       const TTI::PointersChainInfo &Info, Type *AccessTy,
                       TTI::TargetCostKind CostKind) const override {
    InstructionCost Cost = TTI::TCC_Free;
    // In the basic model we only account for GEP instructions (although other
    // values can appear here: allocas, constants and/or constant expressions,
    // PHIs, bitcasts ... anything that is allowed to be used as a pointer).
    // Typically, if Base is not a GEP instruction and all the pointers are
    // relative to the same base address, the rest are GEP instructions, PHIs,
    // bitcasts or constants. When the pointers share a base, we cost each
    // non-Base GEP as a single ADD operation if any of its indices is
    // non-constant. If there are no known dependencies between the pointers,
    // the cost is the sum of the costs of the individual GEP instructions.
    for (const Value *V : Ptrs) {
      const auto *GEP = dyn_cast<GetElementPtrInst>(V);
      if (!GEP)
        continue;
      if (Info.isSameBase() && V != Base) {
        if (GEP->hasAllConstantIndices())
          continue;
        Cost += static_cast<const T *>(this)->getArithmeticInstrCost(
            Instruction::Add, GEP->getType(), CostKind,
            {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
            {});
      } else {
        SmallVector<const Value *> Indices(GEP->indices());
        Cost += static_cast<const T *>(this)->getGEPCost(
            GEP->getSourceElementType(), GEP->getPointerOperand(), Indices,
            AccessTy, CostKind);
      }
    }
    return Cost;
  }
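  // Illustration with a same-base chain (Info.isSameBase()), e.g.
  //   %g1 = getelementptr inbounds i32, ptr %base, i64 1
  //   %g2 = getelementptr inbounds i32, ptr %base, i64 %i
  // %g1 has all-constant indices and is considered free, while %g2 is costed
  // as a single Add.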

  InstructionCost
  getInstructionCost(const User *U, ArrayRef<const Value *> Operands,
                     TTI::TargetCostKind CostKind) const override {
    using namespace llvm::PatternMatch;

    auto *TargetTTI = static_cast<const T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered
                                 // directly to instructions rather than a call.

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }
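    // With the constants above, e.g., a direct call to a function taking two
    // parameters is costed at 3 * TCC_Basic: one unit per argument plus one
    // for the call itself.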

    Type *Ty = U->getType();
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::ExtractValue:
    case Instruction::InsertValue:
      return TargetTTI->getInsertExtractValueCost(Opcode, CostKind);
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      Type *AccessType = nullptr;
      // For now, only provide the AccessType in the simple case where the GEP
      // only has one user.
      if (GEP->hasOneUser() && I)
        AccessType = I->user_back()->getAccessType();

      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   Operands.front(), Operands.drop_front(),
                                   AccessType, CostKind);
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
      TTI::OperandValueInfo Op2Info;
      if (Opcode != Instruction::FNeg)
        Op2Info = TTI::getOperandInfo(Operands[1]);
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                               Op2Info, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      Type *OpTy = Operands[0]->getType();
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    }
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = Operands[0]->getType();
      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]);
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(), CostKind,
                                        OpInfo, I);
    }
    case Instruction::Load: {
      // FIXME: Arbitrary cost which could come from the backend.
      if (CostKind == TTI::TCK_Latency)
        return 4;
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      // If there is a non-register-sized type, the cost estimation may expand
      // it to be several instructions to load into multiple registers on the
      // target.  But, if the only use of the load is a trunc instruction to a
      // register-sized type, the instruction selector can combine these
      // instructions to be a single load.  So, in this case, we use the
      // destination type of the trunc instruction rather than the load to
      // accurately estimate the cost of this load instruction.
      if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
          !LoadType->isVectorTy()) {
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
          LoadType = TI->getDestTy();
      }
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(), CostKind,
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
    }
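    // For instance (illustrative IR), on a target with 64-bit registers
    //   %v = load i128, ptr %p
    //   %t = trunc i128 %v to i64
    // is costed for code size as a single i64 load rather than as a
    // multi-register i128 load, since the trunc folds into the load.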
    case Instruction::Select: {
      const Value *Op0, *Op1;
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
        // select x, y, false --> x & y
        // select x, true, y --> x | y
        const auto Op1Info = TTI::getOperandInfo(Op0);
        const auto Op2Info = TTI::getOperandInfo(Op1);
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
               Op1->getType()->getScalarSizeInBits() == 1);

        SmallVector<const Value *, 2> Operands{Op0, Op1};
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1Info, Op2Info, Operands, I);
      }
      const auto Op1Info = TTI::getOperandInfo(Operands[1]);
      const auto Op2Info = TTI::getOperandInfo(Operands[2]);
      Type *CondTy = Operands[0]->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, Op1Info, Op2Info, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      const auto Op1Info = TTI::getOperandInfo(Operands[0]);
      const auto Op2Info = TTI::getOperandInfo(Operands[1]);
      Type *ValTy = Operands[0]->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, Op1Info, Op2Info, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME

      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
      ArrayRef<int> Mask = Shuffle->getShuffleMask();
      int NumSubElts, SubIndex;

      // Treat an undef/poison mask as free (no matter the length).
      if (all_of(Mask, [](int M) { return M < 0; }))
        return TTI::TCC_Free;

      // TODO: move more of this inside improveShuffleKindFromMask.
      if (Shuffle->changesLength()) {
        // Treat a 'subvector widening' as a free shuffle.
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return TTI::TCC_Free;

        if (Shuffle->isExtractSubvectorMask(SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
                                           VecSrcTy, Mask, CostKind, SubIndex,
                                           VecTy, Operands, Shuffle);

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(
              TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind,
              SubIndex,
              FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
              Operands, Shuffle);

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          APInt DemandedDstElts = APInt::getZero(Mask.size());
          for (auto I : enumerate(Mask)) {
            if (I.value() != PoisonMaskElem)
              DemandedDstElts.setBit(I.index());
          }
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }

        bool IsUnary = isa<UndefValue>(Operands[1]);
        NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
        SmallVector<int, 16> AdjustMask(Mask);

        // Widening shuffle - widen the source(s) to the new length (treated
        // as free - see above), then perform the adjusted shuffle at that
        // width.
        if (Shuffle->increasesLength()) {
          for (int &M : AdjustMask)
            M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;

          return TargetTTI->getShuffleCost(
              IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
              VecTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
        }
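        // Illustration of the widening adjustment above: widening
        //   shufflevector <2 x i32> %a, <2 x i32> %b,
        //                 <4 x i32> <i32 0, i32 2, i32 1, i32 3>
        // has NumSubElts == 2, so indices selecting from %b (M >= 2) are
        // rebased by Mask.size() - NumSubElts == 2, giving <0, 4, 1, 5> over
        // the (free) widened <4 x i32> sources.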

        // Narrowing shuffle - perform the shuffle at the original wider width
        // and then extract the lower elements.
        // FIXME: This can assume widening, which is not true of all vector
        // architectures (and is not even the default).
        AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);

        InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
            IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
            VecSrcTy, VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands,
            Shuffle);

        SmallVector<int, 16> ExtractMask(Mask.size());
        std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
        return ShuffleCost + TargetTTI->getShuffleCost(
                                 TTI::SK_ExtractSubvector, VecTy, VecSrcTy,
                                 ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
      }

      if (Shuffle->isIdentity())
        return TTI::TCC_Free;

      if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, VecSrcTy, Mask,
                                         CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, VecSrcTy,
                                         Mask, CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, VecSrcTy,
                                         Mask, CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
                                         VecSrcTy, Mask, CostKind, 0, nullptr,
                                         Operands, Shuffle);

      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(
            TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind, SubIndex,
            FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
            Shuffle);

      if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, VecSrcTy, Mask,
                                         CostKind, 0, nullptr, Operands,
                                         Shuffle);

      if (Shuffle->isSplice(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, VecSrcTy, Mask,
                                         CostKind, SubIndex, nullptr, Operands,
                                         Shuffle);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, VecSrcTy,
                                       Mask, CostKind, 0, nullptr, Operands,
                                       Shuffle);
    }
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = Operands[0]->getType();
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
    }
    }

    // By default, just classify everything remaining as 'basic'.
    return TTI::TCC_Basic;
  }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) const override {
    auto *TargetTTI = static_cast<const T *>(this);
    SmallVector<const Value *, 4> Ops(I->operand_values());
    InstructionCost Cost = TargetTTI->getInstructionCost(
        I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
    return Cost >= TargetTransformInfo::TCC_Expensive;
  }
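  // Note: with the default cost constants (TCC_Expensive == 4), e.g. a direct
  // call to a three-parameter function (costed at 4 * TCC_Basic by
  // getInstructionCost above) already counts as expensive to speculate.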

  bool supportsTailCallFor(const CallBase *CB) const override {
    return static_cast<const T *>(this)->supportsTailCalls();
  }
};
} // namespace llvm

#endif