xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h (revision e64bea71c21eb42e97aa615188ba91f6cce0d36d)
//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfoImplBase conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
17 #define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
18 
19 #include "AArch64.h"
20 #include "AArch64Subtarget.h"
21 #include "AArch64TargetMachine.h"
22 #include "llvm/Analysis/TargetTransformInfo.h"
23 #include "llvm/CodeGen/BasicTTIImpl.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/Intrinsics.h"
26 #include "llvm/Support/InstructionCost.h"
27 #include <cstdint>
28 #include <optional>
29 
30 namespace llvm {
31 
32 class APInt;
33 class Instruction;
34 class IntrinsicInst;
35 class Loop;
36 class SCEV;
37 class ScalarEvolution;
38 class Type;
39 class Value;
40 class VectorType;
41 
42 class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
43   using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
44   using TTI = TargetTransformInfo;
45 
46   friend BaseT;
47 
48   const AArch64Subtarget *ST;
49   const AArch64TargetLowering *TLI;
50 
51   static const FeatureBitset InlineInverseFeatures;
52 
getST()53   const AArch64Subtarget *getST() const { return ST; }
getTLI()54   const AArch64TargetLowering *getTLI() const { return TLI; }
55 
56   enum MemIntrinsicType {
57     VECTOR_LDST_TWO_ELEMENTS,
58     VECTOR_LDST_THREE_ELEMENTS,
59     VECTOR_LDST_FOUR_ELEMENTS
60   };
61 
62   bool isWideningInstruction(Type *DstTy, unsigned Opcode,
63                              ArrayRef<const Value *> Args,
64                              Type *SrcOverrideTy = nullptr) const;
65 
66   // A helper function called by 'getVectorInstrCost'.
67   //
68   // 'Val' and 'Index' are forwarded from 'getVectorInstrCost';
69   // \param ScalarUserAndIdx encodes the information about extracts from a
70   /// vector with 'Scalar' being the value being extracted,'User' being the user
71   /// of the extract(nullptr if user is not known before vectorization) and
72   /// 'Idx' being the extract lane.
73   InstructionCost getVectorInstrCostHelper(
74       unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
75       const Instruction *I = nullptr, Value *Scalar = nullptr,
76       ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {}) const;
77 
78 public:
AArch64TTIImpl(const AArch64TargetMachine * TM,const Function & F)79   explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
80       : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
81         TLI(ST->getTargetLowering()) {}
82 
83   bool areInlineCompatible(const Function *Caller,
84                            const Function *Callee) const override;
85 
86   bool areTypesABICompatible(const Function *Caller, const Function *Callee,
87                              const ArrayRef<Type *> &Types) const override;
88 
89   unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
90                                 unsigned DefaultCallPenalty) const override;
91 
92   uint64_t getFeatureMask(const Function &F) const override;
93 
94   bool isMultiversionedFunction(const Function &F) const override;
95 
96   /// \name Scalar TTI Implementations
97   /// @{
98 
99   using BaseT::getIntImmCost;
100   InstructionCost getIntImmCost(int64_t Val) const;
101   InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
102                                 TTI::TargetCostKind CostKind) const override;
103   InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
104                                     const APInt &Imm, Type *Ty,
105                                     TTI::TargetCostKind CostKind,
106                                     Instruction *Inst = nullptr) const override;
107   InstructionCost
108   getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
109                       Type *Ty, TTI::TargetCostKind CostKind) const override;
110   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
111 
112   /// @}
113 
114   /// \name Vector TTI Implementations
115   /// @{
116 
enableInterleavedAccessVectorization()117   bool enableInterleavedAccessVectorization() const override { return true; }
118 
enableMaskedInterleavedAccessVectorization()119   bool enableMaskedInterleavedAccessVectorization() const override {
120     return ST->hasSVE();
121   }
122 
getNumberOfRegisters(unsigned ClassID)123   unsigned getNumberOfRegisters(unsigned ClassID) const override {
124     bool Vector = (ClassID == 1);
125     if (Vector) {
126       if (ST->hasNEON())
127         return 32;
128       return 0;
129     }
130     return 31;
131   }
132 
133   InstructionCost
134   getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
135                         TTI::TargetCostKind CostKind) const override;
136 
137   std::optional<Instruction *>
138   instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
139 
140   std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
141       InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
142       APInt &UndefElts2, APInt &UndefElts3,
143       std::function<void(Instruction *, unsigned, APInt, APInt &)>
144           SimplifyAndSetOp) const override;
145 
146   TypeSize
147   getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override;
148 
getMinVectorRegisterBitWidth()149   unsigned getMinVectorRegisterBitWidth() const override {
150     return ST->getMinVectorRegisterBitWidth();
151   }
152 
getVScaleForTuning()153   std::optional<unsigned> getVScaleForTuning() const override {
154     return ST->getVScaleForTuning();
155   }
156 
isVScaleKnownToBeAPowerOfTwo()157   bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }
158 
159   bool shouldMaximizeVectorBandwidth(
160       TargetTransformInfo::RegisterKind K) const override;
161 
162   /// Try to return an estimate cost factor that can be used as a multiplier
163   /// when scalarizing an operation for a vector with ElementCount \p VF.
164   /// For scalable vectors this currently takes the most pessimistic view based
165   /// upon the maximum possible value for vscale.
getMaxNumElements(ElementCount VF)166   unsigned getMaxNumElements(ElementCount VF) const {
167     if (!VF.isScalable())
168       return VF.getFixedValue();
169 
170     return VF.getKnownMinValue() * ST->getVScaleForTuning();
171   }
172 
173   unsigned getMaxInterleaveFactor(ElementCount VF) const override;
174 
175   bool prefersVectorizedAddressing() const override;
176 
177   InstructionCost
178   getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
179                         unsigned AddressSpace,
180                         TTI::TargetCostKind CostKind) const override;
181 
182   InstructionCost
183   getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
184                          bool VariableMask, Align Alignment,
185                          TTI::TargetCostKind CostKind,
186                          const Instruction *I = nullptr) const override;
187 
188   bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst,
189                           Type *Src) const;
190 
191   InstructionCost
192   getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
193                    TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
194                    const Instruction *I = nullptr) const override;
195 
196   InstructionCost
197   getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
198                            unsigned Index,
199                            TTI::TargetCostKind CostKind) const override;
200 
201   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
202                                  const Instruction *I = nullptr) const override;
203 
204   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
205                                      TTI::TargetCostKind CostKind,
206                                      unsigned Index, const Value *Op0,
207                                      const Value *Op1) const override;
208 
209   /// \param ScalarUserAndIdx encodes the information about extracts from a
210   /// vector with 'Scalar' being the value being extracted,'User' being the user
211   /// of the extract(nullptr if user is not known before vectorization) and
212   /// 'Idx' being the extract lane.
213   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
214                                      TTI::TargetCostKind CostKind,
215                                      unsigned Index, Value *Scalar,
216                                      ArrayRef<std::tuple<Value *, User *, int>>
217                                          ScalarUserAndIdx) const override;
218 
219   InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
220                                      TTI::TargetCostKind CostKind,
221                                      unsigned Index) const override;
222 
223   InstructionCost
224   getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
225                          TTI::TargetCostKind CostKind) const override;
226 
227   InstructionCost
228   getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy,
229                                 TTI::TargetCostKind CostKind) const;
230 
231   InstructionCost getSpliceCost(VectorType *Tp, int Index,
232                                 TTI::TargetCostKind CostKind) const;
233 
234   InstructionCost getArithmeticInstrCost(
235       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
236       TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
237       TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
238       ArrayRef<const Value *> Args = {},
239       const Instruction *CxtI = nullptr) const override;
240 
241   InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
242                                             const SCEV *Ptr) const override;
243 
244   InstructionCost getCmpSelInstrCost(
245       unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
246       TTI::TargetCostKind CostKind,
247       TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
248       TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
249       const Instruction *I = nullptr) const override;
250 
251   TTI::MemCmpExpansionOptions
252   enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
253   bool useNeonVector(const Type *Ty) const;
254 
255   InstructionCost getMemoryOpCost(
256       unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
257       TTI::TargetCostKind CostKind,
258       TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
259       const Instruction *I = nullptr) const override;
260 
261   InstructionCost
262   getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const override;
263 
264   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
265                                TTI::UnrollingPreferences &UP,
266                                OptimizationRemarkEmitter *ORE) const override;
267 
268   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
269                              TTI::PeelingPreferences &PP) const override;
270 
271   Value *
272   getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType,
273                                     bool CanCreate = true) const override;
274 
275   bool getTgtMemIntrinsic(IntrinsicInst *Inst,
276                           MemIntrinsicInfo &Info) const override;
277 
isElementTypeLegalForScalableVector(Type * Ty)278   bool isElementTypeLegalForScalableVector(Type *Ty) const override {
279     if (Ty->isPointerTy())
280       return true;
281 
282     if (Ty->isBFloatTy() && ST->hasBF16())
283       return true;
284 
285     if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
286       return true;
287 
288     if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
289         Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
290       return true;
291 
292     return false;
293   }
294 
isLegalMaskedLoadStore(Type * DataType,Align Alignment)295   bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const {
296     if (!ST->hasSVE())
297       return false;
298 
299     // For fixed vectors, avoid scalarization if using SVE for them.
300     if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors() &&
301         DataType->getPrimitiveSizeInBits() != 128)
302       return false; // Fall back to scalarization of masked operations.
303 
304     return isElementTypeLegalForScalableVector(DataType->getScalarType());
305   }
306 
isLegalMaskedLoad(Type * DataType,Align Alignment,unsigned)307   bool isLegalMaskedLoad(Type *DataType, Align Alignment,
308                          unsigned /*AddressSpace*/) const override {
309     return isLegalMaskedLoadStore(DataType, Alignment);
310   }
311 
isLegalMaskedStore(Type * DataType,Align Alignment,unsigned)312   bool isLegalMaskedStore(Type *DataType, Align Alignment,
313                           unsigned /*AddressSpace*/) const override {
314     return isLegalMaskedLoadStore(DataType, Alignment);
315   }
316 
isLegalMaskedGatherScatter(Type * DataType)317   bool isLegalMaskedGatherScatter(Type *DataType) const {
318     if (!ST->isSVEAvailable())
319       return false;
320 
321     // For fixed vectors, scalarize if not using SVE for them.
322     auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
323     if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
324                          DataTypeFVTy->getNumElements() < 2))
325       return false;
326 
327     return isElementTypeLegalForScalableVector(DataType->getScalarType());
328   }
329 
isLegalMaskedGather(Type * DataType,Align Alignment)330   bool isLegalMaskedGather(Type *DataType, Align Alignment) const override {
331     return isLegalMaskedGatherScatter(DataType);
332   }
333 
isLegalMaskedScatter(Type * DataType,Align Alignment)334   bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override {
335     return isLegalMaskedGatherScatter(DataType);
336   }
337 
isLegalBroadcastLoad(Type * ElementTy,ElementCount NumElements)338   bool isLegalBroadcastLoad(Type *ElementTy,
339                             ElementCount NumElements) const override {
340     // Return true if we can generate a `ld1r` splat load instruction.
341     if (!ST->hasNEON() || NumElements.isScalable())
342       return false;
343     switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
344     case 8:
345     case 16:
346     case 32:
347     case 64: {
348       // We accept bit-widths >= 64bits and elements {8,16,32,64} bits.
349       unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
350       return VectorBits >= 64;
351     }
352     }
353     return false;
354   }
355 
isLegalNTStoreLoad(Type * DataType,Align Alignment)356   bool isLegalNTStoreLoad(Type *DataType, Align Alignment) const {
357     // NOTE: The logic below is mostly geared towards LV, which calls it with
358     //       vectors with 2 elements. We might want to improve that, if other
359     //       users show up.
360     // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if
361     // the vector can be halved so that each half fits into a register. That's
362     // the case if the element type fits into a register and the number of
363     // elements is a power of 2 > 1.
364     if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) {
365       unsigned NumElements = DataTypeTy->getNumElements();
366       unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
367       return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
368              EltSize <= 128 && isPowerOf2_64(EltSize);
369     }
370     return BaseT::isLegalNTStore(DataType, Alignment);
371   }
372 
isLegalNTStore(Type * DataType,Align Alignment)373   bool isLegalNTStore(Type *DataType, Align Alignment) const override {
374     return isLegalNTStoreLoad(DataType, Alignment);
375   }
376 
isLegalNTLoad(Type * DataType,Align Alignment)377   bool isLegalNTLoad(Type *DataType, Align Alignment) const override {
378     // Only supports little-endian targets.
379     if (ST->isLittleEndian())
380       return isLegalNTStoreLoad(DataType, Alignment);
381     return BaseT::isLegalNTLoad(DataType, Alignment);
382   }
383 
384   InstructionCost getPartialReductionCost(
385       unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
386       ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
387       TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
388       TTI::TargetCostKind CostKind) const override;
389 
enableOrderedReductions()390   bool enableOrderedReductions() const override { return true; }
391 
392   InstructionCost getInterleavedMemoryOpCost(
393       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
394       Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
395       bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;
396 
397   bool shouldConsiderAddressTypePromotion(
398       const Instruction &I,
399       bool &AllowPromotionWithoutCommonHeader) const override;
400 
shouldExpandReduction(const IntrinsicInst * II)401   bool shouldExpandReduction(const IntrinsicInst *II) const override {
402     return false;
403   }
404 
getGISelRematGlobalCost()405   unsigned getGISelRematGlobalCost() const override { return 2; }
406 
getMinTripCountTailFoldingThreshold()407   unsigned getMinTripCountTailFoldingThreshold() const override {
408     return ST->hasSVE() ? 5 : 0;
409   }
410 
411   TailFoldingStyle
getPreferredTailFoldingStyle(bool IVUpdateMayOverflow)412   getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
413     if (ST->hasSVE())
414       return IVUpdateMayOverflow
415                  ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
416                  : TailFoldingStyle::DataAndControlFlow;
417 
418     return TailFoldingStyle::DataWithoutLaneMask;
419   }
420 
421   bool preferFixedOverScalableIfEqualCost() const override;
422 
423   unsigned getEpilogueVectorizationMinVF() const override;
424 
425   bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override;
426 
supportsScalableVectors()427   bool supportsScalableVectors() const override {
428     return ST->isSVEorStreamingSVEAvailable();
429   }
430 
431   bool enableScalableVectorization() const override;
432 
433   bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
434                                    ElementCount VF) const override;
435 
preferPredicatedReductionSelect()436   bool preferPredicatedReductionSelect() const override { return ST->hasSVE(); }
437 
438   InstructionCost
439   getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
440                              std::optional<FastMathFlags> FMF,
441                              TTI::TargetCostKind CostKind) const override;
442 
443   InstructionCost
444   getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
445                            VectorType *ValTy, std::optional<FastMathFlags> FMF,
446                            TTI::TargetCostKind CostKind) const override;
447 
448   InstructionCost getMulAccReductionCost(
449       bool IsUnsigned, Type *ResTy, VectorType *Ty,
450       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const override;
451 
452   InstructionCost
453   getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
454                  ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
455                  VectorType *SubTp, ArrayRef<const Value *> Args = {},
456                  const Instruction *CxtI = nullptr) const override;
457 
458   InstructionCost getScalarizationOverhead(
459       VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
460       TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
461       ArrayRef<Value *> VL = {}) const override;
462 
463   /// Return the cost of the scaling factor used in the addressing
464   /// mode represented by AM for this target, for a load/store
465   /// of the specified type.
466   /// If the AM is supported, the return value must be >= 0.
467   /// If the AM is not supported, it returns an invalid cost.
468   InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
469                                        StackOffset BaseOffset, bool HasBaseReg,
470                                        int64_t Scale,
471                                        unsigned AddrSpace) const override;
472 
enableSelectOptimize()473   bool enableSelectOptimize() const override {
474     return ST->enableSelectOptimize();
475   }
476 
477   bool shouldTreatInstructionLikeSelect(const Instruction *I) const override;
478 
getStoreMinimumVF(unsigned VF,Type * ScalarMemTy,Type * ScalarValTy)479   unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
480                              Type *ScalarValTy) const override {
481     // We can vectorize store v4i8.
482     if (ScalarMemTy->isIntegerTy(8) && isPowerOf2_32(VF) && VF >= 4)
483       return 4;
484 
485     return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
486   }
487 
getMinPageSize()488   std::optional<unsigned> getMinPageSize() const override { return 4096; }
489 
490   bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
491                      const TargetTransformInfo::LSRCost &C2) const override;
492 
493   bool isProfitableToSinkOperands(Instruction *I,
494                                   SmallVectorImpl<Use *> &Ops) const override;
495   /// @}
496 };
497 
498 } // end namespace llvm
499 
500 #endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
501