//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include <cstdint>
#include <optional>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }

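  // Classifies the NEON structured memory intrinsics (e.g. aarch64.neon.ld2
  // through ld4 and their store counterparts) by interleave element count.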
  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };

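  // Returns true if an add/sub whose operands are extended can be lowered to
  // one of the widening instructions (e.g. UADDL/SADDL or UADDW/SADDW), in
  // which case the extension is effectively free.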
  bool isWideningInstruction(Type *Ty, unsigned Opcode,
                             ArrayRef<const Value *> Args);

  // A helper function called by 'getVectorInstrCost'.
  //
  // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
  // indicates whether the vector instruction actually exists in the input IR,
  // or is merely hypothetical (e.g. when a vectorizer queries the cost of an
  // instruction it has not created yet).
  InstructionCost getVectorInstrCostHelper(Type *Val, unsigned Index,
                                           bool HasRealUse);

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
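  // Roughly the number of MOVZ/MOVK-style instructions needed to materialize
  // the 64-bit immediate.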
  InstructionCost getIntImmCost(int64_t Val);
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

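  // AArch64 has structured load/store instructions (LD2/LD3/LD4 and the
  // matching stores), so interleaved accesses are generally worth forming.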
  bool enableInterleavedAccessVectorization() { return true; }

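  // ClassID 1 requests the vector register file (V0-V31, available with
  // NEON); any other class is answered with the 31 general-purpose registers.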
  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;

  unsigned getMinVectorRegisterBitWidth() const {
    return ST->getMinVectorRegisterBitWidth();
  }

  std::optional<unsigned> getVScaleForTuning() const {
    return ST->getVScaleForTuning();
  }

  bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;

  /// Try to return an estimate cost factor that can be used as a multiplier
  /// when scalarizing an operation for a vector with ElementCount \p VF.
  /// For scalable vectors this currently takes the most pessimistic view based
  /// upon the maximum possible value for vscale.
  unsigned getMaxNumElements(ElementCount VF) const {
    if (!VF.isScalable())
      return VF.getFixedValue();

    return VF.getKnownMinValue() * ST->getVScaleForTuning();
  }

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool prefersVectorizedAddressing() const;

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index);

  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                         bool IsUnsigned,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
                                                VectorType *ValTy,
                                                TTI::TargetCostKind CostKind);

  InstructionCost getSpliceCost(VectorType *Tp, int Index);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;
  bool useNeonVector(const Type *Ty) const;

  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr);

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);

  bool isElementTypeLegalForScalableVector(Type *Ty) const {
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() && ST->hasBF16())
      return true;

    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }

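  // Masked loads/stores rely on SVE predicated memory instructions; without
  // SVE they would have to be scalarized.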
  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    if (!ST->hasSVE())
      return false;

    // For fixed-width vectors, masked operations are only worthwhile when SVE
    // is used for fixed-length vectors; otherwise fall back to scalarization.
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
      return false; // Fall back to scalarization of masked operations.

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

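  // Gathers/scatters likewise need SVE, and are not available when only
  // streaming-compatible SVE code may be emitted.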
  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (!ST->hasSVE() || ST->forceStreamingCompatibleSVE())
      return false;

    // For fixed vectors, scalarize if not using SVE for them.
    auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
    if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
                         DataTypeFVTy->getNumElements() < 2))
      return false;

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    // Return true if we can generate a `ld1r` splat load instruction.
    if (!ST->hasNEON() || NumElements.isScalable())
      return false;
    switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
    case 8:
    case 16:
    case 32:
    case 64: {
      // We accept vectors of at least 64 bits total whose elements are 8, 16,
      // 32, or 64 bits wide.
      unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
      return VectorBits >= 64;
    }
    }
    return false;
  }

  bool isLegalNTStoreLoad(Type *DataType, Align Alignment) {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    //       vectors with 2 elements. We might want to improve that, if other
    //       users show up.
    // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if
    // the vector can be halved so that each half fits into a register. That's
    // the case if the element type fits into a register and the number of
    // elements is a power of 2 > 1.
    if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) {
      unsigned NumElements = DataTypeTy->getNumElements();
      unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) {
    return isLegalNTStoreLoad(DataType, Alignment);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) {
    // Only supports little-endian targets.
    if (ST->isLittleEndian())
      return isLegalNTStoreLoad(DataType, Alignment);
    return BaseT::isLegalNTLoad(DataType, Alignment);
  }

  bool enableOrderedReductions() const { return true; }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader);

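  // Vector reduction intrinsics are lowered natively (e.g. via ADDV or the
  // SVE reduction instructions), so do not expand them into shuffle sequences.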
  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

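  // Materializing a global address typically takes an ADRP + ADD (or
  // ADRP + LDR) pair, hence a rematerialization cost of two instructions.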
  unsigned getGISelRematGlobalCost() const {
    return 2;
  }

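  // With SVE, tail folding is unlikely to pay off for very short loops, so
  // require a known trip count of at least five before folding the tail.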
  unsigned getMinTripCountTailFoldingThreshold() const {
    return ST->hasSVE() ? 5 : 0;
  }

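  // SVE can compute the active lane mask directly (e.g. with WHILELO), so
  // predication can drive both the data and the control flow of a loop.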
  PredicationStyle emitGetActiveLaneMask() const {
    if (ST->hasSVE())
      return PredicationStyle::DataAndControlFlow;
    return PredicationStyle::None;
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   LoopVectorizationLegality *LVL,
                                   InterleavedAccessInfo *IAI);

  bool supportsScalableVectors() const { return ST->hasSVE(); }

  bool enableScalableVectorization() const { return ST->hasSVE(); }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return ST->hasSVE();
  }

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt);

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;
  /// @}

  bool enableSelectOptimize() { return ST->enableSelectOptimize(); }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H