xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h (revision 370e009188ba90c3290b1479aa06ec98b66e140a)
1 //===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file defines a TargetTransformInfo::Concept conforming object specific
10 /// to the AArch64 target machine. It uses the target's detailed information to
11 /// provide more precise answers to certain TTI queries, while letting the
12 /// target independent and default TTI implementations handle the rest.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
17 #define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
18 
19 #include "AArch64.h"
20 #include "AArch64Subtarget.h"
21 #include "AArch64TargetMachine.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Analysis/TargetTransformInfo.h"
24 #include "llvm/CodeGen/BasicTTIImpl.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/Intrinsics.h"
27 #include <cstdint>
28 
29 namespace llvm {
30 
31 class APInt;
32 class Instruction;
33 class IntrinsicInst;
34 class Loop;
35 class SCEV;
36 class ScalarEvolution;
37 class Type;
38 class Value;
39 class VectorType;
40 
41 class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
42   using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
43   using TTI = TargetTransformInfo;
44 
45   friend BaseT;
46 
47   const AArch64Subtarget *ST;
48   const AArch64TargetLowering *TLI;
49 
50   const AArch64Subtarget *getST() const { return ST; }
51   const AArch64TargetLowering *getTLI() const { return TLI; }
52 
53   enum MemIntrinsicType {
54     VECTOR_LDST_TWO_ELEMENTS,
55     VECTOR_LDST_THREE_ELEMENTS,
56     VECTOR_LDST_FOUR_ELEMENTS
57   };
58 
59   bool isWideningInstruction(Type *Ty, unsigned Opcode,
60                              ArrayRef<const Value *> Args);
61 
62 public:
63   explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
64       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
65         TLI(ST->getTargetLowering()) {}
66 
67   bool areInlineCompatible(const Function *Caller,
68                            const Function *Callee) const;
69 
70   /// \name Scalar TTI Implementations
71   /// @{
72 
73   using BaseT::getIntImmCost;
74   InstructionCost getIntImmCost(int64_t Val);
75   InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
76                                 TTI::TargetCostKind CostKind);
77   InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
78                                     const APInt &Imm, Type *Ty,
79                                     TTI::TargetCostKind CostKind,
80                                     Instruction *Inst = nullptr);
81   InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
82                                       const APInt &Imm, Type *Ty,
83                                       TTI::TargetCostKind CostKind);
84   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
85 
86   /// @}
87 
88   /// \name Vector TTI Implementations
89   /// @{
90 
91   bool enableInterleavedAccessVectorization() { return true; }
92 
93   unsigned getNumberOfRegisters(unsigned ClassID) const {
94     bool Vector = (ClassID == 1);
95     if (Vector) {
96       if (ST->hasNEON())
97         return 32;
98       return 0;
99     }
100     return 31;
101   }
102 
103   InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
104                                         TTI::TargetCostKind CostKind);
105 
106   Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
107                                                IntrinsicInst &II) const;
108 
109   Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
110       InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
111       APInt &UndefElts2, APInt &UndefElts3,
112       std::function<void(Instruction *, unsigned, APInt, APInt &)>
113           SimplifyAndSetOp) const;
114 
115   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
116     switch (K) {
117     case TargetTransformInfo::RGK_Scalar:
118       return TypeSize::getFixed(64);
119     case TargetTransformInfo::RGK_FixedWidthVector:
120       if (ST->hasSVE())
121         return TypeSize::getFixed(
122             std::max(ST->getMinSVEVectorSizeInBits(), 128u));
123       return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
124     case TargetTransformInfo::RGK_ScalableVector:
125       return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
126     }
127     llvm_unreachable("Unsupported register kind");
128   }
129 
130   unsigned getMinVectorRegisterBitWidth() const {
131     return ST->getMinVectorRegisterBitWidth();
132   }
133 
134   Optional<unsigned> getVScaleForTuning() const {
135     return ST->getVScaleForTuning();
136   }
137 
138   bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
139 
140   /// Try to return an estimate cost factor that can be used as a multiplier
141   /// when scalarizing an operation for a vector with ElementCount \p VF.
142   /// For scalable vectors this currently takes the most pessimistic view based
143   /// upon the maximum possible value for vscale.
144   unsigned getMaxNumElements(ElementCount VF) const {
145     if (!VF.isScalable())
146       return VF.getFixedValue();
147 
148     return VF.getKnownMinValue() * ST->getVScaleForTuning();
149   }
150 
151   unsigned getMaxInterleaveFactor(unsigned VF);
152 
153   bool prefersVectorizedAddressing() const;
154 
155   InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
156                                         Align Alignment, unsigned AddressSpace,
157                                         TTI::TargetCostKind CostKind);
158 
159   InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
160                                          const Value *Ptr, bool VariableMask,
161                                          Align Alignment,
162                                          TTI::TargetCostKind CostKind,
163                                          const Instruction *I = nullptr);
164 
165   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
166                                    TTI::CastContextHint CCH,
167                                    TTI::TargetCostKind CostKind,
168                                    const Instruction *I = nullptr);
169 
170   InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
171                                            VectorType *VecTy, unsigned Index);
172 
173   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
174                                  const Instruction *I = nullptr);
175 
176   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
177                                      unsigned Index);
178 
179   InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
180                                          bool IsUnsigned,
181                                          TTI::TargetCostKind CostKind);
182 
183   InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
184                                                 VectorType *ValTy,
185                                                 TTI::TargetCostKind CostKind);
186 
187   InstructionCost getSpliceCost(VectorType *Tp, int Index);
188 
189   InstructionCost getArithmeticInstrCost(
190       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
191       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
192       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
193       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
194       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
195       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
196       const Instruction *CxtI = nullptr);
197 
198   InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
199                                             const SCEV *Ptr);
200 
201   InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
202                                      CmpInst::Predicate VecPred,
203                                      TTI::TargetCostKind CostKind,
204                                      const Instruction *I = nullptr);
205 
206   TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
207                                                     bool IsZeroCmp) const;
208   bool useNeonVector(const Type *Ty) const;
209 
210   InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
211                                   MaybeAlign Alignment, unsigned AddressSpace,
212                                   TTI::TargetCostKind CostKind,
213                                   const Instruction *I = nullptr);
214 
215   InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
216 
217   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
218                                TTI::UnrollingPreferences &UP,
219                                OptimizationRemarkEmitter *ORE);
220 
221   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
222                              TTI::PeelingPreferences &PP);
223 
224   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
225                                            Type *ExpectedType);
226 
227   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
228 
229   bool isElementTypeLegalForScalableVector(Type *Ty) const {
230     if (Ty->isPointerTy())
231       return true;
232 
233     if (Ty->isBFloatTy() && ST->hasBF16())
234       return true;
235 
236     if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
237       return true;
238 
239     if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
240         Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
241       return true;
242 
243     return false;
244   }
245 
246   bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
247     if (!ST->hasSVE())
248       return false;
249 
250     // For fixed vectors, avoid scalarization if using SVE for them.
251     if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
252       return false; // Fall back to scalarization of masked operations.
253 
254     return isElementTypeLegalForScalableVector(DataType->getScalarType());
255   }
256 
257   bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
258     return isLegalMaskedLoadStore(DataType, Alignment);
259   }
260 
261   bool isLegalMaskedStore(Type *DataType, Align Alignment) {
262     return isLegalMaskedLoadStore(DataType, Alignment);
263   }
264 
265   bool isLegalMaskedGatherScatter(Type *DataType) const {
266     if (!ST->hasSVE())
267       return false;
268 
269     // For fixed vectors, scalarize if not using SVE for them.
270     auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
271     if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
272                          DataTypeFVTy->getNumElements() < 2))
273       return false;
274 
275     return isElementTypeLegalForScalableVector(DataType->getScalarType());
276   }
277 
278   bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
279     return isLegalMaskedGatherScatter(DataType);
280   }
281   bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
282     return isLegalMaskedGatherScatter(DataType);
283   }
284 
285   bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
286     // Return true if we can generate a `ld1r` splat load instruction.
287     if (!ST->hasNEON() || NumElements.isScalable())
288       return false;
289     switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
290     case 8:
291     case 16:
292     case 32:
293     case 64: {
294       // We accept bit-widths >= 64bits and elements {8,16,32,64} bits.
295       unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
296       return VectorBits >= 64;
297     }
298     }
299     return false;
300   }
301 
302   bool isLegalNTStore(Type *DataType, Align Alignment) {
303     // NOTE: The logic below is mostly geared towards LV, which calls it with
304     //       vectors with 2 elements. We might want to improve that, if other
305     //       users show up.
306     // Nontemporal vector stores can be directly lowered to STNP, if the vector
307     // can be halved so that each half fits into a register. That's the case if
308     // the element type fits into a register and the number of elements is a
309     // power of 2 > 1.
310     if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
311       unsigned NumElements =
312           cast<FixedVectorType>(DataTypeVTy)->getNumElements();
313       unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
314       return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
315              EltSize <= 128 && isPowerOf2_64(EltSize);
316     }
317     return BaseT::isLegalNTStore(DataType, Alignment);
318   }
319 
320   bool enableOrderedReductions() const { return true; }
321 
322   InstructionCost getInterleavedMemoryOpCost(
323       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
324       Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
325       bool UseMaskForCond = false, bool UseMaskForGaps = false);
326 
327   bool
328   shouldConsiderAddressTypePromotion(const Instruction &I,
329                                      bool &AllowPromotionWithoutCommonHeader);
330 
331   bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
332 
333   unsigned getGISelRematGlobalCost() const {
334     return 2;
335   }
336 
337   unsigned getMinTripCountTailFoldingThreshold() const {
338     return ST->hasSVE() ? 5 : 0;
339   }
340 
341   PredicationStyle emitGetActiveLaneMask() const {
342     if (ST->hasSVE())
343       return PredicationStyle::DataAndControlFlow;
344     return PredicationStyle::None;
345   }
346 
347   bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
348                                    AssumptionCache &AC, TargetLibraryInfo *TLI,
349                                    DominatorTree *DT,
350                                    LoopVectorizationLegality *LVL);
351 
352   bool supportsScalableVectors() const { return ST->hasSVE(); }
353 
354   bool enableScalableVectorization() const { return ST->hasSVE(); }
355 
356   bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
357                                    ElementCount VF) const;
358 
359   bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
360                                        TTI::ReductionFlags Flags) const {
361     return ST->hasSVE();
362   }
363 
364   InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
365                                              Optional<FastMathFlags> FMF,
366                                              TTI::TargetCostKind CostKind);
367 
368   InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
369                                  ArrayRef<int> Mask, int Index,
370                                  VectorType *SubTp,
371                                  ArrayRef<const Value *> Args = None);
372   /// @}
373 };
374 
375 } // end namespace llvm
376 
377 #endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
378