xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h (revision cfd6422a5217410fbd66f7a7a8a64d9d85e61229)
1 //===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
11 /// provide more precise answers to certain TTI queries, while letting the
12 /// target independent and default TTI implementations handle the rest.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
17 #define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
18 
19 #include "AArch64.h"
20 #include "AArch64Subtarget.h"
21 #include "AArch64TargetMachine.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Analysis/TargetTransformInfo.h"
24 #include "llvm/CodeGen/BasicTTIImpl.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/Intrinsics.h"
27 #include <cstdint>
28 
29 namespace llvm {
30 
31 class APInt;
32 class Instruction;
33 class IntrinsicInst;
34 class Loop;
35 class SCEV;
36 class ScalarEvolution;
37 class Type;
38 class Value;
39 class VectorType;
40 
41 class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
42   using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
43   using TTI = TargetTransformInfo;
44 
45   friend BaseT;
46 
47   const AArch64Subtarget *ST;
48   const AArch64TargetLowering *TLI;
49 
50   const AArch64Subtarget *getST() const { return ST; }
51   const AArch64TargetLowering *getTLI() const { return TLI; }
52 
53   enum MemIntrinsicType {
54     VECTOR_LDST_TWO_ELEMENTS,
55     VECTOR_LDST_THREE_ELEMENTS,
56     VECTOR_LDST_FOUR_ELEMENTS
57   };
58 
59   bool isWideningInstruction(Type *Ty, unsigned Opcode,
60                              ArrayRef<const Value *> Args);
61 
62 public:
63   explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
64       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
65         TLI(ST->getTargetLowering()) {}
66 
67   bool areInlineCompatible(const Function *Caller,
68                            const Function *Callee) const;
69 
70   /// \name Scalar TTI Implementations
71   /// @{
72 
73   using BaseT::getIntImmCost;
74   int getIntImmCost(int64_t Val);
75   int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
76   int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
77                         Type *Ty, TTI::TargetCostKind CostKind);
78   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
79                           Type *Ty, TTI::TargetCostKind CostKind);
80   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
81 
82   /// @}
83 
84   /// \name Vector TTI Implementations
85   /// @{
86 
87   bool enableInterleavedAccessVectorization() { return true; }
88 
89   unsigned getNumberOfRegisters(unsigned ClassID) const {
90     bool Vector = (ClassID == 1);
91     if (Vector) {
92       if (ST->hasNEON())
93         return 32;
94       return 0;
95     }
96     return 31;
97   }
98 
99   unsigned getRegisterBitWidth(bool Vector) const {
100     if (Vector) {
101       if (ST->hasSVE())
102         return std::max(ST->getMinSVEVectorSizeInBits(), 128u);
103       if (ST->hasNEON())
104         return 128;
105       return 0;
106     }
107     return 64;
108   }
109 
110   unsigned getMinVectorRegisterBitWidth() {
111     return ST->getMinVectorRegisterBitWidth();
112   }
113 
114   unsigned getMaxInterleaveFactor(unsigned VF);
115 
116   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
117                        TTI::TargetCostKind CostKind,
118                        const Instruction *I = nullptr);
119 
120   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
121                                unsigned Index);
122 
123   unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
124 
125   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
126 
127   int getArithmeticInstrCost(
128       unsigned Opcode, Type *Ty,
129       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
130       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
131       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
132       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
133       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
134       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
135       const Instruction *CxtI = nullptr);
136 
137   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
138 
139   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
140                          TTI::TargetCostKind CostKind,
141                          const Instruction *I = nullptr);
142 
143   TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
144                                                     bool IsZeroCmp) const;
145 
146   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
147                       unsigned AddressSpace,
148                       TTI::TargetCostKind CostKind,
149                       const Instruction *I = nullptr);
150 
151   int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
152 
153   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
154                                TTI::UnrollingPreferences &UP);
155 
156   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
157                              TTI::PeelingPreferences &PP);
158 
159   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
160                                            Type *ExpectedType);
161 
162   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
163 
164   bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
165     if (!isa<ScalableVectorType>(DataType) || !ST->hasSVE())
166       return false;
167 
168     Type *Ty = cast<ScalableVectorType>(DataType)->getElementType();
169     if (Ty->isBFloatTy() || Ty->isHalfTy() ||
170         Ty->isFloatTy() || Ty->isDoubleTy())
171       return true;
172 
173     if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
174         Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
175       return true;
176 
177     return false;
178   }
179 
180   bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
181     return isLegalMaskedLoadStore(DataType, Alignment);
182   }
183 
184   bool isLegalMaskedStore(Type *DataType, Align Alignment) {
185     return isLegalMaskedLoadStore(DataType, Alignment);
186   }
187 
188   bool isLegalNTStore(Type *DataType, Align Alignment) {
189     // NOTE: The logic below is mostly geared towards LV, which calls it with
190     //       vectors with 2 elements. We might want to improve that, if other
191     //       users show up.
192     // Nontemporal vector stores can be directly lowered to STNP, if the vector
193     // can be halved so that each half fits into a register. That's the case if
194     // the element type fits into a register and the number of elements is a
195     // power of 2 > 1.
196     if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
197       unsigned NumElements =
198           cast<FixedVectorType>(DataTypeVTy)->getNumElements();
199       unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
200       return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
201              EltSize <= 128 && isPowerOf2_64(EltSize);
202     }
203     return BaseT::isLegalNTStore(DataType, Alignment);
204   }
205 
206   int getInterleavedMemoryOpCost(
207       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
208       Align Alignment, unsigned AddressSpace,
209       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
210       bool UseMaskForCond = false, bool UseMaskForGaps = false);
211 
212   bool
213   shouldConsiderAddressTypePromotion(const Instruction &I,
214                                      bool &AllowPromotionWithoutCommonHeader);
215 
216   bool shouldExpandReduction(const IntrinsicInst *II) const {
217     switch (II->getIntrinsicID()) {
218     case Intrinsic::experimental_vector_reduce_v2_fadd:
219     case Intrinsic::experimental_vector_reduce_v2_fmul:
220       // We don't have legalization support for ordered FP reductions.
221       return !II->getFastMathFlags().allowReassoc();
222 
223     case Intrinsic::experimental_vector_reduce_fmax:
224     case Intrinsic::experimental_vector_reduce_fmin:
225       // Lowering asserts that there are no NaNs.
226       return !II->getFastMathFlags().noNaNs();
227 
228     default:
229       // Don't expand anything else, let legalization deal with it.
230       return false;
231     }
232   }
233 
234   unsigned getGISelRematGlobalCost() const {
235     return 2;
236   }
237 
238   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
239                              TTI::ReductionFlags Flags) const;
240 
241   int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
242                                  bool IsPairwiseForm,
243                                  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
244 
245   int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
246                      VectorType *SubTp);
247   /// @}
248 };
249 
250 } // end namespace llvm
251 
252 #endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
253