//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include <cstdint>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }

  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };

  bool isWideningInstruction(Type *Ty, unsigned Opcode,
                             ArrayRef<const Value *> Args);

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  int getIntImmCost(int64_t Val);
  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                        Type *Ty, TTI::TargetCostKind CostKind,
                        Instruction *Inst = nullptr);
  int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                          Type *Ty, TTI::TargetCostKind CostKind);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() { return true; }

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                 TTI::TargetCostKind CostKind);

  unsigned getRegisterBitWidth(bool Vector) const {
    if (Vector) {
      if (ST->hasSVE())
        return std::max(ST->getMinSVEVectorSizeInBits(), 128u);
      if (ST->hasNEON())
        return 128;
      return 0;
    }
    return 64;
  }

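  /// \returns the subtarget's minimum supported vector register width, in
  /// bits.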
  unsigned getMinVectorRegisterBitWidth() {
    return ST->getMinVectorRegisterBitWidth();
  }

  Optional<unsigned> getMaxVScale() const {
    if (ST->hasSVE())
      return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
    return BaseT::getMaxVScale();
  }

  unsigned getMaxInterleaveFactor(unsigned VF);

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                  const Value *Ptr, bool VariableMask,
                                  Align Alignment, TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr);

  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                       const Instruction *I = nullptr);

  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index);

  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);

  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);

  int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                             bool IsPairwise, bool IsUnsigned,
                             TTI::TargetCostKind CostKind);

  int getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy,
                                    bool IsPairwiseForm,
                                    TTI::TargetCostKind CostKind);

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);

  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         CmpInst::Predicate VecPred,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;
  bool useNeonVector(const Type *Ty) const;

  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                      unsigned AddressSpace, TTI::TargetCostKind CostKind,
                      const Instruction *I = nullptr);

  int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);

  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    if (!isa<ScalableVectorType>(DataType) || !ST->hasSVE())
      return false;

    Type *Ty = cast<ScalableVectorType>(DataType)->getElementType();
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() || Ty->isHalfTy() ||
        Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

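  /// Returns true if a nontemporal store of \p DataType can be lowered
  /// directly to STNP (see the note below); non-vector types defer to the
  /// base implementation.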
  bool isLegalNTStore(Type *DataType, Align Alignment) {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    //       vectors with 2 elements. We might want to improve that, if other
    //       users show up.
    // Nontemporal vector stores can be directly lowered to STNP, if the vector
    // can be halved so that each half fits into a register. That's the case if
    // the element type fits into a register and the number of elements is a
    // power of 2 > 1.
    if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
      unsigned NumElements =
          cast<FixedVectorType>(DataTypeVTy)->getNumElements();
      unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }

  int getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader);

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  unsigned getGISelRematGlobalCost() const {
    return 2;
  }

  bool supportsScalableVectors() const { return ST->hasSVE(); }

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  int getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);

  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
                     VectorType *SubTp);
  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H