//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information
/// to provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include <cstdint>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }

  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };

  bool isWideningInstruction(Type *Ty, unsigned Opcode,
                             ArrayRef<const Value *> Args);

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(int64_t Val);
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() { return true; }

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;

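  /// Returns the width of a register of the given kind: 64 bits for scalar
  /// registers, 128 bits for NEON fixed-width vectors (or the configured
  /// minimum SVE vector length, if larger), 128-bit granules for scalable
  /// vectors, and zero when the required vector extension is unavailable.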
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(64);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->hasSVE())
        return TypeSize::getFixed(
            std::max(ST->getMinSVEVectorSizeInBits(), 128u));
      return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
    }
    llvm_unreachable("Unsupported register kind");
  }

  unsigned getMinVectorRegisterBitWidth() const {
    return ST->getMinVectorRegisterBitWidth();
  }

  Optional<unsigned> getMaxVScale() const {
    if (ST->hasSVE())
      return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
    return BaseT::getMaxVScale();
  }

  /// Try to return an estimated cost factor that can be used as a multiplier
  /// when scalarizing an operation for a vector with ElementCount \p VF.
  /// For scalable vectors this currently takes the most pessimistic view based
  /// upon the maximum possible value for vscale.
  unsigned getMaxNumElements(ElementCount VF) const {
    if (!VF.isScalable())
      return VF.getFixedValue();
    Optional<unsigned> MaxNumVScale = getMaxVScale();
    assert(MaxNumVScale && "Expected valid max vscale value");
    return *MaxNumVScale * VF.getKnownMinValue();
  }

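  // Example: with the architectural SVE maximum of 2048 bits per vector,
  // getMaxVScale() above yields 2048 / 128 = 16, so getMaxNumElements()
  // pessimistically treats a scalable VF of 4 (e.g. a <vscale x 4 x i32>
  // vector) as having at most 16 * 4 = 64 elements.
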
  unsigned getMaxInterleaveFactor(unsigned VF);

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index);

  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                         bool IsUnsigned,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
                                                VectorType *ValTy,
                                                TTI::TargetCostKind CostKind);

  InstructionCost getSpliceCost(VectorType *Tp, int Index);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;
  bool useNeonVector(const Type *Ty) const;

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
                                  MaybeAlign Alignment, unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr);

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);

  bool isElementTypeLegalForScalableVector(Type *Ty) const {
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() && ST->hasBF16())
      return true;

    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }

  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, avoid scalarization if using SVE for them.
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
      return false; // Fall back to scalarization of masked operations.

    return !DataType->getScalarType()->isIntegerTy(1) &&
           isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, scalarize if not using SVE for them.
    auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
    if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
                         DataTypeFVTy->getNumElements() < 2))
      return false;

    return !DataType->getScalarType()->isIntegerTy(1) &&
           isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

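  // For example, the check below accepts non-temporal stores of <4 x i32> or
  // <2 x double>, which can be split into two register-sized halves and
  // lowered to STNP, whereas <3 x i32> (element count not a power of 2) and
  // <4 x i1> (sub-byte elements) are rejected.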
  bool isLegalNTStore(Type *DataType, Align Alignment) {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    //       vectors with 2 elements. We might want to improve that, if other
    //       users show up.
    // Nontemporal vector stores can be directly lowered to STNP, if the vector
    // can be halved so that each half fits into a register. That's the case if
    // the element type fits into a register and the number of elements is a
    // power of 2 > 1.
    if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
      unsigned NumElements =
          cast<FixedVectorType>(DataTypeVTy)->getNumElements();
      unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader);

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  unsigned getGISelRematGlobalCost() const {
    return 2;
  }

  bool supportsScalableVectors() const { return ST->hasSVE(); }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const;

  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp);
  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H