1 //===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file a TargetTransformInfo::Concept conforming object specific to the 10 /// AArch64 target machine. It uses the target's detailed information to 11 /// provide more precise answers to certain TTI queries, while letting the 12 /// target independent and default TTI implementations handle the rest. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H 17 #define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H 18 19 #include "AArch64.h" 20 #include "AArch64Subtarget.h" 21 #include "AArch64TargetMachine.h" 22 #include "llvm/ADT/ArrayRef.h" 23 #include "llvm/Analysis/TargetTransformInfo.h" 24 #include "llvm/CodeGen/BasicTTIImpl.h" 25 #include "llvm/IR/Function.h" 26 #include "llvm/IR/Intrinsics.h" 27 #include <cstdint> 28 29 namespace llvm { 30 31 class APInt; 32 class Instruction; 33 class IntrinsicInst; 34 class Loop; 35 class SCEV; 36 class ScalarEvolution; 37 class Type; 38 class Value; 39 class VectorType; 40 41 class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> { 42 using BaseT = BasicTTIImplBase<AArch64TTIImpl>; 43 using TTI = TargetTransformInfo; 44 45 friend BaseT; 46 47 const AArch64Subtarget *ST; 48 const AArch64TargetLowering *TLI; 49 50 const AArch64Subtarget *getST() const { return ST; } 51 const AArch64TargetLowering *getTLI() const { return TLI; } 52 53 enum MemIntrinsicType { 54 VECTOR_LDST_TWO_ELEMENTS, 55 VECTOR_LDST_THREE_ELEMENTS, 56 VECTOR_LDST_FOUR_ELEMENTS 57 }; 58 59 bool isWideningInstruction(Type *Ty, unsigned Opcode, 60 ArrayRef<const Value *> Args); 61 62 public: 63 explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F) 64 : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), 65 TLI(ST->getTargetLowering()) {} 66 67 bool areInlineCompatible(const Function *Caller, 68 const Function *Callee) const; 69 70 /// \name Scalar TTI Implementations 71 /// @{ 72 73 using BaseT::getIntImmCost; 74 InstructionCost getIntImmCost(int64_t Val); 75 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, 76 TTI::TargetCostKind CostKind); 77 InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, 78 const APInt &Imm, Type *Ty, 79 TTI::TargetCostKind CostKind, 80 Instruction *Inst = nullptr); 81 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, 82 const APInt &Imm, Type *Ty, 83 TTI::TargetCostKind CostKind); 84 TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); 85 86 /// @} 87 88 /// \name Vector TTI Implementations 89 /// @{ 90 91 bool enableInterleavedAccessVectorization() { return true; } 92 93 unsigned getNumberOfRegisters(unsigned ClassID) const { 94 bool Vector = (ClassID == 1); 95 if (Vector) { 96 if (ST->hasNEON()) 97 return 32; 98 return 0; 99 } 100 return 31; 101 } 102 103 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, 104 TTI::TargetCostKind CostKind); 105 106 Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, 107 IntrinsicInst &II) const; 108 109 Optional<Value *> simplifyDemandedVectorEltsIntrinsic( 110 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, 111 APInt &UndefElts2, APInt &UndefElts3, 112 std::function<void(Instruction *, unsigned, APInt, APInt &)> 113 SimplifyAndSetOp) const; 114 115 TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { 116 switch (K) { 117 case TargetTransformInfo::RGK_Scalar: 118 return TypeSize::getFixed(64); 119 case TargetTransformInfo::RGK_FixedWidthVector: 120 if (ST->hasSVE()) 121 return TypeSize::getFixed( 122 std::max(ST->getMinSVEVectorSizeInBits(), 128u)); 123 return TypeSize::getFixed(ST->hasNEON() ? 128 : 0); 124 case TargetTransformInfo::RGK_ScalableVector: 125 return TypeSize::getScalable(ST->hasSVE() ? 128 : 0); 126 } 127 llvm_unreachable("Unsupported register kind"); 128 } 129 130 unsigned getMinVectorRegisterBitWidth() const { 131 return ST->getMinVectorRegisterBitWidth(); 132 } 133 134 Optional<unsigned> getVScaleForTuning() const { 135 return ST->getVScaleForTuning(); 136 } 137 138 bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const; 139 140 /// Try to return an estimate cost factor that can be used as a multiplier 141 /// when scalarizing an operation for a vector with ElementCount \p VF. 142 /// For scalable vectors this currently takes the most pessimistic view based 143 /// upon the maximum possible value for vscale. 144 unsigned getMaxNumElements(ElementCount VF) const { 145 if (!VF.isScalable()) 146 return VF.getFixedValue(); 147 148 return VF.getKnownMinValue() * ST->getVScaleForTuning(); 149 } 150 151 unsigned getMaxInterleaveFactor(unsigned VF); 152 153 bool prefersVectorizedAddressing() const; 154 155 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, 156 Align Alignment, unsigned AddressSpace, 157 TTI::TargetCostKind CostKind); 158 159 InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, 160 const Value *Ptr, bool VariableMask, 161 Align Alignment, 162 TTI::TargetCostKind CostKind, 163 const Instruction *I = nullptr); 164 165 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, 166 TTI::CastContextHint CCH, 167 TTI::TargetCostKind CostKind, 168 const Instruction *I = nullptr); 169 170 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, 171 VectorType *VecTy, unsigned Index); 172 173 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, 174 const Instruction *I = nullptr); 175 176 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, 177 unsigned Index); 178 179 InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, 180 bool IsUnsigned, 181 TTI::TargetCostKind CostKind); 182 183 InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, 184 VectorType *ValTy, 185 TTI::TargetCostKind CostKind); 186 187 InstructionCost getSpliceCost(VectorType *Tp, int Index); 188 189 InstructionCost getArithmeticInstrCost( 190 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, 191 TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, 192 TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, 193 TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, 194 TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, 195 ArrayRef<const Value *> Args = ArrayRef<const Value *>(), 196 const Instruction *CxtI = nullptr); 197 198 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, 199 const SCEV *Ptr); 200 201 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, 202 CmpInst::Predicate VecPred, 203 TTI::TargetCostKind CostKind, 204 const Instruction *I = nullptr); 205 206 TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, 207 bool IsZeroCmp) const; 208 bool useNeonVector(const Type *Ty) const; 209 210 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, 211 MaybeAlign Alignment, unsigned AddressSpace, 212 TTI::TargetCostKind CostKind, 213 const Instruction *I = nullptr); 214 215 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys); 216 217 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, 218 TTI::UnrollingPreferences &UP, 219 OptimizationRemarkEmitter *ORE); 220 221 void getPeelingPreferences(Loop *L, ScalarEvolution &SE, 222 TTI::PeelingPreferences &PP); 223 224 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, 225 Type *ExpectedType); 226 227 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info); 228 229 bool isElementTypeLegalForScalableVector(Type *Ty) const { 230 if (Ty->isPointerTy()) 231 return true; 232 233 if (Ty->isBFloatTy() && ST->hasBF16()) 234 return true; 235 236 if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) 237 return true; 238 239 if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) || 240 Ty->isIntegerTy(32) || Ty->isIntegerTy(64)) 241 return true; 242 243 return false; 244 } 245 246 bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) { 247 if (!ST->hasSVE()) 248 return false; 249 250 // For fixed vectors, avoid scalarization if using SVE for them. 251 if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors()) 252 return false; // Fall back to scalarization of masked operations. 253 254 return isElementTypeLegalForScalableVector(DataType->getScalarType()); 255 } 256 257 bool isLegalMaskedLoad(Type *DataType, Align Alignment) { 258 return isLegalMaskedLoadStore(DataType, Alignment); 259 } 260 261 bool isLegalMaskedStore(Type *DataType, Align Alignment) { 262 return isLegalMaskedLoadStore(DataType, Alignment); 263 } 264 265 bool isLegalMaskedGatherScatter(Type *DataType) const { 266 if (!ST->hasSVE()) 267 return false; 268 269 // For fixed vectors, scalarize if not using SVE for them. 270 auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType); 271 if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() || 272 DataTypeFVTy->getNumElements() < 2)) 273 return false; 274 275 return isElementTypeLegalForScalableVector(DataType->getScalarType()); 276 } 277 278 bool isLegalMaskedGather(Type *DataType, Align Alignment) const { 279 return isLegalMaskedGatherScatter(DataType); 280 } 281 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const { 282 return isLegalMaskedGatherScatter(DataType); 283 } 284 285 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const { 286 // Return true if we can generate a `ld1r` splat load instruction. 287 if (!ST->hasNEON() || NumElements.isScalable()) 288 return false; 289 switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) { 290 case 8: 291 case 16: 292 case 32: 293 case 64: { 294 // We accept bit-widths >= 64bits and elements {8,16,32,64} bits. 295 unsigned VectorBits = NumElements.getFixedValue() * ElementBits; 296 return VectorBits >= 64; 297 } 298 } 299 return false; 300 } 301 302 bool isLegalNTStore(Type *DataType, Align Alignment) { 303 // NOTE: The logic below is mostly geared towards LV, which calls it with 304 // vectors with 2 elements. We might want to improve that, if other 305 // users show up. 306 // Nontemporal vector stores can be directly lowered to STNP, if the vector 307 // can be halved so that each half fits into a register. That's the case if 308 // the element type fits into a register and the number of elements is a 309 // power of 2 > 1. 310 if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) { 311 unsigned NumElements = 312 cast<FixedVectorType>(DataTypeVTy)->getNumElements(); 313 unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits(); 314 return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 && 315 EltSize <= 128 && isPowerOf2_64(EltSize); 316 } 317 return BaseT::isLegalNTStore(DataType, Alignment); 318 } 319 320 bool enableOrderedReductions() const { return true; } 321 322 InstructionCost getInterleavedMemoryOpCost( 323 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, 324 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, 325 bool UseMaskForCond = false, bool UseMaskForGaps = false); 326 327 bool 328 shouldConsiderAddressTypePromotion(const Instruction &I, 329 bool &AllowPromotionWithoutCommonHeader); 330 331 bool shouldExpandReduction(const IntrinsicInst *II) const { return false; } 332 333 unsigned getGISelRematGlobalCost() const { 334 return 2; 335 } 336 337 PredicationStyle emitGetActiveLaneMask() const { 338 if (ST->hasSVE()) 339 return PredicationStyle::DataAndControlFlow; 340 return PredicationStyle::None; 341 } 342 343 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, 344 AssumptionCache &AC, TargetLibraryInfo *TLI, 345 DominatorTree *DT, 346 LoopVectorizationLegality *LVL); 347 348 bool supportsScalableVectors() const { return ST->hasSVE(); } 349 350 bool enableScalableVectorization() const { return ST->hasSVE(); } 351 352 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, 353 ElementCount VF) const; 354 355 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, 356 TTI::ReductionFlags Flags) const { 357 return ST->hasSVE(); 358 } 359 360 InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, 361 Optional<FastMathFlags> FMF, 362 TTI::TargetCostKind CostKind); 363 364 InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, 365 ArrayRef<int> Mask, int Index, 366 VectorType *SubTp, 367 ArrayRef<const Value *> Args = None); 368 /// @} 369 }; 370 371 } // end namespace llvm 372 373 #endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H 374