//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include <cstdint>
#include <optional>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }

  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };
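  // Returns true if an instruction with the given opcode, producing values of
  // type \p Ty from the operands in \p Args, can typically be lowered to an
  // AArch64 widening instruction (e.g. the "long"/"wide" forms such as
  // uaddl/saddl or uaddw/saddw), in which case extending the narrower operand
  // is effectively free.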
  bool isWideningInstruction(Type *Ty, unsigned Opcode,
                             ArrayRef<const Value *> Args);

  // A helper function called by 'getVectorInstrCost'.
  //
  // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
  // indicates whether the vector instruction is available in the input IR or
  // just imaginary in vectorizer passes.
  InstructionCost getVectorInstrCostHelper(Type *Val, unsigned Index,
                                           bool HasRealUse);

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(int64_t Val);
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() { return true; }

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;

  unsigned getMinVectorRegisterBitWidth() const {
    return ST->getMinVectorRegisterBitWidth();
  }

  std::optional<unsigned> getVScaleForTuning() const {
    return ST->getVScaleForTuning();
  }

  bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;

  /// Try to return an estimate cost factor that can be used as a multiplier
  /// when scalarizing an operation for a vector with ElementCount \p VF.
  /// For scalable vectors this currently scales the minimum element count by
  /// the vscale value the subtarget is tuned for.
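  /// For example, with a vscale-for-tuning value of 2, a VF of
  /// <vscale x 4 x i32> yields an estimated maximum of 8 elements.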
  unsigned getMaxNumElements(ElementCount VF) const {
    if (!VF.isScalable())
      return VF.getFixedValue();

    return VF.getKnownMinValue() * ST->getVScaleForTuning();
  }

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool prefersVectorizedAddressing() const;

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index);

  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                         bool IsUnsigned,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
                                                VectorType *ValTy,
                                                TTI::TargetCostKind CostKind);

  InstructionCost getSpliceCost(VectorType *Tp, int Index);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;
  bool useNeonVector(const Type *Ty) const;

  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr);

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);

  bool isElementTypeLegalForScalableVector(Type *Ty) const {
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() && ST->hasBF16())
      return true;

    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }
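  // Masked loads and stores are only considered legal when they can be lowered
  // to SVE predicated memory instructions; without SVE (or when SVE is not
  // used for fixed-length vectors) they are left to be scalarized.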
  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, avoid scalarization if using SVE for them.
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
      return false; // Fall back to scalarization of masked operations.

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (!ST->hasSVE() || ST->forceStreamingCompatibleSVE())
      return false;

    // For fixed vectors, scalarize if not using SVE for them.
    auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
    if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
                         DataTypeFVTy->getNumElements() < 2))
      return false;

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    // Return true if we can generate a `ld1r` splat load instruction.
    if (!ST->hasNEON() || NumElements.isScalable())
      return false;
    switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
    case 8:
    case 16:
    case 32:
    case 64: {
      // We accept vectors of at least 64 bits with {8,16,32,64}-bit elements.
      unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
      return VectorBits >= 64;
    }
    }
    return false;
  }

  bool isLegalNTStoreLoad(Type *DataType, Align Alignment) {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    //       vectors with 2 elements. We might want to improve that, if other
    //       users show up.
    // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if
    // the vector can be halved so that each half fits into a register. That's
    // the case if the element type fits into a register and the number of
    // elements is a power of 2 > 1.
    if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) {
      unsigned NumElements = DataTypeTy->getNumElements();
      unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }
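  // As an example of the check above: a nontemporal store of <4 x i64>
  // (256 bits) can be halved into two q-register sized parts and emitted as a
  // single STNP, whereas <3 x i32> is rejected because its element count is
  // not a power of 2.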
  bool isLegalNTStore(Type *DataType, Align Alignment) {
    return isLegalNTStoreLoad(DataType, Alignment);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) {
    // Only supports little-endian targets.
    if (ST->isLittleEndian())
      return isLegalNTStoreLoad(DataType, Alignment);
    return BaseT::isLegalNTLoad(DataType, Alignment);
  }

  bool enableOrderedReductions() const { return true; }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader);

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  unsigned getGISelRematGlobalCost() const {
    return 2;
  }

  unsigned getMinTripCountTailFoldingThreshold() const {
    return ST->hasSVE() ? 5 : 0;
  }

  PredicationStyle emitGetActiveLaneMask() const {
    if (ST->hasSVE())
      return PredicationStyle::DataAndControlFlow;
    return PredicationStyle::None;
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   LoopVectorizationLegality *LVL,
                                   InterleavedAccessInfo *IAI);

  bool supportsScalableVectors() const { return ST->hasSVE(); }

  bool enableScalableVectorization() const { return ST->hasSVE(); }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return ST->hasSVE();
  }

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt);

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;
  /// @}

  bool enableSelectOptimize() { return ST->enableSelectOptimize(); }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H