1 //===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file a TargetTransformInfoImplBase conforming object specific to the 10 /// AArch64 target machine. It uses the target's detailed information to 11 /// provide more precise answers to certain TTI queries, while letting the 12 /// target independent and default TTI implementations handle the rest. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H 17 #define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H 18 19 #include "AArch64.h" 20 #include "AArch64Subtarget.h" 21 #include "AArch64TargetMachine.h" 22 #include "llvm/Analysis/TargetTransformInfo.h" 23 #include "llvm/CodeGen/BasicTTIImpl.h" 24 #include "llvm/IR/Function.h" 25 #include "llvm/IR/Intrinsics.h" 26 #include "llvm/Support/InstructionCost.h" 27 #include <cstdint> 28 #include <optional> 29 30 namespace llvm { 31 32 class APInt; 33 class Instruction; 34 class IntrinsicInst; 35 class Loop; 36 class SCEV; 37 class ScalarEvolution; 38 class Type; 39 class Value; 40 class VectorType; 41 42 class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> { 43 using BaseT = BasicTTIImplBase<AArch64TTIImpl>; 44 using TTI = TargetTransformInfo; 45 46 friend BaseT; 47 48 const AArch64Subtarget *ST; 49 const AArch64TargetLowering *TLI; 50 51 static const FeatureBitset InlineInverseFeatures; 52 getST()53 const AArch64Subtarget *getST() const { return ST; } getTLI()54 const AArch64TargetLowering *getTLI() const { return TLI; } 55 56 enum MemIntrinsicType { 57 VECTOR_LDST_TWO_ELEMENTS, 58 VECTOR_LDST_THREE_ELEMENTS, 59 VECTOR_LDST_FOUR_ELEMENTS 60 }; 61 62 bool isWideningInstruction(Type *DstTy, unsigned Opcode, 63 ArrayRef<const Value *> Args, 64 Type *SrcOverrideTy = nullptr) const; 65 66 // A helper function called by 'getVectorInstrCost'. 67 // 68 // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 69 // \param ScalarUserAndIdx encodes the information about extracts from a 70 /// vector with 'Scalar' being the value being extracted,'User' being the user 71 /// of the extract(nullptr if user is not known before vectorization) and 72 /// 'Idx' being the extract lane. 73 InstructionCost getVectorInstrCostHelper( 74 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, 75 const Instruction *I = nullptr, Value *Scalar = nullptr, 76 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx = {}) const; 77 78 public: AArch64TTIImpl(const AArch64TargetMachine * TM,const Function & F)79 explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F) 80 : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), 81 TLI(ST->getTargetLowering()) {} 82 83 bool areInlineCompatible(const Function *Caller, 84 const Function *Callee) const override; 85 86 bool areTypesABICompatible(const Function *Caller, const Function *Callee, 87 const ArrayRef<Type *> &Types) const override; 88 89 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, 90 unsigned DefaultCallPenalty) const override; 91 92 uint64_t getFeatureMask(const Function &F) const override; 93 94 bool isMultiversionedFunction(const Function &F) const override; 95 96 /// \name Scalar TTI Implementations 97 /// @{ 98 99 using BaseT::getIntImmCost; 100 InstructionCost getIntImmCost(int64_t Val) const; 101 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, 102 TTI::TargetCostKind CostKind) const override; 103 InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, 104 const APInt &Imm, Type *Ty, 105 TTI::TargetCostKind CostKind, 106 Instruction *Inst = nullptr) const override; 107 InstructionCost 108 getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, 109 Type *Ty, TTI::TargetCostKind CostKind) const override; 110 TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override; 111 112 /// @} 113 114 /// \name Vector TTI Implementations 115 /// @{ 116 enableInterleavedAccessVectorization()117 bool enableInterleavedAccessVectorization() const override { return true; } 118 enableMaskedInterleavedAccessVectorization()119 bool enableMaskedInterleavedAccessVectorization() const override { 120 return ST->hasSVE(); 121 } 122 getNumberOfRegisters(unsigned ClassID)123 unsigned getNumberOfRegisters(unsigned ClassID) const override { 124 bool Vector = (ClassID == 1); 125 if (Vector) { 126 if (ST->hasNEON()) 127 return 32; 128 return 0; 129 } 130 return 31; 131 } 132 133 InstructionCost 134 getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, 135 TTI::TargetCostKind CostKind) const override; 136 137 std::optional<Instruction *> 138 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override; 139 140 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( 141 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, 142 APInt &UndefElts2, APInt &UndefElts3, 143 std::function<void(Instruction *, unsigned, APInt, APInt &)> 144 SimplifyAndSetOp) const override; 145 146 TypeSize 147 getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override; 148 getMinVectorRegisterBitWidth()149 unsigned getMinVectorRegisterBitWidth() const override { 150 return ST->getMinVectorRegisterBitWidth(); 151 } 152 getVScaleForTuning()153 std::optional<unsigned> getVScaleForTuning() const override { 154 return ST->getVScaleForTuning(); 155 } 156 isVScaleKnownToBeAPowerOfTwo()157 bool isVScaleKnownToBeAPowerOfTwo() const override { return true; } 158 159 bool shouldMaximizeVectorBandwidth( 160 TargetTransformInfo::RegisterKind K) const override; 161 162 /// Try to return an estimate cost factor that can be used as a multiplier 163 /// when scalarizing an operation for a vector with ElementCount \p VF. 164 /// For scalable vectors this currently takes the most pessimistic view based 165 /// upon the maximum possible value for vscale. getMaxNumElements(ElementCount VF)166 unsigned getMaxNumElements(ElementCount VF) const { 167 if (!VF.isScalable()) 168 return VF.getFixedValue(); 169 170 return VF.getKnownMinValue() * ST->getVScaleForTuning(); 171 } 172 173 unsigned getMaxInterleaveFactor(ElementCount VF) const override; 174 175 bool prefersVectorizedAddressing() const override; 176 177 InstructionCost 178 getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, 179 unsigned AddressSpace, 180 TTI::TargetCostKind CostKind) const override; 181 182 InstructionCost 183 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, 184 bool VariableMask, Align Alignment, 185 TTI::TargetCostKind CostKind, 186 const Instruction *I = nullptr) const override; 187 188 bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, 189 Type *Src) const; 190 191 InstructionCost 192 getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, 193 TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, 194 const Instruction *I = nullptr) const override; 195 196 InstructionCost 197 getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, 198 unsigned Index, 199 TTI::TargetCostKind CostKind) const override; 200 201 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, 202 const Instruction *I = nullptr) const override; 203 204 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, 205 TTI::TargetCostKind CostKind, 206 unsigned Index, const Value *Op0, 207 const Value *Op1) const override; 208 209 /// \param ScalarUserAndIdx encodes the information about extracts from a 210 /// vector with 'Scalar' being the value being extracted,'User' being the user 211 /// of the extract(nullptr if user is not known before vectorization) and 212 /// 'Idx' being the extract lane. 213 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, 214 TTI::TargetCostKind CostKind, 215 unsigned Index, Value *Scalar, 216 ArrayRef<std::tuple<Value *, User *, int>> 217 ScalarUserAndIdx) const override; 218 219 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, 220 TTI::TargetCostKind CostKind, 221 unsigned Index) const override; 222 223 InstructionCost 224 getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, 225 TTI::TargetCostKind CostKind) const override; 226 227 InstructionCost 228 getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, 229 TTI::TargetCostKind CostKind) const; 230 231 InstructionCost getSpliceCost(VectorType *Tp, int Index, 232 TTI::TargetCostKind CostKind) const; 233 234 InstructionCost getArithmeticInstrCost( 235 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, 236 TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, 237 TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, 238 ArrayRef<const Value *> Args = {}, 239 const Instruction *CxtI = nullptr) const override; 240 241 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, 242 const SCEV *Ptr) const override; 243 244 InstructionCost getCmpSelInstrCost( 245 unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, 246 TTI::TargetCostKind CostKind, 247 TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, 248 TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, 249 const Instruction *I = nullptr) const override; 250 251 TTI::MemCmpExpansionOptions 252 enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override; 253 bool useNeonVector(const Type *Ty) const; 254 255 InstructionCost getMemoryOpCost( 256 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, 257 TTI::TargetCostKind CostKind, 258 TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None}, 259 const Instruction *I = nullptr) const override; 260 261 InstructionCost 262 getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const override; 263 264 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, 265 TTI::UnrollingPreferences &UP, 266 OptimizationRemarkEmitter *ORE) const override; 267 268 void getPeelingPreferences(Loop *L, ScalarEvolution &SE, 269 TTI::PeelingPreferences &PP) const override; 270 271 Value * 272 getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, 273 bool CanCreate = true) const override; 274 275 bool getTgtMemIntrinsic(IntrinsicInst *Inst, 276 MemIntrinsicInfo &Info) const override; 277 isElementTypeLegalForScalableVector(Type * Ty)278 bool isElementTypeLegalForScalableVector(Type *Ty) const override { 279 if (Ty->isPointerTy()) 280 return true; 281 282 if (Ty->isBFloatTy() && ST->hasBF16()) 283 return true; 284 285 if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) 286 return true; 287 288 if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) || 289 Ty->isIntegerTy(32) || Ty->isIntegerTy(64)) 290 return true; 291 292 return false; 293 } 294 isLegalMaskedLoadStore(Type * DataType,Align Alignment)295 bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const { 296 if (!ST->hasSVE()) 297 return false; 298 299 // For fixed vectors, avoid scalarization if using SVE for them. 300 if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors() && 301 DataType->getPrimitiveSizeInBits() != 128) 302 return false; // Fall back to scalarization of masked operations. 303 304 return isElementTypeLegalForScalableVector(DataType->getScalarType()); 305 } 306 isLegalMaskedLoad(Type * DataType,Align Alignment,unsigned)307 bool isLegalMaskedLoad(Type *DataType, Align Alignment, 308 unsigned /*AddressSpace*/) const override { 309 return isLegalMaskedLoadStore(DataType, Alignment); 310 } 311 isLegalMaskedStore(Type * DataType,Align Alignment,unsigned)312 bool isLegalMaskedStore(Type *DataType, Align Alignment, 313 unsigned /*AddressSpace*/) const override { 314 return isLegalMaskedLoadStore(DataType, Alignment); 315 } 316 isLegalMaskedGatherScatter(Type * DataType)317 bool isLegalMaskedGatherScatter(Type *DataType) const { 318 if (!ST->isSVEAvailable()) 319 return false; 320 321 // For fixed vectors, scalarize if not using SVE for them. 322 auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType); 323 if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() || 324 DataTypeFVTy->getNumElements() < 2)) 325 return false; 326 327 return isElementTypeLegalForScalableVector(DataType->getScalarType()); 328 } 329 isLegalMaskedGather(Type * DataType,Align Alignment)330 bool isLegalMaskedGather(Type *DataType, Align Alignment) const override { 331 return isLegalMaskedGatherScatter(DataType); 332 } 333 isLegalMaskedScatter(Type * DataType,Align Alignment)334 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override { 335 return isLegalMaskedGatherScatter(DataType); 336 } 337 isLegalBroadcastLoad(Type * ElementTy,ElementCount NumElements)338 bool isLegalBroadcastLoad(Type *ElementTy, 339 ElementCount NumElements) const override { 340 // Return true if we can generate a `ld1r` splat load instruction. 341 if (!ST->hasNEON() || NumElements.isScalable()) 342 return false; 343 switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) { 344 case 8: 345 case 16: 346 case 32: 347 case 64: { 348 // We accept bit-widths >= 64bits and elements {8,16,32,64} bits. 349 unsigned VectorBits = NumElements.getFixedValue() * ElementBits; 350 return VectorBits >= 64; 351 } 352 } 353 return false; 354 } 355 isLegalNTStoreLoad(Type * DataType,Align Alignment)356 bool isLegalNTStoreLoad(Type *DataType, Align Alignment) const { 357 // NOTE: The logic below is mostly geared towards LV, which calls it with 358 // vectors with 2 elements. We might want to improve that, if other 359 // users show up. 360 // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if 361 // the vector can be halved so that each half fits into a register. That's 362 // the case if the element type fits into a register and the number of 363 // elements is a power of 2 > 1. 364 if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) { 365 unsigned NumElements = DataTypeTy->getNumElements(); 366 unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits(); 367 return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 && 368 EltSize <= 128 && isPowerOf2_64(EltSize); 369 } 370 return BaseT::isLegalNTStore(DataType, Alignment); 371 } 372 isLegalNTStore(Type * DataType,Align Alignment)373 bool isLegalNTStore(Type *DataType, Align Alignment) const override { 374 return isLegalNTStoreLoad(DataType, Alignment); 375 } 376 isLegalNTLoad(Type * DataType,Align Alignment)377 bool isLegalNTLoad(Type *DataType, Align Alignment) const override { 378 // Only supports little-endian targets. 379 if (ST->isLittleEndian()) 380 return isLegalNTStoreLoad(DataType, Alignment); 381 return BaseT::isLegalNTLoad(DataType, Alignment); 382 } 383 384 InstructionCost getPartialReductionCost( 385 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, 386 ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, 387 TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp, 388 TTI::TargetCostKind CostKind) const override; 389 enableOrderedReductions()390 bool enableOrderedReductions() const override { return true; } 391 392 InstructionCost getInterleavedMemoryOpCost( 393 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, 394 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, 395 bool UseMaskForCond = false, bool UseMaskForGaps = false) const override; 396 397 bool shouldConsiderAddressTypePromotion( 398 const Instruction &I, 399 bool &AllowPromotionWithoutCommonHeader) const override; 400 shouldExpandReduction(const IntrinsicInst * II)401 bool shouldExpandReduction(const IntrinsicInst *II) const override { 402 return false; 403 } 404 getGISelRematGlobalCost()405 unsigned getGISelRematGlobalCost() const override { return 2; } 406 getMinTripCountTailFoldingThreshold()407 unsigned getMinTripCountTailFoldingThreshold() const override { 408 return ST->hasSVE() ? 5 : 0; 409 } 410 411 TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow)412 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override { 413 if (ST->hasSVE()) 414 return IVUpdateMayOverflow 415 ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck 416 : TailFoldingStyle::DataAndControlFlow; 417 418 return TailFoldingStyle::DataWithoutLaneMask; 419 } 420 421 bool preferFixedOverScalableIfEqualCost() const override; 422 423 unsigned getEpilogueVectorizationMinVF() const override; 424 425 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override; 426 supportsScalableVectors()427 bool supportsScalableVectors() const override { 428 return ST->isSVEorStreamingSVEAvailable(); 429 } 430 431 bool enableScalableVectorization() const override; 432 433 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, 434 ElementCount VF) const override; 435 preferPredicatedReductionSelect()436 bool preferPredicatedReductionSelect() const override { return ST->hasSVE(); } 437 438 InstructionCost 439 getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, 440 std::optional<FastMathFlags> FMF, 441 TTI::TargetCostKind CostKind) const override; 442 443 InstructionCost 444 getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, 445 VectorType *ValTy, std::optional<FastMathFlags> FMF, 446 TTI::TargetCostKind CostKind) const override; 447 448 InstructionCost getMulAccReductionCost( 449 bool IsUnsigned, Type *ResTy, VectorType *Ty, 450 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const override; 451 452 InstructionCost 453 getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, 454 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index, 455 VectorType *SubTp, ArrayRef<const Value *> Args = {}, 456 const Instruction *CxtI = nullptr) const override; 457 458 InstructionCost getScalarizationOverhead( 459 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, 460 TTI::TargetCostKind CostKind, bool ForPoisonSrc = true, 461 ArrayRef<Value *> VL = {}) const override; 462 463 /// Return the cost of the scaling factor used in the addressing 464 /// mode represented by AM for this target, for a load/store 465 /// of the specified type. 466 /// If the AM is supported, the return value must be >= 0. 467 /// If the AM is not supported, it returns an invalid cost. 468 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, 469 StackOffset BaseOffset, bool HasBaseReg, 470 int64_t Scale, 471 unsigned AddrSpace) const override; 472 enableSelectOptimize()473 bool enableSelectOptimize() const override { 474 return ST->enableSelectOptimize(); 475 } 476 477 bool shouldTreatInstructionLikeSelect(const Instruction *I) const override; 478 getStoreMinimumVF(unsigned VF,Type * ScalarMemTy,Type * ScalarValTy)479 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, 480 Type *ScalarValTy) const override { 481 // We can vectorize store v4i8. 482 if (ScalarMemTy->isIntegerTy(8) && isPowerOf2_32(VF) && VF >= 4) 483 return 4; 484 485 return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy); 486 } 487 getMinPageSize()488 std::optional<unsigned> getMinPageSize() const override { return 4096; } 489 490 bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, 491 const TargetTransformInfo::LSRCost &C2) const override; 492 493 bool isProfitableToSinkOperands(Instruction *I, 494 SmallVectorImpl<Use *> &Ops) const override; 495 /// @} 496 }; 497 498 } // end namespace llvm 499 500 #endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H 501