1 //===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file a TargetTransformInfo::Concept conforming object specific to the 10 /// AArch64 target machine. It uses the target's detailed information to 11 /// provide more precise answers to certain TTI queries, while letting the 12 /// target independent and default TTI implementations handle the rest. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H 17 #define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H 18 19 #include "AArch64.h" 20 #include "AArch64Subtarget.h" 21 #include "AArch64TargetMachine.h" 22 #include "llvm/ADT/ArrayRef.h" 23 #include "llvm/Analysis/TargetTransformInfo.h" 24 #include "llvm/CodeGen/BasicTTIImpl.h" 25 #include "llvm/IR/Function.h" 26 #include "llvm/IR/Intrinsics.h" 27 #include <cstdint> 28 #include <optional> 29 30 namespace llvm { 31 32 class APInt; 33 class Instruction; 34 class IntrinsicInst; 35 class Loop; 36 class SCEV; 37 class ScalarEvolution; 38 class Type; 39 class Value; 40 class VectorType; 41 42 class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> { 43 using BaseT = BasicTTIImplBase<AArch64TTIImpl>; 44 using TTI = TargetTransformInfo; 45 46 friend BaseT; 47 48 const AArch64Subtarget *ST; 49 const AArch64TargetLowering *TLI; 50 51 const AArch64Subtarget *getST() const { return ST; } 52 const AArch64TargetLowering *getTLI() const { return TLI; } 53 54 enum MemIntrinsicType { 55 VECTOR_LDST_TWO_ELEMENTS, 56 VECTOR_LDST_THREE_ELEMENTS, 57 VECTOR_LDST_FOUR_ELEMENTS 58 }; 59 60 bool isWideningInstruction(Type *DstTy, unsigned Opcode, 61 ArrayRef<const Value *> Args, 62 Type *SrcOverrideTy = nullptr); 63 64 // A helper function called by 'getVectorInstrCost'. 65 // 66 // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse' 67 // indicates whether the vector instruction is available in the input IR or 68 // just imaginary in vectorizer passes. 69 InstructionCost getVectorInstrCostHelper(const Instruction *I, Type *Val, 70 unsigned Index, bool HasRealUse); 71 72 public: 73 explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F) 74 : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), 75 TLI(ST->getTargetLowering()) {} 76 77 bool areInlineCompatible(const Function *Caller, 78 const Function *Callee) const; 79 80 bool areTypesABICompatible(const Function *Caller, const Function *Callee, 81 const ArrayRef<Type *> &Types) const; 82 83 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, 84 unsigned DefaultCallPenalty) const; 85 86 /// \name Scalar TTI Implementations 87 /// @{ 88 89 using BaseT::getIntImmCost; 90 InstructionCost getIntImmCost(int64_t Val); 91 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, 92 TTI::TargetCostKind CostKind); 93 InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, 94 const APInt &Imm, Type *Ty, 95 TTI::TargetCostKind CostKind, 96 Instruction *Inst = nullptr); 97 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, 98 const APInt &Imm, Type *Ty, 99 TTI::TargetCostKind CostKind); 100 TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); 101 102 /// @} 103 104 /// \name Vector TTI Implementations 105 /// @{ 106 107 bool enableInterleavedAccessVectorization() { return true; } 108 109 bool enableMaskedInterleavedAccessVectorization() { return ST->hasSVE(); } 110 111 unsigned getNumberOfRegisters(unsigned ClassID) const { 112 bool Vector = (ClassID == 1); 113 if (Vector) { 114 if (ST->hasNEON()) 115 return 32; 116 return 0; 117 } 118 return 31; 119 } 120 121 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, 122 TTI::TargetCostKind CostKind); 123 124 std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, 125 IntrinsicInst &II) const; 126 127 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( 128 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, 129 APInt &UndefElts2, APInt &UndefElts3, 130 std::function<void(Instruction *, unsigned, APInt, APInt &)> 131 SimplifyAndSetOp) const; 132 133 TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const; 134 135 unsigned getMinVectorRegisterBitWidth() const { 136 return ST->getMinVectorRegisterBitWidth(); 137 } 138 139 std::optional<unsigned> getVScaleForTuning() const { 140 return ST->getVScaleForTuning(); 141 } 142 143 bool isVScaleKnownToBeAPowerOfTwo() const { return true; } 144 145 bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const; 146 147 /// Try to return an estimate cost factor that can be used as a multiplier 148 /// when scalarizing an operation for a vector with ElementCount \p VF. 149 /// For scalable vectors this currently takes the most pessimistic view based 150 /// upon the maximum possible value for vscale. 151 unsigned getMaxNumElements(ElementCount VF) const { 152 if (!VF.isScalable()) 153 return VF.getFixedValue(); 154 155 return VF.getKnownMinValue() * ST->getVScaleForTuning(); 156 } 157 158 unsigned getMaxInterleaveFactor(ElementCount VF); 159 160 bool prefersVectorizedAddressing() const; 161 162 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, 163 Align Alignment, unsigned AddressSpace, 164 TTI::TargetCostKind CostKind); 165 166 InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, 167 const Value *Ptr, bool VariableMask, 168 Align Alignment, 169 TTI::TargetCostKind CostKind, 170 const Instruction *I = nullptr); 171 172 bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src); 173 174 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, 175 TTI::CastContextHint CCH, 176 TTI::TargetCostKind CostKind, 177 const Instruction *I = nullptr); 178 179 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, 180 VectorType *VecTy, unsigned Index); 181 182 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, 183 const Instruction *I = nullptr); 184 185 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, 186 TTI::TargetCostKind CostKind, 187 unsigned Index, Value *Op0, Value *Op1); 188 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, 189 TTI::TargetCostKind CostKind, 190 unsigned Index); 191 192 InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, 193 FastMathFlags FMF, 194 TTI::TargetCostKind CostKind); 195 196 InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, 197 VectorType *ValTy, 198 TTI::TargetCostKind CostKind); 199 200 InstructionCost getSpliceCost(VectorType *Tp, int Index); 201 202 InstructionCost getArithmeticInstrCost( 203 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, 204 TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None}, 205 TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None}, 206 ArrayRef<const Value *> Args = ArrayRef<const Value *>(), 207 const Instruction *CxtI = nullptr); 208 209 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, 210 const SCEV *Ptr); 211 212 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, 213 CmpInst::Predicate VecPred, 214 TTI::TargetCostKind CostKind, 215 const Instruction *I = nullptr); 216 217 TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, 218 bool IsZeroCmp) const; 219 bool useNeonVector(const Type *Ty) const; 220 221 InstructionCost 222 getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, 223 unsigned AddressSpace, TTI::TargetCostKind CostKind, 224 TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None}, 225 const Instruction *I = nullptr); 226 227 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys); 228 229 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, 230 TTI::UnrollingPreferences &UP, 231 OptimizationRemarkEmitter *ORE); 232 233 void getPeelingPreferences(Loop *L, ScalarEvolution &SE, 234 TTI::PeelingPreferences &PP); 235 236 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, 237 Type *ExpectedType); 238 239 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info); 240 241 bool isElementTypeLegalForScalableVector(Type *Ty) const { 242 if (Ty->isPointerTy()) 243 return true; 244 245 if (Ty->isBFloatTy() && ST->hasBF16()) 246 return true; 247 248 if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) 249 return true; 250 251 if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) || 252 Ty->isIntegerTy(32) || Ty->isIntegerTy(64)) 253 return true; 254 255 return false; 256 } 257 258 bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) { 259 if (!ST->hasSVE()) 260 return false; 261 262 // For fixed vectors, avoid scalarization if using SVE for them. 263 if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors() && 264 DataType->getPrimitiveSizeInBits() != 128) 265 return false; // Fall back to scalarization of masked operations. 266 267 return isElementTypeLegalForScalableVector(DataType->getScalarType()); 268 } 269 270 bool isLegalMaskedLoad(Type *DataType, Align Alignment) { 271 return isLegalMaskedLoadStore(DataType, Alignment); 272 } 273 274 bool isLegalMaskedStore(Type *DataType, Align Alignment) { 275 return isLegalMaskedLoadStore(DataType, Alignment); 276 } 277 278 bool isLegalMaskedGatherScatter(Type *DataType) const { 279 if (!ST->hasSVE() || !ST->isNeonAvailable()) 280 return false; 281 282 // For fixed vectors, scalarize if not using SVE for them. 283 auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType); 284 if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() || 285 DataTypeFVTy->getNumElements() < 2)) 286 return false; 287 288 return isElementTypeLegalForScalableVector(DataType->getScalarType()); 289 } 290 291 bool isLegalMaskedGather(Type *DataType, Align Alignment) const { 292 return isLegalMaskedGatherScatter(DataType); 293 } 294 295 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const { 296 return isLegalMaskedGatherScatter(DataType); 297 } 298 299 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const { 300 // Return true if we can generate a `ld1r` splat load instruction. 301 if (!ST->hasNEON() || NumElements.isScalable()) 302 return false; 303 switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) { 304 case 8: 305 case 16: 306 case 32: 307 case 64: { 308 // We accept bit-widths >= 64bits and elements {8,16,32,64} bits. 309 unsigned VectorBits = NumElements.getFixedValue() * ElementBits; 310 return VectorBits >= 64; 311 } 312 } 313 return false; 314 } 315 316 bool isLegalNTStoreLoad(Type *DataType, Align Alignment) { 317 // NOTE: The logic below is mostly geared towards LV, which calls it with 318 // vectors with 2 elements. We might want to improve that, if other 319 // users show up. 320 // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if 321 // the vector can be halved so that each half fits into a register. That's 322 // the case if the element type fits into a register and the number of 323 // elements is a power of 2 > 1. 324 if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) { 325 unsigned NumElements = DataTypeTy->getNumElements(); 326 unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits(); 327 return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 && 328 EltSize <= 128 && isPowerOf2_64(EltSize); 329 } 330 return BaseT::isLegalNTStore(DataType, Alignment); 331 } 332 333 bool isLegalNTStore(Type *DataType, Align Alignment) { 334 return isLegalNTStoreLoad(DataType, Alignment); 335 } 336 337 bool isLegalNTLoad(Type *DataType, Align Alignment) { 338 // Only supports little-endian targets. 339 if (ST->isLittleEndian()) 340 return isLegalNTStoreLoad(DataType, Alignment); 341 return BaseT::isLegalNTLoad(DataType, Alignment); 342 } 343 344 bool enableOrderedReductions() const { return true; } 345 346 InstructionCost getInterleavedMemoryOpCost( 347 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, 348 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, 349 bool UseMaskForCond = false, bool UseMaskForGaps = false); 350 351 bool 352 shouldConsiderAddressTypePromotion(const Instruction &I, 353 bool &AllowPromotionWithoutCommonHeader); 354 355 bool shouldExpandReduction(const IntrinsicInst *II) const { return false; } 356 357 unsigned getGISelRematGlobalCost() const { 358 return 2; 359 } 360 361 unsigned getMinTripCountTailFoldingThreshold() const { 362 return ST->hasSVE() ? 5 : 0; 363 } 364 365 TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const { 366 if (ST->hasSVE()) 367 return IVUpdateMayOverflow 368 ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck 369 : TailFoldingStyle::DataAndControlFlow; 370 371 return TailFoldingStyle::DataWithoutLaneMask; 372 } 373 374 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI); 375 376 bool supportsScalableVectors() const { return ST->hasSVE(); } 377 378 bool enableScalableVectorization() const { return ST->hasSVE(); } 379 380 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, 381 ElementCount VF) const; 382 383 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, 384 TTI::ReductionFlags Flags) const { 385 return ST->hasSVE(); 386 } 387 388 InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, 389 std::optional<FastMathFlags> FMF, 390 TTI::TargetCostKind CostKind); 391 392 InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, 393 ArrayRef<int> Mask, 394 TTI::TargetCostKind CostKind, int Index, 395 VectorType *SubTp, 396 ArrayRef<const Value *> Args = std::nullopt); 397 398 InstructionCost getScalarizationOverhead(VectorType *Ty, 399 const APInt &DemandedElts, 400 bool Insert, bool Extract, 401 TTI::TargetCostKind CostKind); 402 403 /// Return the cost of the scaling factor used in the addressing 404 /// mode represented by AM for this target, for a load/store 405 /// of the specified type. 406 /// If the AM is supported, the return value must be >= 0. 407 /// If the AM is not supported, it returns a negative value. 408 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, 409 int64_t BaseOffset, bool HasBaseReg, 410 int64_t Scale, unsigned AddrSpace) const; 411 /// @} 412 413 bool enableSelectOptimize() { return ST->enableSelectOptimize(); } 414 415 bool shouldTreatInstructionLikeSelect(const Instruction *I); 416 417 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, 418 Type *ScalarValTy) const { 419 // We can vectorize store v4i8. 420 if (ScalarMemTy->isIntegerTy(8) && isPowerOf2_32(VF) && VF >= 4) 421 return 4; 422 423 return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy); 424 } 425 426 std::optional<unsigned> getMinPageSize() const { return 4096; } 427 }; 428 429 } // end namespace llvm 430 431 #endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H 432