//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object
/// specific to the AArch64 target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target independent and default TTI implementations handle the
/// rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include <cstdint>
#include <optional>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }

  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };

  bool isWideningInstruction(Type *DstTy, unsigned Opcode,
                             ArrayRef<const Value *> Args,
                             Type *SrcOverrideTy = nullptr);

  // A helper function called by 'getVectorInstrCost'.
  //
  // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
  // indicates whether the vector instruction is available in the input IR or
  // just imaginary in vectorizer passes.
  InstructionCost getVectorInstrCostHelper(const Instruction *I, Type *Val,
                                           unsigned Index, bool HasRealUse);

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const;

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(int64_t Val);
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() { return true; }

  bool enableMaskedInterleavedAccessVectorization() { return ST->hasSVE(); }

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;

  unsigned getMinVectorRegisterBitWidth() const {
    return ST->getMinVectorRegisterBitWidth();
  }

  std::optional<unsigned> getVScaleForTuning() const {
    return ST->getVScaleForTuning();
  }

  bool isVScaleKnownToBeAPowerOfTwo() const { return true; }

  bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;

  /// Try to return an estimate cost factor that can be used as a multiplier
  /// when scalarizing an operation for a vector with ElementCount \p VF.
  /// For scalable vectors this currently takes the most pessimistic view based
  /// upon the maximum possible value for vscale.
  unsigned getMaxNumElements(ElementCount VF) const {
    if (!VF.isScalable())
      return VF.getFixedValue();

    return VF.getKnownMinValue() * ST->getVScaleForTuning();
  }

  unsigned getMaxInterleaveFactor(ElementCount VF);

  bool prefersVectorizedAddressing() const;

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index);

  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                         FastMathFlags FMF,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
                                                VectorType *ValTy,
                                                TTI::TargetCostKind CostKind);

  InstructionCost getSpliceCost(VectorType *Tp, int Index);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;
  bool useNeonVector(const Type *Ty) const;

  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr);

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);

  bool isElementTypeLegalForScalableVector(Type *Ty) const {
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() && ST->hasBF16())
      return true;

    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }

  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, avoid scalarization if using SVE for them.
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
      return false; // Fall back to scalarization of masked operations.

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (!ST->hasSVE() || !ST->isNeonAvailable())
      return false;

    // For fixed vectors, scalarize if not using SVE for them.
    auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
    if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
                         DataTypeFVTy->getNumElements() < 2))
      return false;

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    // Return true if we can generate a `ld1r` splat load instruction.
    if (!ST->hasNEON() || NumElements.isScalable())
      return false;
    switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
    case 8:
    case 16:
    case 32:
    case 64: {
      // We accept bit-widths >= 64bits and elements {8,16,32,64} bits.
      unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
      return VectorBits >= 64;
    }
    }
    return false;
  }

  bool isLegalNTStoreLoad(Type *DataType, Align Alignment) {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    //       vectors with 2 elements. We might want to improve that, if other
    //       users show up.
    // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if
    // the vector can be halved so that each half fits into a register. That's
    // the case if the element type fits into a register and the number of
    // elements is a power of 2 > 1.
    if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) {
      unsigned NumElements = DataTypeTy->getNumElements();
      unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) {
    return isLegalNTStoreLoad(DataType, Alignment);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) {
    // Only supports little-endian targets.
    if (ST->isLittleEndian())
      return isLegalNTStoreLoad(DataType, Alignment);
    return BaseT::isLegalNTLoad(DataType, Alignment);
  }

  bool enableOrderedReductions() const { return true; }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader);

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  unsigned getGISelRematGlobalCost() const {
    return 2;
  }

  unsigned getMinTripCountTailFoldingThreshold() const {
    return ST->hasSVE() ? 5 : 0;
  }

  TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
    if (ST->hasSVE())
      return IVUpdateMayOverflow
                 ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
                 : TailFoldingStyle::DataAndControlFlow;

    return TailFoldingStyle::DataWithoutLaneMask;
  }

  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI);

  bool supportsScalableVectors() const { return ST->hasSVE(); }

  bool enableScalableVectorization() const { return ST->hasSVE(); }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return ST->hasSVE();
  }

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt);

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;
  /// @}

  bool enableSelectOptimize() { return ST->enableSelectOptimize(); }

  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const {
    // We can vectorize store v4i8.
    if (ScalarMemTy->isIntegerTy(8) && isPowerOf2_32(VF) && VF >= 4)
      return 4;

    return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H