//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object
/// specific to the ARM target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include <optional>

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;

namespace TailPredication {
enum Mode {
  Disabled = 0,
  EnabledNoReductions,
  Enabled,
  ForceEnabledNoReductions,
  ForceEnabled
};
}

// For controlling conversion of memcpy into Tail Predicated loop.
namespace TPLoop {
enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
}

class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeaturesAllowed:
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32.
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode in a caller with +thumb-mode may cause the assembler to fail
  // if the callee uses ARM-only instructions, e.g. in inline asm.
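  // The features that are listed in InlineFeaturesAllowed below are the ones
  // considered benign for inlining: a callee may be inlined into a caller
  // with a different set of these features as long as the callee does not
  // depend on one the caller lacks, while feature bits outside this set are
  // expected to match exactly between caller and callee (see
  // areInlineCompatible).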
  const FeatureBitset InlineFeaturesAllowed = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  };

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  bool enableInterleavedAccessVectorization() { return true; }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
  bool isFPVectorizationPotentiallyUnsafe() {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  /// \name Scalar TTI Implementations
  /// @{

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty);

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 16; // NEON provides 16 128-bit Q registers (Q0-Q15).
      if (ST->hasMVEIntegerOps())
        return 8; // MVE provides 8 128-bit Q registers (Q0-Q7).
      return 0;
    }

    if (ST->isThumb1Only())
      return 8; // Thumb1 instructions mostly use the low registers r0-r7.
    return 13;  // r0-r12; sp, lr and pc are not counted here.
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(32);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->hasNEON())
        return TypeSize::getFixed(128);
      if (ST->hasMVEIntegerOps())
        return TypeSize::getFixed(128);
      return TypeSize::getFixed(0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(0);
    }
    llvm_unreachable("Unsupported register kind");
  }

  unsigned getMaxInterleaveFactor(ElementCount VF) {
    return ST->getMaxInterleaveFactor();
  }

  bool isProfitableLSRChainElement(Instruction *I);

  bool isLegalMaskedLoad(Type *DataTy, Align Alignment);

  bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
    return isLegalMaskedLoad(DataTy, Alignment);
  }

  bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
    // For MVE, we have a custom lowering pass that will already have custom
    // legalised any gathers that we can lower to MVE intrinsics, and want to
    // expand all the rest. The pass runs before the masked intrinsic lowering
    // pass.
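    // (The custom lowering referred to here is the MVEGatherScatterLowering
    // pass in the ARM backend.)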
    return true;
  }

  bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
    return forceScalarizeMaskedGather(VTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, Align Alignment);

  bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
    return isLegalMaskedGather(Ty, Alignment);
  }

  InstructionCost getMemcpyCost(const Instruction *I);

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
    return ST->getMaxInlineSizeThreshold();
  }

  int getNumMemOps(const IntrinsicInst *I) const;

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt,
                                 const Instruction *CxtI = nullptr);

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const;

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);

  InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = std::nullopt,
      const Instruction *CxtI = nullptr);

  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr);

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);
  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *ValTy,
                                           FastMathFlags FMF,
                                           TTI::TargetCostKind CostKind);
  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
                                         VectorType *ValTy,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                         FastMathFlags FMF,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  /// getScalingFactorCost - Return the cost of the scaling factor used in the
  /// addressing mode represented by AM.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, the return value must be negative.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;

  bool maybeLoweredToCall(Instruction &I);
  bool isLoweredToCall(const Function *F);
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo);
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI);
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
  bool shouldBuildLookupTablesForConstant(Constant *C) const {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsDynamicRelocation();

    return true;
  }

  bool hasArmWideBranch(bool Thumb) const;

  /// @}
};

/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.) For example, the VREV32
/// mask for an <8 x i8> shuffle is <3, 2, 1, 0, 7, 6, 5, 4>.
inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for VREV are: 16, 32, 64");

  unsigned EltSz = VT.getScalarSizeInBits();
  if (EltSz != 8 && EltSz != 16 && EltSz != 32)
    return false;

  unsigned BlockElts = M[0] + 1;
  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSz;

  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
    return false;

  for (unsigned i = 0, e = M.size(); i < e; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }

  return true;
}

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H