//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;

namespace TailPredication {
enum Mode {
  Disabled = 0,
  EnabledNoReductions,
  Enabled,
  ForceEnabledNoReductions,
  ForceEnabled
};
} // namespace TailPredication

class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeaturesAllowed:
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32.
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode into a caller with +thumb-mode may cause the assembler to
  // fail if the callee uses ARM-only instructions, e.g. in inline asm.
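  //
  // As a rough sketch of how this set is consulted (the authoritative logic
  // lives in ARMTTIImpl::areInlineCompatible in ARMTargetTransformInfo.cpp):
  // features outside this set must match exactly between caller and callee,
  // while for features inside it the callee's bits only need to be a subset
  // of the caller's:
  //
  //   bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
  //                     (CalleeBits & ~InlineFeaturesAllowed);
  //   bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
  //                      (CalleeBits & InlineFeaturesAllowed);
  //   return MatchExact && MatchSubset;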
  const FeatureBitset InlineFeaturesAllowed = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates};

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  bool enableInterleavedAccessVectorization() { return true; }

  bool shouldFavorBackedgeIndex(const Loop *L) const;
  bool shouldFavorPostInc() const;

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
  bool isFPVectorizationPotentiallyUnsafe() {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }
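
  /// Attempt a target-specific InstCombine fold of an ARM intrinsic call,
  /// for example folding away a round-trip MVE predicate conversion.
  /// Returns None when no ARM-specific simplification applies, so the
  /// generic InstCombine handling takes over.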
  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;

  /// \name Scalar TTI Implementations
  /// @{

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty);

  using BaseT::getIntImmCost;
  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);

  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                        Type *Ty, TTI::TargetCostKind CostKind,
                        Instruction *Inst = nullptr);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    if (ST->isThumb1Only())
      return 8;
    return 13;
  }

  unsigned getRegisterBitWidth(bool Vector) const {
    if (Vector) {
      if (ST->hasNEON())
        return 128;
      if (ST->hasMVEIntegerOps())
        return 128;
      return 0;
    }

    return 32;
  }

  unsigned getMaxInterleaveFactor(unsigned VF) {
    return ST->getMaxInterleaveFactor();
  }

  bool isProfitableLSRChainElement(Instruction *I);

  bool isLegalMaskedLoad(Type *DataTy, Align Alignment);

  bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
    return isLegalMaskedLoad(DataTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, Align Alignment);

  bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
    return isLegalMaskedGather(Ty, Alignment);
  }

  int getMemcpyCost(const Instruction *I);

  int getNumMemOps(const IntrinsicInst *I) const;

  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
                     VectorType *SubTp);

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const;

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);

  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                       const Instruction *I = nullptr);

  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         CmpInst::Predicate VecPred,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr);

  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);

  int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                const SCEV *Ptr);

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                      unsigned AddressSpace, TTI::TargetCostKind CostKind,
                      const Instruction *I = nullptr);
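
  // Clients reach the memory-op cost hooks above and below through the public
  // TargetTransformInfo wrapper rather than calling ARMTTIImpl directly. A
  // hedged client-side sketch (the setup and values are illustrative only):
  //
  //   TargetTransformInfo &TTI =
  //       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  //   int Cost = TTI.getMemoryOpCost(Instruction::Load, VecTy, Align(4),
  //                                  /*AddressSpace=*/0,
  //                                  TargetTransformInfo::TCK_RecipThroughput);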

  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                 unsigned AddressSpace,
                                 TTI::TargetCostKind CostKind);

  int getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                  const Value *Ptr, bool VariableMask,
                                  Align Alignment, TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr);

  int getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                 bool IsPairwiseForm,
                                 TTI::TargetCostKind CostKind);

  InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
                                              Type *ResTy, VectorType *ValTy,
                                              TTI::TargetCostKind CostKind);

  int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                            TTI::TargetCostKind CostKind);

  bool maybeLoweredToCall(Instruction &I);
  bool isLoweredToCall(const Function *F);
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo);
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI);
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  bool emitGetActiveLaneMask() const;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  bool shouldBuildLookupTablesForConstant(Constant *C) const {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsRelocation();

    return true;
  }

  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H