//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;

// Tail-predication modes. Only Disabled carries an explicit value (0); the
// remaining enumerators take consecutive values in declaration order, so
// ForceEnabled is 4.
// NOTE(review): nothing in this header consumes these values; what each mode
// means is defined by whoever reads it -- confirm there before reordering.
namespace TailPredication {
  enum Mode {
    Disabled = 0,
    EnabledNoReductions,
    Enabled,
    ForceEnabledNoReductions,
    ForceEnabled
  };
}

// For controlling conversion of memcpy into Tail Predicated loop.
52 namespace TPLoop { 53 enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow }; 54 } 55 56 class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { 57 using BaseT = BasicTTIImplBase<ARMTTIImpl>; 58 using TTI = TargetTransformInfo; 59 60 friend BaseT; 61 62 const ARMSubtarget *ST; 63 const ARMTargetLowering *TLI; 64 65 // Currently the following features are excluded from InlineFeaturesAllowed. 66 // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32 67 // Depending on whether they are set or unset, different 68 // instructions/registers are available. For example, inlining a callee with 69 // -thumb-mode in a caller with +thumb-mode, may cause the assembler to 70 // fail if the callee uses ARM only instructions, e.g. in inline asm. 71 const FeatureBitset InlineFeaturesAllowed = { 72 ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2, 73 ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8, 74 ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb, 75 ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex, 76 ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc, 77 ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt, 78 ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS, 79 ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing, 80 ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32, 81 ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR, 82 ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits, 83 ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg, 84 ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx, 85 ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs, 86 ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign, 87 ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx, 88 ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb, 89 ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR, 90 ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack, 91 ARM::FeatureHasNoBranchPredictor, 
ARM::FeatureDSP, ARM::FeatureMP, 92 ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass, 93 ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, 94 ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, 95 ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates 96 }; 97 98 const ARMSubtarget *getST() const { return ST; } 99 const ARMTargetLowering *getTLI() const { return TLI; } 100 101 public: 102 explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F) 103 : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), 104 TLI(ST->getTargetLowering()) {} 105 106 bool areInlineCompatible(const Function *Caller, 107 const Function *Callee) const; 108 109 bool enableInterleavedAccessVectorization() { return true; } 110 111 TTI::AddressingModeKind 112 getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const; 113 114 /// Floating-point computation using ARMv8 AArch32 Advanced 115 /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD 116 /// and Arm MVE are IEEE-754 compliant. 
117 bool isFPVectorizationPotentiallyUnsafe() { 118 return !ST->isTargetDarwin() && !ST->hasMVEFloatOps(); 119 } 120 121 Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, 122 IntrinsicInst &II) const; 123 Optional<Value *> simplifyDemandedVectorEltsIntrinsic( 124 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, 125 APInt &UndefElts2, APInt &UndefElts3, 126 std::function<void(Instruction *, unsigned, APInt, APInt &)> 127 SimplifyAndSetOp) const; 128 129 /// \name Scalar TTI Implementations 130 /// @{ 131 132 InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, 133 const APInt &Imm, Type *Ty); 134 135 using BaseT::getIntImmCost; 136 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, 137 TTI::TargetCostKind CostKind); 138 139 InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, 140 const APInt &Imm, Type *Ty, 141 TTI::TargetCostKind CostKind, 142 Instruction *Inst = nullptr); 143 144 /// @} 145 146 /// \name Vector TTI Implementations 147 /// @{ 148 149 unsigned getNumberOfRegisters(unsigned ClassID) const { 150 bool Vector = (ClassID == 1); 151 if (Vector) { 152 if (ST->hasNEON()) 153 return 16; 154 if (ST->hasMVEIntegerOps()) 155 return 8; 156 return 0; 157 } 158 159 if (ST->isThumb1Only()) 160 return 8; 161 return 13; 162 } 163 164 TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { 165 switch (K) { 166 case TargetTransformInfo::RGK_Scalar: 167 return TypeSize::getFixed(32); 168 case TargetTransformInfo::RGK_FixedWidthVector: 169 if (ST->hasNEON()) 170 return TypeSize::getFixed(128); 171 if (ST->hasMVEIntegerOps()) 172 return TypeSize::getFixed(128); 173 return TypeSize::getFixed(0); 174 case TargetTransformInfo::RGK_ScalableVector: 175 return TypeSize::getScalable(0); 176 } 177 llvm_unreachable("Unsupported register kind"); 178 } 179 180 unsigned getMaxInterleaveFactor(unsigned VF) { 181 return ST->getMaxInterleaveFactor(); 182 } 183 184 bool 
isProfitableLSRChainElement(Instruction *I); 185 186 bool isLegalMaskedLoad(Type *DataTy, Align Alignment); 187 188 bool isLegalMaskedStore(Type *DataTy, Align Alignment) { 189 return isLegalMaskedLoad(DataTy, Alignment); 190 } 191 192 bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) { 193 // For MVE, we have a custom lowering pass that will already have custom 194 // legalised any gathers that we can lower to MVE intrinsics, and want to 195 // expand all the rest. The pass runs before the masked intrinsic lowering 196 // pass. 197 return true; 198 } 199 200 bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) { 201 return forceScalarizeMaskedGather(VTy, Alignment); 202 } 203 204 bool isLegalMaskedGather(Type *Ty, Align Alignment); 205 206 bool isLegalMaskedScatter(Type *Ty, Align Alignment) { 207 return isLegalMaskedGather(Ty, Alignment); 208 } 209 210 InstructionCost getMemcpyCost(const Instruction *I); 211 212 int getNumMemOps(const IntrinsicInst *I) const; 213 214 InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, 215 ArrayRef<int> Mask, int Index, 216 VectorType *SubTp, 217 ArrayRef<const Value *> Args = None); 218 219 bool preferInLoopReduction(unsigned Opcode, Type *Ty, 220 TTI::ReductionFlags Flags) const; 221 222 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, 223 TTI::ReductionFlags Flags) const; 224 225 bool shouldExpandReduction(const IntrinsicInst *II) const { return false; } 226 227 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, 228 const Instruction *I = nullptr); 229 230 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, 231 TTI::CastContextHint CCH, 232 TTI::TargetCostKind CostKind, 233 const Instruction *I = nullptr); 234 235 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, 236 CmpInst::Predicate VecPred, 237 TTI::TargetCostKind CostKind, 238 const Instruction *I = nullptr); 239 240 InstructionCost 
getVectorInstrCost(unsigned Opcode, Type *Val, 241 unsigned Index); 242 243 InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE, 244 const SCEV *Ptr); 245 246 InstructionCost getArithmeticInstrCost( 247 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, 248 TTI::OperandValueKind Op1Info = TTI::OK_AnyValue, 249 TTI::OperandValueKind Op2Info = TTI::OK_AnyValue, 250 TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, 251 TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, 252 ArrayRef<const Value *> Args = ArrayRef<const Value *>(), 253 const Instruction *CxtI = nullptr); 254 255 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, 256 MaybeAlign Alignment, unsigned AddressSpace, 257 TTI::TargetCostKind CostKind, 258 const Instruction *I = nullptr); 259 260 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, 261 Align Alignment, unsigned AddressSpace, 262 TTI::TargetCostKind CostKind); 263 264 InstructionCost getInterleavedMemoryOpCost( 265 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, 266 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, 267 bool UseMaskForCond = false, bool UseMaskForGaps = false); 268 269 InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, 270 const Value *Ptr, bool VariableMask, 271 Align Alignment, 272 TTI::TargetCostKind CostKind, 273 const Instruction *I = nullptr); 274 275 InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, 276 Optional<FastMathFlags> FMF, 277 TTI::TargetCostKind CostKind); 278 InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, 279 Type *ResTy, VectorType *ValTy, 280 TTI::TargetCostKind CostKind); 281 282 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, 283 TTI::TargetCostKind CostKind); 284 285 bool maybeLoweredToCall(Instruction &I); 286 bool isLoweredToCall(const Function *F); 287 bool isHardwareLoopProfitable(Loop *L, 
ScalarEvolution &SE, 288 AssumptionCache &AC, 289 TargetLibraryInfo *LibInfo, 290 HardwareLoopInfo &HWLoopInfo); 291 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, 292 AssumptionCache &AC, TargetLibraryInfo *TLI, 293 DominatorTree *DT, 294 LoopVectorizationLegality *LVL); 295 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, 296 TTI::UnrollingPreferences &UP, 297 OptimizationRemarkEmitter *ORE); 298 299 PredicationStyle emitGetActiveLaneMask() const; 300 301 void getPeelingPreferences(Loop *L, ScalarEvolution &SE, 302 TTI::PeelingPreferences &PP); 303 bool shouldBuildLookupTablesForConstant(Constant *C) const { 304 // In the ROPI and RWPI relocation models we can't have pointers to global 305 // variables or functions in constant data, so don't convert switches to 306 // lookup tables if any of the values would need relocation. 307 if (ST->isROPI() || ST->isRWPI()) 308 return !C->needsDynamicRelocation(); 309 310 return true; 311 } 312 /// @} 313 }; 314 315 /// isVREVMask - Check if a vector shuffle corresponds to a VREV 316 /// instruction with the specified blocksize. (The order of the elements 317 /// within each block of the vector is reversed.) 318 inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { 319 assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && 320 "Only possible block sizes for VREV are: 16, 32, 64"); 321 322 unsigned EltSz = VT.getScalarSizeInBits(); 323 if (EltSz != 8 && EltSz != 16 && EltSz != 32) 324 return false; 325 326 unsigned BlockElts = M[0] + 1; 327 // If the first shuffle index is UNDEF, be optimistic. 
328 if (M[0] < 0) 329 BlockElts = BlockSize / EltSz; 330 331 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 332 return false; 333 334 for (unsigned i = 0, e = M.size(); i < e; ++i) { 335 if (M[i] < 0) 336 continue; // ignore UNDEF indices 337 if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) 338 return false; 339 } 340 341 return true; 342 } 343 344 } // end namespace llvm 345 346 #endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H 347