1 //===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
14 //
15 //===----------------------------------------------------------------------===//
16
17 #ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
18 #define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
19
20 #include "ARM.h"
21 #include "ARMSubtarget.h"
22 #include "ARMTargetMachine.h"
23 #include "llvm/ADT/ArrayRef.h"
24 #include "llvm/Analysis/TargetTransformInfo.h"
25 #include "llvm/CodeGen/BasicTTIImpl.h"
26 #include "llvm/IR/Constant.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/TargetParser/SubtargetFeature.h"
29 #include <optional>
30
31 namespace llvm {
32
33 class APInt;
34 class ARMTargetLowering;
35 class Instruction;
36 class Loop;
37 class SCEV;
38 class ScalarEvolution;
39 class Type;
40 class Value;
41
/// Selectable tail-predication modes. The enumerators are numbered
/// consecutively, from Disabled (0) up to ForceEnabled (4).
namespace TailPredication {
enum Mode {
  Disabled = 0,
  EnabledNoReductions = 1,
  Enabled = 2,
  ForceEnabledNoReductions = 3,
  ForceEnabled = 4
};
} // end namespace TailPredication
51
// Settings that control whether a memcpy is converted into a Tail Predicated
// loop. Enumerators are numbered consecutively starting at ForceDisabled (0).
namespace TPLoop {
enum MemTransfer {
  ForceDisabled = 0,
  ForceEnabled = 1,
  Allow = 2
};
} // end namespace TPLoop
56
57 class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
58 using BaseT = BasicTTIImplBase<ARMTTIImpl>;
59 using TTI = TargetTransformInfo;
60
61 friend BaseT;
62
63 const ARMSubtarget *ST;
64 const ARMTargetLowering *TLI;
65
66 // Currently the following features are excluded from InlineFeaturesAllowed.
67 // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
68 // Depending on whether they are set or unset, different
69 // instructions/registers are available. For example, inlining a callee with
70 // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
71 // fail if the callee uses ARM only instructions, e.g. in inline asm.
72 const FeatureBitset InlineFeaturesAllowed = {
73 ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
74 ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
75 ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
76 ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
77 ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
78 ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
79 ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
80 ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
81 ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
82 ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
83 ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
84 ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
85 ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
86 ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
87 ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
88 ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
89 ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
90 ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
91 ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
92 ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
93 ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
94 ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
95 ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
96 ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
97 };
98
getST()99 const ARMSubtarget *getST() const { return ST; }
getTLI()100 const ARMTargetLowering *getTLI() const { return TLI; }
101
102 public:
ARMTTIImpl(const ARMBaseTargetMachine * TM,const Function & F)103 explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
104 : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
105 TLI(ST->getTargetLowering()) {}
106
107 bool areInlineCompatible(const Function *Caller,
108 const Function *Callee) const;
109
enableInterleavedAccessVectorization()110 bool enableInterleavedAccessVectorization() { return true; }
111
112 TTI::AddressingModeKind
113 getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;
114
115 /// Floating-point computation using ARMv8 AArch32 Advanced
116 /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
117 /// and Arm MVE are IEEE-754 compliant.
isFPVectorizationPotentiallyUnsafe()118 bool isFPVectorizationPotentiallyUnsafe() {
119 return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
120 }
121
122 std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
123 IntrinsicInst &II) const;
124 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
125 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
126 APInt &UndefElts2, APInt &UndefElts3,
127 std::function<void(Instruction *, unsigned, APInt, APInt &)>
128 SimplifyAndSetOp) const;
129
130 /// \name Scalar TTI Implementations
131 /// @{
132
133 InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
134 const APInt &Imm, Type *Ty);
135
136 using BaseT::getIntImmCost;
137 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
138 TTI::TargetCostKind CostKind);
139
140 InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
141 const APInt &Imm, Type *Ty,
142 TTI::TargetCostKind CostKind,
143 Instruction *Inst = nullptr);
144
145 /// @}
146
147 /// \name Vector TTI Implementations
148 /// @{
149
getNumberOfRegisters(unsigned ClassID)150 unsigned getNumberOfRegisters(unsigned ClassID) const {
151 bool Vector = (ClassID == 1);
152 if (Vector) {
153 if (ST->hasNEON())
154 return 16;
155 if (ST->hasMVEIntegerOps())
156 return 8;
157 return 0;
158 }
159
160 if (ST->isThumb1Only())
161 return 8;
162 return 13;
163 }
164
getRegisterBitWidth(TargetTransformInfo::RegisterKind K)165 TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
166 switch (K) {
167 case TargetTransformInfo::RGK_Scalar:
168 return TypeSize::getFixed(32);
169 case TargetTransformInfo::RGK_FixedWidthVector:
170 if (ST->hasNEON())
171 return TypeSize::getFixed(128);
172 if (ST->hasMVEIntegerOps())
173 return TypeSize::getFixed(128);
174 return TypeSize::getFixed(0);
175 case TargetTransformInfo::RGK_ScalableVector:
176 return TypeSize::getScalable(0);
177 }
178 llvm_unreachable("Unsupported register kind");
179 }
180
getMaxInterleaveFactor(ElementCount VF)181 unsigned getMaxInterleaveFactor(ElementCount VF) {
182 return ST->getMaxInterleaveFactor();
183 }
184
185 bool isProfitableLSRChainElement(Instruction *I);
186
187 bool isLegalMaskedLoad(Type *DataTy, Align Alignment);
188
isLegalMaskedStore(Type * DataTy,Align Alignment)189 bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
190 return isLegalMaskedLoad(DataTy, Alignment);
191 }
192
forceScalarizeMaskedGather(VectorType * VTy,Align Alignment)193 bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
194 // For MVE, we have a custom lowering pass that will already have custom
195 // legalised any gathers that we can lower to MVE intrinsics, and want to
196 // expand all the rest. The pass runs before the masked intrinsic lowering
197 // pass.
198 return true;
199 }
200
forceScalarizeMaskedScatter(VectorType * VTy,Align Alignment)201 bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
202 return forceScalarizeMaskedGather(VTy, Alignment);
203 }
204
205 bool isLegalMaskedGather(Type *Ty, Align Alignment);
206
isLegalMaskedScatter(Type * Ty,Align Alignment)207 bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
208 return isLegalMaskedGather(Ty, Alignment);
209 }
210
211 InstructionCost getMemcpyCost(const Instruction *I);
212
getMaxMemIntrinsicInlineSizeThreshold()213 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
214 return ST->getMaxInlineSizeThreshold();
215 }
216
217 int getNumMemOps(const IntrinsicInst *I) const;
218
219 InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
220 ArrayRef<int> Mask,
221 TTI::TargetCostKind CostKind, int Index,
222 VectorType *SubTp,
223 ArrayRef<const Value *> Args = std::nullopt,
224 const Instruction *CxtI = nullptr);
225
226 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
227 TTI::ReductionFlags Flags) const;
228
229 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
230 TTI::ReductionFlags Flags) const;
231
shouldExpandReduction(const IntrinsicInst * II)232 bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
233
234 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
235 const Instruction *I = nullptr);
236
237 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
238 TTI::CastContextHint CCH,
239 TTI::TargetCostKind CostKind,
240 const Instruction *I = nullptr);
241
242 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
243 CmpInst::Predicate VecPred,
244 TTI::TargetCostKind CostKind,
245 const Instruction *I = nullptr);
246
247 using BaseT::getVectorInstrCost;
248 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
249 TTI::TargetCostKind CostKind,
250 unsigned Index, Value *Op0, Value *Op1);
251
252 InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
253 const SCEV *Ptr);
254
255 InstructionCost getArithmeticInstrCost(
256 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
257 TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
258 TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
259 ArrayRef<const Value *> Args = std::nullopt,
260 const Instruction *CxtI = nullptr);
261
262 InstructionCost
263 getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
264 unsigned AddressSpace, TTI::TargetCostKind CostKind,
265 TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
266 const Instruction *I = nullptr);
267
268 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
269 Align Alignment, unsigned AddressSpace,
270 TTI::TargetCostKind CostKind);
271
272 InstructionCost getInterleavedMemoryOpCost(
273 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
274 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
275 bool UseMaskForCond = false, bool UseMaskForGaps = false);
276
277 InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
278 const Value *Ptr, bool VariableMask,
279 Align Alignment,
280 TTI::TargetCostKind CostKind,
281 const Instruction *I = nullptr);
282
283 InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
284 std::optional<FastMathFlags> FMF,
285 TTI::TargetCostKind CostKind);
286 InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
287 Type *ResTy, VectorType *ValTy,
288 FastMathFlags FMF,
289 TTI::TargetCostKind CostKind);
290 InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
291 VectorType *ValTy,
292 TTI::TargetCostKind CostKind);
293
294 InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
295 FastMathFlags FMF,
296 TTI::TargetCostKind CostKind);
297
298 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
299 TTI::TargetCostKind CostKind);
300
301 /// getScalingFactorCost - Return the cost of the scaling used in
302 /// addressing mode represented by AM.
303 /// If the AM is supported, the return value must be >= 0.
304 /// If the AM is not supported, the return value must be negative.
305 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
306 StackOffset BaseOffset, bool HasBaseReg,
307 int64_t Scale, unsigned AddrSpace) const;
308
309 bool maybeLoweredToCall(Instruction &I);
310 bool isLoweredToCall(const Function *F);
311 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
312 AssumptionCache &AC,
313 TargetLibraryInfo *LibInfo,
314 HardwareLoopInfo &HWLoopInfo);
315 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI);
316 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
317 TTI::UnrollingPreferences &UP,
318 OptimizationRemarkEmitter *ORE);
319
320 TailFoldingStyle
321 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
322
323 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
324 TTI::PeelingPreferences &PP);
shouldBuildLookupTablesForConstant(Constant * C)325 bool shouldBuildLookupTablesForConstant(Constant *C) const {
326 // In the ROPI and RWPI relocation models we can't have pointers to global
327 // variables or functions in constant data, so don't convert switches to
328 // lookup tables if any of the values would need relocation.
329 if (ST->isROPI() || ST->isRWPI())
330 return !C->needsDynamicRelocation();
331
332 return true;
333 }
334
335 bool hasArmWideBranch(bool Thumb) const;
336
337 /// @}
338 };
339
340 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
341 /// instruction with the specified blocksize. (The order of the elements
342 /// within each block of the vector is reversed.)
isVREVMask(ArrayRef<int> M,EVT VT,unsigned BlockSize)343 inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
344 assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
345 "Only possible block sizes for VREV are: 16, 32, 64");
346
347 unsigned EltSz = VT.getScalarSizeInBits();
348 if (EltSz != 8 && EltSz != 16 && EltSz != 32)
349 return false;
350
351 unsigned BlockElts = M[0] + 1;
352 // If the first shuffle index is UNDEF, be optimistic.
353 if (M[0] < 0)
354 BlockElts = BlockSize / EltSz;
355
356 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
357 return false;
358
359 for (unsigned i = 0, e = M.size(); i < e; ++i) {
360 if (M[i] < 0)
361 continue; // ignore UNDEF indices
362 if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
363 return false;
364 }
365
366 return true;
367 }
368
369 } // end namespace llvm
370
371 #endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
372