xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// This file defines a TargetTransformInfoImplBase conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
18 #define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
19 
20 #include "ARM.h"
21 #include "ARMSubtarget.h"
22 #include "ARMTargetMachine.h"
23 #include "llvm/ADT/ArrayRef.h"
24 #include "llvm/Analysis/TargetTransformInfo.h"
25 #include "llvm/CodeGen/BasicTTIImpl.h"
26 #include "llvm/IR/Constant.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/TargetParser/SubtargetFeature.h"
29 #include <optional>
30 
31 namespace llvm {
32 
33 class APInt;
34 class ARMTargetLowering;
35 class Instruction;
36 class Loop;
37 class SCEV;
38 class ScalarEvolution;
39 class Type;
40 class Value;
41 
namespace TailPredication {
/// Tail-predication modes. The numeric values are written out explicitly;
/// they are the same values the enumerators receive from implicit numbering
/// starting at Disabled = 0.
enum Mode {
  Disabled = 0,
  EnabledNoReductions = 1,
  Enabled = 2,
  ForceEnabledNoReductions = 3,
  ForceEnabled = 4
};
} // end namespace TailPredication
51 
// Used to control the conversion of memcpy into a tail-predicated loop.
namespace TPLoop {
enum MemTransfer {
  ForceDisabled = 0,
  ForceEnabled = 1,
  Allow = 2
};
} // end namespace TPLoop
56 
57 class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
58   using BaseT = BasicTTIImplBase<ARMTTIImpl>;
59   using TTI = TargetTransformInfo;
60 
61   friend BaseT;
62 
63   const ARMSubtarget *ST;
64   const ARMTargetLowering *TLI;
65 
66   // Currently the following features are excluded from InlineFeaturesAllowed.
67   // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
68   // Depending on whether they are set or unset, different
69   // instructions/registers are available. For example, inlining a callee with
70   // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
71   // fail if the callee uses ARM only instructions, e.g. in inline asm.
72   const FeatureBitset InlineFeaturesAllowed = {
73       ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
74       ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
75       ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
76       ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
77       ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
78       ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
79       ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
80       ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
81       ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
82       ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
83       ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
84       ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
85       ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
86       ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
87       ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
88       ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
89       ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
90       ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
91       ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
92       ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
93       ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
94       ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
95       ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
96       ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
97   };
98 
getST()99   const ARMSubtarget *getST() const { return ST; }
getTLI()100   const ARMTargetLowering *getTLI() const { return TLI; }
101 
102 public:
ARMTTIImpl(const ARMBaseTargetMachine * TM,const Function & F)103   explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
104       : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
105         TLI(ST->getTargetLowering()) {}
106 
107   bool areInlineCompatible(const Function *Caller,
108                            const Function *Callee) const override;
109 
enableInterleavedAccessVectorization()110   bool enableInterleavedAccessVectorization() const override { return true; }
111 
112   TTI::AddressingModeKind
113   getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override;
114 
115   /// Floating-point computation using ARMv8 AArch32 Advanced
116   /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
117   /// and Arm MVE are IEEE-754 compliant.
isFPVectorizationPotentiallyUnsafe()118   bool isFPVectorizationPotentiallyUnsafe() const override {
119     return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
120   }
121 
122   std::optional<Instruction *>
123   instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
124   std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
125       InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
126       APInt &UndefElts2, APInt &UndefElts3,
127       std::function<void(Instruction *, unsigned, APInt, APInt &)>
128           SimplifyAndSetOp) const override;
129 
130   /// \name Scalar TTI Implementations
131   /// @{
132 
133   InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
134                                         const APInt &Imm,
135                                         Type *Ty) const override;
136 
137   using BaseT::getIntImmCost;
138   InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
139                                 TTI::TargetCostKind CostKind) const override;
140 
141   InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
142                                     const APInt &Imm, Type *Ty,
143                                     TTI::TargetCostKind CostKind,
144                                     Instruction *Inst = nullptr) const override;
145 
146   /// @}
147 
148   /// \name Vector TTI Implementations
149   /// @{
150 
getNumberOfRegisters(unsigned ClassID)151   unsigned getNumberOfRegisters(unsigned ClassID) const override {
152     bool Vector = (ClassID == 1);
153     if (Vector) {
154       if (ST->hasNEON())
155         return 16;
156       if (ST->hasMVEIntegerOps())
157         return 8;
158       return 0;
159     }
160 
161     if (ST->isThumb1Only())
162       return 8;
163     return 13;
164   }
165 
166   TypeSize
getRegisterBitWidth(TargetTransformInfo::RegisterKind K)167   getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override {
168     switch (K) {
169     case TargetTransformInfo::RGK_Scalar:
170       return TypeSize::getFixed(32);
171     case TargetTransformInfo::RGK_FixedWidthVector:
172       if (ST->hasNEON())
173         return TypeSize::getFixed(128);
174       if (ST->hasMVEIntegerOps())
175         return TypeSize::getFixed(128);
176       return TypeSize::getFixed(0);
177     case TargetTransformInfo::RGK_ScalableVector:
178       return TypeSize::getScalable(0);
179     }
180     llvm_unreachable("Unsupported register kind");
181   }
182 
getMaxInterleaveFactor(ElementCount VF)183   unsigned getMaxInterleaveFactor(ElementCount VF) const override {
184     return ST->getMaxInterleaveFactor();
185   }
186 
187   bool isProfitableLSRChainElement(Instruction *I) const override;
188 
189   bool isLegalMaskedLoad(Type *DataTy, Align Alignment,
190                          unsigned AddressSpace) const override;
191 
isLegalMaskedStore(Type * DataTy,Align Alignment,unsigned AddressSpace)192   bool isLegalMaskedStore(Type *DataTy, Align Alignment,
193                           unsigned AddressSpace) const override {
194     return isLegalMaskedLoad(DataTy, Alignment, AddressSpace);
195   }
196 
forceScalarizeMaskedGather(VectorType * VTy,Align Alignment)197   bool forceScalarizeMaskedGather(VectorType *VTy,
198                                   Align Alignment) const override {
199     // For MVE, we have a custom lowering pass that will already have custom
200     // legalised any gathers that we can lower to MVE intrinsics, and want to
201     // expand all the rest. The pass runs before the masked intrinsic lowering
202     // pass.
203     return true;
204   }
205 
forceScalarizeMaskedScatter(VectorType * VTy,Align Alignment)206   bool forceScalarizeMaskedScatter(VectorType *VTy,
207                                    Align Alignment) const override {
208     return forceScalarizeMaskedGather(VTy, Alignment);
209   }
210 
211   bool isLegalMaskedGather(Type *Ty, Align Alignment) const override;
212 
isLegalMaskedScatter(Type * Ty,Align Alignment)213   bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override {
214     return isLegalMaskedGather(Ty, Alignment);
215   }
216 
217   InstructionCost getMemcpyCost(const Instruction *I) const override;
218 
getMaxMemIntrinsicInlineSizeThreshold()219   uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
220     return ST->getMaxInlineSizeThreshold();
221   }
222 
223   int getNumMemOps(const IntrinsicInst *I) const;
224 
225   InstructionCost
226   getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
227                  ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
228                  VectorType *SubTp, ArrayRef<const Value *> Args = {},
229                  const Instruction *CxtI = nullptr) const override;
230 
231   bool preferInLoopReduction(RecurKind Kind, Type *Ty) const override;
232 
233   bool preferPredicatedReductionSelect() const override;
234 
shouldExpandReduction(const IntrinsicInst * II)235   bool shouldExpandReduction(const IntrinsicInst *II) const override {
236     return false;
237   }
238 
239   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
240                                  const Instruction *I = nullptr) const override;
241 
242   InstructionCost
243   getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
244                    TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
245                    const Instruction *I = nullptr) const override;
246 
247   InstructionCost getCmpSelInstrCost(
248       unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
249       TTI::TargetCostKind CostKind,
250       TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
251       TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
252       const Instruction *I = nullptr) const override;
253 
254   using BaseT::getVectorInstrCost;
255   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
256                                      TTI::TargetCostKind CostKind,
257                                      unsigned Index, const Value *Op0,
258                                      const Value *Op1) const override;
259 
260   InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
261                                             const SCEV *Ptr) const override;
262 
263   InstructionCost getArithmeticInstrCost(
264       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
265       TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
266       TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
267       ArrayRef<const Value *> Args = {},
268       const Instruction *CxtI = nullptr) const override;
269 
270   InstructionCost getMemoryOpCost(
271       unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
272       TTI::TargetCostKind CostKind,
273       TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
274       const Instruction *I = nullptr) const override;
275 
276   InstructionCost
277   getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
278                         unsigned AddressSpace,
279                         TTI::TargetCostKind CostKind) const override;
280 
281   InstructionCost getInterleavedMemoryOpCost(
282       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
283       Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
284       bool UseMaskForCond = false, bool UseMaskForGaps = false) const override;
285 
286   InstructionCost
287   getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
288                          bool VariableMask, Align Alignment,
289                          TTI::TargetCostKind CostKind,
290                          const Instruction *I = nullptr) const override;
291 
292   InstructionCost
293   getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
294                              std::optional<FastMathFlags> FMF,
295                              TTI::TargetCostKind CostKind) const override;
296   InstructionCost
297   getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
298                            VectorType *ValTy, std::optional<FastMathFlags> FMF,
299                            TTI::TargetCostKind CostKind) const override;
300   InstructionCost
301   getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *ValTy,
302                          TTI::TargetCostKind CostKind) const override;
303 
304   InstructionCost
305   getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
306                          TTI::TargetCostKind CostKind) const override;
307 
308   InstructionCost
309   getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
310                         TTI::TargetCostKind CostKind) const override;
311 
312   /// getScalingFactorCost - Return the cost of the scaling used in
313   /// addressing mode represented by AM.
314   /// If the AM is supported, the return value must be >= 0.
315   /// If the AM is not supported, the return value is an invalid cost.
316   InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
317                                        StackOffset BaseOffset, bool HasBaseReg,
318                                        int64_t Scale,
319                                        unsigned AddrSpace) const override;
320 
321   bool maybeLoweredToCall(Instruction &I) const;
322   bool isLoweredToCall(const Function *F) const override;
323   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
324                                 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
325                                 HardwareLoopInfo &HWLoopInfo) const override;
326   bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override;
327   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
328                                TTI::UnrollingPreferences &UP,
329                                OptimizationRemarkEmitter *ORE) const override;
330 
331   TailFoldingStyle
332   getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const override;
333 
334   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
335                              TTI::PeelingPreferences &PP) const override;
shouldBuildLookupTablesForConstant(Constant * C)336   bool shouldBuildLookupTablesForConstant(Constant *C) const override {
337     // In the ROPI and RWPI relocation models we can't have pointers to global
338     // variables or functions in constant data, so don't convert switches to
339     // lookup tables if any of the values would need relocation.
340     if (ST->isROPI() || ST->isRWPI())
341       return !C->needsDynamicRelocation();
342 
343     return true;
344   }
345 
346   bool hasArmWideBranch(bool Thumb) const override;
347 
348   bool isProfitableToSinkOperands(Instruction *I,
349                                   SmallVectorImpl<Use *> &Ops) const override;
350 
351   unsigned getNumBytesToPadGlobalArray(unsigned Size,
352                                        Type *ArrayType) const override;
353 
354   /// @}
355 };
356 
357 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
358 /// instruction with the specified blocksize.  (The order of the elements
359 /// within each block of the vector is reversed.)
isVREVMask(ArrayRef<int> M,EVT VT,unsigned BlockSize)360 inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
361   assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
362          "Only possible block sizes for VREV are: 16, 32, 64");
363 
364   unsigned EltSz = VT.getScalarSizeInBits();
365   if (EltSz != 8 && EltSz != 16 && EltSz != 32)
366     return false;
367 
368   unsigned BlockElts = M[0] + 1;
369   // If the first shuffle index is UNDEF, be optimistic.
370   if (M[0] < 0)
371     BlockElts = BlockSize / EltSz;
372 
373   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
374     return false;
375 
376   for (unsigned i = 0, e = M.size(); i < e; ++i) {
377     if (M[i] < 0)
378       continue; // ignore UNDEF indices
379     if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
380       return false;
381   }
382 
383   return true;
384 }
385 
386 } // end namespace llvm
387 
388 #endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
389