//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include <optional>

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;

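// Controls how aggressively MVE tail predication may be used. These values
// are expected to correspond to the -tail-predication command-line option
// handled in ARMTargetTransformInfo.cpp.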
namespace TailPredication {
  enum Mode {
    Disabled = 0,
    EnabledNoReductions,
    Enabled,
    ForceEnabledNoReductions,
    ForceEnabled
  };
}

// For controlling conversion of memcpy into a tail-predicated loop.
namespace TPLoop {
enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
}

class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeaturesAllowed:
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32.
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode in a caller with +thumb-mode may cause the assembler to
  // fail if the callee uses ARM-only instructions, e.g. in inline asm.
  const FeatureBitset InlineFeaturesAllowed = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  };

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
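  // Note: ARMTTIImpl is normally not constructed directly; it is expected to
  // be created and wrapped in a TargetTransformInfo object by
  // ARMBaseTargetMachine::getTargetTransformInfo() for the function being
  // compiled.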
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  bool enableInterleavedAccessVectorization() { return true; }

  TTI::AddressingModeKind
    getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
  bool isFPVectorizationPotentiallyUnsafe() {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  /// \name Scalar TTI Implementations
  /// @{

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty);

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    if (ST->isThumb1Only())
      return 8;
    return 13;
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(32);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->hasNEON())
        return TypeSize::getFixed(128);
      if (ST->hasMVEIntegerOps())
        return TypeSize::getFixed(128);
      return TypeSize::getFixed(0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(0);
    }
    llvm_unreachable("Unsupported register kind");
  }

  unsigned getMaxInterleaveFactor(ElementCount VF) {
    return ST->getMaxInterleaveFactor();
  }

  bool isProfitableLSRChainElement(Instruction *I);

  bool isLegalMaskedLoad(Type *DataTy, Align Alignment);

  bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
    return isLegalMaskedLoad(DataTy, Alignment);
  }

  bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
    // For MVE, we have a custom lowering pass that will already have custom
    // legalised any gathers that we can lower to MVE intrinsics, and want to
    // expand all the rest. The pass runs before the masked intrinsic lowering
    // pass.
    return true;
  }

  bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
    return forceScalarizeMaskedGather(VTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, Align Alignment);

  bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
    return isLegalMaskedGather(Ty, Alignment);
  }

  InstructionCost getMemcpyCost(const Instruction *I);

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
    return ST->getMaxInlineSizeThreshold();
  }

  int getNumMemOps(const IntrinsicInst *I) const;

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt,
                                 const Instruction *CxtI = nullptr);

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const;

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);

  InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = std::nullopt,
      const Instruction *CxtI = nullptr);

  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr);

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);
  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *ValTy,
                                           FastMathFlags FMF,
                                           TTI::TargetCostKind CostKind);
  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
                                         VectorType *ValTy,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                         FastMathFlags FMF,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  /// getScalingFactorCost - Return the cost of the scaling used in the
  /// addressing mode represented by AM.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, the return value must be negative.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       StackOffset BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;

  bool maybeLoweredToCall(Instruction &I);
  bool isLoweredToCall(const Function *F);
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo);
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI);
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
  bool shouldBuildLookupTablesForConstant(Constant *C) const {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsDynamicRelocation();

    return true;
  }

  bool hasArmWideBranch(bool Thumb) const;

  /// @}
};

/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize.  (The order of the elements
/// within each block of the vector is reversed.)
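/// For example, with 8-bit elements and BlockSize == 32, the mask
/// <3, 2, 1, 0, 7, 6, 5, 4> matches a VREV32.8 applied to a v8i8 vector.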
inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for VREV are: 16, 32, 64");

  unsigned EltSz = VT.getScalarSizeInBits();
  if (EltSz != 8 && EltSz != 16 && EltSz != 32)
    return false;

  unsigned BlockElts = M[0] + 1;
  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSz;

  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
    return false;

  for (unsigned i = 0, e = M.size(); i < e; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }

  return true;
}

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H