xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetTransformInfo.h (revision 924226fba12cc9a228c73b956e1b7fa24c60b055)
1 //===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file defines a TargetTransformInfo::Concept conforming object specific
11 /// to the ARM target machine. It uses the target's detailed information to
12 /// provide more precise answers to certain TTI queries, while letting the
13 /// target independent and default TTI implementations handle the rest.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
18 #define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
19 
20 #include "ARM.h"
21 #include "ARMSubtarget.h"
22 #include "ARMTargetMachine.h"
23 #include "llvm/ADT/ArrayRef.h"
24 #include "llvm/Analysis/TargetTransformInfo.h"
25 #include "llvm/CodeGen/BasicTTIImpl.h"
26 #include "llvm/IR/Constant.h"
27 #include "llvm/IR/Function.h"
28 #include "llvm/MC/SubtargetFeature.h"
29 
30 namespace llvm {
31 
32 class APInt;
33 class ARMTargetLowering;
34 class Instruction;
35 class Loop;
36 class SCEV;
37 class ScalarEvolution;
38 class Type;
39 class Value;
40 
/// Selectable tail-predication modes.
///
/// The enumerators are numbered consecutively starting from zero; the values
/// are spelled out here but are exactly the ones the compiler would assign
/// implicitly.
// NOTE(review): the meaning of each mode is only suggested by its name in this
// header (disabled / enabled, with or without reductions, optionally forced);
// confirm against the code that consumes the option.
namespace TailPredication {
  enum Mode {
    Disabled                 = 0,
    EnabledNoReductions      = 1,
    Enabled                  = 2,
    ForceEnabledNoReductions = 3,
    ForceEnabled             = 4
  };
} // end namespace TailPredication
50 
// Settings that control whether a memcpy is converted into a Tail Predicated
// loop. Enumerator values are consecutive starting from zero.
namespace TPLoop {
enum MemTransfer {
  ForceDisabled = 0,
  ForceEnabled  = 1,
  Allow         = 2
};
} // end namespace TPLoop
55 
56 class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
57   using BaseT = BasicTTIImplBase<ARMTTIImpl>;
58   using TTI = TargetTransformInfo;
59 
60   friend BaseT;
61 
62   const ARMSubtarget *ST;
63   const ARMTargetLowering *TLI;
64 
65   // Currently the following features are excluded from InlineFeaturesAllowed.
66   // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
67   // Depending on whether they are set or unset, different
68   // instructions/registers are available. For example, inlining a callee with
69   // -thumb-mode in a caller with +thumb-mode, may cause the assembler to
70   // fail if the callee uses ARM only instructions, e.g. in inline asm.
71   const FeatureBitset InlineFeaturesAllowed = {
72       ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
73       ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
74       ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
75       ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
76       ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
77       ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
78       ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
79       ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
80       ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
81       ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
82       ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
83       ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
84       ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
85       ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
86       ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
87       ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
88       ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
89       ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
90       ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
91       ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
92       ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
93       ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
94       ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
95       ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
96   };
97 
98   const ARMSubtarget *getST() const { return ST; }
99   const ARMTargetLowering *getTLI() const { return TLI; }
100 
101 public:
102   explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
103       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
104         TLI(ST->getTargetLowering()) {}
105 
106   bool areInlineCompatible(const Function *Caller,
107                            const Function *Callee) const;
108 
109   bool enableInterleavedAccessVectorization() { return true; }
110 
111   TTI::AddressingModeKind
112     getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;
113 
114   /// Floating-point computation using ARMv8 AArch32 Advanced
115   /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
116   /// and Arm MVE are IEEE-754 compliant.
117   bool isFPVectorizationPotentiallyUnsafe() {
118     return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
119   }
120 
121   Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
122                                                IntrinsicInst &II) const;
123   Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
124       InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
125       APInt &UndefElts2, APInt &UndefElts3,
126       std::function<void(Instruction *, unsigned, APInt, APInt &)>
127           SimplifyAndSetOp) const;
128 
129   /// \name Scalar TTI Implementations
130   /// @{
131 
132   InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
133                                         const APInt &Imm, Type *Ty);
134 
135   using BaseT::getIntImmCost;
136   InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
137                                 TTI::TargetCostKind CostKind);
138 
139   InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
140                                     const APInt &Imm, Type *Ty,
141                                     TTI::TargetCostKind CostKind,
142                                     Instruction *Inst = nullptr);
143 
144   /// @}
145 
146   /// \name Vector TTI Implementations
147   /// @{
148 
149   unsigned getNumberOfRegisters(unsigned ClassID) const {
150     bool Vector = (ClassID == 1);
151     if (Vector) {
152       if (ST->hasNEON())
153         return 16;
154       if (ST->hasMVEIntegerOps())
155         return 8;
156       return 0;
157     }
158 
159     if (ST->isThumb1Only())
160       return 8;
161     return 13;
162   }
163 
164   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
165     switch (K) {
166     case TargetTransformInfo::RGK_Scalar:
167       return TypeSize::getFixed(32);
168     case TargetTransformInfo::RGK_FixedWidthVector:
169       if (ST->hasNEON())
170         return TypeSize::getFixed(128);
171       if (ST->hasMVEIntegerOps())
172         return TypeSize::getFixed(128);
173       return TypeSize::getFixed(0);
174     case TargetTransformInfo::RGK_ScalableVector:
175       return TypeSize::getScalable(0);
176     }
177     llvm_unreachable("Unsupported register kind");
178   }
179 
180   unsigned getMaxInterleaveFactor(unsigned VF) {
181     return ST->getMaxInterleaveFactor();
182   }
183 
184   bool isProfitableLSRChainElement(Instruction *I);
185 
186   bool isLegalMaskedLoad(Type *DataTy, Align Alignment);
187 
188   bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
189     return isLegalMaskedLoad(DataTy, Alignment);
190   }
191 
192   bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
193     // For MVE, we have a custom lowering pass that will already have custom
194     // legalised any gathers that we can lower to MVE intrinsics, and want to
195     // expand all the rest. The pass runs before the masked intrinsic lowering
196     // pass.
197     return true;
198   }
199 
200   bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
201     return forceScalarizeMaskedGather(VTy, Alignment);
202   }
203 
204   bool isLegalMaskedGather(Type *Ty, Align Alignment);
205 
206   bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
207     return isLegalMaskedGather(Ty, Alignment);
208   }
209 
210   InstructionCost getMemcpyCost(const Instruction *I);
211 
212   int getNumMemOps(const IntrinsicInst *I) const;
213 
214   InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
215                                  ArrayRef<int> Mask, int Index,
216                                  VectorType *SubTp);
217 
218   bool preferInLoopReduction(unsigned Opcode, Type *Ty,
219                              TTI::ReductionFlags Flags) const;
220 
221   bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
222                                        TTI::ReductionFlags Flags) const;
223 
224   bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
225 
226   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
227                                  const Instruction *I = nullptr);
228 
229   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
230                                    TTI::CastContextHint CCH,
231                                    TTI::TargetCostKind CostKind,
232                                    const Instruction *I = nullptr);
233 
234   InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
235                                      CmpInst::Predicate VecPred,
236                                      TTI::TargetCostKind CostKind,
237                                      const Instruction *I = nullptr);
238 
239   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
240                                      unsigned Index);
241 
242   InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
243                                             const SCEV *Ptr);
244 
245   InstructionCost getArithmeticInstrCost(
246       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
247       TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
248       TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
249       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
250       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
251       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
252       const Instruction *CxtI = nullptr);
253 
254   InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
255                                   MaybeAlign Alignment, unsigned AddressSpace,
256                                   TTI::TargetCostKind CostKind,
257                                   const Instruction *I = nullptr);
258 
259   InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
260                                         Align Alignment, unsigned AddressSpace,
261                                         TTI::TargetCostKind CostKind);
262 
263   InstructionCost getInterleavedMemoryOpCost(
264       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
265       Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
266       bool UseMaskForCond = false, bool UseMaskForGaps = false);
267 
268   InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
269                                          const Value *Ptr, bool VariableMask,
270                                          Align Alignment,
271                                          TTI::TargetCostKind CostKind,
272                                          const Instruction *I = nullptr);
273 
274   InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
275                                              Optional<FastMathFlags> FMF,
276                                              TTI::TargetCostKind CostKind);
277   InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
278                                               Type *ResTy, VectorType *ValTy,
279                                               TTI::TargetCostKind CostKind);
280 
281   InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
282                                         TTI::TargetCostKind CostKind);
283 
284   bool maybeLoweredToCall(Instruction &I);
285   bool isLoweredToCall(const Function *F);
286   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
287                                 AssumptionCache &AC,
288                                 TargetLibraryInfo *LibInfo,
289                                 HardwareLoopInfo &HWLoopInfo);
290   bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
291                                    ScalarEvolution &SE,
292                                    AssumptionCache &AC,
293                                    TargetLibraryInfo *TLI,
294                                    DominatorTree *DT,
295                                    const LoopAccessInfo *LAI);
296   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
297                                TTI::UnrollingPreferences &UP,
298                                OptimizationRemarkEmitter *ORE);
299 
300   bool emitGetActiveLaneMask() const;
301 
302   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
303                              TTI::PeelingPreferences &PP);
304   bool shouldBuildLookupTablesForConstant(Constant *C) const {
305     // In the ROPI and RWPI relocation models we can't have pointers to global
306     // variables or functions in constant data, so don't convert switches to
307     // lookup tables if any of the values would need relocation.
308     if (ST->isROPI() || ST->isRWPI())
309       return !C->needsDynamicRelocation();
310 
311     return true;
312   }
313   /// @}
314 };
315 
316 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
317 /// instruction with the specified blocksize.  (The order of the elements
318 /// within each block of the vector is reversed.)
319 inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
320   assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
321          "Only possible block sizes for VREV are: 16, 32, 64");
322 
323   unsigned EltSz = VT.getScalarSizeInBits();
324   if (EltSz != 8 && EltSz != 16 && EltSz != 32)
325     return false;
326 
327   unsigned BlockElts = M[0] + 1;
328   // If the first shuffle index is UNDEF, be optimistic.
329   if (M[0] < 0)
330     BlockElts = BlockSize / EltSz;
331 
332   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
333     return false;
334 
335   for (unsigned i = 0, e = M.size(); i < e; ++i) {
336     if (M[i] < 0)
337       continue; // ignore UNDEF indices
338     if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
339       return false;
340   }
341 
342   return true;
343 }
344 
345 } // end namespace llvm
346 
347 #endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
348