//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;

class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;
  // The following features are currently excluded from InlineFeatureWhitelist:
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32.
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode into a caller with +thumb-mode may cause the assembler to
  // fail if the callee uses ARM-only instructions, e.g. in inline asm.
  const FeatureBitset InlineFeatureWhitelist = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  };

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

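  /// \returns true if \p Callee can be inlined into \p Caller given their
  /// subtarget features. A sketch of the check done by the out-of-line
  /// definition (see ARMTargetTransformInfo.cpp), where CallerBits and
  /// CalleeBits stand for the two functions' feature bitsets:
  /// \code
  ///   // Features outside the whitelist must match exactly...
  ///   bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
  ///                     (CalleeBits & ~InlineFeatureWhitelist);
  ///   // ...whitelisted callee features need only be a subset of the
  ///   // caller's.
  ///   bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist)
  ///                      == (CalleeBits & InlineFeatureWhitelist);
  ///   return MatchExact && MatchSubset;
  /// \endcode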
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  bool enableInterleavedAccessVectorization() { return true; }

  // Favor backedge-indexed addressing only for small single-block loops on
  // Thumb2 M-class cores, and not when optimizing for size.
  bool shouldFavorBackedgeIndex(const Loop *L) const {
    if (L->getHeader()->getParent()->hasOptSize())
      return false;
    return ST->isMClass() && ST->isThumb2() && L->getNumBlocks() == 1;
  }

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
  bool isFPVectorizationPotentiallyUnsafe() {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

  /// \name Scalar TTI Implementations
  /// @{

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty);

  using BaseT::getIntImmCost;
  int getIntImmCost(const APInt &Imm, Type *Ty);

  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                        Type *Ty);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      // NEON provides 16 128-bit Q registers, MVE 8.
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    // Thumb1 can freely allocate only the eight low registers; otherwise
    // r0-r12 (13 GPRs) are available.
    if (ST->isThumb1Only())
      return 8;
    return 13;
  }

  unsigned getRegisterBitWidth(bool Vector) const {
    if (Vector) {
      // Both NEON and MVE use 128-bit vector registers.
      if (ST->hasNEON())
        return 128;
      if (ST->hasMVEIntegerOps())
        return 128;
      return 0;
    }

    return 32;
  }

  unsigned getMaxInterleaveFactor(unsigned VF) {
    return ST->getMaxInterleaveFactor();
  }

  bool isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment);

  bool isLegalMaskedStore(Type *DataTy, MaybeAlign Alignment) {
    return isLegalMaskedLoad(DataTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, MaybeAlign Alignment);

  bool isLegalMaskedScatter(Type *Ty, MaybeAlign Alignment) { return false; }

  int getMemcpyCost(const Instruction *I);

  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

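  /// Tell the ExpandReductions pass which reduction intrinsics to lower to
  /// scalar IR rather than leave to ISel. As an illustration (the IR value
  /// names here are hypothetical, not from this file), a call such as
  /// \code
  ///   %r = call reassoc float
  ///       @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(
  ///           float %acc, <4 x float> %v)
  /// \endcode
  /// can be left to legalization, while the same call without 'reassoc' is an
  /// ordered reduction and must be expanded here.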
  bool shouldExpandReduction(const IntrinsicInst *II) const {
    switch (II->getIntrinsicID()) {
    case Intrinsic::experimental_vector_reduce_v2_fadd:
    case Intrinsic::experimental_vector_reduce_v2_fmul:
      // We don't have legalization support for ordered FP reductions.
      if (!II->getFastMathFlags().allowReassoc())
        return true;
      // Can't legalize reductions with soft floats.
      return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs();

    case Intrinsic::experimental_vector_reduce_fmin:
    case Intrinsic::experimental_vector_reduce_fmax:
      // Can't legalize reductions with soft floats. Without the no-NaNs flag
      // these become fminimum/fmaximum nodes, which we do not know how to
      // lower.
      return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs() ||
             !II->getFastMathFlags().noNaNs();

    default:
      // Don't expand anything else; let legalization deal with it.
      return false;
    }
  }

  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I = nullptr);

  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         const Instruction *I = nullptr);

  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);

  int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                const SCEV *Ptr);

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                      unsigned AddressSpace, const Instruction *I = nullptr);

  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace,
                                 bool UseMaskForCond = false,
                                 bool UseMaskForGaps = false);

  bool isLoweredToCall(const Function *F);
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo);
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
                                   ScalarEvolution &SE,
                                   AssumptionCache &AC,
                                   TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI);
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

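  /// Decide whether a switch over \p C may be converted into a lookup table.
  /// For example (illustrative, not from this file), a switch returning one
  /// of several function pointers would otherwise become a constant table of
  /// code addresses, each of which needs a relocation under ROPI/RWPI.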
  bool shouldBuildLookupTablesForConstant(Constant *C) const {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsRelocation();

    return true;
  }
  /// @}
};
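
// A sketch of typical construction, mirroring the pattern used by
// ARMBaseTargetMachine::getTargetTransformInfo (in ARMTargetMachine.cpp):
//
//   TargetTransformInfo
//   ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) {
//     return TargetTransformInfo(ARMTTIImpl(this, F));
//   }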

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H