xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h (revision 2e3507c25e42292b45a5482e116d278f5515d04d)
1 //===-- ARMSubtarget.h - Define Subtarget for the ARM ----------*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares the ARM specific subclass of TargetSubtargetInfo.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
14 #define LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
15 
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMConstantPoolValue.h"
19 #include "ARMFrameLowering.h"
20 #include "ARMISelLowering.h"
21 #include "ARMMachineFunctionInfo.h"
22 #include "ARMSelectionDAGInfo.h"
23 #include "llvm/Analysis/TargetTransformInfo.h"
24 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
25 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
26 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/RegisterBankInfo.h"
29 #include "llvm/CodeGen/TargetSubtargetInfo.h"
30 #include "llvm/MC/MCInstrItineraries.h"
31 #include "llvm/MC/MCSchedule.h"
32 #include "llvm/Target/TargetMachine.h"
33 #include "llvm/Target/TargetOptions.h"
34 #include "llvm/TargetParser/Triple.h"
#include <bitset>
35 #include <memory>
36 #include <string>
37 
38 #define GET_SUBTARGETINFO_HEADER
39 #include "ARMGenSubtargetInfo.inc"
40 
41 namespace llvm {
42 
43 class ARMBaseTargetMachine;
44 class GlobalValue;
45 class StringRef;
46 
47 class ARMSubtarget : public ARMGenSubtargetInfo {
48 protected:
     /// ARM processor family. The selected value is stored in ARMProcFamily
     /// (default: Others) and is what the isCortexA*(), isSwift() and isKrait()
     /// style predicates of this class compare against.
49   enum ARMProcFamilyEnum {
50     Others,
51 
52     CortexA12,
53     CortexA15,
54     CortexA17,
55     CortexA32,
56     CortexA35,
57     CortexA5,
58     CortexA53,
59     CortexA55,
60     CortexA57,
61     CortexA7,
62     CortexA72,
63     CortexA73,
64     CortexA75,
65     CortexA76,
66     CortexA77,
67     CortexA78,
68     CortexA78C,
69     CortexA710,
70     CortexA8,
71     CortexA9,
72     CortexM3,
73     CortexM7,
74     CortexR4,
75     CortexR4F,
76     CortexR5,
77     CortexR52,
78     CortexR7,
79     CortexX1,
80     CortexX1C,
81     Exynos,
82     Krait,
83     Kryo,
84     NeoverseN1,
85     NeoverseN2,
86     NeoverseV1,
87     Swift
88   };
89   enum ARMProcClassEnum {
       // ARM processor class. The selected value is stored in ARMProcClass
       // (default: None) and is tested by isAClass(), isMClass() and isRClass().
90     None,
91 
92     AClass,
93     MClass,
94     RClass
95   };
96   enum ARMArchEnum {
       // ARM architecture. The selected value is stored in ARMArch, which
       // defaults to ARMv4t. The enumerators are not listed in version order
       // (e.g. ARMv81a precedes ARMv8a), so their numeric values must not be
       // used for "at least version X" style comparisons.
97     ARMv4,
98     ARMv4t,
99     ARMv5,
100     ARMv5t,
101     ARMv5te,
102     ARMv5tej,
103     ARMv6,
104     ARMv6k,
105     ARMv6kz,
106     ARMv6m,
107     ARMv6sm,
108     ARMv6t2,
109     ARMv7a,
110     ARMv7em,
111     ARMv7m,
112     ARMv7r,
113     ARMv7ve,
114     ARMv81a,
115     ARMv82a,
116     ARMv83a,
117     ARMv84a,
118     ARMv85a,
119     ARMv86a,
120     ARMv87a,
121     ARMv88a,
122     ARMv89a,
123     ARMv8a,
124     ARMv8mBaseline,
125     ARMv8mMainline,
126     ARMv8r,
127     ARMv81mMainline,
128     ARMv9a,
129     ARMv91a,
130     ARMv92a,
131     ARMv93a,
132     ARMv94a,
133   };
134 
135 public:
136   /// Describes the timing of load multiple/store multiple instructions.
      /// The value chosen for this subtarget is stored in LdStMultipleTiming
      /// (default: SingleIssue) and returned by getLdStMultipleTiming().
137   enum ARMLdStMultipleTiming {
138     /// Can load/store 2 registers/cycle.
139     DoubleIssue,
140     /// Can load/store 2 registers/cycle, but needs an extra cycle if the access
141     /// is not 64-bit aligned.
142     DoubleIssueCheckUnalignedAccess,
143     /// Can load/store 1 register/cycle.
144     SingleIssue,
145     /// Can load/store 1 register/cycle, but needs an extra cycle for address
146     /// computation and potentially also for register writeback.
147     SingleIssuePlusExtras,
148   };
149 
150 protected:
151 // Bool members corresponding to the SubtargetFeatures defined in tablegen.
    // Each GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) entry in the
    // included file expands to one `bool ATTRIBUTE = DEFAULT;` member here; the
    // GETTER argument is unused by this expansion.
152 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
153   bool ATTRIBUTE = DEFAULT;
154 #include "ARMGenSubtargetInfo.inc"
155 
156   /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
157   ARMProcFamilyEnum ARMProcFamily = Others;
158 
159   /// ARMProcClass - ARM processor class: None, AClass, RClass or MClass.
160   ARMProcClassEnum ARMProcClass = None;
161 
162   /// ARMArch - ARM architecture.
163   ARMArchEnum ARMArch = ARMv4t;
164 
165   /// UseMulOps - True if non-microcoded fused integer multiply-add and
166   /// multiply-subtract instructions should be used.
167   bool UseMulOps = false;
168 
169   /// SupportsTailCall - True if the OS supports tail call. The dynamic linker
170   /// must be able to synthesize call stubs for interworking between ARM and
171   /// Thumb.
172   bool SupportsTailCall = false;
173 
174   /// RestrictIT - If true, the subtarget disallows generation of complex IT
175   /// blocks.
176   bool RestrictIT = false;
177 
178   /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
179   bool UseSjLjEH = false;
180 
181   /// stackAlignment - The minimum alignment known to hold for the stack frame
182   /// on entry to the function and which must be maintained by every function.
183   Align stackAlignment = Align(4);
184 
185   /// CPUString - String name of used CPU.
186   std::string CPUString;
187 
      /// MaxInterleaveFactor - Value reported by getMaxInterleaveFactor();
      /// defaults to 1.
188   unsigned MaxInterleaveFactor = 1;
189 
190   /// Clearance before partial register updates (in number of instructions).
191   unsigned PartialUpdateClearance = 0;
192 
193   /// What kind of timing do load multiple/store multiple have (double issue,
194   /// single issue etc).
195   ARMLdStMultipleTiming LdStMultipleTiming = SingleIssue;
196 
197   /// The adjustment that we need to apply to get the operand latency from the
198   /// operand cycle returned by the itinerary data for pre-ISel operands.
199   int PreISelOperandLatencyAdjustment = 2;
200 
201   /// What alignment is preferred for loop bodies, in log2(bytes).
202   unsigned PrefLoopLogAlignment = 0;
203 
204   /// The cost factor for MVE instructions, representing the multiple beats an
205   /// instruction can take. The default is 2, (set in initSubtargetFeatures so
206   /// that we can use subtarget features less than 2).
207   unsigned MVEVectorCostFactor = 0;
208 
209   /// OptMinSize - True if we're optimising for minimum code size, equal to
210   /// the function attribute.
211   bool OptMinSize = false;
212 
213   /// IsLittle - The target is Little Endian. Unlike the members above it has
      /// no in-class initializer.
      /// NOTE(review): presumably set from the constructor's IsLittle argument
      /// -- confirm against the out-of-line constructor.
214   bool IsLittle;
215 
216   /// TargetTriple - What processor and OS we're targeting.
217   Triple TargetTriple;
218 
219   /// SchedModel - Processor specific instruction costs.
220   MCSchedModel SchedModel;
221 
222   /// Selected instruction itineraries (one entry per itinerary class.)
223   InstrItineraryData InstrItins;
224 
225   /// Options passed via command line that could influence the target.
      /// Held by reference, so the referenced TargetOptions must outlive this
      /// subtarget.
226   const TargetOptions &Options;
227 
      /// TM - Reference to the ARM target machine; the referenced object must
      /// outlive this subtarget.
      /// NOTE(review): presumably bound to the constructor's TM argument.
228   const ARMBaseTargetMachine &TM;
229 
230 public:
231   /// This constructor initializes the data members to match those
232   /// of the specified triple.
233   ///
      /// MinSize defaults to false when omitted.
      /// NOTE(review): CPU and FS are presumably the CPU name and the feature
      /// string, and IsLittle/MinSize presumably seed the IsLittle/OptMinSize
      /// members -- confirm against the out-of-line definition.
234   ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
235                const ARMBaseTargetMachine &TM, bool IsLittle,
236                bool MinSize = false);
237 
238   /// getMaxInlineSizeThreshold - Largest memset / memcpy size for which
239   /// inlining the call is still considered profitable.
240   unsigned getMaxInlineSizeThreshold() const { return 64; }
243 
244   /// getMaxMemcpyTPInlineSizeThreshold - Returns the maximum size
245   /// that still makes it profitable to inline a llvm.memcpy as a Tail
246   /// Predicated loop. The value is currently a fixed 128.
247   /// This threshold should only be used for constant size inputs.
248   unsigned getMaxMemcpyTPInlineSizeThreshold() const { return 128; }
249 
250   /// ParseSubtargetFeatures - Parses the feature string and sets the
251   /// specified subtarget options.  The definition is auto generated by tblgen.
252   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
253 
254   /// initializeSubtargetDependencies - Initializes using a CPU and feature string
255   /// so that we can use initializer lists for subtarget initialization.
      /// Returns a reference to an ARMSubtarget.
      /// NOTE(review): presumably *this -- confirm against the definition.
256   ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
257 
258   const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
        // TSInfo is a by-value member of this subtarget.
259     return &TSInfo;
260   }
261 
      /// Returns the instruction info object owned through the InstrInfo
      /// unique_ptr.
262   const ARMBaseInstrInfo *getInstrInfo() const override {
263     return InstrInfo.get();
264   }
265 
      /// Returns the address of the by-value TLInfo member.
266   const ARMTargetLowering *getTargetLowering() const override {
267     return &TLInfo;
268   }
269 
      /// Returns the frame lowering object owned through the FrameLowering
      /// unique_ptr.
270   const ARMFrameLowering *getFrameLowering() const override {
271     return FrameLowering.get();
272   }
273 
      /// Returns the register info exposed by InstrInfo; there is no separate
      /// register info member in this class.
274   const ARMBaseRegisterInfo *getRegisterInfo() const override {
275     return &InstrInfo->getRegisterInfo();
276   }
277 
      // GlobalISel component accessors, defined out of line.
      // NOTE(review): presumably these return the CallLoweringInfo, InstSelector,
      // Legalizer and RegBankInfo members -- confirm against the definitions.
278   const CallLowering *getCallLowering() const override;
279   InstructionSelector *getInstructionSelector() const override;
280   const LegalizerInfo *getLegalizerInfo() const override;
281   const RegisterBankInfo *getRegBankInfo() const override;
282 
283 private:
      // Returned by getSelectionDAGInfo().
284   ARMSelectionDAGInfo TSInfo;
285   // Either Thumb1FrameLowering or ARMFrameLowering.
286   std::unique_ptr<ARMFrameLowering> FrameLowering;
287   // Either Thumb1InstrInfo or Thumb2InstrInfo.
      // NOTE(review): the concrete type is chosen in the out-of-line
      // constructor, which is not visible here -- confirm this list is complete.
288   std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
      // Returned by getTargetLowering().
289   ARMTargetLowering   TLInfo;
290 
291   /// GlobalISel related APIs.
292   std::unique_ptr<CallLowering> CallLoweringInfo;
293   std::unique_ptr<InstructionSelector> InstSelector;
294   std::unique_ptr<LegalizerInfo> Legalizer;
295   std::unique_ptr<RegisterBankInfo> RegBankInfo;
296 
      // Initialization helpers, defined out of line.
297   void initializeEnvironment();
298   void initSubtargetFeatures(StringRef CPU, StringRef FS);
299   ARMFrameLowering *initializeFrameLowering(StringRef CPU, StringRef FS);
300 
      // Eight-bit set, value-initialized so that every bit starts out clear.
      // NOTE(review): presumably one bit per coprocessor number for CDE --
      // confirm against the code that sets it.
301   std::bitset<8> CoprocCDE = {};
302 public:
303 // Getters for SubtargetFeatures defined in tablegen.
    // Each GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) entry in the
    // included file expands to `bool GETTER() const { return ATTRIBUTE; }`; the
    // DEFAULT argument is unused by this expansion.
304 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
305   bool GETTER() const { return ATTRIBUTE; }
306 #include "ARMGenSubtargetInfo.inc"
307 
308   /// @{
309   /// These functions are obsolete, please consider adding subtarget features
310   /// or properties instead of calling them. Each one compares ARMProcFamily
      /// against a single family, except isLikeA9().
311   bool isCortexA5() const { return ARMProcFamily == CortexA5; }
312   bool isCortexA7() const { return ARMProcFamily == CortexA7; }
313   bool isCortexA8() const { return ARMProcFamily == CortexA8; }
314   bool isCortexA9() const { return ARMProcFamily == CortexA9; }
315   bool isCortexA15() const { return ARMProcFamily == CortexA15; }
316   bool isSwift()    const { return ARMProcFamily == Swift; }
317   bool isCortexM3() const { return ARMProcFamily == CortexM3; }
318   bool isCortexM7() const { return ARMProcFamily == CortexM7; }
      // True for Cortex-A9, Cortex-A15 and Krait.
319   bool isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); }
320   bool isCortexR5() const { return ARMProcFamily == CortexR5; }
321   bool isKrait() const { return ARMProcFamily == Krait; }
322   /// @}
323 
324   bool hasARMOps() const { return !NoARM; }
325 
      /// True only when both hasNEON() and hasNEONForFP() hold.
326   bool useNEONForSinglePrecisionFP() const {
327     return hasNEON() && hasNEONForFP();
328   }
329 
      // Each *Base predicate simply forwards to the generated feature getter
      // named in its body.
330   bool hasVFP2Base() const { return hasVFPv2SP(); }
331   bool hasVFP3Base() const { return hasVFPv3D16SP(); }
332   bool hasVFP4Base() const { return hasVFPv4D16SP(); }
333   bool hasFPARMv8Base() const { return hasFPARMv8D16SP(); }
334 
335   bool hasAnyDataBarrier() const {
        // True when HasDataBarrier is set, or when not in Thumb mode and
        // hasV6Ops() holds.
336     return HasDataBarrier || (hasV6Ops() && !isThumb());
337   }
338 
339   bool useMulOps() const { return UseMulOps; }
340   bool useFPVMLx() const { return !SlowFPVMLx; }
341   bool useFPVFMx() const {
        // Never on Darwin; elsewhere requires hasVFP4Base() and SlowFPVFMx unset.
342     return !isTargetDarwin() && hasVFP4Base() && !SlowFPVFMx;
343   }
      // The 16- and 64-bit variants add hasFullFP16() / hasFP64() on top of
      // useFPVFMx().
344   bool useFPVFMx16() const { return useFPVFMx() && hasFullFP16(); }
345   bool useFPVFMx64() const { return useFPVFMx() && hasFP64(); }
346   bool useSjLjEH() const { return UseSjLjEH; }
347   bool hasBaseDSP() const {
348     // Thumb needs hasThumb2() and hasDSP(); ARM mode only needs hasV5TEOps().
349     if (!isThumb())
350       return hasV5TEOps();
351     return hasThumb2() && hasDSP();
352   }
353 
354   /// Return true if the CPU supports any kind of instruction fusion, i.e.
      /// when hasFuseAES() or hasFuseLiterals() holds.
355   bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); }
356 
357   const Triple &getTargetTriple() const { return TargetTriple; }
358 
      // Operating system / platform queries, all forwarded to TargetTriple.
359   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
360   bool isTargetIOS() const { return TargetTriple.isiOS(); }
361   bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); }
362   bool isTargetWatchABI() const { return TargetTriple.isWatchABI(); }
363   bool isTargetDriverKit() const { return TargetTriple.isDriverKit(); }
364   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
365   bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
366   bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
367   bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
368 
      // Object file format queries, all forwarded to TargetTriple.
369   bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
370   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
371   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
372 
373   // "ARM EABI" here means the bare-metal EABI from the ARM ABI documents,
374   // selected with e.g. -target arm-none-eabi. It is NOT the same as GNUEABI.
375   // FIXME: Add a flag for bare-metal for that target and set Triple::EABI
376   // even for GNUEABI, so we can make a distinction here and still conform to
377   // the EABI on GNU (and Android) mode. This requires change in Clang, too.
378   // FIXME: The Darwin exception is temporary, while we move users to
379   // "*-*-*-macho" triples as quickly as possible.
380   bool isTargetAEABI() const {
381     const auto Env = TargetTriple.getEnvironment();
382     const bool IsEABIEnv = Env == Triple::EABI || Env == Triple::EABIHF;
383     return IsEABIEnv && !isTargetDarwin() && !isTargetWindows();
384   }
385   bool isTargetGNUAEABI() const {
386     const auto Env = TargetTriple.getEnvironment();
387     const bool IsGNUEnv = Env == Triple::GNUEABI || Env == Triple::GNUEABIHF;
388     return IsGNUEnv && !isTargetDarwin() && !isTargetWindows();
389   }
390   bool isTargetMuslAEABI() const {
391     const auto Env = TargetTriple.getEnvironment();
392     const bool IsMuslEnv = Env == Triple::MuslEABI ||
393                            Env == Triple::MuslEABIHF || Env == Triple::OpenHOS;
394     return IsMuslEnv && !isTargetDarwin() && !isTargetWindows();
395   }
396 
397   // True for ARM targets that support the EHABI exception handling standard.
398   // Darwin uses SjLj. Other targets might need more checks.
      // The decision is delegated entirely to TargetTriple.
399   bool isTargetEHABICompatible() const {
400     return TargetTriple.isTargetEHABICompatible();
401   }
402 
      // Defined out of line.
403   bool isTargetHardFloat() const;
404 
405   bool isReadTPSoft() const {
406     return !isReadTPTPIDRURW() && !isReadTPTPIDRURO() && !isReadTPTPIDRPRW();
407   }
408 
409   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
410 
411   bool isXRaySupported() const override;
412 
      // ABI queries, defined out of line.
413   bool isAPCS_ABI() const;
414   bool isAAPCS_ABI() const;
415   bool isAAPCS16_ABI() const;
416 
      // Defined out of line.
417   bool isROPI() const;
418   bool isRWPI() const;
419 
420   bool useMachineScheduler() const { return UseMISched; }
421   bool useMachinePipeliner() const { return UseMIPipeliner; }
422   bool hasMinSize() const { return OptMinSize; }
      // Thumb mode without / with Thumb2 support respectively.
423   bool isThumb1Only() const { return isThumb() && !hasThumb2(); }
424   bool isThumb2() const { return isThumb() && hasThumb2(); }
      // Processor class predicates; each compares ARMProcClass against one value.
425   bool isMClass() const { return ARMProcClass == MClass; }
426   bool isRClass() const { return ARMProcClass == RClass; }
427   bool isAClass() const { return ARMProcClass == AClass; }
428 
429   bool isR9Reserved() const {
430     return ReserveR9 || (isTargetMachO() && !HasV6Ops);
431   }
432 
433   MCPhysReg getFramePointerReg() const {
434     // R7 on Darwin, and for non-Windows Thumb code without an AAPCS frame chain.
435     const bool UsesR7 = isTargetDarwin() || (!isTargetWindows() && isThumb() &&
436                                              !createAAPCSFrameChain());
437     return UsesR7 ? ARM::R7 : ARM::R11;
438   }
439 
440   /// Reports whether the frame setup uses two separate pushes (r0-r7,lr first,
441   /// then r8-r11), principally so that the frame pointer ends up adjacent to
442   /// lr. Thumb1-only targets always need this because their push and pop
443   /// instructions can't access the high registers.
444   bool splitFramePushPop(const MachineFunction &MF) const {
445     // Each operand alone forces the split; evaluation stops at the first hit.
446     return MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() ||
447            (getFramePointerReg() == ARM::R7 &&
448             MF.getTarget().Options.DisableFramePointerElim(MF)) ||
449            isThumb1Only();
450   }
451 
452   bool splitFramePointerPush(const MachineFunction &MF) const;
453 
454   bool useStride4VFPs() const;
455 
456   bool useMovt() const;
457 
      /// Returns the SupportsTailCall member.
458   bool supportsTailCall() const { return SupportsTailCall; }
459 
      /// Unaligned memory accesses are allowed unless StrictAlign is set.
460   bool allowsUnalignedMem() const { return !StrictAlign; }
461 
      /// Returns the RestrictIT member.
462   bool restrictIT() const { return RestrictIT; }
463 
      /// Returns the CPU name by reference; the reference refers to the
      /// CPUString member of this subtarget.
464   const std::string & getCPUString() const { return CPUString; }
465 
      /// Returns the IsLittle member.
466   bool isLittle() const { return IsLittle; }
467 
468   unsigned getMispredictionPenalty() const;
469 
470   /// Returns true if machine scheduler should be enabled.
471   bool enableMachineScheduler() const override;
472 
473   /// Returns true if machine pipeliner should be enabled.
474   bool enableMachinePipeliner() const override;
475   bool useDFAforSMS() const override;
476 
477   /// True for some subtargets at > -O0.
478   bool enablePostRAScheduler() const override;
479 
480   /// True for some subtargets at > -O0.
481   bool enablePostRAMachineScheduler() const override;
482 
483   /// Check whether this subtarget wants to use subregister liveness.
484   bool enableSubRegLiveness() const override;
485 
486   /// Enable use of alias analysis during code generation (during MI
487   /// scheduling, DAGCombine, etc.). Unconditionally true for this subtarget.
488   bool useAA() const override { return true; }
489 
490   /// getInstrItineraryData - Return the instruction itineraries based on
491   /// subtarget selection (the address of the InstrItins member).
492   const InstrItineraryData *getInstrItineraryData() const override {
493     return &InstrItins;
494   }
495 
496   /// getStackAlignment - Returns the minimum alignment known to hold for the
497   /// stack frame on entry to the function and which must be maintained by every
498   /// function for this subtarget.
499   Align getStackAlignment() const { return stackAlignment; }
500 
501   // Alignment required for LDRD/STRD: 4 with hasV7Ops() or unaligned access.
502   Align getDualLoadStoreAlignment() const {
503     return (hasV7Ops() || allowsUnalignedMem()) ? Align(4) : Align(8);
504   }
505 
506   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
507 
      /// Clearance before partial register updates, in number of instructions.
508   unsigned getPartialUpdateClearance() const { return PartialUpdateClearance; }
509 
      /// Returns the load multiple/store multiple timing of this subtarget.
510   ARMLdStMultipleTiming getLdStMultipleTiming() const {
511     return LdStMultipleTiming;
512   }
513 
      /// Returns the adjustment applied to the itinerary operand cycle to obtain
      /// the operand latency for pre-ISel operands.
514   int getPreISelOperandLatencyAdjustment() const {
515     return PreISelOperandLatencyAdjustment;
516   }
517 
518   /// True if the GV will be accessed via an indirect symbol.
519   bool isGVIndirectSymbol(const GlobalValue *GV) const;
520 
521   /// Boolean query about the GV; it does not return a constant pool modifier.
      /// NOTE(review): presumably true when the GV must be accessed through the
      /// GOT -- confirm against the out-of-line definition.
522   bool isGVInGOT(const GlobalValue *GV) const;
523 
524   /// True if fast-isel is used.
525   bool useFastISel() const;
526 
527   /// Picks the return opcode that matches the current feature set. BX is
528   /// preferred when available because it allows mixing thumb/arm code; plain
529   /// mov pc,lr remains the fallback on ARMv4.
530   unsigned getReturnOpcode() const {
531     if (isThumb())
532       return ARM::tBX_RET;
533     // ARM mode: BX_RET is only usable when hasV4TOps() holds; MOVPCLR is the
534     // fallback.
535     return hasV4TOps() ? ARM::BX_RET : ARM::MOVPCLR;
536   }
537 
538   /// Allow movt+movw for PIC global address calculation.
539   /// ELF does not have GOT relocations for movt+movw.
540   /// ROPI does not use GOT.
      /// The result is therefore true under ROPI or on any non-ELF target.
541   bool allowPositionIndependentMovt() const {
542     return isROPI() || !isTargetELF();
543   }
544 
      /// Preferred alignment for loop bodies, in log2(bytes).
545   unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; }
546 
547   unsigned
548   getMVEVectorCostFactor(TargetTransformInfo::TargetCostKind CostKind) const {
549     // Code-size queries always see a factor of 1.
550     const bool ForCodeSize = CostKind == TargetTransformInfo::TCK_CodeSize;
551     return ForCodeSize ? 1u : MVEVectorCostFactor;
552   }
553 
554   bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
555                                    unsigned PhysReg) const override;
      // Defined out of line.
      // NOTE(review): the meaning of the returned unsigned is not visible in
      // this header -- see the definition before relying on it.
556   unsigned getGPRAllocationOrder(const MachineFunction &MF) const;
557 };
558 
559 } // end namespace llvm
560 
561 #endif  // LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
562