xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
//===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
14 #define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
15 
16 #include "AArch64FrameLowering.h"
17 #include "AArch64ISelLowering.h"
18 #include "AArch64InstrInfo.h"
19 #include "AArch64PointerAuth.h"
20 #include "AArch64RegisterInfo.h"
21 #include "AArch64SelectionDAGInfo.h"
22 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
23 #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
24 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
25 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
26 #include "llvm/CodeGen/RegisterBankInfo.h"
27 #include "llvm/CodeGen/TargetSubtargetInfo.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/TargetParser/Triple.h"
30 
31 #define GET_SUBTARGETINFO_HEADER
32 #include "AArch64GenSubtargetInfo.inc"
33 
34 namespace llvm {
35 class GlobalValue;
36 class StringRef;
37 
/// AArch64Subtarget - AArch64-specific subclass of TargetSubtargetInfo.
/// Holds the per-CPU feature flags (generated by tablegen), tuning
/// parameters applied in initializeProperties(), and the subtarget-owned
/// codegen objects: frame lowering, instruction info, SelectionDAG info,
/// target lowering, and the GlobalISel pipeline pieces.
class AArch64Subtarget final : public AArch64GenSubtargetInfo {
public:
  /// Coarse processor-family classification: Generic plus one enumerator per
  /// family listed in AArch64TargetParserDef.inc.
  enum ARMProcFamilyEnum : uint8_t {
    Generic,
#define ARM_PROCESSOR_FAMILY(ENUM) ENUM,
#include "llvm/TargetParser/AArch64TargetParserDef.inc"
#undef ARM_PROCESSOR_FAMILY
  };

protected:
  /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
  ARMProcFamilyEnum ARMProcFamily = Generic;

  // Enable 64-bit vectorization in SLP.
  unsigned MinVectorRegisterBitWidth = 64;

// Bool members corresponding to the SubtargetFeatures defined in tablegen
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool ATTRIBUTE = DEFAULT;
#include "AArch64GenSubtargetInfo.inc"

  // Tuning knobs; per-CPU values are assigned in initializeProperties().
  unsigned EpilogueVectorizationMinVF = 16;
  uint8_t MaxInterleaveFactor = 2;
  uint8_t VectorInsertExtractBaseCost = 2;
  uint16_t CacheLineSize = 0;
  // Default scatter/gather overhead.
  unsigned ScatterOverhead = 10;
  unsigned GatherOverhead = 10;
  uint16_t PrefetchDistance = 0;
  uint16_t MinPrefetchStride = 1;
  unsigned MaxPrefetchIterationsAhead = UINT_MAX;
  Align PrefFunctionAlignment;
  Align PrefLoopAlignment;
  unsigned MaxBytesForLoopAlignment = 0;
  unsigned MinimumJumpTableEntries = 4;
  unsigned MaxJumpTableSize = 0;

  // ReserveXRegister[i] - X#i is not available as a general purpose register.
  BitVector ReserveXRegister;

  // ReserveXRegisterForRA[i] - X#i is not available for register allocator.
  BitVector ReserveXRegisterForRA;

  // CustomCallUsedXRegister[i] - X#i call saved.
  BitVector CustomCallSavedXRegs;

  // True when targeting little-endian (set from the constructor's
  // LittleEndian argument).
  bool IsLittle;

  // Streaming-SVE mode of the function body; see isStreaming() /
  // isStreamingCompatible() below.
  bool IsStreaming;
  bool IsStreamingCompatible;
  // Explicit override for the SME streaming-hazard size; when unset, a
  // default is derived in getStreamingHazardSize().
  std::optional<unsigned> StreamingHazardSize;
  unsigned MinSVEVectorSizeInBits;
  unsigned MaxSVEVectorSizeInBits;
  unsigned VScaleForTuning = 1;
  TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;

  bool EnableSubregLiveness;

  /// TargetTriple - What processor and OS we're targeting.
  Triple TargetTriple;

  AArch64FrameLowering FrameLowering;
  AArch64InstrInfo InstrInfo;
  AArch64SelectionDAGInfo TSInfo;
  AArch64TargetLowering TLInfo;

  /// GlobalISel related APIs.
  std::unique_ptr<CallLowering> CallLoweringInfo;
  std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
  std::unique_ptr<InstructionSelector> InstSelector;
  std::unique_ptr<LegalizerInfo> Legalizer;
  std::unique_ptr<RegisterBankInfo> RegBankInfo;

private:
  /// initializeSubtargetDependencies - Initializes using CPUString and the
  /// passed in feature string so that we can use initializer lists for
  /// subtarget initialization.
  AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
                                                    StringRef CPUString,
                                                    StringRef TuneCPUString,
                                                    bool HasMinSize);

  /// Initialize properties based on the selected processor family.
  void initializeProperties(bool HasMinSize);

public:
  /// This constructor initializes the data members to match that
  /// of the specified triple.
  AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
                   StringRef FS, const TargetMachine &TM, bool LittleEndian,
                   unsigned MinSVEVectorSizeInBitsOverride = 0,
                   unsigned MaxSVEVectorSizeInBitsOverride = 0,
                   bool IsStreaming = false, bool IsStreamingCompatible = false,
                   bool HasMinSize = false);

  virtual unsigned getHwModeSet() const override;

// Getters for SubtargetFeatures defined in tablegen
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool GETTER() const { return ATTRIBUTE; }
#include "AArch64GenSubtargetInfo.inc"

  // Accessors for the subtarget-owned codegen objects, required by the
  // TargetSubtargetInfo interface.
  const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
    return &TSInfo;
  }
  const AArch64FrameLowering *getFrameLowering() const override {
    return &FrameLowering;
  }
  const AArch64TargetLowering *getTargetLowering() const override {
    return &TLInfo;
  }
  const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
  const AArch64RegisterInfo *getRegisterInfo() const override {
    return &getInstrInfo()->getRegisterInfo();
  }
  // GlobalISel accessors; implementations live in the .cpp file.
  const CallLowering *getCallLowering() const override;
  const InlineAsmLowering *getInlineAsmLowering() const override;
  InstructionSelector *getInstructionSelector() const override;
  const LegalizerInfo *getLegalizerInfo() const override;
  const RegisterBankInfo *getRegBankInfo() const override;
  const Triple &getTargetTriple() const { return TargetTriple; }
  bool enableMachineScheduler() const override { return true; }
  bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
  bool enableSubRegLiveness() const override { return EnableSubregLiveness; }

  bool enableMachinePipeliner() const override;
  bool useDFAforSMS() const override { return false; }

  /// Returns ARM processor family.
  /// Avoid this function! CPU specifics should be kept local to this class
  /// and preferably modeled with SubtargetFeatures or properties in
  /// initializeProperties().
  ARMProcFamilyEnum getProcFamily() const {
    return ARMProcFamily;
  }

  bool isXRaySupported() const override { return true; }

  /// Returns true if the function has a streaming body.
  bool isStreaming() const { return IsStreaming; }

  /// Returns true if the function has a streaming-compatible body.
  bool isStreamingCompatible() const { return IsStreamingCompatible; }

  /// Returns the size of memory region that if accessed by both the CPU and
  /// the SME unit could result in a hazard. 0 = disabled.
  /// Falls back to a default of 1024 bytes when no explicit size was set and
  /// the target has SME+SVE without FEAT_SME_FA64.
  unsigned getStreamingHazardSize() const {
    return StreamingHazardSize.value_or(
        !hasSMEFA64() && hasSME() && hasSVE() ? 1024 : 0);
  }

  /// Returns true if the target has NEON and the function at runtime is known
  /// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
  /// mode, which disables NEON instructions).
  bool isNeonAvailable() const {
    return hasNEON() &&
           (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
  }

  /// Returns true if the target has SVE and can use the full range of SVE
  /// instructions, for example because it knows the function is known not to be
  /// in streaming-SVE mode or when the target has FEAT_FA64 enabled.
  bool isSVEAvailable() const {
    return hasSVE() &&
           (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
  }

  /// Returns true if the target has access to the streaming-compatible subset
  /// of SVE instructions.
  bool isStreamingSVEAvailable() const { return hasSME() && isStreaming(); }

  /// Returns true if the target has access to either the full range of SVE
  /// instructions, or the streaming-compatible subset of SVE instructions.
  bool isSVEorStreamingSVEAvailable() const {
    return hasSVE() || isStreamingSVEAvailable();
  }

  unsigned getMinVectorRegisterBitWidth() const {
    // Don't assume any minimum vector size when PSTATE.SM may not be 0, because
    // we don't yet support streaming-compatible codegen support that we trust
    // is safe for functions that may be executed in streaming-SVE mode.
    // By returning '0' here, we disable vectorization.
    if (!isSVEAvailable() && !isNeonAvailable())
      return 0;
    return MinVectorRegisterBitWidth;
  }

  bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
  bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA[i]; }
  /// Number of X registers reserved for either reason (user-reserved or
  /// reserved for the register allocator), counting each register once.
  unsigned getNumXRegisterReserved() const {
    BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs());
    AllReservedX |= ReserveXRegister;
    AllReservedX |= ReserveXRegisterForRA;
    return AllReservedX.count();
  }
  bool isLRReservedForRA() const { return ReserveLRForRA; }
  bool isXRegCustomCalleeSaved(size_t i) const {
    return CustomCallSavedXRegs[i];
  }
  bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }

  /// Return true if the CPU supports any kind of instruction fusion.
  bool hasFusion() const {
    return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
           hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
           hasFuseAdrpAdd() || hasFuseLiterals();
  }

  // Accessors for the tuning parameters declared above.
  unsigned getEpilogueVectorizationMinVF() const {
    return EpilogueVectorizationMinVF;
  }
  unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
  unsigned getVectorInsertExtractBaseCost() const;
  unsigned getCacheLineSize() const override { return CacheLineSize; }
  unsigned getScatterOverhead() const { return ScatterOverhead; }
  unsigned getGatherOverhead() const { return GatherOverhead; }
  unsigned getPrefetchDistance() const override { return PrefetchDistance; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    return MinPrefetchStride;
  }
  unsigned getMaxPrefetchIterationsAhead() const override {
    return MaxPrefetchIterationsAhead;
  }
  Align getPrefFunctionAlignment() const {
    return PrefFunctionAlignment;
  }
  Align getPrefLoopAlignment() const { return PrefLoopAlignment; }

  unsigned getMaxBytesForLoopAlignment() const {
    return MaxBytesForLoopAlignment;
  }

  unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
  unsigned getMinimumJumpTableEntries() const {
    return MinimumJumpTableEntries;
  }

  /// CPU has TBI (top byte of addresses is ignored during HW address
  /// translation) and OS enables it.
  bool supportsAddressTopByteIgnored() const;

  bool isLittleEndian() const { return IsLittle; }

  // Triple-derived OS / environment / object-format predicates.
  bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
  bool isTargetIOS() const { return TargetTriple.isiOS(); }
  bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
  bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
  bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
  bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
  bool isWindowsArm64EC() const { return TargetTriple.isWindowsArm64EC(); }

  bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
  bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }

  bool isTargetILP32() const {
    return TargetTriple.isArch32Bit() ||
           TargetTriple.getEnvironment() == Triple::GNUILP32;
  }

  bool useAA() const override;

  bool addrSinkUsingGEPs() const override {
    // Keeping GEPs inbounds is important for exploiting AArch64
    // addressing-modes in ILP32 mode.
    return useAA() || isTargetILP32();
  }

  /// Returns true for the Small and Kernel code models.
  bool useSmallAddressing() const {
    switch (TLInfo.getTargetMachine().getCodeModel()) {
      case CodeModel::Kernel:
        // Kernel is currently allowed only for Fuchsia targets,
        // where it is the same as Small for almost all purposes.
      case CodeModel::Small:
        return true;
      default:
        return false;
    }
  }

  /// Returns whether the operating system makes it safer to store sensitive
  /// values in x16 and x17 as opposed to other registers.
  bool isX16X17Safer() const;

  /// ParseSubtargetFeatures - Parses features string setting specified
  /// subtarget options.  Definition of function is auto generated by tblgen.
  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

  /// ClassifyGlobalReference - Find the target operand flags that describe
  /// how a global value should be referenced for the current subtarget.
  unsigned ClassifyGlobalReference(const GlobalValue *GV,
                                   const TargetMachine &TM) const;

  unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
                                           const TargetMachine &TM) const;

  /// This overload exists for compatibility with the corresponding virtual
  /// function declared in the base class TargetSubtargetInfo (and defined by
  /// other targets); it performs no classification and only avoids a build
  /// error. Update it if the AArch64 target ever needs to use it.
  unsigned char
  classifyGlobalFunctionReference(const GlobalValue *GV) const override {
    return 0;
  }

  void overrideSchedPolicy(MachineSchedPolicy &Policy,
                           unsigned NumRegionInstrs) const override;
  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
                             SDep &Dep,
                             const TargetSchedModel *SchedModel) const override;

  bool enableEarlyIfConversion() const override;

  std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;

  /// Returns true if calls with the given calling convention (and
  /// variadic-ness) follow the Win64 calling-convention rules on this target.
  bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const {
    switch (CC) {
    case CallingConv::C:
    case CallingConv::Fast:
    case CallingConv::Swift:
    case CallingConv::SwiftTail:
      return isTargetWindows();
    case CallingConv::PreserveNone:
      return IsVarArg && isTargetWindows();
    case CallingConv::Win64:
      return true;
    default:
      return false;
    }
  }

  /// Return whether FrameLowering should always set the "extended frame
  /// present" bit in FP, or set it based on a symbol in the runtime.
  bool swiftAsyncContextIsDynamicallySet() const {
    // Older OS versions (particularly system unwinders) are confused by the
    // Swift extended frame, so when building code that might be run on them we
    // must dynamically query the concurrency library to determine whether
    // extended frames should be flagged as present.
    const Triple &TT = getTargetTriple();

    unsigned Major = TT.getOSVersion().getMajor();
    switch(TT.getOS()) {
    default:
      return false;
    case Triple::IOS:
    case Triple::TvOS:
      return Major < 15;
    case Triple::WatchOS:
      return Major < 8;
    case Triple::MacOSX:
    case Triple::Darwin:
      return Major < 12;
    }
  }

  void mirFileLoaded(MachineFunction &MF) const override;

  // Return the known range for the bit length of SVE data registers. A value
  // of 0 means nothing is known about that particular limit beyond what's
  // implied by the architecture.
  unsigned getMaxSVEVectorSizeInBits() const {
    assert(isSVEorStreamingSVEAvailable() &&
           "Tried to get SVE vector length without SVE support!");
    return MaxSVEVectorSizeInBits;
  }

  unsigned getMinSVEVectorSizeInBits() const {
    assert(isSVEorStreamingSVEAvailable() &&
           "Tried to get SVE vector length without SVE support!");
    return MinSVEVectorSizeInBits;
  }

  // Return the known bit length of SVE data registers. A value of 0 means the
  // length is unknown beyond what's implied by the architecture.
  unsigned getSVEVectorSizeInBits() const {
    assert(isSVEorStreamingSVEAvailable() &&
           "Tried to get SVE vector length without SVE support!");
    if (MinSVEVectorSizeInBits == MaxSVEVectorSizeInBits)
      return MaxSVEVectorSizeInBits;
    return 0;
  }

  bool useSVEForFixedLengthVectors() const {
    if (!isSVEorStreamingSVEAvailable())
      return false;

    // Prefer NEON unless larger SVE registers are available.
    return !isNeonAvailable() || getMinSVEVectorSizeInBits() >= 256;
  }

  bool useSVEForFixedLengthVectors(EVT VT) const {
    if (!useSVEForFixedLengthVectors() || !VT.isFixedLengthVector())
      return false;
    return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock ||
           !isNeonAvailable();
  }

  unsigned getVScaleForTuning() const { return VScaleForTuning; }

  TailFoldingOpts getSVETailFoldingDefaultOpts() const {
    return DefaultSVETFOpts;
  }

  /// Returns true to use the addvl/inc/dec instructions, as opposed to separate
  /// add + cnt instructions.
  bool useScalarIncVL() const;

  /// Symbol name of the stack-probe helper; Arm64EC uses a mangled variant.
  const char* getChkStkName() const {
    if (isWindowsArm64EC())
      return "#__chkstk_arm64ec";
    return "__chkstk";
  }

  /// Symbol name of the stack-cookie check helper; Arm64EC uses a mangled
  /// variant.
  const char* getSecurityCheckCookieName() const {
    if (isWindowsArm64EC())
      return "#__security_check_cookie_arm64ec";
    return "__security_check_cookie";
  }

  /// Choose a method of checking LR before performing a tail call.
  AArch64PAuth::AuthCheckMethod
  getAuthenticatedLRCheckMethod(const MachineFunction &MF) const;

  /// Compute the integer discriminator for a given BlockAddress constant, if
  /// blockaddress signing is enabled, or std::nullopt otherwise.
  /// Blockaddress signing is controlled by the function attribute
  /// "ptrauth-indirect-gotos" on the parent function.
  /// Note that this assumes the discriminator is independent of the indirect
  /// goto branch site itself, i.e., it's the same for all BlockAddresses in
  /// a function.
  std::optional<uint16_t>
  getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const;
};
473 } // End llvm namespace
474 
475 #endif
476