xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1 //===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares the AArch64 specific subclass of TargetSubtarget.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
14 #define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
15 
16 #include "AArch64FrameLowering.h"
17 #include "AArch64ISelLowering.h"
18 #include "AArch64InstrInfo.h"
19 #include "AArch64PointerAuth.h"
20 #include "AArch64RegisterInfo.h"
21 #include "AArch64SelectionDAGInfo.h"
22 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
23 #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
24 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
25 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
26 #include "llvm/CodeGen/RegisterBankInfo.h"
27 #include "llvm/CodeGen/TargetSubtargetInfo.h"
28 #include "llvm/IR/DataLayout.h"
29 
30 #define GET_SUBTARGETINFO_HEADER
31 #include "AArch64GenSubtargetInfo.inc"
32 
33 namespace llvm {
34 class GlobalValue;
35 class StringRef;
36 class Triple;
37 
38 class AArch64Subtarget final : public AArch64GenSubtargetInfo {
39 public:
40   enum ARMProcFamilyEnum : uint8_t {
41     Others,
42 #define ARM_PROCESSOR_FAMILY(ENUM) ENUM,
43 #include "llvm/TargetParser/AArch64TargetParserDef.inc"
44 #undef ARM_PROCESSOR_FAMILY
45   };
46 
47 protected:
48   /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
49   ARMProcFamilyEnum ARMProcFamily = Others;
50 
51   // Enable 64-bit vectorization in SLP.
52   unsigned MinVectorRegisterBitWidth = 64;
53 
54 // Bool members corresponding to the SubtargetFeatures defined in tablegen
55 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
56   bool ATTRIBUTE = DEFAULT;
57 #include "AArch64GenSubtargetInfo.inc"
58 
59   uint8_t MaxInterleaveFactor = 2;
60   uint8_t VectorInsertExtractBaseCost = 2;
61   uint16_t CacheLineSize = 0;
62   uint16_t PrefetchDistance = 0;
63   uint16_t MinPrefetchStride = 1;
64   unsigned MaxPrefetchIterationsAhead = UINT_MAX;
65   Align PrefFunctionAlignment;
66   Align PrefLoopAlignment;
67   unsigned MaxBytesForLoopAlignment = 0;
68   unsigned MinimumJumpTableEntries = 4;
69   unsigned MaxJumpTableSize = 0;
70 
71   // ReserveXRegister[i] - X#i is not available as a general purpose register.
72   BitVector ReserveXRegister;
73 
74   // ReserveXRegisterForRA[i] - X#i is not available for register allocator.
75   BitVector ReserveXRegisterForRA;
76 
  // CustomCallSavedXRegs[i] - X#i is call saved.
78   BitVector CustomCallSavedXRegs;
79 
80   bool IsLittle;
81 
82   bool IsStreaming;
83   bool IsStreamingCompatible;
84   unsigned MinSVEVectorSizeInBits;
85   unsigned MaxSVEVectorSizeInBits;
86   unsigned VScaleForTuning = 2;
87   TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;
88 
89   /// TargetTriple - What processor and OS we're targeting.
90   Triple TargetTriple;
91 
92   AArch64FrameLowering FrameLowering;
93   AArch64InstrInfo InstrInfo;
94   AArch64SelectionDAGInfo TSInfo;
95   AArch64TargetLowering TLInfo;
96 
97   /// GlobalISel related APIs.
98   std::unique_ptr<CallLowering> CallLoweringInfo;
99   std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
100   std::unique_ptr<InstructionSelector> InstSelector;
101   std::unique_ptr<LegalizerInfo> Legalizer;
102   std::unique_ptr<RegisterBankInfo> RegBankInfo;
103 
104 private:
105   /// initializeSubtargetDependencies - Initializes using CPUString and the
106   /// passed in feature string so that we can use initializer lists for
107   /// subtarget initialization.
108   AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
109                                                     StringRef CPUString,
110                                                     StringRef TuneCPUString,
111                                                     bool HasMinSize);
112 
113   /// Initialize properties based on the selected processor family.
114   void initializeProperties(bool HasMinSize);
115 
116 public:
117   /// This constructor initializes the data members to match that
118   /// of the specified triple.
119   AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
120                    StringRef FS, const TargetMachine &TM, bool LittleEndian,
121                    unsigned MinSVEVectorSizeInBitsOverride = 0,
122                    unsigned MaxSVEVectorSizeInBitsOverride = 0,
123                    bool IsStreaming = false, bool IsStreamingCompatible = false,
124                    bool HasMinSize = false);
125 
126 // Getters for SubtargetFeatures defined in tablegen
127 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
128   bool GETTER() const { return ATTRIBUTE; }
129 #include "AArch64GenSubtargetInfo.inc"
130 
131   const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
132     return &TSInfo;
133   }
134   const AArch64FrameLowering *getFrameLowering() const override {
135     return &FrameLowering;
136   }
137   const AArch64TargetLowering *getTargetLowering() const override {
138     return &TLInfo;
139   }
140   const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
141   const AArch64RegisterInfo *getRegisterInfo() const override {
142     return &getInstrInfo()->getRegisterInfo();
143   }
144   const CallLowering *getCallLowering() const override;
145   const InlineAsmLowering *getInlineAsmLowering() const override;
146   InstructionSelector *getInstructionSelector() const override;
147   const LegalizerInfo *getLegalizerInfo() const override;
148   const RegisterBankInfo *getRegBankInfo() const override;
149   const Triple &getTargetTriple() const { return TargetTriple; }
150   bool enableMachineScheduler() const override { return true; }
151   bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
152 
153   bool enableMachinePipeliner() const override;
154   bool useDFAforSMS() const override { return false; }
155 
156   /// Returns ARM processor family.
157   /// Avoid this function! CPU specifics should be kept local to this class
158   /// and preferably modeled with SubtargetFeatures or properties in
159   /// initializeProperties().
160   ARMProcFamilyEnum getProcFamily() const {
161     return ARMProcFamily;
162   }
163 
164   bool isXRaySupported() const override { return true; }
165 
166   /// Returns true if the function has a streaming body.
167   bool isStreaming() const { return IsStreaming; }
168 
169   /// Returns true if the function has a streaming-compatible body.
170   bool isStreamingCompatible() const { return IsStreamingCompatible; }
171 
172   /// Returns true if the target has NEON and the function at runtime is known
173   /// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
174   /// mode, which disables NEON instructions).
175   bool isNeonAvailable() const {
176     return hasNEON() &&
177            (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
178   }
179 
180   /// Returns true if the target has SVE and can use the full range of SVE
181   /// instructions, for example because it knows the function is known not to be
182   /// in streaming-SVE mode or when the target has FEAT_FA64 enabled.
183   bool isSVEAvailable() const {
184     return hasSVE() &&
185            (hasSMEFA64() || (!isStreaming() && !isStreamingCompatible()));
186   }
187 
188   /// Returns true if the target has access to either the full range of SVE instructions,
189   /// or the streaming-compatible subset of SVE instructions.
190   bool isSVEorStreamingSVEAvailable() const {
191     return hasSVE() || (hasSME() && isStreaming());
192   }
193 
194   unsigned getMinVectorRegisterBitWidth() const {
195     // Don't assume any minimum vector size when PSTATE.SM may not be 0, because
196     // we don't yet support streaming-compatible codegen support that we trust
197     // is safe for functions that may be executed in streaming-SVE mode.
198     // By returning '0' here, we disable vectorization.
199     if (!isSVEAvailable() && !isNeonAvailable())
200       return 0;
201     return MinVectorRegisterBitWidth;
202   }
203 
204   bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
205   bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA[i]; }
206   unsigned getNumXRegisterReserved() const {
207     BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs());
208     AllReservedX |= ReserveXRegister;
209     AllReservedX |= ReserveXRegisterForRA;
210     return AllReservedX.count();
211   }
212   bool isLRReservedForRA() const { return ReserveLRForRA; }
213   bool isXRegCustomCalleeSaved(size_t i) const {
214     return CustomCallSavedXRegs[i];
215   }
216   bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
217 
218   /// Return true if the CPU supports any kind of instruction fusion.
219   bool hasFusion() const {
220     return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
221            hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
222            hasFuseAdrpAdd() || hasFuseLiterals();
223   }
224 
225   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
226   unsigned getVectorInsertExtractBaseCost() const;
227   unsigned getCacheLineSize() const override { return CacheLineSize; }
228   unsigned getPrefetchDistance() const override { return PrefetchDistance; }
229   unsigned getMinPrefetchStride(unsigned NumMemAccesses,
230                                 unsigned NumStridedMemAccesses,
231                                 unsigned NumPrefetches,
232                                 bool HasCall) const override {
233     return MinPrefetchStride;
234   }
235   unsigned getMaxPrefetchIterationsAhead() const override {
236     return MaxPrefetchIterationsAhead;
237   }
238   Align getPrefFunctionAlignment() const {
239     return PrefFunctionAlignment;
240   }
241   Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
242 
243   unsigned getMaxBytesForLoopAlignment() const {
244     return MaxBytesForLoopAlignment;
245   }
246 
247   unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
248   unsigned getMinimumJumpTableEntries() const {
249     return MinimumJumpTableEntries;
250   }
251 
252   /// CPU has TBI (top byte of addresses is ignored during HW address
253   /// translation) and OS enables it.
254   bool supportsAddressTopByteIgnored() const;
255 
256   bool isLittleEndian() const { return IsLittle; }
257 
258   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
259   bool isTargetIOS() const { return TargetTriple.isiOS(); }
260   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
261   bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
262   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
263   bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
264   bool isWindowsArm64EC() const { return TargetTriple.isWindowsArm64EC(); }
265 
266   bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
267   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
268   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
269 
270   bool isTargetILP32() const {
271     return TargetTriple.isArch32Bit() ||
272            TargetTriple.getEnvironment() == Triple::GNUILP32;
273   }
274 
275   bool useAA() const override;
276 
277   bool addrSinkUsingGEPs() const override {
278     // Keeping GEPs inbounds is important for exploiting AArch64
279     // addressing-modes in ILP32 mode.
280     return useAA() || isTargetILP32();
281   }
282 
283   bool useSmallAddressing() const {
284     switch (TLInfo.getTargetMachine().getCodeModel()) {
285       case CodeModel::Kernel:
286         // Kernel is currently allowed only for Fuchsia targets,
287         // where it is the same as Small for almost all purposes.
288       case CodeModel::Small:
289         return true;
290       default:
291         return false;
292     }
293   }
294 
295   /// ParseSubtargetFeatures - Parses features string setting specified
296   /// subtarget options.  Definition of function is auto generated by tblgen.
297   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
298 
299   /// ClassifyGlobalReference - Find the target operand flags that describe
300   /// how a global value should be referenced for the current subtarget.
301   unsigned ClassifyGlobalReference(const GlobalValue *GV,
302                                    const TargetMachine &TM) const;
303 
304   unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
305                                            const TargetMachine &TM) const;
306 
307   /// This function is design to compatible with the function def in other
308   /// targets and escape build error about the virtual function def in base
309   /// class TargetSubtargetInfo. Updeate me if AArch64 target need to use it.
310   unsigned char
311   classifyGlobalFunctionReference(const GlobalValue *GV) const override {
312     return 0;
313   }
314 
315   void overrideSchedPolicy(MachineSchedPolicy &Policy,
316                            unsigned NumRegionInstrs) const override;
317   void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
318                              SDep &Dep,
319                              const TargetSchedModel *SchedModel) const override;
320 
321   bool enableEarlyIfConversion() const override;
322 
323   std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
324 
325   bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const {
326     switch (CC) {
327     case CallingConv::C:
328     case CallingConv::Fast:
329     case CallingConv::Swift:
330     case CallingConv::SwiftTail:
331       return isTargetWindows();
332     case CallingConv::PreserveNone:
333       return IsVarArg && isTargetWindows();
334     case CallingConv::Win64:
335       return true;
336     default:
337       return false;
338     }
339   }
340 
341   /// Return whether FrameLowering should always set the "extended frame
342   /// present" bit in FP, or set it based on a symbol in the runtime.
343   bool swiftAsyncContextIsDynamicallySet() const {
344     // Older OS versions (particularly system unwinders) are confused by the
345     // Swift extended frame, so when building code that might be run on them we
346     // must dynamically query the concurrency library to determine whether
347     // extended frames should be flagged as present.
348     const Triple &TT = getTargetTriple();
349 
350     unsigned Major = TT.getOSVersion().getMajor();
351     switch(TT.getOS()) {
352     default:
353       return false;
354     case Triple::IOS:
355     case Triple::TvOS:
356       return Major < 15;
357     case Triple::WatchOS:
358       return Major < 8;
359     case Triple::MacOSX:
360     case Triple::Darwin:
361       return Major < 12;
362     }
363   }
364 
365   void mirFileLoaded(MachineFunction &MF) const override;
366 
367   // Return the known range for the bit length of SVE data registers. A value
368   // of 0 means nothing is known about that particular limit beyong what's
369   // implied by the architecture.
370   unsigned getMaxSVEVectorSizeInBits() const {
371     assert(isSVEorStreamingSVEAvailable() &&
372            "Tried to get SVE vector length without SVE support!");
373     return MaxSVEVectorSizeInBits;
374   }
375 
376   unsigned getMinSVEVectorSizeInBits() const {
377     assert(isSVEorStreamingSVEAvailable() &&
378            "Tried to get SVE vector length without SVE support!");
379     return MinSVEVectorSizeInBits;
380   }
381 
382   bool useSVEForFixedLengthVectors() const {
383     if (!isSVEorStreamingSVEAvailable())
384       return false;
385 
386     // Prefer NEON unless larger SVE registers are available.
387     return !isNeonAvailable() || getMinSVEVectorSizeInBits() >= 256;
388   }
389 
390   bool useSVEForFixedLengthVectors(EVT VT) const {
391     if (!useSVEForFixedLengthVectors() || !VT.isFixedLengthVector())
392       return false;
393     return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock ||
394            !isNeonAvailable();
395   }
396 
397   unsigned getVScaleForTuning() const { return VScaleForTuning; }
398 
399   TailFoldingOpts getSVETailFoldingDefaultOpts() const {
400     return DefaultSVETFOpts;
401   }
402 
403   const char* getChkStkName() const {
404     if (isWindowsArm64EC())
405       return "#__chkstk_arm64ec";
406     return "__chkstk";
407   }
408 
409   const char* getSecurityCheckCookieName() const {
410     if (isWindowsArm64EC())
411       return "#__security_check_cookie_arm64ec";
412     return "__security_check_cookie";
413   }
414 
415   /// Choose a method of checking LR before performing a tail call.
416   AArch64PAuth::AuthCheckMethod
417   getAuthenticatedLRCheckMethod(const MachineFunction &MF) const;
418 
419   /// Compute the integer discriminator for a given BlockAddress constant, if
420   /// blockaddress signing is enabled, or std::nullopt otherwise.
421   /// Blockaddress signing is controlled by the function attribute
422   /// "ptrauth-indirect-gotos" on the parent function.
423   /// Note that this assumes the discriminator is independent of the indirect
424   /// goto branch site itself, i.e., it's the same for all BlockAddresses in
425   /// a function.
426   std::optional<uint16_t>
427   getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const;
428 
429   const PseudoSourceValue *getAddressCheckPSV() const {
430     return AddressCheckPSV.get();
431   }
432 
433 private:
434   /// Pseudo value representing memory load performed to check an address.
435   ///
436   /// This load operation is solely used for its side-effects: if the address
437   /// is not mapped (or not readable), it triggers CPU exception, otherwise
438   /// execution proceeds and the value is not used.
439   class AddressCheckPseudoSourceValue : public PseudoSourceValue {
440   public:
441     AddressCheckPseudoSourceValue(const TargetMachine &TM)
442         : PseudoSourceValue(TargetCustom, TM) {}
443 
444     bool isConstant(const MachineFrameInfo *) const override { return false; }
445     bool isAliased(const MachineFrameInfo *) const override { return true; }
446     bool mayAlias(const MachineFrameInfo *) const override { return true; }
447     void printCustom(raw_ostream &OS) const override { OS << "AddressCheck"; }
448   };
449 
450   std::unique_ptr<AddressCheckPseudoSourceValue> AddressCheckPSV;
451 };
452 } // End llvm namespace
453 
454 #endif
455