xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares the X86 specific subclass of TargetSubtargetInfo.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
14 #define LLVM_LIB_TARGET_X86_X86SUBTARGET_H
15 
16 #include "X86FrameLowering.h"
17 #include "X86ISelLowering.h"
18 #include "X86InstrInfo.h"
19 #include "X86SelectionDAGInfo.h"
20 #include "llvm/CodeGen/TargetSubtargetInfo.h"
21 #include "llvm/IR/CallingConv.h"
22 #include "llvm/TargetParser/Triple.h"
23 #include <climits>
24 #include <memory>
25 
26 #define GET_SUBTARGETINFO_HEADER
27 #include "X86GenSubtargetInfo.inc"
28 
29 namespace llvm {
30 
31 class CallLowering;
32 class GlobalValue;
33 class InstructionSelector;
34 class LegalizerInfo;
35 class RegisterBankInfo;
36 class StringRef;
37 class TargetMachine;
38 
/// The X86 backend supports a number of different styles of PIC.
///
namespace PICStyles {

/// The position-independent-code style selected for a subtarget.
enum class Style {
  StubPIC, // Used on i386-darwin in PIC mode.
  GOT,     // Used on 32-bit ELF when in PIC mode.
  RIPRel,  // Used on X86-64 when in PIC mode.
  None     // Set when not in PIC mode.
};

} // end namespace PICStyles
51 
52 class X86Subtarget final : public X86GenSubtargetInfo {
53   enum X86SSEEnum {
54     NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
55   };
56 
57   /// Which PIC style to use
58   PICStyles::Style PICStyle;
59 
60   const TargetMachine &TM;
61 
62   /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.
63   X86SSEEnum X86SSELevel = NoSSE;
64 
65 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
66   bool ATTRIBUTE = DEFAULT;
67 #include "X86GenSubtargetInfo.inc"
68   /// The minimum alignment known to hold of the stack frame on
69   /// entry to the function and which must be maintained by every function.
70   Align stackAlignment = Align(4);
71 
72   Align TileConfigAlignment = Align(4);
73 
74   /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
75   ///
76   // FIXME: this is a known good value for Yonah. How about others?
77   unsigned MaxInlineSizeThreshold = 128;
78 
79   /// What processor and OS we're targeting.
80   Triple TargetTriple;
81 
82   /// GlobalISel related APIs.
83   std::unique_ptr<CallLowering> CallLoweringInfo;
84   std::unique_ptr<LegalizerInfo> Legalizer;
85   std::unique_ptr<RegisterBankInfo> RegBankInfo;
86   std::unique_ptr<InstructionSelector> InstSelector;
87 
88   /// Override the stack alignment.
89   MaybeAlign StackAlignOverride;
90 
91   /// Preferred vector width from function attribute.
92   unsigned PreferVectorWidthOverride;
93 
94   /// Resolved preferred vector width from function attribute and subtarget
95   /// features.
96   unsigned PreferVectorWidth = UINT32_MAX;
97 
98   /// Required vector width from function attribute.
99   unsigned RequiredVectorWidth;
100 
101   X86SelectionDAGInfo TSInfo;
102   // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
103   // X86TargetLowering needs.
104   X86InstrInfo InstrInfo;
105   X86TargetLowering TLInfo;
106   X86FrameLowering FrameLowering;
107 
108 public:
109   /// This constructor initializes the data members to match that
110   /// of the specified triple.
111   ///
112   X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
113                const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
114                unsigned PreferVectorWidthOverride,
115                unsigned RequiredVectorWidth);
116   ~X86Subtarget() override;
117 
getTargetLowering()118   const X86TargetLowering *getTargetLowering() const override {
119     return &TLInfo;
120   }
121 
getInstrInfo()122   const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
123 
getFrameLowering()124   const X86FrameLowering *getFrameLowering() const override {
125     return &FrameLowering;
126   }
127 
getSelectionDAGInfo()128   const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
129     return &TSInfo;
130   }
131 
getRegisterInfo()132   const X86RegisterInfo *getRegisterInfo() const override {
133     return &getInstrInfo()->getRegisterInfo();
134   }
135 
getTileConfigSize()136   unsigned getTileConfigSize() const { return 64; }
getTileConfigAlignment()137   Align getTileConfigAlignment() const { return TileConfigAlignment; }
138 
139   /// Returns the minimum alignment known to hold of the
140   /// stack frame on entry to the function and which must be maintained by every
141   /// function for this subtarget.
getStackAlignment()142   Align getStackAlignment() const { return stackAlignment; }
143 
144   /// Returns the maximum memset / memcpy size
145   /// that still makes it profitable to inline the call.
getMaxInlineSizeThreshold()146   unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
147 
148   /// ParseSubtargetFeatures - Parses features string setting specified
149   /// subtarget options.  Definition of function is auto generated by tblgen.
150   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
151 
152   /// Methods used by Global ISel
153   const CallLowering *getCallLowering() const override;
154   InstructionSelector *getInstructionSelector() const override;
155   const LegalizerInfo *getLegalizerInfo() const override;
156   const RegisterBankInfo *getRegBankInfo() const override;
157 
158 private:
159   /// Initialize the full set of dependencies so we can use an initializer
160   /// list for X86Subtarget.
161   X86Subtarget &initializeSubtargetDependencies(StringRef CPU,
162                                                 StringRef TuneCPU,
163                                                 StringRef FS);
164   void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
165 
166 public:
167 
168 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
169   bool GETTER() const { return ATTRIBUTE; }
170 #include "X86GenSubtargetInfo.inc"
171 
172   /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
isTarget64BitILP32()173   bool isTarget64BitILP32() const {
174     return Is64Bit && (TargetTriple.isX32() || TargetTriple.isOSNaCl());
175   }
176 
177   /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
isTarget64BitLP64()178   bool isTarget64BitLP64() const {
179     return Is64Bit && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl());
180   }
181 
getPICStyle()182   PICStyles::Style getPICStyle() const { return PICStyle; }
setPICStyle(PICStyles::Style Style)183   void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }
184 
canUseCMPXCHG8B()185   bool canUseCMPXCHG8B() const { return hasCX8(); }
canUseCMPXCHG16B()186   bool canUseCMPXCHG16B() const {
187     // CX16 is just the CPUID bit, instruction requires 64-bit mode too.
188     return hasCX16() && is64Bit();
189   }
190   // SSE codegen depends on cmovs, and all SSE1+ processors support them.
191   // All 64-bit processors support cmov.
canUseCMOV()192   bool canUseCMOV() const { return hasCMOV() || hasSSE1() || is64Bit(); }
hasSSE1()193   bool hasSSE1() const { return X86SSELevel >= SSE1; }
hasSSE2()194   bool hasSSE2() const { return X86SSELevel >= SSE2; }
hasSSE3()195   bool hasSSE3() const { return X86SSELevel >= SSE3; }
hasSSSE3()196   bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
hasSSE41()197   bool hasSSE41() const { return X86SSELevel >= SSE41; }
hasSSE42()198   bool hasSSE42() const { return X86SSELevel >= SSE42; }
hasAVX()199   bool hasAVX() const { return X86SSELevel >= AVX; }
hasAVX2()200   bool hasAVX2() const { return X86SSELevel >= AVX2; }
hasAVX512()201   bool hasAVX512() const { return X86SSELevel >= AVX512; }
hasInt256()202   bool hasInt256() const { return hasAVX2(); }
hasAnyFMA()203   bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
hasPrefetchW()204   bool hasPrefetchW() const {
205     // The PREFETCHW instruction was added with 3DNow but later CPUs gave it
206     // its own CPUID bit as part of deprecating 3DNow.
207     return hasPRFCHW();
208   }
hasSSEPrefetch()209   bool hasSSEPrefetch() const {
210     // We also implicitly enable these when we have a write prefix supporting
211     // cache level OR if we have prfchw.
212     return hasSSE1() || hasPRFCHW() || hasPREFETCHI();
213   }
canUseLAHFSAHF()214   bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
215   // These are generic getters that OR together all of the thunk types
216   // supported by the subtarget. Therefore useIndirectThunk*() will return true
217   // if any respective thunk feature is enabled.
useIndirectThunkCalls()218   bool useIndirectThunkCalls() const {
219     return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
220   }
useIndirectThunkBranches()221   bool useIndirectThunkBranches() const {
222     return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
223   }
224 
getPreferVectorWidth()225   unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
getRequiredVectorWidth()226   unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
227 
228   // Helper functions to determine when we should allow widening to 512-bit
229   // during codegen.
230   // TODO: Currently we're always allowing widening on CPUs without VLX,
231   // because for many cases we don't have a better option.
canExtendTo512DQ()232   bool canExtendTo512DQ() const {
233     return hasAVX512() && hasEVEX512() &&
234            (!hasVLX() || getPreferVectorWidth() >= 512);
235   }
canExtendTo512BW()236   bool canExtendTo512BW() const  {
237     return hasBWI() && canExtendTo512DQ();
238   }
239 
hasNoDomainDelay()240   bool hasNoDomainDelay() const { return NoDomainDelay; }
hasNoDomainDelayMov()241   bool hasNoDomainDelayMov() const {
242       return hasNoDomainDelay() || NoDomainDelayMov;
243   }
hasNoDomainDelayBlend()244   bool hasNoDomainDelayBlend() const {
245       return hasNoDomainDelay() || NoDomainDelayBlend;
246   }
hasNoDomainDelayShuffle()247   bool hasNoDomainDelayShuffle() const {
248       return hasNoDomainDelay() || NoDomainDelayShuffle;
249   }
250 
251   // If there are no 512-bit vectors and we prefer not to use 512-bit registers,
252   // disable them in the legalizer.
useAVX512Regs()253   bool useAVX512Regs() const {
254     return hasAVX512() && hasEVEX512() &&
255            (canExtendTo512DQ() || RequiredVectorWidth > 256);
256   }
257 
useLight256BitInstructions()258   bool useLight256BitInstructions() const {
259     return getPreferVectorWidth() >= 256 || AllowLight256Bit;
260   }
261 
useBWIRegs()262   bool useBWIRegs() const {
263     return hasBWI() && useAVX512Regs();
264   }
265 
266   // Returns true if the destination register of a BSF/BSR instruction is
267   // not touched if the source register is zero.
268   // NOTE: i32->i64 implicit zext isn't guaranteed by BSR/BSF pass through.
hasBitScanPassThrough()269   bool hasBitScanPassThrough() const { return is64Bit(); }
270 
isXRaySupported()271   bool isXRaySupported() const override { return is64Bit(); }
272 
273   /// Use clflush if we have SSE2 or we're on x86-64 (even if we asked for
274   /// no-sse2). There isn't any reason to disable it if the target processor
275   /// supports it.
hasCLFLUSH()276   bool hasCLFLUSH() const { return hasSSE2() || is64Bit(); }
277 
278   /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
279   /// no-sse2). There isn't any reason to disable it if the target processor
280   /// supports it.
hasMFence()281   bool hasMFence() const { return hasSSE2() || is64Bit(); }
282 
283   /// Avoid use of `mfence` for`fence seq_cst`, and instead use `lock or`.
avoidMFence()284   bool avoidMFence() const { return is64Bit(); }
285 
getTargetTriple()286   const Triple &getTargetTriple() const { return TargetTriple; }
287 
isTargetDarwin()288   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
isTargetFreeBSD()289   bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
isTargetDragonFly()290   bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
isTargetSolaris()291   bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
isTargetPS()292   bool isTargetPS() const { return TargetTriple.isPS(); }
293 
isTargetELF()294   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
isTargetCOFF()295   bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
isTargetMachO()296   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
297 
isTargetLinux()298   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
isTargetKFreeBSD()299   bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
isTargetGlibc()300   bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
isTargetAndroid()301   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
isTargetNaCl()302   bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
isTargetNaCl32()303   bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
isTargetNaCl64()304   bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
isTargetMCU()305   bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
isTargetFuchsia()306   bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
307 
isTargetWindowsMSVC()308   bool isTargetWindowsMSVC() const {
309     return TargetTriple.isWindowsMSVCEnvironment();
310   }
311 
isTargetWindowsCoreCLR()312   bool isTargetWindowsCoreCLR() const {
313     return TargetTriple.isWindowsCoreCLREnvironment();
314   }
315 
isTargetWindowsCygwin()316   bool isTargetWindowsCygwin() const {
317     return TargetTriple.isWindowsCygwinEnvironment();
318   }
319 
isTargetWindowsGNU()320   bool isTargetWindowsGNU() const {
321     return TargetTriple.isWindowsGNUEnvironment();
322   }
323 
isTargetWindowsItanium()324   bool isTargetWindowsItanium() const {
325     return TargetTriple.isWindowsItaniumEnvironment();
326   }
327 
isTargetCygMing()328   bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
329 
isUEFI()330   bool isUEFI() const { return TargetTriple.isUEFI(); }
331 
isOSWindows()332   bool isOSWindows() const { return TargetTriple.isOSWindows(); }
333 
isTargetUEFI64()334   bool isTargetUEFI64() const { return Is64Bit && isUEFI(); }
335 
isTargetWin64()336   bool isTargetWin64() const { return Is64Bit && isOSWindows(); }
337 
isTargetWin32()338   bool isTargetWin32() const { return !Is64Bit && isOSWindows(); }
339 
isPICStyleGOT()340   bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; }
isPICStyleRIPRel()341   bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; }
342 
isPICStyleStubPIC()343   bool isPICStyleStubPIC() const {
344     return PICStyle == PICStyles::Style::StubPIC;
345   }
346 
347   bool isPositionIndependent() const;
348 
isCallingConvWin64(CallingConv::ID CC)349   bool isCallingConvWin64(CallingConv::ID CC) const {
350     switch (CC) {
351     // On Win64, all these conventions just use the default convention.
352     case CallingConv::C:
353     case CallingConv::Fast:
354     case CallingConv::Tail:
355       return isTargetWin64() || isTargetUEFI64();
356     case CallingConv::Swift:
357     case CallingConv::SwiftTail:
358     case CallingConv::X86_FastCall:
359     case CallingConv::X86_StdCall:
360     case CallingConv::X86_ThisCall:
361     case CallingConv::X86_VectorCall:
362     case CallingConv::Intel_OCL_BI:
363       return isTargetWin64();
364     // This convention allows using the Win64 convention on other targets.
365     case CallingConv::Win64:
366       return true;
367     // This convention allows using the SysV convention on Windows targets.
368     case CallingConv::X86_64_SysV:
369       return false;
370     // Otherwise, who knows what this is.
371     default:
372       return false;
373     }
374   }
375 
376   /// Classify a global variable reference for the current subtarget according
377   /// to how we should reference it in a non-pcrel context.
378   unsigned char classifyLocalReference(const GlobalValue *GV) const;
379 
380   unsigned char classifyGlobalReference(const GlobalValue *GV,
381                                         const Module &M) const;
382   unsigned char classifyGlobalReference(const GlobalValue *GV) const;
383 
384   /// Classify a global function reference for the current subtarget.
385   unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
386                                                 const Module &M) const;
387   unsigned char
388   classifyGlobalFunctionReference(const GlobalValue *GV) const override;
389 
390   /// Classify a blockaddress reference for the current subtarget according to
391   /// how we should reference it in a non-pcrel context.
392   unsigned char classifyBlockAddressReference() const;
393 
394   /// Return true if the subtarget allows calls to immediate address.
395   bool isLegalToCallImmediateAddr() const;
396 
397   /// Return whether FrameLowering should always set the "extended frame
398   /// present" bit in FP, or set it based on a symbol in the runtime.
swiftAsyncContextIsDynamicallySet()399   bool swiftAsyncContextIsDynamicallySet() const {
400     // Older OS versions (particularly system unwinders) are confused by the
401     // Swift extended frame, so when building code that might be run on them we
402     // must dynamically query the concurrency library to determine whether
403     // extended frames should be flagged as present.
404     const Triple &TT = getTargetTriple();
405 
406     unsigned Major = TT.getOSVersion().getMajor();
407     switch(TT.getOS()) {
408     default:
409       return false;
410     case Triple::IOS:
411     case Triple::TvOS:
412       return Major < 15;
413     case Triple::WatchOS:
414       return Major < 8;
415     case Triple::MacOSX:
416     case Triple::Darwin:
417       return Major < 12;
418     }
419   }
420 
421   /// If we are using indirect thunks, we need to expand indirectbr to avoid it
422   /// lowering to an actual indirect jump.
enableIndirectBrExpand()423   bool enableIndirectBrExpand() const override {
424     return useIndirectThunkBranches();
425   }
426 
427   /// Enable the MachineScheduler pass for all X86 subtargets.
enableMachineScheduler()428   bool enableMachineScheduler() const override { return true; }
429 
430   bool enableEarlyIfConversion() const override;
431 
432   void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
433                               &Mutations) const override;
434 
getAntiDepBreakMode()435   AntiDepBreakMode getAntiDepBreakMode() const override {
436     return TargetSubtargetInfo::ANTIDEP_CRITICAL;
437   }
438 };
439 
440 } // end namespace llvm
441 
442 #endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H
443