xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86Subtarget.h (revision b9128a37faafede823eb456aa65a11ac69997284)
1 //===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares the X86 specific subclass of TargetSubtargetInfo.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
14 #define LLVM_LIB_TARGET_X86_X86SUBTARGET_H
15 
16 #include "X86FrameLowering.h"
17 #include "X86ISelLowering.h"
18 #include "X86InstrInfo.h"
19 #include "X86SelectionDAGInfo.h"
20 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
21 #include "llvm/CodeGen/TargetSubtargetInfo.h"
22 #include "llvm/IR/CallingConv.h"
23 #include "llvm/TargetParser/Triple.h"
24 #include <climits>
25 #include <memory>
26 
27 #define GET_SUBTARGETINFO_HEADER
28 #include "X86GenSubtargetInfo.inc"
29 
30 namespace llvm {
31 
// Forward declarations of types this header only uses by pointer or
// reference (or as std::unique_ptr members), so their full definitions are
// not needed here.
32 class CallLowering;
33 class GlobalValue;
34 class InstructionSelector;
35 class LegalizerInfo;
36 class RegisterBankInfo;
37 class StringRef;
38 class TargetMachine;
39 
40 /// The X86 backend supports a number of different styles of PIC
41 /// (position-independent code); X86Subtarget stores the style in use.
42 namespace PICStyles {
43 
44 enum class Style {
45   StubPIC,          // Used on i386-darwin in PIC mode.
46   GOT,              // Used on 32-bit ELF when in PIC mode.
47   RIPRel,           // Used on x86-64 when in PIC mode.
48   None              // Set when not in PIC mode.
49 };
50 
51 } // end namespace PICStyles
52 
/// X86-specific subtarget description.
///
/// Holds the target triple, the PIC style, the SSE/3DNow feature levels and
/// the tblgen-generated feature flags, and contains by value the
/// X86SelectionDAGInfo, X86InstrInfo, X86TargetLowering and X86FrameLowering
/// objects handed out by the corresponding getters.
53 class X86Subtarget final : public X86GenSubtargetInfo {
  // Vector ISA levels in increasing order. The hasSSE*/hasAVX* queries below
  // use >= comparisons, so a higher level implies every lower one.
54   enum X86SSEEnum {
55     NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
56   };
57 
  // MMX / 3DNow! levels in increasing order; hasMMX(), hasThreeDNow() and
  // hasThreeDNowA() compare against them with >=.
58   enum X863DNowEnum {
59     NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
60   };
61 
62   /// Which PIC style to use. No in-class initializer; read and written via
62   /// getPICStyle() / setPICStyle().
63   PICStyles::Style PICStyle;
64 
  /// Reference to the target machine; presumably bound from the constructor's
  /// TM argument (the constructor is defined out of line).
65   const TargetMachine &TM;
66 
67   /// Highest supported level of X86SSEEnum (NoSSE through AVX512).
68   X86SSEEnum X86SSELevel = NoSSE;
69 
70   /// MMX, 3DNow, 3DNow Athlon, or none supported.
71   X863DNowEnum X863DNowLevel = NoThreeDNow;
72 
  // Declares one `bool ATTRIBUTE = DEFAULT;` data member for every
  // GET_SUBTARGETINFO_MACRO entry in the tblgen-generated .inc file.
  // NOTE(review): the macro is defined a second time further down without an
  // #undef in this header, so the .inc presumably undefines it — confirm.
73 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
74   bool ATTRIBUTE = DEFAULT;
75 #include "X86GenSubtargetInfo.inc"
76   /// The minimum alignment known to hold of the stack frame on
77   /// entry to the function and which must be maintained by every function.
78   Align stackAlignment = Align(4);
79 
  /// Alignment returned by getTileConfigAlignment().
80   Align TileConfigAlignment = Align(4);
81 
82   /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
83   ///
84   // FIXME: this is a known good value for Yonah. How about others?
85   unsigned MaxInlineSizeThreshold = 128;
86 
87   /// What processor and OS we're targeting.
88   Triple TargetTriple;
89 
90   /// GlobalISel related APIs.
90   /// Presumably handed out by getCallLowering(), getLegalizerInfo(),
90   /// getRegBankInfo() and getInstructionSelector(), which are defined out of
90   /// line.
91   std::unique_ptr<CallLowering> CallLoweringInfo;
92   std::unique_ptr<LegalizerInfo> Legalizer;
93   std::unique_ptr<RegisterBankInfo> RegBankInfo;
94   std::unique_ptr<InstructionSelector> InstSelector;
95 
96   /// Override the stack alignment.
97   MaybeAlign StackAlignOverride;
98 
99   /// Preferred vector width from function attribute.
99   /// No in-class initializer; presumably set by the constructor.
100   unsigned PreferVectorWidthOverride;
101 
102   /// Resolved preferred vector width from function attribute and subtarget
103   /// features.
104   unsigned PreferVectorWidth = UINT32_MAX;
105 
106   /// Required vector width from function attribute.
106   /// No in-class initializer; presumably set by the constructor.
107   unsigned RequiredVectorWidth;
108 
109   X86SelectionDAGInfo TSInfo;
110   // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
111   // X86TargetLowering needs.
112   X86InstrInfo InstrInfo;
113   X86TargetLowering TLInfo;
114   X86FrameLowering FrameLowering;
115 
116 public:
117   /// This constructor initializes the data members to match that
118   /// of the specified triple.
119   ///
120   X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
121                const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
122                unsigned PreferVectorWidthOverride,
123                unsigned RequiredVectorWidth);
124 
  // Accessors for the per-subtarget helper objects stored by value above.
125   const X86TargetLowering *getTargetLowering() const override {
126     return &TLInfo;
127   }
128 
129   const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
130 
131   const X86FrameLowering *getFrameLowering() const override {
132     return &FrameLowering;
133   }
134 
135   const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
136     return &TSInfo;
137   }
138 
  /// The register info is not stored separately; it is obtained from the
  /// X86InstrInfo member.
139   const X86RegisterInfo *getRegisterInfo() const override {
140     return &getInstrInfo()->getRegisterInfo();
141   }
142 
  /// Size of the tile configuration area; fixed at 64 (presumably bytes).
143   unsigned getTileConfigSize() const { return 64; }
144   Align getTileConfigAlignment() const { return TileConfigAlignment; }
145 
146   /// Returns the minimum alignment known to hold of the
147   /// stack frame on entry to the function and which must be maintained by every
148   /// function for this subtarget.
149   Align getStackAlignment() const { return stackAlignment; }
150 
151   /// Returns the maximum memset / memcpy size
152   /// that still makes it profitable to inline the call.
153   unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
154 
155   /// ParseSubtargetFeatures - Parses features string setting specified
156   /// subtarget options.  Definition of function is auto generated by tblgen.
157   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
158 
159   /// Methods used by Global ISel
160   const CallLowering *getCallLowering() const override;
161   InstructionSelector *getInstructionSelector() const override;
162   const LegalizerInfo *getLegalizerInfo() const override;
163   const RegisterBankInfo *getRegBankInfo() const override;
164 
165 private:
166   /// Initialize the full set of dependencies so we can use an initializer
167   /// list for X86Subtarget.
168   X86Subtarget &initializeSubtargetDependencies(StringRef CPU,
169                                                 StringRef TuneCPU,
170                                                 StringRef FS);
171   void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
172 
173 public:
174 
  // Defines one `bool GETTER() const` accessor per GET_SUBTARGETINFO_MACRO
  // entry, returning the matching ATTRIBUTE member declared earlier in the
  // class.
175 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
176   bool GETTER() const { return ATTRIBUTE; }
177 #include "X86GenSubtargetInfo.inc"
178 
179   /// Is this x86_64 with the ILP32 programming model (x32 ABI)?
179   /// NaCl triples are treated the same way as x32 here.
180   bool isTarget64BitILP32() const {
181     return Is64Bit && (TargetTriple.isX32() || TargetTriple.isOSNaCl());
182   }
183 
184   /// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
185   bool isTarget64BitLP64() const {
186     return Is64Bit && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl());
187   }
188 
189   PICStyles::Style getPICStyle() const { return PICStyle; }
190   void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }
191 
192   bool canUseCMPXCHG8B() const { return hasCX8(); }
193   bool canUseCMPXCHG16B() const {
194     // CX16 is just the CPUID bit, instruction requires 64-bit mode too.
195     return hasCX16() && is64Bit();
196   }
197   // SSE codegen depends on cmovs, and all SSE1+ processors support them.
198   // All 64-bit processors support cmov.
199   bool canUseCMOV() const { return hasCMOV() || hasSSE1() || is64Bit(); }
  // Level queries: each is true when X86SSELevel / X863DNowLevel is at least
  // the named level.
200   bool hasSSE1() const { return X86SSELevel >= SSE1; }
201   bool hasSSE2() const { return X86SSELevel >= SSE2; }
202   bool hasSSE3() const { return X86SSELevel >= SSE3; }
203   bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
204   bool hasSSE41() const { return X86SSELevel >= SSE41; }
205   bool hasSSE42() const { return X86SSELevel >= SSE42; }
206   bool hasAVX() const { return X86SSELevel >= AVX; }
207   bool hasAVX2() const { return X86SSELevel >= AVX2; }
208   bool hasAVX512() const { return X86SSELevel >= AVX512; }
209   bool hasInt256() const { return hasAVX2(); }
210   bool hasMMX() const { return X863DNowLevel >= MMX; }
211   bool hasThreeDNow() const { return X863DNowLevel >= ThreeDNow; }
212   bool hasThreeDNowA() const { return X863DNowLevel >= ThreeDNowA; }
213   bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
214   bool hasPrefetchW() const {
215     // The PREFETCHW instruction was added with 3DNow but later CPUs gave it
216     // its own CPUID bit as part of deprecating 3DNow. Intel eventually added
217     // it and KNL has another that prefetches to L2 cache. We assume the
218     // L1 version exists if the L2 version does.
219     return hasThreeDNow() || hasPRFCHW() || hasPREFETCHWT1();
220   }
221   bool hasSSEPrefetch() const {
222     // Besides plain SSE1, the SSE prefetches are implicitly enabled when
223     // PREFETCHWT1 or PREFETCHI is available, or when we have PRFCHW but do
224     // not already have a read prefetch from 3DNow.
225     return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1() ||
226            hasPREFETCHI();
227   }
  // LAHF/SAHF are usable outside 64-bit mode, and in 64-bit mode only when the
  // LAHFSAHF64 feature is set.
228   bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
229   // These are generic getters that OR together all of the thunk types
230   // supported by the subtarget. Therefore useIndirectThunk*() will return true
231   // if any respective thunk feature is enabled.
232   bool useIndirectThunkCalls() const {
233     return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
234   }
235   bool useIndirectThunkBranches() const {
236     return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
237   }
238 
239   unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
240   unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
241 
242   // Helper functions to determine when we should allow widening to 512-bit
243   // during codegen.
244   // TODO: Currently we're always allowing widening on CPUs without VLX,
245   // because for many cases we don't have a better option.
246   bool canExtendTo512DQ() const {
247     return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512);
248   }
249   bool canExtendTo512BW() const  {
250     return hasBWI() && canExtendTo512DQ();
251   }
252 
  // Domain-delay queries: the general NoDomainDelay flag also satisfies each
  // of the more specific mov / blend / shuffle queries.
253   bool hasNoDomainDelay() const { return NoDomainDelay; }
254   bool hasNoDomainDelayMov() const {
255       return hasNoDomainDelay() || NoDomainDelayMov;
256   }
257   bool hasNoDomainDelayBlend() const {
258       return hasNoDomainDelay() || NoDomainDelayBlend;
259   }
260   bool hasNoDomainDelayShuffle() const {
261       return hasNoDomainDelay() || NoDomainDelayShuffle;
262   }
263 
264   // If there are no 512-bit vectors and we prefer not to use 512-bit registers,
265   // disable them in the legalizer.
266   bool useAVX512Regs() const {
267     return hasAVX512() && hasEVEX512() &&
268            (canExtendTo512DQ() || RequiredVectorWidth > 256);
269   }
270 
  // True when the preferred vector width is at least 256 bits or the
  // AllowLight256Bit flag is set.
271   bool useLight256BitInstructions() const {
272     return getPreferVectorWidth() >= 256 || AllowLight256Bit;
273   }
274 
  // 512-bit registers are used for BWI operations only when BWI is available
  // and useAVX512Regs() holds.
275   bool useBWIRegs() const {
276     return hasBWI() && useAVX512Regs();
277   }
278 
  /// XRay is reported as supported only in 64-bit mode.
279   bool isXRaySupported() const override { return is64Bit(); }
280 
281   /// Use clflush if we have SSE2 or we're on x86-64 (even if we asked for
282   /// no-sse2). There isn't any reason to disable it if the target processor
283   /// supports it.
284   bool hasCLFLUSH() const { return hasSSE2() || is64Bit(); }
285 
286   /// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
287   /// no-sse2). There isn't any reason to disable it if the target processor
288   /// supports it.
289   bool hasMFence() const { return hasSSE2() || is64Bit(); }
290 
291   const Triple &getTargetTriple() const { return TargetTriple; }
292 
  // The isTarget* / isOS* predicates below forward to the matching query on
  // TargetTriple, in a few cases combined with the 64-bit mode flag.
293   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
294   bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
295   bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
296   bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
297   bool isTargetPS() const { return TargetTriple.isPS(); }
298 
299   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
300   bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
301   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
302 
303   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
304   bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
305   bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
306   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
307   bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
308   bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
309   bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
310   bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
311   bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
312 
313   bool isTargetWindowsMSVC() const {
314     return TargetTriple.isWindowsMSVCEnvironment();
315   }
316 
317   bool isTargetWindowsCoreCLR() const {
318     return TargetTriple.isWindowsCoreCLREnvironment();
319   }
320 
321   bool isTargetWindowsCygwin() const {
322     return TargetTriple.isWindowsCygwinEnvironment();
323   }
324 
325   bool isTargetWindowsGNU() const {
326     return TargetTriple.isWindowsGNUEnvironment();
327   }
328 
329   bool isTargetWindowsItanium() const {
330     return TargetTriple.isWindowsItaniumEnvironment();
331   }
332 
333   bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
334 
335   bool isOSWindows() const { return TargetTriple.isOSWindows(); }
336 
  // Windows targets split by the 64-bit mode flag.
337   bool isTargetWin64() const { return Is64Bit && isOSWindows(); }
338 
339   bool isTargetWin32() const { return !Is64Bit && isOSWindows(); }
340 
  // Convenience comparisons of PICStyle against individual styles.
341   bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; }
342   bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; }
343 
344   bool isPICStyleStubPIC() const {
345     return PICStyle == PICStyles::Style::StubPIC;
346   }
347 
348   bool isPositionIndependent() const;
349 
  /// Returns true if calling convention \p CC uses the Win64 convention on
  /// this subtarget: the default-like conventions listed first do so only on
  /// 64-bit Windows targets, CallingConv::Win64 always does, and every other
  /// convention (including X86_64_SysV) never does.
350   bool isCallingConvWin64(CallingConv::ID CC) const {
351     switch (CC) {
352     // On Win64, all these conventions just use the default convention.
353     case CallingConv::C:
354     case CallingConv::Fast:
355     case CallingConv::Tail:
356     case CallingConv::Swift:
357     case CallingConv::SwiftTail:
358     case CallingConv::X86_FastCall:
359     case CallingConv::X86_StdCall:
360     case CallingConv::X86_ThisCall:
361     case CallingConv::X86_VectorCall:
362     case CallingConv::Intel_OCL_BI:
363       return isTargetWin64();
364     // This convention allows using the Win64 convention on other targets.
365     case CallingConv::Win64:
366       return true;
367     // This convention allows using the SysV convention on Windows targets.
368     case CallingConv::X86_64_SysV:
369       return false;
370     // Otherwise, who knows what this is.
371     default:
372       return false;
373     }
374   }
375 
376   /// Classify a global variable reference for the current subtarget according
377   /// to how we should reference it in a non-pcrel context.
378   unsigned char classifyLocalReference(const GlobalValue *GV) const;
379 
  // Overloads with and without an explicit Module; both are defined out of
  // line, so how the Module-less form obtains its module is not visible here.
380   unsigned char classifyGlobalReference(const GlobalValue *GV,
381                                         const Module &M) const;
382   unsigned char classifyGlobalReference(const GlobalValue *GV) const;
383 
384   /// Classify a global function reference for the current subtarget.
385   unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
386                                                 const Module &M) const;
387   unsigned char
388   classifyGlobalFunctionReference(const GlobalValue *GV) const override;
389 
390   /// Classify a blockaddress reference for the current subtarget according to
391   /// how we should reference it in a non-pcrel context.
392   unsigned char classifyBlockAddressReference() const;
393 
394   /// Return true if the subtarget allows calls to immediate address.
395   bool isLegalToCallImmediateAddr() const;
396 
397   /// Return whether FrameLowering should always set the "extended frame
398   /// present" bit in FP, or set it based on a symbol in the runtime.
398   /// Returns true (dynamic) for iOS/tvOS major versions below 15, watchOS
398   /// below 8 and macOS/Darwin below 12; false for every other OS or version.
399   bool swiftAsyncContextIsDynamicallySet() const {
400     // Older OS versions (particularly system unwinders) are confused by the
401     // Swift extended frame, so when building code that might be run on them we
402     // must dynamically query the concurrency library to determine whether
403     // extended frames should be flagged as present.
404     const Triple &TT = getTargetTriple();
405 
406     unsigned Major = TT.getOSVersion().getMajor();
407     switch(TT.getOS()) {
408     default:
409       return false;
410     case Triple::IOS:
411     case Triple::TvOS:
412       return Major < 15;
413     case Triple::WatchOS:
414       return Major < 8;
415     case Triple::MacOSX:
416     case Triple::Darwin:
417       return Major < 12;
418     }
419   }
420 
421   /// If we are using indirect thunks, we need to expand indirectbr to avoid it
422   /// lowering to an actual indirect jump.
423   bool enableIndirectBrExpand() const override {
424     return useIndirectThunkBranches();
425   }
426 
427   /// Enable the MachineScheduler pass for all X86 subtargets.
428   bool enableMachineScheduler() const override { return true; }
429 
  // Defined out of line; the decision criteria are not visible in this header.
430   bool enableEarlyIfConversion() const override;
431 
  // Defined out of line; presumably adds scheduling DAG mutations to
  // Mutations for use after register allocation — confirm in the .cpp.
432   void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
433                               &Mutations) const override;
434 
  /// Always reports TargetSubtargetInfo::ANTIDEP_CRITICAL as the
  /// anti-dependency breaking mode.
435   AntiDepBreakMode getAntiDepBreakMode() const override {
436     return TargetSubtargetInfo::ANTIDEP_CRITICAL;
437   }
438 };
439 
440 } // end namespace llvm
441 
442 #endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H
443