xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h (revision f59662030254e1bc4f7f135e7617e94b46385893)
1 //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMD GCN specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16 
17 #include "AMDGPUCallLowering.h"
18 #include "AMDGPURegisterBankInfo.h"
19 #include "AMDGPUSubtarget.h"
20 #include "SIFrameLowering.h"
21 #include "SIISelLowering.h"
22 #include "SIInstrInfo.h"
23 #include "Utils/AMDGPUBaseInfo.h"
24 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
25 
26 #define GET_SUBTARGETINFO_HEADER
27 #include "AMDGPUGenSubtargetInfo.inc"
28 
29 namespace llvm {
30 
31 class GCNTargetMachine;
32 
33 class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
34                            public AMDGPUSubtarget {
35 public:
36   using AMDGPUSubtarget::getMaxWavesPerEU;
37 
38   // Following 2 enums are documented at:
39   //   - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
40   enum class TrapHandlerAbi {
41     NONE   = 0x00,
42     AMDHSA = 0x01,
43   };
44 
45   enum class TrapID {
46     LLVMAMDHSATrap      = 0x02,
47     LLVMAMDHSADebugTrap = 0x03,
48   };
49 
50 private:
51   /// GlobalISel related APIs.
52   std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
53   std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
54   std::unique_ptr<InstructionSelector> InstSelector;
55   std::unique_ptr<LegalizerInfo> Legalizer;
56   std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
57 
58 protected:
59   // Basic subtarget description.
60   Triple TargetTriple;
61   AMDGPU::IsaInfo::AMDGPUTargetID TargetID;
62   unsigned Gen = INVALID;
63   InstrItineraryData InstrItins;
64   int LDSBankCount = 0;
65   unsigned MaxPrivateElementSize = 0;
66 
67   // Possibly statically set by tablegen, but may want to be overridden.
68   bool FastDenormalF32 = false;
69   bool HalfRate64Ops = false;
70   bool FullRate64Ops = false;
71 
72   // Dynamically set bits that enable features.
73   bool FlatForGlobal = false;
74   bool AutoWaitcntBeforeBarrier = false;
75   bool BackOffBarrier = false;
76   bool UnalignedScratchAccess = false;
77   bool UnalignedAccessMode = false;
78   bool HasApertureRegs = false;
79   bool SupportsXNACK = false;
80 
81   // This should not be used directly. 'TargetID' tracks the dynamic settings
82   // for XNACK.
83   bool EnableXNACK = false;
84 
85   bool EnableTgSplit = false;
86   bool EnableCuMode = false;
87   bool TrapHandler = false;
88 
89   // Used as options.
90   bool EnableLoadStoreOpt = false;
91   bool EnableUnsafeDSOffsetFolding = false;
92   bool EnableSIScheduler = false;
93   bool EnableDS128 = false;
94   bool EnablePRTStrictNull = false;
95   bool DumpCode = false;
96 
  // Subtarget properties statically set by tablegen.
98   bool FP64 = false;
99   bool FMA = false;
100   bool MIMG_R128 = false;
101   bool CIInsts = false;
102   bool GFX8Insts = false;
103   bool GFX9Insts = false;
104   bool GFX90AInsts = false;
105   bool GFX940Insts = false;
106   bool GFX10Insts = false;
107   bool GFX11Insts = false;
108   bool GFX10_3Insts = false;
109   bool GFX7GFX8GFX9Insts = false;
110   bool SGPRInitBug = false;
111   bool UserSGPRInit16Bug = false;
112   bool NegativeScratchOffsetBug = false;
113   bool NegativeUnalignedScratchOffsetBug = false;
114   bool HasSMemRealTime = false;
115   bool HasIntClamp = false;
116   bool HasFmaMixInsts = false;
117   bool HasMovrel = false;
118   bool HasVGPRIndexMode = false;
119   bool HasScalarStores = false;
120   bool HasScalarAtomics = false;
121   bool HasSDWAOmod = false;
122   bool HasSDWAScalar = false;
123   bool HasSDWASdst = false;
124   bool HasSDWAMac = false;
125   bool HasSDWAOutModsVOPC = false;
126   bool HasDPP = false;
127   bool HasDPP8 = false;
128   bool Has64BitDPP = false;
129   bool HasPackedFP32Ops = false;
130   bool HasImageInsts = false;
131   bool HasExtendedImageInsts = false;
132   bool HasR128A16 = false;
133   bool HasA16 = false;
134   bool HasG16 = false;
135   bool HasNSAEncoding = false;
136   bool HasPartialNSAEncoding = false;
137   bool GFX10_AEncoding = false;
138   bool GFX10_BEncoding = false;
139   bool HasDLInsts = false;
140   bool HasFmacF64Inst = false;
141   bool HasDot1Insts = false;
142   bool HasDot2Insts = false;
143   bool HasDot3Insts = false;
144   bool HasDot4Insts = false;
145   bool HasDot5Insts = false;
146   bool HasDot6Insts = false;
147   bool HasDot7Insts = false;
148   bool HasDot8Insts = false;
149   bool HasDot9Insts = false;
150   bool HasDot10Insts = false;
151   bool HasMAIInsts = false;
152   bool HasFP8Insts = false;
153   bool HasPkFmacF16Inst = false;
154   bool HasAtomicDsPkAdd16Insts = false;
155   bool HasAtomicFlatPkAdd16Insts = false;
156   bool HasAtomicFaddRtnInsts = false;
157   bool HasAtomicFaddNoRtnInsts = false;
158   bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
159   bool HasAtomicBufferGlobalPkAddF16Insts = false;
160   bool HasAtomicGlobalPkAddBF16Inst = false;
161   bool HasFlatAtomicFaddF32Inst = false;
162   bool SupportsSRAMECC = false;
163 
164   // This should not be used directly. 'TargetID' tracks the dynamic settings
165   // for SRAMECC.
166   bool EnableSRAMECC = false;
167 
168   bool HasNoSdstCMPX = false;
169   bool HasVscnt = false;
170   bool HasGetWaveIdInst = false;
171   bool HasSMemTimeInst = false;
172   bool HasShaderCyclesRegister = false;
173   bool HasVOP3Literal = false;
174   bool HasNoDataDepHazard = false;
175   bool FlatAddressSpace = false;
176   bool FlatInstOffsets = false;
177   bool FlatGlobalInsts = false;
178   bool FlatScratchInsts = false;
179   bool ScalarFlatScratchInsts = false;
180   bool HasArchitectedFlatScratch = false;
181   bool EnableFlatScratch = false;
182   bool HasArchitectedSGPRs = false;
183   bool AddNoCarryInsts = false;
184   bool HasUnpackedD16VMem = false;
185   bool LDSMisalignedBug = false;
186   bool HasMFMAInlineLiteralBug = false;
187   bool UnalignedBufferAccess = false;
188   bool UnalignedDSAccess = false;
189   bool HasPackedTID = false;
190   bool ScalarizeGlobal = false;
191 
192   bool HasVcmpxPermlaneHazard = false;
193   bool HasVMEMtoScalarWriteHazard = false;
194   bool HasSMEMtoVectorWriteHazard = false;
195   bool HasInstFwdPrefetchBug = false;
196   bool HasVcmpxExecWARHazard = false;
197   bool HasLdsBranchVmemWARHazard = false;
198   bool HasNSAtoVMEMBug = false;
199   bool HasNSAClauseBug = false;
200   bool HasOffset3fBug = false;
201   bool HasFlatSegmentOffsetBug = false;
202   bool HasImageStoreD16Bug = false;
203   bool HasImageGather4D16Bug = false;
204   bool HasGFX11FullVGPRs = false;
205   bool HasMADIntraFwdBug = false;
206   bool HasVOPDInsts = false;
207   bool HasVALUTransUseHazard = false;
208   bool HasForceStoreSC0SC1 = false;
209 
210   // Dummy feature to use for assembler in tablegen.
211   bool FeatureDisable = false;
212 
213   SelectionDAGTargetInfo TSInfo;
214 private:
215   SIInstrInfo InstrInfo;
216   SITargetLowering TLInfo;
217   SIFrameLowering FrameLowering;
218 
219 public:
220   GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
221                const GCNTargetMachine &TM);
222   ~GCNSubtarget() override;
223 
224   GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
225                                                    StringRef GPU, StringRef FS);
226 
227   const SIInstrInfo *getInstrInfo() const override {
228     return &InstrInfo;
229   }
230 
231   const SIFrameLowering *getFrameLowering() const override {
232     return &FrameLowering;
233   }
234 
235   const SITargetLowering *getTargetLowering() const override {
236     return &TLInfo;
237   }
238 
239   const SIRegisterInfo *getRegisterInfo() const override {
240     return &InstrInfo.getRegisterInfo();
241   }
242 
243   const CallLowering *getCallLowering() const override {
244     return CallLoweringInfo.get();
245   }
246 
247   const InlineAsmLowering *getInlineAsmLowering() const override {
248     return InlineAsmLoweringInfo.get();
249   }
250 
251   InstructionSelector *getInstructionSelector() const override {
252     return InstSelector.get();
253   }
254 
255   const LegalizerInfo *getLegalizerInfo() const override {
256     return Legalizer.get();
257   }
258 
259   const AMDGPURegisterBankInfo *getRegBankInfo() const override {
260     return RegBankInfo.get();
261   }
262 
263   const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const {
264     return TargetID;
265   }
266 
267   // Nothing implemented, just prevent crashes on use.
268   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
269     return &TSInfo;
270   }
271 
272   const InstrItineraryData *getInstrItineraryData() const override {
273     return &InstrItins;
274   }
275 
276   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
277 
278   Generation getGeneration() const {
279     return (Generation)Gen;
280   }
281 
282   unsigned getMaxWaveScratchSize() const {
283     // See COMPUTE_TMPRING_SIZE.WAVESIZE.
284     if (getGeneration() < GFX11) {
285       // 13-bit field in units of 256-dword.
286       return (256 * 4) * ((1 << 13) - 1);
287     }
288     // 15-bit field in units of 64-dword.
289     return (64 * 4) * ((1 << 15) - 1);
290   }
291 
292   /// Return the number of high bits known to be zero for a frame index.
293   unsigned getKnownHighZeroBitsForFrameIndex() const {
294     return llvm::countl_zero(getMaxWaveScratchSize()) + getWavefrontSizeLog2();
295   }
296 
297   int getLDSBankCount() const {
298     return LDSBankCount;
299   }
300 
301   unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
302     return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
303   }
304 
305   unsigned getConstantBusLimit(unsigned Opcode) const;
306 
307   /// Returns if the result of this instruction with a 16-bit result returned in
308   /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
309   /// the original value.
310   bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
311 
312   bool supportsWGP() const { return getGeneration() >= GFX10; }
313 
314   bool hasIntClamp() const {
315     return HasIntClamp;
316   }
317 
318   bool hasFP64() const {
319     return FP64;
320   }
321 
322   bool hasMIMG_R128() const {
323     return MIMG_R128;
324   }
325 
326   bool hasHWFP64() const {
327     return FP64;
328   }
329 
330   bool hasHalfRate64Ops() const {
331     return HalfRate64Ops;
332   }
333 
334   bool hasFullRate64Ops() const {
335     return FullRate64Ops;
336   }
337 
338   bool hasAddr64() const {
339     return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
340   }
341 
342   bool hasFlat() const {
343     return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS);
344   }
345 
346   // Return true if the target only has the reverse operand versions of VALU
347   // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
348   bool hasOnlyRevVALUShifts() const {
349     return getGeneration() >= VOLCANIC_ISLANDS;
350   }
351 
352   bool hasFractBug() const {
353     return getGeneration() == SOUTHERN_ISLANDS;
354   }
355 
356   bool hasBFE() const {
357     return true;
358   }
359 
360   bool hasBFI() const {
361     return true;
362   }
363 
364   bool hasBFM() const {
365     return hasBFE();
366   }
367 
368   bool hasBCNT(unsigned Size) const {
369     return true;
370   }
371 
372   bool hasFFBL() const {
373     return true;
374   }
375 
376   bool hasFFBH() const {
377     return true;
378   }
379 
380   bool hasMed3_16() const {
381     return getGeneration() >= AMDGPUSubtarget::GFX9;
382   }
383 
384   bool hasMin3Max3_16() const {
385     return getGeneration() >= AMDGPUSubtarget::GFX9;
386   }
387 
388   bool hasFmaMixInsts() const {
389     return HasFmaMixInsts;
390   }
391 
392   bool hasCARRY() const {
393     return true;
394   }
395 
396   bool hasFMA() const {
397     return FMA;
398   }
399 
400   bool hasSwap() const {
401     return GFX9Insts;
402   }
403 
404   bool hasScalarPackInsts() const {
405     return GFX9Insts;
406   }
407 
408   bool hasScalarMulHiInsts() const {
409     return GFX9Insts;
410   }
411 
412   TrapHandlerAbi getTrapHandlerAbi() const {
413     return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE;
414   }
415 
416   bool supportsGetDoorbellID() const {
417     // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
418     return getGeneration() >= GFX9;
419   }
420 
421   /// True if the offset field of DS instructions works as expected. On SI, the
422   /// offset uses a 16-bit adder and does not always wrap properly.
423   bool hasUsableDSOffset() const {
424     return getGeneration() >= SEA_ISLANDS;
425   }
426 
427   bool unsafeDSOffsetFoldingEnabled() const {
428     return EnableUnsafeDSOffsetFolding;
429   }
430 
431   /// Condition output from div_scale is usable.
432   bool hasUsableDivScaleConditionOutput() const {
433     return getGeneration() != SOUTHERN_ISLANDS;
434   }
435 
436   /// Extra wait hazard is needed in some cases before
437   /// s_cbranch_vccnz/s_cbranch_vccz.
438   bool hasReadVCCZBug() const {
439     return getGeneration() <= SEA_ISLANDS;
440   }
441 
442   /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
443   bool partialVCCWritesUpdateVCCZ() const {
444     return getGeneration() >= GFX10;
445   }
446 
447   /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
448   /// was written by a VALU instruction.
449   bool hasSMRDReadVALUDefHazard() const {
450     return getGeneration() == SOUTHERN_ISLANDS;
451   }
452 
453   /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
454   /// SGPR was written by a VALU Instruction.
455   bool hasVMEMReadSGPRVALUDefHazard() const {
456     return getGeneration() >= VOLCANIC_ISLANDS;
457   }
458 
459   bool hasRFEHazards() const {
460     return getGeneration() >= VOLCANIC_ISLANDS;
461   }
462 
463   /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
464   unsigned getSetRegWaitStates() const {
465     return getGeneration() <= SEA_ISLANDS ? 1 : 2;
466   }
467 
468   bool dumpCode() const {
469     return DumpCode;
470   }
471 
472   /// Return the amount of LDS that can be used that will not restrict the
473   /// occupancy lower than WaveCount.
474   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
475                                            const Function &) const;
476 
477   bool supportsMinMaxDenormModes() const {
478     return getGeneration() >= AMDGPUSubtarget::GFX9;
479   }
480 
481   /// \returns If target supports S_DENORM_MODE.
482   bool hasDenormModeInst() const {
483     return getGeneration() >= AMDGPUSubtarget::GFX10;
484   }
485 
486   bool useFlatForGlobal() const {
487     return FlatForGlobal;
488   }
489 
490   /// \returns If target supports ds_read/write_b128 and user enables generation
491   /// of ds_read/write_b128.
492   bool useDS128() const {
493     return CIInsts && EnableDS128;
494   }
495 
496   /// \return If target supports ds_read/write_b96/128.
497   bool hasDS96AndDS128() const {
498     return CIInsts;
499   }
500 
501   /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
502   bool haveRoundOpsF64() const {
503     return CIInsts;
504   }
505 
506   /// \returns If MUBUF instructions always perform range checking, even for
507   /// buffer resources used for private memory access.
508   bool privateMemoryResourceIsRangeChecked() const {
509     return getGeneration() < AMDGPUSubtarget::GFX9;
510   }
511 
512   /// \returns If target requires PRT Struct NULL support (zero result registers
513   /// for sparse texture support).
514   bool usePRTStrictNull() const {
515     return EnablePRTStrictNull;
516   }
517 
518   bool hasAutoWaitcntBeforeBarrier() const {
519     return AutoWaitcntBeforeBarrier;
520   }
521 
522   /// \returns true if the target supports backing off of s_barrier instructions
523   /// when an exception is raised.
524   bool supportsBackOffBarrier() const {
525     return BackOffBarrier;
526   }
527 
528   bool hasUnalignedBufferAccess() const {
529     return UnalignedBufferAccess;
530   }
531 
532   bool hasUnalignedBufferAccessEnabled() const {
533     return UnalignedBufferAccess && UnalignedAccessMode;
534   }
535 
536   bool hasUnalignedDSAccess() const {
537     return UnalignedDSAccess;
538   }
539 
540   bool hasUnalignedDSAccessEnabled() const {
541     return UnalignedDSAccess && UnalignedAccessMode;
542   }
543 
544   bool hasUnalignedScratchAccess() const {
545     return UnalignedScratchAccess;
546   }
547 
548   bool hasUnalignedAccessMode() const {
549     return UnalignedAccessMode;
550   }
551 
552   bool hasApertureRegs() const {
553     return HasApertureRegs;
554   }
555 
556   bool isTrapHandlerEnabled() const {
557     return TrapHandler;
558   }
559 
560   bool isXNACKEnabled() const {
561     return TargetID.isXnackOnOrAny();
562   }
563 
564   bool isTgSplitEnabled() const {
565     return EnableTgSplit;
566   }
567 
568   bool isCuModeEnabled() const {
569     return EnableCuMode;
570   }
571 
572   bool hasFlatAddressSpace() const {
573     return FlatAddressSpace;
574   }
575 
576   bool hasFlatScrRegister() const {
577     return hasFlatAddressSpace();
578   }
579 
580   bool hasFlatInstOffsets() const {
581     return FlatInstOffsets;
582   }
583 
584   bool hasFlatGlobalInsts() const {
585     return FlatGlobalInsts;
586   }
587 
588   bool hasFlatScratchInsts() const {
589     return FlatScratchInsts;
590   }
591 
592   // Check if target supports ST addressing mode with FLAT scratch instructions.
593   // The ST addressing mode means no registers are used, either VGPR or SGPR,
594   // but only immediate offset is swizzled and added to the FLAT scratch base.
595   bool hasFlatScratchSTMode() const {
596     return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts());
597   }
598 
599   bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
600 
601   bool hasScalarFlatScratchInsts() const {
602     return ScalarFlatScratchInsts;
603   }
604 
605   bool enableFlatScratch() const {
606     return flatScratchIsArchitected() ||
607            (EnableFlatScratch && hasFlatScratchInsts());
608   }
609 
610   bool hasGlobalAddTidInsts() const {
611     return GFX10_BEncoding;
612   }
613 
614   bool hasAtomicCSub() const {
615     return GFX10_BEncoding;
616   }
617 
618   bool hasMultiDwordFlatScratchAddressing() const {
619     return getGeneration() >= GFX9;
620   }
621 
622   bool hasFlatSegmentOffsetBug() const {
623     return HasFlatSegmentOffsetBug;
624   }
625 
626   bool hasFlatLgkmVMemCountInOrder() const {
627     return getGeneration() > GFX9;
628   }
629 
630   bool hasD16LoadStore() const {
631     return getGeneration() >= GFX9;
632   }
633 
634   bool d16PreservesUnusedBits() const {
635     return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
636   }
637 
638   bool hasD16Images() const {
639     return getGeneration() >= VOLCANIC_ISLANDS;
640   }
641 
642   /// Return if most LDS instructions have an m0 use that require m0 to be
643   /// initialized.
644   bool ldsRequiresM0Init() const {
645     return getGeneration() < GFX9;
646   }
647 
648   // True if the hardware rewinds and replays GWS operations if a wave is
649   // preempted.
650   //
651   // If this is false, a GWS operation requires testing if a nack set the
652   // MEM_VIOL bit, and repeating if so.
653   bool hasGWSAutoReplay() const {
654     return getGeneration() >= GFX9;
655   }
656 
657   /// \returns if target has ds_gws_sema_release_all instruction.
658   bool hasGWSSemaReleaseAll() const {
659     return CIInsts;
660   }
661 
662   /// \returns true if the target has integer add/sub instructions that do not
663   /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
664   /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
665   /// for saturation.
666   bool hasAddNoCarry() const {
667     return AddNoCarryInsts;
668   }
669 
670   bool hasUnpackedD16VMem() const {
671     return HasUnpackedD16VMem;
672   }
673 
674   // Covers VS/PS/CS graphics shaders
675   bool isMesaGfxShader(const Function &F) const {
676     return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
677   }
678 
679   bool hasMad64_32() const {
680     return getGeneration() >= SEA_ISLANDS;
681   }
682 
683   bool hasSDWAOmod() const {
684     return HasSDWAOmod;
685   }
686 
687   bool hasSDWAScalar() const {
688     return HasSDWAScalar;
689   }
690 
691   bool hasSDWASdst() const {
692     return HasSDWASdst;
693   }
694 
695   bool hasSDWAMac() const {
696     return HasSDWAMac;
697   }
698 
699   bool hasSDWAOutModsVOPC() const {
700     return HasSDWAOutModsVOPC;
701   }
702 
703   bool hasDLInsts() const {
704     return HasDLInsts;
705   }
706 
707   bool hasFmacF64Inst() const { return HasFmacF64Inst; }
708 
709   bool hasDot1Insts() const {
710     return HasDot1Insts;
711   }
712 
713   bool hasDot2Insts() const {
714     return HasDot2Insts;
715   }
716 
717   bool hasDot3Insts() const {
718     return HasDot3Insts;
719   }
720 
721   bool hasDot4Insts() const {
722     return HasDot4Insts;
723   }
724 
725   bool hasDot5Insts() const {
726     return HasDot5Insts;
727   }
728 
729   bool hasDot6Insts() const {
730     return HasDot6Insts;
731   }
732 
733   bool hasDot7Insts() const {
734     return HasDot7Insts;
735   }
736 
737   bool hasDot8Insts() const {
738     return HasDot8Insts;
739   }
740 
741   bool hasDot9Insts() const {
742     return HasDot9Insts;
743   }
744 
745   bool hasDot10Insts() const {
746     return HasDot10Insts;
747   }
748 
749   bool hasMAIInsts() const {
750     return HasMAIInsts;
751   }
752 
753   bool hasFP8Insts() const {
754     return HasFP8Insts;
755   }
756 
757   bool hasPkFmacF16Inst() const {
758     return HasPkFmacF16Inst;
759   }
760 
761   bool hasAtomicDsPkAdd16Insts() const { return HasAtomicDsPkAdd16Insts; }
762 
763   bool hasAtomicFlatPkAdd16Insts() const { return HasAtomicFlatPkAdd16Insts; }
764 
765   bool hasAtomicFaddInsts() const {
766     return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts;
767   }
768 
769   bool hasAtomicFaddRtnInsts() const { return HasAtomicFaddRtnInsts; }
770 
771   bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; }
772 
773   bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const {
774     return HasAtomicBufferGlobalPkAddF16NoRtnInsts;
775   }
776 
777   bool hasAtomicBufferGlobalPkAddF16Insts() const {
778     return HasAtomicBufferGlobalPkAddF16Insts;
779   }
780 
781   bool hasAtomicGlobalPkAddBF16Inst() const {
782     return HasAtomicGlobalPkAddBF16Inst;
783   }
784 
785   bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
786 
787   bool hasNoSdstCMPX() const {
788     return HasNoSdstCMPX;
789   }
790 
791   bool hasVscnt() const {
792     return HasVscnt;
793   }
794 
795   bool hasGetWaveIdInst() const {
796     return HasGetWaveIdInst;
797   }
798 
799   bool hasSMemTimeInst() const {
800     return HasSMemTimeInst;
801   }
802 
803   bool hasShaderCyclesRegister() const {
804     return HasShaderCyclesRegister;
805   }
806 
807   bool hasVOP3Literal() const {
808     return HasVOP3Literal;
809   }
810 
811   bool hasNoDataDepHazard() const {
812     return HasNoDataDepHazard;
813   }
814 
815   bool vmemWriteNeedsExpWaitcnt() const {
816     return getGeneration() < SEA_ISLANDS;
817   }
818 
819   bool hasInstPrefetch() const { return getGeneration() >= GFX10; }
820 
821   // Scratch is allocated in 256 dword per wave blocks for the entire
822   // wavefront. When viewed from the perspective of an arbitrary workitem, this
823   // is 4-byte aligned.
824   //
825   // Only 4-byte alignment is really needed to access anything. Transformations
826   // on the pointer value itself may rely on the alignment / known low bits of
827   // the pointer. Set this to something above the minimum to avoid needing
828   // dynamic realignment in common cases.
829   Align getStackAlignment() const { return Align(16); }
830 
831   bool enableMachineScheduler() const override {
832     return true;
833   }
834 
835   bool useAA() const override;
836 
837   bool enableSubRegLiveness() const override {
838     return true;
839   }
840 
841   void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
842   bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
843 
844   // static wrappers
845   static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
846 
847   // XXX - Why is this here if it isn't in the default pass set?
848   bool enableEarlyIfConversion() const override {
849     return true;
850   }
851 
852   void overrideSchedPolicy(MachineSchedPolicy &Policy,
853                            unsigned NumRegionInstrs) const override;
854 
855   unsigned getMaxNumUserSGPRs() const {
856     return 16;
857   }
858 
859   bool hasSMemRealTime() const {
860     return HasSMemRealTime;
861   }
862 
863   bool hasMovrel() const {
864     return HasMovrel;
865   }
866 
867   bool hasVGPRIndexMode() const {
868     return HasVGPRIndexMode;
869   }
870 
871   bool useVGPRIndexMode() const;
872 
873   bool hasScalarCompareEq64() const {
874     return getGeneration() >= VOLCANIC_ISLANDS;
875   }
876 
877   bool hasScalarStores() const {
878     return HasScalarStores;
879   }
880 
881   bool hasScalarAtomics() const {
882     return HasScalarAtomics;
883   }
884 
885   bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
886 
887   /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
888   bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
889 
890   /// \returns true if the subtarget has the v_permlane64_b32 instruction.
891   bool hasPermLane64() const { return getGeneration() >= GFX11; }
892 
893   bool hasDPP() const {
894     return HasDPP;
895   }
896 
897   bool hasDPPBroadcasts() const {
898     return HasDPP && getGeneration() < GFX10;
899   }
900 
901   bool hasDPPWavefrontShifts() const {
902     return HasDPP && getGeneration() < GFX10;
903   }
904 
905   bool hasDPP8() const {
906     return HasDPP8;
907   }
908 
909   bool has64BitDPP() const {
910     return Has64BitDPP;
911   }
912 
913   bool hasPackedFP32Ops() const {
914     return HasPackedFP32Ops;
915   }
916 
917   bool hasFmaakFmamkF32Insts() const {
918     return getGeneration() >= GFX10 || hasGFX940Insts();
919   }
920 
921   bool hasImageInsts() const {
922     return HasImageInsts;
923   }
924 
925   bool hasExtendedImageInsts() const {
926     return HasExtendedImageInsts;
927   }
928 
929   bool hasR128A16() const {
930     return HasR128A16;
931   }
932 
933   bool hasA16() const { return HasA16; }
934 
935   bool hasG16() const { return HasG16; }
936 
937   bool hasOffset3fBug() const {
938     return HasOffset3fBug;
939   }
940 
941   bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
942 
943   bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; }
944 
945   bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
946 
947   bool hasNSAEncoding() const { return HasNSAEncoding; }
948 
949   bool hasPartialNSAEncoding() const { return HasPartialNSAEncoding; }
950 
951   unsigned getNSAMaxSize() const { return AMDGPU::getNSAMaxSize(*this); }
952 
953   bool hasGFX10_AEncoding() const {
954     return GFX10_AEncoding;
955   }
956 
957   bool hasGFX10_BEncoding() const {
958     return GFX10_BEncoding;
959   }
960 
961   bool hasGFX10_3Insts() const {
962     return GFX10_3Insts;
963   }
964 
965   bool hasMadF16() const;
966 
967   bool hasMovB64() const { return GFX940Insts; }
968 
969   bool hasLshlAddB64() const { return GFX940Insts; }
970 
971   bool enableSIScheduler() const {
972     return EnableSIScheduler;
973   }
974 
975   bool loadStoreOptEnabled() const {
976     return EnableLoadStoreOpt;
977   }
978 
979   bool hasSGPRInitBug() const {
980     return SGPRInitBug;
981   }
982 
983   bool hasUserSGPRInit16Bug() const {
984     return UserSGPRInit16Bug && isWave32();
985   }
986 
987   bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; }
988 
989   bool hasNegativeUnalignedScratchOffsetBug() const {
990     return NegativeUnalignedScratchOffsetBug;
991   }
992 
993   bool hasMFMAInlineLiteralBug() const {
994     return HasMFMAInlineLiteralBug;
995   }
996 
997   bool has12DWordStoreHazard() const {
998     return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
999   }
1000 
1001   // \returns true if the subtarget supports DWORDX3 load/store instructions.
1002   bool hasDwordx3LoadStores() const {
1003     return CIInsts;
1004   }
1005 
1006   bool hasReadM0MovRelInterpHazard() const {
1007     return getGeneration() == AMDGPUSubtarget::GFX9;
1008   }
1009 
1010   bool hasReadM0SendMsgHazard() const {
1011     return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
1012            getGeneration() <= AMDGPUSubtarget::GFX9;
1013   }
1014 
1015   bool hasReadM0LdsDmaHazard() const {
1016     return getGeneration() == AMDGPUSubtarget::GFX9;
1017   }
1018 
1019   bool hasReadM0LdsDirectHazard() const {
1020     return getGeneration() == AMDGPUSubtarget::GFX9;
1021   }
1022 
1023   bool hasVcmpxPermlaneHazard() const {
1024     return HasVcmpxPermlaneHazard;
1025   }
1026 
1027   bool hasVMEMtoScalarWriteHazard() const {
1028     return HasVMEMtoScalarWriteHazard;
1029   }
1030 
1031   bool hasSMEMtoVectorWriteHazard() const {
1032     return HasSMEMtoVectorWriteHazard;
1033   }
1034 
1035   bool hasLDSMisalignedBug() const {
1036     return LDSMisalignedBug && !EnableCuMode;
1037   }
1038 
1039   bool hasInstFwdPrefetchBug() const {
1040     return HasInstFwdPrefetchBug;
1041   }
1042 
1043   bool hasVcmpxExecWARHazard() const {
1044     return HasVcmpxExecWARHazard;
1045   }
1046 
1047   bool hasLdsBranchVmemWARHazard() const {
1048     return HasLdsBranchVmemWARHazard;
1049   }
1050 
1051   // Shift amount of a 64 bit shift cannot be a highest allocated register
1052   // if also at the end of the allocation block.
1053   bool hasShift64HighRegBug() const {
1054     return GFX90AInsts && !GFX940Insts;
1055   }
1056 
1057   // Has one cycle hazard on transcendental instruction feeding a
1058   // non transcendental VALU.
1059   bool hasTransForwardingHazard() const { return GFX940Insts; }
1060 
1061   // Has one cycle hazard on a VALU instruction partially writing dst with
1062   // a shift of result bits feeding another VALU instruction.
1063   bool hasDstSelForwardingHazard() const { return GFX940Insts; }
1064 
1065   // Cannot use op_sel with v_dot instructions.
1066   bool hasDOTOpSelHazard() const { return GFX940Insts; }
1067 
1068   // Does not have HW interlocs for VALU writing and then reading SGPRs.
1069   bool hasVDecCoExecHazard() const {
1070     return GFX940Insts;
1071   }
1072 
1073   bool hasNSAtoVMEMBug() const {
1074     return HasNSAtoVMEMBug;
1075   }
1076 
1077   bool hasNSAClauseBug() const { return HasNSAClauseBug; }
1078 
1079   bool hasHardClauses() const { return getGeneration() >= GFX10; }
1080 
1081   bool hasGFX90AInsts() const { return GFX90AInsts; }
1082 
1083   bool hasFPAtomicToDenormModeHazard() const {
1084     return getGeneration() == GFX10;
1085   }
1086 
1087   bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1088 
1089   bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1090 
1091   bool hasVALUPartialForwardingHazard() const {
1092     return getGeneration() >= GFX11;
1093   }
1094 
1095   bool hasVALUTransUseHazard() const { return HasVALUTransUseHazard; }
1096 
1097   bool hasForceStoreSC0SC1() const { return HasForceStoreSC0SC1; }
1098 
1099   bool hasVALUMaskWriteHazard() const { return getGeneration() >= GFX11; }
1100 
1101   /// Return if operations acting on VGPR tuples require even alignment.
1102   bool needsAlignedVGPRs() const { return GFX90AInsts; }
1103 
1104   /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
1105   bool hasSPackHL() const { return GFX11Insts; }
1106 
1107   /// Return true if the target's EXP instruction has the COMPR flag, which
1108   /// affects the meaning of the EN (enable) bits.
1109   bool hasCompressedExport() const { return !GFX11Insts; }
1110 
1111   /// Return true if the target's EXP instruction supports the NULL export
1112   /// target.
1113   bool hasNullExportTarget() const { return !GFX11Insts; }
1114 
1115   bool hasGFX11FullVGPRs() const { return HasGFX11FullVGPRs; }
1116 
1117   bool hasVOPDInsts() const { return HasVOPDInsts; }
1118 
1119   bool hasFlatScratchSVSSwizzleBug() const { return getGeneration() == GFX11; }
1120 
1121   /// Return true if the target has the S_DELAY_ALU instruction.
1122   bool hasDelayAlu() const { return GFX11Insts; }
1123 
1124   bool hasPackedTID() const { return HasPackedTID; }
1125 
1126   // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that
1127   // hasGFX90AInsts is also true.
1128   bool hasGFX940Insts() const { return GFX940Insts; }
1129 
1130   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1131   /// SGPRs
1132   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
1133 
1134   /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
1135   /// VGPRs
1136   unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
1137 
  /// Return occupancy for the given function. Uses the LDS size and the
  /// number of registers if provided.
  /// Note, occupancy can be affected by the scratch allocation as well, but
  /// we do not have enough information to compute it.
1142   unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
1143                             unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
1144 
1145   /// \returns true if the flat_scratch register should be initialized with the
1146   /// pointer to the wave's scratch memory rather than a size and offset.
1147   bool flatScratchIsPointer() const {
1148     return getGeneration() >= AMDGPUSubtarget::GFX9;
1149   }
1150 
1151   /// \returns true if the flat_scratch register is initialized by the HW.
1152   /// In this case it is readonly.
1153   bool flatScratchIsArchitected() const { return HasArchitectedFlatScratch; }
1154 
1155   /// \returns true if the architected SGPRs are enabled.
1156   bool hasArchitectedSGPRs() const { return HasArchitectedSGPRs; }
1157 
1158   /// \returns true if the machine has merged shaders in which s0-s7 are
1159   /// reserved by the hardware and user SGPRs start at s8
1160   bool hasMergedShaders() const {
1161     return getGeneration() >= GFX9;
1162   }
1163 
1164   // \returns true if the target supports the pre-NGG legacy geometry path.
1165   bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1166 
1167   /// \returns SGPR allocation granularity supported by the subtarget.
1168   unsigned getSGPRAllocGranule() const {
1169     return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
1170   }
1171 
1172   /// \returns SGPR encoding granularity supported by the subtarget.
1173   unsigned getSGPREncodingGranule() const {
1174     return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
1175   }
1176 
1177   /// \returns Total number of SGPRs supported by the subtarget.
1178   unsigned getTotalNumSGPRs() const {
1179     return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
1180   }
1181 
1182   /// \returns Addressable number of SGPRs supported by the subtarget.
1183   unsigned getAddressableNumSGPRs() const {
1184     return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
1185   }
1186 
1187   /// \returns Minimum number of SGPRs that meets the given number of waves per
1188   /// execution unit requirement supported by the subtarget.
1189   unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
1190     return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
1191   }
1192 
1193   /// \returns Maximum number of SGPRs that meets the given number of waves per
1194   /// execution unit requirement supported by the subtarget.
1195   unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
1196     return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
1197   }
1198 
  /// \returns Reserved number of SGPRs. This is the common
  /// utility function called by the MachineFunction and
  /// Function variants of getReservedNumSGPRs.
1202   unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
1203   /// \returns Reserved number of SGPRs for given machine function \p MF.
1204   unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
1205 
1206   /// \returns Reserved number of SGPRs for given function \p F.
1207   unsigned getReservedNumSGPRs(const Function &F) const;
1208 
1209   /// \returns max num SGPRs. This is the common utility
1210   /// function called by MachineFunction and Function
1211   /// variants of getMaxNumSGPRs.
1212   unsigned getBaseMaxNumSGPRs(const Function &F,
1213                               std::pair<unsigned, unsigned> WavesPerEU,
1214                               unsigned PreloadedSGPRs,
1215                               unsigned ReservedNumSGPRs) const;
1216 
1217   /// \returns Maximum number of SGPRs that meets number of waves per execution
1218   /// unit requirement for function \p MF, or number of SGPRs explicitly
1219   /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
1220   ///
1221   /// \returns Value that meets number of waves per execution unit requirement
1222   /// if explicitly requested value cannot be converted to integer, violates
1223   /// subtarget's specifications, or does not meet number of waves per execution
1224   /// unit requirement.
1225   unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
1226 
1227   /// \returns Maximum number of SGPRs that meets number of waves per execution
1228   /// unit requirement for function \p F, or number of SGPRs explicitly
1229   /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
1230   ///
1231   /// \returns Value that meets number of waves per execution unit requirement
1232   /// if explicitly requested value cannot be converted to integer, violates
1233   /// subtarget's specifications, or does not meet number of waves per execution
1234   /// unit requirement.
1235   unsigned getMaxNumSGPRs(const Function &F) const;
1236 
1237   /// \returns VGPR allocation granularity supported by the subtarget.
1238   unsigned getVGPRAllocGranule() const {
1239     return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
1240   }
1241 
1242   /// \returns VGPR encoding granularity supported by the subtarget.
1243   unsigned getVGPREncodingGranule() const {
1244     return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
1245   }
1246 
1247   /// \returns Total number of VGPRs supported by the subtarget.
1248   unsigned getTotalNumVGPRs() const {
1249     return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
1250   }
1251 
1252   /// \returns Addressable number of VGPRs supported by the subtarget.
1253   unsigned getAddressableNumVGPRs() const {
1254     return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
1255   }
1256 
1257   /// \returns the minimum number of VGPRs that will prevent achieving more than
1258   /// the specified number of waves \p WavesPerEU.
1259   unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
1260     return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
1261   }
1262 
1263   /// \returns the maximum number of VGPRs that can be used and still achieved
1264   /// at least the specified number of waves \p WavesPerEU.
1265   unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1266     return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1267   }
1268 
1269   /// \returns max num VGPRs. This is the common utility function
1270   /// called by MachineFunction and Function variants of getMaxNumVGPRs.
1271   unsigned getBaseMaxNumVGPRs(const Function &F,
1272                               std::pair<unsigned, unsigned> WavesPerEU) const;
1273   /// \returns Maximum number of VGPRs that meets number of waves per execution
1274   /// unit requirement for function \p F, or number of VGPRs explicitly
1275   /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
1276   ///
1277   /// \returns Value that meets number of waves per execution unit requirement
1278   /// if explicitly requested value cannot be converted to integer, violates
1279   /// subtarget's specifications, or does not meet number of waves per execution
1280   /// unit requirement.
1281   unsigned getMaxNumVGPRs(const Function &F) const;
1282 
1283   unsigned getMaxNumAGPRs(const Function &F) const {
1284     return getMaxNumVGPRs(F);
1285   }
1286 
1287   /// \returns Maximum number of VGPRs that meets number of waves per execution
1288   /// unit requirement for function \p MF, or number of VGPRs explicitly
1289   /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1290   ///
1291   /// \returns Value that meets number of waves per execution unit requirement
1292   /// if explicitly requested value cannot be converted to integer, violates
1293   /// subtarget's specifications, or does not meet number of waves per execution
1294   /// unit requirement.
1295   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1296 
1297   void getPostRAMutations(
1298       std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1299       const override;
1300 
1301   std::unique_ptr<ScheduleDAGMutation>
1302   createFillMFMAShadowMutation(const TargetInstrInfo *TII) const;
1303 
1304   bool isWave32() const {
1305     return getWavefrontSize() == 32;
1306   }
1307 
1308   bool isWave64() const {
1309     return getWavefrontSize() == 64;
1310   }
1311 
1312   const TargetRegisterClass *getBoolRC() const {
1313     return getRegisterInfo()->getBoolRC();
1314   }
1315 
1316   /// \returns Maximum number of work groups per compute unit supported by the
1317   /// subtarget and limited by given \p FlatWorkGroupSize.
1318   unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1319     return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1320   }
1321 
1322   /// \returns Minimum flat work group size supported by the subtarget.
1323   unsigned getMinFlatWorkGroupSize() const override {
1324     return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
1325   }
1326 
1327   /// \returns Maximum flat work group size supported by the subtarget.
1328   unsigned getMaxFlatWorkGroupSize() const override {
1329     return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
1330   }
1331 
1332   /// \returns Number of waves per execution unit required to support the given
1333   /// \p FlatWorkGroupSize.
1334   unsigned
1335   getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1336     return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1337   }
1338 
1339   /// \returns Minimum number of waves per execution unit supported by the
1340   /// subtarget.
1341   unsigned getMinWavesPerEU() const override {
1342     return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1343   }
1344 
1345   void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1346                              SDep &Dep) const override;
1347 
1348   // \returns true if it's beneficial on this subtarget for the scheduler to
1349   // cluster stores as well as loads.
1350   bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1351 
1352   // \returns the number of address arguments from which to enable MIMG NSA
1353   // on supported architectures.
1354   unsigned getNSAThreshold(const MachineFunction &MF) const;
1355 
1356   // \returns true if the subtarget has a hazard requiring an "s_nop 0"
1357   // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
1358   bool requiresNopBeforeDeallocVGPRs() const {
1359     // Currently all targets that support the dealloc VGPRs message also require
1360     // the nop.
1361     return true;
1362   }
1363 };
1364 
1365 } // end namespace llvm
1366 
1367 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
1368