xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h (revision c1d255d3ffdbe447de3ab875bf4e7d7accc5bfc5)
1 //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMD GCN specific subclass of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
16 
17 #include "AMDGPUCallLowering.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIFrameLowering.h"
20 #include "SIISelLowering.h"
21 #include "SIInstrInfo.h"
22 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
23 
24 namespace llvm {
25 
26 class MCInst;
27 class MCInstrInfo;
28 
29 } // namespace llvm
30 
31 #define GET_SUBTARGETINFO_HEADER
32 #include "AMDGPUGenSubtargetInfo.inc"
33 
34 namespace llvm {
35 
36 class GCNTargetMachine;
37 
38 class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
39                            public AMDGPUSubtarget {
40 
41   using AMDGPUSubtarget::getMaxWavesPerEU;
42 
43 public:
44   enum TrapHandlerAbi {
45     TrapHandlerAbiNone = 0,
46     TrapHandlerAbiHsa = 1
47   };
48 
49   enum TrapID {
50     TrapIDHardwareReserved = 0,
51     TrapIDHSADebugTrap = 1,
52     TrapIDLLVMTrap = 2,
53     TrapIDLLVMDebugTrap = 3,
54     TrapIDDebugBreakpoint = 7,
55     TrapIDDebugReserved8 = 8,
56     TrapIDDebugReservedFE = 0xfe,
57     TrapIDDebugReservedFF = 0xff
58   };
59 
60   enum TrapRegValues {
61     LLVMTrapHandlerRegValue = 1
62   };
63 
64 private:
65   /// GlobalISel related APIs.
66   std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
67   std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
68   std::unique_ptr<InstructionSelector> InstSelector;
69   std::unique_ptr<LegalizerInfo> Legalizer;
70   std::unique_ptr<RegisterBankInfo> RegBankInfo;
71 
72 protected:
73   // Basic subtarget description.
74   Triple TargetTriple;
75   AMDGPU::IsaInfo::AMDGPUTargetID TargetID;
76   unsigned Gen;
77   InstrItineraryData InstrItins;
78   int LDSBankCount;
79   unsigned MaxPrivateElementSize;
80 
81   // Possibly statically set by tablegen, but may want to be overridden.
82   bool FastFMAF32;
83   bool FastDenormalF32;
84   bool HalfRate64Ops;
85 
86   // Dynamically set bits that enable features.
87   bool FlatForGlobal;
88   bool AutoWaitcntBeforeBarrier;
89   bool UnalignedScratchAccess;
90   bool UnalignedAccessMode;
91   bool HasApertureRegs;
92   bool SupportsXNACK;
93 
94   // This should not be used directly. 'TargetID' tracks the dynamic settings
95   // for XNACK.
96   bool EnableXNACK;
97 
98   bool EnableCuMode;
99   bool TrapHandler;
100 
101   // Used as options.
102   bool EnableLoadStoreOpt;
103   bool EnableUnsafeDSOffsetFolding;
104   bool EnableSIScheduler;
105   bool EnableDS128;
106   bool EnablePRTStrictNull;
107   bool DumpCode;
108 
109   // Subtarget properties statically set by tablegen.
110   bool FP64;
111   bool FMA;
112   bool MIMG_R128;
113   bool GCN3Encoding;
114   bool CIInsts;
115   bool GFX8Insts;
116   bool GFX9Insts;
117   bool GFX10Insts;
118   bool GFX10_3Insts;
119   bool GFX7GFX8GFX9Insts;
120   bool SGPRInitBug;
121   bool HasSMemRealTime;
122   bool HasIntClamp;
123   bool HasFmaMixInsts;
124   bool HasMovrel;
125   bool HasVGPRIndexMode;
126   bool HasScalarStores;
127   bool HasScalarAtomics;
128   bool HasSDWAOmod;
129   bool HasSDWAScalar;
130   bool HasSDWASdst;
131   bool HasSDWAMac;
132   bool HasSDWAOutModsVOPC;
133   bool HasDPP;
134   bool HasDPP8;
135   bool HasR128A16;
136   bool HasGFX10A16;
137   bool HasG16;
138   bool HasNSAEncoding;
139   bool GFX10_BEncoding;
140   bool HasDLInsts;
141   bool HasDot1Insts;
142   bool HasDot2Insts;
143   bool HasDot3Insts;
144   bool HasDot4Insts;
145   bool HasDot5Insts;
146   bool HasDot6Insts;
147   bool HasMAIInsts;
148   bool HasPkFmacF16Inst;
149   bool HasAtomicFaddInsts;
150   bool SupportsSRAMECC;
151 
152   // This should not be used directly. 'TargetID' tracks the dynamic settings
153   // for SRAMECC.
154   bool EnableSRAMECC;
155 
156   bool HasNoSdstCMPX;
157   bool HasVscnt;
158   bool HasGetWaveIdInst;
159   bool HasSMemTimeInst;
160   bool HasRegisterBanking;
161   bool HasVOP3Literal;
162   bool HasNoDataDepHazard;
163   bool FlatAddressSpace;
164   bool FlatInstOffsets;
165   bool FlatGlobalInsts;
166   bool FlatScratchInsts;
167   bool ScalarFlatScratchInsts;
168   bool AddNoCarryInsts;
169   bool HasUnpackedD16VMem;
170   bool LDSMisalignedBug;
171   bool HasMFMAInlineLiteralBug;
172   bool UnalignedBufferAccess;
173   bool UnalignedDSAccess;
174   bool ScalarizeGlobal;
175 
176   bool HasVcmpxPermlaneHazard;
177   bool HasVMEMtoScalarWriteHazard;
178   bool HasSMEMtoVectorWriteHazard;
179   bool HasInstFwdPrefetchBug;
180   bool HasVcmpxExecWARHazard;
181   bool HasLdsBranchVmemWARHazard;
182   bool HasNSAtoVMEMBug;
183   bool HasOffset3fBug;
184   bool HasFlatSegmentOffsetBug;
185   bool HasImageStoreD16Bug;
186   bool HasImageGather4D16Bug;
187 
188   // Dummy feature to use for assembler in tablegen.
189   bool FeatureDisable;
190 
191   SelectionDAGTargetInfo TSInfo;
192 private:
193   SIInstrInfo InstrInfo;
194   SITargetLowering TLInfo;
195   SIFrameLowering FrameLowering;
196 
197 public:
198   // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword.
199   static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1);
200 
201   GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
202                const GCNTargetMachine &TM);
203   ~GCNSubtarget() override;
204 
205   GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
206                                                    StringRef GPU, StringRef FS);
207 
208   const SIInstrInfo *getInstrInfo() const override {
209     return &InstrInfo;
210   }
211 
212   const SIFrameLowering *getFrameLowering() const override {
213     return &FrameLowering;
214   }
215 
216   const SITargetLowering *getTargetLowering() const override {
217     return &TLInfo;
218   }
219 
220   const SIRegisterInfo *getRegisterInfo() const override {
221     return &InstrInfo.getRegisterInfo();
222   }
223 
224   const CallLowering *getCallLowering() const override {
225     return CallLoweringInfo.get();
226   }
227 
228   const InlineAsmLowering *getInlineAsmLowering() const override {
229     return InlineAsmLoweringInfo.get();
230   }
231 
232   InstructionSelector *getInstructionSelector() const override {
233     return InstSelector.get();
234   }
235 
236   const LegalizerInfo *getLegalizerInfo() const override {
237     return Legalizer.get();
238   }
239 
240   const RegisterBankInfo *getRegBankInfo() const override {
241     return RegBankInfo.get();
242   }
243 
244   // Nothing implemented, just prevent crashes on use.
245   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
246     return &TSInfo;
247   }
248 
249   const InstrItineraryData *getInstrItineraryData() const override {
250     return &InstrItins;
251   }
252 
253   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
254 
255   Generation getGeneration() const {
256     return (Generation)Gen;
257   }
258 
259   /// Return the number of high bits known to be zero fror a frame index.
260   unsigned getKnownHighZeroBitsForFrameIndex() const {
261     return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
262   }
263 
264   int getLDSBankCount() const {
265     return LDSBankCount;
266   }
267 
268   unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
269     return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
270   }
271 
272   unsigned getConstantBusLimit(unsigned Opcode) const;
273 
274   bool hasIntClamp() const {
275     return HasIntClamp;
276   }
277 
278   bool hasFP64() const {
279     return FP64;
280   }
281 
282   bool hasMIMG_R128() const {
283     return MIMG_R128;
284   }
285 
286   bool hasHWFP64() const {
287     return FP64;
288   }
289 
290   bool hasFastFMAF32() const {
291     return FastFMAF32;
292   }
293 
294   bool hasHalfRate64Ops() const {
295     return HalfRate64Ops;
296   }
297 
298   bool hasAddr64() const {
299     return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
300   }
301 
302   bool hasFlat() const {
303     return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS);
304   }
305 
306   // Return true if the target only has the reverse operand versions of VALU
307   // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
308   bool hasOnlyRevVALUShifts() const {
309     return getGeneration() >= VOLCANIC_ISLANDS;
310   }
311 
312   bool hasFractBug() const {
313     return getGeneration() == SOUTHERN_ISLANDS;
314   }
315 
316   bool hasBFE() const {
317     return true;
318   }
319 
320   bool hasBFI() const {
321     return true;
322   }
323 
324   bool hasBFM() const {
325     return hasBFE();
326   }
327 
328   bool hasBCNT(unsigned Size) const {
329     return true;
330   }
331 
332   bool hasFFBL() const {
333     return true;
334   }
335 
336   bool hasFFBH() const {
337     return true;
338   }
339 
340   bool hasMed3_16() const {
341     return getGeneration() >= AMDGPUSubtarget::GFX9;
342   }
343 
344   bool hasMin3Max3_16() const {
345     return getGeneration() >= AMDGPUSubtarget::GFX9;
346   }
347 
348   bool hasFmaMixInsts() const {
349     return HasFmaMixInsts;
350   }
351 
352   bool hasCARRY() const {
353     return true;
354   }
355 
356   bool hasFMA() const {
357     return FMA;
358   }
359 
360   bool hasSwap() const {
361     return GFX9Insts;
362   }
363 
364   bool hasScalarPackInsts() const {
365     return GFX9Insts;
366   }
367 
368   bool hasScalarMulHiInsts() const {
369     return GFX9Insts;
370   }
371 
372   TrapHandlerAbi getTrapHandlerAbi() const {
373     return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
374   }
375 
376   /// True if the offset field of DS instructions works as expected. On SI, the
377   /// offset uses a 16-bit adder and does not always wrap properly.
378   bool hasUsableDSOffset() const {
379     return getGeneration() >= SEA_ISLANDS;
380   }
381 
382   bool unsafeDSOffsetFoldingEnabled() const {
383     return EnableUnsafeDSOffsetFolding;
384   }
385 
386   /// Condition output from div_scale is usable.
387   bool hasUsableDivScaleConditionOutput() const {
388     return getGeneration() != SOUTHERN_ISLANDS;
389   }
390 
391   /// Extra wait hazard is needed in some cases before
392   /// s_cbranch_vccnz/s_cbranch_vccz.
393   bool hasReadVCCZBug() const {
394     return getGeneration() <= SEA_ISLANDS;
395   }
396 
397   /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
398   bool partialVCCWritesUpdateVCCZ() const {
399     return getGeneration() >= GFX10;
400   }
401 
402   /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
403   /// was written by a VALU instruction.
404   bool hasSMRDReadVALUDefHazard() const {
405     return getGeneration() == SOUTHERN_ISLANDS;
406   }
407 
408   /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
409   /// SGPR was written by a VALU Instruction.
410   bool hasVMEMReadSGPRVALUDefHazard() const {
411     return getGeneration() >= VOLCANIC_ISLANDS;
412   }
413 
414   bool hasRFEHazards() const {
415     return getGeneration() >= VOLCANIC_ISLANDS;
416   }
417 
418   /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
419   unsigned getSetRegWaitStates() const {
420     return getGeneration() <= SEA_ISLANDS ? 1 : 2;
421   }
422 
423   bool dumpCode() const {
424     return DumpCode;
425   }
426 
427   /// Return the amount of LDS that can be used that will not restrict the
428   /// occupancy lower than WaveCount.
429   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
430                                            const Function &) const;
431 
432   bool supportsMinMaxDenormModes() const {
433     return getGeneration() >= AMDGPUSubtarget::GFX9;
434   }
435 
436   /// \returns If target supports S_DENORM_MODE.
437   bool hasDenormModeInst() const {
438     return getGeneration() >= AMDGPUSubtarget::GFX10;
439   }
440 
441   bool useFlatForGlobal() const {
442     return FlatForGlobal;
443   }
444 
445   /// \returns If target supports ds_read/write_b128 and user enables generation
446   /// of ds_read/write_b128.
447   bool useDS128() const {
448     return CIInsts && EnableDS128;
449   }
450 
451   /// \return If target supports ds_read/write_b96/128.
452   bool hasDS96AndDS128() const {
453     return CIInsts;
454   }
455 
456   /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
457   bool haveRoundOpsF64() const {
458     return CIInsts;
459   }
460 
461   /// \returns If MUBUF instructions always perform range checking, even for
462   /// buffer resources used for private memory access.
463   bool privateMemoryResourceIsRangeChecked() const {
464     return getGeneration() < AMDGPUSubtarget::GFX9;
465   }
466 
467   /// \returns If target requires PRT Struct NULL support (zero result registers
468   /// for sparse texture support).
469   bool usePRTStrictNull() const {
470     return EnablePRTStrictNull;
471   }
472 
473   bool hasAutoWaitcntBeforeBarrier() const {
474     return AutoWaitcntBeforeBarrier;
475   }
476 
477   bool hasUnalignedBufferAccess() const {
478     return UnalignedBufferAccess;
479   }
480 
481   bool hasUnalignedBufferAccessEnabled() const {
482     return UnalignedBufferAccess && UnalignedAccessMode;
483   }
484 
485   bool hasUnalignedDSAccess() const {
486     return UnalignedDSAccess;
487   }
488 
489   bool hasUnalignedDSAccessEnabled() const {
490     return UnalignedDSAccess && UnalignedAccessMode;
491   }
492 
493   bool hasUnalignedScratchAccess() const {
494     return UnalignedScratchAccess;
495   }
496 
497   bool hasUnalignedAccessMode() const {
498     return UnalignedAccessMode;
499   }
500 
501   bool hasApertureRegs() const {
502     return HasApertureRegs;
503   }
504 
505   bool isTrapHandlerEnabled() const {
506     return TrapHandler;
507   }
508 
509   bool isXNACKEnabled() const {
510     return TargetID.isXnackOnOrAny();
511   }
512 
513   bool isCuModeEnabled() const {
514     return EnableCuMode;
515   }
516 
517   bool hasFlatAddressSpace() const {
518     return FlatAddressSpace;
519   }
520 
521   bool hasFlatScrRegister() const {
522     return hasFlatAddressSpace();
523   }
524 
525   bool hasFlatInstOffsets() const {
526     return FlatInstOffsets;
527   }
528 
529   bool hasFlatGlobalInsts() const {
530     return FlatGlobalInsts;
531   }
532 
533   bool hasFlatScratchInsts() const {
534     return FlatScratchInsts;
535   }
536 
537   // Check if target supports ST addressing mode with FLAT scratch instructions.
538   // The ST addressing mode means no registers are used, either VGPR or SGPR,
539   // but only immediate offset is swizzled and added to the FLAT scratch base.
540   bool hasFlatScratchSTMode() const {
541     return hasFlatScratchInsts() && hasGFX10_3Insts();
542   }
543 
544   bool hasScalarFlatScratchInsts() const {
545     return ScalarFlatScratchInsts;
546   }
547 
548   bool hasGlobalAddTidInsts() const {
549     return GFX10_BEncoding;
550   }
551 
552   bool hasAtomicCSub() const {
553     return GFX10_BEncoding;
554   }
555 
556   bool hasMultiDwordFlatScratchAddressing() const {
557     return getGeneration() >= GFX9;
558   }
559 
560   bool hasFlatSegmentOffsetBug() const {
561     return HasFlatSegmentOffsetBug;
562   }
563 
564   bool hasFlatLgkmVMemCountInOrder() const {
565     return getGeneration() > GFX9;
566   }
567 
568   bool hasD16LoadStore() const {
569     return getGeneration() >= GFX9;
570   }
571 
572   bool d16PreservesUnusedBits() const {
573     return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
574   }
575 
576   bool hasD16Images() const {
577     return getGeneration() >= VOLCANIC_ISLANDS;
578   }
579 
580   /// Return if most LDS instructions have an m0 use that require m0 to be
581   /// iniitalized.
582   bool ldsRequiresM0Init() const {
583     return getGeneration() < GFX9;
584   }
585 
586   // True if the hardware rewinds and replays GWS operations if a wave is
587   // preempted.
588   //
589   // If this is false, a GWS operation requires testing if a nack set the
590   // MEM_VIOL bit, and repeating if so.
591   bool hasGWSAutoReplay() const {
592     return getGeneration() >= GFX9;
593   }
594 
595   /// \returns if target has ds_gws_sema_release_all instruction.
596   bool hasGWSSemaReleaseAll() const {
597     return CIInsts;
598   }
599 
600   /// \returns true if the target has integer add/sub instructions that do not
601   /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
602   /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
603   /// for saturation.
604   bool hasAddNoCarry() const {
605     return AddNoCarryInsts;
606   }
607 
608   bool hasUnpackedD16VMem() const {
609     return HasUnpackedD16VMem;
610   }
611 
612   // Covers VS/PS/CS graphics shaders
613   bool isMesaGfxShader(const Function &F) const {
614     return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
615   }
616 
617   bool hasMad64_32() const {
618     return getGeneration() >= SEA_ISLANDS;
619   }
620 
621   bool hasSDWAOmod() const {
622     return HasSDWAOmod;
623   }
624 
625   bool hasSDWAScalar() const {
626     return HasSDWAScalar;
627   }
628 
629   bool hasSDWASdst() const {
630     return HasSDWASdst;
631   }
632 
633   bool hasSDWAMac() const {
634     return HasSDWAMac;
635   }
636 
637   bool hasSDWAOutModsVOPC() const {
638     return HasSDWAOutModsVOPC;
639   }
640 
641   bool hasDLInsts() const {
642     return HasDLInsts;
643   }
644 
645   bool hasDot1Insts() const {
646     return HasDot1Insts;
647   }
648 
649   bool hasDot2Insts() const {
650     return HasDot2Insts;
651   }
652 
653   bool hasDot3Insts() const {
654     return HasDot3Insts;
655   }
656 
657   bool hasDot4Insts() const {
658     return HasDot4Insts;
659   }
660 
661   bool hasDot5Insts() const {
662     return HasDot5Insts;
663   }
664 
665   bool hasDot6Insts() const {
666     return HasDot6Insts;
667   }
668 
669   bool hasMAIInsts() const {
670     return HasMAIInsts;
671   }
672 
673   bool hasPkFmacF16Inst() const {
674     return HasPkFmacF16Inst;
675   }
676 
677   bool hasAtomicFaddInsts() const {
678     return HasAtomicFaddInsts;
679   }
680 
681   bool hasNoSdstCMPX() const {
682     return HasNoSdstCMPX;
683   }
684 
685   bool hasVscnt() const {
686     return HasVscnt;
687   }
688 
689   bool hasGetWaveIdInst() const {
690     return HasGetWaveIdInst;
691   }
692 
693   bool hasSMemTimeInst() const {
694     return HasSMemTimeInst;
695   }
696 
697   bool hasRegisterBanking() const {
698     return HasRegisterBanking;
699   }
700 
701   bool hasVOP3Literal() const {
702     return HasVOP3Literal;
703   }
704 
705   bool hasNoDataDepHazard() const {
706     return HasNoDataDepHazard;
707   }
708 
709   bool vmemWriteNeedsExpWaitcnt() const {
710     return getGeneration() < SEA_ISLANDS;
711   }
712 
713   // Scratch is allocated in 256 dword per wave blocks for the entire
714   // wavefront. When viewed from the perspecive of an arbitrary workitem, this
715   // is 4-byte aligned.
716   //
717   // Only 4-byte alignment is really needed to access anything. Transformations
718   // on the pointer value itself may rely on the alignment / known low bits of
719   // the pointer. Set this to something above the minimum to avoid needing
720   // dynamic realignment in common cases.
721   Align getStackAlignment() const { return Align(16); }
722 
723   bool enableMachineScheduler() const override {
724     return true;
725   }
726 
727   bool useAA() const override;
728 
729   bool enableSubRegLiveness() const override {
730     return true;
731   }
732 
733   void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
734   bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
735 
736   // static wrappers
737   static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
738 
739   // XXX - Why is this here if it isn't in the default pass set?
740   bool enableEarlyIfConversion() const override {
741     return true;
742   }
743 
744   bool enableFlatScratch() const;
745 
746   void overrideSchedPolicy(MachineSchedPolicy &Policy,
747                            unsigned NumRegionInstrs) const override;
748 
749   unsigned getMaxNumUserSGPRs() const {
750     return 16;
751   }
752 
753   bool hasSMemRealTime() const {
754     return HasSMemRealTime;
755   }
756 
757   bool hasMovrel() const {
758     return HasMovrel;
759   }
760 
761   bool hasVGPRIndexMode() const {
762     return HasVGPRIndexMode;
763   }
764 
765   bool useVGPRIndexMode() const;
766 
767   bool hasScalarCompareEq64() const {
768     return getGeneration() >= VOLCANIC_ISLANDS;
769   }
770 
771   bool hasScalarStores() const {
772     return HasScalarStores;
773   }
774 
775   bool hasScalarAtomics() const {
776     return HasScalarAtomics;
777   }
778 
779   bool hasLDSFPAtomics() const {
780     return GFX8Insts;
781   }
782 
783   bool hasDPP() const {
784     return HasDPP;
785   }
786 
787   bool hasDPPBroadcasts() const {
788     return HasDPP && getGeneration() < GFX10;
789   }
790 
791   bool hasDPPWavefrontShifts() const {
792     return HasDPP && getGeneration() < GFX10;
793   }
794 
795   bool hasDPP8() const {
796     return HasDPP8;
797   }
798 
799   bool hasR128A16() const {
800     return HasR128A16;
801   }
802 
803   bool hasGFX10A16() const {
804     return HasGFX10A16;
805   }
806 
807   bool hasA16() const { return hasR128A16() || hasGFX10A16(); }
808 
809   bool hasG16() const { return HasG16; }
810 
811   bool hasOffset3fBug() const {
812     return HasOffset3fBug;
813   }
814 
815   bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
816 
817   bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; }
818 
819   bool hasNSAEncoding() const { return HasNSAEncoding; }
820 
821   bool hasGFX10_BEncoding() const {
822     return GFX10_BEncoding;
823   }
824 
825   bool hasGFX10_3Insts() const {
826     return GFX10_3Insts;
827   }
828 
829   bool hasMadF16() const;
830 
831   bool enableSIScheduler() const {
832     return EnableSIScheduler;
833   }
834 
835   bool loadStoreOptEnabled() const {
836     return EnableLoadStoreOpt;
837   }
838 
839   bool hasSGPRInitBug() const {
840     return SGPRInitBug;
841   }
842 
843   bool hasMFMAInlineLiteralBug() const {
844     return HasMFMAInlineLiteralBug;
845   }
846 
847   bool has12DWordStoreHazard() const {
848     return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
849   }
850 
851   // \returns true if the subtarget supports DWORDX3 load/store instructions.
852   bool hasDwordx3LoadStores() const {
853     return CIInsts;
854   }
855 
856   bool hasReadM0MovRelInterpHazard() const {
857     return getGeneration() == AMDGPUSubtarget::GFX9;
858   }
859 
860   bool hasReadM0SendMsgHazard() const {
861     return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
862            getGeneration() <= AMDGPUSubtarget::GFX9;
863   }
864 
865   bool hasVcmpxPermlaneHazard() const {
866     return HasVcmpxPermlaneHazard;
867   }
868 
869   bool hasVMEMtoScalarWriteHazard() const {
870     return HasVMEMtoScalarWriteHazard;
871   }
872 
873   bool hasSMEMtoVectorWriteHazard() const {
874     return HasSMEMtoVectorWriteHazard;
875   }
876 
877   bool hasLDSMisalignedBug() const {
878     return LDSMisalignedBug && !EnableCuMode;
879   }
880 
881   bool hasInstFwdPrefetchBug() const {
882     return HasInstFwdPrefetchBug;
883   }
884 
885   bool hasVcmpxExecWARHazard() const {
886     return HasVcmpxExecWARHazard;
887   }
888 
889   bool hasLdsBranchVmemWARHazard() const {
890     return HasLdsBranchVmemWARHazard;
891   }
892 
893   bool hasNSAtoVMEMBug() const {
894     return HasNSAtoVMEMBug;
895   }
896 
897   bool hasHardClauses() const { return getGeneration() >= GFX10; }
898 
899   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
900   /// SGPRs
901   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
902 
903   /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
904   /// VGPRs
905   unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
906 
907   /// Return occupancy for the given function. Used LDS and a number of
908   /// registers if provided.
909   /// Note, occupancy can be affected by the scratch allocation as well, but
910   /// we do not have enough information to compute it.
911   unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
912                             unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
913 
914   /// \returns true if the flat_scratch register should be initialized with the
915   /// pointer to the wave's scratch memory rather than a size and offset.
916   bool flatScratchIsPointer() const {
917     return getGeneration() >= AMDGPUSubtarget::GFX9;
918   }
919 
920   /// \returns true if the machine has merged shaders in which s0-s7 are
921   /// reserved by the hardware and user SGPRs start at s8
922   bool hasMergedShaders() const {
923     return getGeneration() >= GFX9;
924   }
925 
926   /// \returns SGPR allocation granularity supported by the subtarget.
927   unsigned getSGPRAllocGranule() const {
928     return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
929   }
930 
931   /// \returns SGPR encoding granularity supported by the subtarget.
932   unsigned getSGPREncodingGranule() const {
933     return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
934   }
935 
936   /// \returns Total number of SGPRs supported by the subtarget.
937   unsigned getTotalNumSGPRs() const {
938     return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
939   }
940 
941   /// \returns Addressable number of SGPRs supported by the subtarget.
942   unsigned getAddressableNumSGPRs() const {
943     return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
944   }
945 
946   /// \returns Minimum number of SGPRs that meets the given number of waves per
947   /// execution unit requirement supported by the subtarget.
948   unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
949     return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
950   }
951 
952   /// \returns Maximum number of SGPRs that meets the given number of waves per
953   /// execution unit requirement supported by the subtarget.
954   unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
955     return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
956   }
957 
958   /// \returns Reserved number of SGPRs for given function \p MF.
959   unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
960 
961   /// \returns Maximum number of SGPRs that meets number of waves per execution
962   /// unit requirement for function \p MF, or number of SGPRs explicitly
963   /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
964   ///
965   /// \returns Value that meets number of waves per execution unit requirement
966   /// if explicitly requested value cannot be converted to integer, violates
967   /// subtarget's specifications, or does not meet number of waves per execution
968   /// unit requirement.
969   unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
970 
971   /// \returns VGPR allocation granularity supported by the subtarget.
972   unsigned getVGPRAllocGranule() const {
973     return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
974   }
975 
976   /// \returns VGPR encoding granularity supported by the subtarget.
977   unsigned getVGPREncodingGranule() const {
978     return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
979   }
980 
981   /// \returns Total number of VGPRs supported by the subtarget.
982   unsigned getTotalNumVGPRs() const {
983     return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
984   }
985 
986   /// \returns Addressable number of VGPRs supported by the subtarget.
987   unsigned getAddressableNumVGPRs() const {
988     return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
989   }
990 
991   /// \returns Minimum number of VGPRs that meets given number of waves per
992   /// execution unit requirement supported by the subtarget.
993   unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
994     return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
995   }
996 
997   /// \returns Maximum number of VGPRs that meets given number of waves per
998   /// execution unit requirement supported by the subtarget.
999   unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
1000     return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
1001   }
1002 
1003   /// \returns Maximum number of VGPRs that meets number of waves per execution
1004   /// unit requirement for function \p MF, or number of VGPRs explicitly
1005   /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
1006   ///
1007   /// \returns Value that meets number of waves per execution unit requirement
1008   /// if explicitly requested value cannot be converted to integer, violates
1009   /// subtarget's specifications, or does not meet number of waves per execution
1010   /// unit requirement.
1011   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
1012 
1013   void getPostRAMutations(
1014       std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
1015       const override;
1016 
1017   bool isWave32() const {
1018     return getWavefrontSize() == 32;
1019   }
1020 
1021   bool isWave64() const {
1022     return getWavefrontSize() == 64;
1023   }
1024 
1025   const TargetRegisterClass *getBoolRC() const {
1026     return getRegisterInfo()->getBoolRC();
1027   }
1028 
1029   /// \returns Maximum number of work groups per compute unit supported by the
1030   /// subtarget and limited by given \p FlatWorkGroupSize.
1031   unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
1032     return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
1033   }
1034 
1035   /// \returns Minimum flat work group size supported by the subtarget.
1036   unsigned getMinFlatWorkGroupSize() const override {
1037     return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
1038   }
1039 
1040   /// \returns Maximum flat work group size supported by the subtarget.
1041   unsigned getMaxFlatWorkGroupSize() const override {
1042     return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
1043   }
1044 
1045   /// \returns Number of waves per execution unit required to support the given
1046   /// \p FlatWorkGroupSize.
1047   unsigned
1048   getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
1049     return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
1050   }
1051 
1052   /// \returns Minimum number of waves per execution unit supported by the
1053   /// subtarget.
1054   unsigned getMinWavesPerEU() const override {
1055     return AMDGPU::IsaInfo::getMinWavesPerEU(this);
1056   }
1057 
1058   void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
1059                              SDep &Dep) const override;
1060 };
1061 
1062 } // end namespace llvm
1063 
1064 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
1065