xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Interface definition for SIRegisterInfo
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
16 
17 #include "llvm/ADT/BitVector.h"
18 
19 #define GET_REGINFO_HEADER
20 #include "AMDGPUGenRegisterInfo.inc"
21 
22 #include "SIDefines.h"
23 
24 namespace llvm {
25 
26 class GCNSubtarget;
27 class LiveIntervals;
28 class LiveRegUnits;
29 class MachineInstrBuilder;
30 class RegisterBank;
31 struct SGPRSpillBuilder;
32 
/// Register allocation hint types. Helps eliminate unneeded COPY with True16
namespace AMDGPURI {

// Hint type identifiers; the numeric values are part of the interface and
// must stay as they are.
enum {
  Size16 = 1,
  Size32 = 2
};

} // end namespace AMDGPURI
39 
40 class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
41 private:
42   const GCNSubtarget &ST;
43   bool SpillSGPRToVGPR;
44   bool isWave32;
45   BitVector RegPressureIgnoredUnits;
46 
47   /// Sub reg indexes for getRegSplitParts.
48   /// First index represents subreg size from 1 to 32 Half DWORDS.
49   /// The inner vector is sorted by bit offset.
50   /// Provided a register can be fully split with given subregs,
51   /// all elements of the inner vector combined give a full lane mask.
52   static std::array<std::vector<int16_t>, 32> RegSplitParts;
53 
54   // Table representing sub reg of given width and offset.
55   // First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512.
56   // Second index is 32 different dword offsets.
57   static std::array<std::array<uint16_t, 32>, 9> SubRegFromChannelTable;
58 
59   void reserveRegisterTuples(BitVector &, MCRegister Reg) const;
60 
61 public:
62   SIRegisterInfo(const GCNSubtarget &ST);
63 
64   struct SpilledReg {
65     Register VGPR;
66     int Lane = -1;
67 
68     SpilledReg() = default;
SpilledRegSpilledReg69     SpilledReg(Register R, int L) : VGPR(R), Lane(L) {}
70 
hasLaneSpilledReg71     bool hasLane() { return Lane != -1; }
hasRegSpilledReg72     bool hasReg() { return VGPR != 0; }
73   };
74 
75   /// \returns the sub reg enum value for the given \p Channel
76   /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
77   static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
78 
spillSGPRToVGPR()79   bool spillSGPRToVGPR() const {
80     return SpillSGPRToVGPR;
81   }
82 
83   /// Return the largest available SGPR aligned to \p Align for the register
84   /// class \p RC.
85   MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF,
86                                      const unsigned Align,
87                                      const TargetRegisterClass *RC) const;
88 
89   /// Return the end register initially reserved for the scratch buffer in case
90   /// spilling is needed.
91   MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
92 
93   /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
94   /// of waves per execution unit required for the function \p MF.
95   std::pair<unsigned, unsigned>
96   getMaxNumVectorRegs(const MachineFunction &MF) const;
97 
98   BitVector getReservedRegs(const MachineFunction &MF) const override;
99   bool isAsmClobberable(const MachineFunction &MF,
100                         MCRegister PhysReg) const override;
101 
102   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
103   const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
104   const uint32_t *getCallPreservedMask(const MachineFunction &MF,
105                                        CallingConv::ID) const override;
106   const uint32_t *getNoPreservedMask() const override;
107 
108   // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling
109   // conventions are free to use certain VGPRs without saving and restoring any
110   // lanes (not even inactive ones).
111   static bool isChainScratchRegister(Register VGPR);
112 
113   // Stack access is very expensive. CSRs are also the high registers, and we
114   // want to minimize the number of used registers.
getCSRFirstUseCost()115   unsigned getCSRFirstUseCost() const override {
116     return 100;
117   }
118 
119   // When building a block VGPR load, we only really transfer a subset of the
120   // registers in the block, based on a mask. Liveness analysis is not aware of
121   // the mask, so it might consider that any register in the block is available
122   // before the load and may therefore be scavenged. This is not ok for CSRs
123   // that are not clobbered, since the caller will expect them to be preserved.
124   // This method will add artificial implicit uses for those registers on the
125   // load instruction, so liveness analysis knows they're unavailable.
126   void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB,
127                                       Register BlockReg) const;
128 
129   const TargetRegisterClass *
130   getLargestLegalSuperClass(const TargetRegisterClass *RC,
131                             const MachineFunction &MF) const override;
132 
133   Register getFrameRegister(const MachineFunction &MF) const override;
134 
135   bool hasBasePointer(const MachineFunction &MF) const;
136   Register getBaseRegister() const;
137 
138   bool shouldRealignStack(const MachineFunction &MF) const override;
139   bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
140 
141   bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
142   bool requiresFrameIndexReplacementScavenging(
143     const MachineFunction &MF) const override;
144   bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override;
145 
146   int64_t getScratchInstrOffset(const MachineInstr *MI) const;
147 
148   int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
149                                    int Idx) const override;
150 
151   bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
152 
153   Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
154                                         int64_t Offset) const override;
155 
156   void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
157                          int64_t Offset) const override;
158 
159   bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
160                           int64_t Offset) const override;
161 
162   const TargetRegisterClass *getPointerRegClass(
163     const MachineFunction &MF, unsigned Kind = 0) const override;
164 
165   /// Returns a legal register class to copy a register in the specified class
166   /// to or from. If it is possible to copy the register directly without using
167   /// a cross register class copy, return the specified RC. Returns NULL if it
168   /// is not possible to copy between two registers of the specified class.
169   const TargetRegisterClass *
170   getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
171 
172   const TargetRegisterClass *
getRegClassForBlockOp(const MachineFunction & MF)173   getRegClassForBlockOp(const MachineFunction &MF) const {
174     return &AMDGPU::VReg_1024RegClass;
175   }
176 
177   void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
178                                bool IsLoad, bool IsKill = true) const;
179 
180   /// If \p OnlyToVGPR is true, this will only succeed if this manages to find a
181   /// free VGPR lane to spill.
182   bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
183                  SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
184                  bool OnlyToVGPR = false,
185                  bool SpillToPhysVGPRLane = false) const;
186 
187   bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
188                    SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
189                    bool OnlyToVGPR = false,
190                    bool SpillToPhysVGPRLane = false) const;
191 
192   bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
193                           MachineBasicBlock &RestoreMBB, Register SGPR,
194                           RegScavenger *RS) const;
195 
196   bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
197                            unsigned FIOperandNum,
198                            RegScavenger *RS) const override;
199 
200   bool eliminateSGPRToVGPRSpillFrameIndex(
201       MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
202       SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
203       bool SpillToPhysVGPRLane = false) const;
204 
205   StringRef getRegAsmName(MCRegister Reg) const override;
206 
207   // Pseudo regs are not allowed
getHWRegIndex(MCRegister Reg)208   unsigned getHWRegIndex(MCRegister Reg) const {
209     return getEncodingValue(Reg) & 0xff;
210   }
211 
212   LLVM_READONLY
213   const TargetRegisterClass *getVGPRClassForBitWidth(unsigned BitWidth) const;
214 
215   LLVM_READONLY
216   const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;
217 
218   LLVM_READONLY
219   const TargetRegisterClass *
220   getVectorSuperClassForBitWidth(unsigned BitWidth) const;
221 
222   LLVM_READONLY
223   static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
224 
225   /// \returns true if this class contains only SGPR registers
isSGPRClass(const TargetRegisterClass * RC)226   static bool isSGPRClass(const TargetRegisterClass *RC) {
227     return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC);
228   }
229 
230   /// \returns true if this class ID contains only SGPR registers
isSGPRClassID(unsigned RCID)231   bool isSGPRClassID(unsigned RCID) const {
232     return isSGPRClass(getRegClass(RCID));
233   }
234 
235   bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;
isSGPRPhysReg(Register Reg)236   bool isSGPRPhysReg(Register Reg) const {
237     return isSGPRClass(getPhysRegBaseClass(Reg));
238   }
239 
isVGPRPhysReg(Register Reg)240   bool isVGPRPhysReg(Register Reg) const {
241     return isVGPRClass(getPhysRegBaseClass(Reg));
242   }
243 
244   /// \returns true if this class contains only VGPR registers
isVGPRClass(const TargetRegisterClass * RC)245   static bool isVGPRClass(const TargetRegisterClass *RC) {
246     return hasVGPRs(RC) && !hasAGPRs(RC) && !hasSGPRs(RC);
247   }
248 
249   /// \returns true if this class contains only AGPR registers
isAGPRClass(const TargetRegisterClass * RC)250   static bool isAGPRClass(const TargetRegisterClass *RC) {
251     return hasAGPRs(RC) && !hasVGPRs(RC) && !hasSGPRs(RC);
252   }
253 
254   /// \returns true only if this class contains both VGPR and AGPR registers
isVectorSuperClass(const TargetRegisterClass * RC)255   bool isVectorSuperClass(const TargetRegisterClass *RC) const {
256     return hasVGPRs(RC) && hasAGPRs(RC) && !hasSGPRs(RC);
257   }
258 
259   /// \returns true only if this class contains both VGPR and SGPR registers
isVSSuperClass(const TargetRegisterClass * RC)260   bool isVSSuperClass(const TargetRegisterClass *RC) const {
261     return hasVGPRs(RC) && hasSGPRs(RC) && !hasAGPRs(RC);
262   }
263 
264   /// \returns true if this class contains VGPR registers.
hasVGPRs(const TargetRegisterClass * RC)265   static bool hasVGPRs(const TargetRegisterClass *RC) {
266     return RC->TSFlags & SIRCFlags::HasVGPR;
267   }
268 
269   /// \returns true if this class contains AGPR registers.
hasAGPRs(const TargetRegisterClass * RC)270   static bool hasAGPRs(const TargetRegisterClass *RC) {
271     return RC->TSFlags & SIRCFlags::HasAGPR;
272   }
273 
274   /// \returns true if this class contains SGPR registers.
hasSGPRs(const TargetRegisterClass * RC)275   static bool hasSGPRs(const TargetRegisterClass *RC) {
276     return RC->TSFlags & SIRCFlags::HasSGPR;
277   }
278 
279   /// \returns true if this class contains any vector registers.
hasVectorRegisters(const TargetRegisterClass * RC)280   static bool hasVectorRegisters(const TargetRegisterClass *RC) {
281     return hasVGPRs(RC) || hasAGPRs(RC);
282   }
283 
284   /// \returns A VGPR reg class with the same width as \p SRC
285   const TargetRegisterClass *
286   getEquivalentVGPRClass(const TargetRegisterClass *SRC) const;
287 
288   /// \returns An AGPR reg class with the same width as \p SRC
289   const TargetRegisterClass *
290   getEquivalentAGPRClass(const TargetRegisterClass *SRC) const;
291 
  /// \returns An SGPR reg class with the same width as \p VRC
293   const TargetRegisterClass *
294   getEquivalentSGPRClass(const TargetRegisterClass *VRC) const;
295 
296   /// Returns a register class which is compatible with \p SuperRC, such that a
297   /// subregister exists with class \p SubRC with subregister index \p
298   /// SubIdx. If this is impossible (e.g., an unaligned subregister index within
299   /// a register tuple), return null.
300   const TargetRegisterClass *
301   getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
302                            const TargetRegisterClass *SubRC,
303                            unsigned SubIdx) const;
304 
305   /// \returns True if operands defined with this operand type can accept
306   /// a literal constant (i.e. any 32-bit immediate).
307   bool opCanUseLiteralConstant(unsigned OpType) const;
308 
309   /// \returns True if operands defined with this operand type can accept
310   /// an inline constant. i.e. An integer value in the range (-16, 64) or
311   /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
312   bool opCanUseInlineConstant(unsigned OpType) const;
313 
314   MCRegister findUnusedRegister(const MachineRegisterInfo &MRI,
315                                 const TargetRegisterClass *RC,
316                                 const MachineFunction &MF,
317                                 bool ReserveHighestVGPR = false) const;
318 
319   const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
320                                                Register Reg) const;
321   const TargetRegisterClass *
322   getRegClassForOperandReg(const MachineRegisterInfo &MRI,
323                            const MachineOperand &MO) const;
324 
325   bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const;
326   bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const;
isVectorRegister(const MachineRegisterInfo & MRI,Register Reg)327   bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const {
328     return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
329   }
330 
331   // FIXME: SGPRs are assumed to be uniform, but this is not true for i1 SGPRs
332   // (such as VCC) which hold a wave-wide vector of boolean values. Examining
333   // just the register class is not suffcient; it needs to be combined with a
334   // value type. The next predicate isUniformReg() does this correctly.
isDivergentRegClass(const TargetRegisterClass * RC)335   bool isDivergentRegClass(const TargetRegisterClass *RC) const override {
336     return !isSGPRClass(RC);
337   }
338 
339   bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI,
340                     Register Reg) const override;
341 
342   ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
343                                      unsigned EltSize) const;
344 
345   bool shouldCoalesce(MachineInstr *MI,
346                       const TargetRegisterClass *SrcRC,
347                       unsigned SubReg,
348                       const TargetRegisterClass *DstRC,
349                       unsigned DstSubReg,
350                       const TargetRegisterClass *NewRC,
351                       LiveIntervals &LIS) const override;
352 
353   unsigned getRegPressureLimit(const TargetRegisterClass *RC,
354                                MachineFunction &MF) const override;
355 
356   unsigned getRegPressureSetLimit(const MachineFunction &MF,
357                                   unsigned Idx) const override;
358 
359   bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order,
360                              SmallVectorImpl<MCPhysReg> &Hints,
361                              const MachineFunction &MF, const VirtRegMap *VRM,
362                              const LiveRegMatrix *Matrix) const override;
363 
364   const int *getRegUnitPressureSets(unsigned RegUnit) const override;
365 
366   MCRegister getReturnAddressReg(const MachineFunction &MF) const;
367 
368   const TargetRegisterClass *
369   getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const;
370 
371   const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty,const RegisterBank & Bank)372   getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const {
373     return getRegClassForSizeOnBank(Ty.getSizeInBits(), Bank);
374   }
375 
376   const TargetRegisterClass *
377   getConstrainedRegClassForOperand(const MachineOperand &MO,
378                                  const MachineRegisterInfo &MRI) const override;
379 
getBoolRC()380   const TargetRegisterClass *getBoolRC() const {
381     return isWave32 ? &AMDGPU::SReg_32RegClass
382                     : &AMDGPU::SReg_64RegClass;
383   }
384 
getWaveMaskRegClass()385   const TargetRegisterClass *getWaveMaskRegClass() const {
386     return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
387                     : &AMDGPU::SReg_64_XEXECRegClass;
388   }
389 
390   // Return the appropriate register class to use for 64-bit VGPRs for the
391   // subtarget.
392   const TargetRegisterClass *getVGPR64Class() const;
393 
394   MCRegister getVCC() const;
395 
396   MCRegister getExec() const;
397 
398   const TargetRegisterClass *getRegClass(unsigned RCID) const;
399 
400   // Find reaching register definition
401   MachineInstr *findReachingDef(Register Reg, unsigned SubReg,
402                                 MachineInstr &Use,
403                                 MachineRegisterInfo &MRI,
404                                 LiveIntervals *LIS) const;
405 
406   const uint32_t *getAllVGPRRegMask() const;
407   const uint32_t *getAllAGPRRegMask() const;
408   const uint32_t *getAllVectorRegMask() const;
409   const uint32_t *getAllAllocatableSRegMask() const;
410 
411   // \returns number of 32 bit registers covered by a \p LM
getNumCoveredRegs(LaneBitmask LM)412   static unsigned getNumCoveredRegs(LaneBitmask LM) {
413     // The assumption is that every lo16 subreg is an even bit and every hi16
414     // is an adjacent odd bit or vice versa.
415     uint64_t Mask = LM.getAsInteger();
416     uint64_t Even = Mask & 0xAAAAAAAAAAAAAAAAULL;
417     Mask = (Even >> 1) | Mask;
418     uint64_t Odd = Mask & 0x5555555555555555ULL;
419     return llvm::popcount(Odd);
420   }
421 
422   // \returns a DWORD offset of a \p SubReg
getChannelFromSubReg(unsigned SubReg)423   unsigned getChannelFromSubReg(unsigned SubReg) const {
424     return SubReg ? (getSubRegIdxOffset(SubReg) + 31) / 32 : 0;
425   }
426 
427   // \returns a DWORD size of a \p SubReg
getNumChannelsFromSubReg(unsigned SubReg)428   unsigned getNumChannelsFromSubReg(unsigned SubReg) const {
429     return getNumCoveredRegs(getSubRegIndexLaneMask(SubReg));
430   }
431 
432   // For a given 16 bit \p Reg \returns a 32 bit register holding it.
433   // \returns \p Reg otherwise.
434   MCPhysReg get32BitRegister(MCPhysReg Reg) const;
435 
436   // Returns true if a given register class is properly aligned for
437   // the subtarget.
438   bool isProperlyAlignedRC(const TargetRegisterClass &RC) const;
439 
440   // Given \p RC returns corresponding aligned register class if required
441   // by the subtarget.
442   const TargetRegisterClass *
443   getProperlyAlignedRC(const TargetRegisterClass *RC) const;
444 
445   /// Return all SGPR128 which satisfy the waves per execution unit requirement
446   /// of the subtarget.
447   ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const;
448 
449   /// Return all SGPR64 which satisfy the waves per execution unit requirement
450   /// of the subtarget.
451   ArrayRef<MCPhysReg> getAllSGPR64(const MachineFunction &MF) const;
452 
453   /// Return all SGPR32 which satisfy the waves per execution unit requirement
454   /// of the subtarget.
455   ArrayRef<MCPhysReg> getAllSGPR32(const MachineFunction &MF) const;
456 
457   // Insert spill or restore instructions.
458   // When lowering spill pseudos, the RegScavenger should be set.
459   // For creating spill instructions during frame lowering, where no scavenger
460   // is available, LiveUnits can be used.
461   void buildSpillLoadStore(MachineBasicBlock &MBB,
462                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
463                            unsigned LoadStoreOp, int Index, Register ValueReg,
464                            bool ValueIsKill, MCRegister ScratchOffsetReg,
465                            int64_t InstrOffset, MachineMemOperand *MMO,
466                            RegScavenger *RS,
467                            LiveRegUnits *LiveUnits = nullptr) const;
468 
469   // Return alignment in register file of first register in a register tuple.
getRegClassAlignmentNumBits(const TargetRegisterClass * RC)470   unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const {
471     return (RC->TSFlags & SIRCFlags::RegTupleAlignUnitsMask) * 32;
472   }
473 
474   // Check if register class RC has required alignment.
isRegClassAligned(const TargetRegisterClass * RC,unsigned AlignNumBits)475   bool isRegClassAligned(const TargetRegisterClass *RC,
476                          unsigned AlignNumBits) const {
477     assert(AlignNumBits != 0);
478     unsigned RCAlign = getRegClassAlignmentNumBits(RC);
479     return RCAlign == AlignNumBits ||
480            (RCAlign > AlignNumBits && (RCAlign % AlignNumBits) == 0);
481   }
482 
483   // Return alignment of a SubReg relative to start of a register in RC class.
484   // No check if the subreg is supported by the current RC is made.
485   unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
486                                      unsigned SubReg) const;
487 
488   // \returns a number of registers of a given \p RC used in a function.
489   // Does not go inside function calls.
490   unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
491                               const TargetRegisterClass &RC) const;
492 
getVRegFlagValue(StringRef Name)493   std::optional<uint8_t> getVRegFlagValue(StringRef Name) const override {
494     return Name == "WWM_REG" ? AMDGPU::VirtRegFlag::WWM_REG
495                              : std::optional<uint8_t>{};
496   }
497 
498   SmallVector<StringLiteral>
499   getVRegFlagsOfReg(Register Reg, const MachineFunction &MF) const override;
500 };
501 
502 namespace AMDGPU {
503 /// Get the size in bits of a register from the register class \p RC.
504 unsigned getRegBitWidth(const TargetRegisterClass &RC);
505 } // namespace AMDGPU
506 
507 } // End namespace llvm
508 
509 #endif
510