xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIRegisterInfo.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
1 //===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Interface definition for SIRegisterInfo
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
16 
17 #include "llvm/ADT/BitVector.h"
18 
19 #define GET_REGINFO_HEADER
20 #include "AMDGPUGenRegisterInfo.inc"
21 
22 #include "SIDefines.h"
23 
24 namespace llvm {
25 
26 class GCNSubtarget;
27 class LiveIntervals;
28 class LiveRegUnits;
29 class RegisterBank;
30 struct SGPRSpillBuilder;
31 
32 class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
33 private:
34   const GCNSubtarget &ST;
35   bool SpillSGPRToVGPR;
36   bool isWave32;
37   BitVector RegPressureIgnoredUnits;
38 
39   /// Sub reg indexes for getRegSplitParts.
40   /// First index represents subreg size from 1 to 16 DWORDs.
41   /// The inner vector is sorted by bit offset.
42   /// Provided a register can be fully split with given subregs,
43   /// all elements of the inner vector combined give a full lane mask.
44   static std::array<std::vector<int16_t>, 16> RegSplitParts;
45 
46   // Table representing sub reg of given width and offset.
47   // First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512.
48   // Second index is 32 different dword offsets.
49   static std::array<std::array<uint16_t, 32>, 9> SubRegFromChannelTable;
50 
51   void reserveRegisterTuples(BitVector &, MCRegister Reg) const;
52 
53 public:
54   SIRegisterInfo(const GCNSubtarget &ST);
55 
56   struct SpilledReg {
57     Register VGPR;
58     int Lane = -1;
59 
60     SpilledReg() = default;
61     SpilledReg(Register R, int L) : VGPR(R), Lane(L) {}
62 
63     bool hasLane() { return Lane != -1; }
64     bool hasReg() { return VGPR != 0; }
65   };
66 
67   /// \returns the sub reg enum value for the given \p Channel
68   /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
69   static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
70 
71   bool spillSGPRToVGPR() const {
72     return SpillSGPRToVGPR;
73   }
74 
75   /// Return the largest available SGPR aligned to \p Align for the register
76   /// class \p RC.
77   MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF,
78                                      const unsigned Align,
79                                      const TargetRegisterClass *RC) const;
80 
81   /// Return the end register initially reserved for the scratch buffer in case
82   /// spilling is needed.
83   MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
84 
85   BitVector getReservedRegs(const MachineFunction &MF) const override;
86   bool isAsmClobberable(const MachineFunction &MF,
87                         MCRegister PhysReg) const override;
88 
89   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
90   const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
91   const uint32_t *getCallPreservedMask(const MachineFunction &MF,
92                                        CallingConv::ID) const override;
93   const uint32_t *getNoPreservedMask() const override;
94 
95   // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling
96   // conventions are free to use certain VGPRs without saving and restoring any
97   // lanes (not even inactive ones).
98   static bool isChainScratchRegister(Register VGPR);
99 
100   // Stack access is very expensive. CSRs are also the high registers, and we
101   // want to minimize the number of used registers.
102   unsigned getCSRFirstUseCost() const override {
103     return 100;
104   }
105 
106   const TargetRegisterClass *
107   getLargestLegalSuperClass(const TargetRegisterClass *RC,
108                             const MachineFunction &MF) const override;
109 
110   Register getFrameRegister(const MachineFunction &MF) const override;
111 
112   bool hasBasePointer(const MachineFunction &MF) const;
113   Register getBaseRegister() const;
114 
115   bool shouldRealignStack(const MachineFunction &MF) const override;
116   bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
117 
118   bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
119   bool requiresFrameIndexReplacementScavenging(
120     const MachineFunction &MF) const override;
121   bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override;
122 
123   int64_t getScratchInstrOffset(const MachineInstr *MI) const;
124 
125   int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
126                                    int Idx) const override;
127 
128   bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
129 
130   Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
131                                         int64_t Offset) const override;
132 
133   void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
134                          int64_t Offset) const override;
135 
136   bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
137                           int64_t Offset) const override;
138 
139   const TargetRegisterClass *getPointerRegClass(
140     const MachineFunction &MF, unsigned Kind = 0) const override;
141 
142   /// Returns a legal register class to copy a register in the specified class
143   /// to or from. If it is possible to copy the register directly without using
144   /// a cross register class copy, return the specified RC. Returns NULL if it
145   /// is not possible to copy between two registers of the specified class.
146   const TargetRegisterClass *
147   getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
148 
149   void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
150                                bool IsLoad, bool IsKill = true) const;
151 
152   /// If \p OnlyToVGPR is true, this will only succeed if this manages to find a
153   /// free VGPR lane to spill.
154   bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
155                  SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
156                  bool OnlyToVGPR = false,
157                  bool SpillToPhysVGPRLane = false) const;
158 
159   bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
160                    SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
161                    bool OnlyToVGPR = false,
162                    bool SpillToPhysVGPRLane = false) const;
163 
164   bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
165                           MachineBasicBlock &RestoreMBB, Register SGPR,
166                           RegScavenger *RS) const;
167 
168   bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
169                            unsigned FIOperandNum,
170                            RegScavenger *RS) const override;
171 
172   bool eliminateSGPRToVGPRSpillFrameIndex(
173       MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
174       SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
175       bool SpillToPhysVGPRLane = false) const;
176 
177   StringRef getRegAsmName(MCRegister Reg) const override;
178 
179   // Pseudo regs are not allowed
180   unsigned getHWRegIndex(MCRegister Reg) const {
181     return getEncodingValue(Reg) & 0xff;
182   }
183 
184   LLVM_READONLY
185   const TargetRegisterClass *getVGPRClassForBitWidth(unsigned BitWidth) const;
186 
187   LLVM_READONLY
188   const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;
189 
190   LLVM_READONLY
191   const TargetRegisterClass *
192   getVectorSuperClassForBitWidth(unsigned BitWidth) const;
193 
194   LLVM_READONLY
195   static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
196 
197   /// \returns true if this class contains only SGPR registers
198   static bool isSGPRClass(const TargetRegisterClass *RC) {
199     return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC);
200   }
201 
202   /// \returns true if this class ID contains only SGPR registers
203   bool isSGPRClassID(unsigned RCID) const {
204     return isSGPRClass(getRegClass(RCID));
205   }
206 
207   bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;
208 
209   /// \returns true if this class contains only VGPR registers
210   static bool isVGPRClass(const TargetRegisterClass *RC) {
211     return hasVGPRs(RC) && !hasAGPRs(RC) && !hasSGPRs(RC);
212   }
213 
214   /// \returns true if this class contains only AGPR registers
215   static bool isAGPRClass(const TargetRegisterClass *RC) {
216     return hasAGPRs(RC) && !hasVGPRs(RC) && !hasSGPRs(RC);
217   }
218 
219   /// \returns true only if this class contains both VGPR and AGPR registers
220   bool isVectorSuperClass(const TargetRegisterClass *RC) const {
221     return hasVGPRs(RC) && hasAGPRs(RC) && !hasSGPRs(RC);
222   }
223 
224   /// \returns true only if this class contains both VGPR and SGPR registers
225   bool isVSSuperClass(const TargetRegisterClass *RC) const {
226     return hasVGPRs(RC) && hasSGPRs(RC) && !hasAGPRs(RC);
227   }
228 
229   /// \returns true if this class contains VGPR registers.
230   static bool hasVGPRs(const TargetRegisterClass *RC) {
231     return RC->TSFlags & SIRCFlags::HasVGPR;
232   }
233 
234   /// \returns true if this class contains AGPR registers.
235   static bool hasAGPRs(const TargetRegisterClass *RC) {
236     return RC->TSFlags & SIRCFlags::HasAGPR;
237   }
238 
239   /// \returns true if this class contains SGPR registers.
240   static bool hasSGPRs(const TargetRegisterClass *RC) {
241     return RC->TSFlags & SIRCFlags::HasSGPR;
242   }
243 
244   /// \returns true if this class contains any vector registers.
245   static bool hasVectorRegisters(const TargetRegisterClass *RC) {
246     return hasVGPRs(RC) || hasAGPRs(RC);
247   }
248 
249   /// \returns A VGPR reg class with the same width as \p SRC
250   const TargetRegisterClass *
251   getEquivalentVGPRClass(const TargetRegisterClass *SRC) const;
252 
253   /// \returns An AGPR reg class with the same width as \p SRC
254   const TargetRegisterClass *
255   getEquivalentAGPRClass(const TargetRegisterClass *SRC) const;
256 
  /// \returns A SGPR reg class with the same width as \p VRC
258   const TargetRegisterClass *
259   getEquivalentSGPRClass(const TargetRegisterClass *VRC) const;
260 
261   /// Returns a register class which is compatible with \p SuperRC, such that a
262   /// subregister exists with class \p SubRC with subregister index \p
263   /// SubIdx. If this is impossible (e.g., an unaligned subregister index within
264   /// a register tuple), return null.
265   const TargetRegisterClass *
266   getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
267                            const TargetRegisterClass *SubRC,
268                            unsigned SubIdx) const;
269 
270   bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
271                             unsigned DefSubReg,
272                             const TargetRegisterClass *SrcRC,
273                             unsigned SrcSubReg) const override;
274 
275   /// \returns True if operands defined with this operand type can accept
276   /// a literal constant (i.e. any 32-bit immediate).
277   bool opCanUseLiteralConstant(unsigned OpType) const;
278 
  /// \returns True if operands defined with this operand type can accept
  /// an inline constant, i.e. an integer value in the range [-16, 64] or
  /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
282   bool opCanUseInlineConstant(unsigned OpType) const;
283 
284   MCRegister findUnusedRegister(const MachineRegisterInfo &MRI,
285                                 const TargetRegisterClass *RC,
286                                 const MachineFunction &MF,
287                                 bool ReserveHighestVGPR = false) const;
288 
289   const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
290                                                Register Reg) const;
291   const TargetRegisterClass *
292   getRegClassForOperandReg(const MachineRegisterInfo &MRI,
293                            const MachineOperand &MO) const;
294 
295   bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const;
296   bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const;
297   bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const {
298     return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
299   }
300 
301   // FIXME: SGPRs are assumed to be uniform, but this is not true for i1 SGPRs
302   // (such as VCC) which hold a wave-wide vector of boolean values. Examining
303   // just the register class is not suffcient; it needs to be combined with a
304   // value type. The next predicate isUniformReg() does this correctly.
305   bool isDivergentRegClass(const TargetRegisterClass *RC) const override {
306     return !isSGPRClass(RC);
307   }
308 
309   bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI,
310                     Register Reg) const override;
311 
312   ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
313                                      unsigned EltSize) const;
314 
315   bool shouldCoalesce(MachineInstr *MI,
316                       const TargetRegisterClass *SrcRC,
317                       unsigned SubReg,
318                       const TargetRegisterClass *DstRC,
319                       unsigned DstSubReg,
320                       const TargetRegisterClass *NewRC,
321                       LiveIntervals &LIS) const override;
322 
323   unsigned getRegPressureLimit(const TargetRegisterClass *RC,
324                                MachineFunction &MF) const override;
325 
326   unsigned getRegPressureSetLimit(const MachineFunction &MF,
327                                   unsigned Idx) const override;
328 
329   const int *getRegUnitPressureSets(unsigned RegUnit) const override;
330 
331   MCRegister getReturnAddressReg(const MachineFunction &MF) const;
332 
333   const TargetRegisterClass *
334   getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const;
335 
336   const TargetRegisterClass *
337   getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const {
338     return getRegClassForSizeOnBank(Ty.getSizeInBits(), Bank);
339   }
340 
341   const TargetRegisterClass *
342   getConstrainedRegClassForOperand(const MachineOperand &MO,
343                                  const MachineRegisterInfo &MRI) const override;
344 
345   const TargetRegisterClass *getBoolRC() const {
346     return isWave32 ? &AMDGPU::SReg_32RegClass
347                     : &AMDGPU::SReg_64RegClass;
348   }
349 
350   const TargetRegisterClass *getWaveMaskRegClass() const {
351     return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
352                     : &AMDGPU::SReg_64_XEXECRegClass;
353   }
354 
355   // Return the appropriate register class to use for 64-bit VGPRs for the
356   // subtarget.
357   const TargetRegisterClass *getVGPR64Class() const;
358 
359   MCRegister getVCC() const;
360 
361   MCRegister getExec() const;
362 
363   const TargetRegisterClass *getRegClass(unsigned RCID) const;
364 
365   // Find reaching register definition
366   MachineInstr *findReachingDef(Register Reg, unsigned SubReg,
367                                 MachineInstr &Use,
368                                 MachineRegisterInfo &MRI,
369                                 LiveIntervals *LIS) const;
370 
371   const uint32_t *getAllVGPRRegMask() const;
372   const uint32_t *getAllAGPRRegMask() const;
373   const uint32_t *getAllVectorRegMask() const;
374   const uint32_t *getAllAllocatableSRegMask() const;
375 
376   // \returns number of 32 bit registers covered by a \p LM
377   static unsigned getNumCoveredRegs(LaneBitmask LM) {
378     // The assumption is that every lo16 subreg is an even bit and every hi16
379     // is an adjacent odd bit or vice versa.
380     uint64_t Mask = LM.getAsInteger();
381     uint64_t Even = Mask & 0xAAAAAAAAAAAAAAAAULL;
382     Mask = (Even >> 1) | Mask;
383     uint64_t Odd = Mask & 0x5555555555555555ULL;
384     return llvm::popcount(Odd);
385   }
386 
387   // \returns a DWORD offset of a \p SubReg
388   unsigned getChannelFromSubReg(unsigned SubReg) const {
389     return SubReg ? (getSubRegIdxOffset(SubReg) + 31) / 32 : 0;
390   }
391 
392   // \returns a DWORD size of a \p SubReg
393   unsigned getNumChannelsFromSubReg(unsigned SubReg) const {
394     return getNumCoveredRegs(getSubRegIndexLaneMask(SubReg));
395   }
396 
397   // For a given 16 bit \p Reg \returns a 32 bit register holding it.
398   // \returns \p Reg otherwise.
399   MCPhysReg get32BitRegister(MCPhysReg Reg) const;
400 
401   // Returns true if a given register class is properly aligned for
402   // the subtarget.
403   bool isProperlyAlignedRC(const TargetRegisterClass &RC) const;
404 
405   // Given \p RC returns corresponding aligned register class if required
406   // by the subtarget.
407   const TargetRegisterClass *
408   getProperlyAlignedRC(const TargetRegisterClass *RC) const;
409 
410   /// Return all SGPR128 which satisfy the waves per execution unit requirement
411   /// of the subtarget.
412   ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const;
413 
414   /// Return all SGPR64 which satisfy the waves per execution unit requirement
415   /// of the subtarget.
416   ArrayRef<MCPhysReg> getAllSGPR64(const MachineFunction &MF) const;
417 
418   /// Return all SGPR32 which satisfy the waves per execution unit requirement
419   /// of the subtarget.
420   ArrayRef<MCPhysReg> getAllSGPR32(const MachineFunction &MF) const;
421 
422   // Insert spill or restore instructions.
423   // When lowering spill pseudos, the RegScavenger should be set.
424   // For creating spill instructions during frame lowering, where no scavenger
425   // is available, LiveUnits can be used.
426   void buildSpillLoadStore(MachineBasicBlock &MBB,
427                            MachineBasicBlock::iterator MI, const DebugLoc &DL,
428                            unsigned LoadStoreOp, int Index, Register ValueReg,
429                            bool ValueIsKill, MCRegister ScratchOffsetReg,
430                            int64_t InstrOffset, MachineMemOperand *MMO,
431                            RegScavenger *RS,
432                            LiveRegUnits *LiveUnits = nullptr) const;
433 
434   // Return alignment in register file of first register in a register tuple.
435   unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const {
436     return (RC->TSFlags & SIRCFlags::RegTupleAlignUnitsMask) * 32;
437   }
438 
439   // Check if register class RC has required alignment.
440   bool isRegClassAligned(const TargetRegisterClass *RC,
441                          unsigned AlignNumBits) const {
442     assert(AlignNumBits != 0);
443     unsigned RCAlign = getRegClassAlignmentNumBits(RC);
444     return RCAlign == AlignNumBits ||
445            (RCAlign > AlignNumBits && (RCAlign % AlignNumBits) == 0);
446   }
447 
448   // Return alignment of a SubReg relative to start of a register in RC class.
449   // No check if the subreg is supported by the current RC is made.
450   unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
451                                      unsigned SubReg) const;
452 };
453 
454 namespace AMDGPU {
455 /// Get the size in bits of a register from the register class \p RC.
456 unsigned getRegBitWidth(const TargetRegisterClass &RC);
457 } // namespace AMDGPU
458 
459 } // End namespace llvm
460 
461 #endif
462