//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition for SIRegisterInfo
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
#define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H

#include "llvm/ADT/BitVector.h"

#define GET_REGINFO_HEADER
#include "AMDGPUGenRegisterInfo.inc"

#include "SIDefines.h"

namespace llvm {

class GCNSubtarget;
class LiveIntervals;
class LiveRegUnits;
class RegisterBank;
struct SGPRSpillBuilder;

class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
private:
  const GCNSubtarget &ST;
  // When true, SGPR spills may be lowered to VGPR lanes instead of memory
  // (queried via spillSGPRToVGPR() below).
  bool SpillSGPRToVGPR;
  // True when the subtarget uses a 32-wide wavefront; selects between the
  // 32-bit and 64-bit wave-mask register classes (see getBoolRC()).
  bool isWave32;
  BitVector RegPressureIgnoredUnits;

  /// Sub reg indexes for getRegSplitParts.
  /// First index represents subreg size from 1 to 16 DWORDs.
  /// The inner vector is sorted by bit offset.
  /// Provided a register can be fully split with given subregs,
  /// all elements of the inner vector combined give a full lane mask.
  static std::array<std::vector<int16_t>, 16> RegSplitParts;

  // Table representing sub reg of given width and offset.
  // First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512.
  // Second index is 32 different dword offsets.
  static std::array<std::array<uint16_t, 32>, 9> SubRegFromChannelTable;

  void reserveRegisterTuples(BitVector &, MCRegister Reg) const;

public:
  SIRegisterInfo(const GCNSubtarget &ST);

  /// Location of an SGPR spilled to a VGPR lane. Lane == -1 means no
  /// particular lane has been assigned (see hasLane()).
  struct SpilledReg {
    Register VGPR;
    int Lane = -1;

    SpilledReg() = default;
    SpilledReg(Register R, int L) : VGPR(R), Lane(L) {}

    bool hasLane() { return Lane != -1; }
    bool hasReg() { return VGPR != 0; }
  };

  /// \returns the sub reg enum value for the given \p Channel
  /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
  static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);

  /// \returns true if SGPR spilling to VGPR lanes is enabled.
  bool spillSGPRToVGPR() const {
    return SpillSGPRToVGPR;
  }

  /// Return the largest available SGPR aligned to \p Align for the register
  /// class \p RC.
  MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF,
                                     const unsigned Align,
                                     const TargetRegisterClass *RC) const;

  /// Return the end register initially reserved for the scratch buffer in case
  /// spilling is needed.
  MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;

  BitVector getReservedRegs(const MachineFunction &MF) const override;
  bool isAsmClobberable(const MachineFunction &MF,
                        MCRegister PhysReg) const override;

  const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
  const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
  const uint32_t *getCallPreservedMask(const MachineFunction &MF,
                                       CallingConv::ID) const override;
  const uint32_t *getNoPreservedMask() const override;

  // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling
  // conventions are free to use certain VGPRs without saving and restoring any
  // lanes (not even inactive ones).
  static bool isChainScratchRegister(Register VGPR);

  // Stack access is very expensive. CSRs are also the high registers, and we
  // want to minimize the number of used registers.
  unsigned getCSRFirstUseCost() const override {
    return 100;
  }

  const TargetRegisterClass *
  getLargestLegalSuperClass(const TargetRegisterClass *RC,
                            const MachineFunction &MF) const override;

  Register getFrameRegister(const MachineFunction &MF) const override;

  bool hasBasePointer(const MachineFunction &MF) const;
  Register getBaseRegister() const;

  bool shouldRealignStack(const MachineFunction &MF) const override;
  bool requiresRegisterScavenging(const MachineFunction &Fn) const override;

  bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
  bool requiresFrameIndexReplacementScavenging(
      const MachineFunction &MF) const override;
  bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override;

  int64_t getScratchInstrOffset(const MachineInstr *MI) const;

  int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
                                   int Idx) const override;

  bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;

  Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
                                        int64_t Offset) const override;

  void resolveFrameIndex(MachineInstr &MI, Register BaseReg,
                         int64_t Offset) const override;

  bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg,
                          int64_t Offset) const override;

  const TargetRegisterClass *getPointerRegClass(
      const MachineFunction &MF, unsigned Kind = 0) const override;

  /// Returns a legal register class to copy a register in the specified class
  /// to or from. If it is possible to copy the register directly without using
  /// a cross register class copy, return the specified RC. Returns NULL if it
  /// is not possible to copy between two registers of the specified class.
  const TargetRegisterClass *
  getCrossCopyRegClass(const TargetRegisterClass *RC) const override;

  void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
                               bool IsLoad, bool IsKill = true) const;

  /// If \p OnlyToVGPR is true, this will only succeed if this manages to find a
  /// free VGPR lane to spill.
  bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
                 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
                 bool OnlyToVGPR = false,
                 bool SpillToPhysVGPRLane = false) const;

  bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
                   SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
                   bool OnlyToVGPR = false,
                   bool SpillToPhysVGPRLane = false) const;

  bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
                          MachineBasicBlock &RestoreMBB, Register SGPR,
                          RegScavenger *RS) const;

  bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                           unsigned FIOperandNum,
                           RegScavenger *RS) const override;

  bool eliminateSGPRToVGPRSpillFrameIndex(
      MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
      SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
      bool SpillToPhysVGPRLane = false) const;

  StringRef getRegAsmName(MCRegister Reg) const override;

  // Pseudo regs are not allowed
  unsigned getHWRegIndex(MCRegister Reg) const {
    return getEncodingValue(Reg) & 0xff;
  }

  LLVM_READONLY
  const TargetRegisterClass *getVGPRClassForBitWidth(unsigned BitWidth) const;

  LLVM_READONLY
  const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;

  LLVM_READONLY
  const TargetRegisterClass *
  getVectorSuperClassForBitWidth(unsigned BitWidth) const;

  LLVM_READONLY
  static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);

  /// \returns true if this class contains only SGPR registers
  static bool isSGPRClass(const TargetRegisterClass *RC) {
    return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC);
  }

  /// \returns true if this class ID contains only SGPR registers
  bool isSGPRClassID(unsigned RCID) const {
    return isSGPRClass(getRegClass(RCID));
  }

  bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;

  /// \returns true if this class contains only VGPR registers
  static bool isVGPRClass(const TargetRegisterClass *RC) {
    return hasVGPRs(RC) && !hasAGPRs(RC) && !hasSGPRs(RC);
  }

  /// \returns true if this class contains only AGPR registers
  static bool isAGPRClass(const TargetRegisterClass *RC) {
    return hasAGPRs(RC) && !hasVGPRs(RC) && !hasSGPRs(RC);
  }

  /// \returns true only if this class contains both VGPR and AGPR registers
  bool isVectorSuperClass(const TargetRegisterClass *RC) const {
    return hasVGPRs(RC) && hasAGPRs(RC) && !hasSGPRs(RC);
  }

  /// \returns true only if this class contains both VGPR and SGPR registers
  bool isVSSuperClass(const TargetRegisterClass *RC) const {
    return hasVGPRs(RC) && hasSGPRs(RC) && !hasAGPRs(RC);
  }

  /// \returns true if this class contains VGPR registers.
  static bool hasVGPRs(const TargetRegisterClass *RC) {
    return RC->TSFlags & SIRCFlags::HasVGPR;
  }

  /// \returns true if this class contains AGPR registers.
  static bool hasAGPRs(const TargetRegisterClass *RC) {
    return RC->TSFlags & SIRCFlags::HasAGPR;
  }

  /// \returns true if this class contains SGPR registers.
  static bool hasSGPRs(const TargetRegisterClass *RC) {
    return RC->TSFlags & SIRCFlags::HasSGPR;
  }

  /// \returns true if this class contains any vector registers.
  static bool hasVectorRegisters(const TargetRegisterClass *RC) {
    return hasVGPRs(RC) || hasAGPRs(RC);
  }

  /// \returns A VGPR reg class with the same width as \p SRC
  const TargetRegisterClass *
  getEquivalentVGPRClass(const TargetRegisterClass *SRC) const;

  /// \returns An AGPR reg class with the same width as \p SRC
  const TargetRegisterClass *
  getEquivalentAGPRClass(const TargetRegisterClass *SRC) const;

  /// \returns A SGPR reg class with the same width as \p SRC
  const TargetRegisterClass *
  getEquivalentSGPRClass(const TargetRegisterClass *VRC) const;

  /// Returns a register class which is compatible with \p SuperRC, such that a
  /// subregister exists with class \p SubRC with subregister index \p
  /// SubIdx. If this is impossible (e.g., an unaligned subregister index within
  /// a register tuple), return null.
  const TargetRegisterClass *
  getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
                           const TargetRegisterClass *SubRC,
                           unsigned SubIdx) const;

  bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
                            unsigned DefSubReg,
                            const TargetRegisterClass *SrcRC,
                            unsigned SrcSubReg) const override;

  /// \returns True if operands defined with this operand type can accept
  /// a literal constant (i.e. any 32-bit immediate).
  bool opCanUseLiteralConstant(unsigned OpType) const;

  /// \returns True if operands defined with this operand type can accept
  /// an inline constant. i.e. An integer value in the range (-16, 64) or
  /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
  bool opCanUseInlineConstant(unsigned OpType) const;

  MCRegister findUnusedRegister(const MachineRegisterInfo &MRI,
                                const TargetRegisterClass *RC,
                                const MachineFunction &MF,
                                bool ReserveHighestVGPR = false) const;

  const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
                                               Register Reg) const;
  const TargetRegisterClass *
  getRegClassForOperandReg(const MachineRegisterInfo &MRI,
                           const MachineOperand &MO) const;

  bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const;
  bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const;
  bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const {
    return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
  }

  // FIXME: SGPRs are assumed to be uniform, but this is not true for i1 SGPRs
  // (such as VCC) which hold a wave-wide vector of boolean values. Examining
  // just the register class is not sufficient; it needs to be combined with a
  // value type. The next predicate isUniformReg() does this correctly.
  bool isDivergentRegClass(const TargetRegisterClass *RC) const override {
    return !isSGPRClass(RC);
  }

  bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI,
                    Register Reg) const override;

  ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
                                     unsigned EltSize) const;

  bool shouldCoalesce(MachineInstr *MI,
                      const TargetRegisterClass *SrcRC,
                      unsigned SubReg,
                      const TargetRegisterClass *DstRC,
                      unsigned DstSubReg,
                      const TargetRegisterClass *NewRC,
                      LiveIntervals &LIS) const override;

  unsigned getRegPressureLimit(const TargetRegisterClass *RC,
                               MachineFunction &MF) const override;

  unsigned getRegPressureSetLimit(const MachineFunction &MF,
                                  unsigned Idx) const override;

  const int *getRegUnitPressureSets(unsigned RegUnit) const override;

  MCRegister getReturnAddressReg(const MachineFunction &MF) const;

  const TargetRegisterClass *
  getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const;

  const TargetRegisterClass *
  getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const {
    return getRegClassForSizeOnBank(Ty.getSizeInBits(), Bank);
  }

  const TargetRegisterClass *
  getConstrainedRegClassForOperand(const MachineOperand &MO,
                                   const MachineRegisterInfo &MRI) const override;

  // Register class for wave-wide boolean values: a 32-bit SGPR in wave32
  // mode, a 64-bit SGPR pair otherwise.
  const TargetRegisterClass *getBoolRC() const {
    return isWave32 ? &AMDGPU::SReg_32RegClass
                    : &AMDGPU::SReg_64RegClass;
  }

  const TargetRegisterClass *getWaveMaskRegClass() const {
    return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
                    : &AMDGPU::SReg_64_XEXECRegClass;
  }

  // Return the appropriate register class to use for 64-bit VGPRs for the
  // subtarget.
  const TargetRegisterClass *getVGPR64Class() const;

  MCRegister getVCC() const;

  MCRegister getExec() const;

  const TargetRegisterClass *getRegClass(unsigned RCID) const;

  // Find reaching register definition
  MachineInstr *findReachingDef(Register Reg, unsigned SubReg,
                                MachineInstr &Use,
                                MachineRegisterInfo &MRI,
                                LiveIntervals *LIS) const;

  const uint32_t *getAllVGPRRegMask() const;
  const uint32_t *getAllAGPRRegMask() const;
  const uint32_t *getAllVectorRegMask() const;
  const uint32_t *getAllAllocatableSRegMask() const;

  // \returns number of 32 bit registers covered by a \p LM
  static unsigned getNumCoveredRegs(LaneBitmask LM) {
    // The assumption is that every lo16 subreg is an even bit and every hi16
    // is an adjacent odd bit or vice versa.
    uint64_t Mask = LM.getAsInteger();
    uint64_t Even = Mask & 0xAAAAAAAAAAAAAAAAULL;
    Mask = (Even >> 1) | Mask;
    uint64_t Odd = Mask & 0x5555555555555555ULL;
    return llvm::popcount(Odd);
  }

  // \returns a DWORD offset of a \p SubReg
  unsigned getChannelFromSubReg(unsigned SubReg) const {
    return SubReg ? (getSubRegIdxOffset(SubReg) + 31) / 32 : 0;
  }

  // \returns a DWORD size of a \p SubReg
  unsigned getNumChannelsFromSubReg(unsigned SubReg) const {
    return getNumCoveredRegs(getSubRegIndexLaneMask(SubReg));
  }

  // For a given 16 bit \p Reg \returns a 32 bit register holding it.
  // \returns \p Reg otherwise.
  MCPhysReg get32BitRegister(MCPhysReg Reg) const;

  // Returns true if a given register class is properly aligned for
  // the subtarget.
  bool isProperlyAlignedRC(const TargetRegisterClass &RC) const;

  // Given \p RC returns corresponding aligned register class if required
  // by the subtarget.
  const TargetRegisterClass *
  getProperlyAlignedRC(const TargetRegisterClass *RC) const;

  /// Return all SGPR128 which satisfy the waves per execution unit requirement
  /// of the subtarget.
  ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const;

  /// Return all SGPR64 which satisfy the waves per execution unit requirement
  /// of the subtarget.
  ArrayRef<MCPhysReg> getAllSGPR64(const MachineFunction &MF) const;

  /// Return all SGPR32 which satisfy the waves per execution unit requirement
  /// of the subtarget.
  ArrayRef<MCPhysReg> getAllSGPR32(const MachineFunction &MF) const;

  // Insert spill or restore instructions.
  // When lowering spill pseudos, the RegScavenger should be set.
  // For creating spill instructions during frame lowering, where no scavenger
  // is available, LiveUnits can be used.
  void buildSpillLoadStore(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, const DebugLoc &DL,
                           unsigned LoadStoreOp, int Index, Register ValueReg,
                           bool ValueIsKill, MCRegister ScratchOffsetReg,
                           int64_t InstrOffset, MachineMemOperand *MMO,
                           RegScavenger *RS,
                           LiveRegUnits *LiveUnits = nullptr) const;

  // Return alignment in register file of first register in a register tuple.
  unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const {
    return (RC->TSFlags & SIRCFlags::RegTupleAlignUnitsMask) * 32;
  }

  // Check if register class RC has required alignment.
  bool isRegClassAligned(const TargetRegisterClass *RC,
                         unsigned AlignNumBits) const {
    assert(AlignNumBits != 0);
    unsigned RCAlign = getRegClassAlignmentNumBits(RC);
    return RCAlign == AlignNumBits ||
           (RCAlign > AlignNumBits && (RCAlign % AlignNumBits) == 0);
  }

  // Return alignment of a SubReg relative to start of a register in RC class.
  // No check if the subreg is supported by the current RC is made.
  unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
                                     unsigned SubReg) const;
};

namespace AMDGPU {
/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const TargetRegisterClass &RC);
} // namespace AMDGPU

} // End namespace llvm

#endif