1 //===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Interface definition for SIRegisterInfo 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H 15 #define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H 16 17 #include "llvm/ADT/BitVector.h" 18 19 #define GET_REGINFO_HEADER 20 #include "AMDGPUGenRegisterInfo.inc" 21 22 #include "SIDefines.h" 23 24 namespace llvm { 25 26 class GCNSubtarget; 27 class LiveIntervals; 28 class LiveRegUnits; 29 class MachineInstrBuilder; 30 class RegisterBank; 31 struct SGPRSpillBuilder; 32 33 /// Register allocation hint types. Helps eliminate unneeded COPY with True16 34 namespace AMDGPURI { 35 36 enum { Size16 = 1, Size32 = 2 }; 37 38 } // end namespace AMDGPURI 39 40 class SIRegisterInfo final : public AMDGPUGenRegisterInfo { 41 private: 42 const GCNSubtarget &ST; 43 bool SpillSGPRToVGPR; 44 bool isWave32; 45 BitVector RegPressureIgnoredUnits; 46 47 /// Sub reg indexes for getRegSplitParts. 48 /// First index represents subreg size from 1 to 32 Half DWORDS. 49 /// The inner vector is sorted by bit offset. 50 /// Provided a register can be fully split with given subregs, 51 /// all elements of the inner vector combined give a full lane mask. 52 static std::array<std::vector<int16_t>, 32> RegSplitParts; 53 54 // Table representing sub reg of given width and offset. 55 // First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512. 56 // Second index is 32 different dword offsets. 57 static std::array<std::array<uint16_t, 32>, 9> SubRegFromChannelTable; 58 59 void reserveRegisterTuples(BitVector &, MCRegister Reg) const; 60 61 public: 62 SIRegisterInfo(const GCNSubtarget &ST); 63 64 struct SpilledReg { 65 Register VGPR; 66 int Lane = -1; 67 68 SpilledReg() = default; SpilledRegSpilledReg69 SpilledReg(Register R, int L) : VGPR(R), Lane(L) {} 70 hasLaneSpilledReg71 bool hasLane() { return Lane != -1; } hasRegSpilledReg72 bool hasReg() { return VGPR != 0; } 73 }; 74 75 /// \returns the sub reg enum value for the given \p Channel 76 /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0) 77 static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1); 78 spillSGPRToVGPR()79 bool spillSGPRToVGPR() const { 80 return SpillSGPRToVGPR; 81 } 82 83 /// Return the largest available SGPR aligned to \p Align for the register 84 /// class \p RC. 85 MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF, 86 const unsigned Align, 87 const TargetRegisterClass *RC) const; 88 89 /// Return the end register initially reserved for the scratch buffer in case 90 /// spilling is needed. 91 MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const; 92 93 /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number 94 /// of waves per execution unit required for the function \p MF. 95 std::pair<unsigned, unsigned> 96 getMaxNumVectorRegs(const MachineFunction &MF) const; 97 98 BitVector getReservedRegs(const MachineFunction &MF) const override; 99 bool isAsmClobberable(const MachineFunction &MF, 100 MCRegister PhysReg) const override; 101 102 const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; 103 const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const; 104 const uint32_t *getCallPreservedMask(const MachineFunction &MF, 105 CallingConv::ID) const override; 106 const uint32_t *getNoPreservedMask() const override; 107 108 // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling 109 // conventions are free to use certain VGPRs without saving and restoring any 110 // lanes (not even inactive ones). 111 static bool isChainScratchRegister(Register VGPR); 112 113 // Stack access is very expensive. CSRs are also the high registers, and we 114 // want to minimize the number of used registers. getCSRFirstUseCost()115 unsigned getCSRFirstUseCost() const override { 116 return 100; 117 } 118 119 // When building a block VGPR load, we only really transfer a subset of the 120 // registers in the block, based on a mask. Liveness analysis is not aware of 121 // the mask, so it might consider that any register in the block is available 122 // before the load and may therefore be scavenged. This is not ok for CSRs 123 // that are not clobbered, since the caller will expect them to be preserved. 124 // This method will add artificial implicit uses for those registers on the 125 // load instruction, so liveness analysis knows they're unavailable. 126 void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, 127 Register BlockReg) const; 128 129 const TargetRegisterClass * 130 getLargestLegalSuperClass(const TargetRegisterClass *RC, 131 const MachineFunction &MF) const override; 132 133 Register getFrameRegister(const MachineFunction &MF) const override; 134 135 bool hasBasePointer(const MachineFunction &MF) const; 136 Register getBaseRegister() const; 137 138 bool shouldRealignStack(const MachineFunction &MF) const override; 139 bool requiresRegisterScavenging(const MachineFunction &Fn) const override; 140 141 bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; 142 bool requiresFrameIndexReplacementScavenging( 143 const MachineFunction &MF) const override; 144 bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override; 145 146 int64_t getScratchInstrOffset(const MachineInstr *MI) const; 147 148 int64_t getFrameIndexInstrOffset(const MachineInstr *MI, 149 int Idx) const override; 150 151 bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; 152 153 Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, 154 int64_t Offset) const override; 155 156 void resolveFrameIndex(MachineInstr &MI, Register BaseReg, 157 int64_t Offset) const override; 158 159 bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, 160 int64_t Offset) const override; 161 162 const TargetRegisterClass *getPointerRegClass( 163 const MachineFunction &MF, unsigned Kind = 0) const override; 164 165 /// Returns a legal register class to copy a register in the specified class 166 /// to or from. If it is possible to copy the register directly without using 167 /// a cross register class copy, return the specified RC. Returns NULL if it 168 /// is not possible to copy between two registers of the specified class. 169 const TargetRegisterClass * 170 getCrossCopyRegClass(const TargetRegisterClass *RC) const override; 171 172 const TargetRegisterClass * getRegClassForBlockOp(const MachineFunction & MF)173 getRegClassForBlockOp(const MachineFunction &MF) const { 174 return &AMDGPU::VReg_1024RegClass; 175 } 176 177 void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, 178 bool IsLoad, bool IsKill = true) const; 179 180 /// If \p OnlyToVGPR is true, this will only succeed if this manages to find a 181 /// free VGPR lane to spill. 182 bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, 183 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, 184 bool OnlyToVGPR = false, 185 bool SpillToPhysVGPRLane = false) const; 186 187 bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, 188 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, 189 bool OnlyToVGPR = false, 190 bool SpillToPhysVGPRLane = false) const; 191 192 bool spillEmergencySGPR(MachineBasicBlock::iterator MI, 193 MachineBasicBlock &RestoreMBB, Register SGPR, 194 RegScavenger *RS) const; 195 196 bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, 197 unsigned FIOperandNum, 198 RegScavenger *RS) const override; 199 200 bool eliminateSGPRToVGPRSpillFrameIndex( 201 MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, 202 SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr, 203 bool SpillToPhysVGPRLane = false) const; 204 205 StringRef getRegAsmName(MCRegister Reg) const override; 206 207 // Pseudo regs are not allowed getHWRegIndex(MCRegister Reg)208 unsigned getHWRegIndex(MCRegister Reg) const { 209 return getEncodingValue(Reg) & 0xff; 210 } 211 212 LLVM_READONLY 213 const TargetRegisterClass *getVGPRClassForBitWidth(unsigned BitWidth) const; 214 215 LLVM_READONLY 216 const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const; 217 218 LLVM_READONLY 219 const TargetRegisterClass * 220 getVectorSuperClassForBitWidth(unsigned BitWidth) const; 221 222 LLVM_READONLY 223 static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth); 224 225 /// \returns true if this class contains only SGPR registers isSGPRClass(const TargetRegisterClass * RC)226 static bool isSGPRClass(const TargetRegisterClass *RC) { 227 return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC); 228 } 229 230 /// \returns true if this class ID contains only SGPR registers isSGPRClassID(unsigned RCID)231 bool isSGPRClassID(unsigned RCID) const { 232 return isSGPRClass(getRegClass(RCID)); 233 } 234 235 bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const; isSGPRPhysReg(Register Reg)236 bool isSGPRPhysReg(Register Reg) const { 237 return isSGPRClass(getPhysRegBaseClass(Reg)); 238 } 239 isVGPRPhysReg(Register Reg)240 bool isVGPRPhysReg(Register Reg) const { 241 return isVGPRClass(getPhysRegBaseClass(Reg)); 242 } 243 244 /// \returns true if this class contains only VGPR registers isVGPRClass(const TargetRegisterClass * RC)245 static bool isVGPRClass(const TargetRegisterClass *RC) { 246 return hasVGPRs(RC) && !hasAGPRs(RC) && !hasSGPRs(RC); 247 } 248 249 /// \returns true if this class contains only AGPR registers isAGPRClass(const TargetRegisterClass * RC)250 static bool isAGPRClass(const TargetRegisterClass *RC) { 251 return hasAGPRs(RC) && !hasVGPRs(RC) && !hasSGPRs(RC); 252 } 253 254 /// \returns true only if this class contains both VGPR and AGPR registers isVectorSuperClass(const TargetRegisterClass * RC)255 bool isVectorSuperClass(const TargetRegisterClass *RC) const { 256 return hasVGPRs(RC) && hasAGPRs(RC) && !hasSGPRs(RC); 257 } 258 259 /// \returns true only if this class contains both VGPR and SGPR registers isVSSuperClass(const TargetRegisterClass * RC)260 bool isVSSuperClass(const TargetRegisterClass *RC) const { 261 return hasVGPRs(RC) && hasSGPRs(RC) && !hasAGPRs(RC); 262 } 263 264 /// \returns true if this class contains VGPR registers. hasVGPRs(const TargetRegisterClass * RC)265 static bool hasVGPRs(const TargetRegisterClass *RC) { 266 return RC->TSFlags & SIRCFlags::HasVGPR; 267 } 268 269 /// \returns true if this class contains AGPR registers. hasAGPRs(const TargetRegisterClass * RC)270 static bool hasAGPRs(const TargetRegisterClass *RC) { 271 return RC->TSFlags & SIRCFlags::HasAGPR; 272 } 273 274 /// \returns true if this class contains SGPR registers. hasSGPRs(const TargetRegisterClass * RC)275 static bool hasSGPRs(const TargetRegisterClass *RC) { 276 return RC->TSFlags & SIRCFlags::HasSGPR; 277 } 278 279 /// \returns true if this class contains any vector registers. hasVectorRegisters(const TargetRegisterClass * RC)280 static bool hasVectorRegisters(const TargetRegisterClass *RC) { 281 return hasVGPRs(RC) || hasAGPRs(RC); 282 } 283 284 /// \returns A VGPR reg class with the same width as \p SRC 285 const TargetRegisterClass * 286 getEquivalentVGPRClass(const TargetRegisterClass *SRC) const; 287 288 /// \returns An AGPR reg class with the same width as \p SRC 289 const TargetRegisterClass * 290 getEquivalentAGPRClass(const TargetRegisterClass *SRC) const; 291 292 /// \returns A SGPR reg class with the same width as \p SRC 293 const TargetRegisterClass * 294 getEquivalentSGPRClass(const TargetRegisterClass *VRC) const; 295 296 /// Returns a register class which is compatible with \p SuperRC, such that a 297 /// subregister exists with class \p SubRC with subregister index \p 298 /// SubIdx. If this is impossible (e.g., an unaligned subregister index within 299 /// a register tuple), return null. 300 const TargetRegisterClass * 301 getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, 302 const TargetRegisterClass *SubRC, 303 unsigned SubIdx) const; 304 305 /// \returns True if operands defined with this operand type can accept 306 /// a literal constant (i.e. any 32-bit immediate). 307 bool opCanUseLiteralConstant(unsigned OpType) const; 308 309 /// \returns True if operands defined with this operand type can accept 310 /// an inline constant. i.e. An integer value in the range (-16, 64) or 311 /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f. 312 bool opCanUseInlineConstant(unsigned OpType) const; 313 314 MCRegister findUnusedRegister(const MachineRegisterInfo &MRI, 315 const TargetRegisterClass *RC, 316 const MachineFunction &MF, 317 bool ReserveHighestVGPR = false) const; 318 319 const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI, 320 Register Reg) const; 321 const TargetRegisterClass * 322 getRegClassForOperandReg(const MachineRegisterInfo &MRI, 323 const MachineOperand &MO) const; 324 325 bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const; 326 bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const; isVectorRegister(const MachineRegisterInfo & MRI,Register Reg)327 bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const { 328 return isVGPR(MRI, Reg) || isAGPR(MRI, Reg); 329 } 330 331 // FIXME: SGPRs are assumed to be uniform, but this is not true for i1 SGPRs 332 // (such as VCC) which hold a wave-wide vector of boolean values. Examining 333 // just the register class is not suffcient; it needs to be combined with a 334 // value type. The next predicate isUniformReg() does this correctly. isDivergentRegClass(const TargetRegisterClass * RC)335 bool isDivergentRegClass(const TargetRegisterClass *RC) const override { 336 return !isSGPRClass(RC); 337 } 338 339 bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, 340 Register Reg) const override; 341 342 ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC, 343 unsigned EltSize) const; 344 345 bool shouldCoalesce(MachineInstr *MI, 346 const TargetRegisterClass *SrcRC, 347 unsigned SubReg, 348 const TargetRegisterClass *DstRC, 349 unsigned DstSubReg, 350 const TargetRegisterClass *NewRC, 351 LiveIntervals &LIS) const override; 352 353 unsigned getRegPressureLimit(const TargetRegisterClass *RC, 354 MachineFunction &MF) const override; 355 356 unsigned getRegPressureSetLimit(const MachineFunction &MF, 357 unsigned Idx) const override; 358 359 bool getRegAllocationHints(Register VirtReg, ArrayRef<MCPhysReg> Order, 360 SmallVectorImpl<MCPhysReg> &Hints, 361 const MachineFunction &MF, const VirtRegMap *VRM, 362 const LiveRegMatrix *Matrix) const override; 363 364 const int *getRegUnitPressureSets(unsigned RegUnit) const override; 365 366 MCRegister getReturnAddressReg(const MachineFunction &MF) const; 367 368 const TargetRegisterClass * 369 getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const; 370 371 const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty,const RegisterBank & Bank)372 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const { 373 return getRegClassForSizeOnBank(Ty.getSizeInBits(), Bank); 374 } 375 376 const TargetRegisterClass * 377 getConstrainedRegClassForOperand(const MachineOperand &MO, 378 const MachineRegisterInfo &MRI) const override; 379 getBoolRC()380 const TargetRegisterClass *getBoolRC() const { 381 return isWave32 ? &AMDGPU::SReg_32RegClass 382 : &AMDGPU::SReg_64RegClass; 383 } 384 getWaveMaskRegClass()385 const TargetRegisterClass *getWaveMaskRegClass() const { 386 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass 387 : &AMDGPU::SReg_64_XEXECRegClass; 388 } 389 390 // Return the appropriate register class to use for 64-bit VGPRs for the 391 // subtarget. 392 const TargetRegisterClass *getVGPR64Class() const; 393 394 MCRegister getVCC() const; 395 396 MCRegister getExec() const; 397 398 const TargetRegisterClass *getRegClass(unsigned RCID) const; 399 400 // Find reaching register definition 401 MachineInstr *findReachingDef(Register Reg, unsigned SubReg, 402 MachineInstr &Use, 403 MachineRegisterInfo &MRI, 404 LiveIntervals *LIS) const; 405 406 const uint32_t *getAllVGPRRegMask() const; 407 const uint32_t *getAllAGPRRegMask() const; 408 const uint32_t *getAllVectorRegMask() const; 409 const uint32_t *getAllAllocatableSRegMask() const; 410 411 // \returns number of 32 bit registers covered by a \p LM getNumCoveredRegs(LaneBitmask LM)412 static unsigned getNumCoveredRegs(LaneBitmask LM) { 413 // The assumption is that every lo16 subreg is an even bit and every hi16 414 // is an adjacent odd bit or vice versa. 415 uint64_t Mask = LM.getAsInteger(); 416 uint64_t Even = Mask & 0xAAAAAAAAAAAAAAAAULL; 417 Mask = (Even >> 1) | Mask; 418 uint64_t Odd = Mask & 0x5555555555555555ULL; 419 return llvm::popcount(Odd); 420 } 421 422 // \returns a DWORD offset of a \p SubReg getChannelFromSubReg(unsigned SubReg)423 unsigned getChannelFromSubReg(unsigned SubReg) const { 424 return SubReg ? (getSubRegIdxOffset(SubReg) + 31) / 32 : 0; 425 } 426 427 // \returns a DWORD size of a \p SubReg getNumChannelsFromSubReg(unsigned SubReg)428 unsigned getNumChannelsFromSubReg(unsigned SubReg) const { 429 return getNumCoveredRegs(getSubRegIndexLaneMask(SubReg)); 430 } 431 432 // For a given 16 bit \p Reg \returns a 32 bit register holding it. 433 // \returns \p Reg otherwise. 434 MCPhysReg get32BitRegister(MCPhysReg Reg) const; 435 436 // Returns true if a given register class is properly aligned for 437 // the subtarget. 438 bool isProperlyAlignedRC(const TargetRegisterClass &RC) const; 439 440 // Given \p RC returns corresponding aligned register class if required 441 // by the subtarget. 442 const TargetRegisterClass * 443 getProperlyAlignedRC(const TargetRegisterClass *RC) const; 444 445 /// Return all SGPR128 which satisfy the waves per execution unit requirement 446 /// of the subtarget. 447 ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF) const; 448 449 /// Return all SGPR64 which satisfy the waves per execution unit requirement 450 /// of the subtarget. 451 ArrayRef<MCPhysReg> getAllSGPR64(const MachineFunction &MF) const; 452 453 /// Return all SGPR32 which satisfy the waves per execution unit requirement 454 /// of the subtarget. 455 ArrayRef<MCPhysReg> getAllSGPR32(const MachineFunction &MF) const; 456 457 // Insert spill or restore instructions. 458 // When lowering spill pseudos, the RegScavenger should be set. 459 // For creating spill instructions during frame lowering, where no scavenger 460 // is available, LiveUnits can be used. 461 void buildSpillLoadStore(MachineBasicBlock &MBB, 462 MachineBasicBlock::iterator MI, const DebugLoc &DL, 463 unsigned LoadStoreOp, int Index, Register ValueReg, 464 bool ValueIsKill, MCRegister ScratchOffsetReg, 465 int64_t InstrOffset, MachineMemOperand *MMO, 466 RegScavenger *RS, 467 LiveRegUnits *LiveUnits = nullptr) const; 468 469 // Return alignment in register file of first register in a register tuple. getRegClassAlignmentNumBits(const TargetRegisterClass * RC)470 unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const { 471 return (RC->TSFlags & SIRCFlags::RegTupleAlignUnitsMask) * 32; 472 } 473 474 // Check if register class RC has required alignment. isRegClassAligned(const TargetRegisterClass * RC,unsigned AlignNumBits)475 bool isRegClassAligned(const TargetRegisterClass *RC, 476 unsigned AlignNumBits) const { 477 assert(AlignNumBits != 0); 478 unsigned RCAlign = getRegClassAlignmentNumBits(RC); 479 return RCAlign == AlignNumBits || 480 (RCAlign > AlignNumBits && (RCAlign % AlignNumBits) == 0); 481 } 482 483 // Return alignment of a SubReg relative to start of a register in RC class. 484 // No check if the subreg is supported by the current RC is made. 485 unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC, 486 unsigned SubReg) const; 487 488 // \returns a number of registers of a given \p RC used in a function. 489 // Does not go inside function calls. 490 unsigned getNumUsedPhysRegs(const MachineRegisterInfo &MRI, 491 const TargetRegisterClass &RC) const; 492 getVRegFlagValue(StringRef Name)493 std::optional<uint8_t> getVRegFlagValue(StringRef Name) const override { 494 return Name == "WWM_REG" ? AMDGPU::VirtRegFlag::WWM_REG 495 : std::optional<uint8_t>{}; 496 } 497 498 SmallVector<StringLiteral> 499 getVRegFlagsOfReg(Register Reg, const MachineFunction &MF) const override; 500 }; 501 502 namespace AMDGPU { 503 /// Get the size in bits of a register from the register class \p RC. 504 unsigned getRegBitWidth(const TargetRegisterClass &RC); 505 } // namespace AMDGPU 506 507 } // End namespace llvm 508 509 #endif 510