//===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Pass to pre-allocate WWM registers.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "si-pre-allocate-wwm-regs"

namespace {

class SIPreAllocateWWMRegs : public MachineFunctionPass {
private:
  const SIInstrInfo *TII;
  const SIRegisterInfo *TRI;
  MachineRegisterInfo *MRI;
  LiveIntervals *LIS;
  LiveRegMatrix *Matrix;
  VirtRegMap *VRM;
  RegisterClassInfo RegClassInfo;

  std::vector<unsigned> RegsToRewrite;
#ifndef NDEBUG
  void printWWMInfo(const MachineInstr &MI);
#endif

public:
  static char ID;

  SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
    initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervals>();
    AU.addPreserved<LiveIntervals>();
    AU.addRequired<VirtRegMap>();
    AU.addRequired<LiveRegMatrix>();
    AU.addPreserved<SlotIndexes>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  bool processDef(MachineOperand &MO);
  void rewriteRegs(MachineFunction &MF);
};

} // End anonymous namespace.
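
// Legacy pass-manager registration; the INITIALIZE_PASS_DEPENDENCY entries
// mirror the analyses requested in getAnalysisUsage() above.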
INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
                      "SI Pre-allocate WWM Registers", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
                    "SI Pre-allocate WWM Registers", false, false)

char SIPreAllocateWWMRegs::ID = 0;

char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;

FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
  return new SIPreAllocateWWMRegs();
}

bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
  if (!MO.isReg())
    return false;

  Register Reg = MO.getReg();
  if (Reg.isPhysical())
    return false;

  if (!TRI->isVGPR(*MRI, Reg))
    return false;

  if (VRM->hasPhys(Reg))
    return false;

  LiveInterval &LI = LIS->getInterval(Reg);

  for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
    if (!MRI->isPhysRegUsed(PhysReg) &&
        Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
      Matrix->assign(LI, PhysReg);
      assert(PhysReg != 0);
      RegsToRewrite.push_back(Reg);
      return true;
    }
  }

  llvm_unreachable("physreg not found for WWM expression");
  return false;
}

void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      for (MachineOperand &MO : MI.operands()) {
        if (!MO.isReg())
          continue;

        const Register VirtReg = MO.getReg();
        if (VirtReg.isPhysical())
          continue;

        if (!VRM->hasPhys(VirtReg))
          continue;

        Register PhysReg = VRM->getPhys(VirtReg);
        const unsigned SubReg = MO.getSubReg();
        if (SubReg != 0) {
          PhysReg = TRI->getSubReg(PhysReg, SubReg);
          MO.setSubReg(0);
        }

        MO.setReg(PhysReg);
        MO.setIsRenamable(false);
      }
    }
  }

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  for (unsigned Reg : RegsToRewrite) {
    LIS->removeInterval(Reg);

    const Register PhysReg = VRM->getPhys(Reg);
    assert(PhysReg != 0);

    // Check if PhysReg is already reserved.
    if (!MFI->WWMReservedRegs.count(PhysReg)) {
      Optional<int> FI;
      if (!MFI->isEntryFunction()) {
        // Create a stack object for a possible spill in the function prologue.
        // Note: Non-CSR VGPRs also need this as we may overwrite inactive lanes.
        const TargetRegisterClass *RC = TRI->getPhysRegClass(PhysReg);
        FI = FrameInfo.CreateSpillStackObject(TRI->getSpillSize(*RC),
                                              TRI->getSpillAlign(*RC));
      }
      MFI->reserveWWMRegister(PhysReg, FI);
    }
  }

  RegsToRewrite.clear();

  // Update the set of reserved registers to include WWM ones.
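  // freezeReservedRegs() re-queries TRI->getReservedRegs() and caches the
  // result, so later passes see the WWM registers reserved above.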
  MRI->freezeReservedRegs(MF);
}

#ifndef NDEBUG
LLVM_DUMP_METHOD void
SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {

  unsigned Opc = MI.getOpcode();

  if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) {
    dbgs() << "Entering ";
  } else {
    assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM);
    dbgs() << "Exiting ";
  }

  if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) {
    dbgs() << "Strict WWM ";
  } else {
    assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM);
    dbgs() << "Strict WQM ";
  }

  dbgs() << "region: " << MI;
}

#endif

bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  TII = ST.getInstrInfo();
  TRI = &TII->getRegisterInfo();
  MRI = &MF.getRegInfo();

  LIS = &getAnalysis<LiveIntervals>();
  Matrix = &getAnalysis<LiveRegMatrix>();
  VRM = &getAnalysis<VirtRegMap>();

  RegClassInfo.runOnMachineFunction(MF);

  bool RegsAssigned = false;

  // We use a reverse post-order traversal of the control-flow graph to
  // guarantee that we visit definitions in dominance order. Since WWM
  // expressions are guaranteed to never involve phi nodes, and we can only
  // escape WWM through the special WWM instruction, this means that this is a
  // perfect elimination order, so we can never do any better.
  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);

  for (MachineBasicBlock *MBB : RPOT) {
    bool InWWM = false;
    for (MachineInstr &MI : *MBB) {
      if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
          MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
        RegsAssigned |= processDef(MI.getOperand(0));

      if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
          MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) {
        LLVM_DEBUG(printWWMInfo(MI));
        InWWM = true;
        continue;
      }

      if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM ||
          MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) {
        LLVM_DEBUG(printWWMInfo(MI));
        InWWM = false;
      }

      if (!InWWM)
        continue;

      LLVM_DEBUG(dbgs() << "Processing " << MI);

      for (MachineOperand &DefOpnd : MI.defs()) {
        RegsAssigned |= processDef(DefOpnd);
      }
    }
  }

  if (!RegsAssigned)
    return false;

  rewriteRegs(MF);
  return true;
}