1 //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Pass to pre-allocated WWM registers 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "GCNSubtarget.h" 16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 17 #include "SIMachineFunctionInfo.h" 18 #include "llvm/ADT/PostOrderIterator.h" 19 #include "llvm/CodeGen/LiveIntervals.h" 20 #include "llvm/CodeGen/LiveRegMatrix.h" 21 #include "llvm/CodeGen/MachineFrameInfo.h" 22 #include "llvm/CodeGen/MachineFunctionPass.h" 23 #include "llvm/CodeGen/RegisterClassInfo.h" 24 #include "llvm/CodeGen/VirtRegMap.h" 25 #include "llvm/InitializePasses.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "si-pre-allocate-wwm-regs" 30 31 namespace { 32 33 class SIPreAllocateWWMRegs : public MachineFunctionPass { 34 private: 35 const SIInstrInfo *TII; 36 const SIRegisterInfo *TRI; 37 MachineRegisterInfo *MRI; 38 LiveIntervals *LIS; 39 LiveRegMatrix *Matrix; 40 VirtRegMap *VRM; 41 RegisterClassInfo RegClassInfo; 42 43 std::vector<unsigned> RegsToRewrite; 44 #ifndef NDEBUG 45 void printWWMInfo(const MachineInstr &MI); 46 #endif 47 48 public: 49 static char ID; 50 51 SIPreAllocateWWMRegs() : MachineFunctionPass(ID) { 52 initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry()); 53 } 54 55 bool runOnMachineFunction(MachineFunction &MF) override; 56 57 void getAnalysisUsage(AnalysisUsage &AU) const override { 58 AU.addRequired<LiveIntervals>(); 59 AU.addPreserved<LiveIntervals>(); 60 AU.addRequired<VirtRegMap>(); 61 AU.addRequired<LiveRegMatrix>(); 62 AU.addPreserved<SlotIndexes>(); 63 AU.setPreservesCFG(); 64 MachineFunctionPass::getAnalysisUsage(AU); 65 } 66 67 private: 68 bool processDef(MachineOperand &MO); 69 void rewriteRegs(MachineFunction &MF); 70 }; 71 72 } // End anonymous namespace. 73 74 INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE, 75 "SI Pre-allocate WWM Registers", false, false) 76 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 77 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 78 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) 79 INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE, 80 "SI Pre-allocate WWM Registers", false, false) 81 82 char SIPreAllocateWWMRegs::ID = 0; 83 84 char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID; 85 86 FunctionPass *llvm::createSIPreAllocateWWMRegsPass() { 87 return new SIPreAllocateWWMRegs(); 88 } 89 90 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) { 91 Register Reg = MO.getReg(); 92 if (Reg.isPhysical()) 93 return false; 94 95 if (!TRI->isVGPR(*MRI, Reg)) 96 return false; 97 98 if (VRM->hasPhys(Reg)) 99 return false; 100 101 LiveInterval &LI = LIS->getInterval(Reg); 102 103 for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) { 104 if (!MRI->isPhysRegUsed(PhysReg) && 105 Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) { 106 Matrix->assign(LI, PhysReg); 107 assert(PhysReg != 0); 108 RegsToRewrite.push_back(Reg); 109 return true; 110 } 111 } 112 113 llvm_unreachable("physreg not found for WWM expression"); 114 } 115 116 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { 117 for (MachineBasicBlock &MBB : MF) { 118 for (MachineInstr &MI : MBB) { 119 for (MachineOperand &MO : MI.operands()) { 120 if (!MO.isReg()) 121 continue; 122 123 const Register VirtReg = MO.getReg(); 124 if (VirtReg.isPhysical()) 125 continue; 126 127 if (!VRM->hasPhys(VirtReg)) 128 continue; 129 130 Register PhysReg = VRM->getPhys(VirtReg); 131 const unsigned SubReg = MO.getSubReg(); 132 if (SubReg != 0) { 133 PhysReg = TRI->getSubReg(PhysReg, SubReg); 134 MO.setSubReg(0); 135 } 136 137 MO.setReg(PhysReg); 138 MO.setIsRenamable(false); 139 } 140 } 141 } 142 143 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 144 145 for (unsigned Reg : RegsToRewrite) { 146 LIS->removeInterval(Reg); 147 148 const Register PhysReg = VRM->getPhys(Reg); 149 assert(PhysReg != 0); 150 151 MFI->reserveWWMRegister(PhysReg); 152 } 153 154 RegsToRewrite.clear(); 155 156 // Update the set of reserved registers to include WWM ones. 157 MRI->freezeReservedRegs(MF); 158 } 159 160 #ifndef NDEBUG 161 LLVM_DUMP_METHOD void 162 SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) { 163 164 unsigned Opc = MI.getOpcode(); 165 166 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM || 167 Opc == AMDGPU::ENTER_PSEUDO_WM) { 168 dbgs() << "Entering "; 169 } else { 170 assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM || 171 Opc == AMDGPU::EXIT_PSEUDO_WM); 172 dbgs() << "Exiting "; 173 } 174 175 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) { 176 dbgs() << "Strict WWM "; 177 } else if (Opc == AMDGPU::ENTER_PSEUDO_WM || Opc == AMDGPU::EXIT_PSEUDO_WM) { 178 dbgs() << "Pseudo WWM/WQM "; 179 } else { 180 assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM); 181 dbgs() << "Strict WQM "; 182 } 183 184 dbgs() << "region: " << MI; 185 } 186 187 #endif 188 189 bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) { 190 LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n"); 191 192 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 193 194 TII = ST.getInstrInfo(); 195 TRI = &TII->getRegisterInfo(); 196 MRI = &MF.getRegInfo(); 197 198 LIS = &getAnalysis<LiveIntervals>(); 199 Matrix = &getAnalysis<LiveRegMatrix>(); 200 VRM = &getAnalysis<VirtRegMap>(); 201 202 RegClassInfo.runOnMachineFunction(MF); 203 204 bool RegsAssigned = false; 205 206 // We use a reverse post-order traversal of the control-flow graph to 207 // guarantee that we visit definitions in dominance order. Since WWM 208 // expressions are guaranteed to never involve phi nodes, and we can only 209 // escape WWM through the special WWM instruction, this means that this is a 210 // perfect elimination order, so we can never do any better. 211 ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); 212 213 for (MachineBasicBlock *MBB : RPOT) { 214 bool InWWM = false; 215 for (MachineInstr &MI : *MBB) { 216 if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 || 217 MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64) 218 RegsAssigned |= processDef(MI.getOperand(0)); 219 220 if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM || 221 MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM || 222 MI.getOpcode() == AMDGPU::ENTER_PSEUDO_WM) { 223 LLVM_DEBUG(printWWMInfo(MI)); 224 InWWM = true; 225 continue; 226 } 227 228 if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM || 229 MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM || 230 MI.getOpcode() == AMDGPU::EXIT_PSEUDO_WM) { 231 LLVM_DEBUG(printWWMInfo(MI)); 232 InWWM = false; 233 } 234 235 if (!InWWM) 236 continue; 237 238 LLVM_DEBUG(dbgs() << "Processing " << MI); 239 240 for (MachineOperand &DefOpnd : MI.defs()) { 241 RegsAssigned |= processDef(DefOpnd); 242 } 243 } 244 } 245 246 if (!RegsAssigned) 247 return false; 248 249 rewriteRegs(MF); 250 return true; 251 } 252