1 //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Pass to pre-allocated WWM registers 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "GCNSubtarget.h" 16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 17 #include "SIMachineFunctionInfo.h" 18 #include "llvm/ADT/PostOrderIterator.h" 19 #include "llvm/CodeGen/LiveIntervals.h" 20 #include "llvm/CodeGen/LiveRegMatrix.h" 21 #include "llvm/CodeGen/MachineFrameInfo.h" 22 #include "llvm/CodeGen/MachineFunctionPass.h" 23 #include "llvm/CodeGen/RegisterClassInfo.h" 24 #include "llvm/CodeGen/VirtRegMap.h" 25 #include "llvm/InitializePasses.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "si-pre-allocate-wwm-regs" 30 31 static cl::opt<bool> 32 EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs", 33 cl::init(false), cl::Hidden); 34 35 namespace { 36 37 class SIPreAllocateWWMRegs : public MachineFunctionPass { 38 private: 39 const SIInstrInfo *TII; 40 const SIRegisterInfo *TRI; 41 MachineRegisterInfo *MRI; 42 LiveIntervals *LIS; 43 LiveRegMatrix *Matrix; 44 VirtRegMap *VRM; 45 RegisterClassInfo RegClassInfo; 46 47 std::vector<unsigned> RegsToRewrite; 48 #ifndef NDEBUG 49 void printWWMInfo(const MachineInstr &MI); 50 #endif 51 52 public: 53 static char ID; 54 55 SIPreAllocateWWMRegs() : MachineFunctionPass(ID) { 56 initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry()); 57 } 58 59 bool runOnMachineFunction(MachineFunction &MF) override; 60 61 void getAnalysisUsage(AnalysisUsage &AU) const override { 62 AU.addRequired<LiveIntervalsWrapperPass>(); 63 AU.addRequired<VirtRegMap>(); 64 AU.addRequired<LiveRegMatrix>(); 65 AU.setPreservesAll(); 66 MachineFunctionPass::getAnalysisUsage(AU); 67 } 68 69 private: 70 bool processDef(MachineOperand &MO); 71 void rewriteRegs(MachineFunction &MF); 72 }; 73 74 } // End anonymous namespace. 75 76 INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE, 77 "SI Pre-allocate WWM Registers", false, false) 78 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) 79 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 80 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) 81 INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE, 82 "SI Pre-allocate WWM Registers", false, false) 83 84 char SIPreAllocateWWMRegs::ID = 0; 85 86 char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID; 87 88 FunctionPass *llvm::createSIPreAllocateWWMRegsPass() { 89 return new SIPreAllocateWWMRegs(); 90 } 91 92 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) { 93 Register Reg = MO.getReg(); 94 if (Reg.isPhysical()) 95 return false; 96 97 if (!TRI->isVGPR(*MRI, Reg)) 98 return false; 99 100 if (VRM->hasPhys(Reg)) 101 return false; 102 103 LiveInterval &LI = LIS->getInterval(Reg); 104 105 for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) { 106 if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) && 107 Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) { 108 Matrix->assign(LI, PhysReg); 109 assert(PhysReg != 0); 110 RegsToRewrite.push_back(Reg); 111 return true; 112 } 113 } 114 115 llvm_unreachable("physreg not found for WWM expression"); 116 } 117 118 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { 119 for (MachineBasicBlock &MBB : MF) { 120 for (MachineInstr &MI : MBB) { 121 for (MachineOperand &MO : MI.operands()) { 122 if (!MO.isReg()) 123 continue; 124 125 const Register VirtReg = MO.getReg(); 126 if (VirtReg.isPhysical()) 127 continue; 128 129 if (!VRM->hasPhys(VirtReg)) 130 continue; 131 132 Register PhysReg = VRM->getPhys(VirtReg); 133 const unsigned SubReg = MO.getSubReg(); 134 if (SubReg != 0) { 135 PhysReg = TRI->getSubReg(PhysReg, SubReg); 136 MO.setSubReg(0); 137 } 138 139 MO.setReg(PhysReg); 140 MO.setIsRenamable(false); 141 } 142 } 143 } 144 145 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 146 147 for (unsigned Reg : RegsToRewrite) { 148 LIS->removeInterval(Reg); 149 150 const Register PhysReg = VRM->getPhys(Reg); 151 assert(PhysReg != 0); 152 153 MFI->reserveWWMRegister(PhysReg); 154 } 155 156 RegsToRewrite.clear(); 157 158 // Update the set of reserved registers to include WWM ones. 159 MRI->freezeReservedRegs(); 160 } 161 162 #ifndef NDEBUG 163 LLVM_DUMP_METHOD void 164 SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) { 165 166 unsigned Opc = MI.getOpcode(); 167 168 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) { 169 dbgs() << "Entering "; 170 } else { 171 assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM); 172 dbgs() << "Exiting "; 173 } 174 175 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) { 176 dbgs() << "Strict WWM "; 177 } else { 178 assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM); 179 dbgs() << "Strict WQM "; 180 } 181 182 dbgs() << "region: " << MI; 183 } 184 185 #endif 186 187 bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) { 188 LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n"); 189 190 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 191 192 TII = ST.getInstrInfo(); 193 TRI = &TII->getRegisterInfo(); 194 MRI = &MF.getRegInfo(); 195 196 LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); 197 Matrix = &getAnalysis<LiveRegMatrix>(); 198 VRM = &getAnalysis<VirtRegMap>(); 199 200 RegClassInfo.runOnMachineFunction(MF); 201 202 bool PreallocateSGPRSpillVGPRs = 203 EnablePreallocateSGPRSpillVGPRs || 204 MF.getFunction().hasFnAttribute("amdgpu-prealloc-sgpr-spill-vgprs"); 205 206 bool RegsAssigned = false; 207 208 // We use a reverse post-order traversal of the control-flow graph to 209 // guarantee that we visit definitions in dominance order. Since WWM 210 // expressions are guaranteed to never involve phi nodes, and we can only 211 // escape WWM through the special WWM instruction, this means that this is a 212 // perfect elimination order, so we can never do any better. 213 ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); 214 215 for (MachineBasicBlock *MBB : RPOT) { 216 bool InWWM = false; 217 for (MachineInstr &MI : *MBB) { 218 if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 || 219 MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64) 220 RegsAssigned |= processDef(MI.getOperand(0)); 221 222 if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) { 223 if (!PreallocateSGPRSpillVGPRs) 224 continue; 225 RegsAssigned |= processDef(MI.getOperand(0)); 226 } 227 228 if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM || 229 MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) { 230 LLVM_DEBUG(printWWMInfo(MI)); 231 InWWM = true; 232 continue; 233 } 234 235 if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM || 236 MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) { 237 LLVM_DEBUG(printWWMInfo(MI)); 238 InWWM = false; 239 } 240 241 if (!InWWM) 242 continue; 243 244 LLVM_DEBUG(dbgs() << "Processing " << MI); 245 246 for (MachineOperand &DefOpnd : MI.defs()) { 247 RegsAssigned |= processDef(DefOpnd); 248 } 249 } 250 } 251 252 if (!RegsAssigned) 253 return false; 254 255 rewriteRegs(MF); 256 return true; 257 } 258