1 //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Pass to pre-allocated WWM registers 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "GCNSubtarget.h" 16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 17 #include "SIMachineFunctionInfo.h" 18 #include "llvm/ADT/PostOrderIterator.h" 19 #include "llvm/CodeGen/LiveIntervals.h" 20 #include "llvm/CodeGen/LiveRegMatrix.h" 21 #include "llvm/CodeGen/MachineFrameInfo.h" 22 #include "llvm/CodeGen/MachineFunctionPass.h" 23 #include "llvm/CodeGen/RegisterClassInfo.h" 24 #include "llvm/CodeGen/VirtRegMap.h" 25 #include "llvm/InitializePasses.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "si-pre-allocate-wwm-regs" 30 31 static cl::opt<bool> 32 EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs", 33 cl::init(false), cl::Hidden); 34 35 namespace { 36 37 class SIPreAllocateWWMRegs : public MachineFunctionPass { 38 private: 39 const SIInstrInfo *TII; 40 const SIRegisterInfo *TRI; 41 MachineRegisterInfo *MRI; 42 LiveIntervals *LIS; 43 LiveRegMatrix *Matrix; 44 VirtRegMap *VRM; 45 RegisterClassInfo RegClassInfo; 46 47 std::vector<unsigned> RegsToRewrite; 48 #ifndef NDEBUG 49 void printWWMInfo(const MachineInstr &MI); 50 #endif 51 52 public: 53 static char ID; 54 55 SIPreAllocateWWMRegs() : MachineFunctionPass(ID) { 56 initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry()); 57 } 58 59 bool runOnMachineFunction(MachineFunction &MF) override; 60 61 void getAnalysisUsage(AnalysisUsage &AU) const override { 62 AU.addRequired<LiveIntervals>(); 63 AU.addRequired<VirtRegMap>(); 64 AU.addRequired<LiveRegMatrix>(); 65 AU.setPreservesAll(); 66 MachineFunctionPass::getAnalysisUsage(AU); 67 } 68 69 private: 70 bool processDef(MachineOperand &MO); 71 void rewriteRegs(MachineFunction &MF); 72 }; 73 74 } // End anonymous namespace. 75 76 INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE, 77 "SI Pre-allocate WWM Registers", false, false) 78 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 79 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 80 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) 81 INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE, 82 "SI Pre-allocate WWM Registers", false, false) 83 84 char SIPreAllocateWWMRegs::ID = 0; 85 86 char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID; 87 88 FunctionPass *llvm::createSIPreAllocateWWMRegsPass() { 89 return new SIPreAllocateWWMRegs(); 90 } 91 92 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) { 93 Register Reg = MO.getReg(); 94 if (Reg.isPhysical()) 95 return false; 96 97 if (!TRI->isVGPR(*MRI, Reg)) 98 return false; 99 100 if (VRM->hasPhys(Reg)) 101 return false; 102 103 LiveInterval &LI = LIS->getInterval(Reg); 104 105 for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) { 106 if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) && 107 Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) { 108 Matrix->assign(LI, PhysReg); 109 assert(PhysReg != 0); 110 RegsToRewrite.push_back(Reg); 111 return true; 112 } 113 } 114 115 llvm_unreachable("physreg not found for WWM expression"); 116 } 117 118 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) { 119 for (MachineBasicBlock &MBB : MF) { 120 for (MachineInstr &MI : MBB) { 121 for (MachineOperand &MO : MI.operands()) { 122 if (!MO.isReg()) 123 continue; 124 125 const Register VirtReg = MO.getReg(); 126 if (VirtReg.isPhysical()) 127 continue; 128 129 if (!VRM->hasPhys(VirtReg)) 130 continue; 131 132 Register PhysReg = VRM->getPhys(VirtReg); 133 const unsigned SubReg = MO.getSubReg(); 134 if (SubReg != 0) { 135 PhysReg = TRI->getSubReg(PhysReg, SubReg); 136 MO.setSubReg(0); 137 } 138 139 MO.setReg(PhysReg); 140 MO.setIsRenamable(false); 141 } 142 } 143 } 144 145 SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 146 147 for (unsigned Reg : RegsToRewrite) { 148 LIS->removeInterval(Reg); 149 150 const Register PhysReg = VRM->getPhys(Reg); 151 assert(PhysReg != 0); 152 153 MFI->reserveWWMRegister(PhysReg); 154 } 155 156 RegsToRewrite.clear(); 157 158 // Update the set of reserved registers to include WWM ones. 159 MRI->freezeReservedRegs(MF); 160 } 161 162 #ifndef NDEBUG 163 LLVM_DUMP_METHOD void 164 SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) { 165 166 unsigned Opc = MI.getOpcode(); 167 168 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM || 169 Opc == AMDGPU::ENTER_PSEUDO_WM) { 170 dbgs() << "Entering "; 171 } else { 172 assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM || 173 Opc == AMDGPU::EXIT_PSEUDO_WM); 174 dbgs() << "Exiting "; 175 } 176 177 if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) { 178 dbgs() << "Strict WWM "; 179 } else if (Opc == AMDGPU::ENTER_PSEUDO_WM || Opc == AMDGPU::EXIT_PSEUDO_WM) { 180 dbgs() << "Pseudo WWM/WQM "; 181 } else { 182 assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM); 183 dbgs() << "Strict WQM "; 184 } 185 186 dbgs() << "region: " << MI; 187 } 188 189 #endif 190 191 bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) { 192 LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n"); 193 194 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 195 196 TII = ST.getInstrInfo(); 197 TRI = &TII->getRegisterInfo(); 198 MRI = &MF.getRegInfo(); 199 200 LIS = &getAnalysis<LiveIntervals>(); 201 Matrix = &getAnalysis<LiveRegMatrix>(); 202 VRM = &getAnalysis<VirtRegMap>(); 203 204 RegClassInfo.runOnMachineFunction(MF); 205 206 bool PreallocateSGPRSpillVGPRs = 207 EnablePreallocateSGPRSpillVGPRs || 208 MF.getFunction().hasFnAttribute("amdgpu-prealloc-sgpr-spill-vgprs"); 209 210 bool RegsAssigned = false; 211 212 // We use a reverse post-order traversal of the control-flow graph to 213 // guarantee that we visit definitions in dominance order. Since WWM 214 // expressions are guaranteed to never involve phi nodes, and we can only 215 // escape WWM through the special WWM instruction, this means that this is a 216 // perfect elimination order, so we can never do any better. 217 ReversePostOrderTraversal<MachineFunction*> RPOT(&MF); 218 219 for (MachineBasicBlock *MBB : RPOT) { 220 bool InWWM = false; 221 for (MachineInstr &MI : *MBB) { 222 if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 || 223 MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64) 224 RegsAssigned |= processDef(MI.getOperand(0)); 225 226 if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) { 227 if (!PreallocateSGPRSpillVGPRs) 228 continue; 229 RegsAssigned |= processDef(MI.getOperand(0)); 230 } 231 232 if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM || 233 MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM || 234 MI.getOpcode() == AMDGPU::ENTER_PSEUDO_WM) { 235 LLVM_DEBUG(printWWMInfo(MI)); 236 InWWM = true; 237 continue; 238 } 239 240 if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM || 241 MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM || 242 MI.getOpcode() == AMDGPU::EXIT_PSEUDO_WM) { 243 LLVM_DEBUG(printWWMInfo(MI)); 244 InWWM = false; 245 } 246 247 if (!InWWM) 248 continue; 249 250 LLVM_DEBUG(dbgs() << "Processing " << MI); 251 252 for (MachineOperand &DefOpnd : MI.defs()) { 253 RegsAssigned |= processDef(DefOpnd); 254 } 255 } 256 } 257 258 if (!RegsAssigned) 259 return false; 260 261 rewriteRegs(MF); 262 return true; 263 } 264