//===-- GCNNSAReassign.cpp - Reassign registers in NSA instructions ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Try to reassign registers on GFX10+ from non-sequential to
/// sequential in NSA image instructions. The later SIShrinkInstructions pass
/// will replace NSA encodings with sequential versions where possible.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-nsa-reassign"

STATISTIC(NumNSAInstructions,
          "Number of NSA instructions with non-sequential address found");
STATISTIC(NumNSAConverted,
          "Number of NSA instructions changed to sequential");

namespace {

class GCNNSAReassign : public MachineFunctionPass {
public:
  static char ID;

  GCNNSAReassign() : MachineFunctionPass(ID) {
    initializeGCNNSAReassignPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "GCN NSA Reassign"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervals>();
    AU.addRequired<VirtRegMap>();
    AU.addRequired<LiveRegMatrix>();
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  typedef enum {
    NOT_NSA,        // Not an NSA instruction.
    FIXED,          // NSA which we cannot modify.
    NON_CONTIGUOUS, // NSA with a non-sequential address which we can try
                    // to optimize.
    CONTIGUOUS      // NSA with all sequential address registers.
  } NSA_Status;

  const GCNSubtarget *ST;

  const MachineRegisterInfo *MRI;

  const SIRegisterInfo *TRI;

  VirtRegMap *VRM;

  LiveRegMatrix *LRM;

  LiveIntervals *LIS;

  unsigned MaxNumVGPRs;

  const MCPhysReg *CSRegs;

  NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const;

  bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
                          unsigned StartReg) const;

  bool canAssign(unsigned StartReg, unsigned NumRegs) const;

  bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const;
};

} // End anonymous namespace.
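
// A sketch of the intended transformation (the registers and assembly syntax
// below are illustrative only, not taken from this pass): given a GFX10 NSA
// image instruction whose address operands were allocated to scattered VGPRs,
//
//   image_sample v[0:3], [v4, v7, v5], s[0:7], s[8:11]
//
// the pass tries to reassign the virtual registers currently mapped to v7 and
// v5 so that the address becomes the contiguous tuple v[4:6], which lets the
// later SIShrinkInstructions pass pick the shorter sequential encoding:
//
//   image_sample v[0:3], v[4:6], s[0:7], s[8:11]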

INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_END(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign",
                    false, false)

char GCNNSAReassign::ID = 0;

char &llvm::GCNNSAReassignID = GCNNSAReassign::ID;

bool
GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals,
                                   unsigned StartReg) const {
  unsigned NumRegs = Intervals.size();

  for (unsigned N = 0; N < NumRegs; ++N)
    if (VRM->hasPhys(Intervals[N]->reg()))
      LRM->unassign(*Intervals[N]);

  for (unsigned N = 0; N < NumRegs; ++N)
    if (LRM->checkInterference(*Intervals[N], MCRegister::from(StartReg + N)))
      return false;

  for (unsigned N = 0; N < NumRegs; ++N)
    LRM->assign(*Intervals[N], MCRegister::from(StartReg + N));

  return true;
}

bool GCNNSAReassign::canAssign(unsigned StartReg, unsigned NumRegs) const {
  for (unsigned N = 0; N < NumRegs; ++N) {
    unsigned Reg = StartReg + N;
    if (!MRI->isAllocatable(Reg))
      return false;

    for (unsigned I = 0; CSRegs[I]; ++I)
      if (TRI->isSubRegisterEq(Reg, CSRegs[I]) &&
          !LRM->isPhysRegUsed(CSRegs[I]))
        return false;
  }

  return true;
}

bool
GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const {
  unsigned NumRegs = Intervals.size();

  if (NumRegs > MaxNumVGPRs)
    return false;
  unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0;

  for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) {
    if (!canAssign(Reg, NumRegs))
      continue;

    if (tryAssignRegisters(Intervals, Reg))
      return true;
  }

  return false;
}
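
// Classify \p MI: not an NSA instruction at all (NOT_NSA), an NSA instruction
// whose addresses we must leave alone (FIXED), one whose address VGPRs are
// already sequential (CONTIGUOUS), or one we can try to make sequential
// (NON_CONTIGUOUS). With \p Fast set, only the physical register layout is
// inspected and the more expensive legality checks are skipped.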
GCNNSAReassign::NSA_Status
GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
  if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA)
    return NSA_Status::NOT_NSA;

  int VAddr0Idx =
      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);

  unsigned VgprBase = 0;
  bool NSA = false;
  for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
    const MachineOperand &Op = MI.getOperand(VAddr0Idx + I);
    Register Reg = Op.getReg();
    if (Reg.isPhysical() || !VRM->isAssignedReg(Reg))
      return NSA_Status::FIXED;

    Register PhysReg = VRM->getPhys(Reg);

    if (!Fast) {
      if (!PhysReg)
        return NSA_Status::FIXED;

      // Bail if the address is not a VGPR32. It should be possible to extend
      // the optimization to work with subregs of wider register tuples, but
      // the logic to find free registers would be much more complicated, with
      // far less chance of success. It seems reasonable to assume that in
      // most cases a tuple is used because a vector variable contains the
      // different parts of an address, which is then either already
      // consecutive or cannot be reassigned if not. If needed, it is better
      // to rely on the register coalescer to process such address tuples.
      if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg())
        return NSA_Status::FIXED;

      // InlineSpiller does not call LRM::assign() after an LI split, leaving
      // it in an inconsistent state, so we cannot call LRM::unassign().
      // See llvm bug #48911.
      // Skip the reassignment if a register has originated from such a split.
      // FIXME: Remove this workaround when bug #48911 is fixed.
      if (VRM->getPreSplitReg(Reg))
        return NSA_Status::FIXED;

      const MachineInstr *Def = MRI->getUniqueVRegDef(Reg);

      if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg)
        return NSA_Status::FIXED;

      for (auto U : MRI->use_nodbg_operands(Reg)) {
        if (U.isImplicit())
          return NSA_Status::FIXED;
        const MachineInstr *UseInst = U.getParent();
        if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg)
          return NSA_Status::FIXED;
      }

      if (!LIS->hasInterval(Reg))
        return NSA_Status::FIXED;
    }

    if (I == 0)
      VgprBase = PhysReg;
    else if (VgprBase + I != PhysReg)
      NSA = true;
  }

  return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS;
}
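
// The driver: walk the function, remember every GFX10 NSA instruction, and
// then try to scavenge a contiguous VGPR range for each non-contiguous one.
// A successful reassignment is rolled back if it would break an instruction
// that was already contiguous.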
bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
  ST = &MF.getSubtarget<GCNSubtarget>();
  if (ST->getGeneration() < GCNSubtarget::GFX10)
    return false;

  MRI = &MF.getRegInfo();
  TRI = ST->getRegisterInfo();
  VRM = &getAnalysis<VirtRegMap>();
  LRM = &getAnalysis<LiveRegMatrix>();
  LIS = &getAnalysis<LiveIntervals>();

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MaxNumVGPRs = ST->getMaxNumVGPRs(MF);
  MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs);
  CSRegs = MRI->getCalleeSavedRegs();

  using Candidate = std::pair<const MachineInstr*, bool>;
  SmallVector<Candidate, 32> Candidates;
  for (const MachineBasicBlock &MBB : MF) {
    for (const MachineInstr &MI : MBB) {
      switch (CheckNSA(MI)) {
      default:
        continue;
      case NSA_Status::CONTIGUOUS:
        Candidates.push_back(std::make_pair(&MI, true));
        break;
      case NSA_Status::NON_CONTIGUOUS:
        Candidates.push_back(std::make_pair(&MI, false));
        ++NumNSAInstructions;
        break;
      }
    }
  }

  bool Changed = false;
  for (auto &C : Candidates) {
    if (C.second)
      continue;

    const MachineInstr *MI = C.first;
    if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) {
      // The instruction already happens to be contiguous, e.g. it was fixed
      // up by an earlier reassignment.
      C.second = true;
      ++NumNSAConverted;
      continue;
    }

    const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode());
    int VAddr0Idx =
        AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0);

    SmallVector<LiveInterval *, 16> Intervals;
    SmallVector<MCRegister, 16> OrigRegs;
    SlotIndex MinInd, MaxInd;
    for (unsigned I = 0; I < Info->VAddrDwords; ++I) {
      const MachineOperand &Op = MI->getOperand(VAddr0Idx + I);
      Register Reg = Op.getReg();
      LiveInterval *LI = &LIS->getInterval(Reg);
      if (llvm::is_contained(Intervals, LI)) {
        // The same register is used more than once; we cannot make the
        // address sequential.
        Intervals.clear();
        break;
      }
      Intervals.push_back(LI);
      OrigRegs.push_back(VRM->getPhys(Reg));
      if (LI->empty()) {
        // The address input is undef, so it doesn't contribute to the
        // relevant range. Seed a reasonable index range if required.
        if (I == 0)
          MinInd = MaxInd = LIS->getInstructionIndex(*MI);
        continue;
      }
      MinInd = I != 0 ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex();
      MaxInd = I != 0 ? std::max(MaxInd, LI->endIndex()) : LI->endIndex();
    }

    if (Intervals.empty())
      continue;

    LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI
                      << "\tOriginal allocation:\t";
               for (auto *LI : Intervals)
                 dbgs() << " "
                        << llvm::printReg((VRM->getPhys(LI->reg())), TRI);
               dbgs() << '\n');

    bool Success = scavengeRegs(Intervals);
    if (!Success) {
      LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n");
      if (VRM->hasPhys(Intervals.back()->reg())) // Did not change allocation.
        continue;
    } else {
      // Check that we did not make things worse for other instructions.
      auto I = std::lower_bound(Candidates.begin(), &C, MinInd,
                                [this](const Candidate &C, SlotIndex I) {
                                  return LIS->getInstructionIndex(*C.first) < I;
                                });
      for (auto E = Candidates.end(); Success && I != E &&
           LIS->getInstructionIndex(*I->first) < MaxInd; ++I) {
        if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) {
          Success = false;
          LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first);
        }
      }
    }

    if (!Success) {
      for (unsigned I = 0; I < Info->VAddrDwords; ++I)
        if (VRM->hasPhys(Intervals[I]->reg()))
          LRM->unassign(*Intervals[I]);

      for (unsigned I = 0; I < Info->VAddrDwords; ++I)
        LRM->assign(*Intervals[I], OrigRegs[I]);

      continue;
    }

    C.second = true;
    ++NumNSAConverted;
    LLVM_DEBUG(
        dbgs() << "\tNew allocation:\t\t ["
               << llvm::printReg((VRM->getPhys(Intervals.front()->reg())), TRI)
               << " : "
               << llvm::printReg((VRM->getPhys(Intervals.back()->reg())), TRI)
               << "]\n");
    Changed = true;
  }

  return Changed;
}
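
// Note on pipeline placement (an inference from the code above, not a
// statement made by this file): since the pass edits VirtRegMap and
// LiveRegMatrix assignments directly and preserves all analyses, it is meant
// to run after register allocation but before the virtual registers are
// rewritten to their assigned physical registers.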