1 //===-- GCNNSAReassign.cpp - Reassign registers in NSA unstructions -------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// \brief Try to reassign registers on GFX10+ from non-sequential to sequential 11 /// in NSA image instructions. Later SIShrinkInstructions pass will relace NSA 12 /// with sequential versions where possible. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPU.h" 17 #include "AMDGPUSubtarget.h" 18 #include "SIInstrInfo.h" 19 #include "SIMachineFunctionInfo.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/CodeGen/LiveInterval.h" 22 #include "llvm/CodeGen/LiveIntervals.h" 23 #include "llvm/CodeGen/LiveRegMatrix.h" 24 #include "llvm/CodeGen/MachineFunctionPass.h" 25 #include "llvm/CodeGen/VirtRegMap.h" 26 #include "llvm/Support/MathExtras.h" 27 #include <algorithm> 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "amdgpu-nsa-reassign" 32 33 STATISTIC(NumNSAInstructions, 34 "Number of NSA instructions with non-sequential address found"); 35 STATISTIC(NumNSAConverted, 36 "Number of NSA instructions changed to sequential"); 37 38 namespace { 39 40 class GCNNSAReassign : public MachineFunctionPass { 41 public: 42 static char ID; 43 44 GCNNSAReassign() : MachineFunctionPass(ID) { 45 initializeGCNNSAReassignPass(*PassRegistry::getPassRegistry()); 46 } 47 48 bool runOnMachineFunction(MachineFunction &MF) override; 49 50 StringRef getPassName() const override { return "GCN NSA Reassign"; } 51 52 void getAnalysisUsage(AnalysisUsage &AU) const override { 53 AU.addRequired<LiveIntervals>(); 54 AU.addRequired<VirtRegMap>(); 55 AU.addRequired<LiveRegMatrix>(); 56 AU.setPreservesAll(); 57 MachineFunctionPass::getAnalysisUsage(AU); 58 } 59 60 private: 61 typedef enum { 62 NOT_NSA, // Not an NSA instruction 63 FIXED, // NSA which we cannot modify 64 NON_CONTIGUOUS, // NSA with non-sequential address which we can try 65 // to optimize. 66 CONTIGUOUS // NSA with all sequential address registers 67 } NSA_Status; 68 69 const GCNSubtarget *ST; 70 71 const MachineRegisterInfo *MRI; 72 73 const SIRegisterInfo *TRI; 74 75 VirtRegMap *VRM; 76 77 LiveRegMatrix *LRM; 78 79 LiveIntervals *LIS; 80 81 unsigned MaxNumVGPRs; 82 83 const MCPhysReg *CSRegs; 84 85 NSA_Status CheckNSA(const MachineInstr &MI, bool Fast = false) const; 86 87 bool tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals, 88 unsigned StartReg) const; 89 90 bool canAssign(unsigned StartReg, unsigned NumRegs) const; 91 92 bool scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const; 93 }; 94 95 } // End anonymous namespace. 96 97 INITIALIZE_PASS_BEGIN(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign", 98 false, false) 99 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 100 INITIALIZE_PASS_DEPENDENCY(VirtRegMap) 101 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix) 102 INITIALIZE_PASS_END(GCNNSAReassign, DEBUG_TYPE, "GCN NSA Reassign", 103 false, false) 104 105 106 char GCNNSAReassign::ID = 0; 107 108 char &llvm::GCNNSAReassignID = GCNNSAReassign::ID; 109 110 bool 111 GCNNSAReassign::tryAssignRegisters(SmallVectorImpl<LiveInterval *> &Intervals, 112 unsigned StartReg) const { 113 unsigned NumRegs = Intervals.size(); 114 115 for (unsigned N = 0; N < NumRegs; ++N) 116 if (VRM->hasPhys(Intervals[N]->reg)) 117 LRM->unassign(*Intervals[N]); 118 119 for (unsigned N = 0; N < NumRegs; ++N) 120 if (LRM->checkInterference(*Intervals[N], StartReg + N)) 121 return false; 122 123 for (unsigned N = 0; N < NumRegs; ++N) 124 LRM->assign(*Intervals[N], StartReg + N); 125 126 return true; 127 } 128 129 bool GCNNSAReassign::canAssign(unsigned StartReg, unsigned NumRegs) const { 130 for (unsigned N = 0; N < NumRegs; ++N) { 131 unsigned Reg = StartReg + N; 132 if (!MRI->isAllocatable(Reg)) 133 return false; 134 135 for (unsigned I = 0; CSRegs[I]; ++I) 136 if (TRI->isSubRegisterEq(Reg, CSRegs[I]) && 137 !LRM->isPhysRegUsed(CSRegs[I])) 138 return false; 139 } 140 141 return true; 142 } 143 144 bool 145 GCNNSAReassign::scavengeRegs(SmallVectorImpl<LiveInterval *> &Intervals) const { 146 unsigned NumRegs = Intervals.size(); 147 148 if (NumRegs > MaxNumVGPRs) 149 return false; 150 unsigned MaxReg = MaxNumVGPRs - NumRegs + AMDGPU::VGPR0; 151 152 for (unsigned Reg = AMDGPU::VGPR0; Reg <= MaxReg; ++Reg) { 153 if (!canAssign(Reg, NumRegs)) 154 continue; 155 156 if (tryAssignRegisters(Intervals, Reg)) 157 return true; 158 } 159 160 return false; 161 } 162 163 GCNNSAReassign::NSA_Status 164 GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const { 165 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); 166 if (!Info || Info->MIMGEncoding != AMDGPU::MIMGEncGfx10NSA) 167 return NSA_Status::NOT_NSA; 168 169 int VAddr0Idx = 170 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0); 171 172 unsigned VgprBase = 0; 173 bool NSA = false; 174 for (unsigned I = 0; I < Info->VAddrDwords; ++I) { 175 const MachineOperand &Op = MI.getOperand(VAddr0Idx + I); 176 unsigned Reg = Op.getReg(); 177 if (TargetRegisterInfo::isPhysicalRegister(Reg) || !VRM->isAssignedReg(Reg)) 178 return NSA_Status::FIXED; 179 180 unsigned PhysReg = VRM->getPhys(Reg); 181 182 if (!Fast) { 183 if (!PhysReg) 184 return NSA_Status::FIXED; 185 186 // Bail if address is not a VGPR32. That should be possible to extend the 187 // optimization to work with subregs of a wider register tuples, but the 188 // logic to find free registers will be much more complicated with much 189 // less chances for success. That seems reasonable to assume that in most 190 // cases a tuple is used because a vector variable contains different 191 // parts of an address and it is either already consequitive or cannot 192 // be reassigned if not. If needed it is better to rely on register 193 // coalescer to process such address tuples. 194 if (MRI->getRegClass(Reg) != &AMDGPU::VGPR_32RegClass || Op.getSubReg()) 195 return NSA_Status::FIXED; 196 197 const MachineInstr *Def = MRI->getUniqueVRegDef(Reg); 198 199 if (Def && Def->isCopy() && Def->getOperand(1).getReg() == PhysReg) 200 return NSA_Status::FIXED; 201 202 for (auto U : MRI->use_nodbg_operands(Reg)) { 203 if (U.isImplicit()) 204 return NSA_Status::FIXED; 205 const MachineInstr *UseInst = U.getParent(); 206 if (UseInst->isCopy() && UseInst->getOperand(0).getReg() == PhysReg) 207 return NSA_Status::FIXED; 208 } 209 210 if (!LIS->hasInterval(Reg)) 211 return NSA_Status::FIXED; 212 } 213 214 if (I == 0) 215 VgprBase = PhysReg; 216 else if (VgprBase + I != PhysReg) 217 NSA = true; 218 } 219 220 return NSA ? NSA_Status::NON_CONTIGUOUS : NSA_Status::CONTIGUOUS; 221 } 222 223 bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) { 224 ST = &MF.getSubtarget<GCNSubtarget>(); 225 if (ST->getGeneration() < GCNSubtarget::GFX10) 226 return false; 227 228 MRI = &MF.getRegInfo(); 229 TRI = ST->getRegisterInfo(); 230 VRM = &getAnalysis<VirtRegMap>(); 231 LRM = &getAnalysis<LiveRegMatrix>(); 232 LIS = &getAnalysis<LiveIntervals>(); 233 234 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 235 MaxNumVGPRs = ST->getMaxNumVGPRs(MF); 236 MaxNumVGPRs = std::min(ST->getMaxNumVGPRs(MFI->getOccupancy()), MaxNumVGPRs); 237 CSRegs = MRI->getCalleeSavedRegs(); 238 239 using Candidate = std::pair<const MachineInstr*, bool>; 240 SmallVector<Candidate, 32> Candidates; 241 for (const MachineBasicBlock &MBB : MF) { 242 for (const MachineInstr &MI : MBB) { 243 switch (CheckNSA(MI)) { 244 default: 245 continue; 246 case NSA_Status::CONTIGUOUS: 247 Candidates.push_back(std::make_pair(&MI, true)); 248 break; 249 case NSA_Status::NON_CONTIGUOUS: 250 Candidates.push_back(std::make_pair(&MI, false)); 251 ++NumNSAInstructions; 252 break; 253 } 254 } 255 } 256 257 bool Changed = false; 258 for (auto &C : Candidates) { 259 if (C.second) 260 continue; 261 262 const MachineInstr *MI = C.first; 263 if (CheckNSA(*MI, true) == NSA_Status::CONTIGUOUS) { 264 // Already happen to be fixed. 265 C.second = true; 266 ++NumNSAConverted; 267 continue; 268 } 269 270 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI->getOpcode()); 271 int VAddr0Idx = 272 AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr0); 273 274 SmallVector<LiveInterval *, 16> Intervals; 275 SmallVector<unsigned, 16> OrigRegs; 276 SlotIndex MinInd, MaxInd; 277 for (unsigned I = 0; I < Info->VAddrDwords; ++I) { 278 const MachineOperand &Op = MI->getOperand(VAddr0Idx + I); 279 unsigned Reg = Op.getReg(); 280 LiveInterval *LI = &LIS->getInterval(Reg); 281 if (llvm::find(Intervals, LI) != Intervals.end()) { 282 // Same register used, unable to make sequential 283 Intervals.clear(); 284 break; 285 } 286 Intervals.push_back(LI); 287 OrigRegs.push_back(VRM->getPhys(Reg)); 288 MinInd = I ? std::min(MinInd, LI->beginIndex()) : LI->beginIndex(); 289 MaxInd = I ? std::max(MaxInd, LI->endIndex()) : LI->endIndex(); 290 } 291 292 if (Intervals.empty()) 293 continue; 294 295 LLVM_DEBUG(dbgs() << "Attempting to reassign NSA: " << *MI 296 << "\tOriginal allocation:\t"; 297 for(auto *LI : Intervals) 298 dbgs() << " " << llvm::printReg((VRM->getPhys(LI->reg)), TRI); 299 dbgs() << '\n'); 300 301 bool Success = scavengeRegs(Intervals); 302 if (!Success) { 303 LLVM_DEBUG(dbgs() << "\tCannot reallocate.\n"); 304 if (VRM->hasPhys(Intervals.back()->reg)) // Did not change allocation. 305 continue; 306 } else { 307 // Check we did not make it worse for other instructions. 308 auto I = std::lower_bound(Candidates.begin(), &C, MinInd, 309 [this](const Candidate &C, SlotIndex I) { 310 return LIS->getInstructionIndex(*C.first) < I; 311 }); 312 for (auto E = Candidates.end(); Success && I != E && 313 LIS->getInstructionIndex(*I->first) < MaxInd; ++I) { 314 if (I->second && CheckNSA(*I->first, true) < NSA_Status::CONTIGUOUS) { 315 Success = false; 316 LLVM_DEBUG(dbgs() << "\tNSA conversion conflict with " << *I->first); 317 } 318 } 319 } 320 321 if (!Success) { 322 for (unsigned I = 0; I < Info->VAddrDwords; ++I) 323 if (VRM->hasPhys(Intervals[I]->reg)) 324 LRM->unassign(*Intervals[I]); 325 326 for (unsigned I = 0; I < Info->VAddrDwords; ++I) 327 LRM->assign(*Intervals[I], OrigRegs[I]); 328 329 continue; 330 } 331 332 C.second = true; 333 ++NumNSAConverted; 334 LLVM_DEBUG(dbgs() << "\tNew allocation:\t\t [" 335 << llvm::printReg((VRM->getPhys(Intervals.front()->reg)), TRI) 336 << " : " 337 << llvm::printReg((VRM->getPhys(Intervals.back()->reg)), TRI) 338 << "]\n"); 339 Changed = true; 340 } 341 342 return Changed; 343 } 344