//===- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Copies from VGPR to SGPR registers are illegal and the register coalescer
/// will sometimes generate these illegal copies in situations like this:
///
///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
///
/// BB0:
///   %0 <sgpr> = SCALAR_INST
///   %1 <vsrc> = COPY %0 <sgpr>
///   ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %2 <vgpr> = VECTOR_INST
///   %3 <vsrc> = COPY %2 <vgpr>
/// BB2:
///   %4 <vsrc> = PHI %1 <vsrc>, <%bb.0>, %3 <vsrc>, <%bb.1>
///   %5 <vgpr> = VECTOR_INST %4 <vsrc>
///
/// The coalescer will begin at BB0 and eliminate its copy, and the resulting
/// code will look like this:
///
/// BB0:
///   %0 <sgpr> = SCALAR_INST
///   ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %2 <vgpr> = VECTOR_INST
///   %3 <vsrc> = COPY %2 <vgpr>
/// BB2:
///   %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <vsrc>, <%bb.1>
///   %5 <vgpr> = VECTOR_INST %4 <sgpr>
///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is forced to constrain the register class of %3 to
/// <sgpr>, so we end up with final code like this:
///
/// BB0:
///   %0 <sgpr> = SCALAR_INST
///   ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %2 <vgpr> = VECTOR_INST
///   %3 <sgpr> = COPY %2 <vgpr>
/// BB2:
///   %4 <sgpr> = PHI %0 <sgpr>, <%bb.0>, %3 <sgpr>, <%bb.1>
///   %5 <vgpr> = VECTOR_INST %4 <sgpr>
///
/// Now this code contains an illegal copy from a VGPR to an SGPR.
///
/// In order to avoid this problem, this pass searches for PHI instructions
/// that define a <vsrc> register and constrains their definition class to
/// <vgpr> if a user of the PHI's result is a vector instruction.
/// If the PHI's definition class is constrained to <vgpr> then the coalescer
/// will be unable to perform the COPY removal from the above example, which
/// ultimately led to the creation of an illegal COPY.
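///
/// For illustration only (a hand-written sketch, not verbatim pass output;
/// in practice the pass may instead rewrite the PHI and its users via
/// moveToVALU, inserting the required copies), the fixed version of the
/// first example keeps the PHI result in a VGPR:
///
/// BB2:
///   %4 <vgpr> = PHI %1 <vgpr>, <%bb.0>, %3 <vgpr>, <%bb.1>
///   %5 <vgpr> = VECTOR_INST %4 <vgpr>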
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <list>
#include <map>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "si-fix-sgpr-copies"

static cl::opt<bool> EnableM0Merge(
  "amdgpu-enable-merge-m0",
  cl::desc("Merge and hoist M0 initializations"),
  cl::init(true));

namespace {

class SIFixSGPRCopies : public MachineFunctionPass {
  MachineDominatorTree *MDT;

public:
  static char ID;

  SIFixSGPRCopies() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Fix SGPR copies"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
                      "SI Fix SGPR copies", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
                    "SI Fix SGPR copies", false, false)

char SIFixSGPRCopies::ID = 0;

char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;

FunctionPass *llvm::createSIFixSGPRCopiesPass() {
  return new SIFixSGPRCopies();
}

static bool hasVectorOperands(const MachineInstr &MI,
                              const SIRegisterInfo *TRI) {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    if (!MI.getOperand(i).isReg() ||
        !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
      continue;

    if (TRI->hasVectorRegisters(MRI.getRegClass(MI.getOperand(i).getReg())))
      return true;
  }
  return false;
}

static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getCopyRegClasses(const MachineInstr &Copy,
                  const SIRegisterInfo &TRI,
                  const MachineRegisterInfo &MRI) {
  unsigned DstReg = Copy.getOperand(0).getReg();
  unsigned SrcReg = Copy.getOperand(1).getReg();

  const TargetRegisterClass *SrcRC =
    TargetRegisterInfo::isVirtualRegister(SrcReg) ?
    MRI.getRegClass(SrcReg) :
    TRI.getPhysRegClass(SrcReg);

  // We don't really care about the subregister here.
  // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());

  const TargetRegisterClass *DstRC =
    TargetRegisterInfo::isVirtualRegister(DstReg) ?
    MRI.getRegClass(DstReg) :
    TRI.getPhysRegClass(DstReg);

  return std::make_pair(SrcRC, DstRC);
}

static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return SrcRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(DstRC) &&
         TRI.hasVectorRegisters(SrcRC);
}

static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return DstRC != &AMDGPU::VReg_1RegClass && TRI.isSGPRClass(SrcRC) &&
         TRI.hasVectorRegisters(DstRC);
}

static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
                                      const SIRegisterInfo *TRI,
                                      const SIInstrInfo *TII) {
  MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  auto &Src = MI.getOperand(1);
  unsigned DstReg = MI.getOperand(0).getReg();
  unsigned SrcReg = Src.getReg();
  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
      !TargetRegisterInfo::isVirtualRegister(DstReg))
    return false;

  for (const auto &MO : MRI.reg_nodbg_operands(DstReg)) {
    const auto *UseMI = MO.getParent();
    if (UseMI == &MI)
      continue;
    if (MO.isDef() || UseMI->getParent() != MI.getParent() ||
        UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END ||
        !TII->isOperandLegal(*UseMI, UseMI->getOperandNo(&MO), &Src))
      return false;
  }
  // Change VGPR to SGPR destination.
  MRI.setRegClass(DstReg, TRI->getEquivalentSGPRClass(MRI.getRegClass(DstReg)));
  return true;
}

// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
//
//  SGPRx = ...
//  SGPRy = REG_SEQUENCE SGPRx, sub0 ...
//  VGPRz = COPY SGPRy
//
// ==>
//
//  VGPRx = COPY SGPRx
//  VGPRz = REG_SEQUENCE VGPRx, sub0
//
// This exposes immediate folding opportunities when materializing 64-bit
// immediates.
static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
                                        const SIRegisterInfo *TRI,
                                        const SIInstrInfo *TII,
                                        MachineRegisterInfo &MRI) {
  assert(MI.isRegSequence());

  unsigned DstReg = MI.getOperand(0).getReg();
  if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
    return false;

  if (!MRI.hasOneUse(DstReg))
    return false;

  MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
  if (!CopyUse.isCopy())
    return false;

  // It is illegal to have vreg inputs to a physreg defining reg_sequence.
  if (TargetRegisterInfo::isPhysicalRegister(CopyUse.getOperand(0).getReg()))
    return false;

  const TargetRegisterClass *SrcRC, *DstRC;
  std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);

  if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
    return false;

  if (tryChangeVGPRtoSGPRinCopy(CopyUse, TRI, TII))
    return true;

  // TODO: Could have multiple extracts?
  unsigned SubReg = CopyUse.getOperand(1).getSubReg();
  if (SubReg != AMDGPU::NoSubRegister)
    return false;

  MRI.setRegClass(DstReg, DstRC);

  // SGPRx = ...
  // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
  // VGPRz = COPY SGPRy
  //
  // =>
  //
  // VGPRx = COPY SGPRx
  // VGPRz = REG_SEQUENCE VGPRx, sub0

  MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
  bool IsAGPR = TRI->hasAGPRs(DstRC);

  for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
    unsigned SrcReg = MI.getOperand(I).getReg();
    unsigned SrcSubReg = MI.getOperand(I).getSubReg();

    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    assert(TRI->isSGPRClass(SrcRC) &&
           "Expected SGPR REG_SEQUENCE to only have SGPR inputs");

    SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
    const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);

    unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);

    BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
            TmpReg)
        .add(MI.getOperand(I));

    if (IsAGPR) {
      const TargetRegisterClass *NewSrcRC = TRI->getEquivalentAGPRClass(SrcRC);
      unsigned TmpAReg = MRI.createVirtualRegister(NewSrcRC);
      unsigned Opc = NewSrcRC == &AMDGPU::AGPR_32RegClass ?
        AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::COPY;
      BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(Opc),
              TmpAReg)
          .addReg(TmpReg, RegState::Kill);
      TmpReg = TmpAReg;
    }

    MI.getOperand(I).setReg(TmpReg);
  }

  CopyUse.eraseFromParent();
  return true;
}

static bool phiHasVGPROperands(const MachineInstr &PHI,
                               const MachineRegisterInfo &MRI,
                               const SIRegisterInfo *TRI,
                               const SIInstrInfo *TII) {
  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
    unsigned Reg = PHI.getOperand(i).getReg();
    if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
      return true;
  }
  return false;
}

static bool phiHasBreakDef(const MachineInstr &PHI,
                           const MachineRegisterInfo &MRI,
                           SmallSet<unsigned, 8> &Visited) {
  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
    unsigned Reg = PHI.getOperand(i).getReg();
    if (Visited.count(Reg))
      continue;

    Visited.insert(Reg);

    MachineInstr *DefInstr = MRI.getVRegDef(Reg);
    switch (DefInstr->getOpcode()) {
    default:
      break;
    case AMDGPU::SI_IF_BREAK:
      return true;
    case AMDGPU::PHI:
      if (phiHasBreakDef(*DefInstr, MRI, Visited))
        return true;
    }
  }
  return false;
}

static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
                                          const TargetRegisterInfo &TRI) {
  for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
       E = MBB.end(); I != E; ++I) {
    if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
      return true;
  }
  return false;
}

static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
                                    const MachineInstr *MoveImm,
                                    const SIInstrInfo *TII,
                                    unsigned &SMovOp,
                                    int64_t &Imm) {
  if (Copy->getOpcode() != AMDGPU::COPY)
    return false;

  if (!MoveImm->isMoveImmediate())
    return false;

  const MachineOperand *ImmOp =
      TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
  if (!ImmOp->isImm())
    return false;

  // FIXME: Handle copies with sub-regs.
  if (Copy->getOperand(0).getSubReg())
    return false;

  switch (MoveImm->getOpcode()) {
  default:
    return false;
  case AMDGPU::V_MOV_B32_e32:
    SMovOp = AMDGPU::S_MOV_B32;
    break;
  case AMDGPU::V_MOV_B64_PSEUDO:
    SMovOp = AMDGPU::S_MOV_B64;
    break;
  }
  Imm = ImmOp->getImm();
  return true;
}

template <class UnaryPredicate>
bool searchPredecessors(const MachineBasicBlock *MBB,
                        const MachineBasicBlock *CutOff,
                        UnaryPredicate Predicate) {
  if (MBB == CutOff)
    return false;

  DenseSet<const MachineBasicBlock *> Visited;
  SmallVector<MachineBasicBlock *, 4> Worklist(MBB->pred_begin(),
                                               MBB->pred_end());

  while (!Worklist.empty()) {
    MachineBasicBlock *MBB = Worklist.pop_back_val();

    if (!Visited.insert(MBB).second)
      continue;
    if (MBB == CutOff)
      continue;
    if (Predicate(MBB))
      return true;

    Worklist.append(MBB->pred_begin(), MBB->pred_end());
  }

  return false;
}

static bool predsHasDivergentTerminator(MachineBasicBlock *MBB,
                                        const TargetRegisterInfo *TRI) {
  return searchPredecessors(MBB, nullptr, [TRI](MachineBasicBlock *MBB) {
           return hasTerminatorThatModifiesExec(*MBB, *TRI); });
}

// Checks if there is a potential path from the From instruction to the To
// instruction. If CutOff is specified and lies on that path, we ignore the
// portion of the path above CutOff and report To as not reachable.
static bool isReachable(const MachineInstr *From,
                        const MachineInstr *To,
                        const MachineBasicBlock *CutOff,
                        MachineDominatorTree &MDT) {
  // If either From block dominates To block or instructions are in the same
  // block and From is higher.
  if (MDT.dominates(From, To))
    return true;

  const MachineBasicBlock *MBBFrom = From->getParent();
  const MachineBasicBlock *MBBTo = To->getParent();
  if (MBBFrom == MBBTo)
    return false;

  // Instructions are in different blocks, do predecessor search.
  // We should almost never get here since we do not usually produce M0 stores
  // other than -1.
  return searchPredecessors(MBBTo, CutOff, [MBBFrom]
                            (const MachineBasicBlock *MBB) {
                              return MBB == MBBFrom; });
}

// Return the first non-prologue instruction in the block.
static MachineBasicBlock::iterator
getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII) {
  MachineBasicBlock::iterator I = MBB->getFirstNonPHI();
  while (I != MBB->end() && TII->isBasicBlockPrologue(*I))
    ++I;

  return I;
}

// Hoist and merge identical SGPR initializations into a common predecessor.
// This is intended to combine M0 initializations, but can work with any
// SGPR. A VGPR cannot be processed since we cannot guarantee vector
// execution.
static bool hoistAndMergeSGPRInits(unsigned Reg,
                                   const MachineRegisterInfo &MRI,
                                   MachineDominatorTree &MDT,
                                   const TargetInstrInfo *TII) {
  // List of inits by immediate value.
  using InitListMap = std::map<unsigned, std::list<MachineInstr *>>;
  InitListMap Inits;
  // List of clobbering instructions.
  SmallVector<MachineInstr*, 8> Clobbers;
  // List of instructions marked for deletion.
  SmallSet<MachineInstr*, 8> MergedInstrs;

  bool Changed = false;

  for (auto &MI : MRI.def_instructions(Reg)) {
    MachineOperand *Imm = nullptr;
    for (auto &MO: MI.operands()) {
      if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) ||
          (!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
        Imm = nullptr;
        break;
      } else if (MO.isImm())
        Imm = &MO;
    }
    if (Imm)
      Inits[Imm->getImm()].push_front(&MI);
    else
      Clobbers.push_back(&MI);
  }

  for (auto &Init : Inits) {
    auto &Defs = Init.second;

    for (auto I1 = Defs.begin(), E = Defs.end(); I1 != E; ) {
      MachineInstr *MI1 = *I1;

      for (auto I2 = std::next(I1); I2 != E; ) {
        MachineInstr *MI2 = *I2;

        // Check any possible interference.
        auto interferes = [&](MachineBasicBlock::iterator From,
                              MachineBasicBlock::iterator To) -> bool {

          assert(MDT.dominates(&*To, &*From));

          auto interferes = [&MDT, From, To](MachineInstr* &Clobber) -> bool {
            const MachineBasicBlock *MBBFrom = From->getParent();
            const MachineBasicBlock *MBBTo = To->getParent();
            bool MayClobberFrom = isReachable(Clobber, &*From, MBBTo, MDT);
            bool MayClobberTo = isReachable(Clobber, &*To, MBBTo, MDT);
            if (!MayClobberFrom && !MayClobberTo)
              return false;
            if ((MayClobberFrom && !MayClobberTo) ||
                (!MayClobberFrom && MayClobberTo))
              return true;
            // Both can clobber. This is not an interference only if both are
            // dominated by Clobber and belong to the same block, or if Clobber
            // properly dominates To; given that To >> From, Clobber then
            // dominates both and is located in a common dominator.
            return !((MBBFrom == MBBTo &&
                      MDT.dominates(Clobber, &*From) &&
                      MDT.dominates(Clobber, &*To)) ||
                     MDT.properlyDominates(Clobber->getParent(), MBBTo));
          };

          return (llvm::any_of(Clobbers, interferes)) ||
                 (llvm::any_of(Inits, [&](InitListMap::value_type &C) {
                    return C.first != Init.first &&
                           llvm::any_of(C.second, interferes);
                  }));
        };

        if (MDT.dominates(MI1, MI2)) {
          if (!interferes(MI2, MI1)) {
            LLVM_DEBUG(dbgs()
                       << "Erasing from "
                       << printMBBReference(*MI2->getParent()) << " " << *MI2);
            MergedInstrs.insert(MI2);
            Changed = true;
            ++I2;
            continue;
          }
        } else if (MDT.dominates(MI2, MI1)) {
          if (!interferes(MI1, MI2)) {
            LLVM_DEBUG(dbgs()
                       << "Erasing from "
                       << printMBBReference(*MI1->getParent()) << " " << *MI1);
            MergedInstrs.insert(MI1);
            Changed = true;
            ++I1;
            break;
          }
        } else {
          auto *MBB = MDT.findNearestCommonDominator(MI1->getParent(),
                                                     MI2->getParent());
          if (!MBB) {
            ++I2;
            continue;
          }

          MachineBasicBlock::iterator I = getFirstNonPrologue(MBB, TII);
          if (!interferes(MI1, I) && !interferes(MI2, I)) {
            LLVM_DEBUG(dbgs()
                       << "Erasing from "
                       << printMBBReference(*MI1->getParent()) << " " << *MI1
                       << "and moving from "
                       << printMBBReference(*MI2->getParent()) << " to "
                       << printMBBReference(*I->getParent()) << " " << *MI2);
            I->getParent()->splice(I, MI2->getParent(), MI2);
            MergedInstrs.insert(MI1);
            Changed = true;
            ++I1;
            break;
          }
        }
        ++I2;
      }
      ++I1;
    }
  }

  for (auto MI : MergedInstrs)
    MI->removeFromParent();

  if (Changed)
    MRI.clearKillFlags(Reg);

  return Changed;
}

bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MDT = &getAnalysis<MachineDominatorTree>();

  SmallVector<MachineInstr *, 16> Worklist;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {
    MachineBasicBlock &MBB = *BI;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {
      MachineInstr &MI = *I;

      switch (MI.getOpcode()) {
      default:
        continue;
      case AMDGPU::COPY:
      case AMDGPU::WQM:
      case AMDGPU::WWM: {
        // If the destination register is a physical register there isn't
        // really much we can do to fix this.
        if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
          continue;

        const TargetRegisterClass *SrcRC, *DstRC;
        std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
        if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
          unsigned SrcReg = MI.getOperand(1).getReg();
          if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
            TII->moveToVALU(MI, MDT);
            break;
          }

          MachineInstr *DefMI = MRI.getVRegDef(SrcReg);
          unsigned SMovOp;
          int64_t Imm;
          // If we are just copying an immediate, we can replace the copy with
          // s_mov_b32.
          if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) {
            MI.getOperand(1).ChangeToImmediate(Imm);
            MI.addImplicitDefUseOperands(MF);
            MI.setDesc(TII->get(SMovOp));
            break;
          }
          TII->moveToVALU(MI, MDT);
        } else if (isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) {
          tryChangeVGPRtoSGPRinCopy(MI, TRI, TII);
        }

        break;
      }
      case AMDGPU::PHI: {
        unsigned Reg = MI.getOperand(0).getReg();
        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
          break;

        // We don't need to fix the PHI if the common dominator of the
        // two incoming blocks terminates with a uniform branch.
        bool HasVGPROperand = phiHasVGPROperands(MI, MRI, TRI, TII);
        if (MI.getNumExplicitOperands() == 5 && !HasVGPROperand) {
          MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
          MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();

          if (!predsHasDivergentTerminator(MBB0, TRI) &&
              !predsHasDivergentTerminator(MBB1, TRI)) {
            LLVM_DEBUG(dbgs()
                       << "Not fixing PHI for uniform branch: " << MI << '\n');
            break;
          }
        }

        // If a PHI node defines an SGPR and any of its operands are VGPRs,
        // then we need to move it to the VALU.
        //
        // Also, if a PHI node defines an SGPR and has all SGPR operands
        // we must move it to the VALU, because the SGPR operands will
        // all end up being assigned the same register, which means
        // there is a potential for a conflict if different threads take
        // different control flow paths.
        //
        // For example:
        //
        // sgpr0 = def;
        // ...
        // sgpr1 = def;
        // ...
        // sgpr2 = PHI sgpr0, sgpr1
        // use sgpr2;
        //
        // Will become:
        //
        // sgpr2 = def;
        // ...
        // sgpr2 = def;
        // ...
        // use sgpr2
        //
        // The one exception to this rule is when one of the operands
        // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
        // instruction. In this case, we know the program will
        // never enter the second block (the loop) without entering
        // the first block (where the condition is computed), so there
        // is no chance for values to be over-written.

        SmallSet<unsigned, 8> Visited;
        if (HasVGPROperand || !phiHasBreakDef(MI, MRI, Visited)) {
          LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
          TII->moveToVALU(MI, MDT);
        }

        break;
      }
      case AMDGPU::REG_SEQUENCE:
        if (TRI->hasVectorRegisters(TII->getOpRegClass(MI, 0)) ||
            !hasVectorOperands(MI, TRI)) {
          foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
          continue;
        }

        LLVM_DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);

        TII->moveToVALU(MI, MDT);
        break;
      case AMDGPU::INSERT_SUBREG: {
        const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
        DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
        Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
        Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
        if (TRI->isSGPRClass(DstRC) &&
            (TRI->hasVectorRegisters(Src0RC) ||
             TRI->hasVectorRegisters(Src1RC))) {
          LLVM_DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
          TII->moveToVALU(MI, MDT);
        }
        break;
      }
      }
    }
  }

  if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
    hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT, TII);

  return true;
}