1 //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass compute turns all control flow pseudo instructions into native one 11 /// computing their address on the fly; it also sets STACK_SIZE info. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "MCTargetDesc/R600MCTargetDesc.h" 16 #include "R600.h" 17 #include "R600MachineFunctionInfo.h" 18 #include "R600Subtarget.h" 19 #include "llvm/CodeGen/MachineFunctionPass.h" 20 #include <set> 21 22 using namespace llvm; 23 24 #define DEBUG_TYPE "r600cf" 25 26 namespace { 27 28 struct CFStack { 29 enum StackItem { 30 ENTRY = 0, 31 SUB_ENTRY = 1, 32 FIRST_NON_WQM_PUSH = 2, 33 FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3 34 }; 35 36 const R600Subtarget *ST; 37 std::vector<StackItem> BranchStack; 38 std::vector<StackItem> LoopStack; 39 unsigned MaxStackSize; 40 unsigned CurrentEntries = 0; 41 unsigned CurrentSubEntries = 0; 42 43 CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st), 44 // We need to reserve a stack entry for CALL_FS in vertex shaders. 45 MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {} 46 47 unsigned getLoopDepth(); 48 bool branchStackContains(CFStack::StackItem); 49 bool requiresWorkAroundForInst(unsigned Opcode); 50 unsigned getSubEntrySize(CFStack::StackItem Item); 51 void updateMaxStackSize(); 52 void pushBranch(unsigned Opcode, bool isWQM = false); 53 void pushLoop(); 54 void popBranch(); 55 void popLoop(); 56 }; 57 58 unsigned CFStack::getLoopDepth() { 59 return LoopStack.size(); 60 } 61 62 bool CFStack::branchStackContains(CFStack::StackItem Item) { 63 return llvm::is_contained(BranchStack, Item); 64 } 65 66 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) { 67 if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() && 68 getLoopDepth() > 1) 69 return true; 70 71 if (!ST->hasCFAluBug()) 72 return false; 73 74 switch(Opcode) { 75 default: return false; 76 case R600::CF_ALU_PUSH_BEFORE: 77 case R600::CF_ALU_ELSE_AFTER: 78 case R600::CF_ALU_BREAK: 79 case R600::CF_ALU_CONTINUE: 80 if (CurrentSubEntries == 0) 81 return false; 82 if (ST->getWavefrontSize() == 64) { 83 // We are being conservative here. We only require this work-around if 84 // CurrentSubEntries > 3 && 85 // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0) 86 // 87 // We have to be conservative, because we don't know for certain that 88 // our stack allocation algorithm for Evergreen/NI is correct. Applying this 89 // work-around when CurrentSubEntries > 3 allows us to over-allocate stack 90 // resources without any problems. 91 return CurrentSubEntries > 3; 92 } else { 93 assert(ST->getWavefrontSize() == 32); 94 // We are being conservative here. We only require the work-around if 95 // CurrentSubEntries > 7 && 96 // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0) 97 // See the comment on the wavefront size == 64 case for why we are 98 // being conservative. 99 return CurrentSubEntries > 7; 100 } 101 } 102 } 103 104 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) { 105 switch(Item) { 106 default: 107 return 0; 108 case CFStack::FIRST_NON_WQM_PUSH: 109 assert(!ST->hasCaymanISA()); 110 if (ST->getGeneration() <= AMDGPUSubtarget::R700) { 111 // +1 For the push operation. 112 // +2 Extra space required. 113 return 3; 114 } else { 115 // Some documentation says that this is not necessary on Evergreen, 116 // but experimentation has show that we need to allocate 1 extra 117 // sub-entry for the first non-WQM push. 118 // +1 For the push operation. 119 // +1 Extra space required. 120 return 2; 121 } 122 case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY: 123 assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 124 // +1 For the push operation. 125 // +1 Extra space required. 126 return 2; 127 case CFStack::SUB_ENTRY: 128 return 1; 129 } 130 } 131 132 void CFStack::updateMaxStackSize() { 133 unsigned CurrentStackSize = CurrentEntries + divideCeil(CurrentSubEntries, 4); 134 MaxStackSize = std::max(CurrentStackSize, MaxStackSize); 135 } 136 137 void CFStack::pushBranch(unsigned Opcode, bool isWQM) { 138 CFStack::StackItem Item = CFStack::ENTRY; 139 switch(Opcode) { 140 case R600::CF_PUSH_EG: 141 case R600::CF_ALU_PUSH_BEFORE: 142 if (!isWQM) { 143 if (!ST->hasCaymanISA() && 144 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH)) 145 Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI 146 // See comment in 147 // CFStack::getSubEntrySize() 148 else if (CurrentEntries > 0 && 149 ST->getGeneration() > AMDGPUSubtarget::EVERGREEN && 150 !ST->hasCaymanISA() && 151 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY)) 152 Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY; 153 else 154 Item = CFStack::SUB_ENTRY; 155 } else 156 Item = CFStack::ENTRY; 157 break; 158 } 159 BranchStack.push_back(Item); 160 if (Item == CFStack::ENTRY) 161 CurrentEntries++; 162 else 163 CurrentSubEntries += getSubEntrySize(Item); 164 updateMaxStackSize(); 165 } 166 167 void CFStack::pushLoop() { 168 LoopStack.push_back(CFStack::ENTRY); 169 CurrentEntries++; 170 updateMaxStackSize(); 171 } 172 173 void CFStack::popBranch() { 174 CFStack::StackItem Top = BranchStack.back(); 175 if (Top == CFStack::ENTRY) 176 CurrentEntries--; 177 else 178 CurrentSubEntries-= getSubEntrySize(Top); 179 BranchStack.pop_back(); 180 } 181 182 void CFStack::popLoop() { 183 CurrentEntries--; 184 LoopStack.pop_back(); 185 } 186 187 class R600ControlFlowFinalizer : public MachineFunctionPass { 188 private: 189 using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>; 190 191 enum ControlFlowInstruction { 192 CF_TC, 193 CF_VC, 194 CF_CALL_FS, 195 CF_WHILE_LOOP, 196 CF_END_LOOP, 197 CF_LOOP_BREAK, 198 CF_LOOP_CONTINUE, 199 CF_JUMP, 200 CF_ELSE, 201 CF_POP, 202 CF_END 203 }; 204 205 const R600InstrInfo *TII = nullptr; 206 const R600RegisterInfo *TRI = nullptr; 207 unsigned MaxFetchInst; 208 const R600Subtarget *ST = nullptr; 209 210 bool IsTrivialInst(MachineInstr &MI) const { 211 switch (MI.getOpcode()) { 212 case R600::KILL: 213 case R600::RETURN: 214 return true; 215 default: 216 return false; 217 } 218 } 219 220 const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { 221 unsigned Opcode = 0; 222 bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 223 switch (CFI) { 224 case CF_TC: 225 Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600; 226 break; 227 case CF_VC: 228 Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600; 229 break; 230 case CF_CALL_FS: 231 Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600; 232 break; 233 case CF_WHILE_LOOP: 234 Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600; 235 break; 236 case CF_END_LOOP: 237 Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600; 238 break; 239 case CF_LOOP_BREAK: 240 Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600; 241 break; 242 case CF_LOOP_CONTINUE: 243 Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600; 244 break; 245 case CF_JUMP: 246 Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600; 247 break; 248 case CF_ELSE: 249 Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600; 250 break; 251 case CF_POP: 252 Opcode = isEg ? R600::POP_EG : R600::POP_R600; 253 break; 254 case CF_END: 255 if (ST->hasCaymanISA()) { 256 Opcode = R600::CF_END_CM; 257 break; 258 } 259 Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600; 260 break; 261 } 262 assert (Opcode && "No opcode selected"); 263 return TII->get(Opcode); 264 } 265 266 bool isCompatibleWithClause(const MachineInstr &MI, 267 std::set<unsigned> &DstRegs) const { 268 unsigned DstMI, SrcMI; 269 for (MachineInstr::const_mop_iterator I = MI.operands_begin(), 270 E = MI.operands_end(); 271 I != E; ++I) { 272 const MachineOperand &MO = *I; 273 if (!MO.isReg()) 274 continue; 275 if (MO.isDef()) { 276 Register Reg = MO.getReg(); 277 if (R600::R600_Reg128RegClass.contains(Reg)) 278 DstMI = Reg; 279 else 280 DstMI = TRI->getMatchingSuperReg(Reg, 281 R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), 282 &R600::R600_Reg128RegClass); 283 } 284 if (MO.isUse()) { 285 Register Reg = MO.getReg(); 286 if (R600::R600_Reg128RegClass.contains(Reg)) 287 SrcMI = Reg; 288 else 289 SrcMI = TRI->getMatchingSuperReg(Reg, 290 R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), 291 &R600::R600_Reg128RegClass); 292 } 293 } 294 if ((DstRegs.find(SrcMI) == DstRegs.end())) { 295 DstRegs.insert(DstMI); 296 return true; 297 } else 298 return false; 299 } 300 301 ClauseFile 302 MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 303 const { 304 MachineBasicBlock::iterator ClauseHead = I; 305 std::vector<MachineInstr *> ClauseContent; 306 unsigned AluInstCount = 0; 307 bool IsTex = TII->usesTextureCache(*ClauseHead); 308 std::set<unsigned> DstRegs; 309 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { 310 if (IsTrivialInst(*I)) 311 continue; 312 if (AluInstCount >= MaxFetchInst) 313 break; 314 if ((IsTex && !TII->usesTextureCache(*I)) || 315 (!IsTex && !TII->usesVertexCache(*I))) 316 break; 317 if (!isCompatibleWithClause(*I, DstRegs)) 318 break; 319 AluInstCount ++; 320 ClauseContent.push_back(&*I); 321 } 322 MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), 323 getHWInstrDesc(IsTex?CF_TC:CF_VC)) 324 .addImm(0) // ADDR 325 .addImm(AluInstCount - 1); // COUNT 326 return ClauseFile(MIb, std::move(ClauseContent)); 327 } 328 329 void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const { 330 static const unsigned LiteralRegs[] = { 331 R600::ALU_LITERAL_X, 332 R600::ALU_LITERAL_Y, 333 R600::ALU_LITERAL_Z, 334 R600::ALU_LITERAL_W 335 }; 336 const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = 337 TII->getSrcs(MI); 338 for (const auto &Src:Srcs) { 339 if (Src.first->getReg() != R600::ALU_LITERAL_X) 340 continue; 341 int64_t Imm = Src.second; 342 std::vector<MachineOperand *>::iterator It = 343 llvm::find_if(Lits, [&](MachineOperand *val) { 344 return val->isImm() && (val->getImm() == Imm); 345 }); 346 347 // Get corresponding Operand 348 MachineOperand &Operand = MI.getOperand( 349 TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal)); 350 351 if (It != Lits.end()) { 352 // Reuse existing literal reg 353 unsigned Index = It - Lits.begin(); 354 Src.first->setReg(LiteralRegs[Index]); 355 } else { 356 // Allocate new literal reg 357 assert(Lits.size() < 4 && "Too many literals in Instruction Group"); 358 Src.first->setReg(LiteralRegs[Lits.size()]); 359 Lits.push_back(&Operand); 360 } 361 } 362 } 363 364 MachineBasicBlock::iterator insertLiterals( 365 MachineBasicBlock::iterator InsertPos, 366 const std::vector<unsigned> &Literals) const { 367 MachineBasicBlock *MBB = InsertPos->getParent(); 368 for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { 369 unsigned LiteralPair0 = Literals[i]; 370 unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0; 371 InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(), 372 TII->get(R600::LITERALS)) 373 .addImm(LiteralPair0) 374 .addImm(LiteralPair1); 375 } 376 return InsertPos; 377 } 378 379 ClauseFile 380 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 381 const { 382 MachineInstr &ClauseHead = *I; 383 std::vector<MachineInstr *> ClauseContent; 384 I++; 385 for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) { 386 if (IsTrivialInst(*I)) { 387 ++I; 388 continue; 389 } 390 if (!I->isBundle() && !TII->isALUInstr(I->getOpcode())) 391 break; 392 std::vector<MachineOperand *>Literals; 393 if (I->isBundle()) { 394 MachineInstr &DeleteMI = *I; 395 MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 396 while (++BI != E && BI->isBundledWithPred()) { 397 BI->unbundleFromPred(); 398 for (MachineOperand &MO : BI->operands()) { 399 if (MO.isReg() && MO.isInternalRead()) 400 MO.setIsInternalRead(false); 401 } 402 getLiteral(*BI, Literals); 403 ClauseContent.push_back(&*BI); 404 } 405 I = BI; 406 DeleteMI.eraseFromParent(); 407 } else { 408 getLiteral(*I, Literals); 409 ClauseContent.push_back(&*I); 410 I++; 411 } 412 for (unsigned i = 0, e = Literals.size(); i < e; i += 2) { 413 MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(), 414 TII->get(R600::LITERALS)); 415 if (Literals[i]->isImm()) { 416 MILit.addImm(Literals[i]->getImm()); 417 } else { 418 MILit.addGlobalAddress(Literals[i]->getGlobal(), 419 Literals[i]->getOffset()); 420 } 421 if (i + 1 < e) { 422 if (Literals[i + 1]->isImm()) { 423 MILit.addImm(Literals[i + 1]->getImm()); 424 } else { 425 MILit.addGlobalAddress(Literals[i + 1]->getGlobal(), 426 Literals[i + 1]->getOffset()); 427 } 428 } else 429 MILit.addImm(0); 430 ClauseContent.push_back(MILit); 431 } 432 } 433 assert(ClauseContent.size() < 128 && "ALU clause is too big"); 434 ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1); 435 return ClauseFile(&ClauseHead, std::move(ClauseContent)); 436 } 437 438 void EmitFetchClause(MachineBasicBlock::iterator InsertPos, 439 const DebugLoc &DL, ClauseFile &Clause, 440 unsigned &CfCount) { 441 CounterPropagateAddr(*Clause.first, CfCount); 442 MachineBasicBlock *BB = Clause.first->getParent(); 443 BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount); 444 for (MachineInstr *MI : Clause.second) 445 BB->splice(InsertPos, BB, MI); 446 CfCount += 2 * Clause.second.size(); 447 } 448 449 void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL, 450 ClauseFile &Clause, unsigned &CfCount) { 451 Clause.first->getOperand(0).setImm(0); 452 CounterPropagateAddr(*Clause.first, CfCount); 453 MachineBasicBlock *BB = Clause.first->getParent(); 454 BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount); 455 for (MachineInstr *MI : Clause.second) 456 BB->splice(InsertPos, BB, MI); 457 CfCount += Clause.second.size(); 458 } 459 460 void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const { 461 MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm()); 462 } 463 void CounterPropagateAddr(const std::set<MachineInstr *> &MIs, 464 unsigned Addr) const { 465 for (MachineInstr *MI : MIs) { 466 CounterPropagateAddr(*MI, Addr); 467 } 468 } 469 470 public: 471 static char ID; 472 473 R600ControlFlowFinalizer() : MachineFunctionPass(ID) {} 474 475 bool runOnMachineFunction(MachineFunction &MF) override { 476 ST = &MF.getSubtarget<R600Subtarget>(); 477 MaxFetchInst = ST->getTexVTXClauseSize(); 478 TII = ST->getInstrInfo(); 479 TRI = ST->getRegisterInfo(); 480 481 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 482 483 CFStack CFStack(ST, MF.getFunction().getCallingConv()); 484 for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; 485 ++MB) { 486 MachineBasicBlock &MBB = *MB; 487 unsigned CfCount = 0; 488 std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack; 489 std::vector<MachineInstr * > IfThenElseStack; 490 if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) { 491 BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), 492 getHWInstrDesc(CF_CALL_FS)); 493 CfCount++; 494 } 495 std::vector<ClauseFile> FetchClauses, AluClauses; 496 std::vector<MachineInstr *> LastAlu(1); 497 std::vector<MachineInstr *> ToPopAfter; 498 499 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 500 I != E;) { 501 if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) { 502 LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump();); 503 FetchClauses.push_back(MakeFetchClause(MBB, I)); 504 CfCount++; 505 LastAlu.back() = nullptr; 506 continue; 507 } 508 509 MachineBasicBlock::iterator MI = I; 510 if (MI->getOpcode() != R600::ENDIF) 511 LastAlu.back() = nullptr; 512 if (MI->getOpcode() == R600::CF_ALU) 513 LastAlu.back() = &*MI; 514 I++; 515 bool RequiresWorkAround = 516 CFStack.requiresWorkAroundForInst(MI->getOpcode()); 517 switch (MI->getOpcode()) { 518 case R600::CF_ALU_PUSH_BEFORE: 519 if (RequiresWorkAround) { 520 LLVM_DEBUG(dbgs() 521 << "Applying bug work-around for ALU_PUSH_BEFORE\n"); 522 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG)) 523 .addImm(CfCount + 1) 524 .addImm(1); 525 MI->setDesc(TII->get(R600::CF_ALU)); 526 CfCount++; 527 CFStack.pushBranch(R600::CF_PUSH_EG); 528 } else 529 CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE); 530 LLVM_FALLTHROUGH; 531 case R600::CF_ALU: 532 I = MI; 533 AluClauses.push_back(MakeALUClause(MBB, I)); 534 LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); 535 CfCount++; 536 break; 537 case R600::WHILELOOP: { 538 CFStack.pushLoop(); 539 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 540 getHWInstrDesc(CF_WHILE_LOOP)) 541 .addImm(1); 542 std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount, 543 std::set<MachineInstr *>()); 544 Pair.second.insert(MIb); 545 LoopStack.push_back(std::move(Pair)); 546 MI->eraseFromParent(); 547 CfCount++; 548 break; 549 } 550 case R600::ENDLOOP: { 551 CFStack.popLoop(); 552 std::pair<unsigned, std::set<MachineInstr *>> Pair = 553 std::move(LoopStack.back()); 554 LoopStack.pop_back(); 555 CounterPropagateAddr(Pair.second, CfCount); 556 BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP)) 557 .addImm(Pair.first + 1); 558 MI->eraseFromParent(); 559 CfCount++; 560 break; 561 } 562 case R600::IF_PREDICATE_SET: { 563 LastAlu.push_back(nullptr); 564 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 565 getHWInstrDesc(CF_JUMP)) 566 .addImm(0) 567 .addImm(0); 568 IfThenElseStack.push_back(MIb); 569 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 570 MI->eraseFromParent(); 571 CfCount++; 572 break; 573 } 574 case R600::ELSE: { 575 MachineInstr * JumpInst = IfThenElseStack.back(); 576 IfThenElseStack.pop_back(); 577 CounterPropagateAddr(*JumpInst, CfCount); 578 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 579 getHWInstrDesc(CF_ELSE)) 580 .addImm(0) 581 .addImm(0); 582 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 583 IfThenElseStack.push_back(MIb); 584 MI->eraseFromParent(); 585 CfCount++; 586 break; 587 } 588 case R600::ENDIF: { 589 CFStack.popBranch(); 590 if (LastAlu.back()) { 591 ToPopAfter.push_back(LastAlu.back()); 592 } else { 593 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 594 getHWInstrDesc(CF_POP)) 595 .addImm(CfCount + 1) 596 .addImm(1); 597 (void)MIb; 598 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 599 CfCount++; 600 } 601 602 MachineInstr *IfOrElseInst = IfThenElseStack.back(); 603 IfThenElseStack.pop_back(); 604 CounterPropagateAddr(*IfOrElseInst, CfCount); 605 IfOrElseInst->getOperand(1).setImm(1); 606 LastAlu.pop_back(); 607 MI->eraseFromParent(); 608 break; 609 } 610 case R600::BREAK: { 611 CfCount ++; 612 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 613 getHWInstrDesc(CF_LOOP_BREAK)) 614 .addImm(0); 615 LoopStack.back().second.insert(MIb); 616 MI->eraseFromParent(); 617 break; 618 } 619 case R600::CONTINUE: { 620 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 621 getHWInstrDesc(CF_LOOP_CONTINUE)) 622 .addImm(0); 623 LoopStack.back().second.insert(MIb); 624 MI->eraseFromParent(); 625 CfCount++; 626 break; 627 } 628 case R600::RETURN: { 629 DebugLoc DL = MBB.findDebugLoc(MI); 630 BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END)); 631 CfCount++; 632 if (CfCount % 2) { 633 BuildMI(MBB, I, DL, TII->get(R600::PAD)); 634 CfCount++; 635 } 636 MI->eraseFromParent(); 637 for (ClauseFile &CF : FetchClauses) 638 EmitFetchClause(I, DL, CF, CfCount); 639 for (ClauseFile &CF : AluClauses) 640 EmitALUClause(I, DL, CF, CfCount); 641 break; 642 } 643 default: 644 if (TII->isExport(MI->getOpcode())) { 645 LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); 646 CfCount++; 647 } 648 break; 649 } 650 } 651 for (MachineInstr *Alu : ToPopAfter) { 652 BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu), 653 TII->get(R600::CF_ALU_POP_AFTER)) 654 .addImm(Alu->getOperand(0).getImm()) 655 .addImm(Alu->getOperand(1).getImm()) 656 .addImm(Alu->getOperand(2).getImm()) 657 .addImm(Alu->getOperand(3).getImm()) 658 .addImm(Alu->getOperand(4).getImm()) 659 .addImm(Alu->getOperand(5).getImm()) 660 .addImm(Alu->getOperand(6).getImm()) 661 .addImm(Alu->getOperand(7).getImm()) 662 .addImm(Alu->getOperand(8).getImm()); 663 Alu->eraseFromParent(); 664 } 665 MFI->CFStackSize = CFStack.MaxStackSize; 666 } 667 668 return false; 669 } 670 671 StringRef getPassName() const override { 672 return "R600 Control Flow Finalizer Pass"; 673 } 674 }; 675 676 } // end anonymous namespace 677 678 INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE, 679 "R600 Control Flow Finalizer", false, false) 680 INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE, 681 "R600 Control Flow Finalizer", false, false) 682 683 char R600ControlFlowFinalizer::ID = 0; 684 685 char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID; 686 687 FunctionPass *llvm::createR600ControlFlowFinalizer() { 688 return new R600ControlFlowFinalizer(); 689 } 690