1 //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass compute turns all control flow pseudo instructions into native one 11 /// computing their address on the fly; it also sets STACK_SIZE info. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "MCTargetDesc/R600MCTargetDesc.h" 16 #include "R600.h" 17 #include "R600MachineFunctionInfo.h" 18 #include "R600Subtarget.h" 19 #include "llvm/CodeGen/MachineFunctionPass.h" 20 #include <set> 21 22 using namespace llvm; 23 24 #define DEBUG_TYPE "r600cf" 25 26 namespace { 27 28 struct CFStack { 29 enum StackItem { 30 ENTRY = 0, 31 SUB_ENTRY = 1, 32 FIRST_NON_WQM_PUSH = 2, 33 FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3 34 }; 35 36 const R600Subtarget *ST; 37 std::vector<StackItem> BranchStack; 38 std::vector<StackItem> LoopStack; 39 unsigned MaxStackSize; 40 unsigned CurrentEntries = 0; 41 unsigned CurrentSubEntries = 0; 42 43 CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st), 44 // We need to reserve a stack entry for CALL_FS in vertex shaders. 45 MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {} 46 47 unsigned getLoopDepth(); 48 bool branchStackContains(CFStack::StackItem); 49 bool requiresWorkAroundForInst(unsigned Opcode); 50 unsigned getSubEntrySize(CFStack::StackItem Item); 51 void updateMaxStackSize(); 52 void pushBranch(unsigned Opcode, bool isWQM = false); 53 void pushLoop(); 54 void popBranch(); 55 void popLoop(); 56 }; 57 58 unsigned CFStack::getLoopDepth() { 59 return LoopStack.size(); 60 } 61 62 bool CFStack::branchStackContains(CFStack::StackItem Item) { 63 return llvm::is_contained(BranchStack, Item); 64 } 65 66 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) { 67 if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() && 68 getLoopDepth() > 1) 69 return true; 70 71 if (!ST->hasCFAluBug()) 72 return false; 73 74 switch(Opcode) { 75 default: return false; 76 case R600::CF_ALU_PUSH_BEFORE: 77 case R600::CF_ALU_ELSE_AFTER: 78 case R600::CF_ALU_BREAK: 79 case R600::CF_ALU_CONTINUE: 80 if (CurrentSubEntries == 0) 81 return false; 82 if (ST->getWavefrontSize() == 64) { 83 // We are being conservative here. We only require this work-around if 84 // CurrentSubEntries > 3 && 85 // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0) 86 // 87 // We have to be conservative, because we don't know for certain that 88 // our stack allocation algorithm for Evergreen/NI is correct. Applying this 89 // work-around when CurrentSubEntries > 3 allows us to over-allocate stack 90 // resources without any problems. 91 return CurrentSubEntries > 3; 92 } 93 assert(ST->getWavefrontSize() == 32); 94 // We are being conservative here. We only require the work-around if 95 // CurrentSubEntries > 7 && 96 // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0) 97 // See the comment on the wavefront size == 64 case for why we are 98 // being conservative. 99 return CurrentSubEntries > 7; 100 } 101 } 102 103 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) { 104 switch(Item) { 105 default: 106 return 0; 107 case CFStack::FIRST_NON_WQM_PUSH: 108 assert(!ST->hasCaymanISA()); 109 if (ST->getGeneration() <= AMDGPUSubtarget::R700) { 110 // +1 For the push operation. 111 // +2 Extra space required. 112 return 3; 113 } 114 // Some documentation says that this is not necessary on Evergreen, 115 // but experimentation has show that we need to allocate 1 extra 116 // sub-entry for the first non-WQM push. 117 // +1 For the push operation. 118 // +1 Extra space required. 119 return 2; 120 case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY: 121 assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 122 // +1 For the push operation. 123 // +1 Extra space required. 124 return 2; 125 case CFStack::SUB_ENTRY: 126 return 1; 127 } 128 } 129 130 void CFStack::updateMaxStackSize() { 131 unsigned CurrentStackSize = CurrentEntries + divideCeil(CurrentSubEntries, 4); 132 MaxStackSize = std::max(CurrentStackSize, MaxStackSize); 133 } 134 135 void CFStack::pushBranch(unsigned Opcode, bool isWQM) { 136 CFStack::StackItem Item = CFStack::ENTRY; 137 switch(Opcode) { 138 case R600::CF_PUSH_EG: 139 case R600::CF_ALU_PUSH_BEFORE: 140 if (!isWQM) { 141 if (!ST->hasCaymanISA() && 142 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH)) 143 Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI 144 // See comment in 145 // CFStack::getSubEntrySize() 146 else if (CurrentEntries > 0 && 147 ST->getGeneration() > AMDGPUSubtarget::EVERGREEN && 148 !ST->hasCaymanISA() && 149 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY)) 150 Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY; 151 else 152 Item = CFStack::SUB_ENTRY; 153 } else 154 Item = CFStack::ENTRY; 155 break; 156 } 157 BranchStack.push_back(Item); 158 if (Item == CFStack::ENTRY) 159 CurrentEntries++; 160 else 161 CurrentSubEntries += getSubEntrySize(Item); 162 updateMaxStackSize(); 163 } 164 165 void CFStack::pushLoop() { 166 LoopStack.push_back(CFStack::ENTRY); 167 CurrentEntries++; 168 updateMaxStackSize(); 169 } 170 171 void CFStack::popBranch() { 172 CFStack::StackItem Top = BranchStack.back(); 173 if (Top == CFStack::ENTRY) 174 CurrentEntries--; 175 else 176 CurrentSubEntries-= getSubEntrySize(Top); 177 BranchStack.pop_back(); 178 } 179 180 void CFStack::popLoop() { 181 CurrentEntries--; 182 LoopStack.pop_back(); 183 } 184 185 class R600ControlFlowFinalizer : public MachineFunctionPass { 186 private: 187 using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>; 188 189 enum ControlFlowInstruction { 190 CF_TC, 191 CF_VC, 192 CF_CALL_FS, 193 CF_WHILE_LOOP, 194 CF_END_LOOP, 195 CF_LOOP_BREAK, 196 CF_LOOP_CONTINUE, 197 CF_JUMP, 198 CF_ELSE, 199 CF_POP, 200 CF_END 201 }; 202 203 const R600InstrInfo *TII = nullptr; 204 const R600RegisterInfo *TRI = nullptr; 205 unsigned MaxFetchInst; 206 const R600Subtarget *ST = nullptr; 207 208 bool IsTrivialInst(MachineInstr &MI) const { 209 switch (MI.getOpcode()) { 210 case R600::KILL: 211 case R600::RETURN: 212 return true; 213 default: 214 return false; 215 } 216 } 217 218 const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { 219 unsigned Opcode = 0; 220 bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 221 switch (CFI) { 222 case CF_TC: 223 Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600; 224 break; 225 case CF_VC: 226 Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600; 227 break; 228 case CF_CALL_FS: 229 Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600; 230 break; 231 case CF_WHILE_LOOP: 232 Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600; 233 break; 234 case CF_END_LOOP: 235 Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600; 236 break; 237 case CF_LOOP_BREAK: 238 Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600; 239 break; 240 case CF_LOOP_CONTINUE: 241 Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600; 242 break; 243 case CF_JUMP: 244 Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600; 245 break; 246 case CF_ELSE: 247 Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600; 248 break; 249 case CF_POP: 250 Opcode = isEg ? R600::POP_EG : R600::POP_R600; 251 break; 252 case CF_END: 253 if (ST->hasCaymanISA()) { 254 Opcode = R600::CF_END_CM; 255 break; 256 } 257 Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600; 258 break; 259 } 260 assert (Opcode && "No opcode selected"); 261 return TII->get(Opcode); 262 } 263 264 bool isCompatibleWithClause(const MachineInstr &MI, 265 std::set<unsigned> &DstRegs) const { 266 unsigned DstMI, SrcMI; 267 for (MachineInstr::const_mop_iterator I = MI.operands_begin(), 268 E = MI.operands_end(); 269 I != E; ++I) { 270 const MachineOperand &MO = *I; 271 if (!MO.isReg()) 272 continue; 273 if (MO.isDef()) { 274 Register Reg = MO.getReg(); 275 if (R600::R600_Reg128RegClass.contains(Reg)) 276 DstMI = Reg; 277 else 278 DstMI = TRI->getMatchingSuperReg(Reg, 279 R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), 280 &R600::R600_Reg128RegClass); 281 } 282 if (MO.isUse()) { 283 Register Reg = MO.getReg(); 284 if (R600::R600_Reg128RegClass.contains(Reg)) 285 SrcMI = Reg; 286 else 287 SrcMI = TRI->getMatchingSuperReg(Reg, 288 R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), 289 &R600::R600_Reg128RegClass); 290 } 291 } 292 if ((DstRegs.find(SrcMI) == DstRegs.end())) { 293 DstRegs.insert(DstMI); 294 return true; 295 } 296 return false; 297 } 298 299 ClauseFile 300 MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 301 const { 302 MachineBasicBlock::iterator ClauseHead = I; 303 std::vector<MachineInstr *> ClauseContent; 304 unsigned AluInstCount = 0; 305 bool IsTex = TII->usesTextureCache(*ClauseHead); 306 std::set<unsigned> DstRegs; 307 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { 308 if (IsTrivialInst(*I)) 309 continue; 310 if (AluInstCount >= MaxFetchInst) 311 break; 312 if ((IsTex && !TII->usesTextureCache(*I)) || 313 (!IsTex && !TII->usesVertexCache(*I))) 314 break; 315 if (!isCompatibleWithClause(*I, DstRegs)) 316 break; 317 AluInstCount ++; 318 ClauseContent.push_back(&*I); 319 } 320 MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), 321 getHWInstrDesc(IsTex?CF_TC:CF_VC)) 322 .addImm(0) // ADDR 323 .addImm(AluInstCount - 1); // COUNT 324 return ClauseFile(MIb, std::move(ClauseContent)); 325 } 326 327 void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const { 328 static const unsigned LiteralRegs[] = { 329 R600::ALU_LITERAL_X, 330 R600::ALU_LITERAL_Y, 331 R600::ALU_LITERAL_Z, 332 R600::ALU_LITERAL_W 333 }; 334 const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = 335 TII->getSrcs(MI); 336 for (const auto &Src:Srcs) { 337 if (Src.first->getReg() != R600::ALU_LITERAL_X) 338 continue; 339 int64_t Imm = Src.second; 340 std::vector<MachineOperand *>::iterator It = 341 llvm::find_if(Lits, [&](MachineOperand *val) { 342 return val->isImm() && (val->getImm() == Imm); 343 }); 344 345 // Get corresponding Operand 346 MachineOperand &Operand = MI.getOperand( 347 TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal)); 348 349 if (It != Lits.end()) { 350 // Reuse existing literal reg 351 unsigned Index = It - Lits.begin(); 352 Src.first->setReg(LiteralRegs[Index]); 353 } else { 354 // Allocate new literal reg 355 assert(Lits.size() < 4 && "Too many literals in Instruction Group"); 356 Src.first->setReg(LiteralRegs[Lits.size()]); 357 Lits.push_back(&Operand); 358 } 359 } 360 } 361 362 MachineBasicBlock::iterator insertLiterals( 363 MachineBasicBlock::iterator InsertPos, 364 const std::vector<unsigned> &Literals) const { 365 MachineBasicBlock *MBB = InsertPos->getParent(); 366 for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { 367 unsigned LiteralPair0 = Literals[i]; 368 unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0; 369 InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(), 370 TII->get(R600::LITERALS)) 371 .addImm(LiteralPair0) 372 .addImm(LiteralPair1); 373 } 374 return InsertPos; 375 } 376 377 ClauseFile 378 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 379 const { 380 MachineInstr &ClauseHead = *I; 381 std::vector<MachineInstr *> ClauseContent; 382 I++; 383 for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) { 384 if (IsTrivialInst(*I)) { 385 ++I; 386 continue; 387 } 388 if (!I->isBundle() && !TII->isALUInstr(I->getOpcode())) 389 break; 390 std::vector<MachineOperand *>Literals; 391 if (I->isBundle()) { 392 MachineInstr &DeleteMI = *I; 393 MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 394 while (++BI != E && BI->isBundledWithPred()) { 395 BI->unbundleFromPred(); 396 for (MachineOperand &MO : BI->operands()) { 397 if (MO.isReg() && MO.isInternalRead()) 398 MO.setIsInternalRead(false); 399 } 400 getLiteral(*BI, Literals); 401 ClauseContent.push_back(&*BI); 402 } 403 I = BI; 404 DeleteMI.eraseFromParent(); 405 } else { 406 getLiteral(*I, Literals); 407 ClauseContent.push_back(&*I); 408 I++; 409 } 410 for (unsigned i = 0, e = Literals.size(); i < e; i += 2) { 411 MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(), 412 TII->get(R600::LITERALS)); 413 if (Literals[i]->isImm()) { 414 MILit.addImm(Literals[i]->getImm()); 415 } else { 416 MILit.addGlobalAddress(Literals[i]->getGlobal(), 417 Literals[i]->getOffset()); 418 } 419 if (i + 1 < e) { 420 if (Literals[i + 1]->isImm()) { 421 MILit.addImm(Literals[i + 1]->getImm()); 422 } else { 423 MILit.addGlobalAddress(Literals[i + 1]->getGlobal(), 424 Literals[i + 1]->getOffset()); 425 } 426 } else 427 MILit.addImm(0); 428 ClauseContent.push_back(MILit); 429 } 430 } 431 assert(ClauseContent.size() < 128 && "ALU clause is too big"); 432 ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1); 433 return ClauseFile(&ClauseHead, std::move(ClauseContent)); 434 } 435 436 void EmitFetchClause(MachineBasicBlock::iterator InsertPos, 437 const DebugLoc &DL, ClauseFile &Clause, 438 unsigned &CfCount) { 439 CounterPropagateAddr(*Clause.first, CfCount); 440 MachineBasicBlock *BB = Clause.first->getParent(); 441 BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount); 442 for (MachineInstr *MI : Clause.second) 443 BB->splice(InsertPos, BB, MI); 444 CfCount += 2 * Clause.second.size(); 445 } 446 447 void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL, 448 ClauseFile &Clause, unsigned &CfCount) { 449 Clause.first->getOperand(0).setImm(0); 450 CounterPropagateAddr(*Clause.first, CfCount); 451 MachineBasicBlock *BB = Clause.first->getParent(); 452 BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount); 453 for (MachineInstr *MI : Clause.second) 454 BB->splice(InsertPos, BB, MI); 455 CfCount += Clause.second.size(); 456 } 457 458 void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const { 459 MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm()); 460 } 461 void CounterPropagateAddr(const std::set<MachineInstr *> &MIs, 462 unsigned Addr) const { 463 for (MachineInstr *MI : MIs) { 464 CounterPropagateAddr(*MI, Addr); 465 } 466 } 467 468 public: 469 static char ID; 470 471 R600ControlFlowFinalizer() : MachineFunctionPass(ID) {} 472 473 bool runOnMachineFunction(MachineFunction &MF) override { 474 ST = &MF.getSubtarget<R600Subtarget>(); 475 MaxFetchInst = ST->getTexVTXClauseSize(); 476 TII = ST->getInstrInfo(); 477 TRI = ST->getRegisterInfo(); 478 479 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 480 481 CFStack CFStack(ST, MF.getFunction().getCallingConv()); 482 for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; 483 ++MB) { 484 MachineBasicBlock &MBB = *MB; 485 unsigned CfCount = 0; 486 std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack; 487 std::vector<MachineInstr * > IfThenElseStack; 488 if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) { 489 BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), 490 getHWInstrDesc(CF_CALL_FS)); 491 CfCount++; 492 } 493 std::vector<ClauseFile> FetchClauses, AluClauses; 494 std::vector<MachineInstr *> LastAlu(1); 495 std::vector<MachineInstr *> ToPopAfter; 496 497 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 498 I != E;) { 499 if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) { 500 LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump();); 501 FetchClauses.push_back(MakeFetchClause(MBB, I)); 502 CfCount++; 503 LastAlu.back() = nullptr; 504 continue; 505 } 506 507 MachineBasicBlock::iterator MI = I; 508 if (MI->getOpcode() != R600::ENDIF) 509 LastAlu.back() = nullptr; 510 if (MI->getOpcode() == R600::CF_ALU) 511 LastAlu.back() = &*MI; 512 I++; 513 bool RequiresWorkAround = 514 CFStack.requiresWorkAroundForInst(MI->getOpcode()); 515 switch (MI->getOpcode()) { 516 case R600::CF_ALU_PUSH_BEFORE: 517 if (RequiresWorkAround) { 518 LLVM_DEBUG(dbgs() 519 << "Applying bug work-around for ALU_PUSH_BEFORE\n"); 520 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG)) 521 .addImm(CfCount + 1) 522 .addImm(1); 523 MI->setDesc(TII->get(R600::CF_ALU)); 524 CfCount++; 525 CFStack.pushBranch(R600::CF_PUSH_EG); 526 } else 527 CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE); 528 [[fallthrough]]; 529 case R600::CF_ALU: 530 I = MI; 531 AluClauses.push_back(MakeALUClause(MBB, I)); 532 LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); 533 CfCount++; 534 break; 535 case R600::WHILELOOP: { 536 CFStack.pushLoop(); 537 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 538 getHWInstrDesc(CF_WHILE_LOOP)) 539 .addImm(1); 540 std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount, 541 std::set<MachineInstr *>()); 542 Pair.second.insert(MIb); 543 LoopStack.push_back(std::move(Pair)); 544 MI->eraseFromParent(); 545 CfCount++; 546 break; 547 } 548 case R600::ENDLOOP: { 549 CFStack.popLoop(); 550 std::pair<unsigned, std::set<MachineInstr *>> Pair = 551 std::move(LoopStack.back()); 552 LoopStack.pop_back(); 553 CounterPropagateAddr(Pair.second, CfCount); 554 BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP)) 555 .addImm(Pair.first + 1); 556 MI->eraseFromParent(); 557 CfCount++; 558 break; 559 } 560 case R600::IF_PREDICATE_SET: { 561 LastAlu.push_back(nullptr); 562 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 563 getHWInstrDesc(CF_JUMP)) 564 .addImm(0) 565 .addImm(0); 566 IfThenElseStack.push_back(MIb); 567 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 568 MI->eraseFromParent(); 569 CfCount++; 570 break; 571 } 572 case R600::ELSE: { 573 MachineInstr * JumpInst = IfThenElseStack.back(); 574 IfThenElseStack.pop_back(); 575 CounterPropagateAddr(*JumpInst, CfCount); 576 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 577 getHWInstrDesc(CF_ELSE)) 578 .addImm(0) 579 .addImm(0); 580 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 581 IfThenElseStack.push_back(MIb); 582 MI->eraseFromParent(); 583 CfCount++; 584 break; 585 } 586 case R600::ENDIF: { 587 CFStack.popBranch(); 588 if (LastAlu.back()) { 589 ToPopAfter.push_back(LastAlu.back()); 590 } else { 591 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 592 getHWInstrDesc(CF_POP)) 593 .addImm(CfCount + 1) 594 .addImm(1); 595 (void)MIb; 596 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 597 CfCount++; 598 } 599 600 MachineInstr *IfOrElseInst = IfThenElseStack.back(); 601 IfThenElseStack.pop_back(); 602 CounterPropagateAddr(*IfOrElseInst, CfCount); 603 IfOrElseInst->getOperand(1).setImm(1); 604 LastAlu.pop_back(); 605 MI->eraseFromParent(); 606 break; 607 } 608 case R600::BREAK: { 609 CfCount ++; 610 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 611 getHWInstrDesc(CF_LOOP_BREAK)) 612 .addImm(0); 613 LoopStack.back().second.insert(MIb); 614 MI->eraseFromParent(); 615 break; 616 } 617 case R600::CONTINUE: { 618 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 619 getHWInstrDesc(CF_LOOP_CONTINUE)) 620 .addImm(0); 621 LoopStack.back().second.insert(MIb); 622 MI->eraseFromParent(); 623 CfCount++; 624 break; 625 } 626 case R600::RETURN: { 627 DebugLoc DL = MBB.findDebugLoc(MI); 628 BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END)); 629 CfCount++; 630 if (CfCount % 2) { 631 BuildMI(MBB, I, DL, TII->get(R600::PAD)); 632 CfCount++; 633 } 634 MI->eraseFromParent(); 635 for (ClauseFile &CF : FetchClauses) 636 EmitFetchClause(I, DL, CF, CfCount); 637 for (ClauseFile &CF : AluClauses) 638 EmitALUClause(I, DL, CF, CfCount); 639 break; 640 } 641 default: 642 if (TII->isExport(MI->getOpcode())) { 643 LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); 644 CfCount++; 645 } 646 break; 647 } 648 } 649 for (MachineInstr *Alu : ToPopAfter) { 650 BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu), 651 TII->get(R600::CF_ALU_POP_AFTER)) 652 .addImm(Alu->getOperand(0).getImm()) 653 .addImm(Alu->getOperand(1).getImm()) 654 .addImm(Alu->getOperand(2).getImm()) 655 .addImm(Alu->getOperand(3).getImm()) 656 .addImm(Alu->getOperand(4).getImm()) 657 .addImm(Alu->getOperand(5).getImm()) 658 .addImm(Alu->getOperand(6).getImm()) 659 .addImm(Alu->getOperand(7).getImm()) 660 .addImm(Alu->getOperand(8).getImm()); 661 Alu->eraseFromParent(); 662 } 663 MFI->CFStackSize = CFStack.MaxStackSize; 664 } 665 666 return false; 667 } 668 669 StringRef getPassName() const override { 670 return "R600 Control Flow Finalizer Pass"; 671 } 672 }; 673 674 } // end anonymous namespace 675 676 INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE, 677 "R600 Control Flow Finalizer", false, false) 678 INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE, 679 "R600 Control Flow Finalizer", false, false) 680 681 char R600ControlFlowFinalizer::ID = 0; 682 683 char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID; 684 685 FunctionPass *llvm::createR600ControlFlowFinalizer() { 686 return new R600ControlFlowFinalizer(); 687 } 688