1 //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass compute turns all control flow pseudo instructions into native one 11 /// computing their address on the fly; it also sets STACK_SIZE info. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPU.h" 16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 17 #include "R600MachineFunctionInfo.h" 18 #include "R600Subtarget.h" 19 #include <set> 20 21 using namespace llvm; 22 23 #define DEBUG_TYPE "r600cf" 24 25 namespace { 26 27 struct CFStack { 28 enum StackItem { 29 ENTRY = 0, 30 SUB_ENTRY = 1, 31 FIRST_NON_WQM_PUSH = 2, 32 FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3 33 }; 34 35 const R600Subtarget *ST; 36 std::vector<StackItem> BranchStack; 37 std::vector<StackItem> LoopStack; 38 unsigned MaxStackSize; 39 unsigned CurrentEntries = 0; 40 unsigned CurrentSubEntries = 0; 41 42 CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st), 43 // We need to reserve a stack entry for CALL_FS in vertex shaders. 44 MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {} 45 46 unsigned getLoopDepth(); 47 bool branchStackContains(CFStack::StackItem); 48 bool requiresWorkAroundForInst(unsigned Opcode); 49 unsigned getSubEntrySize(CFStack::StackItem Item); 50 void updateMaxStackSize(); 51 void pushBranch(unsigned Opcode, bool isWQM = false); 52 void pushLoop(); 53 void popBranch(); 54 void popLoop(); 55 }; 56 57 unsigned CFStack::getLoopDepth() { 58 return LoopStack.size(); 59 } 60 61 bool CFStack::branchStackContains(CFStack::StackItem Item) { 62 return llvm::is_contained(BranchStack, Item); 63 } 64 65 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) { 66 if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() && 67 getLoopDepth() > 1) 68 return true; 69 70 if (!ST->hasCFAluBug()) 71 return false; 72 73 switch(Opcode) { 74 default: return false; 75 case R600::CF_ALU_PUSH_BEFORE: 76 case R600::CF_ALU_ELSE_AFTER: 77 case R600::CF_ALU_BREAK: 78 case R600::CF_ALU_CONTINUE: 79 if (CurrentSubEntries == 0) 80 return false; 81 if (ST->getWavefrontSize() == 64) { 82 // We are being conservative here. We only require this work-around if 83 // CurrentSubEntries > 3 && 84 // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0) 85 // 86 // We have to be conservative, because we don't know for certain that 87 // our stack allocation algorithm for Evergreen/NI is correct. Applying this 88 // work-around when CurrentSubEntries > 3 allows us to over-allocate stack 89 // resources without any problems. 90 return CurrentSubEntries > 3; 91 } else { 92 assert(ST->getWavefrontSize() == 32); 93 // We are being conservative here. We only require the work-around if 94 // CurrentSubEntries > 7 && 95 // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0) 96 // See the comment on the wavefront size == 64 case for why we are 97 // being conservative. 98 return CurrentSubEntries > 7; 99 } 100 } 101 } 102 103 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) { 104 switch(Item) { 105 default: 106 return 0; 107 case CFStack::FIRST_NON_WQM_PUSH: 108 assert(!ST->hasCaymanISA()); 109 if (ST->getGeneration() <= AMDGPUSubtarget::R700) { 110 // +1 For the push operation. 111 // +2 Extra space required. 112 return 3; 113 } else { 114 // Some documentation says that this is not necessary on Evergreen, 115 // but experimentation has show that we need to allocate 1 extra 116 // sub-entry for the first non-WQM push. 117 // +1 For the push operation. 118 // +1 Extra space required. 119 return 2; 120 } 121 case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY: 122 assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 123 // +1 For the push operation. 124 // +1 Extra space required. 125 return 2; 126 case CFStack::SUB_ENTRY: 127 return 1; 128 } 129 } 130 131 void CFStack::updateMaxStackSize() { 132 unsigned CurrentStackSize = CurrentEntries + divideCeil(CurrentSubEntries, 4); 133 MaxStackSize = std::max(CurrentStackSize, MaxStackSize); 134 } 135 136 void CFStack::pushBranch(unsigned Opcode, bool isWQM) { 137 CFStack::StackItem Item = CFStack::ENTRY; 138 switch(Opcode) { 139 case R600::CF_PUSH_EG: 140 case R600::CF_ALU_PUSH_BEFORE: 141 if (!isWQM) { 142 if (!ST->hasCaymanISA() && 143 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH)) 144 Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI 145 // See comment in 146 // CFStack::getSubEntrySize() 147 else if (CurrentEntries > 0 && 148 ST->getGeneration() > AMDGPUSubtarget::EVERGREEN && 149 !ST->hasCaymanISA() && 150 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY)) 151 Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY; 152 else 153 Item = CFStack::SUB_ENTRY; 154 } else 155 Item = CFStack::ENTRY; 156 break; 157 } 158 BranchStack.push_back(Item); 159 if (Item == CFStack::ENTRY) 160 CurrentEntries++; 161 else 162 CurrentSubEntries += getSubEntrySize(Item); 163 updateMaxStackSize(); 164 } 165 166 void CFStack::pushLoop() { 167 LoopStack.push_back(CFStack::ENTRY); 168 CurrentEntries++; 169 updateMaxStackSize(); 170 } 171 172 void CFStack::popBranch() { 173 CFStack::StackItem Top = BranchStack.back(); 174 if (Top == CFStack::ENTRY) 175 CurrentEntries--; 176 else 177 CurrentSubEntries-= getSubEntrySize(Top); 178 BranchStack.pop_back(); 179 } 180 181 void CFStack::popLoop() { 182 CurrentEntries--; 183 LoopStack.pop_back(); 184 } 185 186 class R600ControlFlowFinalizer : public MachineFunctionPass { 187 private: 188 using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>; 189 190 enum ControlFlowInstruction { 191 CF_TC, 192 CF_VC, 193 CF_CALL_FS, 194 CF_WHILE_LOOP, 195 CF_END_LOOP, 196 CF_LOOP_BREAK, 197 CF_LOOP_CONTINUE, 198 CF_JUMP, 199 CF_ELSE, 200 CF_POP, 201 CF_END 202 }; 203 204 const R600InstrInfo *TII = nullptr; 205 const R600RegisterInfo *TRI = nullptr; 206 unsigned MaxFetchInst; 207 const R600Subtarget *ST = nullptr; 208 209 bool IsTrivialInst(MachineInstr &MI) const { 210 switch (MI.getOpcode()) { 211 case R600::KILL: 212 case R600::RETURN: 213 return true; 214 default: 215 return false; 216 } 217 } 218 219 const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { 220 unsigned Opcode = 0; 221 bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 222 switch (CFI) { 223 case CF_TC: 224 Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600; 225 break; 226 case CF_VC: 227 Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600; 228 break; 229 case CF_CALL_FS: 230 Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600; 231 break; 232 case CF_WHILE_LOOP: 233 Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600; 234 break; 235 case CF_END_LOOP: 236 Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600; 237 break; 238 case CF_LOOP_BREAK: 239 Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600; 240 break; 241 case CF_LOOP_CONTINUE: 242 Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600; 243 break; 244 case CF_JUMP: 245 Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600; 246 break; 247 case CF_ELSE: 248 Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600; 249 break; 250 case CF_POP: 251 Opcode = isEg ? R600::POP_EG : R600::POP_R600; 252 break; 253 case CF_END: 254 if (ST->hasCaymanISA()) { 255 Opcode = R600::CF_END_CM; 256 break; 257 } 258 Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600; 259 break; 260 } 261 assert (Opcode && "No opcode selected"); 262 return TII->get(Opcode); 263 } 264 265 bool isCompatibleWithClause(const MachineInstr &MI, 266 std::set<unsigned> &DstRegs) const { 267 unsigned DstMI, SrcMI; 268 for (MachineInstr::const_mop_iterator I = MI.operands_begin(), 269 E = MI.operands_end(); 270 I != E; ++I) { 271 const MachineOperand &MO = *I; 272 if (!MO.isReg()) 273 continue; 274 if (MO.isDef()) { 275 Register Reg = MO.getReg(); 276 if (R600::R600_Reg128RegClass.contains(Reg)) 277 DstMI = Reg; 278 else 279 DstMI = TRI->getMatchingSuperReg(Reg, 280 R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), 281 &R600::R600_Reg128RegClass); 282 } 283 if (MO.isUse()) { 284 Register Reg = MO.getReg(); 285 if (R600::R600_Reg128RegClass.contains(Reg)) 286 SrcMI = Reg; 287 else 288 SrcMI = TRI->getMatchingSuperReg(Reg, 289 R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), 290 &R600::R600_Reg128RegClass); 291 } 292 } 293 if ((DstRegs.find(SrcMI) == DstRegs.end())) { 294 DstRegs.insert(DstMI); 295 return true; 296 } else 297 return false; 298 } 299 300 ClauseFile 301 MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 302 const { 303 MachineBasicBlock::iterator ClauseHead = I; 304 std::vector<MachineInstr *> ClauseContent; 305 unsigned AluInstCount = 0; 306 bool IsTex = TII->usesTextureCache(*ClauseHead); 307 std::set<unsigned> DstRegs; 308 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { 309 if (IsTrivialInst(*I)) 310 continue; 311 if (AluInstCount >= MaxFetchInst) 312 break; 313 if ((IsTex && !TII->usesTextureCache(*I)) || 314 (!IsTex && !TII->usesVertexCache(*I))) 315 break; 316 if (!isCompatibleWithClause(*I, DstRegs)) 317 break; 318 AluInstCount ++; 319 ClauseContent.push_back(&*I); 320 } 321 MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), 322 getHWInstrDesc(IsTex?CF_TC:CF_VC)) 323 .addImm(0) // ADDR 324 .addImm(AluInstCount - 1); // COUNT 325 return ClauseFile(MIb, std::move(ClauseContent)); 326 } 327 328 void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const { 329 static const unsigned LiteralRegs[] = { 330 R600::ALU_LITERAL_X, 331 R600::ALU_LITERAL_Y, 332 R600::ALU_LITERAL_Z, 333 R600::ALU_LITERAL_W 334 }; 335 const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = 336 TII->getSrcs(MI); 337 for (const auto &Src:Srcs) { 338 if (Src.first->getReg() != R600::ALU_LITERAL_X) 339 continue; 340 int64_t Imm = Src.second; 341 std::vector<MachineOperand *>::iterator It = 342 llvm::find_if(Lits, [&](MachineOperand *val) { 343 return val->isImm() && (val->getImm() == Imm); 344 }); 345 346 // Get corresponding Operand 347 MachineOperand &Operand = MI.getOperand( 348 TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal)); 349 350 if (It != Lits.end()) { 351 // Reuse existing literal reg 352 unsigned Index = It - Lits.begin(); 353 Src.first->setReg(LiteralRegs[Index]); 354 } else { 355 // Allocate new literal reg 356 assert(Lits.size() < 4 && "Too many literals in Instruction Group"); 357 Src.first->setReg(LiteralRegs[Lits.size()]); 358 Lits.push_back(&Operand); 359 } 360 } 361 } 362 363 MachineBasicBlock::iterator insertLiterals( 364 MachineBasicBlock::iterator InsertPos, 365 const std::vector<unsigned> &Literals) const { 366 MachineBasicBlock *MBB = InsertPos->getParent(); 367 for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { 368 unsigned LiteralPair0 = Literals[i]; 369 unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0; 370 InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(), 371 TII->get(R600::LITERALS)) 372 .addImm(LiteralPair0) 373 .addImm(LiteralPair1); 374 } 375 return InsertPos; 376 } 377 378 ClauseFile 379 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 380 const { 381 MachineInstr &ClauseHead = *I; 382 std::vector<MachineInstr *> ClauseContent; 383 I++; 384 for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) { 385 if (IsTrivialInst(*I)) { 386 ++I; 387 continue; 388 } 389 if (!I->isBundle() && !TII->isALUInstr(I->getOpcode())) 390 break; 391 std::vector<MachineOperand *>Literals; 392 if (I->isBundle()) { 393 MachineInstr &DeleteMI = *I; 394 MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 395 while (++BI != E && BI->isBundledWithPred()) { 396 BI->unbundleFromPred(); 397 for (MachineOperand &MO : BI->operands()) { 398 if (MO.isReg() && MO.isInternalRead()) 399 MO.setIsInternalRead(false); 400 } 401 getLiteral(*BI, Literals); 402 ClauseContent.push_back(&*BI); 403 } 404 I = BI; 405 DeleteMI.eraseFromParent(); 406 } else { 407 getLiteral(*I, Literals); 408 ClauseContent.push_back(&*I); 409 I++; 410 } 411 for (unsigned i = 0, e = Literals.size(); i < e; i += 2) { 412 MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(), 413 TII->get(R600::LITERALS)); 414 if (Literals[i]->isImm()) { 415 MILit.addImm(Literals[i]->getImm()); 416 } else { 417 MILit.addGlobalAddress(Literals[i]->getGlobal(), 418 Literals[i]->getOffset()); 419 } 420 if (i + 1 < e) { 421 if (Literals[i + 1]->isImm()) { 422 MILit.addImm(Literals[i + 1]->getImm()); 423 } else { 424 MILit.addGlobalAddress(Literals[i + 1]->getGlobal(), 425 Literals[i + 1]->getOffset()); 426 } 427 } else 428 MILit.addImm(0); 429 ClauseContent.push_back(MILit); 430 } 431 } 432 assert(ClauseContent.size() < 128 && "ALU clause is too big"); 433 ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1); 434 return ClauseFile(&ClauseHead, std::move(ClauseContent)); 435 } 436 437 void EmitFetchClause(MachineBasicBlock::iterator InsertPos, 438 const DebugLoc &DL, ClauseFile &Clause, 439 unsigned &CfCount) { 440 CounterPropagateAddr(*Clause.first, CfCount); 441 MachineBasicBlock *BB = Clause.first->getParent(); 442 BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount); 443 for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { 444 BB->splice(InsertPos, BB, Clause.second[i]); 445 } 446 CfCount += 2 * Clause.second.size(); 447 } 448 449 void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL, 450 ClauseFile &Clause, unsigned &CfCount) { 451 Clause.first->getOperand(0).setImm(0); 452 CounterPropagateAddr(*Clause.first, CfCount); 453 MachineBasicBlock *BB = Clause.first->getParent(); 454 BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount); 455 for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { 456 BB->splice(InsertPos, BB, Clause.second[i]); 457 } 458 CfCount += Clause.second.size(); 459 } 460 461 void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const { 462 MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm()); 463 } 464 void CounterPropagateAddr(const std::set<MachineInstr *> &MIs, 465 unsigned Addr) const { 466 for (MachineInstr *MI : MIs) { 467 CounterPropagateAddr(*MI, Addr); 468 } 469 } 470 471 public: 472 static char ID; 473 474 R600ControlFlowFinalizer() : MachineFunctionPass(ID) {} 475 476 bool runOnMachineFunction(MachineFunction &MF) override { 477 ST = &MF.getSubtarget<R600Subtarget>(); 478 MaxFetchInst = ST->getTexVTXClauseSize(); 479 TII = ST->getInstrInfo(); 480 TRI = ST->getRegisterInfo(); 481 482 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 483 484 CFStack CFStack(ST, MF.getFunction().getCallingConv()); 485 for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; 486 ++MB) { 487 MachineBasicBlock &MBB = *MB; 488 unsigned CfCount = 0; 489 std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack; 490 std::vector<MachineInstr * > IfThenElseStack; 491 if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) { 492 BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), 493 getHWInstrDesc(CF_CALL_FS)); 494 CfCount++; 495 } 496 std::vector<ClauseFile> FetchClauses, AluClauses; 497 std::vector<MachineInstr *> LastAlu(1); 498 std::vector<MachineInstr *> ToPopAfter; 499 500 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 501 I != E;) { 502 if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) { 503 LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump();); 504 FetchClauses.push_back(MakeFetchClause(MBB, I)); 505 CfCount++; 506 LastAlu.back() = nullptr; 507 continue; 508 } 509 510 MachineBasicBlock::iterator MI = I; 511 if (MI->getOpcode() != R600::ENDIF) 512 LastAlu.back() = nullptr; 513 if (MI->getOpcode() == R600::CF_ALU) 514 LastAlu.back() = &*MI; 515 I++; 516 bool RequiresWorkAround = 517 CFStack.requiresWorkAroundForInst(MI->getOpcode()); 518 switch (MI->getOpcode()) { 519 case R600::CF_ALU_PUSH_BEFORE: 520 if (RequiresWorkAround) { 521 LLVM_DEBUG(dbgs() 522 << "Applying bug work-around for ALU_PUSH_BEFORE\n"); 523 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG)) 524 .addImm(CfCount + 1) 525 .addImm(1); 526 MI->setDesc(TII->get(R600::CF_ALU)); 527 CfCount++; 528 CFStack.pushBranch(R600::CF_PUSH_EG); 529 } else 530 CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE); 531 LLVM_FALLTHROUGH; 532 case R600::CF_ALU: 533 I = MI; 534 AluClauses.push_back(MakeALUClause(MBB, I)); 535 LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); 536 CfCount++; 537 break; 538 case R600::WHILELOOP: { 539 CFStack.pushLoop(); 540 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 541 getHWInstrDesc(CF_WHILE_LOOP)) 542 .addImm(1); 543 std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount, 544 std::set<MachineInstr *>()); 545 Pair.second.insert(MIb); 546 LoopStack.push_back(std::move(Pair)); 547 MI->eraseFromParent(); 548 CfCount++; 549 break; 550 } 551 case R600::ENDLOOP: { 552 CFStack.popLoop(); 553 std::pair<unsigned, std::set<MachineInstr *>> Pair = 554 std::move(LoopStack.back()); 555 LoopStack.pop_back(); 556 CounterPropagateAddr(Pair.second, CfCount); 557 BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP)) 558 .addImm(Pair.first + 1); 559 MI->eraseFromParent(); 560 CfCount++; 561 break; 562 } 563 case R600::IF_PREDICATE_SET: { 564 LastAlu.push_back(nullptr); 565 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 566 getHWInstrDesc(CF_JUMP)) 567 .addImm(0) 568 .addImm(0); 569 IfThenElseStack.push_back(MIb); 570 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 571 MI->eraseFromParent(); 572 CfCount++; 573 break; 574 } 575 case R600::ELSE: { 576 MachineInstr * JumpInst = IfThenElseStack.back(); 577 IfThenElseStack.pop_back(); 578 CounterPropagateAddr(*JumpInst, CfCount); 579 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 580 getHWInstrDesc(CF_ELSE)) 581 .addImm(0) 582 .addImm(0); 583 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 584 IfThenElseStack.push_back(MIb); 585 MI->eraseFromParent(); 586 CfCount++; 587 break; 588 } 589 case R600::ENDIF: { 590 CFStack.popBranch(); 591 if (LastAlu.back()) { 592 ToPopAfter.push_back(LastAlu.back()); 593 } else { 594 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 595 getHWInstrDesc(CF_POP)) 596 .addImm(CfCount + 1) 597 .addImm(1); 598 (void)MIb; 599 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 600 CfCount++; 601 } 602 603 MachineInstr *IfOrElseInst = IfThenElseStack.back(); 604 IfThenElseStack.pop_back(); 605 CounterPropagateAddr(*IfOrElseInst, CfCount); 606 IfOrElseInst->getOperand(1).setImm(1); 607 LastAlu.pop_back(); 608 MI->eraseFromParent(); 609 break; 610 } 611 case R600::BREAK: { 612 CfCount ++; 613 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 614 getHWInstrDesc(CF_LOOP_BREAK)) 615 .addImm(0); 616 LoopStack.back().second.insert(MIb); 617 MI->eraseFromParent(); 618 break; 619 } 620 case R600::CONTINUE: { 621 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 622 getHWInstrDesc(CF_LOOP_CONTINUE)) 623 .addImm(0); 624 LoopStack.back().second.insert(MIb); 625 MI->eraseFromParent(); 626 CfCount++; 627 break; 628 } 629 case R600::RETURN: { 630 DebugLoc DL = MBB.findDebugLoc(MI); 631 BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END)); 632 CfCount++; 633 if (CfCount % 2) { 634 BuildMI(MBB, I, DL, TII->get(R600::PAD)); 635 CfCount++; 636 } 637 MI->eraseFromParent(); 638 for (unsigned i = 0, e = FetchClauses.size(); i < e; i++) 639 EmitFetchClause(I, DL, FetchClauses[i], CfCount); 640 for (unsigned i = 0, e = AluClauses.size(); i < e; i++) 641 EmitALUClause(I, DL, AluClauses[i], CfCount); 642 break; 643 } 644 default: 645 if (TII->isExport(MI->getOpcode())) { 646 LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); 647 CfCount++; 648 } 649 break; 650 } 651 } 652 for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) { 653 MachineInstr *Alu = ToPopAfter[i]; 654 BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu), 655 TII->get(R600::CF_ALU_POP_AFTER)) 656 .addImm(Alu->getOperand(0).getImm()) 657 .addImm(Alu->getOperand(1).getImm()) 658 .addImm(Alu->getOperand(2).getImm()) 659 .addImm(Alu->getOperand(3).getImm()) 660 .addImm(Alu->getOperand(4).getImm()) 661 .addImm(Alu->getOperand(5).getImm()) 662 .addImm(Alu->getOperand(6).getImm()) 663 .addImm(Alu->getOperand(7).getImm()) 664 .addImm(Alu->getOperand(8).getImm()); 665 Alu->eraseFromParent(); 666 } 667 MFI->CFStackSize = CFStack.MaxStackSize; 668 } 669 670 return false; 671 } 672 673 StringRef getPassName() const override { 674 return "R600 Control Flow Finalizer Pass"; 675 } 676 }; 677 678 } // end anonymous namespace 679 680 INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE, 681 "R600 Control Flow Finalizer", false, false) 682 INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE, 683 "R600 Control Flow Finalizer", false, false) 684 685 char R600ControlFlowFinalizer::ID = 0; 686 687 char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID; 688 689 FunctionPass *llvm::createR600ControlFlowFinalizer() { 690 return new R600ControlFlowFinalizer(); 691 } 692