1 //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass compute turns all control flow pseudo instructions into native one 11 /// computing their address on the fly; it also sets STACK_SIZE info. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPU.h" 16 #include "AMDGPUSubtarget.h" 17 #include "R600Defines.h" 18 #include "R600InstrInfo.h" 19 #include "R600MachineFunctionInfo.h" 20 #include "R600RegisterInfo.h" 21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/ADT/StringRef.h" 25 #include "llvm/CodeGen/MachineBasicBlock.h" 26 #include "llvm/CodeGen/MachineFunction.h" 27 #include "llvm/CodeGen/MachineFunctionPass.h" 28 #include "llvm/CodeGen/MachineInstr.h" 29 #include "llvm/CodeGen/MachineInstrBuilder.h" 30 #include "llvm/CodeGen/MachineOperand.h" 31 #include "llvm/IR/CallingConv.h" 32 #include "llvm/IR/DebugLoc.h" 33 #include "llvm/IR/Function.h" 34 #include "llvm/Pass.h" 35 #include "llvm/Support/Compiler.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/MathExtras.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <algorithm> 40 #include <cassert> 41 #include <cstdint> 42 #include <set> 43 #include <utility> 44 #include <vector> 45 46 using namespace llvm; 47 48 #define DEBUG_TYPE "r600cf" 49 50 namespace { 51 52 struct CFStack { 53 enum StackItem { 54 ENTRY = 0, 55 SUB_ENTRY = 1, 56 FIRST_NON_WQM_PUSH = 2, 57 FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3 58 }; 59 60 const R600Subtarget *ST; 61 std::vector<StackItem> BranchStack; 62 std::vector<StackItem> LoopStack; 63 unsigned MaxStackSize; 64 unsigned CurrentEntries = 0; 65 unsigned CurrentSubEntries = 0; 66 67 CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st), 68 // We need to reserve a stack entry for CALL_FS in vertex shaders. 69 MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {} 70 71 unsigned getLoopDepth(); 72 bool branchStackContains(CFStack::StackItem); 73 bool requiresWorkAroundForInst(unsigned Opcode); 74 unsigned getSubEntrySize(CFStack::StackItem Item); 75 void updateMaxStackSize(); 76 void pushBranch(unsigned Opcode, bool isWQM = false); 77 void pushLoop(); 78 void popBranch(); 79 void popLoop(); 80 }; 81 82 unsigned CFStack::getLoopDepth() { 83 return LoopStack.size(); 84 } 85 86 bool CFStack::branchStackContains(CFStack::StackItem Item) { 87 for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(), 88 E = BranchStack.end(); I != E; ++I) { 89 if (*I == Item) 90 return true; 91 } 92 return false; 93 } 94 95 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) { 96 if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() && 97 getLoopDepth() > 1) 98 return true; 99 100 if (!ST->hasCFAluBug()) 101 return false; 102 103 switch(Opcode) { 104 default: return false; 105 case R600::CF_ALU_PUSH_BEFORE: 106 case R600::CF_ALU_ELSE_AFTER: 107 case R600::CF_ALU_BREAK: 108 case R600::CF_ALU_CONTINUE: 109 if (CurrentSubEntries == 0) 110 return false; 111 if (ST->getWavefrontSize() == 64) { 112 // We are being conservative here. We only require this work-around if 113 // CurrentSubEntries > 3 && 114 // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0) 115 // 116 // We have to be conservative, because we don't know for certain that 117 // our stack allocation algorithm for Evergreen/NI is correct. Applying this 118 // work-around when CurrentSubEntries > 3 allows us to over-allocate stack 119 // resources without any problems. 120 return CurrentSubEntries > 3; 121 } else { 122 assert(ST->getWavefrontSize() == 32); 123 // We are being conservative here. We only require the work-around if 124 // CurrentSubEntries > 7 && 125 // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0) 126 // See the comment on the wavefront size == 64 case for why we are 127 // being conservative. 128 return CurrentSubEntries > 7; 129 } 130 } 131 } 132 133 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) { 134 switch(Item) { 135 default: 136 return 0; 137 case CFStack::FIRST_NON_WQM_PUSH: 138 assert(!ST->hasCaymanISA()); 139 if (ST->getGeneration() <= AMDGPUSubtarget::R700) { 140 // +1 For the push operation. 141 // +2 Extra space required. 142 return 3; 143 } else { 144 // Some documentation says that this is not necessary on Evergreen, 145 // but experimentation has show that we need to allocate 1 extra 146 // sub-entry for the first non-WQM push. 147 // +1 For the push operation. 148 // +1 Extra space required. 149 return 2; 150 } 151 case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY: 152 assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 153 // +1 For the push operation. 154 // +1 Extra space required. 155 return 2; 156 case CFStack::SUB_ENTRY: 157 return 1; 158 } 159 } 160 161 void CFStack::updateMaxStackSize() { 162 unsigned CurrentStackSize = CurrentEntries + divideCeil(CurrentSubEntries, 4); 163 MaxStackSize = std::max(CurrentStackSize, MaxStackSize); 164 } 165 166 void CFStack::pushBranch(unsigned Opcode, bool isWQM) { 167 CFStack::StackItem Item = CFStack::ENTRY; 168 switch(Opcode) { 169 case R600::CF_PUSH_EG: 170 case R600::CF_ALU_PUSH_BEFORE: 171 if (!isWQM) { 172 if (!ST->hasCaymanISA() && 173 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH)) 174 Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI 175 // See comment in 176 // CFStack::getSubEntrySize() 177 else if (CurrentEntries > 0 && 178 ST->getGeneration() > AMDGPUSubtarget::EVERGREEN && 179 !ST->hasCaymanISA() && 180 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY)) 181 Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY; 182 else 183 Item = CFStack::SUB_ENTRY; 184 } else 185 Item = CFStack::ENTRY; 186 break; 187 } 188 BranchStack.push_back(Item); 189 if (Item == CFStack::ENTRY) 190 CurrentEntries++; 191 else 192 CurrentSubEntries += getSubEntrySize(Item); 193 updateMaxStackSize(); 194 } 195 196 void CFStack::pushLoop() { 197 LoopStack.push_back(CFStack::ENTRY); 198 CurrentEntries++; 199 updateMaxStackSize(); 200 } 201 202 void CFStack::popBranch() { 203 CFStack::StackItem Top = BranchStack.back(); 204 if (Top == CFStack::ENTRY) 205 CurrentEntries--; 206 else 207 CurrentSubEntries-= getSubEntrySize(Top); 208 BranchStack.pop_back(); 209 } 210 211 void CFStack::popLoop() { 212 CurrentEntries--; 213 LoopStack.pop_back(); 214 } 215 216 class R600ControlFlowFinalizer : public MachineFunctionPass { 217 private: 218 using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>; 219 220 enum ControlFlowInstruction { 221 CF_TC, 222 CF_VC, 223 CF_CALL_FS, 224 CF_WHILE_LOOP, 225 CF_END_LOOP, 226 CF_LOOP_BREAK, 227 CF_LOOP_CONTINUE, 228 CF_JUMP, 229 CF_ELSE, 230 CF_POP, 231 CF_END 232 }; 233 234 const R600InstrInfo *TII = nullptr; 235 const R600RegisterInfo *TRI = nullptr; 236 unsigned MaxFetchInst; 237 const R600Subtarget *ST = nullptr; 238 239 bool IsTrivialInst(MachineInstr &MI) const { 240 switch (MI.getOpcode()) { 241 case R600::KILL: 242 case R600::RETURN: 243 return true; 244 default: 245 return false; 246 } 247 } 248 249 const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { 250 unsigned Opcode = 0; 251 bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 252 switch (CFI) { 253 case CF_TC: 254 Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600; 255 break; 256 case CF_VC: 257 Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600; 258 break; 259 case CF_CALL_FS: 260 Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600; 261 break; 262 case CF_WHILE_LOOP: 263 Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600; 264 break; 265 case CF_END_LOOP: 266 Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600; 267 break; 268 case CF_LOOP_BREAK: 269 Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600; 270 break; 271 case CF_LOOP_CONTINUE: 272 Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600; 273 break; 274 case CF_JUMP: 275 Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600; 276 break; 277 case CF_ELSE: 278 Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600; 279 break; 280 case CF_POP: 281 Opcode = isEg ? R600::POP_EG : R600::POP_R600; 282 break; 283 case CF_END: 284 if (ST->hasCaymanISA()) { 285 Opcode = R600::CF_END_CM; 286 break; 287 } 288 Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600; 289 break; 290 } 291 assert (Opcode && "No opcode selected"); 292 return TII->get(Opcode); 293 } 294 295 bool isCompatibleWithClause(const MachineInstr &MI, 296 std::set<unsigned> &DstRegs) const { 297 unsigned DstMI, SrcMI; 298 for (MachineInstr::const_mop_iterator I = MI.operands_begin(), 299 E = MI.operands_end(); 300 I != E; ++I) { 301 const MachineOperand &MO = *I; 302 if (!MO.isReg()) 303 continue; 304 if (MO.isDef()) { 305 Register Reg = MO.getReg(); 306 if (R600::R600_Reg128RegClass.contains(Reg)) 307 DstMI = Reg; 308 else 309 DstMI = TRI->getMatchingSuperReg(Reg, 310 R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), 311 &R600::R600_Reg128RegClass); 312 } 313 if (MO.isUse()) { 314 Register Reg = MO.getReg(); 315 if (R600::R600_Reg128RegClass.contains(Reg)) 316 SrcMI = Reg; 317 else 318 SrcMI = TRI->getMatchingSuperReg(Reg, 319 R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), 320 &R600::R600_Reg128RegClass); 321 } 322 } 323 if ((DstRegs.find(SrcMI) == DstRegs.end())) { 324 DstRegs.insert(DstMI); 325 return true; 326 } else 327 return false; 328 } 329 330 ClauseFile 331 MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 332 const { 333 MachineBasicBlock::iterator ClauseHead = I; 334 std::vector<MachineInstr *> ClauseContent; 335 unsigned AluInstCount = 0; 336 bool IsTex = TII->usesTextureCache(*ClauseHead); 337 std::set<unsigned> DstRegs; 338 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { 339 if (IsTrivialInst(*I)) 340 continue; 341 if (AluInstCount >= MaxFetchInst) 342 break; 343 if ((IsTex && !TII->usesTextureCache(*I)) || 344 (!IsTex && !TII->usesVertexCache(*I))) 345 break; 346 if (!isCompatibleWithClause(*I, DstRegs)) 347 break; 348 AluInstCount ++; 349 ClauseContent.push_back(&*I); 350 } 351 MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), 352 getHWInstrDesc(IsTex?CF_TC:CF_VC)) 353 .addImm(0) // ADDR 354 .addImm(AluInstCount - 1); // COUNT 355 return ClauseFile(MIb, std::move(ClauseContent)); 356 } 357 358 void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const { 359 static const unsigned LiteralRegs[] = { 360 R600::ALU_LITERAL_X, 361 R600::ALU_LITERAL_Y, 362 R600::ALU_LITERAL_Z, 363 R600::ALU_LITERAL_W 364 }; 365 const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = 366 TII->getSrcs(MI); 367 for (const auto &Src:Srcs) { 368 if (Src.first->getReg() != R600::ALU_LITERAL_X) 369 continue; 370 int64_t Imm = Src.second; 371 std::vector<MachineOperand *>::iterator It = 372 llvm::find_if(Lits, [&](MachineOperand *val) { 373 return val->isImm() && (val->getImm() == Imm); 374 }); 375 376 // Get corresponding Operand 377 MachineOperand &Operand = MI.getOperand( 378 TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal)); 379 380 if (It != Lits.end()) { 381 // Reuse existing literal reg 382 unsigned Index = It - Lits.begin(); 383 Src.first->setReg(LiteralRegs[Index]); 384 } else { 385 // Allocate new literal reg 386 assert(Lits.size() < 4 && "Too many literals in Instruction Group"); 387 Src.first->setReg(LiteralRegs[Lits.size()]); 388 Lits.push_back(&Operand); 389 } 390 } 391 } 392 393 MachineBasicBlock::iterator insertLiterals( 394 MachineBasicBlock::iterator InsertPos, 395 const std::vector<unsigned> &Literals) const { 396 MachineBasicBlock *MBB = InsertPos->getParent(); 397 for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { 398 unsigned LiteralPair0 = Literals[i]; 399 unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0; 400 InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(), 401 TII->get(R600::LITERALS)) 402 .addImm(LiteralPair0) 403 .addImm(LiteralPair1); 404 } 405 return InsertPos; 406 } 407 408 ClauseFile 409 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 410 const { 411 MachineInstr &ClauseHead = *I; 412 std::vector<MachineInstr *> ClauseContent; 413 I++; 414 for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) { 415 if (IsTrivialInst(*I)) { 416 ++I; 417 continue; 418 } 419 if (!I->isBundle() && !TII->isALUInstr(I->getOpcode())) 420 break; 421 std::vector<MachineOperand *>Literals; 422 if (I->isBundle()) { 423 MachineInstr &DeleteMI = *I; 424 MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 425 while (++BI != E && BI->isBundledWithPred()) { 426 BI->unbundleFromPred(); 427 for (MachineOperand &MO : BI->operands()) { 428 if (MO.isReg() && MO.isInternalRead()) 429 MO.setIsInternalRead(false); 430 } 431 getLiteral(*BI, Literals); 432 ClauseContent.push_back(&*BI); 433 } 434 I = BI; 435 DeleteMI.eraseFromParent(); 436 } else { 437 getLiteral(*I, Literals); 438 ClauseContent.push_back(&*I); 439 I++; 440 } 441 for (unsigned i = 0, e = Literals.size(); i < e; i += 2) { 442 MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(), 443 TII->get(R600::LITERALS)); 444 if (Literals[i]->isImm()) { 445 MILit.addImm(Literals[i]->getImm()); 446 } else { 447 MILit.addGlobalAddress(Literals[i]->getGlobal(), 448 Literals[i]->getOffset()); 449 } 450 if (i + 1 < e) { 451 if (Literals[i + 1]->isImm()) { 452 MILit.addImm(Literals[i + 1]->getImm()); 453 } else { 454 MILit.addGlobalAddress(Literals[i + 1]->getGlobal(), 455 Literals[i + 1]->getOffset()); 456 } 457 } else 458 MILit.addImm(0); 459 ClauseContent.push_back(MILit); 460 } 461 } 462 assert(ClauseContent.size() < 128 && "ALU clause is too big"); 463 ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1); 464 return ClauseFile(&ClauseHead, std::move(ClauseContent)); 465 } 466 467 void EmitFetchClause(MachineBasicBlock::iterator InsertPos, 468 const DebugLoc &DL, ClauseFile &Clause, 469 unsigned &CfCount) { 470 CounterPropagateAddr(*Clause.first, CfCount); 471 MachineBasicBlock *BB = Clause.first->getParent(); 472 BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount); 473 for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { 474 BB->splice(InsertPos, BB, Clause.second[i]); 475 } 476 CfCount += 2 * Clause.second.size(); 477 } 478 479 void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL, 480 ClauseFile &Clause, unsigned &CfCount) { 481 Clause.first->getOperand(0).setImm(0); 482 CounterPropagateAddr(*Clause.first, CfCount); 483 MachineBasicBlock *BB = Clause.first->getParent(); 484 BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount); 485 for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { 486 BB->splice(InsertPos, BB, Clause.second[i]); 487 } 488 CfCount += Clause.second.size(); 489 } 490 491 void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const { 492 MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm()); 493 } 494 void CounterPropagateAddr(const std::set<MachineInstr *> &MIs, 495 unsigned Addr) const { 496 for (MachineInstr *MI : MIs) { 497 CounterPropagateAddr(*MI, Addr); 498 } 499 } 500 501 public: 502 static char ID; 503 504 R600ControlFlowFinalizer() : MachineFunctionPass(ID) {} 505 506 bool runOnMachineFunction(MachineFunction &MF) override { 507 ST = &MF.getSubtarget<R600Subtarget>(); 508 MaxFetchInst = ST->getTexVTXClauseSize(); 509 TII = ST->getInstrInfo(); 510 TRI = ST->getRegisterInfo(); 511 512 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 513 514 CFStack CFStack(ST, MF.getFunction().getCallingConv()); 515 for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; 516 ++MB) { 517 MachineBasicBlock &MBB = *MB; 518 unsigned CfCount = 0; 519 std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack; 520 std::vector<MachineInstr * > IfThenElseStack; 521 if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) { 522 BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), 523 getHWInstrDesc(CF_CALL_FS)); 524 CfCount++; 525 } 526 std::vector<ClauseFile> FetchClauses, AluClauses; 527 std::vector<MachineInstr *> LastAlu(1); 528 std::vector<MachineInstr *> ToPopAfter; 529 530 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 531 I != E;) { 532 if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) { 533 LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump();); 534 FetchClauses.push_back(MakeFetchClause(MBB, I)); 535 CfCount++; 536 LastAlu.back() = nullptr; 537 continue; 538 } 539 540 MachineBasicBlock::iterator MI = I; 541 if (MI->getOpcode() != R600::ENDIF) 542 LastAlu.back() = nullptr; 543 if (MI->getOpcode() == R600::CF_ALU) 544 LastAlu.back() = &*MI; 545 I++; 546 bool RequiresWorkAround = 547 CFStack.requiresWorkAroundForInst(MI->getOpcode()); 548 switch (MI->getOpcode()) { 549 case R600::CF_ALU_PUSH_BEFORE: 550 if (RequiresWorkAround) { 551 LLVM_DEBUG(dbgs() 552 << "Applying bug work-around for ALU_PUSH_BEFORE\n"); 553 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG)) 554 .addImm(CfCount + 1) 555 .addImm(1); 556 MI->setDesc(TII->get(R600::CF_ALU)); 557 CfCount++; 558 CFStack.pushBranch(R600::CF_PUSH_EG); 559 } else 560 CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE); 561 LLVM_FALLTHROUGH; 562 case R600::CF_ALU: 563 I = MI; 564 AluClauses.push_back(MakeALUClause(MBB, I)); 565 LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); 566 CfCount++; 567 break; 568 case R600::WHILELOOP: { 569 CFStack.pushLoop(); 570 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 571 getHWInstrDesc(CF_WHILE_LOOP)) 572 .addImm(1); 573 std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount, 574 std::set<MachineInstr *>()); 575 Pair.second.insert(MIb); 576 LoopStack.push_back(std::move(Pair)); 577 MI->eraseFromParent(); 578 CfCount++; 579 break; 580 } 581 case R600::ENDLOOP: { 582 CFStack.popLoop(); 583 std::pair<unsigned, std::set<MachineInstr *>> Pair = 584 std::move(LoopStack.back()); 585 LoopStack.pop_back(); 586 CounterPropagateAddr(Pair.second, CfCount); 587 BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP)) 588 .addImm(Pair.first + 1); 589 MI->eraseFromParent(); 590 CfCount++; 591 break; 592 } 593 case R600::IF_PREDICATE_SET: { 594 LastAlu.push_back(nullptr); 595 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 596 getHWInstrDesc(CF_JUMP)) 597 .addImm(0) 598 .addImm(0); 599 IfThenElseStack.push_back(MIb); 600 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 601 MI->eraseFromParent(); 602 CfCount++; 603 break; 604 } 605 case R600::ELSE: { 606 MachineInstr * JumpInst = IfThenElseStack.back(); 607 IfThenElseStack.pop_back(); 608 CounterPropagateAddr(*JumpInst, CfCount); 609 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 610 getHWInstrDesc(CF_ELSE)) 611 .addImm(0) 612 .addImm(0); 613 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 614 IfThenElseStack.push_back(MIb); 615 MI->eraseFromParent(); 616 CfCount++; 617 break; 618 } 619 case R600::ENDIF: { 620 CFStack.popBranch(); 621 if (LastAlu.back()) { 622 ToPopAfter.push_back(LastAlu.back()); 623 } else { 624 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 625 getHWInstrDesc(CF_POP)) 626 .addImm(CfCount + 1) 627 .addImm(1); 628 (void)MIb; 629 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 630 CfCount++; 631 } 632 633 MachineInstr *IfOrElseInst = IfThenElseStack.back(); 634 IfThenElseStack.pop_back(); 635 CounterPropagateAddr(*IfOrElseInst, CfCount); 636 IfOrElseInst->getOperand(1).setImm(1); 637 LastAlu.pop_back(); 638 MI->eraseFromParent(); 639 break; 640 } 641 case R600::BREAK: { 642 CfCount ++; 643 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 644 getHWInstrDesc(CF_LOOP_BREAK)) 645 .addImm(0); 646 LoopStack.back().second.insert(MIb); 647 MI->eraseFromParent(); 648 break; 649 } 650 case R600::CONTINUE: { 651 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 652 getHWInstrDesc(CF_LOOP_CONTINUE)) 653 .addImm(0); 654 LoopStack.back().second.insert(MIb); 655 MI->eraseFromParent(); 656 CfCount++; 657 break; 658 } 659 case R600::RETURN: { 660 DebugLoc DL = MBB.findDebugLoc(MI); 661 BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END)); 662 CfCount++; 663 if (CfCount % 2) { 664 BuildMI(MBB, I, DL, TII->get(R600::PAD)); 665 CfCount++; 666 } 667 MI->eraseFromParent(); 668 for (unsigned i = 0, e = FetchClauses.size(); i < e; i++) 669 EmitFetchClause(I, DL, FetchClauses[i], CfCount); 670 for (unsigned i = 0, e = AluClauses.size(); i < e; i++) 671 EmitALUClause(I, DL, AluClauses[i], CfCount); 672 break; 673 } 674 default: 675 if (TII->isExport(MI->getOpcode())) { 676 LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); 677 CfCount++; 678 } 679 break; 680 } 681 } 682 for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) { 683 MachineInstr *Alu = ToPopAfter[i]; 684 BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu), 685 TII->get(R600::CF_ALU_POP_AFTER)) 686 .addImm(Alu->getOperand(0).getImm()) 687 .addImm(Alu->getOperand(1).getImm()) 688 .addImm(Alu->getOperand(2).getImm()) 689 .addImm(Alu->getOperand(3).getImm()) 690 .addImm(Alu->getOperand(4).getImm()) 691 .addImm(Alu->getOperand(5).getImm()) 692 .addImm(Alu->getOperand(6).getImm()) 693 .addImm(Alu->getOperand(7).getImm()) 694 .addImm(Alu->getOperand(8).getImm()); 695 Alu->eraseFromParent(); 696 } 697 MFI->CFStackSize = CFStack.MaxStackSize; 698 } 699 700 return false; 701 } 702 703 StringRef getPassName() const override { 704 return "R600 Control Flow Finalizer Pass"; 705 } 706 }; 707 708 } // end anonymous namespace 709 710 INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE, 711 "R600 Control Flow Finalizer", false, false) 712 INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE, 713 "R600 Control Flow Finalizer", false, false) 714 715 char R600ControlFlowFinalizer::ID = 0; 716 717 char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID; 718 719 FunctionPass *llvm::createR600ControlFlowFinalizer() { 720 return new R600ControlFlowFinalizer(); 721 } 722