1 //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass compute turns all control flow pseudo instructions into native one 11 /// computing their address on the fly; it also sets STACK_SIZE info. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AMDGPU.h" 16 #include "AMDGPUSubtarget.h" 17 #include "R600Defines.h" 18 #include "R600InstrInfo.h" 19 #include "R600MachineFunctionInfo.h" 20 #include "R600RegisterInfo.h" 21 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/ADT/StringRef.h" 25 #include "llvm/CodeGen/MachineBasicBlock.h" 26 #include "llvm/CodeGen/MachineFunction.h" 27 #include "llvm/CodeGen/MachineFunctionPass.h" 28 #include "llvm/CodeGen/MachineInstr.h" 29 #include "llvm/CodeGen/MachineInstrBuilder.h" 30 #include "llvm/CodeGen/MachineOperand.h" 31 #include "llvm/IR/CallingConv.h" 32 #include "llvm/IR/DebugLoc.h" 33 #include "llvm/IR/Function.h" 34 #include "llvm/Pass.h" 35 #include "llvm/Support/Compiler.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/MathExtras.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <algorithm> 40 #include <cassert> 41 #include <cstdint> 42 #include <set> 43 #include <utility> 44 #include <vector> 45 46 using namespace llvm; 47 48 #define DEBUG_TYPE "r600cf" 49 50 namespace { 51 52 struct CFStack { 53 enum StackItem { 54 ENTRY = 0, 55 SUB_ENTRY = 1, 56 FIRST_NON_WQM_PUSH = 2, 57 FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3 58 }; 59 60 const R600Subtarget *ST; 61 std::vector<StackItem> BranchStack; 62 std::vector<StackItem> LoopStack; 63 unsigned MaxStackSize; 64 unsigned CurrentEntries = 0; 65 unsigned CurrentSubEntries = 0; 66 67 CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st), 68 // We need to reserve a stack entry for CALL_FS in vertex shaders. 69 MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {} 70 71 unsigned getLoopDepth(); 72 bool branchStackContains(CFStack::StackItem); 73 bool requiresWorkAroundForInst(unsigned Opcode); 74 unsigned getSubEntrySize(CFStack::StackItem Item); 75 void updateMaxStackSize(); 76 void pushBranch(unsigned Opcode, bool isWQM = false); 77 void pushLoop(); 78 void popBranch(); 79 void popLoop(); 80 }; 81 82 unsigned CFStack::getLoopDepth() { 83 return LoopStack.size(); 84 } 85 86 bool CFStack::branchStackContains(CFStack::StackItem Item) { 87 for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(), 88 E = BranchStack.end(); I != E; ++I) { 89 if (*I == Item) 90 return true; 91 } 92 return false; 93 } 94 95 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) { 96 if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() && 97 getLoopDepth() > 1) 98 return true; 99 100 if (!ST->hasCFAluBug()) 101 return false; 102 103 switch(Opcode) { 104 default: return false; 105 case R600::CF_ALU_PUSH_BEFORE: 106 case R600::CF_ALU_ELSE_AFTER: 107 case R600::CF_ALU_BREAK: 108 case R600::CF_ALU_CONTINUE: 109 if (CurrentSubEntries == 0) 110 return false; 111 if (ST->getWavefrontSize() == 64) { 112 // We are being conservative here. We only require this work-around if 113 // CurrentSubEntries > 3 && 114 // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0) 115 // 116 // We have to be conservative, because we don't know for certain that 117 // our stack allocation algorithm for Evergreen/NI is correct. Applying this 118 // work-around when CurrentSubEntries > 3 allows us to over-allocate stack 119 // resources without any problems. 120 return CurrentSubEntries > 3; 121 } else { 122 assert(ST->getWavefrontSize() == 32); 123 // We are being conservative here. We only require the work-around if 124 // CurrentSubEntries > 7 && 125 // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0) 126 // See the comment on the wavefront size == 64 case for why we are 127 // being conservative. 128 return CurrentSubEntries > 7; 129 } 130 } 131 } 132 133 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) { 134 switch(Item) { 135 default: 136 return 0; 137 case CFStack::FIRST_NON_WQM_PUSH: 138 assert(!ST->hasCaymanISA()); 139 if (ST->getGeneration() <= AMDGPUSubtarget::R700) { 140 // +1 For the push operation. 141 // +2 Extra space required. 142 return 3; 143 } else { 144 // Some documentation says that this is not necessary on Evergreen, 145 // but experimentation has show that we need to allocate 1 extra 146 // sub-entry for the first non-WQM push. 147 // +1 For the push operation. 148 // +1 Extra space required. 149 return 2; 150 } 151 case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY: 152 assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 153 // +1 For the push operation. 154 // +1 Extra space required. 155 return 2; 156 case CFStack::SUB_ENTRY: 157 return 1; 158 } 159 } 160 161 void CFStack::updateMaxStackSize() { 162 unsigned CurrentStackSize = 163 CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4); 164 MaxStackSize = std::max(CurrentStackSize, MaxStackSize); 165 } 166 167 void CFStack::pushBranch(unsigned Opcode, bool isWQM) { 168 CFStack::StackItem Item = CFStack::ENTRY; 169 switch(Opcode) { 170 case R600::CF_PUSH_EG: 171 case R600::CF_ALU_PUSH_BEFORE: 172 if (!isWQM) { 173 if (!ST->hasCaymanISA() && 174 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH)) 175 Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI 176 // See comment in 177 // CFStack::getSubEntrySize() 178 else if (CurrentEntries > 0 && 179 ST->getGeneration() > AMDGPUSubtarget::EVERGREEN && 180 !ST->hasCaymanISA() && 181 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY)) 182 Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY; 183 else 184 Item = CFStack::SUB_ENTRY; 185 } else 186 Item = CFStack::ENTRY; 187 break; 188 } 189 BranchStack.push_back(Item); 190 if (Item == CFStack::ENTRY) 191 CurrentEntries++; 192 else 193 CurrentSubEntries += getSubEntrySize(Item); 194 updateMaxStackSize(); 195 } 196 197 void CFStack::pushLoop() { 198 LoopStack.push_back(CFStack::ENTRY); 199 CurrentEntries++; 200 updateMaxStackSize(); 201 } 202 203 void CFStack::popBranch() { 204 CFStack::StackItem Top = BranchStack.back(); 205 if (Top == CFStack::ENTRY) 206 CurrentEntries--; 207 else 208 CurrentSubEntries-= getSubEntrySize(Top); 209 BranchStack.pop_back(); 210 } 211 212 void CFStack::popLoop() { 213 CurrentEntries--; 214 LoopStack.pop_back(); 215 } 216 217 class R600ControlFlowFinalizer : public MachineFunctionPass { 218 private: 219 using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>; 220 221 enum ControlFlowInstruction { 222 CF_TC, 223 CF_VC, 224 CF_CALL_FS, 225 CF_WHILE_LOOP, 226 CF_END_LOOP, 227 CF_LOOP_BREAK, 228 CF_LOOP_CONTINUE, 229 CF_JUMP, 230 CF_ELSE, 231 CF_POP, 232 CF_END 233 }; 234 235 const R600InstrInfo *TII = nullptr; 236 const R600RegisterInfo *TRI = nullptr; 237 unsigned MaxFetchInst; 238 const R600Subtarget *ST = nullptr; 239 240 bool IsTrivialInst(MachineInstr &MI) const { 241 switch (MI.getOpcode()) { 242 case R600::KILL: 243 case R600::RETURN: 244 return true; 245 default: 246 return false; 247 } 248 } 249 250 const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { 251 unsigned Opcode = 0; 252 bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 253 switch (CFI) { 254 case CF_TC: 255 Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600; 256 break; 257 case CF_VC: 258 Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600; 259 break; 260 case CF_CALL_FS: 261 Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600; 262 break; 263 case CF_WHILE_LOOP: 264 Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600; 265 break; 266 case CF_END_LOOP: 267 Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600; 268 break; 269 case CF_LOOP_BREAK: 270 Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600; 271 break; 272 case CF_LOOP_CONTINUE: 273 Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600; 274 break; 275 case CF_JUMP: 276 Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600; 277 break; 278 case CF_ELSE: 279 Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600; 280 break; 281 case CF_POP: 282 Opcode = isEg ? R600::POP_EG : R600::POP_R600; 283 break; 284 case CF_END: 285 if (ST->hasCaymanISA()) { 286 Opcode = R600::CF_END_CM; 287 break; 288 } 289 Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600; 290 break; 291 } 292 assert (Opcode && "No opcode selected"); 293 return TII->get(Opcode); 294 } 295 296 bool isCompatibleWithClause(const MachineInstr &MI, 297 std::set<unsigned> &DstRegs) const { 298 unsigned DstMI, SrcMI; 299 for (MachineInstr::const_mop_iterator I = MI.operands_begin(), 300 E = MI.operands_end(); 301 I != E; ++I) { 302 const MachineOperand &MO = *I; 303 if (!MO.isReg()) 304 continue; 305 if (MO.isDef()) { 306 unsigned Reg = MO.getReg(); 307 if (R600::R600_Reg128RegClass.contains(Reg)) 308 DstMI = Reg; 309 else 310 DstMI = TRI->getMatchingSuperReg(Reg, 311 AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), 312 &R600::R600_Reg128RegClass); 313 } 314 if (MO.isUse()) { 315 unsigned Reg = MO.getReg(); 316 if (R600::R600_Reg128RegClass.contains(Reg)) 317 SrcMI = Reg; 318 else 319 SrcMI = TRI->getMatchingSuperReg(Reg, 320 AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), 321 &R600::R600_Reg128RegClass); 322 } 323 } 324 if ((DstRegs.find(SrcMI) == DstRegs.end())) { 325 DstRegs.insert(DstMI); 326 return true; 327 } else 328 return false; 329 } 330 331 ClauseFile 332 MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 333 const { 334 MachineBasicBlock::iterator ClauseHead = I; 335 std::vector<MachineInstr *> ClauseContent; 336 unsigned AluInstCount = 0; 337 bool IsTex = TII->usesTextureCache(*ClauseHead); 338 std::set<unsigned> DstRegs; 339 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { 340 if (IsTrivialInst(*I)) 341 continue; 342 if (AluInstCount >= MaxFetchInst) 343 break; 344 if ((IsTex && !TII->usesTextureCache(*I)) || 345 (!IsTex && !TII->usesVertexCache(*I))) 346 break; 347 if (!isCompatibleWithClause(*I, DstRegs)) 348 break; 349 AluInstCount ++; 350 ClauseContent.push_back(&*I); 351 } 352 MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), 353 getHWInstrDesc(IsTex?CF_TC:CF_VC)) 354 .addImm(0) // ADDR 355 .addImm(AluInstCount - 1); // COUNT 356 return ClauseFile(MIb, std::move(ClauseContent)); 357 } 358 359 void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const { 360 static const unsigned LiteralRegs[] = { 361 R600::ALU_LITERAL_X, 362 R600::ALU_LITERAL_Y, 363 R600::ALU_LITERAL_Z, 364 R600::ALU_LITERAL_W 365 }; 366 const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = 367 TII->getSrcs(MI); 368 for (const auto &Src:Srcs) { 369 if (Src.first->getReg() != R600::ALU_LITERAL_X) 370 continue; 371 int64_t Imm = Src.second; 372 std::vector<MachineOperand *>::iterator It = 373 llvm::find_if(Lits, [&](MachineOperand *val) { 374 return val->isImm() && (val->getImm() == Imm); 375 }); 376 377 // Get corresponding Operand 378 MachineOperand &Operand = MI.getOperand( 379 TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal)); 380 381 if (It != Lits.end()) { 382 // Reuse existing literal reg 383 unsigned Index = It - Lits.begin(); 384 Src.first->setReg(LiteralRegs[Index]); 385 } else { 386 // Allocate new literal reg 387 assert(Lits.size() < 4 && "Too many literals in Instruction Group"); 388 Src.first->setReg(LiteralRegs[Lits.size()]); 389 Lits.push_back(&Operand); 390 } 391 } 392 } 393 394 MachineBasicBlock::iterator insertLiterals( 395 MachineBasicBlock::iterator InsertPos, 396 const std::vector<unsigned> &Literals) const { 397 MachineBasicBlock *MBB = InsertPos->getParent(); 398 for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { 399 unsigned LiteralPair0 = Literals[i]; 400 unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0; 401 InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(), 402 TII->get(R600::LITERALS)) 403 .addImm(LiteralPair0) 404 .addImm(LiteralPair1); 405 } 406 return InsertPos; 407 } 408 409 ClauseFile 410 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 411 const { 412 MachineInstr &ClauseHead = *I; 413 std::vector<MachineInstr *> ClauseContent; 414 I++; 415 for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) { 416 if (IsTrivialInst(*I)) { 417 ++I; 418 continue; 419 } 420 if (!I->isBundle() && !TII->isALUInstr(I->getOpcode())) 421 break; 422 std::vector<MachineOperand *>Literals; 423 if (I->isBundle()) { 424 MachineInstr &DeleteMI = *I; 425 MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 426 while (++BI != E && BI->isBundledWithPred()) { 427 BI->unbundleFromPred(); 428 for (MachineOperand &MO : BI->operands()) { 429 if (MO.isReg() && MO.isInternalRead()) 430 MO.setIsInternalRead(false); 431 } 432 getLiteral(*BI, Literals); 433 ClauseContent.push_back(&*BI); 434 } 435 I = BI; 436 DeleteMI.eraseFromParent(); 437 } else { 438 getLiteral(*I, Literals); 439 ClauseContent.push_back(&*I); 440 I++; 441 } 442 for (unsigned i = 0, e = Literals.size(); i < e; i += 2) { 443 MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(), 444 TII->get(R600::LITERALS)); 445 if (Literals[i]->isImm()) { 446 MILit.addImm(Literals[i]->getImm()); 447 } else { 448 MILit.addGlobalAddress(Literals[i]->getGlobal(), 449 Literals[i]->getOffset()); 450 } 451 if (i + 1 < e) { 452 if (Literals[i + 1]->isImm()) { 453 MILit.addImm(Literals[i + 1]->getImm()); 454 } else { 455 MILit.addGlobalAddress(Literals[i + 1]->getGlobal(), 456 Literals[i + 1]->getOffset()); 457 } 458 } else 459 MILit.addImm(0); 460 ClauseContent.push_back(MILit); 461 } 462 } 463 assert(ClauseContent.size() < 128 && "ALU clause is too big"); 464 ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1); 465 return ClauseFile(&ClauseHead, std::move(ClauseContent)); 466 } 467 468 void EmitFetchClause(MachineBasicBlock::iterator InsertPos, 469 const DebugLoc &DL, ClauseFile &Clause, 470 unsigned &CfCount) { 471 CounterPropagateAddr(*Clause.first, CfCount); 472 MachineBasicBlock *BB = Clause.first->getParent(); 473 BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount); 474 for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { 475 BB->splice(InsertPos, BB, Clause.second[i]); 476 } 477 CfCount += 2 * Clause.second.size(); 478 } 479 480 void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL, 481 ClauseFile &Clause, unsigned &CfCount) { 482 Clause.first->getOperand(0).setImm(0); 483 CounterPropagateAddr(*Clause.first, CfCount); 484 MachineBasicBlock *BB = Clause.first->getParent(); 485 BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount); 486 for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { 487 BB->splice(InsertPos, BB, Clause.second[i]); 488 } 489 CfCount += Clause.second.size(); 490 } 491 492 void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const { 493 MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm()); 494 } 495 void CounterPropagateAddr(const std::set<MachineInstr *> &MIs, 496 unsigned Addr) const { 497 for (MachineInstr *MI : MIs) { 498 CounterPropagateAddr(*MI, Addr); 499 } 500 } 501 502 public: 503 static char ID; 504 505 R600ControlFlowFinalizer() : MachineFunctionPass(ID) {} 506 507 bool runOnMachineFunction(MachineFunction &MF) override { 508 ST = &MF.getSubtarget<R600Subtarget>(); 509 MaxFetchInst = ST->getTexVTXClauseSize(); 510 TII = ST->getInstrInfo(); 511 TRI = ST->getRegisterInfo(); 512 513 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 514 515 CFStack CFStack(ST, MF.getFunction().getCallingConv()); 516 for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; 517 ++MB) { 518 MachineBasicBlock &MBB = *MB; 519 unsigned CfCount = 0; 520 std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack; 521 std::vector<MachineInstr * > IfThenElseStack; 522 if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) { 523 BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), 524 getHWInstrDesc(CF_CALL_FS)); 525 CfCount++; 526 } 527 std::vector<ClauseFile> FetchClauses, AluClauses; 528 std::vector<MachineInstr *> LastAlu(1); 529 std::vector<MachineInstr *> ToPopAfter; 530 531 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 532 I != E;) { 533 if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) { 534 LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump();); 535 FetchClauses.push_back(MakeFetchClause(MBB, I)); 536 CfCount++; 537 LastAlu.back() = nullptr; 538 continue; 539 } 540 541 MachineBasicBlock::iterator MI = I; 542 if (MI->getOpcode() != R600::ENDIF) 543 LastAlu.back() = nullptr; 544 if (MI->getOpcode() == R600::CF_ALU) 545 LastAlu.back() = &*MI; 546 I++; 547 bool RequiresWorkAround = 548 CFStack.requiresWorkAroundForInst(MI->getOpcode()); 549 switch (MI->getOpcode()) { 550 case R600::CF_ALU_PUSH_BEFORE: 551 if (RequiresWorkAround) { 552 LLVM_DEBUG(dbgs() 553 << "Applying bug work-around for ALU_PUSH_BEFORE\n"); 554 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG)) 555 .addImm(CfCount + 1) 556 .addImm(1); 557 MI->setDesc(TII->get(R600::CF_ALU)); 558 CfCount++; 559 CFStack.pushBranch(R600::CF_PUSH_EG); 560 } else 561 CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE); 562 LLVM_FALLTHROUGH; 563 case R600::CF_ALU: 564 I = MI; 565 AluClauses.push_back(MakeALUClause(MBB, I)); 566 LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); 567 CfCount++; 568 break; 569 case R600::WHILELOOP: { 570 CFStack.pushLoop(); 571 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 572 getHWInstrDesc(CF_WHILE_LOOP)) 573 .addImm(1); 574 std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount, 575 std::set<MachineInstr *>()); 576 Pair.second.insert(MIb); 577 LoopStack.push_back(std::move(Pair)); 578 MI->eraseFromParent(); 579 CfCount++; 580 break; 581 } 582 case R600::ENDLOOP: { 583 CFStack.popLoop(); 584 std::pair<unsigned, std::set<MachineInstr *>> Pair = 585 std::move(LoopStack.back()); 586 LoopStack.pop_back(); 587 CounterPropagateAddr(Pair.second, CfCount); 588 BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP)) 589 .addImm(Pair.first + 1); 590 MI->eraseFromParent(); 591 CfCount++; 592 break; 593 } 594 case R600::IF_PREDICATE_SET: { 595 LastAlu.push_back(nullptr); 596 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 597 getHWInstrDesc(CF_JUMP)) 598 .addImm(0) 599 .addImm(0); 600 IfThenElseStack.push_back(MIb); 601 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 602 MI->eraseFromParent(); 603 CfCount++; 604 break; 605 } 606 case R600::ELSE: { 607 MachineInstr * JumpInst = IfThenElseStack.back(); 608 IfThenElseStack.pop_back(); 609 CounterPropagateAddr(*JumpInst, CfCount); 610 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 611 getHWInstrDesc(CF_ELSE)) 612 .addImm(0) 613 .addImm(0); 614 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 615 IfThenElseStack.push_back(MIb); 616 MI->eraseFromParent(); 617 CfCount++; 618 break; 619 } 620 case R600::ENDIF: { 621 CFStack.popBranch(); 622 if (LastAlu.back()) { 623 ToPopAfter.push_back(LastAlu.back()); 624 } else { 625 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 626 getHWInstrDesc(CF_POP)) 627 .addImm(CfCount + 1) 628 .addImm(1); 629 (void)MIb; 630 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 631 CfCount++; 632 } 633 634 MachineInstr *IfOrElseInst = IfThenElseStack.back(); 635 IfThenElseStack.pop_back(); 636 CounterPropagateAddr(*IfOrElseInst, CfCount); 637 IfOrElseInst->getOperand(1).setImm(1); 638 LastAlu.pop_back(); 639 MI->eraseFromParent(); 640 break; 641 } 642 case R600::BREAK: { 643 CfCount ++; 644 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 645 getHWInstrDesc(CF_LOOP_BREAK)) 646 .addImm(0); 647 LoopStack.back().second.insert(MIb); 648 MI->eraseFromParent(); 649 break; 650 } 651 case R600::CONTINUE: { 652 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 653 getHWInstrDesc(CF_LOOP_CONTINUE)) 654 .addImm(0); 655 LoopStack.back().second.insert(MIb); 656 MI->eraseFromParent(); 657 CfCount++; 658 break; 659 } 660 case R600::RETURN: { 661 DebugLoc DL = MBB.findDebugLoc(MI); 662 BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END)); 663 CfCount++; 664 if (CfCount % 2) { 665 BuildMI(MBB, I, DL, TII->get(R600::PAD)); 666 CfCount++; 667 } 668 MI->eraseFromParent(); 669 for (unsigned i = 0, e = FetchClauses.size(); i < e; i++) 670 EmitFetchClause(I, DL, FetchClauses[i], CfCount); 671 for (unsigned i = 0, e = AluClauses.size(); i < e; i++) 672 EmitALUClause(I, DL, AluClauses[i], CfCount); 673 break; 674 } 675 default: 676 if (TII->isExport(MI->getOpcode())) { 677 LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); 678 CfCount++; 679 } 680 break; 681 } 682 } 683 for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) { 684 MachineInstr *Alu = ToPopAfter[i]; 685 BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu), 686 TII->get(R600::CF_ALU_POP_AFTER)) 687 .addImm(Alu->getOperand(0).getImm()) 688 .addImm(Alu->getOperand(1).getImm()) 689 .addImm(Alu->getOperand(2).getImm()) 690 .addImm(Alu->getOperand(3).getImm()) 691 .addImm(Alu->getOperand(4).getImm()) 692 .addImm(Alu->getOperand(5).getImm()) 693 .addImm(Alu->getOperand(6).getImm()) 694 .addImm(Alu->getOperand(7).getImm()) 695 .addImm(Alu->getOperand(8).getImm()); 696 Alu->eraseFromParent(); 697 } 698 MFI->CFStackSize = CFStack.MaxStackSize; 699 } 700 701 return false; 702 } 703 704 StringRef getPassName() const override { 705 return "R600 Control Flow Finalizer Pass"; 706 } 707 }; 708 709 } // end anonymous namespace 710 711 INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE, 712 "R600 Control Flow Finalizer", false, false) 713 INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE, 714 "R600 Control Flow Finalizer", false, false) 715 716 char R600ControlFlowFinalizer::ID = 0; 717 718 char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID; 719 720 FunctionPass *llvm::createR600ControlFlowFinalizer() { 721 return new R600ControlFlowFinalizer(); 722 } 723