xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp (revision 8c22b9f3ba586e008e8e55a6215a1d46eb6830b9)
1  //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  /// \file
10  /// This pass turns all control flow pseudo instructions into native ones,
11  /// computing their addresses on the fly; it also sets STACK_SIZE info.
12  //
13  //===----------------------------------------------------------------------===//
14  
15  #include "AMDGPU.h"
16  #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17  #include "R600MachineFunctionInfo.h"
18  #include "R600Subtarget.h"
19  #include <set>
20  
21  using namespace llvm;
22  
23  #define DEBUG_TYPE "r600cf"
24  
25  namespace {
26  
27  struct CFStack {
28    enum StackItem {
29      ENTRY = 0,
30      SUB_ENTRY = 1,
31      FIRST_NON_WQM_PUSH = 2,
32      FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
33    };
34  
35    const R600Subtarget *ST;
36    std::vector<StackItem> BranchStack;
37    std::vector<StackItem> LoopStack;
38    unsigned MaxStackSize;
39    unsigned CurrentEntries = 0;
40    unsigned CurrentSubEntries = 0;
41  
42    CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
43        // We need to reserve a stack entry for CALL_FS in vertex shaders.
44        MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
45  
46    unsigned getLoopDepth();
47    bool branchStackContains(CFStack::StackItem);
48    bool requiresWorkAroundForInst(unsigned Opcode);
49    unsigned getSubEntrySize(CFStack::StackItem Item);
50    void updateMaxStackSize();
51    void pushBranch(unsigned Opcode, bool isWQM = false);
52    void pushLoop();
53    void popBranch();
54    void popLoop();
55  };
56  
57  unsigned CFStack::getLoopDepth() {
58    return LoopStack.size();
59  }
60  
61  bool CFStack::branchStackContains(CFStack::StackItem Item) {
62    return llvm::is_contained(BranchStack, Item);
63  }
64  
65  bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
66    if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
67        getLoopDepth() > 1)
68      return true;
69  
70    if (!ST->hasCFAluBug())
71      return false;
72  
73    switch(Opcode) {
74    default: return false;
75    case R600::CF_ALU_PUSH_BEFORE:
76    case R600::CF_ALU_ELSE_AFTER:
77    case R600::CF_ALU_BREAK:
78    case R600::CF_ALU_CONTINUE:
79      if (CurrentSubEntries == 0)
80        return false;
81      if (ST->getWavefrontSize() == 64) {
82        // We are being conservative here.  We only require this work-around if
83        // CurrentSubEntries > 3 &&
84        // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
85        //
86        // We have to be conservative, because we don't know for certain that
87        // our stack allocation algorithm for Evergreen/NI is correct.  Applying this
88        // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
89        // resources without any problems.
90        return CurrentSubEntries > 3;
91      } else {
92        assert(ST->getWavefrontSize() == 32);
93        // We are being conservative here.  We only require the work-around if
94        // CurrentSubEntries > 7 &&
95        // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
96        // See the comment on the wavefront size == 64 case for why we are
97        // being conservative.
98        return CurrentSubEntries > 7;
99      }
100    }
101  }
102  
103  unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
104    switch(Item) {
105    default:
106      return 0;
107    case CFStack::FIRST_NON_WQM_PUSH:
108    assert(!ST->hasCaymanISA());
109    if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
110      // +1 For the push operation.
111      // +2 Extra space required.
112      return 3;
113    } else {
114      // Some documentation says that this is not necessary on Evergreen,
115      // but experimentation has show that we need to allocate 1 extra
116      // sub-entry for the first non-WQM push.
117      // +1 For the push operation.
118      // +1 Extra space required.
119      return 2;
120    }
121    case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
122      assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
123      // +1 For the push operation.
124      // +1 Extra space required.
125      return 2;
126    case CFStack::SUB_ENTRY:
127      return 1;
128    }
129  }
130  
131  void CFStack::updateMaxStackSize() {
132    unsigned CurrentStackSize = CurrentEntries + divideCeil(CurrentSubEntries, 4);
133    MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
134  }
135  
136  void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
137    CFStack::StackItem Item = CFStack::ENTRY;
138    switch(Opcode) {
139    case R600::CF_PUSH_EG:
140    case R600::CF_ALU_PUSH_BEFORE:
141      if (!isWQM) {
142        if (!ST->hasCaymanISA() &&
143            !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
144          Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on Evergreen/NI
145                                               // See comment in
146                                               // CFStack::getSubEntrySize()
147        else if (CurrentEntries > 0 &&
148                 ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
149                 !ST->hasCaymanISA() &&
150                 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
151          Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
152        else
153          Item = CFStack::SUB_ENTRY;
154      } else
155        Item = CFStack::ENTRY;
156      break;
157    }
158    BranchStack.push_back(Item);
159    if (Item == CFStack::ENTRY)
160      CurrentEntries++;
161    else
162      CurrentSubEntries += getSubEntrySize(Item);
163    updateMaxStackSize();
164  }
165  
166  void CFStack::pushLoop() {
167    LoopStack.push_back(CFStack::ENTRY);
168    CurrentEntries++;
169    updateMaxStackSize();
170  }
171  
172  void CFStack::popBranch() {
173    CFStack::StackItem Top = BranchStack.back();
174    if (Top == CFStack::ENTRY)
175      CurrentEntries--;
176    else
177      CurrentSubEntries-= getSubEntrySize(Top);
178    BranchStack.pop_back();
179  }
180  
181  void CFStack::popLoop() {
182    CurrentEntries--;
183    LoopStack.pop_back();
184  }
185  
186  class R600ControlFlowFinalizer : public MachineFunctionPass {
187  private:
188    using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;
189  
190    enum ControlFlowInstruction {
191      CF_TC,
192      CF_VC,
193      CF_CALL_FS,
194      CF_WHILE_LOOP,
195      CF_END_LOOP,
196      CF_LOOP_BREAK,
197      CF_LOOP_CONTINUE,
198      CF_JUMP,
199      CF_ELSE,
200      CF_POP,
201      CF_END
202    };
203  
204    const R600InstrInfo *TII = nullptr;
205    const R600RegisterInfo *TRI = nullptr;
206    unsigned MaxFetchInst;
207    const R600Subtarget *ST = nullptr;
208  
209    bool IsTrivialInst(MachineInstr &MI) const {
210      switch (MI.getOpcode()) {
211      case R600::KILL:
212      case R600::RETURN:
213        return true;
214      default:
215        return false;
216      }
217    }
218  
219    const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
220      unsigned Opcode = 0;
221      bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
222      switch (CFI) {
223      case CF_TC:
224        Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
225        break;
226      case CF_VC:
227        Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
228        break;
229      case CF_CALL_FS:
230        Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
231        break;
232      case CF_WHILE_LOOP:
233        Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
234        break;
235      case CF_END_LOOP:
236        Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
237        break;
238      case CF_LOOP_BREAK:
239        Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
240        break;
241      case CF_LOOP_CONTINUE:
242        Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
243        break;
244      case CF_JUMP:
245        Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
246        break;
247      case CF_ELSE:
248        Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
249        break;
250      case CF_POP:
251        Opcode = isEg ? R600::POP_EG : R600::POP_R600;
252        break;
253      case CF_END:
254        if (ST->hasCaymanISA()) {
255          Opcode = R600::CF_END_CM;
256          break;
257        }
258        Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
259        break;
260      }
261      assert (Opcode && "No opcode selected");
262      return TII->get(Opcode);
263    }
264  
265    bool isCompatibleWithClause(const MachineInstr &MI,
266                                std::set<unsigned> &DstRegs) const {
267      unsigned DstMI, SrcMI;
268      for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
269                                            E = MI.operands_end();
270           I != E; ++I) {
271        const MachineOperand &MO = *I;
272        if (!MO.isReg())
273          continue;
274        if (MO.isDef()) {
275          Register Reg = MO.getReg();
276          if (R600::R600_Reg128RegClass.contains(Reg))
277            DstMI = Reg;
278          else
279            DstMI = TRI->getMatchingSuperReg(Reg,
280                R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
281                &R600::R600_Reg128RegClass);
282        }
283        if (MO.isUse()) {
284          Register Reg = MO.getReg();
285          if (R600::R600_Reg128RegClass.contains(Reg))
286            SrcMI = Reg;
287          else
288            SrcMI = TRI->getMatchingSuperReg(Reg,
289                R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
290                &R600::R600_Reg128RegClass);
291        }
292      }
293      if ((DstRegs.find(SrcMI) == DstRegs.end())) {
294        DstRegs.insert(DstMI);
295        return true;
296      } else
297        return false;
298    }
299  
300    ClauseFile
301    MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
302        const {
303      MachineBasicBlock::iterator ClauseHead = I;
304      std::vector<MachineInstr *> ClauseContent;
305      unsigned AluInstCount = 0;
306      bool IsTex = TII->usesTextureCache(*ClauseHead);
307      std::set<unsigned> DstRegs;
308      for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
309        if (IsTrivialInst(*I))
310          continue;
311        if (AluInstCount >= MaxFetchInst)
312          break;
313        if ((IsTex && !TII->usesTextureCache(*I)) ||
314            (!IsTex && !TII->usesVertexCache(*I)))
315          break;
316        if (!isCompatibleWithClause(*I, DstRegs))
317          break;
318        AluInstCount ++;
319        ClauseContent.push_back(&*I);
320      }
321      MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
322          getHWInstrDesc(IsTex?CF_TC:CF_VC))
323          .addImm(0) // ADDR
324          .addImm(AluInstCount - 1); // COUNT
325      return ClauseFile(MIb, std::move(ClauseContent));
326    }
327  
328    void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
329      static const unsigned LiteralRegs[] = {
330        R600::ALU_LITERAL_X,
331        R600::ALU_LITERAL_Y,
332        R600::ALU_LITERAL_Z,
333        R600::ALU_LITERAL_W
334      };
335      const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
336          TII->getSrcs(MI);
337      for (const auto &Src:Srcs) {
338        if (Src.first->getReg() != R600::ALU_LITERAL_X)
339          continue;
340        int64_t Imm = Src.second;
341        std::vector<MachineOperand *>::iterator It =
342            llvm::find_if(Lits, [&](MachineOperand *val) {
343              return val->isImm() && (val->getImm() == Imm);
344            });
345  
346        // Get corresponding Operand
347        MachineOperand &Operand = MI.getOperand(
348            TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));
349  
350        if (It != Lits.end()) {
351          // Reuse existing literal reg
352          unsigned Index = It - Lits.begin();
353          Src.first->setReg(LiteralRegs[Index]);
354        } else {
355          // Allocate new literal reg
356          assert(Lits.size() < 4 && "Too many literals in Instruction Group");
357          Src.first->setReg(LiteralRegs[Lits.size()]);
358          Lits.push_back(&Operand);
359        }
360      }
361    }
362  
363    MachineBasicBlock::iterator insertLiterals(
364        MachineBasicBlock::iterator InsertPos,
365        const std::vector<unsigned> &Literals) const {
366      MachineBasicBlock *MBB = InsertPos->getParent();
367      for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
368        unsigned LiteralPair0 = Literals[i];
369        unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
370        InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
371            TII->get(R600::LITERALS))
372            .addImm(LiteralPair0)
373            .addImm(LiteralPair1);
374      }
375      return InsertPos;
376    }
377  
378    ClauseFile
379    MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
380        const {
381      MachineInstr &ClauseHead = *I;
382      std::vector<MachineInstr *> ClauseContent;
383      I++;
384      for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
385        if (IsTrivialInst(*I)) {
386          ++I;
387          continue;
388        }
389        if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
390          break;
391        std::vector<MachineOperand *>Literals;
392        if (I->isBundle()) {
393          MachineInstr &DeleteMI = *I;
394          MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
395          while (++BI != E && BI->isBundledWithPred()) {
396            BI->unbundleFromPred();
397            for (MachineOperand &MO : BI->operands()) {
398              if (MO.isReg() && MO.isInternalRead())
399                MO.setIsInternalRead(false);
400            }
401            getLiteral(*BI, Literals);
402            ClauseContent.push_back(&*BI);
403          }
404          I = BI;
405          DeleteMI.eraseFromParent();
406        } else {
407          getLiteral(*I, Literals);
408          ClauseContent.push_back(&*I);
409          I++;
410        }
411        for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
412          MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
413              TII->get(R600::LITERALS));
414          if (Literals[i]->isImm()) {
415              MILit.addImm(Literals[i]->getImm());
416          } else {
417              MILit.addGlobalAddress(Literals[i]->getGlobal(),
418                                     Literals[i]->getOffset());
419          }
420          if (i + 1 < e) {
421            if (Literals[i + 1]->isImm()) {
422              MILit.addImm(Literals[i + 1]->getImm());
423            } else {
424              MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
425                                     Literals[i + 1]->getOffset());
426            }
427          } else
428            MILit.addImm(0);
429          ClauseContent.push_back(MILit);
430        }
431      }
432      assert(ClauseContent.size() < 128 && "ALU clause is too big");
433      ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
434      return ClauseFile(&ClauseHead, std::move(ClauseContent));
435    }
436  
437    void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
438                         const DebugLoc &DL, ClauseFile &Clause,
439                         unsigned &CfCount) {
440      CounterPropagateAddr(*Clause.first, CfCount);
441      MachineBasicBlock *BB = Clause.first->getParent();
442      BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
443      for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
444        BB->splice(InsertPos, BB, Clause.second[i]);
445      }
446      CfCount += 2 * Clause.second.size();
447    }
448  
449    void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
450                       ClauseFile &Clause, unsigned &CfCount) {
451      Clause.first->getOperand(0).setImm(0);
452      CounterPropagateAddr(*Clause.first, CfCount);
453      MachineBasicBlock *BB = Clause.first->getParent();
454      BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
455      for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
456        BB->splice(InsertPos, BB, Clause.second[i]);
457      }
458      CfCount += Clause.second.size();
459    }
460  
461    void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
462      MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
463    }
464    void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
465                              unsigned Addr) const {
466      for (MachineInstr *MI : MIs) {
467        CounterPropagateAddr(*MI, Addr);
468      }
469    }
470  
471  public:
472    static char ID;
473  
474    R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}
475  
476    bool runOnMachineFunction(MachineFunction &MF) override {
477      ST = &MF.getSubtarget<R600Subtarget>();
478      MaxFetchInst = ST->getTexVTXClauseSize();
479      TII = ST->getInstrInfo();
480      TRI = ST->getRegisterInfo();
481  
482      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
483  
484      CFStack CFStack(ST, MF.getFunction().getCallingConv());
485      for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
486          ++MB) {
487        MachineBasicBlock &MBB = *MB;
488        unsigned CfCount = 0;
489        std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
490        std::vector<MachineInstr * > IfThenElseStack;
491        if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
492          BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
493              getHWInstrDesc(CF_CALL_FS));
494          CfCount++;
495        }
496        std::vector<ClauseFile> FetchClauses, AluClauses;
497        std::vector<MachineInstr *> LastAlu(1);
498        std::vector<MachineInstr *> ToPopAfter;
499  
500        for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
501            I != E;) {
502          if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
503            LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
504            FetchClauses.push_back(MakeFetchClause(MBB, I));
505            CfCount++;
506            LastAlu.back() = nullptr;
507            continue;
508          }
509  
510          MachineBasicBlock::iterator MI = I;
511          if (MI->getOpcode() != R600::ENDIF)
512            LastAlu.back() = nullptr;
513          if (MI->getOpcode() == R600::CF_ALU)
514            LastAlu.back() = &*MI;
515          I++;
516          bool RequiresWorkAround =
517              CFStack.requiresWorkAroundForInst(MI->getOpcode());
518          switch (MI->getOpcode()) {
519          case R600::CF_ALU_PUSH_BEFORE:
520            if (RequiresWorkAround) {
521              LLVM_DEBUG(dbgs()
522                         << "Applying bug work-around for ALU_PUSH_BEFORE\n");
523              BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
524                  .addImm(CfCount + 1)
525                  .addImm(1);
526              MI->setDesc(TII->get(R600::CF_ALU));
527              CfCount++;
528              CFStack.pushBranch(R600::CF_PUSH_EG);
529            } else
530              CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
531            LLVM_FALLTHROUGH;
532          case R600::CF_ALU:
533            I = MI;
534            AluClauses.push_back(MakeALUClause(MBB, I));
535            LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
536            CfCount++;
537            break;
538          case R600::WHILELOOP: {
539            CFStack.pushLoop();
540            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
541                getHWInstrDesc(CF_WHILE_LOOP))
542                .addImm(1);
543            std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
544                std::set<MachineInstr *>());
545            Pair.second.insert(MIb);
546            LoopStack.push_back(std::move(Pair));
547            MI->eraseFromParent();
548            CfCount++;
549            break;
550          }
551          case R600::ENDLOOP: {
552            CFStack.popLoop();
553            std::pair<unsigned, std::set<MachineInstr *>> Pair =
554                std::move(LoopStack.back());
555            LoopStack.pop_back();
556            CounterPropagateAddr(Pair.second, CfCount);
557            BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
558                .addImm(Pair.first + 1);
559            MI->eraseFromParent();
560            CfCount++;
561            break;
562          }
563          case R600::IF_PREDICATE_SET: {
564            LastAlu.push_back(nullptr);
565            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
566                getHWInstrDesc(CF_JUMP))
567                .addImm(0)
568                .addImm(0);
569            IfThenElseStack.push_back(MIb);
570            LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
571            MI->eraseFromParent();
572            CfCount++;
573            break;
574          }
575          case R600::ELSE: {
576            MachineInstr * JumpInst = IfThenElseStack.back();
577            IfThenElseStack.pop_back();
578            CounterPropagateAddr(*JumpInst, CfCount);
579            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
580                getHWInstrDesc(CF_ELSE))
581                .addImm(0)
582                .addImm(0);
583            LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
584            IfThenElseStack.push_back(MIb);
585            MI->eraseFromParent();
586            CfCount++;
587            break;
588          }
589          case R600::ENDIF: {
590            CFStack.popBranch();
591            if (LastAlu.back()) {
592              ToPopAfter.push_back(LastAlu.back());
593            } else {
594              MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
595                  getHWInstrDesc(CF_POP))
596                  .addImm(CfCount + 1)
597                  .addImm(1);
598              (void)MIb;
599              LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
600              CfCount++;
601            }
602  
603            MachineInstr *IfOrElseInst = IfThenElseStack.back();
604            IfThenElseStack.pop_back();
605            CounterPropagateAddr(*IfOrElseInst, CfCount);
606            IfOrElseInst->getOperand(1).setImm(1);
607            LastAlu.pop_back();
608            MI->eraseFromParent();
609            break;
610          }
611          case R600::BREAK: {
612            CfCount ++;
613            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
614                getHWInstrDesc(CF_LOOP_BREAK))
615                .addImm(0);
616            LoopStack.back().second.insert(MIb);
617            MI->eraseFromParent();
618            break;
619          }
620          case R600::CONTINUE: {
621            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
622                getHWInstrDesc(CF_LOOP_CONTINUE))
623                .addImm(0);
624            LoopStack.back().second.insert(MIb);
625            MI->eraseFromParent();
626            CfCount++;
627            break;
628          }
629          case R600::RETURN: {
630            DebugLoc DL = MBB.findDebugLoc(MI);
631            BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
632            CfCount++;
633            if (CfCount % 2) {
634              BuildMI(MBB, I, DL, TII->get(R600::PAD));
635              CfCount++;
636            }
637            MI->eraseFromParent();
638            for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
639              EmitFetchClause(I, DL, FetchClauses[i], CfCount);
640            for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
641              EmitALUClause(I, DL, AluClauses[i], CfCount);
642            break;
643          }
644          default:
645            if (TII->isExport(MI->getOpcode())) {
646              LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
647              CfCount++;
648            }
649            break;
650          }
651        }
652        for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
653          MachineInstr *Alu = ToPopAfter[i];
654          BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
655              TII->get(R600::CF_ALU_POP_AFTER))
656              .addImm(Alu->getOperand(0).getImm())
657              .addImm(Alu->getOperand(1).getImm())
658              .addImm(Alu->getOperand(2).getImm())
659              .addImm(Alu->getOperand(3).getImm())
660              .addImm(Alu->getOperand(4).getImm())
661              .addImm(Alu->getOperand(5).getImm())
662              .addImm(Alu->getOperand(6).getImm())
663              .addImm(Alu->getOperand(7).getImm())
664              .addImm(Alu->getOperand(8).getImm());
665          Alu->eraseFromParent();
666        }
667        MFI->CFStackSize = CFStack.MaxStackSize;
668      }
669  
670      return false;
671    }
672  
673    StringRef getPassName() const override {
674      return "R600 Control Flow Finalizer Pass";
675    }
676  };
677  
678  } // end anonymous namespace
679  
680  INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
681                       "R600 Control Flow Finalizer", false, false)
682  INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
683                      "R600 Control Flow Finalizer", false, false)
684  
685  char R600ControlFlowFinalizer::ID = 0;
686  
687  char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;
688  
689  FunctionPass *llvm::createR600ControlFlowFinalizer() {
690    return new R600ControlFlowFinalizer();
691  }
692