xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp (revision 315ee00fa9616b0a192b6834911f98bcf5316a6b)
1  //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  /// \file
10  /// This pass turns all control flow pseudo instructions into native ones,
11  /// computing their addresses on the fly; it also sets STACK_SIZE info.
12  //
13  //===----------------------------------------------------------------------===//
14  
15  #include "MCTargetDesc/R600MCTargetDesc.h"
16  #include "R600.h"
17  #include "R600MachineFunctionInfo.h"
18  #include "R600Subtarget.h"
19  #include "llvm/CodeGen/MachineFunctionPass.h"
20  #include <set>
21  
22  using namespace llvm;
23  
24  #define DEBUG_TYPE "r600cf"
25  
26  namespace {
27  
28  struct CFStack {
29    enum StackItem {
30      ENTRY = 0,
31      SUB_ENTRY = 1,
32      FIRST_NON_WQM_PUSH = 2,
33      FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
34    };
35  
36    const R600Subtarget *ST;
37    std::vector<StackItem> BranchStack;
38    std::vector<StackItem> LoopStack;
39    unsigned MaxStackSize;
40    unsigned CurrentEntries = 0;
41    unsigned CurrentSubEntries = 0;
42  
43    CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
44        // We need to reserve a stack entry for CALL_FS in vertex shaders.
45        MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
46  
47    unsigned getLoopDepth();
48    bool branchStackContains(CFStack::StackItem);
49    bool requiresWorkAroundForInst(unsigned Opcode);
50    unsigned getSubEntrySize(CFStack::StackItem Item);
51    void updateMaxStackSize();
52    void pushBranch(unsigned Opcode, bool isWQM = false);
53    void pushLoop();
54    void popBranch();
55    void popLoop();
56  };
57  
58  unsigned CFStack::getLoopDepth() {
59    return LoopStack.size();
60  }
61  
62  bool CFStack::branchStackContains(CFStack::StackItem Item) {
63    return llvm::is_contained(BranchStack, Item);
64  }
65  
66  bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
67    if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
68        getLoopDepth() > 1)
69      return true;
70  
71    if (!ST->hasCFAluBug())
72      return false;
73  
74    switch(Opcode) {
75    default: return false;
76    case R600::CF_ALU_PUSH_BEFORE:
77    case R600::CF_ALU_ELSE_AFTER:
78    case R600::CF_ALU_BREAK:
79    case R600::CF_ALU_CONTINUE:
80      if (CurrentSubEntries == 0)
81        return false;
82      if (ST->getWavefrontSize() == 64) {
83        // We are being conservative here.  We only require this work-around if
84        // CurrentSubEntries > 3 &&
85        // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
86        //
87        // We have to be conservative, because we don't know for certain that
88        // our stack allocation algorithm for Evergreen/NI is correct.  Applying this
89        // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
90        // resources without any problems.
91        return CurrentSubEntries > 3;
92      } else {
93        assert(ST->getWavefrontSize() == 32);
94        // We are being conservative here.  We only require the work-around if
95        // CurrentSubEntries > 7 &&
96        // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
97        // See the comment on the wavefront size == 64 case for why we are
98        // being conservative.
99        return CurrentSubEntries > 7;
100      }
101    }
102  }
103  
104  unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
105    switch(Item) {
106    default:
107      return 0;
108    case CFStack::FIRST_NON_WQM_PUSH:
109    assert(!ST->hasCaymanISA());
110    if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
111      // +1 For the push operation.
112      // +2 Extra space required.
113      return 3;
114    } else {
115      // Some documentation says that this is not necessary on Evergreen,
116      // but experimentation has show that we need to allocate 1 extra
117      // sub-entry for the first non-WQM push.
118      // +1 For the push operation.
119      // +1 Extra space required.
120      return 2;
121    }
122    case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
123      assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
124      // +1 For the push operation.
125      // +1 Extra space required.
126      return 2;
127    case CFStack::SUB_ENTRY:
128      return 1;
129    }
130  }
131  
132  void CFStack::updateMaxStackSize() {
133    unsigned CurrentStackSize = CurrentEntries + divideCeil(CurrentSubEntries, 4);
134    MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
135  }
136  
137  void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
138    CFStack::StackItem Item = CFStack::ENTRY;
139    switch(Opcode) {
140    case R600::CF_PUSH_EG:
141    case R600::CF_ALU_PUSH_BEFORE:
142      if (!isWQM) {
143        if (!ST->hasCaymanISA() &&
144            !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
145          Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on Evergreen/NI
146                                               // See comment in
147                                               // CFStack::getSubEntrySize()
148        else if (CurrentEntries > 0 &&
149                 ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
150                 !ST->hasCaymanISA() &&
151                 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
152          Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
153        else
154          Item = CFStack::SUB_ENTRY;
155      } else
156        Item = CFStack::ENTRY;
157      break;
158    }
159    BranchStack.push_back(Item);
160    if (Item == CFStack::ENTRY)
161      CurrentEntries++;
162    else
163      CurrentSubEntries += getSubEntrySize(Item);
164    updateMaxStackSize();
165  }
166  
167  void CFStack::pushLoop() {
168    LoopStack.push_back(CFStack::ENTRY);
169    CurrentEntries++;
170    updateMaxStackSize();
171  }
172  
173  void CFStack::popBranch() {
174    CFStack::StackItem Top = BranchStack.back();
175    if (Top == CFStack::ENTRY)
176      CurrentEntries--;
177    else
178      CurrentSubEntries-= getSubEntrySize(Top);
179    BranchStack.pop_back();
180  }
181  
182  void CFStack::popLoop() {
183    CurrentEntries--;
184    LoopStack.pop_back();
185  }
186  
187  class R600ControlFlowFinalizer : public MachineFunctionPass {
188  private:
189    using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;
190  
191    enum ControlFlowInstruction {
192      CF_TC,
193      CF_VC,
194      CF_CALL_FS,
195      CF_WHILE_LOOP,
196      CF_END_LOOP,
197      CF_LOOP_BREAK,
198      CF_LOOP_CONTINUE,
199      CF_JUMP,
200      CF_ELSE,
201      CF_POP,
202      CF_END
203    };
204  
205    const R600InstrInfo *TII = nullptr;
206    const R600RegisterInfo *TRI = nullptr;
207    unsigned MaxFetchInst;
208    const R600Subtarget *ST = nullptr;
209  
210    bool IsTrivialInst(MachineInstr &MI) const {
211      switch (MI.getOpcode()) {
212      case R600::KILL:
213      case R600::RETURN:
214        return true;
215      default:
216        return false;
217      }
218    }
219  
220    const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
221      unsigned Opcode = 0;
222      bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
223      switch (CFI) {
224      case CF_TC:
225        Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
226        break;
227      case CF_VC:
228        Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
229        break;
230      case CF_CALL_FS:
231        Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
232        break;
233      case CF_WHILE_LOOP:
234        Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
235        break;
236      case CF_END_LOOP:
237        Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
238        break;
239      case CF_LOOP_BREAK:
240        Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
241        break;
242      case CF_LOOP_CONTINUE:
243        Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
244        break;
245      case CF_JUMP:
246        Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
247        break;
248      case CF_ELSE:
249        Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
250        break;
251      case CF_POP:
252        Opcode = isEg ? R600::POP_EG : R600::POP_R600;
253        break;
254      case CF_END:
255        if (ST->hasCaymanISA()) {
256          Opcode = R600::CF_END_CM;
257          break;
258        }
259        Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
260        break;
261      }
262      assert (Opcode && "No opcode selected");
263      return TII->get(Opcode);
264    }
265  
266    bool isCompatibleWithClause(const MachineInstr &MI,
267                                std::set<unsigned> &DstRegs) const {
268      unsigned DstMI, SrcMI;
269      for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
270                                            E = MI.operands_end();
271           I != E; ++I) {
272        const MachineOperand &MO = *I;
273        if (!MO.isReg())
274          continue;
275        if (MO.isDef()) {
276          Register Reg = MO.getReg();
277          if (R600::R600_Reg128RegClass.contains(Reg))
278            DstMI = Reg;
279          else
280            DstMI = TRI->getMatchingSuperReg(Reg,
281                R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
282                &R600::R600_Reg128RegClass);
283        }
284        if (MO.isUse()) {
285          Register Reg = MO.getReg();
286          if (R600::R600_Reg128RegClass.contains(Reg))
287            SrcMI = Reg;
288          else
289            SrcMI = TRI->getMatchingSuperReg(Reg,
290                R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
291                &R600::R600_Reg128RegClass);
292        }
293      }
294      if ((DstRegs.find(SrcMI) == DstRegs.end())) {
295        DstRegs.insert(DstMI);
296        return true;
297      } else
298        return false;
299    }
300  
301    ClauseFile
302    MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
303        const {
304      MachineBasicBlock::iterator ClauseHead = I;
305      std::vector<MachineInstr *> ClauseContent;
306      unsigned AluInstCount = 0;
307      bool IsTex = TII->usesTextureCache(*ClauseHead);
308      std::set<unsigned> DstRegs;
309      for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
310        if (IsTrivialInst(*I))
311          continue;
312        if (AluInstCount >= MaxFetchInst)
313          break;
314        if ((IsTex && !TII->usesTextureCache(*I)) ||
315            (!IsTex && !TII->usesVertexCache(*I)))
316          break;
317        if (!isCompatibleWithClause(*I, DstRegs))
318          break;
319        AluInstCount ++;
320        ClauseContent.push_back(&*I);
321      }
322      MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
323          getHWInstrDesc(IsTex?CF_TC:CF_VC))
324          .addImm(0) // ADDR
325          .addImm(AluInstCount - 1); // COUNT
326      return ClauseFile(MIb, std::move(ClauseContent));
327    }
328  
329    void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
330      static const unsigned LiteralRegs[] = {
331        R600::ALU_LITERAL_X,
332        R600::ALU_LITERAL_Y,
333        R600::ALU_LITERAL_Z,
334        R600::ALU_LITERAL_W
335      };
336      const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
337          TII->getSrcs(MI);
338      for (const auto &Src:Srcs) {
339        if (Src.first->getReg() != R600::ALU_LITERAL_X)
340          continue;
341        int64_t Imm = Src.second;
342        std::vector<MachineOperand *>::iterator It =
343            llvm::find_if(Lits, [&](MachineOperand *val) {
344              return val->isImm() && (val->getImm() == Imm);
345            });
346  
347        // Get corresponding Operand
348        MachineOperand &Operand = MI.getOperand(
349            TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));
350  
351        if (It != Lits.end()) {
352          // Reuse existing literal reg
353          unsigned Index = It - Lits.begin();
354          Src.first->setReg(LiteralRegs[Index]);
355        } else {
356          // Allocate new literal reg
357          assert(Lits.size() < 4 && "Too many literals in Instruction Group");
358          Src.first->setReg(LiteralRegs[Lits.size()]);
359          Lits.push_back(&Operand);
360        }
361      }
362    }
363  
364    MachineBasicBlock::iterator insertLiterals(
365        MachineBasicBlock::iterator InsertPos,
366        const std::vector<unsigned> &Literals) const {
367      MachineBasicBlock *MBB = InsertPos->getParent();
368      for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
369        unsigned LiteralPair0 = Literals[i];
370        unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
371        InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
372            TII->get(R600::LITERALS))
373            .addImm(LiteralPair0)
374            .addImm(LiteralPair1);
375      }
376      return InsertPos;
377    }
378  
379    ClauseFile
380    MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
381        const {
382      MachineInstr &ClauseHead = *I;
383      std::vector<MachineInstr *> ClauseContent;
384      I++;
385      for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
386        if (IsTrivialInst(*I)) {
387          ++I;
388          continue;
389        }
390        if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
391          break;
392        std::vector<MachineOperand *>Literals;
393        if (I->isBundle()) {
394          MachineInstr &DeleteMI = *I;
395          MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
396          while (++BI != E && BI->isBundledWithPred()) {
397            BI->unbundleFromPred();
398            for (MachineOperand &MO : BI->operands()) {
399              if (MO.isReg() && MO.isInternalRead())
400                MO.setIsInternalRead(false);
401            }
402            getLiteral(*BI, Literals);
403            ClauseContent.push_back(&*BI);
404          }
405          I = BI;
406          DeleteMI.eraseFromParent();
407        } else {
408          getLiteral(*I, Literals);
409          ClauseContent.push_back(&*I);
410          I++;
411        }
412        for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
413          MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
414              TII->get(R600::LITERALS));
415          if (Literals[i]->isImm()) {
416              MILit.addImm(Literals[i]->getImm());
417          } else {
418              MILit.addGlobalAddress(Literals[i]->getGlobal(),
419                                     Literals[i]->getOffset());
420          }
421          if (i + 1 < e) {
422            if (Literals[i + 1]->isImm()) {
423              MILit.addImm(Literals[i + 1]->getImm());
424            } else {
425              MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
426                                     Literals[i + 1]->getOffset());
427            }
428          } else
429            MILit.addImm(0);
430          ClauseContent.push_back(MILit);
431        }
432      }
433      assert(ClauseContent.size() < 128 && "ALU clause is too big");
434      ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
435      return ClauseFile(&ClauseHead, std::move(ClauseContent));
436    }
437  
438    void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
439                         const DebugLoc &DL, ClauseFile &Clause,
440                         unsigned &CfCount) {
441      CounterPropagateAddr(*Clause.first, CfCount);
442      MachineBasicBlock *BB = Clause.first->getParent();
443      BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
444      for (MachineInstr *MI : Clause.second)
445        BB->splice(InsertPos, BB, MI);
446      CfCount += 2 * Clause.second.size();
447    }
448  
449    void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
450                       ClauseFile &Clause, unsigned &CfCount) {
451      Clause.first->getOperand(0).setImm(0);
452      CounterPropagateAddr(*Clause.first, CfCount);
453      MachineBasicBlock *BB = Clause.first->getParent();
454      BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
455      for (MachineInstr *MI : Clause.second)
456        BB->splice(InsertPos, BB, MI);
457      CfCount += Clause.second.size();
458    }
459  
460    void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
461      MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
462    }
463    void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
464                              unsigned Addr) const {
465      for (MachineInstr *MI : MIs) {
466        CounterPropagateAddr(*MI, Addr);
467      }
468    }
469  
470  public:
471    static char ID;
472  
473    R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}
474  
475    bool runOnMachineFunction(MachineFunction &MF) override {
476      ST = &MF.getSubtarget<R600Subtarget>();
477      MaxFetchInst = ST->getTexVTXClauseSize();
478      TII = ST->getInstrInfo();
479      TRI = ST->getRegisterInfo();
480  
481      R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
482  
483      CFStack CFStack(ST, MF.getFunction().getCallingConv());
484      for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
485          ++MB) {
486        MachineBasicBlock &MBB = *MB;
487        unsigned CfCount = 0;
488        std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
489        std::vector<MachineInstr * > IfThenElseStack;
490        if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
491          BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
492              getHWInstrDesc(CF_CALL_FS));
493          CfCount++;
494        }
495        std::vector<ClauseFile> FetchClauses, AluClauses;
496        std::vector<MachineInstr *> LastAlu(1);
497        std::vector<MachineInstr *> ToPopAfter;
498  
499        for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
500            I != E;) {
501          if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
502            LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
503            FetchClauses.push_back(MakeFetchClause(MBB, I));
504            CfCount++;
505            LastAlu.back() = nullptr;
506            continue;
507          }
508  
509          MachineBasicBlock::iterator MI = I;
510          if (MI->getOpcode() != R600::ENDIF)
511            LastAlu.back() = nullptr;
512          if (MI->getOpcode() == R600::CF_ALU)
513            LastAlu.back() = &*MI;
514          I++;
515          bool RequiresWorkAround =
516              CFStack.requiresWorkAroundForInst(MI->getOpcode());
517          switch (MI->getOpcode()) {
518          case R600::CF_ALU_PUSH_BEFORE:
519            if (RequiresWorkAround) {
520              LLVM_DEBUG(dbgs()
521                         << "Applying bug work-around for ALU_PUSH_BEFORE\n");
522              BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
523                  .addImm(CfCount + 1)
524                  .addImm(1);
525              MI->setDesc(TII->get(R600::CF_ALU));
526              CfCount++;
527              CFStack.pushBranch(R600::CF_PUSH_EG);
528            } else
529              CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
530            [[fallthrough]];
531          case R600::CF_ALU:
532            I = MI;
533            AluClauses.push_back(MakeALUClause(MBB, I));
534            LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
535            CfCount++;
536            break;
537          case R600::WHILELOOP: {
538            CFStack.pushLoop();
539            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
540                getHWInstrDesc(CF_WHILE_LOOP))
541                .addImm(1);
542            std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
543                std::set<MachineInstr *>());
544            Pair.second.insert(MIb);
545            LoopStack.push_back(std::move(Pair));
546            MI->eraseFromParent();
547            CfCount++;
548            break;
549          }
550          case R600::ENDLOOP: {
551            CFStack.popLoop();
552            std::pair<unsigned, std::set<MachineInstr *>> Pair =
553                std::move(LoopStack.back());
554            LoopStack.pop_back();
555            CounterPropagateAddr(Pair.second, CfCount);
556            BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
557                .addImm(Pair.first + 1);
558            MI->eraseFromParent();
559            CfCount++;
560            break;
561          }
562          case R600::IF_PREDICATE_SET: {
563            LastAlu.push_back(nullptr);
564            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
565                getHWInstrDesc(CF_JUMP))
566                .addImm(0)
567                .addImm(0);
568            IfThenElseStack.push_back(MIb);
569            LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
570            MI->eraseFromParent();
571            CfCount++;
572            break;
573          }
574          case R600::ELSE: {
575            MachineInstr * JumpInst = IfThenElseStack.back();
576            IfThenElseStack.pop_back();
577            CounterPropagateAddr(*JumpInst, CfCount);
578            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
579                getHWInstrDesc(CF_ELSE))
580                .addImm(0)
581                .addImm(0);
582            LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
583            IfThenElseStack.push_back(MIb);
584            MI->eraseFromParent();
585            CfCount++;
586            break;
587          }
588          case R600::ENDIF: {
589            CFStack.popBranch();
590            if (LastAlu.back()) {
591              ToPopAfter.push_back(LastAlu.back());
592            } else {
593              MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
594                  getHWInstrDesc(CF_POP))
595                  .addImm(CfCount + 1)
596                  .addImm(1);
597              (void)MIb;
598              LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
599              CfCount++;
600            }
601  
602            MachineInstr *IfOrElseInst = IfThenElseStack.back();
603            IfThenElseStack.pop_back();
604            CounterPropagateAddr(*IfOrElseInst, CfCount);
605            IfOrElseInst->getOperand(1).setImm(1);
606            LastAlu.pop_back();
607            MI->eraseFromParent();
608            break;
609          }
610          case R600::BREAK: {
611            CfCount ++;
612            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
613                getHWInstrDesc(CF_LOOP_BREAK))
614                .addImm(0);
615            LoopStack.back().second.insert(MIb);
616            MI->eraseFromParent();
617            break;
618          }
619          case R600::CONTINUE: {
620            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
621                getHWInstrDesc(CF_LOOP_CONTINUE))
622                .addImm(0);
623            LoopStack.back().second.insert(MIb);
624            MI->eraseFromParent();
625            CfCount++;
626            break;
627          }
628          case R600::RETURN: {
629            DebugLoc DL = MBB.findDebugLoc(MI);
630            BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
631            CfCount++;
632            if (CfCount % 2) {
633              BuildMI(MBB, I, DL, TII->get(R600::PAD));
634              CfCount++;
635            }
636            MI->eraseFromParent();
637            for (ClauseFile &CF : FetchClauses)
638              EmitFetchClause(I, DL, CF, CfCount);
639            for (ClauseFile &CF : AluClauses)
640              EmitALUClause(I, DL, CF, CfCount);
641            break;
642          }
643          default:
644            if (TII->isExport(MI->getOpcode())) {
645              LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
646              CfCount++;
647            }
648            break;
649          }
650        }
651        for (MachineInstr *Alu : ToPopAfter) {
652          BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
653              TII->get(R600::CF_ALU_POP_AFTER))
654              .addImm(Alu->getOperand(0).getImm())
655              .addImm(Alu->getOperand(1).getImm())
656              .addImm(Alu->getOperand(2).getImm())
657              .addImm(Alu->getOperand(3).getImm())
658              .addImm(Alu->getOperand(4).getImm())
659              .addImm(Alu->getOperand(5).getImm())
660              .addImm(Alu->getOperand(6).getImm())
661              .addImm(Alu->getOperand(7).getImm())
662              .addImm(Alu->getOperand(8).getImm());
663          Alu->eraseFromParent();
664        }
665        MFI->CFStackSize = CFStack.MaxStackSize;
666      }
667  
668      return false;
669    }
670  
671    StringRef getPassName() const override {
672      return "R600 Control Flow Finalizer Pass";
673    }
674  };
675  
676  } // end anonymous namespace
677  
678  INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
679                       "R600 Control Flow Finalizer", false, false)
680  INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
681                      "R600 Control Flow Finalizer", false, false)
682  
683  char R600ControlFlowFinalizer::ID = 0;
684  
685  char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;
686  
687  FunctionPass *llvm::createR600ControlFlowFinalizer() {
688    return new R600ControlFlowFinalizer();
689  }
690