xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp (revision a3c35da61bb201168575f1d18f4ca3e96937d35c)
1  //===- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. ===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file implements the machine instruction level CFG structurizer pass.
10  //
11  //===----------------------------------------------------------------------===//
12  
13  #include "AMDGPU.h"
14  #include "AMDGPUSubtarget.h"
15  #include "SIInstrInfo.h"
16  #include "llvm/ADT/ArrayRef.h"
17  #include "llvm/ADT/DenseMap.h"
18  #include "llvm/ADT/DenseSet.h"
19  #include "llvm/ADT/PostOrderIterator.h"
20  #include "llvm/ADT/SetVector.h"
21  #include "llvm/ADT/SmallPtrSet.h"
22  #include "llvm/ADT/SmallVector.h"
23  #include "llvm/CodeGen/MachineBasicBlock.h"
24  #include "llvm/CodeGen/MachineFunction.h"
25  #include "llvm/CodeGen/MachineFunctionPass.h"
26  #include "llvm/CodeGen/MachineInstr.h"
27  #include "llvm/CodeGen/MachineInstrBuilder.h"
28  #include "llvm/CodeGen/MachineOperand.h"
29  #include "llvm/CodeGen/MachineRegionInfo.h"
30  #include "llvm/CodeGen/MachineRegisterInfo.h"
31  #include "llvm/CodeGen/TargetOpcodes.h"
32  #include "llvm/CodeGen/TargetRegisterInfo.h"
33  #include "llvm/Config/llvm-config.h"
34  #include "llvm/IR/DebugLoc.h"
35  #include "llvm/Pass.h"
36  #include "llvm/Support/Compiler.h"
37  #include "llvm/Support/Debug.h"
38  #include "llvm/Support/ErrorHandling.h"
39  #include "llvm/Support/raw_ostream.h"
40  #include <cassert>
41  #include <tuple>
42  #include <utility>
43  
44  using namespace llvm;
45  
46  #define DEBUG_TYPE "amdgpucfgstructurizer"
47  
48  namespace {
49  
50  class PHILinearizeDestIterator;
51  
52  class PHILinearize {
53    friend class PHILinearizeDestIterator;
54  
55  public:
56    using PHISourceT = std::pair<unsigned, MachineBasicBlock *>;
57  
58  private:
59    using PHISourcesT = DenseSet<PHISourceT>;
60    using PHIInfoElementT = struct {
61      unsigned DestReg;
62      DebugLoc DL;
63      PHISourcesT Sources;
64    };
65    using PHIInfoT = SmallPtrSet<PHIInfoElementT *, 2>;
66    PHIInfoT PHIInfo;
67  
68    static unsigned phiInfoElementGetDest(PHIInfoElementT *Info);
69    static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef);
70    static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info);
71    static void phiInfoElementAddSource(PHIInfoElementT *Info, unsigned SourceReg,
72                                        MachineBasicBlock *SourceMBB);
73    static void phiInfoElementRemoveSource(PHIInfoElementT *Info,
74                                           unsigned SourceReg,
75                                           MachineBasicBlock *SourceMBB);
76    PHIInfoElementT *findPHIInfoElement(unsigned DestReg);
77    PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg,
78                                                  MachineBasicBlock *SourceMBB);
79  
80  public:
81    bool findSourcesFromMBB(MachineBasicBlock *SourceMBB,
82                            SmallVector<unsigned, 4> &Sources);
83    void addDest(unsigned DestReg, const DebugLoc &DL);
84    void replaceDef(unsigned OldDestReg, unsigned NewDestReg);
85    void deleteDef(unsigned DestReg);
86    void addSource(unsigned DestReg, unsigned SourceReg,
87                   MachineBasicBlock *SourceMBB);
88    void removeSource(unsigned DestReg, unsigned SourceReg,
89                      MachineBasicBlock *SourceMBB = nullptr);
90    bool findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB,
91                  unsigned &DestReg);
92    bool isSource(unsigned Reg, MachineBasicBlock *SourceMBB = nullptr);
93    unsigned getNumSources(unsigned DestReg);
94    void dump(MachineRegisterInfo *MRI);
95    void clear();
96  
97    using source_iterator = PHISourcesT::iterator;
98    using dest_iterator = PHILinearizeDestIterator;
99  
100    dest_iterator dests_begin();
101    dest_iterator dests_end();
102  
103    source_iterator sources_begin(unsigned Reg);
104    source_iterator sources_end(unsigned Reg);
105  };
106  
107  class PHILinearizeDestIterator {
108  private:
109    PHILinearize::PHIInfoT::iterator Iter;
110  
111  public:
112    PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {}
113  
114    unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); }
115    PHILinearizeDestIterator &operator++() {
116      ++Iter;
117      return *this;
118    }
119    bool operator==(const PHILinearizeDestIterator &I) const {
120      return I.Iter == Iter;
121    }
122    bool operator!=(const PHILinearizeDestIterator &I) const {
123      return I.Iter != Iter;
124    }
125  };
126  
127  } // end anonymous namespace
128  
129  unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) {
130    return Info->DestReg;
131  }
132  
133  void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info,
134                                          unsigned NewDef) {
135    Info->DestReg = NewDef;
136  }
137  
138  PHILinearize::PHISourcesT &
139  PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) {
140    return Info->Sources;
141  }
142  
143  void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info,
144                                             unsigned SourceReg,
145                                             MachineBasicBlock *SourceMBB) {
146    // Assertion ensures we don't use the same SourceMBB for the
147    // sources, because we cannot have different registers with
148    // identical predecessors, but we can have the same register for
149    // multiple predecessors.
150  #if !defined(NDEBUG)
151    for (auto SI : phiInfoElementGetSources(Info)) {
152      assert((SI.second != SourceMBB || SourceReg == SI.first));
153    }
154  #endif
155  
156    phiInfoElementGetSources(Info).insert(PHISourceT(SourceReg, SourceMBB));
157  }
158  
159  void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info,
160                                                unsigned SourceReg,
161                                                MachineBasicBlock *SourceMBB) {
162    auto &Sources = phiInfoElementGetSources(Info);
163    SmallVector<PHISourceT, 4> ElimiatedSources;
164    for (auto SI : Sources) {
165      if (SI.first == SourceReg &&
166          (SI.second == nullptr || SI.second == SourceMBB)) {
167        ElimiatedSources.push_back(PHISourceT(SI.first, SI.second));
168      }
169    }
170  
171    for (auto &Source : ElimiatedSources) {
172      Sources.erase(Source);
173    }
174  }
175  
176  PHILinearize::PHIInfoElementT *
177  PHILinearize::findPHIInfoElement(unsigned DestReg) {
178    for (auto I : PHIInfo) {
179      if (phiInfoElementGetDest(I) == DestReg) {
180        return I;
181      }
182    }
183    return nullptr;
184  }
185  
186  PHILinearize::PHIInfoElementT *
187  PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg,
188                                             MachineBasicBlock *SourceMBB) {
189    for (auto I : PHIInfo) {
190      for (auto SI : phiInfoElementGetSources(I)) {
191        if (SI.first == SourceReg &&
192            (SI.second == nullptr || SI.second == SourceMBB)) {
193          return I;
194        }
195      }
196    }
197    return nullptr;
198  }
199  
200  bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB,
201                                        SmallVector<unsigned, 4> &Sources) {
202    bool FoundSource = false;
203    for (auto I : PHIInfo) {
204      for (auto SI : phiInfoElementGetSources(I)) {
205        if (SI.second == SourceMBB) {
206          FoundSource = true;
207          Sources.push_back(SI.first);
208        }
209      }
210    }
211    return FoundSource;
212  }
213  
214  void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) {
215    assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists");
216    PHISourcesT EmptySet;
217    PHIInfoElementT *NewElement = new PHIInfoElementT();
218    NewElement->DestReg = DestReg;
219    NewElement->DL = DL;
220    NewElement->Sources = EmptySet;
221    PHIInfo.insert(NewElement);
222  }
223  
224  void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) {
225    phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg);
226  }
227  
228  void PHILinearize::deleteDef(unsigned DestReg) {
229    PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg);
230    PHIInfo.erase(InfoElement);
231    delete InfoElement;
232  }
233  
234  void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg,
235                               MachineBasicBlock *SourceMBB) {
236    phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB);
237  }
238  
239  void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg,
240                                  MachineBasicBlock *SourceMBB) {
241    phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB);
242  }
243  
244  bool PHILinearize::findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB,
245                              unsigned &DestReg) {
246    PHIInfoElementT *InfoElement =
247        findPHIInfoElementFromSource(SourceReg, SourceMBB);
248    if (InfoElement != nullptr) {
249      DestReg = phiInfoElementGetDest(InfoElement);
250      return true;
251    }
252    return false;
253  }
254  
255  bool PHILinearize::isSource(unsigned Reg, MachineBasicBlock *SourceMBB) {
256    unsigned DestReg;
257    return findDest(Reg, SourceMBB, DestReg);
258  }
259  
260  unsigned PHILinearize::getNumSources(unsigned DestReg) {
261    return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size();
262  }
263  
264  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
265  LLVM_DUMP_METHOD void PHILinearize::dump(MachineRegisterInfo *MRI) {
266    const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
267    dbgs() << "=PHIInfo Start=\n";
268    for (auto PII : this->PHIInfo) {
269      PHIInfoElementT &Element = *PII;
270      dbgs() << "Dest: " << printReg(Element.DestReg, TRI)
271             << " Sources: {";
272      for (auto &SI : Element.Sources) {
273        dbgs() << printReg(SI.first, TRI) << '(' << printMBBReference(*SI.second)
274               << "),";
275      }
276      dbgs() << "}\n";
277    }
278    dbgs() << "=PHIInfo End=\n";
279  }
280  #endif
281  
282  void PHILinearize::clear() { PHIInfo = PHIInfoT(); }
283  
284  PHILinearize::dest_iterator PHILinearize::dests_begin() {
285    return PHILinearizeDestIterator(PHIInfo.begin());
286  }
287  
288  PHILinearize::dest_iterator PHILinearize::dests_end() {
289    return PHILinearizeDestIterator(PHIInfo.end());
290  }
291  
292  PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) {
293    auto InfoElement = findPHIInfoElement(Reg);
294    return phiInfoElementGetSources(InfoElement).begin();
295  }
296  
297  PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) {
298    auto InfoElement = findPHIInfoElement(Reg);
299    return phiInfoElementGetSources(InfoElement).end();
300  }
301  
302  static unsigned getPHINumInputs(MachineInstr &PHI) {
303    assert(PHI.isPHI());
304    return (PHI.getNumOperands() - 1) / 2;
305  }
306  
307  static MachineBasicBlock *getPHIPred(MachineInstr &PHI, unsigned Index) {
308    assert(PHI.isPHI());
309    return PHI.getOperand(Index * 2 + 2).getMBB();
310  }
311  
312  static void setPhiPred(MachineInstr &PHI, unsigned Index,
313                         MachineBasicBlock *NewPred) {
314    PHI.getOperand(Index * 2 + 2).setMBB(NewPred);
315  }
316  
317  static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) {
318    assert(PHI.isPHI());
319    return PHI.getOperand(Index * 2 + 1).getReg();
320  }
321  
322  static unsigned getPHIDestReg(MachineInstr &PHI) {
323    assert(PHI.isPHI());
324    return PHI.getOperand(0).getReg();
325  }
326  
327  namespace {
328  
329  class RegionMRT;
330  class MBBMRT;
331  
332  class LinearizedRegion {
333  protected:
334    MachineBasicBlock *Entry;
335    // The exit block is part of the region, and is the last
336    // merge block before exiting the region.
337    MachineBasicBlock *Exit;
338    DenseSet<unsigned> LiveOuts;
339    SmallPtrSet<MachineBasicBlock *, 1> MBBs;
340    bool HasLoop;
341    LinearizedRegion *Parent;
342    RegionMRT *RMRT;
343  
344    void storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
345                         MachineInstr *DefInstr, const MachineRegisterInfo *MRI,
346                         const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
347  
348    void storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg,
349                               MachineInstr *DefInstr,
350                               const MachineRegisterInfo *MRI,
351                               const TargetRegisterInfo *TRI,
352                               PHILinearize &PHIInfo);
353  
354    void storeMBBLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
355                          const TargetRegisterInfo *TRI, PHILinearize &PHIInfo,
356                          RegionMRT *TopRegion);
357  
358    void storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
359                       const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
360  
361    void storeLiveOuts(RegionMRT *Region, const MachineRegisterInfo *MRI,
362                       const TargetRegisterInfo *TRI, PHILinearize &PHIInfo,
363                       RegionMRT *TopRegion = nullptr);
364  
365  public:
366    LinearizedRegion();
367    LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
368                     const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
369    ~LinearizedRegion() = default;
370  
371    void setRegionMRT(RegionMRT *Region) { RMRT = Region; }
372  
373    RegionMRT *getRegionMRT() { return RMRT; }
374  
375    void setParent(LinearizedRegion *P) { Parent = P; }
376  
377    LinearizedRegion *getParent() { return Parent; }
378  
379    void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr);
380  
381    void setBBSelectRegIn(unsigned Reg);
382  
383    unsigned getBBSelectRegIn();
384  
385    void setBBSelectRegOut(unsigned Reg, bool IsLiveOut);
386  
387    unsigned getBBSelectRegOut();
388  
389    void setHasLoop(bool Value);
390  
391    bool getHasLoop();
392  
393    void addLiveOut(unsigned VReg);
394  
395    void removeLiveOut(unsigned Reg);
396  
397    void replaceLiveOut(unsigned OldReg, unsigned NewReg);
398  
399    void replaceRegister(unsigned Register, unsigned NewRegister,
400                         MachineRegisterInfo *MRI, bool ReplaceInside,
401                         bool ReplaceOutside, bool IncludeLoopPHIs);
402  
403    void replaceRegisterInsideRegion(unsigned Register, unsigned NewRegister,
404                                     bool IncludeLoopPHIs,
405                                     MachineRegisterInfo *MRI);
406  
407    void replaceRegisterOutsideRegion(unsigned Register, unsigned NewRegister,
408                                      bool IncludeLoopPHIs,
409                                      MachineRegisterInfo *MRI);
410  
411    DenseSet<unsigned> *getLiveOuts();
412  
413    void setEntry(MachineBasicBlock *NewEntry);
414  
415    MachineBasicBlock *getEntry();
416  
417    void setExit(MachineBasicBlock *NewExit);
418  
419    MachineBasicBlock *getExit();
420  
421    void addMBB(MachineBasicBlock *MBB);
422  
423    void addMBBs(LinearizedRegion *InnerRegion);
424  
425    bool contains(MachineBasicBlock *MBB);
426  
427    bool isLiveOut(unsigned Reg);
428  
429    bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI);
430  
431    void removeFalseRegisterKills(MachineRegisterInfo *MRI);
432  
433    void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI,
434                     const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
435  };
436  
437  class MRT {
438  protected:
439    RegionMRT *Parent;
440    unsigned BBSelectRegIn;
441    unsigned BBSelectRegOut;
442  
443  public:
444    virtual ~MRT() = default;
445  
446    unsigned getBBSelectRegIn() { return BBSelectRegIn; }
447  
448    unsigned getBBSelectRegOut() { return BBSelectRegOut; }
449  
450    void setBBSelectRegIn(unsigned Reg) { BBSelectRegIn = Reg; }
451  
452    void setBBSelectRegOut(unsigned Reg) { BBSelectRegOut = Reg; }
453  
454    virtual RegionMRT *getRegionMRT() { return nullptr; }
455  
456    virtual MBBMRT *getMBBMRT() { return nullptr; }
457  
458    bool isRegion() { return getRegionMRT() != nullptr; }
459  
460    bool isMBB() { return getMBBMRT() != nullptr; }
461  
462    bool isRoot() { return Parent == nullptr; }
463  
464    void setParent(RegionMRT *Region) { Parent = Region; }
465  
466    RegionMRT *getParent() { return Parent; }
467  
468    static MachineBasicBlock *
469    initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo,
470                  DenseMap<MachineRegion *, RegionMRT *> &RegionMap);
471  
472    static RegionMRT *buildMRT(MachineFunction &MF,
473                               const MachineRegionInfo *RegionInfo,
474                               const SIInstrInfo *TII,
475                               MachineRegisterInfo *MRI);
476  
477    virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) = 0;
478  
479    void dumpDepth(int depth) {
480      for (int i = depth; i > 0; --i) {
481        dbgs() << "  ";
482      }
483    }
484  };
485  
486  class MBBMRT : public MRT {
487    MachineBasicBlock *MBB;
488  
489  public:
490    MBBMRT(MachineBasicBlock *BB) : MBB(BB) {
491      setParent(nullptr);
492      setBBSelectRegOut(0);
493      setBBSelectRegIn(0);
494    }
495  
496    MBBMRT *getMBBMRT() override { return this; }
497  
498    MachineBasicBlock *getMBB() { return MBB; }
499  
500    void dump(const TargetRegisterInfo *TRI, int depth = 0) override {
501      dumpDepth(depth);
502      dbgs() << "MBB: " << getMBB()->getNumber();
503      dbgs() << " In: " << printReg(getBBSelectRegIn(), TRI);
504      dbgs() << ", Out: " << printReg(getBBSelectRegOut(), TRI) << "\n";
505    }
506  };
507  
508  class RegionMRT : public MRT {
509  protected:
510    MachineRegion *Region;
511    LinearizedRegion *LRegion = nullptr;
512    MachineBasicBlock *Succ = nullptr;
513    SetVector<MRT *> Children;
514  
515  public:
516    RegionMRT(MachineRegion *MachineRegion) : Region(MachineRegion) {
517      setParent(nullptr);
518      setBBSelectRegOut(0);
519      setBBSelectRegIn(0);
520    }
521  
522    ~RegionMRT() override {
523      if (LRegion) {
524        delete LRegion;
525      }
526  
527      for (auto CI : Children) {
528        delete &(*CI);
529      }
530    }
531  
532    RegionMRT *getRegionMRT() override { return this; }
533  
534    void setLinearizedRegion(LinearizedRegion *LinearizeRegion) {
535      LRegion = LinearizeRegion;
536    }
537  
538    LinearizedRegion *getLinearizedRegion() { return LRegion; }
539  
540    MachineRegion *getMachineRegion() { return Region; }
541  
542    unsigned getInnerOutputRegister() {
543      return (*(Children.begin()))->getBBSelectRegOut();
544    }
545  
546    void addChild(MRT *Tree) { Children.insert(Tree); }
547  
548    SetVector<MRT *> *getChildren() { return &Children; }
549  
550    void dump(const TargetRegisterInfo *TRI, int depth = 0) override {
551      dumpDepth(depth);
552      dbgs() << "Region: " << (void *)Region;
553      dbgs() << " In: " << printReg(getBBSelectRegIn(), TRI);
554      dbgs() << ", Out: " << printReg(getBBSelectRegOut(), TRI) << "\n";
555  
556      dumpDepth(depth);
557      if (getSucc())
558        dbgs() << "Succ: " << getSucc()->getNumber() << "\n";
559      else
560        dbgs() << "Succ: none \n";
561      for (auto MRTI : Children) {
562        MRTI->dump(TRI, depth + 1);
563      }
564    }
565  
566    MRT *getEntryTree() { return Children.back(); }
567  
568    MRT *getExitTree() { return Children.front(); }
569  
570    MachineBasicBlock *getEntry() {
571      MRT *Tree = Children.back();
572      return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry()
573                                : Tree->getMBBMRT()->getMBB();
574    }
575  
576    MachineBasicBlock *getExit() {
577      MRT *Tree = Children.front();
578      return (Tree->isRegion()) ? Tree->getRegionMRT()->getExit()
579                                : Tree->getMBBMRT()->getMBB();
580    }
581  
582    void setSucc(MachineBasicBlock *MBB) { Succ = MBB; }
583  
584    MachineBasicBlock *getSucc() { return Succ; }
585  
586    bool contains(MachineBasicBlock *MBB) {
587      for (auto CI : Children) {
588        if (CI->isMBB()) {
589          if (MBB == CI->getMBBMRT()->getMBB()) {
590            return true;
591          }
592        } else {
593          if (CI->getRegionMRT()->contains(MBB)) {
594            return true;
595          } else if (CI->getRegionMRT()->getLinearizedRegion() != nullptr &&
596                     CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)) {
597            return true;
598          }
599        }
600      }
601      return false;
602    }
603  
604    void replaceLiveOutReg(unsigned Register, unsigned NewRegister) {
605      LinearizedRegion *LRegion = getLinearizedRegion();
606      LRegion->replaceLiveOut(Register, NewRegister);
607      for (auto &CI : Children) {
608        if (CI->isRegion()) {
609          CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister);
610        }
611      }
612    }
613  };
614  
615  } // end anonymous namespace
616  
617  static unsigned createBBSelectReg(const SIInstrInfo *TII,
618                                    MachineRegisterInfo *MRI) {
619    return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32));
620  }
621  
622  MachineBasicBlock *
623  MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo,
624                     DenseMap<MachineRegion *, RegionMRT *> &RegionMap) {
625    for (auto &MFI : MF) {
626      MachineBasicBlock *ExitMBB = &MFI;
627      if (ExitMBB->succ_size() == 0) {
628        return ExitMBB;
629      }
630    }
631    llvm_unreachable("CFG has no exit block");
632    return nullptr;
633  }
634  
635  RegionMRT *MRT::buildMRT(MachineFunction &MF,
636                           const MachineRegionInfo *RegionInfo,
637                           const SIInstrInfo *TII, MachineRegisterInfo *MRI) {
638    SmallPtrSet<MachineRegion *, 4> PlacedRegions;
639    DenseMap<MachineRegion *, RegionMRT *> RegionMap;
640    MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion();
641    RegionMRT *Result = new RegionMRT(TopLevelRegion);
642    RegionMap[TopLevelRegion] = Result;
643  
644    // Insert the exit block first, we need it to be the merge node
645    // for the top level region.
646    MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap);
647  
648    unsigned BBSelectRegIn = createBBSelectReg(TII, MRI);
649    MBBMRT *ExitMRT = new MBBMRT(Exit);
650    RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT);
651    ExitMRT->setBBSelectRegIn(BBSelectRegIn);
652  
653    for (auto MBBI : post_order(&(MF.front()))) {
654      MachineBasicBlock *MBB = &(*MBBI);
655  
656      // Skip Exit since we already added it
657      if (MBB == Exit) {
658        continue;
659      }
660  
661      LLVM_DEBUG(dbgs() << "Visiting " << printMBBReference(*MBB) << "\n");
662      MBBMRT *NewMBB = new MBBMRT(MBB);
663      MachineRegion *Region = RegionInfo->getRegionFor(MBB);
664  
665      // Ensure we have the MRT region
666      if (RegionMap.count(Region) == 0) {
667        RegionMRT *NewMRTRegion = new RegionMRT(Region);
668        RegionMap[Region] = NewMRTRegion;
669  
670        // Ensure all parents are in the RegionMap
671        MachineRegion *Parent = Region->getParent();
672        while (RegionMap.count(Parent) == 0) {
673          RegionMRT *NewMRTParent = new RegionMRT(Parent);
674          NewMRTParent->addChild(NewMRTRegion);
675          NewMRTRegion->setParent(NewMRTParent);
676          RegionMap[Parent] = NewMRTParent;
677          NewMRTRegion = NewMRTParent;
678          Parent = Parent->getParent();
679        }
680        RegionMap[Parent]->addChild(NewMRTRegion);
681        NewMRTRegion->setParent(RegionMap[Parent]);
682      }
683  
684      // Add MBB to Region MRT
685      RegionMap[Region]->addChild(NewMBB);
686      NewMBB->setParent(RegionMap[Region]);
687      RegionMap[Region]->setSucc(Region->getExit());
688    }
689    return Result;
690  }
691  
692  void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
693                                         MachineInstr *DefInstr,
694                                         const MachineRegisterInfo *MRI,
695                                         const TargetRegisterInfo *TRI,
696                                         PHILinearize &PHIInfo) {
697    if (TRI->isVirtualRegister(Reg)) {
698      LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI)
699                        << "\n");
700      // If this is a source register to a PHI we are chaining, it
701      // must be live out.
702      if (PHIInfo.isSource(Reg)) {
703        LLVM_DEBUG(dbgs() << "Add LiveOut (PHI): " << printReg(Reg, TRI) << "\n");
704        addLiveOut(Reg);
705      } else {
706        // If this is live out of the MBB
707        for (auto &UI : MRI->use_operands(Reg)) {
708          if (UI.getParent()->getParent() != MBB) {
709            LLVM_DEBUG(dbgs() << "Add LiveOut (MBB " << printMBBReference(*MBB)
710                              << "): " << printReg(Reg, TRI) << "\n");
711            addLiveOut(Reg);
712          } else {
713            // If the use is in the same MBB we have to make sure
714            // it is after the def, otherwise it is live out in a loop
715            MachineInstr *UseInstr = UI.getParent();
716            for (MachineBasicBlock::instr_iterator
717                     MII = UseInstr->getIterator(),
718                     MIE = UseInstr->getParent()->instr_end();
719                 MII != MIE; ++MII) {
720              if ((&(*MII)) == DefInstr) {
721                LLVM_DEBUG(dbgs() << "Add LiveOut (Loop): " << printReg(Reg, TRI)
722                                  << "\n");
723                addLiveOut(Reg);
724              }
725            }
726          }
727        }
728      }
729    }
730  }
731  
732  void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg,
733                                               MachineInstr *DefInstr,
734                                               const MachineRegisterInfo *MRI,
735                                               const TargetRegisterInfo *TRI,
736                                               PHILinearize &PHIInfo) {
737    if (TRI->isVirtualRegister(Reg)) {
738      LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI)
739                        << "\n");
740      for (auto &UI : MRI->use_operands(Reg)) {
741        if (!Region->contains(UI.getParent()->getParent())) {
742          LLVM_DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region
743                            << "): " << printReg(Reg, TRI) << "\n");
744          addLiveOut(Reg);
745        }
746      }
747    }
748  }
749  
750  void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB,
751                                       const MachineRegisterInfo *MRI,
752                                       const TargetRegisterInfo *TRI,
753                                       PHILinearize &PHIInfo) {
754    LLVM_DEBUG(dbgs() << "-Store Live Outs Begin (" << printMBBReference(*MBB)
755                      << ")-\n");
756    for (auto &II : *MBB) {
757      for (auto &RI : II.defs()) {
758        storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo);
759      }
760      for (auto &IRI : II.implicit_operands()) {
761        if (IRI.isDef()) {
762          storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo);
763        }
764      }
765    }
766  
767    // If we have a successor with a PHI, source coming from this MBB we have to
768    // add the register as live out
769    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
770                                          E = MBB->succ_end();
771         SI != E; ++SI) {
772      for (auto &II : *(*SI)) {
773        if (II.isPHI()) {
774          MachineInstr &PHI = II;
775          int numPreds = getPHINumInputs(PHI);
776          for (int i = 0; i < numPreds; ++i) {
777            if (getPHIPred(PHI, i) == MBB) {
778              unsigned PHIReg = getPHISourceReg(PHI, i);
779              LLVM_DEBUG(dbgs()
780                         << "Add LiveOut (PhiSource " << printMBBReference(*MBB)
781                         << " -> " << printMBBReference(*(*SI))
782                         << "): " << printReg(PHIReg, TRI) << "\n");
783              addLiveOut(PHIReg);
784            }
785          }
786        }
787      }
788    }
789  
790    LLVM_DEBUG(dbgs() << "-Store Live Outs Endn-\n");
791  }
792  
793  void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB,
794                                          const MachineRegisterInfo *MRI,
795                                          const TargetRegisterInfo *TRI,
796                                          PHILinearize &PHIInfo,
797                                          RegionMRT *TopRegion) {
798    for (auto &II : *MBB) {
799      for (auto &RI : II.defs()) {
800        storeLiveOutRegRegion(TopRegion, RI.getReg(), RI.getParent(), MRI, TRI,
801                              PHIInfo);
802      }
803      for (auto &IRI : II.implicit_operands()) {
804        if (IRI.isDef()) {
805          storeLiveOutRegRegion(TopRegion, IRI.getReg(), IRI.getParent(), MRI,
806                                TRI, PHIInfo);
807        }
808      }
809    }
810  }
811  
812  void LinearizedRegion::storeLiveOuts(RegionMRT *Region,
813                                       const MachineRegisterInfo *MRI,
814                                       const TargetRegisterInfo *TRI,
815                                       PHILinearize &PHIInfo,
816                                       RegionMRT *CurrentTopRegion) {
817    MachineBasicBlock *Exit = Region->getSucc();
818  
819    RegionMRT *TopRegion =
820        CurrentTopRegion == nullptr ? Region : CurrentTopRegion;
821  
822    // Check if exit is end of function, if so, no live outs.
823    if (Exit == nullptr)
824      return;
825  
826    auto Children = Region->getChildren();
827    for (auto CI : *Children) {
828      if (CI->isMBB()) {
829        auto MBB = CI->getMBBMRT()->getMBB();
830        storeMBBLiveOuts(MBB, MRI, TRI, PHIInfo, TopRegion);
831      } else {
832        LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion();
833        // We should be limited to only store registers that are live out from the
834        // lineaized region
835        for (auto MBBI : SubRegion->MBBs) {
836          storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion);
837        }
838      }
839    }
840  
841    if (CurrentTopRegion == nullptr) {
842      auto Succ = Region->getSucc();
843      for (auto &II : *Succ) {
844        if (II.isPHI()) {
845          MachineInstr &PHI = II;
846          int numPreds = getPHINumInputs(PHI);
847          for (int i = 0; i < numPreds; ++i) {
848            if (Region->contains(getPHIPred(PHI, i))) {
849              unsigned PHIReg = getPHISourceReg(PHI, i);
850              LLVM_DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region
851                                << "): " << printReg(PHIReg, TRI) << "\n");
852              addLiveOut(PHIReg);
853            }
854          }
855        }
856      }
857    }
858  }
859  
860  #ifndef NDEBUG
861  void LinearizedRegion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
862    OS << "Linearized Region {";
863    bool IsFirst = true;
864    for (const auto &MBB : MBBs) {
865      if (IsFirst) {
866        IsFirst = false;
867      } else {
868        OS << " ,";
869      }
870      OS << MBB->getNumber();
871    }
872    OS << "} (" << Entry->getNumber() << ", "
873       << (Exit == nullptr ? -1 : Exit->getNumber())
874       << "): In:" << printReg(getBBSelectRegIn(), TRI)
875       << " Out:" << printReg(getBBSelectRegOut(), TRI) << " {";
876    for (auto &LI : LiveOuts) {
877      OS << printReg(LI, TRI) << " ";
878    }
879    OS << "} \n";
880  }
881  #endif
882  
883  unsigned LinearizedRegion::getBBSelectRegIn() {
884    return getRegionMRT()->getBBSelectRegIn();
885  }
886  
887  unsigned LinearizedRegion::getBBSelectRegOut() {
888    return getRegionMRT()->getBBSelectRegOut();
889  }
890  
891  void LinearizedRegion::setHasLoop(bool Value) { HasLoop = Value; }
892  
893  bool LinearizedRegion::getHasLoop() { return HasLoop; }
894  
895  void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); }
896  
897  void LinearizedRegion::removeLiveOut(unsigned Reg) {
898    if (isLiveOut(Reg))
899      LiveOuts.erase(Reg);
900  }
901  
902  void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) {
903    if (isLiveOut(OldReg)) {
904      removeLiveOut(OldReg);
905      addLiveOut(NewReg);
906    }
907  }
908  
909  void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister,
910                                         MachineRegisterInfo *MRI,
911                                         bool ReplaceInside, bool ReplaceOutside,
912                                         bool IncludeLoopPHI) {
913    assert(Register != NewRegister && "Cannot replace a reg with itself");
914  
915    LLVM_DEBUG(
916        dbgs() << "Pepareing to replace register (region): "
917               << printReg(Register, MRI->getTargetRegisterInfo()) << " with "
918               << printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n");
919  
920    // If we are replacing outside, we also need to update the LiveOuts
921    if (ReplaceOutside &&
922        (isLiveOut(Register) || this->getParent()->isLiveOut(Register))) {
923      LinearizedRegion *Current = this;
924      while (Current != nullptr && Current->getEntry() != nullptr) {
925        LLVM_DEBUG(dbgs() << "Region before register replace\n");
926        LLVM_DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
927        Current->replaceLiveOut(Register, NewRegister);
928        LLVM_DEBUG(dbgs() << "Region after register replace\n");
929        LLVM_DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
930        Current = Current->getParent();
931      }
932    }
933  
934    for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register),
935                                           E = MRI->reg_end();
936         I != E;) {
937      MachineOperand &O = *I;
938      ++I;
939  
940      // We don't rewrite defs.
941      if (O.isDef())
942        continue;
943  
944      bool IsInside = contains(O.getParent()->getParent());
945      bool IsLoopPHI = IsInside && (O.getParent()->isPHI() &&
946                                    O.getParent()->getParent() == getEntry());
947      bool ShouldReplace = (IsInside && ReplaceInside) ||
948                           (!IsInside && ReplaceOutside) ||
949                           (IncludeLoopPHI && IsLoopPHI);
950      if (ShouldReplace) {
951  
952        if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) {
953          LLVM_DEBUG(dbgs() << "Trying to substitute physical register: "
954                            << printReg(NewRegister, MRI->getTargetRegisterInfo())
955                            << "\n");
956          llvm_unreachable("Cannot substitute physical registers");
957        } else {
958          LLVM_DEBUG(dbgs() << "Replacing register (region): "
959                            << printReg(Register, MRI->getTargetRegisterInfo())
960                            << " with "
961                            << printReg(NewRegister, MRI->getTargetRegisterInfo())
962                            << "\n");
963          O.setReg(NewRegister);
964        }
965      }
966    }
967  }
968  
969  void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register,
970                                                     unsigned NewRegister,
971                                                     bool IncludeLoopPHIs,
972                                                     MachineRegisterInfo *MRI) {
973    replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs);
974  }
975  
976  void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register,
977                                                      unsigned NewRegister,
978                                                      bool IncludeLoopPHIs,
979                                                      MachineRegisterInfo *MRI) {
980    replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs);
981  }
982  
983  DenseSet<unsigned> *LinearizedRegion::getLiveOuts() { return &LiveOuts; }
984  
985  void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) {
986    Entry = NewEntry;
987  }
988  
989  MachineBasicBlock *LinearizedRegion::getEntry() { return Entry; }
990  
991  void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { Exit = NewExit; }
992  
993  MachineBasicBlock *LinearizedRegion::getExit() { return Exit; }
994  
995  void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); }
996  
997  void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) {
998    for (const auto &MBB : InnerRegion->MBBs) {
999      addMBB(MBB);
1000    }
1001  }
1002  
1003  bool LinearizedRegion::contains(MachineBasicBlock *MBB) {
1004    return MBBs.count(MBB) == 1;
1005  }
1006  
1007  bool LinearizedRegion::isLiveOut(unsigned Reg) {
1008    return LiveOuts.count(Reg) == 1;
1009  }
1010  
1011  bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) {
1012    return MRI->def_begin(Reg) == MRI->def_end();
1013  }
1014  
1015  // After the code has been structurized, what was flagged as kills
1016  // before are no longer register kills.
1017  void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) {
1018    const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
1019    for (auto MBBI : MBBs) {
1020      MachineBasicBlock *MBB = MBBI;
1021      for (auto &II : *MBB) {
1022        for (auto &RI : II.uses()) {
1023          if (RI.isReg()) {
1024            unsigned Reg = RI.getReg();
1025            if (TRI->isVirtualRegister(Reg)) {
1026              if (hasNoDef(Reg, MRI))
1027                continue;
1028              if (!MRI->hasOneDef(Reg)) {
1029                LLVM_DEBUG(this->getEntry()->getParent()->dump());
1030                LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << "\n");
1031              }
1032  
1033              if (MRI->def_begin(Reg) == MRI->def_end()) {
1034                LLVM_DEBUG(dbgs() << "Register "
1035                                  << printReg(Reg, MRI->getTargetRegisterInfo())
1036                                  << " has NO defs\n");
1037              } else if (!MRI->hasOneDef(Reg)) {
1038                LLVM_DEBUG(dbgs() << "Register "
1039                                  << printReg(Reg, MRI->getTargetRegisterInfo())
1040                                  << " has multiple defs\n");
1041              }
1042  
1043              assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
1044              MachineOperand *Def = &(*(MRI->def_begin(Reg)));
1045              MachineOperand *UseOperand = &(RI);
1046              bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB;
1047              if (UseIsOutsideDefMBB && UseOperand->isKill()) {
1048                LLVM_DEBUG(dbgs() << "Removing kill flag on register: "
1049                                  << printReg(Reg, TRI) << "\n");
1050                UseOperand->setIsKill(false);
1051              }
1052            }
1053          }
1054        }
1055      }
1056    }
1057  }
1058  
1059  void LinearizedRegion::initLiveOut(RegionMRT *Region,
1060                                     const MachineRegisterInfo *MRI,
1061                                     const TargetRegisterInfo *TRI,
1062                                     PHILinearize &PHIInfo) {
1063    storeLiveOuts(Region, MRI, TRI, PHIInfo);
1064  }
1065  
1066  LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB,
1067                                     const MachineRegisterInfo *MRI,
1068                                     const TargetRegisterInfo *TRI,
1069                                     PHILinearize &PHIInfo) {
1070    setEntry(MBB);
1071    setExit(MBB);
1072    storeLiveOuts(MBB, MRI, TRI, PHIInfo);
1073    MBBs.insert(MBB);
1074    Parent = nullptr;
1075  }
1076  
1077  LinearizedRegion::LinearizedRegion() {
1078    setEntry(nullptr);
1079    setExit(nullptr);
1080    Parent = nullptr;
1081  }
1082  
1083  namespace {
1084  
1085  class AMDGPUMachineCFGStructurizer : public MachineFunctionPass {
1086  private:
1087    const MachineRegionInfo *Regions;
1088    const SIInstrInfo *TII;
1089    const TargetRegisterInfo *TRI;
1090    MachineRegisterInfo *MRI;
1091    unsigned BBSelectRegister;
1092    PHILinearize PHIInfo;
1093    DenseMap<MachineBasicBlock *, MachineBasicBlock *> FallthroughMap;
1094    RegionMRT *RMRT;
1095  
1096    void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI,
1097                             SmallVector<unsigned, 2> &RegionIndices);
1098    void getPHIRegionIndices(LinearizedRegion *Region, MachineInstr &PHI,
1099                             SmallVector<unsigned, 2> &RegionIndices);
1100    void getPHINonRegionIndices(LinearizedRegion *Region, MachineInstr &PHI,
1101                                SmallVector<unsigned, 2> &PHINonRegionIndices);
1102  
1103    void storePHILinearizationInfoDest(
1104        unsigned LDestReg, MachineInstr &PHI,
1105        SmallVector<unsigned, 2> *RegionIndices = nullptr);
1106  
1107    unsigned storePHILinearizationInfo(MachineInstr &PHI,
1108                                       SmallVector<unsigned, 2> *RegionIndices);
1109  
1110    void extractKilledPHIs(MachineBasicBlock *MBB);
1111  
1112    bool shrinkPHI(MachineInstr &PHI, SmallVector<unsigned, 2> &PHIIndices,
1113                   unsigned *ReplaceReg);
1114  
1115    bool shrinkPHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1116                   MachineBasicBlock *SourceMBB,
1117                   SmallVector<unsigned, 2> &PHIIndices, unsigned *ReplaceReg);
1118  
1119    void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1120                    MachineBasicBlock *LastMerge,
1121                    SmallVector<unsigned, 2> &PHIRegionIndices);
1122    void replaceEntryPHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1123                         MachineBasicBlock *IfMBB,
1124                         SmallVector<unsigned, 2> &PHIRegionIndices);
1125    void replaceLiveOutRegs(MachineInstr &PHI,
1126                            SmallVector<unsigned, 2> &PHIRegionIndices,
1127                            unsigned CombinedSourceReg,
1128                            LinearizedRegion *LRegion);
1129    void rewriteRegionExitPHI(RegionMRT *Region, MachineBasicBlock *LastMerge,
1130                              MachineInstr &PHI, LinearizedRegion *LRegion);
1131  
1132    void rewriteRegionExitPHIs(RegionMRT *Region, MachineBasicBlock *LastMerge,
1133                               LinearizedRegion *LRegion);
1134    void rewriteRegionEntryPHI(LinearizedRegion *Region, MachineBasicBlock *IfMBB,
1135                               MachineInstr &PHI);
1136    void rewriteRegionEntryPHIs(LinearizedRegion *Region,
1137                                MachineBasicBlock *IfMBB);
1138  
1139    bool regionIsSimpleIf(RegionMRT *Region);
1140  
1141    void transformSimpleIfRegion(RegionMRT *Region);
1142  
1143    void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II);
1144  
1145    void insertUnconditionalBranch(MachineBasicBlock *MBB,
1146                                   MachineBasicBlock *Dest,
1147                                   const DebugLoc &DL = DebugLoc());
1148  
1149    MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region);
1150  
1151    void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1152                        MachineBasicBlock *MergeBB, unsigned DestRegister,
1153                        unsigned IfSourceRegister, unsigned CodeSourceRegister,
1154                        bool IsUndefIfSource = false);
1155  
1156    MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB,
1157                                     MachineBasicBlock *CodeBBStart,
1158                                     MachineBasicBlock *CodeBBEnd,
1159                                     MachineBasicBlock *SelectBB, unsigned IfReg,
1160                                     bool InheritPreds);
1161  
1162    void prunePHIInfo(MachineBasicBlock *MBB);
1163    void createEntryPHI(LinearizedRegion *CurrentRegion, unsigned DestReg);
1164  
1165    void createEntryPHIs(LinearizedRegion *CurrentRegion);
1166    void resolvePHIInfos(MachineBasicBlock *FunctionEntry);
1167  
1168    void replaceRegisterWith(unsigned Register, unsigned NewRegister);
1169  
1170    MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB,
1171                                      MachineBasicBlock *CodeBB,
1172                                      LinearizedRegion *LRegion,
1173                                      unsigned BBSelectRegIn,
1174                                      unsigned BBSelectRegOut);
1175  
1176    MachineBasicBlock *
1177    createIfRegion(MachineBasicBlock *MergeMBB, LinearizedRegion *InnerRegion,
1178                   LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB,
1179                   unsigned BBSelectRegIn, unsigned BBSelectRegOut);
1180    void ensureCondIsNotKilled(SmallVector<MachineOperand, 1> Cond);
1181  
1182    void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB,
1183                                 MachineBasicBlock *MergeBB,
1184                                 unsigned BBSelectReg);
1185  
1186    MachineInstr *getDefInstr(unsigned Reg);
1187    void insertChainedPHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1188                          MachineBasicBlock *MergeBB,
1189                          LinearizedRegion *InnerRegion, unsigned DestReg,
1190                          unsigned SourceReg);
1191    bool containsDef(MachineBasicBlock *MBB, LinearizedRegion *InnerRegion,
1192                     unsigned Register);
1193    void rewriteLiveOutRegs(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1194                            MachineBasicBlock *MergeBB,
1195                            LinearizedRegion *InnerRegion,
1196                            LinearizedRegion *LRegion);
1197  
1198    void splitLoopPHI(MachineInstr &PHI, MachineBasicBlock *Entry,
1199                      MachineBasicBlock *EntrySucc, LinearizedRegion *LRegion);
1200    void splitLoopPHIs(MachineBasicBlock *Entry, MachineBasicBlock *EntrySucc,
1201                       LinearizedRegion *LRegion);
1202  
1203    MachineBasicBlock *splitExit(LinearizedRegion *LRegion);
1204  
1205    MachineBasicBlock *splitEntry(LinearizedRegion *LRegion);
1206  
1207    LinearizedRegion *initLinearizedRegion(RegionMRT *Region);
1208  
1209    bool structurizeComplexRegion(RegionMRT *Region);
1210  
1211    bool structurizeRegion(RegionMRT *Region);
1212  
1213    bool structurizeRegions(RegionMRT *Region, bool isTopRegion);
1214  
1215  public:
1216    static char ID;
1217  
1218    AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) {
1219      initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry());
1220    }
1221  
1222    void getAnalysisUsage(AnalysisUsage &AU) const override {
1223      AU.addRequired<MachineRegionInfoPass>();
1224      MachineFunctionPass::getAnalysisUsage(AU);
1225    }
1226  
1227    void initFallthroughMap(MachineFunction &MF);
1228  
1229    void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut);
1230  
1231    unsigned initializeSelectRegisters(MRT *MRT, unsigned ExistingExitReg,
1232                                       MachineRegisterInfo *MRI,
1233                                       const SIInstrInfo *TII);
1234  
1235    void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; }
1236  
1237    RegionMRT *getRegionMRT() { return RMRT; }
1238  
1239    bool runOnMachineFunction(MachineFunction &MF) override;
1240  };
1241  
1242  } // end anonymous namespace
1243  
1244  char AMDGPUMachineCFGStructurizer::ID = 0;
1245  
1246  bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) {
1247    MachineBasicBlock *Entry = Region->getEntry();
1248    MachineBasicBlock *Succ = Region->getSucc();
1249    bool FoundBypass = false;
1250    bool FoundIf = false;
1251  
1252    if (Entry->succ_size() != 2) {
1253      return false;
1254    }
1255  
1256    for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(),
1257                                                E = Entry->succ_end();
1258         SI != E; ++SI) {
1259      MachineBasicBlock *Current = *SI;
1260  
1261      if (Current == Succ) {
1262        FoundBypass = true;
1263      } else if ((Current->succ_size() == 1) &&
1264                 *(Current->succ_begin()) == Succ) {
1265        FoundIf = true;
1266      }
1267    }
1268  
1269    return FoundIf && FoundBypass;
1270  }
1271  
1272  void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) {
1273    MachineBasicBlock *Entry = Region->getEntry();
1274    MachineBasicBlock *Exit = Region->getExit();
1275    TII->convertNonUniformIfRegion(Entry, Exit);
1276  }
1277  
1278  static void fixMBBTerminator(MachineBasicBlock *MBB) {
1279    if (MBB->succ_size() == 1) {
1280      auto *Succ = *(MBB->succ_begin());
1281      for (auto &TI : MBB->terminators()) {
1282        for (auto &UI : TI.uses()) {
1283          if (UI.isMBB() && UI.getMBB() != Succ) {
1284            UI.setMBB(Succ);
1285          }
1286        }
1287      }
1288    }
1289  }
1290  
1291  static void fixRegionTerminator(RegionMRT *Region) {
1292    MachineBasicBlock *InternalSucc = nullptr;
1293    MachineBasicBlock *ExternalSucc = nullptr;
1294    LinearizedRegion *LRegion = Region->getLinearizedRegion();
1295    auto Exit = LRegion->getExit();
1296  
1297    SmallPtrSet<MachineBasicBlock *, 2> Successors;
1298    for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(),
1299                                                SE = Exit->succ_end();
1300         SI != SE; ++SI) {
1301      MachineBasicBlock *Succ = *SI;
1302      if (LRegion->contains(Succ)) {
1303        // Do not allow re-assign
1304        assert(InternalSucc == nullptr);
1305        InternalSucc = Succ;
1306      } else {
1307        // Do not allow re-assign
1308        assert(ExternalSucc == nullptr);
1309        ExternalSucc = Succ;
1310      }
1311    }
1312  
1313    for (auto &TI : Exit->terminators()) {
1314      for (auto &UI : TI.uses()) {
1315        if (UI.isMBB()) {
1316          auto Target = UI.getMBB();
1317          if (Target != InternalSucc && Target != ExternalSucc) {
1318            UI.setMBB(ExternalSucc);
1319          }
1320        }
1321      }
1322    }
1323  }
1324  
1325  // If a region region is just a sequence of regions (and the exit
1326  // block in the case of the top level region), we can simply skip
1327  // linearizing it, because it is already linear
1328  bool regionIsSequence(RegionMRT *Region) {
1329    auto Children = Region->getChildren();
1330    for (auto CI : *Children) {
1331      if (!CI->isRegion()) {
1332        if (CI->getMBBMRT()->getMBB()->succ_size() > 1) {
1333          return false;
1334        }
1335      }
1336    }
1337    return true;
1338  }
1339  
1340  void fixupRegionExits(RegionMRT *Region) {
1341    auto Children = Region->getChildren();
1342    for (auto CI : *Children) {
1343      if (!CI->isRegion()) {
1344        fixMBBTerminator(CI->getMBBMRT()->getMBB());
1345      } else {
1346        fixRegionTerminator(CI->getRegionMRT());
1347      }
1348    }
1349  }
1350  
1351  void AMDGPUMachineCFGStructurizer::getPHIRegionIndices(
1352      RegionMRT *Region, MachineInstr &PHI,
1353      SmallVector<unsigned, 2> &PHIRegionIndices) {
1354    unsigned NumInputs = getPHINumInputs(PHI);
1355    for (unsigned i = 0; i < NumInputs; ++i) {
1356      MachineBasicBlock *Pred = getPHIPred(PHI, i);
1357      if (Region->contains(Pred)) {
1358        PHIRegionIndices.push_back(i);
1359      }
1360    }
1361  }
1362  
1363  void AMDGPUMachineCFGStructurizer::getPHIRegionIndices(
1364      LinearizedRegion *Region, MachineInstr &PHI,
1365      SmallVector<unsigned, 2> &PHIRegionIndices) {
1366    unsigned NumInputs = getPHINumInputs(PHI);
1367    for (unsigned i = 0; i < NumInputs; ++i) {
1368      MachineBasicBlock *Pred = getPHIPred(PHI, i);
1369      if (Region->contains(Pred)) {
1370        PHIRegionIndices.push_back(i);
1371      }
1372    }
1373  }
1374  
1375  void AMDGPUMachineCFGStructurizer::getPHINonRegionIndices(
1376      LinearizedRegion *Region, MachineInstr &PHI,
1377      SmallVector<unsigned, 2> &PHINonRegionIndices) {
1378    unsigned NumInputs = getPHINumInputs(PHI);
1379    for (unsigned i = 0; i < NumInputs; ++i) {
1380      MachineBasicBlock *Pred = getPHIPred(PHI, i);
1381      if (!Region->contains(Pred)) {
1382        PHINonRegionIndices.push_back(i);
1383      }
1384    }
1385  }
1386  
1387  void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest(
1388      unsigned LDestReg, MachineInstr &PHI,
1389      SmallVector<unsigned, 2> *RegionIndices) {
1390    if (RegionIndices) {
1391      for (auto i : *RegionIndices) {
1392        PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i));
1393      }
1394    } else {
1395      unsigned NumInputs = getPHINumInputs(PHI);
1396      for (unsigned i = 0; i < NumInputs; ++i) {
1397        PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i));
1398      }
1399    }
1400  }
1401  
1402  unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo(
1403      MachineInstr &PHI, SmallVector<unsigned, 2> *RegionIndices) {
1404    unsigned DestReg = getPHIDestReg(PHI);
1405    unsigned LinearizeDestReg =
1406        MRI->createVirtualRegister(MRI->getRegClass(DestReg));
1407    PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc());
1408    storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices);
1409    return LinearizeDestReg;
1410  }
1411  
1412  void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) {
1413    // We need to create a new chain for the killed phi, but there is no
1414    // need to do the renaming outside or inside the block.
1415    SmallPtrSet<MachineInstr *, 2> PHIs;
1416    for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
1417                                           E = MBB->instr_end();
1418         I != E; ++I) {
1419      MachineInstr &Instr = *I;
1420      if (Instr.isPHI()) {
1421        unsigned PHIDestReg = getPHIDestReg(Instr);
1422        LLVM_DEBUG(dbgs() << "Extractking killed phi:\n");
1423        LLVM_DEBUG(Instr.dump());
1424        PHIs.insert(&Instr);
1425        PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc());
1426        storePHILinearizationInfoDest(PHIDestReg, Instr);
1427      }
1428    }
1429  
1430    for (auto PI : PHIs) {
1431      PI->eraseFromParent();
1432    }
1433  }
1434  
1435  static bool isPHIRegionIndex(SmallVector<unsigned, 2> PHIRegionIndices,
1436                               unsigned Index) {
1437    for (auto i : PHIRegionIndices) {
1438      if (i == Index)
1439        return true;
1440    }
1441    return false;
1442  }
1443  
1444  bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
1445                                         SmallVector<unsigned, 2> &PHIIndices,
1446                                         unsigned *ReplaceReg) {
1447    return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg);
1448  }
1449  
1450  bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
1451                                         unsigned CombinedSourceReg,
1452                                         MachineBasicBlock *SourceMBB,
1453                                         SmallVector<unsigned, 2> &PHIIndices,
1454                                         unsigned *ReplaceReg) {
1455    LLVM_DEBUG(dbgs() << "Shrink PHI: ");
1456    LLVM_DEBUG(PHI.dump());
1457    LLVM_DEBUG(dbgs() << " to " << printReg(getPHIDestReg(PHI), TRI)
1458                      << " = PHI(");
1459  
1460    bool Replaced = false;
1461    unsigned NumInputs = getPHINumInputs(PHI);
1462    int SingleExternalEntryIndex = -1;
1463    for (unsigned i = 0; i < NumInputs; ++i) {
1464      if (!isPHIRegionIndex(PHIIndices, i)) {
1465        if (SingleExternalEntryIndex == -1) {
1466          // Single entry
1467          SingleExternalEntryIndex = i;
1468        } else {
1469          // Multiple entries
1470          SingleExternalEntryIndex = -2;
1471        }
1472      }
1473    }
1474  
1475    if (SingleExternalEntryIndex > -1) {
1476      *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex);
1477      // We should not rewrite the code, we should only pick up the single value
1478      // that represents the shrunk PHI.
1479      Replaced = true;
1480    } else {
1481      MachineBasicBlock *MBB = PHI.getParent();
1482      MachineInstrBuilder MIB =
1483          BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1484                  getPHIDestReg(PHI));
1485      if (SourceMBB) {
1486        MIB.addReg(CombinedSourceReg);
1487        MIB.addMBB(SourceMBB);
1488        LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
1489                          << printMBBReference(*SourceMBB));
1490      }
1491  
1492      for (unsigned i = 0; i < NumInputs; ++i) {
1493        if (isPHIRegionIndex(PHIIndices, i)) {
1494          continue;
1495        }
1496        unsigned SourceReg = getPHISourceReg(PHI, i);
1497        MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1498        MIB.addReg(SourceReg);
1499        MIB.addMBB(SourcePred);
1500        LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
1501                          << printMBBReference(*SourcePred));
1502      }
1503      LLVM_DEBUG(dbgs() << ")\n");
1504    }
1505    PHI.eraseFromParent();
1506    return Replaced;
1507  }
1508  
1509  void AMDGPUMachineCFGStructurizer::replacePHI(
1510      MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge,
1511      SmallVector<unsigned, 2> &PHIRegionIndices) {
1512    LLVM_DEBUG(dbgs() << "Replace PHI: ");
1513    LLVM_DEBUG(PHI.dump());
1514    LLVM_DEBUG(dbgs() << " with " << printReg(getPHIDestReg(PHI), TRI)
1515                      << " = PHI(");
1516  
1517    bool HasExternalEdge = false;
1518    unsigned NumInputs = getPHINumInputs(PHI);
1519    for (unsigned i = 0; i < NumInputs; ++i) {
1520      if (!isPHIRegionIndex(PHIRegionIndices, i)) {
1521        HasExternalEdge = true;
1522      }
1523    }
1524  
1525    if (HasExternalEdge) {
1526      MachineBasicBlock *MBB = PHI.getParent();
1527      MachineInstrBuilder MIB =
1528          BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1529                  getPHIDestReg(PHI));
1530      MIB.addReg(CombinedSourceReg);
1531      MIB.addMBB(LastMerge);
1532      LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
1533                        << printMBBReference(*LastMerge));
1534      for (unsigned i = 0; i < NumInputs; ++i) {
1535        if (isPHIRegionIndex(PHIRegionIndices, i)) {
1536          continue;
1537        }
1538        unsigned SourceReg = getPHISourceReg(PHI, i);
1539        MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1540        MIB.addReg(SourceReg);
1541        MIB.addMBB(SourcePred);
1542        LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
1543                          << printMBBReference(*SourcePred));
1544      }
1545      LLVM_DEBUG(dbgs() << ")\n");
1546    } else {
1547      replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg);
1548    }
1549    PHI.eraseFromParent();
1550  }
1551  
1552  void AMDGPUMachineCFGStructurizer::replaceEntryPHI(
1553      MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB,
1554      SmallVector<unsigned, 2> &PHIRegionIndices) {
1555    LLVM_DEBUG(dbgs() << "Replace entry PHI: ");
1556    LLVM_DEBUG(PHI.dump());
1557    LLVM_DEBUG(dbgs() << " with ");
1558  
1559    unsigned NumInputs = getPHINumInputs(PHI);
1560    unsigned NumNonRegionInputs = NumInputs;
1561    for (unsigned i = 0; i < NumInputs; ++i) {
1562      if (isPHIRegionIndex(PHIRegionIndices, i)) {
1563        NumNonRegionInputs--;
1564      }
1565    }
1566  
1567    if (NumNonRegionInputs == 0) {
1568      auto DestReg = getPHIDestReg(PHI);
1569      replaceRegisterWith(DestReg, CombinedSourceReg);
1570      LLVM_DEBUG(dbgs() << " register " << printReg(CombinedSourceReg, TRI)
1571                        << "\n");
1572      PHI.eraseFromParent();
1573    } else {
1574      LLVM_DEBUG(dbgs() << printReg(getPHIDestReg(PHI), TRI) << " = PHI(");
1575      MachineBasicBlock *MBB = PHI.getParent();
1576      MachineInstrBuilder MIB =
1577          BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1578                  getPHIDestReg(PHI));
1579      MIB.addReg(CombinedSourceReg);
1580      MIB.addMBB(IfMBB);
1581      LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
1582                        << printMBBReference(*IfMBB));
1583      unsigned NumInputs = getPHINumInputs(PHI);
1584      for (unsigned i = 0; i < NumInputs; ++i) {
1585        if (isPHIRegionIndex(PHIRegionIndices, i)) {
1586          continue;
1587        }
1588        unsigned SourceReg = getPHISourceReg(PHI, i);
1589        MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1590        MIB.addReg(SourceReg);
1591        MIB.addMBB(SourcePred);
1592        LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
1593                          << printMBBReference(*SourcePred));
1594      }
1595      LLVM_DEBUG(dbgs() << ")\n");
1596      PHI.eraseFromParent();
1597    }
1598  }
1599  
1600  void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs(
1601      MachineInstr &PHI, SmallVector<unsigned, 2> &PHIRegionIndices,
1602      unsigned CombinedSourceReg, LinearizedRegion *LRegion) {
1603    bool WasLiveOut = false;
1604    for (auto PII : PHIRegionIndices) {
1605      unsigned Reg = getPHISourceReg(PHI, PII);
1606      if (LRegion->isLiveOut(Reg)) {
1607        bool IsDead = true;
1608  
1609        // Check if register is live out of the basic block
1610        MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent();
1611        for (auto UI = MRI->use_begin(Reg), E = MRI->use_end(); UI != E; ++UI) {
1612          if ((*UI).getParent()->getParent() != DefMBB) {
1613            IsDead = false;
1614          }
1615        }
1616  
1617        LLVM_DEBUG(dbgs() << "Register " << printReg(Reg, TRI) << " is "
1618                          << (IsDead ? "dead" : "alive")
1619                          << " after PHI replace\n");
1620        if (IsDead) {
1621          LRegion->removeLiveOut(Reg);
1622        }
1623        WasLiveOut = true;
1624      }
1625    }
1626  
1627    if (WasLiveOut)
1628      LRegion->addLiveOut(CombinedSourceReg);
1629  }
1630  
1631  void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region,
1632                                                    MachineBasicBlock *LastMerge,
1633                                                    MachineInstr &PHI,
1634                                                    LinearizedRegion *LRegion) {
1635    SmallVector<unsigned, 2> PHIRegionIndices;
1636    getPHIRegionIndices(Region, PHI, PHIRegionIndices);
1637    unsigned LinearizedSourceReg =
1638        storePHILinearizationInfo(PHI, &PHIRegionIndices);
1639  
1640    replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices);
1641    replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion);
1642  }
1643  
1644  void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region,
1645                                                     MachineBasicBlock *IfMBB,
1646                                                     MachineInstr &PHI) {
1647    SmallVector<unsigned, 2> PHINonRegionIndices;
1648    getPHINonRegionIndices(Region, PHI, PHINonRegionIndices);
1649    unsigned LinearizedSourceReg =
1650        storePHILinearizationInfo(PHI, &PHINonRegionIndices);
1651    replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices);
1652  }
1653  
1654  static void collectPHIs(MachineBasicBlock *MBB,
1655                          SmallVector<MachineInstr *, 2> &PHIs) {
1656    for (auto &BBI : *MBB) {
1657      if (BBI.isPHI()) {
1658        PHIs.push_back(&BBI);
1659      }
1660    }
1661  }
1662  
1663  void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHIs(RegionMRT *Region,
1664                                                     MachineBasicBlock *LastMerge,
1665                                                     LinearizedRegion *LRegion) {
1666    SmallVector<MachineInstr *, 2> PHIs;
1667    auto Exit = Region->getSucc();
1668    if (Exit == nullptr)
1669      return;
1670  
1671    collectPHIs(Exit, PHIs);
1672  
1673    for (auto PHII : PHIs) {
1674      rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion);
1675    }
1676  }
1677  
1678  void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Region,
1679                                                      MachineBasicBlock *IfMBB) {
1680    SmallVector<MachineInstr *, 2> PHIs;
1681    auto Entry = Region->getEntry();
1682  
1683    collectPHIs(Entry, PHIs);
1684  
1685    for (auto PHII : PHIs) {
1686      rewriteRegionEntryPHI(Region, IfMBB, *PHII);
1687    }
1688  }
1689  
1690  void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB,
1691                                                         MachineBasicBlock *Dest,
1692                                                         const DebugLoc &DL) {
1693    LLVM_DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber()
1694                      << " -> " << Dest->getNumber() << "\n");
1695    MachineBasicBlock::instr_iterator Terminator = MBB->getFirstInstrTerminator();
1696    bool HasTerminator = Terminator != MBB->instr_end();
1697    if (HasTerminator) {
1698      TII->ReplaceTailWithBranchTo(Terminator, Dest);
1699    }
1700    if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(Dest)) {
1701      TII->insertUnconditionalBranch(*MBB, Dest, DL);
1702    }
1703  }
1704  
1705  static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) {
1706    MachineBasicBlock *result = nullptr;
1707    for (auto &MFI : MF) {
1708      if (MFI.succ_size() == 0) {
1709        if (result == nullptr) {
1710          result = &MFI;
1711        } else {
1712          return nullptr;
1713        }
1714      }
1715    }
1716  
1717    return result;
1718  }
1719  
1720  static bool hasOneExitNode(MachineFunction &MF) {
1721    return getSingleExitNode(MF) != nullptr;
1722  }
1723  
1724  MachineBasicBlock *
1725  AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) {
1726    auto Exit = Region->getSucc();
1727  
1728    // If the exit is the end of the function, we just use the existing
1729    MachineFunction *MF = Region->getEntry()->getParent();
1730    if (Exit == nullptr && hasOneExitNode(*MF)) {
1731      return &(*(--(Region->getEntry()->getParent()->end())));
1732    }
1733  
1734    MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock();
1735    if (Exit == nullptr) {
1736      MachineFunction::iterator ExitIter = MF->end();
1737      MF->insert(ExitIter, LastMerge);
1738    } else {
1739      MachineFunction::iterator ExitIter = Exit->getIterator();
1740      MF->insert(ExitIter, LastMerge);
1741      LastMerge->addSuccessor(Exit);
1742      insertUnconditionalBranch(LastMerge, Exit);
1743      LLVM_DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber()
1744                        << "\n");
1745    }
1746    return LastMerge;
1747  }
1748  
1749  void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB,
1750                                              MachineBasicBlock *CodeBB,
1751                                              MachineBasicBlock *MergeBB,
1752                                              unsigned DestRegister,
1753                                              unsigned IfSourceRegister,
1754                                              unsigned CodeSourceRegister,
1755                                              bool IsUndefIfSource) {
1756    // If this is the function exit block, we don't need a phi.
1757    if (MergeBB->succ_begin() == MergeBB->succ_end()) {
1758      return;
1759    }
1760    LLVM_DEBUG(dbgs() << "Merge PHI (" << printMBBReference(*MergeBB)
1761                      << "): " << printReg(DestRegister, TRI) << " = PHI("
1762                      << printReg(IfSourceRegister, TRI) << ", "
1763                      << printMBBReference(*IfBB)
1764                      << printReg(CodeSourceRegister, TRI) << ", "
1765                      << printMBBReference(*CodeBB) << ")\n");
1766    const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin());
1767    MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL,
1768                                      TII->get(TargetOpcode::PHI), DestRegister);
1769    if (IsUndefIfSource && false) {
1770      MIB.addReg(IfSourceRegister, RegState::Undef);
1771    } else {
1772      MIB.addReg(IfSourceRegister);
1773    }
1774    MIB.addMBB(IfBB);
1775    MIB.addReg(CodeSourceRegister);
1776    MIB.addMBB(CodeBB);
1777  }
1778  
1779  static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) {
1780    for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(),
1781                                          E = MBB->succ_end();
1782         PI != E; ++PI) {
1783      if ((*PI) != MBB) {
1784        (MBB)->removeSuccessor(*PI);
1785      }
1786    }
1787  }
1788  
1789  static void removeExternalCFGEdges(MachineBasicBlock *StartMBB,
1790                                     MachineBasicBlock *EndMBB) {
1791  
1792    // We have to check against the StartMBB successor becasuse a
1793    // structurized region with a loop will have the entry block split,
1794    // and the backedge will go to the entry successor.
1795    DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs;
1796    unsigned SuccSize = StartMBB->succ_size();
1797    if (SuccSize > 0) {
1798      MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin());
1799      for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(),
1800                                            E = EndMBB->succ_end();
1801           PI != E; ++PI) {
1802        // Either we have a back-edge to the entry block, or a back-edge to the
1803        // successor of the entry block since the block may be split.
1804        if ((*PI) != StartMBB &&
1805            !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) {
1806          Succs.insert(
1807              std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, *PI));
1808        }
1809      }
1810    }
1811  
1812    for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(),
1813                                          E = StartMBB->pred_end();
1814         PI != E; ++PI) {
1815      if ((*PI) != EndMBB) {
1816        Succs.insert(
1817            std::pair<MachineBasicBlock *, MachineBasicBlock *>(*PI, StartMBB));
1818      }
1819    }
1820  
1821    for (auto SI : Succs) {
1822      std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI;
1823      LLVM_DEBUG(dbgs() << "Removing edge: " << printMBBReference(*Edge.first)
1824                        << " -> " << printMBBReference(*Edge.second) << "\n");
1825      Edge.first->removeSuccessor(Edge.second);
1826    }
1827  }
1828  
1829  MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock(
1830      MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBBStart,
1831      MachineBasicBlock *CodeBBEnd, MachineBasicBlock *SelectBB, unsigned IfReg,
1832      bool InheritPreds) {
1833    MachineFunction *MF = MergeBB->getParent();
1834    MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock();
1835  
1836    if (InheritPreds) {
1837      for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(),
1838                                            E = CodeBBStart->pred_end();
1839           PI != E; ++PI) {
1840        if ((*PI) != CodeBBEnd) {
1841          MachineBasicBlock *Pred = (*PI);
1842          Pred->addSuccessor(IfBB);
1843        }
1844      }
1845    }
1846  
1847    removeExternalCFGEdges(CodeBBStart, CodeBBEnd);
1848  
1849    auto CodeBBStartI = CodeBBStart->getIterator();
1850    auto CodeBBEndI = CodeBBEnd->getIterator();
1851    auto MergeIter = MergeBB->getIterator();
1852    MF->insert(MergeIter, IfBB);
1853    MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI);
1854    IfBB->addSuccessor(MergeBB);
1855    IfBB->addSuccessor(CodeBBStart);
1856  
1857    LLVM_DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n");
1858    // Ensure that the MergeBB is a successor of the CodeEndBB.
1859    if (!CodeBBEnd->isSuccessor(MergeBB))
1860      CodeBBEnd->addSuccessor(MergeBB);
1861  
1862    LLVM_DEBUG(dbgs() << "Moved " << printMBBReference(*CodeBBStart)
1863                      << " through " << printMBBReference(*CodeBBEnd) << "\n");
1864  
1865    // If we have a single predecessor we can find a reasonable debug location
1866    MachineBasicBlock *SinglePred =
1867        CodeBBStart->pred_size() == 1 ? *(CodeBBStart->pred_begin()) : nullptr;
1868    const DebugLoc &DL = SinglePred
1869                      ? SinglePred->findDebugLoc(SinglePred->getFirstTerminator())
1870                      : DebugLoc();
1871  
1872    unsigned Reg =
1873        TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg,
1874                      SelectBB->getNumber() /* CodeBBStart->getNumber() */);
1875    if (&(*(IfBB->getParent()->begin())) == IfBB) {
1876      TII->materializeImmediate(*IfBB, IfBB->begin(), DL, IfReg,
1877                                CodeBBStart->getNumber());
1878    }
1879    MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
1880    ArrayRef<MachineOperand> Cond(RegOp);
1881    TII->insertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL);
1882  
1883    return IfBB;
1884  }
1885  
1886  void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled(
1887      SmallVector<MachineOperand, 1> Cond) {
1888    if (Cond.size() != 1)
1889      return;
1890    if (!Cond[0].isReg())
1891      return;
1892  
1893    unsigned CondReg = Cond[0].getReg();
1894    for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) {
1895      (*UI).setIsKill(false);
1896    }
1897  }
1898  
1899  void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB,
1900                                                       MachineBasicBlock *MergeBB,
1901                                                       unsigned BBSelectReg) {
1902    MachineBasicBlock *TrueBB = nullptr;
1903    MachineBasicBlock *FalseBB = nullptr;
1904    SmallVector<MachineOperand, 1> Cond;
1905    MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB];
1906    TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond);
1907  
1908    const DebugLoc &DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator());
1909  
1910    if (FalseBB == nullptr && TrueBB == nullptr && FallthroughBB == nullptr) {
1911      // This is an exit block, hence no successors. We will assign the
1912      // bb select register to the entry block.
1913      TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1914                                BBSelectReg,
1915                                CodeBB->getParent()->begin()->getNumber());
1916      insertUnconditionalBranch(CodeBB, MergeBB, DL);
1917      return;
1918    }
1919  
1920    if (FalseBB == nullptr && TrueBB == nullptr) {
1921      TrueBB = FallthroughBB;
1922    } else if (TrueBB != nullptr) {
1923      FalseBB =
1924          (FallthroughBB && (FallthroughBB != TrueBB)) ? FallthroughBB : FalseBB;
1925    }
1926  
1927    if ((TrueBB != nullptr && FalseBB == nullptr) || (TrueBB == FalseBB)) {
1928      TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1929                                BBSelectReg, TrueBB->getNumber());
1930    } else {
1931      const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg);
1932      unsigned TrueBBReg = MRI->createVirtualRegister(RegClass);
1933      unsigned FalseBBReg = MRI->createVirtualRegister(RegClass);
1934      TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1935                                TrueBBReg, TrueBB->getNumber());
1936      TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1937                                FalseBBReg, FalseBB->getNumber());
1938      ensureCondIsNotKilled(Cond);
1939      TII->insertVectorSelect(*CodeBB, CodeBB->getFirstTerminator(), DL,
1940                              BBSelectReg, Cond, TrueBBReg, FalseBBReg);
1941    }
1942  
1943    insertUnconditionalBranch(CodeBB, MergeBB, DL);
1944  }
1945  
1946  MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) {
1947    if (MRI->def_begin(Reg) == MRI->def_end()) {
1948      LLVM_DEBUG(dbgs() << "Register "
1949                        << printReg(Reg, MRI->getTargetRegisterInfo())
1950                        << " has NO defs\n");
1951    } else if (!MRI->hasOneDef(Reg)) {
1952      LLVM_DEBUG(dbgs() << "Register "
1953                        << printReg(Reg, MRI->getTargetRegisterInfo())
1954                        << " has multiple defs\n");
1955      LLVM_DEBUG(dbgs() << "DEFS BEGIN:\n");
1956      for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) {
1957        LLVM_DEBUG(DI->getParent()->dump());
1958      }
1959      LLVM_DEBUG(dbgs() << "DEFS END\n");
1960    }
1961  
1962    assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
1963    return (*(MRI->def_begin(Reg))).getParent();
1964  }
1965  
1966  void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB,
1967                                                MachineBasicBlock *CodeBB,
1968                                                MachineBasicBlock *MergeBB,
1969                                                LinearizedRegion *InnerRegion,
1970                                                unsigned DestReg,
1971                                                unsigned SourceReg) {
1972    // In this function we know we are part of a chain already, so we need
1973    // to add the registers to the existing chain, and rename the register
1974    // inside the region.
1975    bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit();
1976    MachineInstr *DefInstr = getDefInstr(SourceReg);
1977    if (DefInstr->isPHI() && DefInstr->getParent() == CodeBB && IsSingleBB) {
1978      // Handle the case where the def is a PHI-def inside a basic
1979      // block, then we only need to do renaming. Special care needs to
1980      // be taken if the PHI-def is part of an existing chain, or if a
1981      // new one needs to be created.
1982      InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI);
1983  
1984      // We collect all PHI Information, and if we are at the region entry,
1985      // all PHIs will be removed, and then re-introduced if needed.
1986      storePHILinearizationInfoDest(DestReg, *DefInstr);
1987      // We have picked up all the information we need now and can remove
1988      // the PHI
1989      PHIInfo.removeSource(DestReg, SourceReg, CodeBB);
1990      DefInstr->eraseFromParent();
1991    } else {
1992      // If this is not a phi-def, or it is a phi-def but from a linearized region
1993      if (IsSingleBB && DefInstr->getParent() == InnerRegion->getEntry()) {
1994        // If this is a single BB and the definition is in this block we
1995        // need to replace any uses outside the region.
1996        InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI);
1997      }
1998      const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg);
1999      unsigned NextDestReg = MRI->createVirtualRegister(RegClass);
2000      bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1;
2001      LLVM_DEBUG(dbgs() << "Insert Chained PHI\n");
2002      insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg,
2003                     SourceReg, IsLastDef);
2004  
2005      PHIInfo.removeSource(DestReg, SourceReg, CodeBB);
2006      if (IsLastDef) {
2007        const DebugLoc &DL = IfBB->findDebugLoc(IfBB->getFirstTerminator());
2008        TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DL,
2009                                  NextDestReg, 0);
2010        PHIInfo.deleteDef(DestReg);
2011      } else {
2012        PHIInfo.replaceDef(DestReg, NextDestReg);
2013      }
2014    }
2015  }
2016  
2017  bool AMDGPUMachineCFGStructurizer::containsDef(MachineBasicBlock *MBB,
2018                                           LinearizedRegion *InnerRegion,
2019                                           unsigned Register) {
2020    return getDefInstr(Register)->getParent() == MBB ||
2021           InnerRegion->contains(getDefInstr(Register)->getParent());
2022  }
2023  
2024  void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
2025                                                  MachineBasicBlock *CodeBB,
2026                                                  MachineBasicBlock *MergeBB,
2027                                                  LinearizedRegion *InnerRegion,
2028                                                  LinearizedRegion *LRegion) {
2029    DenseSet<unsigned> *LiveOuts = InnerRegion->getLiveOuts();
2030    SmallVector<unsigned, 4> OldLiveOuts;
2031    bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit();
2032    for (auto OLI : *LiveOuts) {
2033      OldLiveOuts.push_back(OLI);
2034    }
2035  
2036    for (auto LI : OldLiveOuts) {
2037      LLVM_DEBUG(dbgs() << "LiveOut: " << printReg(LI, TRI));
2038      if (!containsDef(CodeBB, InnerRegion, LI) ||
2039          (!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) {
2040        // If the register simly lives through the CodeBB, we don't have
2041        // to rewrite anything since the register is not defined in this
2042        // part of the code.
2043        LLVM_DEBUG(dbgs() << "- through");
2044        continue;
2045      }
2046      LLVM_DEBUG(dbgs() << "\n");
2047      unsigned Reg = LI;
2048      if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) {
2049        // If the register is live out, we do want to create a phi,
2050        // unless it is from the Exit block, becasuse in that case there
2051        // is already a PHI, and no need to create a new one.
2052  
2053        // If the register is just a live out def and not part of a phi
2054        // chain, we need to create a PHI node to handle the if region,
2055        // and replace all uses outside of the region with the new dest
2056        // register, unless it is the outgoing BB select register. We have
2057        // already creaed phi nodes for these.
2058        const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
2059        unsigned PHIDestReg = MRI->createVirtualRegister(RegClass);
2060        unsigned IfSourceReg = MRI->createVirtualRegister(RegClass);
2061        // Create initializer, this value is never used, but is needed
2062        // to satisfy SSA.
2063        LLVM_DEBUG(dbgs() << "Initializer for reg: " << printReg(Reg) << "\n");
2064        TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(),
2065                          IfSourceReg, 0);
2066  
2067        InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI);
2068        LLVM_DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n");
2069        insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg,
2070                       IfSourceReg, Reg, true);
2071      }
2072    }
2073  
2074    // Handle the chained definitions in PHIInfo, checking if this basic block
2075    // is a source block for a definition.
2076    SmallVector<unsigned, 4> Sources;
2077    if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) {
2078      LLVM_DEBUG(dbgs() << "Inserting PHI Live Out from "
2079                        << printMBBReference(*CodeBB) << "\n");
2080      for (auto SI : Sources) {
2081        unsigned DestReg;
2082        PHIInfo.findDest(SI, CodeBB, DestReg);
2083        insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI);
2084      }
2085      LLVM_DEBUG(dbgs() << "Insertion done.\n");
2086    }
2087  
2088    LLVM_DEBUG(PHIInfo.dump(MRI));
2089  }
2090  
2091  void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) {
2092    LLVM_DEBUG(dbgs() << "Before PHI Prune\n");
2093    LLVM_DEBUG(PHIInfo.dump(MRI));
2094    SmallVector<std::tuple<unsigned, unsigned, MachineBasicBlock *>, 4>
2095        ElimiatedSources;
2096    for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2097         ++DRI) {
2098  
2099      unsigned DestReg = *DRI;
2100      auto SE = PHIInfo.sources_end(DestReg);
2101  
2102      bool MBBContainsPHISource = false;
2103      // Check if there is a PHI source in this MBB
2104      for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2105        unsigned SourceReg = (*SRI).first;
2106        MachineOperand *Def = &(*(MRI->def_begin(SourceReg)));
2107        if (Def->getParent()->getParent() == MBB) {
2108          MBBContainsPHISource = true;
2109        }
2110      }
2111  
2112      // If so, all other sources are useless since we know this block
2113      // is always executed when the region is executed.
2114      if (MBBContainsPHISource) {
2115        for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2116          PHILinearize::PHISourceT Source = *SRI;
2117          unsigned SourceReg = Source.first;
2118          MachineBasicBlock *SourceMBB = Source.second;
2119          MachineOperand *Def = &(*(MRI->def_begin(SourceReg)));
2120          if (Def->getParent()->getParent() != MBB) {
2121            ElimiatedSources.push_back(
2122                std::make_tuple(DestReg, SourceReg, SourceMBB));
2123          }
2124        }
2125      }
2126    }
2127  
2128    // Remove the PHI sources that are in the given MBB
2129    for (auto &SourceInfo : ElimiatedSources) {
2130      PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo),
2131                           std::get<2>(SourceInfo));
2132    }
2133    LLVM_DEBUG(dbgs() << "After PHI Prune\n");
2134    LLVM_DEBUG(PHIInfo.dump(MRI));
2135  }
2136  
2137  void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion,
2138                                              unsigned DestReg) {
2139    MachineBasicBlock *Entry = CurrentRegion->getEntry();
2140    MachineBasicBlock *Exit = CurrentRegion->getExit();
2141  
2142    LLVM_DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() << " Pred: "
2143                      << (*(Entry->pred_begin()))->getNumber() << "\n");
2144  
2145    int NumSources = 0;
2146    auto SE = PHIInfo.sources_end(DestReg);
2147  
2148    for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2149      NumSources++;
2150    }
2151  
2152    if (NumSources == 1) {
2153      auto SRI = PHIInfo.sources_begin(DestReg);
2154      unsigned SourceReg = (*SRI).first;
2155      replaceRegisterWith(DestReg, SourceReg);
2156    } else {
2157      const DebugLoc &DL = Entry->findDebugLoc(Entry->begin());
2158      MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL,
2159                                        TII->get(TargetOpcode::PHI), DestReg);
2160      LLVM_DEBUG(dbgs() << "Entry PHI " << printReg(DestReg, TRI) << " = PHI(");
2161  
2162      unsigned CurrentBackedgeReg = 0;
2163  
2164      for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2165        unsigned SourceReg = (*SRI).first;
2166  
2167        if (CurrentRegion->contains((*SRI).second)) {
2168          if (CurrentBackedgeReg == 0) {
2169            CurrentBackedgeReg = SourceReg;
2170          } else {
2171            MachineInstr *PHIDefInstr = getDefInstr(SourceReg);
2172            MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent();
2173            const TargetRegisterClass *RegClass =
2174                MRI->getRegClass(CurrentBackedgeReg);
2175            unsigned NewBackedgeReg = MRI->createVirtualRegister(RegClass);
2176            MachineInstrBuilder BackedgePHI =
2177                BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL,
2178                        TII->get(TargetOpcode::PHI), NewBackedgeReg);
2179            BackedgePHI.addReg(CurrentBackedgeReg);
2180            BackedgePHI.addMBB(getPHIPred(*PHIDefInstr, 0));
2181            BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1));
2182            BackedgePHI.addMBB((*SRI).second);
2183            CurrentBackedgeReg = NewBackedgeReg;
2184            LLVM_DEBUG(dbgs()
2185                       << "Inserting backedge PHI: "
2186                       << printReg(NewBackedgeReg, TRI) << " = PHI("
2187                       << printReg(CurrentBackedgeReg, TRI) << ", "
2188                       << printMBBReference(*getPHIPred(*PHIDefInstr, 0)) << ", "
2189                       << printReg(getPHISourceReg(*PHIDefInstr, 1), TRI) << ", "
2190                       << printMBBReference(*(*SRI).second));
2191          }
2192        } else {
2193          MIB.addReg(SourceReg);
2194          MIB.addMBB((*SRI).second);
2195          LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
2196                            << printMBBReference(*(*SRI).second) << ", ");
2197        }
2198      }
2199  
2200      // Add the final backedge register source to the entry phi
2201      if (CurrentBackedgeReg != 0) {
2202        MIB.addReg(CurrentBackedgeReg);
2203        MIB.addMBB(Exit);
2204        LLVM_DEBUG(dbgs() << printReg(CurrentBackedgeReg, TRI) << ", "
2205                          << printMBBReference(*Exit) << ")\n");
2206      } else {
2207        LLVM_DEBUG(dbgs() << ")\n");
2208      }
2209    }
2210  }
2211  
2212  void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) {
2213    LLVM_DEBUG(PHIInfo.dump(MRI));
2214  
2215    for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2216         ++DRI) {
2217  
2218      unsigned DestReg = *DRI;
2219      createEntryPHI(CurrentRegion, DestReg);
2220    }
2221    PHIInfo.clear();
2222  }
2223  
2224  void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register,
2225                                                   unsigned NewRegister) {
2226    assert(Register != NewRegister && "Cannot replace a reg with itself");
2227  
2228    for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register),
2229                                           E = MRI->reg_end();
2230         I != E;) {
2231      MachineOperand &O = *I;
2232      ++I;
2233      if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) {
2234        LLVM_DEBUG(dbgs() << "Trying to substitute physical register: "
2235                          << printReg(NewRegister, MRI->getTargetRegisterInfo())
2236                          << "\n");
2237        llvm_unreachable("Cannot substitute physical registers");
2238        // We don't handle physical registers, but if we need to
2239        // in the future This is how we do it:
2240        // O.substPhysReg(NewRegister, *TRI);
2241      } else {
2242        LLVM_DEBUG(dbgs() << "Replacing register: "
2243                          << printReg(Register, MRI->getTargetRegisterInfo())
2244                          << " with "
2245                          << printReg(NewRegister, MRI->getTargetRegisterInfo())
2246                          << "\n");
2247        O.setReg(NewRegister);
2248      }
2249    }
2250    PHIInfo.deleteDef(Register);
2251  
2252    getRegionMRT()->replaceLiveOutReg(Register, NewRegister);
2253  
2254    LLVM_DEBUG(PHIInfo.dump(MRI));
2255  }
2256  
2257  void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) {
2258    LLVM_DEBUG(dbgs() << "Resolve PHI Infos\n");
2259    LLVM_DEBUG(PHIInfo.dump(MRI));
2260    for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2261         ++DRI) {
2262      unsigned DestReg = *DRI;
2263      LLVM_DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI) << "\n");
2264      auto SRI = PHIInfo.sources_begin(DestReg);
2265      unsigned SourceReg = (*SRI).first;
2266      LLVM_DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI)
2267                        << " SourceReg: " << printReg(SourceReg, TRI) << "\n");
2268  
2269      assert(PHIInfo.sources_end(DestReg) == ++SRI &&
2270             "More than one phi source in entry node");
2271      replaceRegisterWith(DestReg, SourceReg);
2272    }
2273  }
2274  
2275  static bool isFunctionEntryBlock(MachineBasicBlock *MBB) {
2276    return ((&(*(MBB->getParent()->begin()))) == MBB);
2277  }
2278  
2279  MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
2280      MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBB,
2281      LinearizedRegion *CurrentRegion, unsigned BBSelectRegIn,
2282      unsigned BBSelectRegOut) {
2283    if (isFunctionEntryBlock(CodeBB) && !CurrentRegion->getHasLoop()) {
2284      // Handle non-loop function entry block.
2285      // We need to allow loops to the entry block and then
2286      rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut);
2287      resolvePHIInfos(CodeBB);
2288      removeExternalCFGSuccessors(CodeBB);
2289      CodeBB->addSuccessor(MergeBB);
2290      CurrentRegion->addMBB(CodeBB);
2291      return nullptr;
2292    }
2293    if (CurrentRegion->getEntry() == CodeBB && !CurrentRegion->getHasLoop()) {
2294      // Handle non-loop region entry block.
2295      MachineFunction *MF = MergeBB->getParent();
2296      auto MergeIter = MergeBB->getIterator();
2297      auto CodeBBStartIter = CodeBB->getIterator();
2298      auto CodeBBEndIter = ++(CodeBB->getIterator());
2299      if (CodeBBEndIter != MergeIter) {
2300        MF->splice(MergeIter, CodeBBStartIter, CodeBBEndIter);
2301      }
2302      rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut);
2303      prunePHIInfo(CodeBB);
2304      createEntryPHIs(CurrentRegion);
2305      removeExternalCFGSuccessors(CodeBB);
2306      CodeBB->addSuccessor(MergeBB);
2307      CurrentRegion->addMBB(CodeBB);
2308      return nullptr;
2309    } else {
2310      // Handle internal block.
2311      const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn);
2312      unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass);
2313      rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg);
2314      bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB;
2315      MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB,
2316                                              BBSelectRegIn, IsRegionEntryBB);
2317      CurrentRegion->addMBB(IfBB);
2318      // If this is the entry block we need to make the If block the new
2319      // linearized region entry.
2320      if (IsRegionEntryBB) {
2321        CurrentRegion->setEntry(IfBB);
2322  
2323        if (CurrentRegion->getHasLoop()) {
2324          MachineBasicBlock *RegionExit = CurrentRegion->getExit();
2325          MachineBasicBlock *ETrueBB = nullptr;
2326          MachineBasicBlock *EFalseBB = nullptr;
2327          SmallVector<MachineOperand, 1> ECond;
2328  
2329          const DebugLoc &DL = DebugLoc();
2330          TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond);
2331          TII->removeBranch(*RegionExit);
2332  
2333          // We need to create a backedge if there is a loop
2334          unsigned Reg = TII->insertNE(
2335              RegionExit, RegionExit->instr_end(), DL,
2336              CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
2337              CurrentRegion->getRegionMRT()->getEntry()->getNumber());
2338          MachineOperand RegOp =
2339              MachineOperand::CreateReg(Reg, false, false, true);
2340          ArrayRef<MachineOperand> Cond(RegOp);
2341          LLVM_DEBUG(dbgs() << "RegionExitReg: ");
2342          LLVM_DEBUG(Cond[0].print(dbgs(), TRI));
2343          LLVM_DEBUG(dbgs() << "\n");
2344          TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
2345                            Cond, DebugLoc());
2346          RegionExit->addSuccessor(CurrentRegion->getEntry());
2347        }
2348      }
2349      CurrentRegion->addMBB(CodeBB);
2350      LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo);
2351  
2352      InnerRegion.setParent(CurrentRegion);
2353      LLVM_DEBUG(dbgs() << "Insert BB Select PHI (BB)\n");
2354      insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
2355                     CodeBBSelectReg);
2356      InnerRegion.addMBB(MergeBB);
2357  
2358      LLVM_DEBUG(InnerRegion.print(dbgs(), TRI));
2359      rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion);
2360      extractKilledPHIs(CodeBB);
2361      if (IsRegionEntryBB) {
2362        createEntryPHIs(CurrentRegion);
2363      }
2364      return IfBB;
2365    }
2366  }
2367  
2368  MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
2369      MachineBasicBlock *MergeBB, LinearizedRegion *InnerRegion,
2370      LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB,
2371      unsigned BBSelectRegIn, unsigned BBSelectRegOut) {
2372    unsigned CodeBBSelectReg =
2373        InnerRegion->getRegionMRT()->getInnerOutputRegister();
2374    MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry();
2375    MachineBasicBlock *CodeExitBB = InnerRegion->getExit();
2376    MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB,
2377                                            SelectBB, BBSelectRegIn, true);
2378    CurrentRegion->addMBB(IfBB);
2379    bool isEntry = CurrentRegion->getEntry() == InnerRegion->getEntry();
2380    if (isEntry) {
2381  
2382      if (CurrentRegion->getHasLoop()) {
2383        MachineBasicBlock *RegionExit = CurrentRegion->getExit();
2384        MachineBasicBlock *ETrueBB = nullptr;
2385        MachineBasicBlock *EFalseBB = nullptr;
2386        SmallVector<MachineOperand, 1> ECond;
2387  
2388        const DebugLoc &DL = DebugLoc();
2389        TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond);
2390        TII->removeBranch(*RegionExit);
2391  
2392        // We need to create a backedge if there is a loop
2393        unsigned Reg =
2394            TII->insertNE(RegionExit, RegionExit->instr_end(), DL,
2395                          CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
2396                          CurrentRegion->getRegionMRT()->getEntry()->getNumber());
2397        MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
2398        ArrayRef<MachineOperand> Cond(RegOp);
2399        LLVM_DEBUG(dbgs() << "RegionExitReg: ");
2400        LLVM_DEBUG(Cond[0].print(dbgs(), TRI));
2401        LLVM_DEBUG(dbgs() << "\n");
2402        TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
2403                          Cond, DebugLoc());
2404        RegionExit->addSuccessor(IfBB);
2405      }
2406    }
2407    CurrentRegion->addMBBs(InnerRegion);
2408    LLVM_DEBUG(dbgs() << "Insert BB Select PHI (region)\n");
2409    insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
2410                   CodeBBSelectReg);
2411  
2412    rewriteLiveOutRegs(IfBB, /* CodeEntryBB */ CodeExitBB, MergeBB, InnerRegion,
2413                       CurrentRegion);
2414  
2415    rewriteRegionEntryPHIs(InnerRegion, IfBB);
2416  
2417    if (isEntry) {
2418      CurrentRegion->setEntry(IfBB);
2419    }
2420  
2421    if (isEntry) {
2422      createEntryPHIs(CurrentRegion);
2423    }
2424  
2425    return IfBB;
2426  }
2427  
2428  void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI,
2429                                            MachineBasicBlock *Entry,
2430                                            MachineBasicBlock *EntrySucc,
2431                                            LinearizedRegion *LRegion) {
2432    SmallVector<unsigned, 2> PHIRegionIndices;
2433    getPHIRegionIndices(LRegion, PHI, PHIRegionIndices);
2434  
2435    assert(PHIRegionIndices.size() == 1);
2436  
2437    unsigned RegionIndex = PHIRegionIndices[0];
2438    unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex);
2439    MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex);
2440    unsigned PHIDest = getPHIDestReg(PHI);
2441    unsigned PHISource = PHIDest;
2442    unsigned ReplaceReg;
2443  
2444    if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)) {
2445      PHISource = ReplaceReg;
2446    }
2447  
2448    const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest);
2449    unsigned NewDestReg = MRI->createVirtualRegister(RegClass);
2450    LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI);
2451    MachineInstrBuilder MIB =
2452        BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(),
2453                TII->get(TargetOpcode::PHI), NewDestReg);
2454    LLVM_DEBUG(dbgs() << "Split Entry PHI " << printReg(NewDestReg, TRI)
2455                      << " = PHI(");
2456    MIB.addReg(PHISource);
2457    MIB.addMBB(Entry);
2458    LLVM_DEBUG(dbgs() << printReg(PHISource, TRI) << ", "
2459                      << printMBBReference(*Entry));
2460    MIB.addReg(RegionSourceReg);
2461    MIB.addMBB(RegionSourceMBB);
2462    LLVM_DEBUG(dbgs() << " ," << printReg(RegionSourceReg, TRI) << ", "
2463                      << printMBBReference(*RegionSourceMBB) << ")\n");
2464  }
2465  
2466  void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry,
2467                                             MachineBasicBlock *EntrySucc,
2468                                             LinearizedRegion *LRegion) {
2469    SmallVector<MachineInstr *, 2> PHIs;
2470    collectPHIs(Entry, PHIs);
2471  
2472    for (auto PHII : PHIs) {
2473      splitLoopPHI(*PHII, Entry, EntrySucc, LRegion);
2474    }
2475  }
2476  
2477  // Split the exit block so that we can insert a end control flow
2478  MachineBasicBlock *
2479  AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) {
2480    auto MRTRegion = LRegion->getRegionMRT();
2481    auto Exit = LRegion->getExit();
2482    auto MF = Exit->getParent();
2483    auto Succ = MRTRegion->getSucc();
2484  
2485    auto NewExit = MF->CreateMachineBasicBlock();
2486    auto AfterExitIter = Exit->getIterator();
2487    AfterExitIter++;
2488    MF->insert(AfterExitIter, NewExit);
2489    Exit->removeSuccessor(Succ);
2490    Exit->addSuccessor(NewExit);
2491    NewExit->addSuccessor(Succ);
2492    insertUnconditionalBranch(NewExit, Succ);
2493    LRegion->addMBB(NewExit);
2494    LRegion->setExit(NewExit);
2495  
2496    LLVM_DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber()
2497                      << "\n");
2498  
2499    // Replace any PHI Predecessors in the successor with NewExit
2500    for (auto &II : *Succ) {
2501      MachineInstr &Instr = II;
2502  
2503      // If we are past the PHI instructions we are done
2504      if (!Instr.isPHI())
2505        break;
2506  
2507      int numPreds = getPHINumInputs(Instr);
2508      for (int i = 0; i < numPreds; ++i) {
2509        auto Pred = getPHIPred(Instr, i);
2510        if (Pred == Exit) {
2511          setPhiPred(Instr, i, NewExit);
2512        }
2513      }
2514    }
2515  
2516    return NewExit;
2517  }
2518  
2519  static MachineBasicBlock *split(MachineBasicBlock::iterator I) {
2520    // Create the fall-through block.
2521    MachineBasicBlock *MBB = (*I).getParent();
2522    MachineFunction *MF = MBB->getParent();
2523    MachineBasicBlock *SuccMBB = MF->CreateMachineBasicBlock();
2524    auto MBBIter = ++(MBB->getIterator());
2525    MF->insert(MBBIter, SuccMBB);
2526    SuccMBB->transferSuccessorsAndUpdatePHIs(MBB);
2527    MBB->addSuccessor(SuccMBB);
2528  
2529    // Splice the code over.
2530    SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end());
2531  
2532    return SuccMBB;
2533  }
2534  
2535  // Split the entry block separating PHI-nodes and the rest of the code
2536  // This is needed to insert an initializer for the bb select register
2537  // inloop regions.
2538  
2539  MachineBasicBlock *
2540  AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) {
2541    MachineBasicBlock *Entry = LRegion->getEntry();
2542    MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI());
2543    MachineBasicBlock *Exit = LRegion->getExit();
2544  
2545    LLVM_DEBUG(dbgs() << "Split " << printMBBReference(*Entry) << " to "
2546                      << printMBBReference(*Entry) << " -> "
2547                      << printMBBReference(*EntrySucc) << "\n");
2548    LRegion->addMBB(EntrySucc);
2549  
2550    // Make the backedge go to Entry Succ
2551    if (Exit->isSuccessor(Entry)) {
2552      Exit->removeSuccessor(Entry);
2553    }
2554    Exit->addSuccessor(EntrySucc);
2555    MachineInstr &Branch = *(Exit->instr_rbegin());
2556    for (auto &UI : Branch.uses()) {
2557      if (UI.isMBB() && UI.getMBB() == Entry) {
2558        UI.setMBB(EntrySucc);
2559      }
2560    }
2561  
2562    splitLoopPHIs(Entry, EntrySucc, LRegion);
2563  
2564    return EntrySucc;
2565  }
2566  
2567  LinearizedRegion *
2568  AMDGPUMachineCFGStructurizer::initLinearizedRegion(RegionMRT *Region) {
2569    LinearizedRegion *LRegion = Region->getLinearizedRegion();
2570    LRegion->initLiveOut(Region, MRI, TRI, PHIInfo);
2571    LRegion->setEntry(Region->getEntry());
2572    return LRegion;
2573  }
2574  
2575  static void removeOldExitPreds(RegionMRT *Region) {
2576    MachineBasicBlock *Exit = Region->getSucc();
2577    if (Exit == nullptr) {
2578      return;
2579    }
2580    for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(),
2581                                          E = Exit->pred_end();
2582         PI != E; ++PI) {
2583      if (Region->contains(*PI)) {
2584        (*PI)->removeSuccessor(Exit);
2585      }
2586    }
2587  }
2588  
2589  static bool mbbHasBackEdge(MachineBasicBlock *MBB,
2590                             SmallPtrSet<MachineBasicBlock *, 8> &MBBs) {
2591    for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) {
2592      if (MBBs.count(*SI) != 0) {
2593        return true;
2594      }
2595    }
2596    return false;
2597  }
2598  
2599  static bool containsNewBackedge(MRT *Tree,
2600                                  SmallPtrSet<MachineBasicBlock *, 8> &MBBs) {
2601    // Need to traverse this in reverse since it is in post order.
2602    if (Tree == nullptr)
2603      return false;
2604  
2605    if (Tree->isMBB()) {
2606      MachineBasicBlock *MBB = Tree->getMBBMRT()->getMBB();
2607      MBBs.insert(MBB);
2608      if (mbbHasBackEdge(MBB, MBBs)) {
2609        return true;
2610      }
2611    } else {
2612      RegionMRT *Region = Tree->getRegionMRT();
2613      SetVector<MRT *> *Children = Region->getChildren();
2614      for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) {
2615        if (containsNewBackedge(*CI, MBBs))
2616          return true;
2617      }
2618    }
2619    return false;
2620  }
2621  
2622  static bool containsNewBackedge(RegionMRT *Region) {
2623    SmallPtrSet<MachineBasicBlock *, 8> MBBs;
2624    return containsNewBackedge(Region, MBBs);
2625  }
2626  
2627  bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
2628    auto *LRegion = initLinearizedRegion(Region);
2629    LRegion->setHasLoop(containsNewBackedge(Region));
2630    MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region);
2631    MachineBasicBlock *CurrentMerge = LastMerge;
2632    LRegion->addMBB(LastMerge);
2633    LRegion->setExit(LastMerge);
2634  
2635    rewriteRegionExitPHIs(Region, LastMerge, LRegion);
2636    removeOldExitPreds(Region);
2637  
2638    LLVM_DEBUG(PHIInfo.dump(MRI));
2639  
2640    SetVector<MRT *> *Children = Region->getChildren();
2641    LLVM_DEBUG(dbgs() << "===========If Region Start===============\n");
2642    if (LRegion->getHasLoop()) {
2643      LLVM_DEBUG(dbgs() << "Has Backedge: Yes\n");
2644    } else {
2645      LLVM_DEBUG(dbgs() << "Has Backedge: No\n");
2646    }
2647  
2648    unsigned BBSelectRegIn;
2649    unsigned BBSelectRegOut;
2650    for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) {
2651      LLVM_DEBUG(dbgs() << "CurrentRegion: \n");
2652      LLVM_DEBUG(LRegion->print(dbgs(), TRI));
2653  
2654      auto CNI = CI;
2655      ++CNI;
2656  
2657      MRT *Child = (*CI);
2658  
2659      if (Child->isRegion()) {
2660  
2661        LinearizedRegion *InnerLRegion =
2662            Child->getRegionMRT()->getLinearizedRegion();
2663        // We found the block is the exit of an inner region, we need
2664        // to put it in the current linearized region.
2665  
2666        LLVM_DEBUG(dbgs() << "Linearizing region: ");
2667        LLVM_DEBUG(InnerLRegion->print(dbgs(), TRI));
2668        LLVM_DEBUG(dbgs() << "\n");
2669  
2670        MachineBasicBlock *InnerEntry = InnerLRegion->getEntry();
2671        if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) {
2672          // Entry has already been linearized, no need to do this region.
2673          unsigned OuterSelect = InnerLRegion->getBBSelectRegOut();
2674          unsigned InnerSelectReg =
2675              InnerLRegion->getRegionMRT()->getInnerOutputRegister();
2676          replaceRegisterWith(InnerSelectReg, OuterSelect),
2677              resolvePHIInfos(InnerEntry);
2678          if (!InnerLRegion->getExit()->isSuccessor(CurrentMerge))
2679            InnerLRegion->getExit()->addSuccessor(CurrentMerge);
2680          continue;
2681        }
2682  
2683        BBSelectRegOut = Child->getBBSelectRegOut();
2684        BBSelectRegIn = Child->getBBSelectRegIn();
2685  
2686        LLVM_DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI)
2687                          << "\n");
2688        LLVM_DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI)
2689                          << "\n");
2690  
2691        MachineBasicBlock *IfEnd = CurrentMerge;
2692        CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion,
2693                                      Child->getRegionMRT()->getEntry(),
2694                                      BBSelectRegIn, BBSelectRegOut);
2695        TII->convertNonUniformIfRegion(CurrentMerge, IfEnd);
2696      } else {
2697        MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB();
2698        LLVM_DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n");
2699  
2700        if (MBB == getSingleExitNode(*(MBB->getParent()))) {
2701          // If this is the exit block then we need to skip to the next.
2702          // The "in" register will be transferred to "out" in the next
2703          // iteration.
2704          continue;
2705        }
2706  
2707        BBSelectRegOut = Child->getBBSelectRegOut();
2708        BBSelectRegIn = Child->getBBSelectRegIn();
2709  
2710        LLVM_DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI)
2711                          << "\n");
2712        LLVM_DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI)
2713                          << "\n");
2714  
2715        MachineBasicBlock *IfEnd = CurrentMerge;
2716        // This is a basic block that is not part of an inner region, we
2717        // need to put it in the current linearized region.
2718        CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn,
2719                                      BBSelectRegOut);
2720        if (CurrentMerge) {
2721          TII->convertNonUniformIfRegion(CurrentMerge, IfEnd);
2722        }
2723  
2724        LLVM_DEBUG(PHIInfo.dump(MRI));
2725      }
2726    }
2727  
2728    LRegion->removeFalseRegisterKills(MRI);
2729  
2730    if (LRegion->getHasLoop()) {
2731      MachineBasicBlock *NewSucc = splitEntry(LRegion);
2732      if (isFunctionEntryBlock(LRegion->getEntry())) {
2733        resolvePHIInfos(LRegion->getEntry());
2734      }
2735      const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI());
2736      unsigned InReg = LRegion->getBBSelectRegIn();
2737      unsigned InnerSelectReg =
2738          MRI->createVirtualRegister(MRI->getRegClass(InReg));
2739      unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg));
2740      TII->materializeImmediate(*(LRegion->getEntry()),
2741                                LRegion->getEntry()->getFirstTerminator(), DL,
2742                                NewInReg, Region->getEntry()->getNumber());
2743      // Need to be careful about updating the registers inside the region.
2744      LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI);
2745      LLVM_DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n");
2746      insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc,
2747                     InnerSelectReg, NewInReg,
2748                     LRegion->getRegionMRT()->getInnerOutputRegister());
2749      splitExit(LRegion);
2750      TII->convertNonUniformLoopRegion(NewSucc, LastMerge);
2751    }
2752  
2753    if (Region->isRoot()) {
2754      TII->insertReturn(*LastMerge);
2755    }
2756  
2757    LLVM_DEBUG(Region->getEntry()->getParent()->dump());
2758    LLVM_DEBUG(LRegion->print(dbgs(), TRI));
2759    LLVM_DEBUG(PHIInfo.dump(MRI));
2760  
2761    LLVM_DEBUG(dbgs() << "===========If Region End===============\n");
2762  
2763    Region->setLinearizedRegion(LRegion);
2764    return true;
2765  }
2766  
2767  bool AMDGPUMachineCFGStructurizer::structurizeRegion(RegionMRT *Region) {
2768    if (false && regionIsSimpleIf(Region)) {
2769      transformSimpleIfRegion(Region);
2770      return true;
2771    } else if (regionIsSequence(Region)) {
2772      fixupRegionExits(Region);
2773      return false;
2774    } else {
2775      structurizeComplexRegion(Region);
2776    }
2777    return false;
2778  }
2779  
2780  static int structurize_once = 0;
2781  
2782  bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region,
2783                                                  bool isTopRegion) {
2784    bool Changed = false;
2785  
2786    auto Children = Region->getChildren();
2787    for (auto CI : *Children) {
2788      if (CI->isRegion()) {
2789        Changed |= structurizeRegions(CI->getRegionMRT(), false);
2790      }
2791    }
2792  
2793    if (structurize_once < 2 || true) {
2794      Changed |= structurizeRegion(Region);
2795      structurize_once++;
2796    }
2797    return Changed;
2798  }
2799  
2800  void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) {
2801    LLVM_DEBUG(dbgs() << "Fallthrough Map:\n");
2802    for (auto &MBBI : MF) {
2803      MachineBasicBlock *MBB = MBBI.getFallThrough();
2804      if (MBB != nullptr) {
2805        LLVM_DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> "
2806                          << MBB->getNumber() << "\n");
2807      }
2808      FallthroughMap[&MBBI] = MBB;
2809    }
2810  }
2811  
2812  void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region,
2813                                                      unsigned SelectOut) {
2814    LinearizedRegion *LRegion = new LinearizedRegion();
2815    if (SelectOut) {
2816      LRegion->addLiveOut(SelectOut);
2817      LLVM_DEBUG(dbgs() << "Add LiveOut (BBSelect): " << printReg(SelectOut, TRI)
2818                        << "\n");
2819    }
2820    LRegion->setRegionMRT(Region);
2821    Region->setLinearizedRegion(LRegion);
2822    LRegion->setParent(Region->getParent()
2823                           ? Region->getParent()->getLinearizedRegion()
2824                           : nullptr);
2825  }
2826  
2827  unsigned
2828  AMDGPUMachineCFGStructurizer::initializeSelectRegisters(MRT *MRT, unsigned SelectOut,
2829                                                    MachineRegisterInfo *MRI,
2830                                                    const SIInstrInfo *TII) {
2831    if (MRT->isRegion()) {
2832      RegionMRT *Region = MRT->getRegionMRT();
2833      Region->setBBSelectRegOut(SelectOut);
2834      unsigned InnerSelectOut = createBBSelectReg(TII, MRI);
2835  
2836      // Fixme: Move linearization creation to the original spot
2837      createLinearizedRegion(Region, SelectOut);
2838  
2839      for (auto CI = Region->getChildren()->begin(),
2840                CE = Region->getChildren()->end();
2841           CI != CE; ++CI) {
2842        InnerSelectOut =
2843            initializeSelectRegisters((*CI), InnerSelectOut, MRI, TII);
2844      }
2845      MRT->setBBSelectRegIn(InnerSelectOut);
2846      return InnerSelectOut;
2847    } else {
2848      MRT->setBBSelectRegOut(SelectOut);
2849      unsigned NewSelectIn = createBBSelectReg(TII, MRI);
2850      MRT->setBBSelectRegIn(NewSelectIn);
2851      return NewSelectIn;
2852    }
2853  }
2854  
2855  static void checkRegOnlyPHIInputs(MachineFunction &MF) {
2856    for (auto &MBBI : MF) {
2857      for (MachineBasicBlock::instr_iterator I = MBBI.instr_begin(),
2858                                             E = MBBI.instr_end();
2859           I != E; ++I) {
2860        MachineInstr &Instr = *I;
2861        if (Instr.isPHI()) {
2862          int numPreds = getPHINumInputs(Instr);
2863          for (int i = 0; i < numPreds; ++i) {
2864            assert(Instr.getOperand(i * 2 + 1).isReg() &&
2865                   "PHI Operand not a register");
2866          }
2867        }
2868      }
2869    }
2870  }
2871  
2872  bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) {
2873    const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2874    const SIInstrInfo *TII = ST.getInstrInfo();
2875    TRI = ST.getRegisterInfo();
2876    MRI = &(MF.getRegInfo());
2877    initFallthroughMap(MF);
2878  
2879    checkRegOnlyPHIInputs(MF);
2880    LLVM_DEBUG(dbgs() << "----STRUCTURIZER START----\n");
2881    LLVM_DEBUG(MF.dump());
2882  
2883    Regions = &(getAnalysis<MachineRegionInfoPass>().getRegionInfo());
2884    LLVM_DEBUG(Regions->dump());
2885  
2886    RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI);
2887    setRegionMRT(RTree);
2888    initializeSelectRegisters(RTree, 0, MRI, TII);
2889    LLVM_DEBUG(RTree->dump(TRI));
2890    bool result = structurizeRegions(RTree, true);
2891    delete RTree;
2892    LLVM_DEBUG(dbgs() << "----STRUCTURIZER END----\n");
2893    initFallthroughMap(MF);
2894    return result;
2895  }
2896  
2897  char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID;
2898  
2899  INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
2900                        "AMDGPU Machine CFG Structurizer", false, false)
2901  INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass)
2902  INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
2903                      "AMDGPU Machine CFG Structurizer", false, false)
2904  
2905  FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() {
2906    return new AMDGPUMachineCFGStructurizer();
2907  }
2908