xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1  //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // Transform each threading path to effectively jump thread the DFA. For
10  // example, the CFG below could be transformed as follows, where the cloned
11  // blocks unconditionally branch to the next correct case based on what is
12  // identified in the analysis.
13  //
14  //          sw.bb                        sw.bb
15  //        /   |   \                    /   |   \
16  //   case1  case2  case3          case1  case2  case3
17  //        \   |   /                 |      |      |
18  //       determinator            det.2   det.3  det.1
19  //        br sw.bb                /        |        \
20  //                          sw.bb.2     sw.bb.3     sw.bb.1
21  //                           br case2    br case3    br case1ยง
22  //
23  // Definitions and Terminology:
24  //
25  // * Threading path:
26  //   a list of basic blocks, the exit state, and the block that determines
27  //   the next state, for which the following notation will be used:
28  //   < path of BBs that form a cycle > [ state, determinator ]
29  //
30  // * Predictable switch:
31  //   The switch variable is always a known constant so that all conditional
32  //   jumps based on switch variable can be converted to unconditional jump.
33  //
34  // * Determinator:
35  //   The basic block that determines the next state of the DFA.
36  //
37  // Representing the optimization in C-like pseudocode: the code pattern on the
38  // left could functionally be transformed to the right pattern if the switch
39  // condition is predictable.
40  //
41  //  X = A                       goto A
42  //  for (...)                   A:
43  //    switch (X)                  ...
44  //      case A                    goto B
45  //        X = B                 B:
46  //      case B                    ...
47  //        X = C                   goto C
48  //
49  // The pass first checks that switch variable X is decided by the control flow
50  // path taken in the loop; for example, in case B, the next value of X is
51  // decided to be C. It then enumerates through all paths in the loop and labels
52  // the basic blocks where the next state is decided.
53  //
54  // Using this information it creates new paths that unconditionally branch to
55  // the next case. This involves cloning code, so it only gets triggered if the
56  // amount of code duplicated is below a threshold.
57  //
58  //===----------------------------------------------------------------------===//
59  
60  #include "llvm/Transforms/Scalar/DFAJumpThreading.h"
61  #include "llvm/ADT/APInt.h"
62  #include "llvm/ADT/DenseMap.h"
63  #include "llvm/ADT/SmallSet.h"
64  #include "llvm/ADT/Statistic.h"
65  #include "llvm/Analysis/AssumptionCache.h"
66  #include "llvm/Analysis/CodeMetrics.h"
67  #include "llvm/Analysis/DomTreeUpdater.h"
68  #include "llvm/Analysis/LoopInfo.h"
69  #include "llvm/Analysis/OptimizationRemarkEmitter.h"
70  #include "llvm/Analysis/TargetTransformInfo.h"
71  #include "llvm/IR/CFG.h"
72  #include "llvm/IR/Constants.h"
73  #include "llvm/IR/IntrinsicInst.h"
74  #include "llvm/Support/CommandLine.h"
75  #include "llvm/Support/Debug.h"
76  #include "llvm/Transforms/Utils/Cloning.h"
77  #include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
78  #include "llvm/Transforms/Utils/ValueMapper.h"
79  #include <algorithm>
80  #include <deque>
81  
82  #ifdef EXPENSIVE_CHECKS
83  #include "llvm/IR/Verifier.h"
84  #endif
85  
86  using namespace llvm;
87  
88  #define DEBUG_TYPE "dfa-jump-threading"
89  
90  STATISTIC(NumTransforms, "Number of transformations done");
91  STATISTIC(NumCloned, "Number of blocks cloned");
92  STATISTIC(NumPaths, "Number of individual paths threaded");
93  
94  static cl::opt<bool>
95      ClViewCfgBefore("dfa-jump-view-cfg-before",
96                      cl::desc("View the CFG before DFA Jump Threading"),
97                      cl::Hidden, cl::init(false));
98  
99  static cl::opt<bool> EarlyExitHeuristic(
100      "dfa-early-exit-heuristic",
101      cl::desc("Exit early if an unpredictable value come from the same loop"),
102      cl::Hidden, cl::init(true));
103  
104  static cl::opt<unsigned> MaxPathLength(
105      "dfa-max-path-length",
106      cl::desc("Max number of blocks searched to find a threading path"),
107      cl::Hidden, cl::init(20));
108  
109  static cl::opt<unsigned>
110      MaxNumPaths("dfa-max-num-paths",
111                  cl::desc("Max number of paths enumerated around a switch"),
112                  cl::Hidden, cl::init(200));
113  
114  static cl::opt<unsigned>
115      CostThreshold("dfa-cost-threshold",
116                    cl::desc("Maximum cost accepted for the transformation"),
117                    cl::Hidden, cl::init(50));
118  
119  namespace {
120  
121  class SelectInstToUnfold {
122    SelectInst *SI;
123    PHINode *SIUse;
124  
125  public:
SelectInstToUnfold(SelectInst * SI,PHINode * SIUse)126    SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {}
127  
getInst()128    SelectInst *getInst() { return SI; }
getUse()129    PHINode *getUse() { return SIUse; }
130  
operator bool() const131    explicit operator bool() const { return SI && SIUse; }
132  };
133  
134  void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
135              std::vector<SelectInstToUnfold> *NewSIsToUnfold,
136              std::vector<BasicBlock *> *NewBBs);
137  
138  class DFAJumpThreading {
139  public:
DFAJumpThreading(AssumptionCache * AC,DominatorTree * DT,LoopInfo * LI,TargetTransformInfo * TTI,OptimizationRemarkEmitter * ORE)140    DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
141                     TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE)
142        : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}
143  
144    bool run(Function &F);
145    bool LoopInfoBroken;
146  
147  private:
148    void
unfoldSelectInstrs(DominatorTree * DT,const SmallVector<SelectInstToUnfold,4> & SelectInsts)149    unfoldSelectInstrs(DominatorTree *DT,
150                       const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
151      DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
152      SmallVector<SelectInstToUnfold, 4> Stack;
153      for (SelectInstToUnfold SIToUnfold : SelectInsts)
154        Stack.push_back(SIToUnfold);
155  
156      while (!Stack.empty()) {
157        SelectInstToUnfold SIToUnfold = Stack.pop_back_val();
158  
159        std::vector<SelectInstToUnfold> NewSIsToUnfold;
160        std::vector<BasicBlock *> NewBBs;
161        unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs);
162  
163        // Put newly discovered select instructions into the work list.
164        for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold)
165          Stack.push_back(NewSIToUnfold);
166      }
167    }
168  
169    AssumptionCache *AC;
170    DominatorTree *DT;
171    LoopInfo *LI;
172    TargetTransformInfo *TTI;
173    OptimizationRemarkEmitter *ORE;
174  };
175  
176  } // end anonymous namespace
177  
178  namespace {
179  
180  /// Create a new basic block and sink \p SIToSink into it.
createBasicBlockAndSinkSelectInst(DomTreeUpdater * DTU,SelectInst * SI,PHINode * SIUse,SelectInst * SIToSink,BasicBlock * EndBlock,StringRef NewBBName,BasicBlock ** NewBlock,BranchInst ** NewBranch,std::vector<SelectInstToUnfold> * NewSIsToUnfold,std::vector<BasicBlock * > * NewBBs)181  void createBasicBlockAndSinkSelectInst(
182      DomTreeUpdater *DTU, SelectInst *SI, PHINode *SIUse, SelectInst *SIToSink,
183      BasicBlock *EndBlock, StringRef NewBBName, BasicBlock **NewBlock,
184      BranchInst **NewBranch, std::vector<SelectInstToUnfold> *NewSIsToUnfold,
185      std::vector<BasicBlock *> *NewBBs) {
186    assert(SIToSink->hasOneUse());
187    assert(NewBlock);
188    assert(NewBranch);
189    *NewBlock = BasicBlock::Create(SI->getContext(), NewBBName,
190                                   EndBlock->getParent(), EndBlock);
191    NewBBs->push_back(*NewBlock);
192    *NewBranch = BranchInst::Create(EndBlock, *NewBlock);
193    SIToSink->moveBefore(*NewBranch);
194    NewSIsToUnfold->push_back(SelectInstToUnfold(SIToSink, SIUse));
195    DTU->applyUpdates({{DominatorTree::Insert, *NewBlock, EndBlock}});
196  }
197  
198  /// Unfold the select instruction held in \p SIToUnfold by replacing it with
199  /// control flow.
200  ///
201  /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly
202  /// created basic blocks into \p NewBBs.
203  ///
204  /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
unfold(DomTreeUpdater * DTU,LoopInfo * LI,SelectInstToUnfold SIToUnfold,std::vector<SelectInstToUnfold> * NewSIsToUnfold,std::vector<BasicBlock * > * NewBBs)205  void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
206              std::vector<SelectInstToUnfold> *NewSIsToUnfold,
207              std::vector<BasicBlock *> *NewBBs) {
208    SelectInst *SI = SIToUnfold.getInst();
209    PHINode *SIUse = SIToUnfold.getUse();
210    BasicBlock *StartBlock = SI->getParent();
211    BasicBlock *EndBlock = SIUse->getParent();
212    BranchInst *StartBlockTerm =
213        dyn_cast<BranchInst>(StartBlock->getTerminator());
214  
215    assert(StartBlockTerm && StartBlockTerm->isUnconditional());
216    assert(SI->hasOneUse());
217  
218    // These are the new basic blocks for the conditional branch.
219    // At least one will become an actual new basic block.
220    BasicBlock *TrueBlock = nullptr;
221    BasicBlock *FalseBlock = nullptr;
222    BranchInst *TrueBranch = nullptr;
223    BranchInst *FalseBranch = nullptr;
224  
225    // Sink select instructions to be able to unfold them later.
226    if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getTrueValue())) {
227      createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock,
228                                        "si.unfold.true", &TrueBlock, &TrueBranch,
229                                        NewSIsToUnfold, NewBBs);
230    }
231    if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getFalseValue())) {
232      createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock,
233                                        "si.unfold.false", &FalseBlock,
234                                        &FalseBranch, NewSIsToUnfold, NewBBs);
235    }
236  
237    // If there was nothing to sink, then arbitrarily choose the 'false' side
238    // for a new input value to the PHI.
239    if (!TrueBlock && !FalseBlock) {
240      FalseBlock = BasicBlock::Create(SI->getContext(), "si.unfold.false",
241                                      EndBlock->getParent(), EndBlock);
242      NewBBs->push_back(FalseBlock);
243      BranchInst::Create(EndBlock, FalseBlock);
244      DTU->applyUpdates({{DominatorTree::Insert, FalseBlock, EndBlock}});
245    }
246  
247    // Insert the real conditional branch based on the original condition.
248    // If we did not create a new block for one of the 'true' or 'false' paths
249    // of the condition, it means that side of the branch goes to the end block
250    // directly and the path originates from the start block from the point of
251    // view of the new PHI.
252    BasicBlock *TT = EndBlock;
253    BasicBlock *FT = EndBlock;
254    if (TrueBlock && FalseBlock) {
255      // A diamond.
256      TT = TrueBlock;
257      FT = FalseBlock;
258  
259      // Update the phi node of SI.
260      SIUse->addIncoming(SI->getTrueValue(), TrueBlock);
261      SIUse->addIncoming(SI->getFalseValue(), FalseBlock);
262  
263      // Update any other PHI nodes in EndBlock.
264      for (PHINode &Phi : EndBlock->phis()) {
265        if (&Phi != SIUse) {
266          Value *OrigValue = Phi.getIncomingValueForBlock(StartBlock);
267          Phi.addIncoming(OrigValue, TrueBlock);
268          Phi.addIncoming(OrigValue, FalseBlock);
269        }
270  
271        // Remove incoming place of original StartBlock, which comes in a indirect
272        // way (through TrueBlock and FalseBlock) now.
273        Phi.removeIncomingValue(StartBlock, /* DeletePHIIfEmpty = */ false);
274      }
275    } else {
276      BasicBlock *NewBlock = nullptr;
277      Value *SIOp1 = SI->getTrueValue();
278      Value *SIOp2 = SI->getFalseValue();
279  
280      // A triangle pointing right.
281      if (!TrueBlock) {
282        NewBlock = FalseBlock;
283        FT = FalseBlock;
284      }
285      // A triangle pointing left.
286      else {
287        NewBlock = TrueBlock;
288        TT = TrueBlock;
289        std::swap(SIOp1, SIOp2);
290      }
291  
292      // Update the phi node of SI.
293      for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) {
294        if (SIUse->getIncomingBlock(Idx) == StartBlock)
295          SIUse->setIncomingValue(Idx, SIOp1);
296      }
297      SIUse->addIncoming(SIOp2, NewBlock);
298  
299      // Update any other PHI nodes in EndBlock.
300      for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast<PHINode>(II);
301           ++II) {
302        if (Phi != SIUse)
303          Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), NewBlock);
304      }
305    }
306    StartBlockTerm->eraseFromParent();
307    BranchInst::Create(TT, FT, SI->getCondition(), StartBlock);
308    DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT},
309                       {DominatorTree::Insert, StartBlock, FT}});
310  
311    // Preserve loop info
312    if (Loop *L = LI->getLoopFor(SI->getParent())) {
313      for (BasicBlock *NewBB : *NewBBs)
314        L->addBasicBlockToLoop(NewBB, *LI);
315    }
316  
317    // The select is now dead.
318    assert(SI->use_empty() && "Select must be dead now");
319    SI->eraseFromParent();
320  }
321  
322  struct ClonedBlock {
323    BasicBlock *BB;
324    APInt State; ///< \p State corresponds to the next value of a switch stmnt.
325  };
326  
327  typedef std::deque<BasicBlock *> PathType;
328  typedef std::vector<PathType> PathsType;
329  typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
330  typedef std::vector<ClonedBlock> CloneList;
331  
332  // This data structure keeps track of all blocks that have been cloned.  If two
333  // different ThreadingPaths clone the same block for a certain state it should
334  // be reused, and it can be looked up in this map.
335  typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;
336  
337  // This map keeps track of all the new definitions for an instruction. This
338  // information is needed when restoring SSA form after cloning blocks.
339  typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap;
340  
operator <<(raw_ostream & OS,const PathType & Path)341  inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
342    OS << "< ";
343    for (const BasicBlock *BB : Path) {
344      std::string BBName;
345      if (BB->hasName())
346        raw_string_ostream(BBName) << BB->getName();
347      else
348        raw_string_ostream(BBName) << BB;
349      OS << BBName << " ";
350    }
351    OS << ">";
352    return OS;
353  }
354  
355  /// ThreadingPath is a path in the control flow of a loop that can be threaded
356  /// by cloning necessary basic blocks and replacing conditional branches with
357  /// unconditional ones. A threading path includes a list of basic blocks, the
358  /// exit state, and the block that determines the next state.
359  struct ThreadingPath {
360    /// Exit value is DFA's exit state for the given path.
getExitValue__anonfb50cc300211::ThreadingPath361    APInt getExitValue() const { return ExitVal; }
setExitValue__anonfb50cc300211::ThreadingPath362    void setExitValue(const ConstantInt *V) {
363      ExitVal = V->getValue();
364      IsExitValSet = true;
365    }
isExitValueSet__anonfb50cc300211::ThreadingPath366    bool isExitValueSet() const { return IsExitValSet; }
367  
368    /// Determinator is the basic block that determines the next state of the DFA.
getDeterminatorBB__anonfb50cc300211::ThreadingPath369    const BasicBlock *getDeterminatorBB() const { return DBB; }
setDeterminator__anonfb50cc300211::ThreadingPath370    void setDeterminator(const BasicBlock *BB) { DBB = BB; }
371  
372    /// Path is a list of basic blocks.
getPath__anonfb50cc300211::ThreadingPath373    const PathType &getPath() const { return Path; }
setPath__anonfb50cc300211::ThreadingPath374    void setPath(const PathType &NewPath) { Path = NewPath; }
375  
print__anonfb50cc300211::ThreadingPath376    void print(raw_ostream &OS) const {
377      OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]";
378    }
379  
380  private:
381    PathType Path;
382    APInt ExitVal;
383    const BasicBlock *DBB = nullptr;
384    bool IsExitValSet = false;
385  };
386  
387  #ifndef NDEBUG
operator <<(raw_ostream & OS,const ThreadingPath & TPath)388  inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
389    TPath.print(OS);
390    return OS;
391  }
392  #endif
393  
394  struct MainSwitch {
MainSwitch__anonfb50cc300211::MainSwitch395    MainSwitch(SwitchInst *SI, LoopInfo *LI, OptimizationRemarkEmitter *ORE)
396        : LI(LI) {
397      if (isCandidate(SI)) {
398        Instr = SI;
399      } else {
400        ORE->emit([&]() {
401          return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI)
402                 << "Switch instruction is not predictable.";
403        });
404      }
405    }
406  
407    virtual ~MainSwitch() = default;
408  
getInstr__anonfb50cc300211::MainSwitch409    SwitchInst *getInstr() const { return Instr; }
getSelectInsts__anonfb50cc300211::MainSwitch410    const SmallVector<SelectInstToUnfold, 4> getSelectInsts() {
411      return SelectInsts;
412    }
413  
414  private:
415    /// Do a use-def chain traversal starting from the switch condition to see if
416    /// \p SI is a potential condidate.
417    ///
418    /// Also, collect select instructions to unfold.
isCandidate__anonfb50cc300211::MainSwitch419    bool isCandidate(const SwitchInst *SI) {
420      std::deque<std::pair<Value *, BasicBlock *>> Q;
421      SmallSet<Value *, 16> SeenValues;
422      SelectInsts.clear();
423  
424      Value *SICond = SI->getCondition();
425      LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n");
426      if (!isa<PHINode>(SICond))
427        return false;
428  
429      // The switch must be in a loop.
430      const Loop *L = LI->getLoopFor(SI->getParent());
431      if (!L)
432        return false;
433  
434      addToQueue(SICond, nullptr, Q, SeenValues);
435  
436      while (!Q.empty()) {
437        Value *Current = Q.front().first;
438        BasicBlock *CurrentIncomingBB = Q.front().second;
439        Q.pop_front();
440  
441        if (auto *Phi = dyn_cast<PHINode>(Current)) {
442          for (BasicBlock *IncomingBB : Phi->blocks()) {
443            Value *Incoming = Phi->getIncomingValueForBlock(IncomingBB);
444            addToQueue(Incoming, IncomingBB, Q, SeenValues);
445          }
446          LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");
447        } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) {
448          if (!isValidSelectInst(SelI))
449            return false;
450          addToQueue(SelI->getTrueValue(), CurrentIncomingBB, Q, SeenValues);
451          addToQueue(SelI->getFalseValue(), CurrentIncomingBB, Q, SeenValues);
452          LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");
453          if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back()))
454            SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));
455        } else if (isa<Constant>(Current)) {
456          LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n");
457          continue;
458        } else {
459          LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n");
460          // Allow unpredictable values. The hope is that those will be the
461          // initial switch values that can be ignored (they will hit the
462          // unthreaded switch) but this assumption will get checked later after
463          // paths have been enumerated (in function getStateDefMap).
464  
465          // If the unpredictable value comes from the same inner loop it is
466          // likely that it will also be on the enumerated paths, causing us to
467          // exit after we have enumerated all the paths. This heuristic save
468          // compile time because a search for all the paths can become expensive.
469          if (EarlyExitHeuristic &&
470              L->contains(LI->getLoopFor(CurrentIncomingBB))) {
471            LLVM_DEBUG(dbgs()
472                       << "\tExiting early due to unpredictability heuristic.\n");
473            return false;
474          }
475  
476          continue;
477        }
478      }
479  
480      return true;
481    }
482  
addToQueue__anonfb50cc300211::MainSwitch483    void addToQueue(Value *Val, BasicBlock *BB,
484                    std::deque<std::pair<Value *, BasicBlock *>> &Q,
485                    SmallSet<Value *, 16> &SeenValues) {
486      if (SeenValues.contains(Val))
487        return;
488      Q.push_back({Val, BB});
489      SeenValues.insert(Val);
490    }
491  
isValidSelectInst__anonfb50cc300211::MainSwitch492    bool isValidSelectInst(SelectInst *SI) {
493      if (!SI->hasOneUse())
494        return false;
495  
496      Instruction *SIUse = dyn_cast<Instruction>(SI->user_back());
497      // The use of the select inst should be either a phi or another select.
498      if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse)))
499        return false;
500  
501      BasicBlock *SIBB = SI->getParent();
502  
503      // Currently, we can only expand select instructions in basic blocks with
504      // one successor.
505      BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator());
506      if (!SITerm || !SITerm->isUnconditional())
507        return false;
508  
509      // Only fold the select coming from directly where it is defined.
510      PHINode *PHIUser = dyn_cast<PHINode>(SIUse);
511      if (PHIUser && PHIUser->getIncomingBlock(*SI->use_begin()) != SIBB)
512        return false;
513  
514      // If select will not be sunk during unfolding, and it is in the same basic
515      // block as another state defining select, then cannot unfold both.
516      for (SelectInstToUnfold SIToUnfold : SelectInsts) {
517        SelectInst *PrevSI = SIToUnfold.getInst();
518        if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI &&
519            PrevSI->getParent() == SI->getParent())
520          return false;
521      }
522  
523      return true;
524    }
525  
526    LoopInfo *LI;
527    SwitchInst *Instr = nullptr;
528    SmallVector<SelectInstToUnfold, 4> SelectInsts;
529  };
530  
531  struct AllSwitchPaths {
AllSwitchPaths__anonfb50cc300211::AllSwitchPaths532    AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE,
533                   LoopInfo *LI)
534        : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE),
535          LI(LI) {}
536  
getThreadingPaths__anonfb50cc300211::AllSwitchPaths537    std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; }
getNumThreadingPaths__anonfb50cc300211::AllSwitchPaths538    unsigned getNumThreadingPaths() { return TPaths.size(); }
getSwitchInst__anonfb50cc300211::AllSwitchPaths539    SwitchInst *getSwitchInst() { return Switch; }
getSwitchBlock__anonfb50cc300211::AllSwitchPaths540    BasicBlock *getSwitchBlock() { return SwitchBlock; }
541  
run__anonfb50cc300211::AllSwitchPaths542    void run() {
543      VisitedBlocks Visited;
544      PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1);
545      StateDefMap StateDef = getStateDefMap(LoopPaths);
546  
547      if (StateDef.empty()) {
548        ORE->emit([&]() {
549          return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
550                                          Switch)
551                 << "Switch instruction is not predictable.";
552        });
553        return;
554      }
555  
556      for (const PathType &Path : LoopPaths) {
557        ThreadingPath TPath;
558  
559        const BasicBlock *PrevBB = Path.back();
560        for (const BasicBlock *BB : Path) {
561          if (StateDef.contains(BB)) {
562            const PHINode *Phi = dyn_cast<PHINode>(StateDef[BB]);
563            assert(Phi && "Expected a state-defining instr to be a phi node.");
564  
565            const Value *V = Phi->getIncomingValueForBlock(PrevBB);
566            if (const ConstantInt *C = dyn_cast<const ConstantInt>(V)) {
567              TPath.setExitValue(C);
568              TPath.setDeterminator(BB);
569              TPath.setPath(Path);
570            }
571          }
572  
573          // Switch block is the determinator, this is the final exit value.
574          if (TPath.isExitValueSet() && BB == Path.front())
575            break;
576  
577          PrevBB = BB;
578        }
579  
580        if (TPath.isExitValueSet() && isSupported(TPath))
581          TPaths.push_back(TPath);
582      }
583    }
584  
585  private:
586    // Value: an instruction that defines a switch state;
587    // Key: the parent basic block of that instruction.
588    typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap;
589  
paths__anonfb50cc300211::AllSwitchPaths590    PathsType paths(BasicBlock *BB, VisitedBlocks &Visited,
591                    unsigned PathDepth) const {
592      PathsType Res;
593  
594      // Stop exploring paths after visiting MaxPathLength blocks
595      if (PathDepth > MaxPathLength) {
596        ORE->emit([&]() {
597          return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached",
598                                            Switch)
599                 << "Exploration stopped after visiting MaxPathLength="
600                 << ore::NV("MaxPathLength", MaxPathLength) << " blocks.";
601        });
602        return Res;
603      }
604  
605      Visited.insert(BB);
606  
607      // Stop if we have reached the BB out of loop, since its successors have no
608      // impact on the DFA.
609      // TODO: Do we need to stop exploring if BB is the outer loop of the switch?
610      if (!LI->getLoopFor(BB))
611        return Res;
612  
613      // Some blocks have multiple edges to the same successor, and this set
614      // is used to prevent a duplicate path from being generated
615      SmallSet<BasicBlock *, 4> Successors;
616      for (BasicBlock *Succ : successors(BB)) {
617        if (!Successors.insert(Succ).second)
618          continue;
619  
620        // Found a cycle through the SwitchBlock
621        if (Succ == SwitchBlock) {
622          Res.push_back({BB});
623          continue;
624        }
625  
626        // We have encountered a cycle, do not get caught in it
627        if (Visited.contains(Succ))
628          continue;
629  
630        PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1);
631        for (const PathType &Path : SuccPaths) {
632          PathType NewPath(Path);
633          NewPath.push_front(BB);
634          Res.push_back(NewPath);
635          if (Res.size() >= MaxNumPaths) {
636            return Res;
637          }
638        }
639      }
640      // This block could now be visited again from a different predecessor. Note
641      // that this will result in exponential runtime. Subpaths could possibly be
642      // cached but it takes a lot of memory to store them.
643      Visited.erase(BB);
644      return Res;
645    }
646  
647    /// Walk the use-def chain and collect all the state-defining instructions.
648    ///
649    /// Return an empty map if unpredictable values encountered inside the basic
650    /// blocks of \p LoopPaths.
getStateDefMap__anonfb50cc300211::AllSwitchPaths651    StateDefMap getStateDefMap(const PathsType &LoopPaths) const {
652      StateDefMap Res;
653  
654      // Basic blocks belonging to any of the loops around the switch statement.
655      SmallPtrSet<BasicBlock *, 16> LoopBBs;
656      for (const PathType &Path : LoopPaths) {
657        for (BasicBlock *BB : Path)
658          LoopBBs.insert(BB);
659      }
660  
661      Value *FirstDef = Switch->getOperand(0);
662  
663      assert(isa<PHINode>(FirstDef) && "The first definition must be a phi.");
664  
665      SmallVector<PHINode *, 8> Stack;
666      Stack.push_back(dyn_cast<PHINode>(FirstDef));
667      SmallSet<Value *, 16> SeenValues;
668  
669      while (!Stack.empty()) {
670        PHINode *CurPhi = Stack.pop_back_val();
671  
672        Res[CurPhi->getParent()] = CurPhi;
673        SeenValues.insert(CurPhi);
674  
675        for (BasicBlock *IncomingBB : CurPhi->blocks()) {
676          Value *Incoming = CurPhi->getIncomingValueForBlock(IncomingBB);
677          bool IsOutsideLoops = LoopBBs.count(IncomingBB) == 0;
678          if (Incoming == FirstDef || isa<ConstantInt>(Incoming) ||
679              SeenValues.contains(Incoming) || IsOutsideLoops) {
680            continue;
681          }
682  
683          // Any unpredictable value inside the loops means we must bail out.
684          if (!isa<PHINode>(Incoming))
685            return StateDefMap();
686  
687          Stack.push_back(cast<PHINode>(Incoming));
688        }
689      }
690  
691      return Res;
692    }
693  
694    /// The determinator BB should precede the switch-defining BB.
695    ///
696    /// Otherwise, it is possible that the state defined in the determinator block
697    /// defines the state for the next iteration of the loop, rather than for the
698    /// current one.
699    ///
700    /// Currently supported paths:
701    /// \code
702    /// < switch bb1 determ def > [ 42, determ ]
703    /// < switch_and_def bb1 determ > [ 42, determ ]
704    /// < switch_and_def_and_determ bb1 > [ 42, switch_and_def_and_determ ]
705    /// \endcode
706    ///
707    /// Unsupported paths:
708    /// \code
709    /// < switch bb1 def determ > [ 43, determ ]
710    /// < switch_and_determ bb1 def > [ 43, switch_and_determ ]
711    /// \endcode
isSupported__anonfb50cc300211::AllSwitchPaths712    bool isSupported(const ThreadingPath &TPath) {
713      Instruction *SwitchCondI = dyn_cast<Instruction>(Switch->getCondition());
714      assert(SwitchCondI);
715      if (!SwitchCondI)
716        return false;
717  
718      const BasicBlock *SwitchCondDefBB = SwitchCondI->getParent();
719      const BasicBlock *SwitchCondUseBB = Switch->getParent();
720      const BasicBlock *DeterminatorBB = TPath.getDeterminatorBB();
721  
722      assert(
723          SwitchCondUseBB == TPath.getPath().front() &&
724          "The first BB in a threading path should have the switch instruction");
725      if (SwitchCondUseBB != TPath.getPath().front())
726        return false;
727  
728      // Make DeterminatorBB the first element in Path.
729      PathType Path = TPath.getPath();
730      auto ItDet = llvm::find(Path, DeterminatorBB);
731      std::rotate(Path.begin(), ItDet, Path.end());
732  
733      bool IsDetBBSeen = false;
734      bool IsDefBBSeen = false;
735      bool IsUseBBSeen = false;
736      for (BasicBlock *BB : Path) {
737        if (BB == DeterminatorBB)
738          IsDetBBSeen = true;
739        if (BB == SwitchCondDefBB)
740          IsDefBBSeen = true;
741        if (BB == SwitchCondUseBB)
742          IsUseBBSeen = true;
743        if (IsDetBBSeen && IsUseBBSeen && !IsDefBBSeen)
744          return false;
745      }
746  
747      return true;
748    }
749  
750    SwitchInst *Switch;
751    BasicBlock *SwitchBlock;
752    OptimizationRemarkEmitter *ORE;
753    std::vector<ThreadingPath> TPaths;
754    LoopInfo *LI;
755  };
756  
757  struct TransformDFA {
TransformDFA__anonfb50cc300211::TransformDFA758    TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT,
759                 AssumptionCache *AC, TargetTransformInfo *TTI,
760                 OptimizationRemarkEmitter *ORE,
761                 SmallPtrSet<const Value *, 32> EphValues)
762        : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
763          EphValues(EphValues) {}
764  
run__anonfb50cc300211::TransformDFA765    void run() {
766      if (isLegalAndProfitableToTransform()) {
767        createAllExitPaths();
768        NumTransforms++;
769      }
770    }
771  
772  private:
773    /// This function performs both a legality check and profitability check at
774    /// the same time since it is convenient to do so. It iterates through all
775    /// blocks that will be cloned, and keeps track of the duplication cost. It
776    /// also returns false if it is illegal to clone some required block.
isLegalAndProfitableToTransform__anonfb50cc300211::TransformDFA777    bool isLegalAndProfitableToTransform() {
778      CodeMetrics Metrics;
779      SwitchInst *Switch = SwitchPaths->getSwitchInst();
780  
781      // Don't thread switch without multiple successors.
782      if (Switch->getNumSuccessors() <= 1)
783        return false;
784  
785      // Note that DuplicateBlockMap is not being used as intended here. It is
786      // just being used to ensure (BB, State) pairs are only counted once.
787      DuplicateBlockMap DuplicateMap;
788  
789      for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
790        PathType PathBBs = TPath.getPath();
791        APInt NextState = TPath.getExitValue();
792        const BasicBlock *Determinator = TPath.getDeterminatorBB();
793  
794        // Update Metrics for the Switch block, this is always cloned
795        BasicBlock *BB = SwitchPaths->getSwitchBlock();
796        BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
797        if (!VisitedBB) {
798          Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
799          DuplicateMap[BB].push_back({BB, NextState});
800        }
801  
802        // If the Switch block is the Determinator, then we can continue since
803        // this is the only block that is cloned and we already counted for it.
804        if (PathBBs.front() == Determinator)
805          continue;
806  
807        // Otherwise update Metrics for all blocks that will be cloned. If any
808        // block is already cloned and would be reused, don't double count it.
809        auto DetIt = llvm::find(PathBBs, Determinator);
810        for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
811          BB = *BBIt;
812          VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
813          if (VisitedBB)
814            continue;
815          Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
816          DuplicateMap[BB].push_back({BB, NextState});
817        }
818  
819        if (Metrics.notDuplicatable) {
820          LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
821                            << "non-duplicatable instructions.\n");
822          ORE->emit([&]() {
823            return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst",
824                                            Switch)
825                   << "Contains non-duplicatable instructions.";
826          });
827          return false;
828        }
829  
830        // FIXME: Allow jump threading with controlled convergence.
831        if (Metrics.Convergence != ConvergenceKind::None) {
832          LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
833                            << "convergent instructions.\n");
834          ORE->emit([&]() {
835            return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
836                   << "Contains convergent instructions.";
837          });
838          return false;
839        }
840  
841        if (!Metrics.NumInsts.isValid()) {
842          LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
843                            << "instructions with invalid cost.\n");
844          ORE->emit([&]() {
845            return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
846                   << "Contains instructions with invalid cost.";
847          });
848          return false;
849        }
850      }
851  
852      InstructionCost DuplicationCost = 0;
853  
854      unsigned JumpTableSize = 0;
855      TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr,
856                                            nullptr);
857      if (JumpTableSize == 0) {
858        // Factor in the number of conditional branches reduced from jump
859        // threading. Assume that lowering the switch block is implemented by
860        // using binary search, hence the LogBase2().
861        unsigned CondBranches =
862            APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
863        assert(CondBranches > 0 &&
864               "The threaded switch must have multiple branches");
865        DuplicationCost = Metrics.NumInsts / CondBranches;
866      } else {
867        // Compared with jump tables, the DFA optimizer removes an indirect branch
868        // on each loop iteration, thus making branch prediction more precise. The
869        // more branch targets there are, the more likely it is for the branch
870        // predictor to make a mistake, and the more benefit there is in the DFA
871        // optimizer. Thus, the more branch targets there are, the lower is the
872        // cost of the DFA opt.
873        DuplicationCost = Metrics.NumInsts / JumpTableSize;
874      }
875  
876      LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
877                        << SwitchPaths->getSwitchBlock()->getName()
878                        << " is: " << DuplicationCost << "\n\n");
879  
880      if (DuplicationCost > CostThreshold) {
881        LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the "
882                          << "cost threshold.\n");
883        ORE->emit([&]() {
884          return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
885                 << "Duplication cost exceeds the cost threshold (cost="
886                 << ore::NV("Cost", DuplicationCost)
887                 << ", threshold=" << ore::NV("Threshold", CostThreshold) << ").";
888        });
889        return false;
890      }
891  
892      ORE->emit([&]() {
893        return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch)
894               << "Switch statement jump-threaded.";
895      });
896  
897      return true;
898    }
899  
900    /// Transform each threading path to effectively jump thread the DFA.
createAllExitPaths__anonfb50cc300211::TransformDFA901    void createAllExitPaths() {
902      DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager);
903  
904      // Move the switch block to the end of the path, since it will be duplicated
905      BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
906      for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
907        LLVM_DEBUG(dbgs() << TPath << "\n");
908        PathType NewPath(TPath.getPath());
909        NewPath.push_back(SwitchBlock);
910        TPath.setPath(NewPath);
911      }
912  
913      // Transform the ThreadingPaths and keep track of the cloned values
914      DuplicateBlockMap DuplicateMap;
915      DefMap NewDefs;
916  
917      SmallSet<BasicBlock *, 16> BlocksToClean;
918      for (BasicBlock *BB : successors(SwitchBlock))
919        BlocksToClean.insert(BB);
920  
921      for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
922        createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
923        NumPaths++;
924      }
925  
926      // After all paths are cloned, now update the last successor of the cloned
927      // path so it skips over the switch statement
928      for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
929        updateLastSuccessor(TPath, DuplicateMap, &DTU);
930  
931      // For each instruction that was cloned and used outside, update its uses
932      updateSSA(NewDefs);
933  
934      // Clean PHI Nodes for the newly created blocks
935      for (BasicBlock *BB : BlocksToClean)
936        cleanPhiNodes(BB);
937    }
938  
939    /// For a specific ThreadingPath \p Path, create an exit path starting from
940    /// the determinator block.
941    ///
942    /// To remember the correct destination, we have to duplicate blocks
943    /// corresponding to each state. Also update the terminating instruction of
944    /// the predecessors, and phis in the successor blocks.
createExitPath__anonfb50cc300211::TransformDFA945    void createExitPath(DefMap &NewDefs, ThreadingPath &Path,
946                        DuplicateBlockMap &DuplicateMap,
947                        SmallSet<BasicBlock *, 16> &BlocksToClean,
948                        DomTreeUpdater *DTU) {
949      APInt NextState = Path.getExitValue();
950      const BasicBlock *Determinator = Path.getDeterminatorBB();
951      PathType PathBBs = Path.getPath();
952  
953      // Don't select the placeholder block in front
954      if (PathBBs.front() == Determinator)
955        PathBBs.pop_front();
956  
957      auto DetIt = llvm::find(PathBBs, Determinator);
958      // When there is only one BB in PathBBs, the determinator takes itself as a
959      // direct predecessor.
960      BasicBlock *PrevBB = PathBBs.size() == 1 ? *DetIt : *std::prev(DetIt);
961      for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
962        BasicBlock *BB = *BBIt;
963        BlocksToClean.insert(BB);
964  
965        // We already cloned BB for this NextState, now just update the branch
966        // and continue.
967        BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap);
968        if (NextBB) {
969          updatePredecessor(PrevBB, BB, NextBB, DTU);
970          PrevBB = NextBB;
971          continue;
972        }
973  
974        // Clone the BB and update the successor of Prev to jump to the new block
975        BasicBlock *NewBB = cloneBlockAndUpdatePredecessor(
976            BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU);
977        DuplicateMap[BB].push_back({NewBB, NextState});
978        BlocksToClean.insert(NewBB);
979        PrevBB = NewBB;
980      }
981    }
982  
983    /// Restore SSA form after cloning blocks.
984    ///
985    /// Each cloned block creates new defs for a variable, and the uses need to be
986    /// updated to reflect this. The uses may be replaced with a cloned value, or
987    /// some derived phi instruction. Note that all uses of a value defined in the
988    /// same block were already remapped when cloning the block.
updateSSA__anonfb50cc300211::TransformDFA989    void updateSSA(DefMap &NewDefs) {
990      SSAUpdaterBulk SSAUpdate;
991      SmallVector<Use *, 16> UsesToRename;
992  
993      for (const auto &KV : NewDefs) {
994        Instruction *I = KV.first;
995        BasicBlock *BB = I->getParent();
996        std::vector<Instruction *> Cloned = KV.second;
997  
998        // Scan all uses of this instruction to see if it is used outside of its
999        // block, and if so, record them in UsesToRename.
1000        for (Use &U : I->uses()) {
1001          Instruction *User = cast<Instruction>(U.getUser());
1002          if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1003            if (UserPN->getIncomingBlock(U) == BB)
1004              continue;
1005          } else if (User->getParent() == BB) {
1006            continue;
1007          }
1008  
1009          UsesToRename.push_back(&U);
1010        }
1011  
1012        // If there are no uses outside the block, we're done with this
1013        // instruction.
1014        if (UsesToRename.empty())
1015          continue;
1016        LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I
1017                          << "\n");
1018  
1019        // We found a use of I outside of BB.  Rename all uses of I that are
1020        // outside its block to be uses of the appropriate PHI node etc.  See
1021        // ValuesInBlocks with the values we know.
1022        unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType());
1023        SSAUpdate.AddAvailableValue(VarNum, BB, I);
1024        for (Instruction *New : Cloned)
1025          SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New);
1026  
1027        while (!UsesToRename.empty())
1028          SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val());
1029  
1030        LLVM_DEBUG(dbgs() << "\n");
1031      }
1032      // SSAUpdater handles phi placement and renaming uses with the appropriate
1033      // value.
1034      SSAUpdate.RewriteAllUses(DT);
1035    }
1036  
1037    /// Clones a basic block, and adds it to the CFG.
1038    ///
1039    /// This function also includes updating phi nodes in the successors of the
1040    /// BB, and remapping uses that were defined locally in the cloned BB.
cloneBlockAndUpdatePredecessor__anonfb50cc300211::TransformDFA1041    BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB,
1042                                               const APInt &NextState,
1043                                               DuplicateBlockMap &DuplicateMap,
1044                                               DefMap &NewDefs,
1045                                               DomTreeUpdater *DTU) {
1046      ValueToValueMapTy VMap;
1047      BasicBlock *NewBB = CloneBasicBlock(
1048          BB, VMap, ".jt" + std::to_string(NextState.getLimitedValue()),
1049          BB->getParent());
1050      NewBB->moveAfter(BB);
1051      NumCloned++;
1052  
1053      for (Instruction &I : *NewBB) {
1054        // Do not remap operands of PHINode in case a definition in BB is an
1055        // incoming value to a phi in the same block. This incoming value will
1056        // be renamed later while restoring SSA.
1057        if (isa<PHINode>(&I))
1058          continue;
1059        RemapInstruction(&I, VMap,
1060                         RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);
1061        if (AssumeInst *II = dyn_cast<AssumeInst>(&I))
1062          AC->registerAssumption(II);
1063      }
1064  
1065      updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap);
1066      updatePredecessor(PrevBB, BB, NewBB, DTU);
1067      updateDefMap(NewDefs, VMap);
1068  
1069      // Add all successors to the DominatorTree
1070      SmallPtrSet<BasicBlock *, 4> SuccSet;
1071      for (auto *SuccBB : successors(NewBB)) {
1072        if (SuccSet.insert(SuccBB).second)
1073          DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}});
1074      }
1075      SuccSet.clear();
1076      return NewBB;
1077    }
1078  
1079    /// Update the phi nodes in BB's successors.
1080    ///
1081    /// This means creating a new incoming value from NewBB with the new
1082    /// instruction wherever there is an incoming value from BB.
updateSuccessorPhis__anonfb50cc300211::TransformDFA1083    void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB,
1084                             const APInt &NextState, ValueToValueMapTy &VMap,
1085                             DuplicateBlockMap &DuplicateMap) {
1086      std::vector<BasicBlock *> BlocksToUpdate;
1087  
1088      // If BB is the last block in the path, we can simply update the one case
1089      // successor that will be reached.
1090      if (BB == SwitchPaths->getSwitchBlock()) {
1091        SwitchInst *Switch = SwitchPaths->getSwitchInst();
1092        BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1093        BlocksToUpdate.push_back(NextCase);
1094        BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap);
1095        if (ClonedSucc)
1096          BlocksToUpdate.push_back(ClonedSucc);
1097      }
1098      // Otherwise update phis in all successors.
1099      else {
1100        for (BasicBlock *Succ : successors(BB)) {
1101          BlocksToUpdate.push_back(Succ);
1102  
1103          // Check if a successor has already been cloned for the particular exit
1104          // value. In this case if a successor was already cloned, the phi nodes
1105          // in the cloned block should be updated directly.
1106          BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap);
1107          if (ClonedSucc)
1108            BlocksToUpdate.push_back(ClonedSucc);
1109        }
1110      }
1111  
1112      // If there is a phi with an incoming value from BB, create a new incoming
1113      // value for the new predecessor ClonedBB. The value will either be the same
1114      // value from BB or a cloned value.
1115      for (BasicBlock *Succ : BlocksToUpdate) {
1116        for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II);
1117             ++II) {
1118          Value *Incoming = Phi->getIncomingValueForBlock(BB);
1119          if (Incoming) {
1120            if (isa<Constant>(Incoming)) {
1121              Phi->addIncoming(Incoming, ClonedBB);
1122              continue;
1123            }
1124            Value *ClonedVal = VMap[Incoming];
1125            if (ClonedVal)
1126              Phi->addIncoming(ClonedVal, ClonedBB);
1127            else
1128              Phi->addIncoming(Incoming, ClonedBB);
1129          }
1130        }
1131      }
1132    }
1133  
1134    /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all
1135    /// other successors are kept as well.
updatePredecessor__anonfb50cc300211::TransformDFA1136    void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB,
1137                           BasicBlock *NewBB, DomTreeUpdater *DTU) {
1138      // When a path is reused, there is a chance that predecessors were already
1139      // updated before. Check if the predecessor needs to be updated first.
1140      if (!isPredecessor(OldBB, PrevBB))
1141        return;
1142  
1143      Instruction *PrevTerm = PrevBB->getTerminator();
1144      for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) {
1145        if (PrevTerm->getSuccessor(Idx) == OldBB) {
1146          OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true);
1147          PrevTerm->setSuccessor(Idx, NewBB);
1148        }
1149      }
1150      DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB},
1151                         {DominatorTree::Insert, PrevBB, NewBB}});
1152    }
1153  
1154    /// Add new value mappings to the DefMap to keep track of all new definitions
1155    /// for a particular instruction. These will be used while updating SSA form.
updateDefMap__anonfb50cc300211::TransformDFA1156    void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {
1157      SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector;
1158      NewDefsVector.reserve(VMap.size());
1159  
1160      for (auto Entry : VMap) {
1161        Instruction *Inst =
1162            dyn_cast<Instruction>(const_cast<Value *>(Entry.first));
1163        if (!Inst || !Entry.second || isa<BranchInst>(Inst) ||
1164            isa<SwitchInst>(Inst)) {
1165          continue;
1166        }
1167  
1168        Instruction *Cloned = dyn_cast<Instruction>(Entry.second);
1169        if (!Cloned)
1170          continue;
1171  
1172        NewDefsVector.push_back({Inst, Cloned});
1173      }
1174  
1175      // Sort the defs to get deterministic insertion order into NewDefs.
1176      sort(NewDefsVector, [](const auto &LHS, const auto &RHS) {
1177        if (LHS.first == RHS.first)
1178          return LHS.second->comesBefore(RHS.second);
1179        return LHS.first->comesBefore(RHS.first);
1180      });
1181  
1182      for (const auto &KV : NewDefsVector)
1183        NewDefs[KV.first].push_back(KV.second);
1184    }
1185  
1186    /// Update the last branch of a particular cloned path to point to the correct
1187    /// case successor.
1188    ///
1189    /// Note that this is an optional step and would have been done in later
1190    /// optimizations, but it makes the CFG significantly easier to work with.
updateLastSuccessor__anonfb50cc300211::TransformDFA1191    void updateLastSuccessor(ThreadingPath &TPath,
1192                             DuplicateBlockMap &DuplicateMap,
1193                             DomTreeUpdater *DTU) {
1194      APInt NextState = TPath.getExitValue();
1195      BasicBlock *BB = TPath.getPath().back();
1196      BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap);
1197  
1198      // Note multiple paths can end at the same block so check that it is not
1199      // updated yet
1200      if (!isa<SwitchInst>(LastBlock->getTerminator()))
1201        return;
1202      SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator());
1203      BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1204  
1205      std::vector<DominatorTree::UpdateType> DTUpdates;
1206      SmallPtrSet<BasicBlock *, 4> SuccSet;
1207      for (BasicBlock *Succ : successors(LastBlock)) {
1208        if (Succ != NextCase && SuccSet.insert(Succ).second)
1209          DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ});
1210      }
1211  
1212      Switch->eraseFromParent();
1213      BranchInst::Create(NextCase, LastBlock);
1214  
1215      DTU->applyUpdates(DTUpdates);
1216    }
1217  
1218    /// After cloning blocks, some of the phi nodes have extra incoming values
1219    /// that are no longer used. This function removes them.
cleanPhiNodes__anonfb50cc300211::TransformDFA1220    void cleanPhiNodes(BasicBlock *BB) {
1221      // If BB is no longer reachable, remove any remaining phi nodes
1222      if (pred_empty(BB)) {
1223        std::vector<PHINode *> PhiToRemove;
1224        for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
1225          PhiToRemove.push_back(Phi);
1226        }
1227        for (PHINode *PN : PhiToRemove) {
1228          PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
1229          PN->eraseFromParent();
1230        }
1231        return;
1232      }
1233  
1234      // Remove any incoming values that come from an invalid predecessor
1235      for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
1236        std::vector<BasicBlock *> BlocksToRemove;
1237        for (BasicBlock *IncomingBB : Phi->blocks()) {
1238          if (!isPredecessor(BB, IncomingBB))
1239            BlocksToRemove.push_back(IncomingBB);
1240        }
1241        for (BasicBlock *BB : BlocksToRemove)
1242          Phi->removeIncomingValue(BB);
1243      }
1244    }
1245  
1246    /// Checks if BB was already cloned for a particular next state value. If it
1247    /// was then it returns this cloned block, and otherwise null.
getClonedBB__anonfb50cc300211::TransformDFA1248    BasicBlock *getClonedBB(BasicBlock *BB, const APInt &NextState,
1249                            DuplicateBlockMap &DuplicateMap) {
1250      CloneList ClonedBBs = DuplicateMap[BB];
1251  
1252      // Find an entry in the CloneList with this NextState. If it exists then
1253      // return the corresponding BB
1254      auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) {
1255        return C.State == NextState;
1256      });
1257      return It != ClonedBBs.end() ? (*It).BB : nullptr;
1258    }
1259  
1260    /// Helper to get the successor corresponding to a particular case value for
1261    /// a switch statement.
getNextCaseSuccessor__anonfb50cc300211::TransformDFA1262    BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) {
1263      BasicBlock *NextCase = nullptr;
1264      for (auto Case : Switch->cases()) {
1265        if (Case.getCaseValue()->getValue() == NextState) {
1266          NextCase = Case.getCaseSuccessor();
1267          break;
1268        }
1269      }
1270      if (!NextCase)
1271        NextCase = Switch->getDefaultDest();
1272      return NextCase;
1273    }
1274  
1275    /// Returns true if IncomingBB is a predecessor of BB.
isPredecessor__anonfb50cc300211::TransformDFA1276    bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
1277      return llvm::is_contained(predecessors(BB), IncomingBB);
1278    }
1279  
1280    AllSwitchPaths *SwitchPaths;
1281    DominatorTree *DT;
1282    AssumptionCache *AC;
1283    TargetTransformInfo *TTI;
1284    OptimizationRemarkEmitter *ORE;
1285    SmallPtrSet<const Value *, 32> EphValues;
1286    std::vector<ThreadingPath> TPaths;
1287  };
1288  
run(Function & F)1289  bool DFAJumpThreading::run(Function &F) {
1290    LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");
1291  
1292    if (F.hasOptSize()) {
1293      LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n");
1294      return false;
1295    }
1296  
1297    if (ClViewCfgBefore)
1298      F.viewCFG();
1299  
1300    SmallVector<AllSwitchPaths, 2> ThreadableLoops;
1301    bool MadeChanges = false;
1302    LoopInfoBroken = false;
1303  
1304    for (BasicBlock &BB : F) {
1305      auto *SI = dyn_cast<SwitchInst>(BB.getTerminator());
1306      if (!SI)
1307        continue;
1308  
1309      LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
1310                        << " is a candidate\n");
1311      MainSwitch Switch(SI, LI, ORE);
1312  
1313      if (!Switch.getInstr())
1314        continue;
1315  
1316      LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a "
1317                        << "candidate for jump threading\n");
1318      LLVM_DEBUG(SI->dump());
1319  
1320      unfoldSelectInstrs(DT, Switch.getSelectInsts());
1321      if (!Switch.getSelectInsts().empty())
1322        MadeChanges = true;
1323  
1324      AllSwitchPaths SwitchPaths(&Switch, ORE, LI);
1325      SwitchPaths.run();
1326  
1327      if (SwitchPaths.getNumThreadingPaths() > 0) {
1328        ThreadableLoops.push_back(SwitchPaths);
1329  
1330        // For the time being limit this optimization to occurring once in a
1331        // function since it can change the CFG significantly. This is not a
1332        // strict requirement but it can cause buggy behavior if there is an
1333        // overlap of blocks in different opportunities. There is a lot of room to
1334        // experiment with catching more opportunities here.
1335        // NOTE: To release this contraint, we must handle LoopInfo invalidation
1336        break;
1337      }
1338    }
1339  
1340  #ifdef NDEBUG
1341    LI->verify(*DT);
1342  #endif
1343  
1344    SmallPtrSet<const Value *, 32> EphValues;
1345    if (ThreadableLoops.size() > 0)
1346      CodeMetrics::collectEphemeralValues(&F, AC, EphValues);
1347  
1348    for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
1349      TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
1350      Transform.run();
1351      MadeChanges = true;
1352      LoopInfoBroken = true;
1353    }
1354  
1355  #ifdef EXPENSIVE_CHECKS
1356    assert(DT->verify(DominatorTree::VerificationLevel::Full));
1357    verifyFunction(F, &dbgs());
1358  #endif
1359  
1360    return MadeChanges;
1361  }
1362  
1363  } // end anonymous namespace
1364  
1365  /// Integrate with the new Pass Manager
run(Function & F,FunctionAnalysisManager & AM)1366  PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
1367                                              FunctionAnalysisManager &AM) {
1368    AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
1369    DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
1370    LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
1371    TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
1372    OptimizationRemarkEmitter ORE(&F);
1373    DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE);
1374    if (!ThreadImpl.run(F))
1375      return PreservedAnalyses::all();
1376  
1377    PreservedAnalyses PA;
1378    PA.preserve<DominatorTreeAnalysis>();
1379    if (!ThreadImpl.LoopInfoBroken)
1380      PA.preserve<LoopAnalysis>();
1381    return PA;
1382  }
1383