xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Transform each threading path to effectively jump thread the DFA. For
10 // example, the CFG below could be transformed as follows, where the cloned
11 // blocks unconditionally branch to the next correct case based on what is
12 // identified in the analysis.
13 //
14 //          sw.bb                        sw.bb
15 //        /   |   \                    /   |   \
16 //   case1  case2  case3          case1  case2  case3
17 //        \   |   /                 |      |      |
18 //       determinator            det.2   det.3  det.1
19 //        br sw.bb                /        |        \
20 //                          sw.bb.2     sw.bb.3     sw.bb.1
21 //                           br case2    br case3    br case1§
22 //
23 // Definitions and Terminology:
24 //
25 // * Threading path:
26 //   a list of basic blocks, the exit state, and the block that determines
27 //   the next state, for which the following notation will be used:
28 //   < path of BBs that form a cycle > [ state, determinator ]
29 //
30 // * Predictable switch:
31 //   The switch variable is always a known constant so that all conditional
32 //   jumps based on switch variable can be converted to unconditional jump.
33 //
34 // * Determinator:
35 //   The basic block that determines the next state of the DFA.
36 //
37 // Representing the optimization in C-like pseudocode: the code pattern on the
38 // left could functionally be transformed to the right pattern if the switch
39 // condition is predictable.
40 //
41 //  X = A                       goto A
42 //  for (...)                   A:
43 //    switch (X)                  ...
44 //      case A                    goto B
45 //        X = B                 B:
46 //      case B                    ...
47 //        X = C                   goto C
48 //
49 // The pass first checks that switch variable X is decided by the control flow
50 // path taken in the loop; for example, in case B, the next value of X is
51 // decided to be C. It then enumerates through all paths in the loop and labels
52 // the basic blocks where the next state is decided.
53 //
54 // Using this information it creates new paths that unconditionally branch to
55 // the next case. This involves cloning code, so it only gets triggered if the
56 // amount of code duplicated is below a threshold.
57 //
58 //===----------------------------------------------------------------------===//
59 
60 #include "llvm/Transforms/Scalar/DFAJumpThreading.h"
61 #include "llvm/ADT/APInt.h"
62 #include "llvm/ADT/DenseMap.h"
63 #include "llvm/ADT/SmallSet.h"
64 #include "llvm/ADT/Statistic.h"
65 #include "llvm/Analysis/AssumptionCache.h"
66 #include "llvm/Analysis/CodeMetrics.h"
67 #include "llvm/Analysis/DomTreeUpdater.h"
68 #include "llvm/Analysis/LoopInfo.h"
69 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
70 #include "llvm/Analysis/TargetTransformInfo.h"
71 #include "llvm/IR/CFG.h"
72 #include "llvm/IR/Constants.h"
73 #include "llvm/IR/IntrinsicInst.h"
74 #include "llvm/Support/CommandLine.h"
75 #include "llvm/Support/Debug.h"
76 #include "llvm/Transforms/Utils/Cloning.h"
77 #include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
78 #include "llvm/Transforms/Utils/ValueMapper.h"
79 #include <deque>
80 
81 #ifdef EXPENSIVE_CHECKS
82 #include "llvm/IR/Verifier.h"
83 #endif
84 
85 using namespace llvm;
86 
87 #define DEBUG_TYPE "dfa-jump-threading"
88 
89 STATISTIC(NumTransforms, "Number of transformations done");
90 STATISTIC(NumCloned, "Number of blocks cloned");
91 STATISTIC(NumPaths, "Number of individual paths threaded");
92 
93 static cl::opt<bool>
94     ClViewCfgBefore("dfa-jump-view-cfg-before",
95                     cl::desc("View the CFG before DFA Jump Threading"),
96                     cl::Hidden, cl::init(false));
97 
98 static cl::opt<bool> EarlyExitHeuristic(
99     "dfa-early-exit-heuristic",
100     cl::desc("Exit early if an unpredictable value come from the same loop"),
101     cl::Hidden, cl::init(true));
102 
103 static cl::opt<unsigned> MaxPathLength(
104     "dfa-max-path-length",
105     cl::desc("Max number of blocks searched to find a threading path"),
106     cl::Hidden, cl::init(20));
107 
108 static cl::opt<unsigned> MaxNumVisitiedPaths(
109     "dfa-max-num-visited-paths",
110     cl::desc(
111         "Max number of blocks visited while enumerating paths around a switch"),
112     cl::Hidden, cl::init(2500));
113 
114 static cl::opt<unsigned>
115     MaxNumPaths("dfa-max-num-paths",
116                 cl::desc("Max number of paths enumerated around a switch"),
117                 cl::Hidden, cl::init(200));
118 
119 static cl::opt<unsigned>
120     CostThreshold("dfa-cost-threshold",
121                   cl::desc("Maximum cost accepted for the transformation"),
122                   cl::Hidden, cl::init(50));
123 
124 namespace {
125 
126 class SelectInstToUnfold {
127   SelectInst *SI;
128   PHINode *SIUse;
129 
130 public:
SelectInstToUnfold(SelectInst * SI,PHINode * SIUse)131   SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {}
132 
getInst()133   SelectInst *getInst() { return SI; }
getUse()134   PHINode *getUse() { return SIUse; }
135 
operator bool() const136   explicit operator bool() const { return SI && SIUse; }
137 };
138 
139 void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
140             std::vector<SelectInstToUnfold> *NewSIsToUnfold,
141             std::vector<BasicBlock *> *NewBBs);
142 
143 class DFAJumpThreading {
144 public:
DFAJumpThreading(AssumptionCache * AC,DominatorTree * DT,LoopInfo * LI,TargetTransformInfo * TTI,OptimizationRemarkEmitter * ORE)145   DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI,
146                    TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE)
147       : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {}
148 
149   bool run(Function &F);
150   bool LoopInfoBroken;
151 
152 private:
153   void
unfoldSelectInstrs(DominatorTree * DT,const SmallVector<SelectInstToUnfold,4> & SelectInsts)154   unfoldSelectInstrs(DominatorTree *DT,
155                      const SmallVector<SelectInstToUnfold, 4> &SelectInsts) {
156     DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
157     SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts);
158 
159     while (!Stack.empty()) {
160       SelectInstToUnfold SIToUnfold = Stack.pop_back_val();
161 
162       std::vector<SelectInstToUnfold> NewSIsToUnfold;
163       std::vector<BasicBlock *> NewBBs;
164       unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs);
165 
166       // Put newly discovered select instructions into the work list.
167       llvm::append_range(Stack, NewSIsToUnfold);
168     }
169   }
170 
171   AssumptionCache *AC;
172   DominatorTree *DT;
173   LoopInfo *LI;
174   TargetTransformInfo *TTI;
175   OptimizationRemarkEmitter *ORE;
176 };
177 
178 } // end anonymous namespace
179 
180 namespace {
181 
182 /// Unfold the select instruction held in \p SIToUnfold by replacing it with
183 /// control flow.
184 ///
185 /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly
186 /// created basic blocks into \p NewBBs.
187 ///
188 /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible.
unfold(DomTreeUpdater * DTU,LoopInfo * LI,SelectInstToUnfold SIToUnfold,std::vector<SelectInstToUnfold> * NewSIsToUnfold,std::vector<BasicBlock * > * NewBBs)189 void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold,
190             std::vector<SelectInstToUnfold> *NewSIsToUnfold,
191             std::vector<BasicBlock *> *NewBBs) {
192   SelectInst *SI = SIToUnfold.getInst();
193   PHINode *SIUse = SIToUnfold.getUse();
194   BasicBlock *StartBlock = SI->getParent();
195   BranchInst *StartBlockTerm =
196       dyn_cast<BranchInst>(StartBlock->getTerminator());
197 
198   assert(StartBlockTerm);
199   assert(SI->hasOneUse());
200 
201   if (StartBlockTerm->isUnconditional()) {
202     BasicBlock *EndBlock = StartBlock->getUniqueSuccessor();
203     // Arbitrarily choose the 'false' side for a new input value to the PHI.
204     BasicBlock *NewBlock = BasicBlock::Create(
205         SI->getContext(), Twine(SI->getName(), ".si.unfold.false"),
206         EndBlock->getParent(), EndBlock);
207     NewBBs->push_back(NewBlock);
208     BranchInst::Create(EndBlock, NewBlock);
209     DTU->applyUpdates({{DominatorTree::Insert, NewBlock, EndBlock}});
210 
211     // StartBlock
212     //   |  \
213     //   |  NewBlock
214     //   |  /
215     // EndBlock
216     Value *SIOp1 = SI->getTrueValue();
217     Value *SIOp2 = SI->getFalseValue();
218 
219     PHINode *NewPhi = PHINode::Create(SIUse->getType(), 1,
220                                       Twine(SIOp2->getName(), ".si.unfold.phi"),
221                                       NewBlock->getFirstInsertionPt());
222     NewPhi->addIncoming(SIOp2, StartBlock);
223 
224     // Update any other PHI nodes in EndBlock.
225     for (PHINode &Phi : EndBlock->phis()) {
226       if (SIUse == &Phi)
227         continue;
228       Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlock);
229     }
230 
231     // Update the phi node of SI, which is its only use.
232     if (EndBlock == SIUse->getParent()) {
233       SIUse->addIncoming(NewPhi, NewBlock);
234       SIUse->replaceUsesOfWith(SI, SIOp1);
235     } else {
236       PHINode *EndPhi = PHINode::Create(SIUse->getType(), pred_size(EndBlock),
237                                         Twine(SI->getName(), ".si.unfold.phi"),
238                                         EndBlock->getFirstInsertionPt());
239       for (BasicBlock *Pred : predecessors(EndBlock)) {
240         if (Pred != StartBlock && Pred != NewBlock)
241           EndPhi->addIncoming(EndPhi, Pred);
242       }
243 
244       EndPhi->addIncoming(SIOp1, StartBlock);
245       EndPhi->addIncoming(NewPhi, NewBlock);
246       SIUse->replaceUsesOfWith(SI, EndPhi);
247       SIUse = EndPhi;
248     }
249 
250     if (auto *OpSi = dyn_cast<SelectInst>(SIOp1))
251       NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, SIUse));
252     if (auto *OpSi = dyn_cast<SelectInst>(SIOp2))
253       NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, NewPhi));
254 
255     // Insert the real conditional branch based on the original condition.
256     StartBlockTerm->eraseFromParent();
257     BranchInst::Create(EndBlock, NewBlock, SI->getCondition(), StartBlock);
258     DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock},
259                        {DominatorTree::Insert, StartBlock, NewBlock}});
260   } else {
261     BasicBlock *EndBlock = SIUse->getParent();
262     BasicBlock *NewBlockT = BasicBlock::Create(
263         SI->getContext(), Twine(SI->getName(), ".si.unfold.true"),
264         EndBlock->getParent(), EndBlock);
265     BasicBlock *NewBlockF = BasicBlock::Create(
266         SI->getContext(), Twine(SI->getName(), ".si.unfold.false"),
267         EndBlock->getParent(), EndBlock);
268 
269     NewBBs->push_back(NewBlockT);
270     NewBBs->push_back(NewBlockF);
271 
272     // Def only has one use in EndBlock.
273     // Before transformation:
274     // StartBlock(Def)
275     //   |      \
276     // EndBlock  OtherBlock
277     //  (Use)
278     //
279     // After transformation:
280     // StartBlock(Def)
281     //   |      \
282     //   |       OtherBlock
283     // NewBlockT
284     //   |     \
285     //   |   NewBlockF
286     //   |      /
287     //   |     /
288     // EndBlock
289     //  (Use)
290     BranchInst::Create(EndBlock, NewBlockF);
291     // Insert the real conditional branch based on the original condition.
292     BranchInst::Create(EndBlock, NewBlockF, SI->getCondition(), NewBlockT);
293     DTU->applyUpdates({{DominatorTree::Insert, NewBlockT, NewBlockF},
294                        {DominatorTree::Insert, NewBlockT, EndBlock},
295                        {DominatorTree::Insert, NewBlockF, EndBlock}});
296 
297     Value *TrueVal = SI->getTrueValue();
298     Value *FalseVal = SI->getFalseValue();
299 
300     PHINode *NewPhiT = PHINode::Create(
301         SIUse->getType(), 1, Twine(TrueVal->getName(), ".si.unfold.phi"),
302         NewBlockT->getFirstInsertionPt());
303     PHINode *NewPhiF = PHINode::Create(
304         SIUse->getType(), 1, Twine(FalseVal->getName(), ".si.unfold.phi"),
305         NewBlockF->getFirstInsertionPt());
306     NewPhiT->addIncoming(TrueVal, StartBlock);
307     NewPhiF->addIncoming(FalseVal, NewBlockT);
308 
309     if (auto *TrueSI = dyn_cast<SelectInst>(TrueVal))
310       NewSIsToUnfold->push_back(SelectInstToUnfold(TrueSI, NewPhiT));
311     if (auto *FalseSi = dyn_cast<SelectInst>(FalseVal))
312       NewSIsToUnfold->push_back(SelectInstToUnfold(FalseSi, NewPhiF));
313 
314     SIUse->addIncoming(NewPhiT, NewBlockT);
315     SIUse->addIncoming(NewPhiF, NewBlockF);
316     SIUse->removeIncomingValue(StartBlock);
317 
318     // Update any other PHI nodes in EndBlock.
319     for (PHINode &Phi : EndBlock->phis()) {
320       if (SIUse == &Phi)
321         continue;
322       Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlockT);
323       Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlockF);
324       Phi.removeIncomingValue(StartBlock);
325     }
326 
327     // Update the appropriate successor of the start block to point to the new
328     // unfolded block.
329     unsigned SuccNum = StartBlockTerm->getSuccessor(1) == EndBlock ? 1 : 0;
330     StartBlockTerm->setSuccessor(SuccNum, NewBlockT);
331     DTU->applyUpdates({{DominatorTree::Delete, StartBlock, EndBlock},
332                        {DominatorTree::Insert, StartBlock, NewBlockT}});
333   }
334 
335   // Preserve loop info
336   if (Loop *L = LI->getLoopFor(SI->getParent())) {
337     for (BasicBlock *NewBB : *NewBBs)
338       L->addBasicBlockToLoop(NewBB, *LI);
339   }
340 
341   // The select is now dead.
342   assert(SI->use_empty() && "Select must be dead now");
343   SI->eraseFromParent();
344 }
345 
346 struct ClonedBlock {
347   BasicBlock *BB;
348   APInt State; ///< \p State corresponds to the next value of a switch stmnt.
349 };
350 
351 typedef std::deque<BasicBlock *> PathType;
352 typedef std::vector<PathType> PathsType;
353 typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
354 typedef std::vector<ClonedBlock> CloneList;
355 
356 // This data structure keeps track of all blocks that have been cloned.  If two
357 // different ThreadingPaths clone the same block for a certain state it should
358 // be reused, and it can be looked up in this map.
359 typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap;
360 
361 // This map keeps track of all the new definitions for an instruction. This
362 // information is needed when restoring SSA form after cloning blocks.
363 typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap;
364 
operator <<(raw_ostream & OS,const PathType & Path)365 inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) {
366   OS << "< ";
367   for (const BasicBlock *BB : Path) {
368     std::string BBName;
369     if (BB->hasName())
370       raw_string_ostream(BBName) << BB->getName();
371     else
372       raw_string_ostream(BBName) << BB;
373     OS << BBName << " ";
374   }
375   OS << ">";
376   return OS;
377 }
378 
379 /// ThreadingPath is a path in the control flow of a loop that can be threaded
380 /// by cloning necessary basic blocks and replacing conditional branches with
381 /// unconditional ones. A threading path includes a list of basic blocks, the
382 /// exit state, and the block that determines the next state.
383 struct ThreadingPath {
384   /// Exit value is DFA's exit state for the given path.
getExitValue__anonfb50cc300211::ThreadingPath385   APInt getExitValue() const { return ExitVal; }
setExitValue__anonfb50cc300211::ThreadingPath386   void setExitValue(const ConstantInt *V) {
387     ExitVal = V->getValue();
388     IsExitValSet = true;
389   }
isExitValueSet__anonfb50cc300211::ThreadingPath390   bool isExitValueSet() const { return IsExitValSet; }
391 
392   /// Determinator is the basic block that determines the next state of the DFA.
getDeterminatorBB__anonfb50cc300211::ThreadingPath393   const BasicBlock *getDeterminatorBB() const { return DBB; }
setDeterminator__anonfb50cc300211::ThreadingPath394   void setDeterminator(const BasicBlock *BB) { DBB = BB; }
395 
396   /// Path is a list of basic blocks.
getPath__anonfb50cc300211::ThreadingPath397   const PathType &getPath() const { return Path; }
setPath__anonfb50cc300211::ThreadingPath398   void setPath(const PathType &NewPath) { Path = NewPath; }
push_back__anonfb50cc300211::ThreadingPath399   void push_back(BasicBlock *BB) { Path.push_back(BB); }
push_front__anonfb50cc300211::ThreadingPath400   void push_front(BasicBlock *BB) { Path.push_front(BB); }
appendExcludingFirst__anonfb50cc300211::ThreadingPath401   void appendExcludingFirst(const PathType &OtherPath) {
402     llvm::append_range(Path, llvm::drop_begin(OtherPath));
403   }
404 
print__anonfb50cc300211::ThreadingPath405   void print(raw_ostream &OS) const {
406     OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]";
407   }
408 
409 private:
410   PathType Path;
411   APInt ExitVal;
412   const BasicBlock *DBB = nullptr;
413   bool IsExitValSet = false;
414 };
415 
416 #ifndef NDEBUG
operator <<(raw_ostream & OS,const ThreadingPath & TPath)417 inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) {
418   TPath.print(OS);
419   return OS;
420 }
421 #endif
422 
423 struct MainSwitch {
MainSwitch__anonfb50cc300211::MainSwitch424   MainSwitch(SwitchInst *SI, LoopInfo *LI, OptimizationRemarkEmitter *ORE)
425       : LI(LI) {
426     if (isCandidate(SI)) {
427       Instr = SI;
428     } else {
429       ORE->emit([&]() {
430         return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI)
431                << "Switch instruction is not predictable.";
432       });
433     }
434   }
435 
436   virtual ~MainSwitch() = default;
437 
getInstr__anonfb50cc300211::MainSwitch438   SwitchInst *getInstr() const { return Instr; }
getSelectInsts__anonfb50cc300211::MainSwitch439   const SmallVector<SelectInstToUnfold, 4> getSelectInsts() {
440     return SelectInsts;
441   }
442 
443 private:
444   /// Do a use-def chain traversal starting from the switch condition to see if
445   /// \p SI is a potential condidate.
446   ///
447   /// Also, collect select instructions to unfold.
isCandidate__anonfb50cc300211::MainSwitch448   bool isCandidate(const SwitchInst *SI) {
449     std::deque<std::pair<Value *, BasicBlock *>> Q;
450     SmallSet<Value *, 16> SeenValues;
451     SelectInsts.clear();
452 
453     Value *SICond = SI->getCondition();
454     LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n");
455     if (!isa<PHINode>(SICond))
456       return false;
457 
458     // The switch must be in a loop.
459     const Loop *L = LI->getLoopFor(SI->getParent());
460     if (!L)
461       return false;
462 
463     addToQueue(SICond, nullptr, Q, SeenValues);
464 
465     while (!Q.empty()) {
466       Value *Current = Q.front().first;
467       BasicBlock *CurrentIncomingBB = Q.front().second;
468       Q.pop_front();
469 
470       if (auto *Phi = dyn_cast<PHINode>(Current)) {
471         for (BasicBlock *IncomingBB : Phi->blocks()) {
472           Value *Incoming = Phi->getIncomingValueForBlock(IncomingBB);
473           addToQueue(Incoming, IncomingBB, Q, SeenValues);
474         }
475         LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n");
476       } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) {
477         if (!isValidSelectInst(SelI))
478           return false;
479         addToQueue(SelI->getTrueValue(), CurrentIncomingBB, Q, SeenValues);
480         addToQueue(SelI->getFalseValue(), CurrentIncomingBB, Q, SeenValues);
481         LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n");
482         if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back()))
483           SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse));
484       } else if (isa<Constant>(Current)) {
485         LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n");
486         continue;
487       } else {
488         LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n");
489         // Allow unpredictable values. The hope is that those will be the
490         // initial switch values that can be ignored (they will hit the
491         // unthreaded switch) but this assumption will get checked later after
492         // paths have been enumerated (in function getStateDefMap).
493 
494         // If the unpredictable value comes from the same inner loop it is
495         // likely that it will also be on the enumerated paths, causing us to
496         // exit after we have enumerated all the paths. This heuristic save
497         // compile time because a search for all the paths can become expensive.
498         if (EarlyExitHeuristic &&
499             L->contains(LI->getLoopFor(CurrentIncomingBB))) {
500           LLVM_DEBUG(dbgs()
501                      << "\tExiting early due to unpredictability heuristic.\n");
502           return false;
503         }
504 
505         continue;
506       }
507     }
508 
509     return true;
510   }
511 
addToQueue__anonfb50cc300211::MainSwitch512   void addToQueue(Value *Val, BasicBlock *BB,
513                   std::deque<std::pair<Value *, BasicBlock *>> &Q,
514                   SmallSet<Value *, 16> &SeenValues) {
515     if (SeenValues.insert(Val).second)
516       Q.push_back({Val, BB});
517   }
518 
isValidSelectInst__anonfb50cc300211::MainSwitch519   bool isValidSelectInst(SelectInst *SI) {
520     if (!SI->hasOneUse())
521       return false;
522 
523     Instruction *SIUse = dyn_cast<Instruction>(SI->user_back());
524     // The use of the select inst should be either a phi or another select.
525     if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse)))
526       return false;
527 
528     BasicBlock *SIBB = SI->getParent();
529 
530     // Currently, we can only expand select instructions in basic blocks with
531     // one successor.
532     BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator());
533     if (!SITerm || !SITerm->isUnconditional())
534       return false;
535 
536     // Only fold the select coming from directly where it is defined.
537     PHINode *PHIUser = dyn_cast<PHINode>(SIUse);
538     if (PHIUser && PHIUser->getIncomingBlock(*SI->use_begin()) != SIBB)
539       return false;
540 
541     // If select will not be sunk during unfolding, and it is in the same basic
542     // block as another state defining select, then cannot unfold both.
543     for (SelectInstToUnfold SIToUnfold : SelectInsts) {
544       SelectInst *PrevSI = SIToUnfold.getInst();
545       if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI &&
546           PrevSI->getParent() == SI->getParent())
547         return false;
548     }
549 
550     return true;
551   }
552 
553   LoopInfo *LI;
554   SwitchInst *Instr = nullptr;
555   SmallVector<SelectInstToUnfold, 4> SelectInsts;
556 };
557 
558 struct AllSwitchPaths {
AllSwitchPaths__anonfb50cc300211::AllSwitchPaths559   AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE,
560                  LoopInfo *LI, Loop *L)
561       : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE),
562         LI(LI), SwitchOuterLoop(L) {}
563 
getThreadingPaths__anonfb50cc300211::AllSwitchPaths564   std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; }
getNumThreadingPaths__anonfb50cc300211::AllSwitchPaths565   unsigned getNumThreadingPaths() { return TPaths.size(); }
getSwitchInst__anonfb50cc300211::AllSwitchPaths566   SwitchInst *getSwitchInst() { return Switch; }
getSwitchBlock__anonfb50cc300211::AllSwitchPaths567   BasicBlock *getSwitchBlock() { return SwitchBlock; }
568 
run__anonfb50cc300211::AllSwitchPaths569   void run() {
570     StateDefMap StateDef = getStateDefMap();
571     if (StateDef.empty()) {
572       ORE->emit([&]() {
573         return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable",
574                                         Switch)
575                << "Switch instruction is not predictable.";
576       });
577       return;
578     }
579 
580     auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0));
581     auto *SwitchPhiDefBB = SwitchPhi->getParent();
582     VisitedBlocks VB;
583     // Get paths from the determinator BBs to SwitchPhiDefBB
584     std::vector<ThreadingPath> PathsToPhiDef =
585         getPathsFromStateDefMap(StateDef, SwitchPhi, VB);
586     if (SwitchPhiDefBB == SwitchBlock) {
587       TPaths = std::move(PathsToPhiDef);
588       return;
589     }
590 
591     // Find and append paths from SwitchPhiDefBB to SwitchBlock.
592     PathsType PathsToSwitchBB =
593         paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1);
594     if (PathsToSwitchBB.empty())
595       return;
596 
597     std::vector<ThreadingPath> TempList;
598     for (const ThreadingPath &Path : PathsToPhiDef) {
599       for (const PathType &PathToSw : PathsToSwitchBB) {
600         ThreadingPath PathCopy(Path);
601         PathCopy.appendExcludingFirst(PathToSw);
602         TempList.push_back(PathCopy);
603       }
604     }
605     TPaths = std::move(TempList);
606   }
607 
608 private:
609   // Value: an instruction that defines a switch state;
610   // Key: the parent basic block of that instruction.
611   typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap;
getPathsFromStateDefMap__anonfb50cc300211::AllSwitchPaths612   std::vector<ThreadingPath> getPathsFromStateDefMap(StateDefMap &StateDef,
613                                                      PHINode *Phi,
614                                                      VisitedBlocks &VB) {
615     std::vector<ThreadingPath> Res;
616     auto *PhiBB = Phi->getParent();
617     VB.insert(PhiBB);
618 
619     VisitedBlocks UniqueBlocks;
620     for (auto *IncomingBB : Phi->blocks()) {
621       if (!UniqueBlocks.insert(IncomingBB).second)
622         continue;
623       if (!SwitchOuterLoop->contains(IncomingBB))
624         continue;
625 
626       Value *IncomingValue = Phi->getIncomingValueForBlock(IncomingBB);
627       // We found the determinator. This is the start of our path.
628       if (auto *C = dyn_cast<ConstantInt>(IncomingValue)) {
629         // SwitchBlock is the determinator, unsupported unless its also the def.
630         if (PhiBB == SwitchBlock &&
631             SwitchBlock != cast<PHINode>(Switch->getOperand(0))->getParent())
632           continue;
633         ThreadingPath NewPath;
634         NewPath.setDeterminator(PhiBB);
635         NewPath.setExitValue(C);
636         // Don't add SwitchBlock at the start, this is handled later.
637         if (IncomingBB != SwitchBlock)
638           NewPath.push_back(IncomingBB);
639         NewPath.push_back(PhiBB);
640         Res.push_back(NewPath);
641         continue;
642       }
643       // Don't get into a cycle.
644       if (VB.contains(IncomingBB) || IncomingBB == SwitchBlock)
645         continue;
646       // Recurse up the PHI chain.
647       auto *IncomingPhi = dyn_cast<PHINode>(IncomingValue);
648       if (!IncomingPhi)
649         continue;
650       auto *IncomingPhiDefBB = IncomingPhi->getParent();
651       if (!StateDef.contains(IncomingPhiDefBB))
652         continue;
653 
654       // Direct predecessor, just add to the path.
655       if (IncomingPhiDefBB == IncomingBB) {
656         std::vector<ThreadingPath> PredPaths =
657             getPathsFromStateDefMap(StateDef, IncomingPhi, VB);
658         for (ThreadingPath &Path : PredPaths) {
659           Path.push_back(PhiBB);
660           Res.push_back(std::move(Path));
661         }
662         continue;
663       }
664       // Not a direct predecessor, find intermediate paths to append to the
665       // existing path.
666       if (VB.contains(IncomingPhiDefBB))
667         continue;
668 
669       PathsType IntermediatePaths;
670       IntermediatePaths =
671           paths(IncomingPhiDefBB, IncomingBB, VB, /* PathDepth = */ 1);
672       if (IntermediatePaths.empty())
673         continue;
674 
675       std::vector<ThreadingPath> PredPaths =
676           getPathsFromStateDefMap(StateDef, IncomingPhi, VB);
677       for (const ThreadingPath &Path : PredPaths) {
678         for (const PathType &IPath : IntermediatePaths) {
679           ThreadingPath NewPath(Path);
680           NewPath.appendExcludingFirst(IPath);
681           NewPath.push_back(PhiBB);
682           Res.push_back(NewPath);
683         }
684       }
685     }
686     VB.erase(PhiBB);
687     return Res;
688   }
689 
paths__anonfb50cc300211::AllSwitchPaths690   PathsType paths(BasicBlock *BB, BasicBlock *ToBB, VisitedBlocks &Visited,
691                   unsigned PathDepth) {
692     PathsType Res;
693 
694     // Stop exploring paths after visiting MaxPathLength blocks
695     if (PathDepth > MaxPathLength) {
696       ORE->emit([&]() {
697         return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached",
698                                           Switch)
699                << "Exploration stopped after visiting MaxPathLength="
700                << ore::NV("MaxPathLength", MaxPathLength) << " blocks.";
701       });
702       return Res;
703     }
704 
705     Visited.insert(BB);
706     if (++NumVisited > MaxNumVisitiedPaths)
707       return Res;
708 
709     // Stop if we have reached the BB out of loop, since its successors have no
710     // impact on the DFA.
711     if (!SwitchOuterLoop->contains(BB))
712       return Res;
713 
714     // Some blocks have multiple edges to the same successor, and this set
715     // is used to prevent a duplicate path from being generated
716     SmallSet<BasicBlock *, 4> Successors;
717     for (BasicBlock *Succ : successors(BB)) {
718       if (!Successors.insert(Succ).second)
719         continue;
720 
721       // Found a cycle through the final block.
722       if (Succ == ToBB) {
723         Res.push_back({BB, ToBB});
724         continue;
725       }
726 
727       // We have encountered a cycle, do not get caught in it
728       if (Visited.contains(Succ))
729         continue;
730 
731       auto *CurrLoop = LI->getLoopFor(BB);
732       // Unlikely to be beneficial.
733       if (Succ == CurrLoop->getHeader())
734         continue;
735       // Skip for now, revisit this condition later to see the impact on
736       // coverage and compile time.
737       if (LI->getLoopFor(Succ) != CurrLoop)
738         continue;
739 
740       PathsType SuccPaths = paths(Succ, ToBB, Visited, PathDepth + 1);
741       for (PathType &Path : SuccPaths) {
742         Path.push_front(BB);
743         Res.push_back(Path);
744         if (Res.size() >= MaxNumPaths) {
745           return Res;
746         }
747       }
748     }
749     // This block could now be visited again from a different predecessor. Note
750     // that this will result in exponential runtime. Subpaths could possibly be
751     // cached but it takes a lot of memory to store them.
752     Visited.erase(BB);
753     return Res;
754   }
755 
756   /// Walk the use-def chain and collect all the state-defining blocks and the
757   /// PHI nodes in those blocks that define the state.
getStateDefMap__anonfb50cc300211::AllSwitchPaths758   StateDefMap getStateDefMap() const {
759     StateDefMap Res;
760     PHINode *FirstDef = dyn_cast<PHINode>(Switch->getOperand(0));
761     assert(FirstDef && "The first definition must be a phi.");
762 
763     SmallVector<PHINode *, 8> Stack;
764     Stack.push_back(FirstDef);
765     SmallSet<Value *, 16> SeenValues;
766 
767     while (!Stack.empty()) {
768       PHINode *CurPhi = Stack.pop_back_val();
769 
770       Res[CurPhi->getParent()] = CurPhi;
771       SeenValues.insert(CurPhi);
772 
773       for (BasicBlock *IncomingBB : CurPhi->blocks()) {
774         PHINode *IncomingPhi =
775             dyn_cast<PHINode>(CurPhi->getIncomingValueForBlock(IncomingBB));
776         if (!IncomingPhi)
777           continue;
778         bool IsOutsideLoops = !SwitchOuterLoop->contains(IncomingBB);
779         if (SeenValues.contains(IncomingPhi) || IsOutsideLoops)
780           continue;
781 
782         Stack.push_back(IncomingPhi);
783       }
784     }
785 
786     return Res;
787   }
788 
789   unsigned NumVisited = 0;
790   SwitchInst *Switch;
791   BasicBlock *SwitchBlock;
792   OptimizationRemarkEmitter *ORE;
793   std::vector<ThreadingPath> TPaths;
794   LoopInfo *LI;
795   Loop *SwitchOuterLoop;
796 };
797 
798 struct TransformDFA {
TransformDFA__anonfb50cc300211::TransformDFA799   TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT,
800                AssumptionCache *AC, TargetTransformInfo *TTI,
801                OptimizationRemarkEmitter *ORE,
802                SmallPtrSet<const Value *, 32> EphValues)
803       : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE),
804         EphValues(EphValues) {}
805 
run__anonfb50cc300211::TransformDFA806   void run() {
807     if (isLegalAndProfitableToTransform()) {
808       createAllExitPaths();
809       NumTransforms++;
810     }
811   }
812 
813 private:
814   /// This function performs both a legality check and profitability check at
815   /// the same time since it is convenient to do so. It iterates through all
816   /// blocks that will be cloned, and keeps track of the duplication cost. It
817   /// also returns false if it is illegal to clone some required block.
isLegalAndProfitableToTransform__anonfb50cc300211::TransformDFA818   bool isLegalAndProfitableToTransform() {
819     CodeMetrics Metrics;
820     SwitchInst *Switch = SwitchPaths->getSwitchInst();
821 
822     // Don't thread switch without multiple successors.
823     if (Switch->getNumSuccessors() <= 1)
824       return false;
825 
826     // Note that DuplicateBlockMap is not being used as intended here. It is
827     // just being used to ensure (BB, State) pairs are only counted once.
828     DuplicateBlockMap DuplicateMap;
829 
830     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
831       PathType PathBBs = TPath.getPath();
832       APInt NextState = TPath.getExitValue();
833       const BasicBlock *Determinator = TPath.getDeterminatorBB();
834 
835       // Update Metrics for the Switch block, this is always cloned
836       BasicBlock *BB = SwitchPaths->getSwitchBlock();
837       BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
838       if (!VisitedBB) {
839         Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
840         DuplicateMap[BB].push_back({BB, NextState});
841       }
842 
843       // If the Switch block is the Determinator, then we can continue since
844       // this is the only block that is cloned and we already counted for it.
845       if (PathBBs.front() == Determinator)
846         continue;
847 
848       // Otherwise update Metrics for all blocks that will be cloned. If any
849       // block is already cloned and would be reused, don't double count it.
850       auto DetIt = llvm::find(PathBBs, Determinator);
851       for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
852         BB = *BBIt;
853         VisitedBB = getClonedBB(BB, NextState, DuplicateMap);
854         if (VisitedBB)
855           continue;
856         Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
857         DuplicateMap[BB].push_back({BB, NextState});
858       }
859 
860       if (Metrics.notDuplicatable) {
861         LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
862                           << "non-duplicatable instructions.\n");
863         ORE->emit([&]() {
864           return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst",
865                                           Switch)
866                  << "Contains non-duplicatable instructions.";
867         });
868         return false;
869       }
870 
871       // FIXME: Allow jump threading with controlled convergence.
872       if (Metrics.Convergence != ConvergenceKind::None) {
873         LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
874                           << "convergent instructions.\n");
875         ORE->emit([&]() {
876           return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
877                  << "Contains convergent instructions.";
878         });
879         return false;
880       }
881 
882       if (!Metrics.NumInsts.isValid()) {
883         LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains "
884                           << "instructions with invalid cost.\n");
885         ORE->emit([&]() {
886           return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch)
887                  << "Contains instructions with invalid cost.";
888         });
889         return false;
890       }
891     }
892 
893     InstructionCost DuplicationCost = 0;
894 
895     unsigned JumpTableSize = 0;
896     TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr,
897                                           nullptr);
898     if (JumpTableSize == 0) {
899       // Factor in the number of conditional branches reduced from jump
900       // threading. Assume that lowering the switch block is implemented by
901       // using binary search, hence the LogBase2().
902       unsigned CondBranches =
903           APInt(32, Switch->getNumSuccessors()).ceilLogBase2();
904       assert(CondBranches > 0 &&
905              "The threaded switch must have multiple branches");
906       DuplicationCost = Metrics.NumInsts / CondBranches;
907     } else {
908       // Compared with jump tables, the DFA optimizer removes an indirect branch
909       // on each loop iteration, thus making branch prediction more precise. The
910       // more branch targets there are, the more likely it is for the branch
911       // predictor to make a mistake, and the more benefit there is in the DFA
912       // optimizer. Thus, the more branch targets there are, the lower is the
913       // cost of the DFA opt.
914       DuplicationCost = Metrics.NumInsts / JumpTableSize;
915     }
916 
917     LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block "
918                       << SwitchPaths->getSwitchBlock()->getName()
919                       << " is: " << DuplicationCost << "\n\n");
920 
921     if (DuplicationCost > CostThreshold) {
922       LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the "
923                         << "cost threshold.\n");
924       ORE->emit([&]() {
925         return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch)
926                << "Duplication cost exceeds the cost threshold (cost="
927                << ore::NV("Cost", DuplicationCost)
928                << ", threshold=" << ore::NV("Threshold", CostThreshold) << ").";
929       });
930       return false;
931     }
932 
933     ORE->emit([&]() {
934       return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch)
935              << "Switch statement jump-threaded.";
936     });
937 
938     return true;
939   }
940 
941   /// Transform each threading path to effectively jump thread the DFA.
createAllExitPaths__anonfb50cc300211::TransformDFA942   void createAllExitPaths() {
943     DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager);
944 
945     // Move the switch block to the end of the path, since it will be duplicated
946     BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock();
947     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
948       LLVM_DEBUG(dbgs() << TPath << "\n");
949       // TODO: Fix exit path creation logic so that we dont need this
950       // placeholder.
951       TPath.push_front(SwitchBlock);
952     }
953 
954     // Transform the ThreadingPaths and keep track of the cloned values
955     DuplicateBlockMap DuplicateMap;
956     DefMap NewDefs;
957 
958     SmallSet<BasicBlock *, 16> BlocksToClean;
959     BlocksToClean.insert_range(successors(SwitchBlock));
960 
961     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) {
962       createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU);
963       NumPaths++;
964     }
965 
966     // After all paths are cloned, now update the last successor of the cloned
967     // path so it skips over the switch statement
968     for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths())
969       updateLastSuccessor(TPath, DuplicateMap, &DTU);
970 
971     // For each instruction that was cloned and used outside, update its uses
972     updateSSA(NewDefs);
973 
974     // Clean PHI Nodes for the newly created blocks
975     for (BasicBlock *BB : BlocksToClean)
976       cleanPhiNodes(BB);
977   }
978 
979   /// For a specific ThreadingPath \p Path, create an exit path starting from
980   /// the determinator block.
981   ///
982   /// To remember the correct destination, we have to duplicate blocks
983   /// corresponding to each state. Also update the terminating instruction of
984   /// the predecessors, and phis in the successor blocks.
createExitPath__anonfb50cc300211::TransformDFA985   void createExitPath(DefMap &NewDefs, ThreadingPath &Path,
986                       DuplicateBlockMap &DuplicateMap,
987                       SmallSet<BasicBlock *, 16> &BlocksToClean,
988                       DomTreeUpdater *DTU) {
989     APInt NextState = Path.getExitValue();
990     const BasicBlock *Determinator = Path.getDeterminatorBB();
991     PathType PathBBs = Path.getPath();
992 
993     // Don't select the placeholder block in front
994     if (PathBBs.front() == Determinator)
995       PathBBs.pop_front();
996 
997     auto DetIt = llvm::find(PathBBs, Determinator);
998     // When there is only one BB in PathBBs, the determinator takes itself as a
999     // direct predecessor.
1000     BasicBlock *PrevBB = PathBBs.size() == 1 ? *DetIt : *std::prev(DetIt);
1001     for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) {
1002       BasicBlock *BB = *BBIt;
1003       BlocksToClean.insert(BB);
1004 
1005       // We already cloned BB for this NextState, now just update the branch
1006       // and continue.
1007       BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap);
1008       if (NextBB) {
1009         updatePredecessor(PrevBB, BB, NextBB, DTU);
1010         PrevBB = NextBB;
1011         continue;
1012       }
1013 
1014       // Clone the BB and update the successor of Prev to jump to the new block
1015       BasicBlock *NewBB = cloneBlockAndUpdatePredecessor(
1016           BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU);
1017       DuplicateMap[BB].push_back({NewBB, NextState});
1018       BlocksToClean.insert(NewBB);
1019       PrevBB = NewBB;
1020     }
1021   }
1022 
1023   /// Restore SSA form after cloning blocks.
1024   ///
1025   /// Each cloned block creates new defs for a variable, and the uses need to be
1026   /// updated to reflect this. The uses may be replaced with a cloned value, or
1027   /// some derived phi instruction. Note that all uses of a value defined in the
1028   /// same block were already remapped when cloning the block.
updateSSA__anonfb50cc300211::TransformDFA1029   void updateSSA(DefMap &NewDefs) {
1030     SSAUpdaterBulk SSAUpdate;
1031     SmallVector<Use *, 16> UsesToRename;
1032 
1033     for (const auto &KV : NewDefs) {
1034       Instruction *I = KV.first;
1035       BasicBlock *BB = I->getParent();
1036       std::vector<Instruction *> Cloned = KV.second;
1037 
1038       // Scan all uses of this instruction to see if it is used outside of its
1039       // block, and if so, record them in UsesToRename.
1040       for (Use &U : I->uses()) {
1041         Instruction *User = cast<Instruction>(U.getUser());
1042         if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1043           if (UserPN->getIncomingBlock(U) == BB)
1044             continue;
1045         } else if (User->getParent() == BB) {
1046           continue;
1047         }
1048 
1049         UsesToRename.push_back(&U);
1050       }
1051 
1052       // If there are no uses outside the block, we're done with this
1053       // instruction.
1054       if (UsesToRename.empty())
1055         continue;
1056       LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I
1057                         << "\n");
1058 
1059       // We found a use of I outside of BB.  Rename all uses of I that are
1060       // outside its block to be uses of the appropriate PHI node etc.  See
1061       // ValuesInBlocks with the values we know.
1062       unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType());
1063       SSAUpdate.AddAvailableValue(VarNum, BB, I);
1064       for (Instruction *New : Cloned)
1065         SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New);
1066 
1067       while (!UsesToRename.empty())
1068         SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val());
1069 
1070       LLVM_DEBUG(dbgs() << "\n");
1071     }
1072     // SSAUpdater handles phi placement and renaming uses with the appropriate
1073     // value.
1074     SSAUpdate.RewriteAllUses(DT);
1075   }
1076 
1077   /// Clones a basic block, and adds it to the CFG.
1078   ///
1079   /// This function also includes updating phi nodes in the successors of the
1080   /// BB, and remapping uses that were defined locally in the cloned BB.
cloneBlockAndUpdatePredecessor__anonfb50cc300211::TransformDFA1081   BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB,
1082                                              const APInt &NextState,
1083                                              DuplicateBlockMap &DuplicateMap,
1084                                              DefMap &NewDefs,
1085                                              DomTreeUpdater *DTU) {
1086     ValueToValueMapTy VMap;
1087     BasicBlock *NewBB = CloneBasicBlock(
1088         BB, VMap, ".jt" + std::to_string(NextState.getLimitedValue()),
1089         BB->getParent());
1090     NewBB->moveAfter(BB);
1091     NumCloned++;
1092 
1093     for (Instruction &I : *NewBB) {
1094       // Do not remap operands of PHINode in case a definition in BB is an
1095       // incoming value to a phi in the same block. This incoming value will
1096       // be renamed later while restoring SSA.
1097       if (isa<PHINode>(&I))
1098         continue;
1099       RemapInstruction(&I, VMap,
1100                        RF_IgnoreMissingLocals | RF_NoModuleLevelChanges);
1101       if (AssumeInst *II = dyn_cast<AssumeInst>(&I))
1102         AC->registerAssumption(II);
1103     }
1104 
1105     updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap);
1106     updatePredecessor(PrevBB, BB, NewBB, DTU);
1107     updateDefMap(NewDefs, VMap);
1108 
1109     // Add all successors to the DominatorTree
1110     SmallPtrSet<BasicBlock *, 4> SuccSet;
1111     for (auto *SuccBB : successors(NewBB)) {
1112       if (SuccSet.insert(SuccBB).second)
1113         DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}});
1114     }
1115     SuccSet.clear();
1116     return NewBB;
1117   }
1118 
1119   /// Update the phi nodes in BB's successors.
1120   ///
1121   /// This means creating a new incoming value from NewBB with the new
1122   /// instruction wherever there is an incoming value from BB.
updateSuccessorPhis__anonfb50cc300211::TransformDFA1123   void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB,
1124                            const APInt &NextState, ValueToValueMapTy &VMap,
1125                            DuplicateBlockMap &DuplicateMap) {
1126     std::vector<BasicBlock *> BlocksToUpdate;
1127 
1128     // If BB is the last block in the path, we can simply update the one case
1129     // successor that will be reached.
1130     if (BB == SwitchPaths->getSwitchBlock()) {
1131       SwitchInst *Switch = SwitchPaths->getSwitchInst();
1132       BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1133       BlocksToUpdate.push_back(NextCase);
1134       BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap);
1135       if (ClonedSucc)
1136         BlocksToUpdate.push_back(ClonedSucc);
1137     }
1138     // Otherwise update phis in all successors.
1139     else {
1140       for (BasicBlock *Succ : successors(BB)) {
1141         BlocksToUpdate.push_back(Succ);
1142 
1143         // Check if a successor has already been cloned for the particular exit
1144         // value. In this case if a successor was already cloned, the phi nodes
1145         // in the cloned block should be updated directly.
1146         BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap);
1147         if (ClonedSucc)
1148           BlocksToUpdate.push_back(ClonedSucc);
1149       }
1150     }
1151 
1152     // If there is a phi with an incoming value from BB, create a new incoming
1153     // value for the new predecessor ClonedBB. The value will either be the same
1154     // value from BB or a cloned value.
1155     for (BasicBlock *Succ : BlocksToUpdate) {
1156       for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II);
1157            ++II) {
1158         Value *Incoming = Phi->getIncomingValueForBlock(BB);
1159         if (Incoming) {
1160           if (isa<Constant>(Incoming)) {
1161             Phi->addIncoming(Incoming, ClonedBB);
1162             continue;
1163           }
1164           Value *ClonedVal = VMap[Incoming];
1165           if (ClonedVal)
1166             Phi->addIncoming(ClonedVal, ClonedBB);
1167           else
1168             Phi->addIncoming(Incoming, ClonedBB);
1169         }
1170       }
1171     }
1172   }
1173 
1174   /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all
1175   /// other successors are kept as well.
updatePredecessor__anonfb50cc300211::TransformDFA1176   void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB,
1177                          BasicBlock *NewBB, DomTreeUpdater *DTU) {
1178     // When a path is reused, there is a chance that predecessors were already
1179     // updated before. Check if the predecessor needs to be updated first.
1180     if (!isPredecessor(OldBB, PrevBB))
1181       return;
1182 
1183     Instruction *PrevTerm = PrevBB->getTerminator();
1184     for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) {
1185       if (PrevTerm->getSuccessor(Idx) == OldBB) {
1186         OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true);
1187         PrevTerm->setSuccessor(Idx, NewBB);
1188       }
1189     }
1190     DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB},
1191                        {DominatorTree::Insert, PrevBB, NewBB}});
1192   }
1193 
1194   /// Add new value mappings to the DefMap to keep track of all new definitions
1195   /// for a particular instruction. These will be used while updating SSA form.
updateDefMap__anonfb50cc300211::TransformDFA1196   void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) {
1197     SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector;
1198     NewDefsVector.reserve(VMap.size());
1199 
1200     for (auto Entry : VMap) {
1201       Instruction *Inst =
1202           dyn_cast<Instruction>(const_cast<Value *>(Entry.first));
1203       if (!Inst || !Entry.second || isa<BranchInst>(Inst) ||
1204           isa<SwitchInst>(Inst)) {
1205         continue;
1206       }
1207 
1208       Instruction *Cloned = dyn_cast<Instruction>(Entry.second);
1209       if (!Cloned)
1210         continue;
1211 
1212       NewDefsVector.push_back({Inst, Cloned});
1213     }
1214 
1215     // Sort the defs to get deterministic insertion order into NewDefs.
1216     sort(NewDefsVector, [](const auto &LHS, const auto &RHS) {
1217       if (LHS.first == RHS.first)
1218         return LHS.second->comesBefore(RHS.second);
1219       return LHS.first->comesBefore(RHS.first);
1220     });
1221 
1222     for (const auto &KV : NewDefsVector)
1223       NewDefs[KV.first].push_back(KV.second);
1224   }
1225 
1226   /// Update the last branch of a particular cloned path to point to the correct
1227   /// case successor.
1228   ///
1229   /// Note that this is an optional step and would have been done in later
1230   /// optimizations, but it makes the CFG significantly easier to work with.
updateLastSuccessor__anonfb50cc300211::TransformDFA1231   void updateLastSuccessor(ThreadingPath &TPath,
1232                            DuplicateBlockMap &DuplicateMap,
1233                            DomTreeUpdater *DTU) {
1234     APInt NextState = TPath.getExitValue();
1235     BasicBlock *BB = TPath.getPath().back();
1236     BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap);
1237 
1238     // Note multiple paths can end at the same block so check that it is not
1239     // updated yet
1240     if (!isa<SwitchInst>(LastBlock->getTerminator()))
1241       return;
1242     SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator());
1243     BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState);
1244 
1245     std::vector<DominatorTree::UpdateType> DTUpdates;
1246     SmallPtrSet<BasicBlock *, 4> SuccSet;
1247     for (BasicBlock *Succ : successors(LastBlock)) {
1248       if (Succ != NextCase && SuccSet.insert(Succ).second)
1249         DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ});
1250     }
1251 
1252     Switch->eraseFromParent();
1253     BranchInst::Create(NextCase, LastBlock);
1254 
1255     DTU->applyUpdates(DTUpdates);
1256   }
1257 
1258   /// After cloning blocks, some of the phi nodes have extra incoming values
1259   /// that are no longer used. This function removes them.
cleanPhiNodes__anonfb50cc300211::TransformDFA1260   void cleanPhiNodes(BasicBlock *BB) {
1261     // If BB is no longer reachable, remove any remaining phi nodes
1262     if (pred_empty(BB)) {
1263       std::vector<PHINode *> PhiToRemove;
1264       for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
1265         PhiToRemove.push_back(Phi);
1266       }
1267       for (PHINode *PN : PhiToRemove) {
1268         PN->replaceAllUsesWith(PoisonValue::get(PN->getType()));
1269         PN->eraseFromParent();
1270       }
1271       return;
1272     }
1273 
1274     // Remove any incoming values that come from an invalid predecessor
1275     for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) {
1276       std::vector<BasicBlock *> BlocksToRemove;
1277       for (BasicBlock *IncomingBB : Phi->blocks()) {
1278         if (!isPredecessor(BB, IncomingBB))
1279           BlocksToRemove.push_back(IncomingBB);
1280       }
1281       for (BasicBlock *BB : BlocksToRemove)
1282         Phi->removeIncomingValue(BB);
1283     }
1284   }
1285 
1286   /// Checks if BB was already cloned for a particular next state value. If it
1287   /// was then it returns this cloned block, and otherwise null.
getClonedBB__anonfb50cc300211::TransformDFA1288   BasicBlock *getClonedBB(BasicBlock *BB, const APInt &NextState,
1289                           DuplicateBlockMap &DuplicateMap) {
1290     CloneList ClonedBBs = DuplicateMap[BB];
1291 
1292     // Find an entry in the CloneList with this NextState. If it exists then
1293     // return the corresponding BB
1294     auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) {
1295       return C.State == NextState;
1296     });
1297     return It != ClonedBBs.end() ? (*It).BB : nullptr;
1298   }
1299 
1300   /// Helper to get the successor corresponding to a particular case value for
1301   /// a switch statement.
getNextCaseSuccessor__anonfb50cc300211::TransformDFA1302   BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) {
1303     BasicBlock *NextCase = nullptr;
1304     for (auto Case : Switch->cases()) {
1305       if (Case.getCaseValue()->getValue() == NextState) {
1306         NextCase = Case.getCaseSuccessor();
1307         break;
1308       }
1309     }
1310     if (!NextCase)
1311       NextCase = Switch->getDefaultDest();
1312     return NextCase;
1313   }
1314 
1315   /// Returns true if IncomingBB is a predecessor of BB.
isPredecessor__anonfb50cc300211::TransformDFA1316   bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) {
1317     return llvm::is_contained(predecessors(BB), IncomingBB);
1318   }
1319 
1320   AllSwitchPaths *SwitchPaths;
1321   DominatorTree *DT;
1322   AssumptionCache *AC;
1323   TargetTransformInfo *TTI;
1324   OptimizationRemarkEmitter *ORE;
1325   SmallPtrSet<const Value *, 32> EphValues;
1326   std::vector<ThreadingPath> TPaths;
1327 };
1328 
run(Function & F)1329 bool DFAJumpThreading::run(Function &F) {
1330   LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n");
1331 
1332   if (F.hasOptSize()) {
1333     LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n");
1334     return false;
1335   }
1336 
1337   if (ClViewCfgBefore)
1338     F.viewCFG();
1339 
1340   SmallVector<AllSwitchPaths, 2> ThreadableLoops;
1341   bool MadeChanges = false;
1342   LoopInfoBroken = false;
1343 
1344   for (BasicBlock &BB : F) {
1345     auto *SI = dyn_cast<SwitchInst>(BB.getTerminator());
1346     if (!SI)
1347       continue;
1348 
1349     LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName()
1350                       << " is a candidate\n");
1351     MainSwitch Switch(SI, LI, ORE);
1352 
1353     if (!Switch.getInstr()) {
1354       LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is not a "
1355                         << "candidate for jump threading\n");
1356       continue;
1357     }
1358 
1359     LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a "
1360                       << "candidate for jump threading\n");
1361     LLVM_DEBUG(SI->dump());
1362 
1363     unfoldSelectInstrs(DT, Switch.getSelectInsts());
1364     if (!Switch.getSelectInsts().empty())
1365       MadeChanges = true;
1366 
1367     AllSwitchPaths SwitchPaths(&Switch, ORE, LI,
1368                                LI->getLoopFor(&BB)->getOutermostLoop());
1369     SwitchPaths.run();
1370 
1371     if (SwitchPaths.getNumThreadingPaths() > 0) {
1372       ThreadableLoops.push_back(SwitchPaths);
1373 
1374       // For the time being limit this optimization to occurring once in a
1375       // function since it can change the CFG significantly. This is not a
1376       // strict requirement but it can cause buggy behavior if there is an
1377       // overlap of blocks in different opportunities. There is a lot of room to
1378       // experiment with catching more opportunities here.
1379       // NOTE: To release this contraint, we must handle LoopInfo invalidation
1380       break;
1381     }
1382   }
1383 
1384 #ifdef NDEBUG
1385   LI->verify(*DT);
1386 #endif
1387 
1388   SmallPtrSet<const Value *, 32> EphValues;
1389   if (ThreadableLoops.size() > 0)
1390     CodeMetrics::collectEphemeralValues(&F, AC, EphValues);
1391 
1392   for (AllSwitchPaths SwitchPaths : ThreadableLoops) {
1393     TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues);
1394     Transform.run();
1395     MadeChanges = true;
1396     LoopInfoBroken = true;
1397   }
1398 
1399 #ifdef EXPENSIVE_CHECKS
1400   assert(DT->verify(DominatorTree::VerificationLevel::Full));
1401   verifyFunction(F, &dbgs());
1402 #endif
1403 
1404   return MadeChanges;
1405 }
1406 
1407 } // end anonymous namespace
1408 
1409 /// Integrate with the new Pass Manager
run(Function & F,FunctionAnalysisManager & AM)1410 PreservedAnalyses DFAJumpThreadingPass::run(Function &F,
1411                                             FunctionAnalysisManager &AM) {
1412   AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
1413   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
1414   LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
1415   TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
1416   OptimizationRemarkEmitter ORE(&F);
1417   DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE);
1418   if (!ThreadImpl.run(F))
1419     return PreservedAnalyses::all();
1420 
1421   PreservedAnalyses PA;
1422   PA.preserve<DominatorTreeAnalysis>();
1423   if (!ThreadImpl.LoopInfoBroken)
1424     PA.preserve<LoopAnalysis>();
1425   return PA;
1426 }
1427