1 //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Transform each threading path to effectively jump thread the DFA. For 10 // example, the CFG below could be transformed as follows, where the cloned 11 // blocks unconditionally branch to the next correct case based on what is 12 // identified in the analysis. 13 // 14 // sw.bb sw.bb 15 // / | \ / | \ 16 // case1 case2 case3 case1 case2 case3 17 // \ | / | | | 18 // determinator det.2 det.3 det.1 19 // br sw.bb / | \ 20 // sw.bb.2 sw.bb.3 sw.bb.1 21 // br case2 br case3 br case1§ 22 // 23 // Definitions and Terminology: 24 // 25 // * Threading path: 26 // a list of basic blocks, the exit state, and the block that determines 27 // the next state, for which the following notation will be used: 28 // < path of BBs that form a cycle > [ state, determinator ] 29 // 30 // * Predictable switch: 31 // The switch variable is always a known constant so that all conditional 32 // jumps based on switch variable can be converted to unconditional jump. 33 // 34 // * Determinator: 35 // The basic block that determines the next state of the DFA. 36 // 37 // Representing the optimization in C-like pseudocode: the code pattern on the 38 // left could functionally be transformed to the right pattern if the switch 39 // condition is predictable. 40 // 41 // X = A goto A 42 // for (...) A: 43 // switch (X) ... 44 // case A goto B 45 // X = B B: 46 // case B ... 47 // X = C goto C 48 // 49 // The pass first checks that switch variable X is decided by the control flow 50 // path taken in the loop; for example, in case B, the next value of X is 51 // decided to be C. It then enumerates through all paths in the loop and labels 52 // the basic blocks where the next state is decided. 53 // 54 // Using this information it creates new paths that unconditionally branch to 55 // the next case. This involves cloning code, so it only gets triggered if the 56 // amount of code duplicated is below a threshold. 57 // 58 //===----------------------------------------------------------------------===// 59 60 #include "llvm/Transforms/Scalar/DFAJumpThreading.h" 61 #include "llvm/ADT/APInt.h" 62 #include "llvm/ADT/DenseMap.h" 63 #include "llvm/ADT/SmallSet.h" 64 #include "llvm/ADT/Statistic.h" 65 #include "llvm/Analysis/AssumptionCache.h" 66 #include "llvm/Analysis/CodeMetrics.h" 67 #include "llvm/Analysis/DomTreeUpdater.h" 68 #include "llvm/Analysis/LoopInfo.h" 69 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 70 #include "llvm/Analysis/TargetTransformInfo.h" 71 #include "llvm/IR/CFG.h" 72 #include "llvm/IR/Constants.h" 73 #include "llvm/IR/IntrinsicInst.h" 74 #include "llvm/Support/CommandLine.h" 75 #include "llvm/Support/Debug.h" 76 #include "llvm/Transforms/Utils/Cloning.h" 77 #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" 78 #include "llvm/Transforms/Utils/ValueMapper.h" 79 #include <deque> 80 81 #ifdef EXPENSIVE_CHECKS 82 #include "llvm/IR/Verifier.h" 83 #endif 84 85 using namespace llvm; 86 87 #define DEBUG_TYPE "dfa-jump-threading" 88 89 STATISTIC(NumTransforms, "Number of transformations done"); 90 STATISTIC(NumCloned, "Number of blocks cloned"); 91 STATISTIC(NumPaths, "Number of individual paths threaded"); 92 93 static cl::opt<bool> 94 ClViewCfgBefore("dfa-jump-view-cfg-before", 95 cl::desc("View the CFG before DFA Jump Threading"), 96 cl::Hidden, cl::init(false)); 97 98 static cl::opt<bool> EarlyExitHeuristic( 99 "dfa-early-exit-heuristic", 100 cl::desc("Exit early if an unpredictable value come from the same loop"), 101 cl::Hidden, cl::init(true)); 102 103 static cl::opt<unsigned> MaxPathLength( 104 "dfa-max-path-length", 105 cl::desc("Max number of blocks searched to find a threading path"), 106 cl::Hidden, cl::init(20)); 107 108 static cl::opt<unsigned> MaxNumVisitiedPaths( 109 "dfa-max-num-visited-paths", 110 cl::desc( 111 "Max number of blocks visited while enumerating paths around a switch"), 112 cl::Hidden, cl::init(2500)); 113 114 static cl::opt<unsigned> 115 MaxNumPaths("dfa-max-num-paths", 116 cl::desc("Max number of paths enumerated around a switch"), 117 cl::Hidden, cl::init(200)); 118 119 static cl::opt<unsigned> 120 CostThreshold("dfa-cost-threshold", 121 cl::desc("Maximum cost accepted for the transformation"), 122 cl::Hidden, cl::init(50)); 123 124 namespace { 125 126 class SelectInstToUnfold { 127 SelectInst *SI; 128 PHINode *SIUse; 129 130 public: 131 SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {} 132 133 SelectInst *getInst() { return SI; } 134 PHINode *getUse() { return SIUse; } 135 136 explicit operator bool() const { return SI && SIUse; } 137 }; 138 139 void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, 140 std::vector<SelectInstToUnfold> *NewSIsToUnfold, 141 std::vector<BasicBlock *> *NewBBs); 142 143 class DFAJumpThreading { 144 public: 145 DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, LoopInfo *LI, 146 TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE) 147 : AC(AC), DT(DT), LI(LI), TTI(TTI), ORE(ORE) {} 148 149 bool run(Function &F); 150 bool LoopInfoBroken; 151 152 private: 153 void 154 unfoldSelectInstrs(DominatorTree *DT, 155 const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { 156 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 157 SmallVector<SelectInstToUnfold, 4> Stack(SelectInsts); 158 159 while (!Stack.empty()) { 160 SelectInstToUnfold SIToUnfold = Stack.pop_back_val(); 161 162 std::vector<SelectInstToUnfold> NewSIsToUnfold; 163 std::vector<BasicBlock *> NewBBs; 164 unfold(&DTU, LI, SIToUnfold, &NewSIsToUnfold, &NewBBs); 165 166 // Put newly discovered select instructions into the work list. 167 llvm::append_range(Stack, NewSIsToUnfold); 168 } 169 } 170 171 AssumptionCache *AC; 172 DominatorTree *DT; 173 LoopInfo *LI; 174 TargetTransformInfo *TTI; 175 OptimizationRemarkEmitter *ORE; 176 }; 177 178 } // end anonymous namespace 179 180 namespace { 181 182 /// Unfold the select instruction held in \p SIToUnfold by replacing it with 183 /// control flow. 184 /// 185 /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly 186 /// created basic blocks into \p NewBBs. 187 /// 188 /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. 189 void unfold(DomTreeUpdater *DTU, LoopInfo *LI, SelectInstToUnfold SIToUnfold, 190 std::vector<SelectInstToUnfold> *NewSIsToUnfold, 191 std::vector<BasicBlock *> *NewBBs) { 192 SelectInst *SI = SIToUnfold.getInst(); 193 PHINode *SIUse = SIToUnfold.getUse(); 194 BasicBlock *StartBlock = SI->getParent(); 195 BranchInst *StartBlockTerm = 196 dyn_cast<BranchInst>(StartBlock->getTerminator()); 197 198 assert(StartBlockTerm); 199 assert(SI->hasOneUse()); 200 201 if (StartBlockTerm->isUnconditional()) { 202 BasicBlock *EndBlock = StartBlock->getUniqueSuccessor(); 203 // Arbitrarily choose the 'false' side for a new input value to the PHI. 204 BasicBlock *NewBlock = BasicBlock::Create( 205 SI->getContext(), Twine(SI->getName(), ".si.unfold.false"), 206 EndBlock->getParent(), EndBlock); 207 NewBBs->push_back(NewBlock); 208 BranchInst::Create(EndBlock, NewBlock); 209 DTU->applyUpdates({{DominatorTree::Insert, NewBlock, EndBlock}}); 210 211 // StartBlock 212 // | \ 213 // | NewBlock 214 // | / 215 // EndBlock 216 Value *SIOp1 = SI->getTrueValue(); 217 Value *SIOp2 = SI->getFalseValue(); 218 219 PHINode *NewPhi = PHINode::Create(SIUse->getType(), 1, 220 Twine(SIOp2->getName(), ".si.unfold.phi"), 221 NewBlock->getFirstInsertionPt()); 222 NewPhi->addIncoming(SIOp2, StartBlock); 223 224 // Update any other PHI nodes in EndBlock. 225 for (PHINode &Phi : EndBlock->phis()) { 226 if (SIUse == &Phi) 227 continue; 228 Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlock); 229 } 230 231 // Update the phi node of SI, which is its only use. 232 if (EndBlock == SIUse->getParent()) { 233 SIUse->addIncoming(NewPhi, NewBlock); 234 SIUse->replaceUsesOfWith(SI, SIOp1); 235 } else { 236 PHINode *EndPhi = PHINode::Create(SIUse->getType(), pred_size(EndBlock), 237 Twine(SI->getName(), ".si.unfold.phi"), 238 EndBlock->getFirstInsertionPt()); 239 for (BasicBlock *Pred : predecessors(EndBlock)) { 240 if (Pred != StartBlock && Pred != NewBlock) 241 EndPhi->addIncoming(EndPhi, Pred); 242 } 243 244 EndPhi->addIncoming(SIOp1, StartBlock); 245 EndPhi->addIncoming(NewPhi, NewBlock); 246 SIUse->replaceUsesOfWith(SI, EndPhi); 247 SIUse = EndPhi; 248 } 249 250 if (auto *OpSi = dyn_cast<SelectInst>(SIOp1)) 251 NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, SIUse)); 252 if (auto *OpSi = dyn_cast<SelectInst>(SIOp2)) 253 NewSIsToUnfold->push_back(SelectInstToUnfold(OpSi, NewPhi)); 254 255 // Insert the real conditional branch based on the original condition. 256 StartBlockTerm->eraseFromParent(); 257 BranchInst::Create(EndBlock, NewBlock, SI->getCondition(), StartBlock); 258 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}, 259 {DominatorTree::Insert, StartBlock, NewBlock}}); 260 } else { 261 BasicBlock *EndBlock = SIUse->getParent(); 262 BasicBlock *NewBlockT = BasicBlock::Create( 263 SI->getContext(), Twine(SI->getName(), ".si.unfold.true"), 264 EndBlock->getParent(), EndBlock); 265 BasicBlock *NewBlockF = BasicBlock::Create( 266 SI->getContext(), Twine(SI->getName(), ".si.unfold.false"), 267 EndBlock->getParent(), EndBlock); 268 269 NewBBs->push_back(NewBlockT); 270 NewBBs->push_back(NewBlockF); 271 272 // Def only has one use in EndBlock. 273 // Before transformation: 274 // StartBlock(Def) 275 // | \ 276 // EndBlock OtherBlock 277 // (Use) 278 // 279 // After transformation: 280 // StartBlock(Def) 281 // | \ 282 // | OtherBlock 283 // NewBlockT 284 // | \ 285 // | NewBlockF 286 // | / 287 // | / 288 // EndBlock 289 // (Use) 290 BranchInst::Create(EndBlock, NewBlockF); 291 // Insert the real conditional branch based on the original condition. 292 BranchInst::Create(EndBlock, NewBlockF, SI->getCondition(), NewBlockT); 293 DTU->applyUpdates({{DominatorTree::Insert, NewBlockT, NewBlockF}, 294 {DominatorTree::Insert, NewBlockT, EndBlock}, 295 {DominatorTree::Insert, NewBlockF, EndBlock}}); 296 297 Value *TrueVal = SI->getTrueValue(); 298 Value *FalseVal = SI->getFalseValue(); 299 300 PHINode *NewPhiT = PHINode::Create( 301 SIUse->getType(), 1, Twine(TrueVal->getName(), ".si.unfold.phi"), 302 NewBlockT->getFirstInsertionPt()); 303 PHINode *NewPhiF = PHINode::Create( 304 SIUse->getType(), 1, Twine(FalseVal->getName(), ".si.unfold.phi"), 305 NewBlockF->getFirstInsertionPt()); 306 NewPhiT->addIncoming(TrueVal, StartBlock); 307 NewPhiF->addIncoming(FalseVal, NewBlockT); 308 309 if (auto *TrueSI = dyn_cast<SelectInst>(TrueVal)) 310 NewSIsToUnfold->push_back(SelectInstToUnfold(TrueSI, NewPhiT)); 311 if (auto *FalseSi = dyn_cast<SelectInst>(FalseVal)) 312 NewSIsToUnfold->push_back(SelectInstToUnfold(FalseSi, NewPhiF)); 313 314 SIUse->addIncoming(NewPhiT, NewBlockT); 315 SIUse->addIncoming(NewPhiF, NewBlockF); 316 SIUse->removeIncomingValue(StartBlock); 317 318 // Update any other PHI nodes in EndBlock. 319 for (PHINode &Phi : EndBlock->phis()) { 320 if (SIUse == &Phi) 321 continue; 322 Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlockT); 323 Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), NewBlockF); 324 Phi.removeIncomingValue(StartBlock); 325 } 326 327 // Update the appropriate successor of the start block to point to the new 328 // unfolded block. 329 unsigned SuccNum = StartBlockTerm->getSuccessor(1) == EndBlock ? 1 : 0; 330 StartBlockTerm->setSuccessor(SuccNum, NewBlockT); 331 DTU->applyUpdates({{DominatorTree::Delete, StartBlock, EndBlock}, 332 {DominatorTree::Insert, StartBlock, NewBlockT}}); 333 } 334 335 // Preserve loop info 336 if (Loop *L = LI->getLoopFor(SI->getParent())) { 337 for (BasicBlock *NewBB : *NewBBs) 338 L->addBasicBlockToLoop(NewBB, *LI); 339 } 340 341 // The select is now dead. 342 assert(SI->use_empty() && "Select must be dead now"); 343 SI->eraseFromParent(); 344 } 345 346 struct ClonedBlock { 347 BasicBlock *BB; 348 APInt State; ///< \p State corresponds to the next value of a switch stmnt. 349 }; 350 351 typedef std::deque<BasicBlock *> PathType; 352 typedef std::vector<PathType> PathsType; 353 typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks; 354 typedef std::vector<ClonedBlock> CloneList; 355 356 // This data structure keeps track of all blocks that have been cloned. If two 357 // different ThreadingPaths clone the same block for a certain state it should 358 // be reused, and it can be looked up in this map. 359 typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap; 360 361 // This map keeps track of all the new definitions for an instruction. This 362 // information is needed when restoring SSA form after cloning blocks. 363 typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap; 364 365 inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { 366 OS << "< "; 367 for (const BasicBlock *BB : Path) { 368 std::string BBName; 369 if (BB->hasName()) 370 raw_string_ostream(BBName) << BB->getName(); 371 else 372 raw_string_ostream(BBName) << BB; 373 OS << BBName << " "; 374 } 375 OS << ">"; 376 return OS; 377 } 378 379 /// ThreadingPath is a path in the control flow of a loop that can be threaded 380 /// by cloning necessary basic blocks and replacing conditional branches with 381 /// unconditional ones. A threading path includes a list of basic blocks, the 382 /// exit state, and the block that determines the next state. 383 struct ThreadingPath { 384 /// Exit value is DFA's exit state for the given path. 385 APInt getExitValue() const { return ExitVal; } 386 void setExitValue(const ConstantInt *V) { 387 ExitVal = V->getValue(); 388 IsExitValSet = true; 389 } 390 bool isExitValueSet() const { return IsExitValSet; } 391 392 /// Determinator is the basic block that determines the next state of the DFA. 393 const BasicBlock *getDeterminatorBB() const { return DBB; } 394 void setDeterminator(const BasicBlock *BB) { DBB = BB; } 395 396 /// Path is a list of basic blocks. 397 const PathType &getPath() const { return Path; } 398 void setPath(const PathType &NewPath) { Path = NewPath; } 399 void push_back(BasicBlock *BB) { Path.push_back(BB); } 400 void push_front(BasicBlock *BB) { Path.push_front(BB); } 401 void appendExcludingFirst(const PathType &OtherPath) { 402 llvm::append_range(Path, llvm::drop_begin(OtherPath)); 403 } 404 405 void print(raw_ostream &OS) const { 406 OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]"; 407 } 408 409 private: 410 PathType Path; 411 APInt ExitVal; 412 const BasicBlock *DBB = nullptr; 413 bool IsExitValSet = false; 414 }; 415 416 #ifndef NDEBUG 417 inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) { 418 TPath.print(OS); 419 return OS; 420 } 421 #endif 422 423 struct MainSwitch { 424 MainSwitch(SwitchInst *SI, LoopInfo *LI, OptimizationRemarkEmitter *ORE) 425 : LI(LI) { 426 if (isCandidate(SI)) { 427 Instr = SI; 428 } else { 429 ORE->emit([&]() { 430 return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI) 431 << "Switch instruction is not predictable."; 432 }); 433 } 434 } 435 436 virtual ~MainSwitch() = default; 437 438 SwitchInst *getInstr() const { return Instr; } 439 const SmallVector<SelectInstToUnfold, 4> getSelectInsts() { 440 return SelectInsts; 441 } 442 443 private: 444 /// Do a use-def chain traversal starting from the switch condition to see if 445 /// \p SI is a potential condidate. 446 /// 447 /// Also, collect select instructions to unfold. 448 bool isCandidate(const SwitchInst *SI) { 449 std::deque<std::pair<Value *, BasicBlock *>> Q; 450 SmallSet<Value *, 16> SeenValues; 451 SelectInsts.clear(); 452 453 Value *SICond = SI->getCondition(); 454 LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n"); 455 if (!isa<PHINode>(SICond)) 456 return false; 457 458 // The switch must be in a loop. 459 const Loop *L = LI->getLoopFor(SI->getParent()); 460 if (!L) 461 return false; 462 463 addToQueue(SICond, nullptr, Q, SeenValues); 464 465 while (!Q.empty()) { 466 Value *Current = Q.front().first; 467 BasicBlock *CurrentIncomingBB = Q.front().second; 468 Q.pop_front(); 469 470 if (auto *Phi = dyn_cast<PHINode>(Current)) { 471 for (BasicBlock *IncomingBB : Phi->blocks()) { 472 Value *Incoming = Phi->getIncomingValueForBlock(IncomingBB); 473 addToQueue(Incoming, IncomingBB, Q, SeenValues); 474 } 475 LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n"); 476 } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) { 477 if (!isValidSelectInst(SelI)) 478 return false; 479 addToQueue(SelI->getTrueValue(), CurrentIncomingBB, Q, SeenValues); 480 addToQueue(SelI->getFalseValue(), CurrentIncomingBB, Q, SeenValues); 481 LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n"); 482 if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back())) 483 SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse)); 484 } else if (isa<Constant>(Current)) { 485 LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n"); 486 continue; 487 } else { 488 LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n"); 489 // Allow unpredictable values. The hope is that those will be the 490 // initial switch values that can be ignored (they will hit the 491 // unthreaded switch) but this assumption will get checked later after 492 // paths have been enumerated (in function getStateDefMap). 493 494 // If the unpredictable value comes from the same inner loop it is 495 // likely that it will also be on the enumerated paths, causing us to 496 // exit after we have enumerated all the paths. This heuristic save 497 // compile time because a search for all the paths can become expensive. 498 if (EarlyExitHeuristic && 499 L->contains(LI->getLoopFor(CurrentIncomingBB))) { 500 LLVM_DEBUG(dbgs() 501 << "\tExiting early due to unpredictability heuristic.\n"); 502 return false; 503 } 504 505 continue; 506 } 507 } 508 509 return true; 510 } 511 512 void addToQueue(Value *Val, BasicBlock *BB, 513 std::deque<std::pair<Value *, BasicBlock *>> &Q, 514 SmallSet<Value *, 16> &SeenValues) { 515 if (SeenValues.insert(Val).second) 516 Q.push_back({Val, BB}); 517 } 518 519 bool isValidSelectInst(SelectInst *SI) { 520 if (!SI->hasOneUse()) 521 return false; 522 523 Instruction *SIUse = dyn_cast<Instruction>(SI->user_back()); 524 // The use of the select inst should be either a phi or another select. 525 if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse))) 526 return false; 527 528 BasicBlock *SIBB = SI->getParent(); 529 530 // Currently, we can only expand select instructions in basic blocks with 531 // one successor. 532 BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator()); 533 if (!SITerm || !SITerm->isUnconditional()) 534 return false; 535 536 // Only fold the select coming from directly where it is defined. 537 PHINode *PHIUser = dyn_cast<PHINode>(SIUse); 538 if (PHIUser && PHIUser->getIncomingBlock(*SI->use_begin()) != SIBB) 539 return false; 540 541 // If select will not be sunk during unfolding, and it is in the same basic 542 // block as another state defining select, then cannot unfold both. 543 for (SelectInstToUnfold SIToUnfold : SelectInsts) { 544 SelectInst *PrevSI = SIToUnfold.getInst(); 545 if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI && 546 PrevSI->getParent() == SI->getParent()) 547 return false; 548 } 549 550 return true; 551 } 552 553 LoopInfo *LI; 554 SwitchInst *Instr = nullptr; 555 SmallVector<SelectInstToUnfold, 4> SelectInsts; 556 }; 557 558 struct AllSwitchPaths { 559 AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE, 560 LoopInfo *LI, Loop *L) 561 : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), ORE(ORE), 562 LI(LI), SwitchOuterLoop(L) {} 563 564 std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; } 565 unsigned getNumThreadingPaths() { return TPaths.size(); } 566 SwitchInst *getSwitchInst() { return Switch; } 567 BasicBlock *getSwitchBlock() { return SwitchBlock; } 568 569 void run() { 570 StateDefMap StateDef = getStateDefMap(); 571 if (StateDef.empty()) { 572 ORE->emit([&]() { 573 return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", 574 Switch) 575 << "Switch instruction is not predictable."; 576 }); 577 return; 578 } 579 580 auto *SwitchPhi = cast<PHINode>(Switch->getOperand(0)); 581 auto *SwitchPhiDefBB = SwitchPhi->getParent(); 582 VisitedBlocks VB; 583 // Get paths from the determinator BBs to SwitchPhiDefBB 584 std::vector<ThreadingPath> PathsToPhiDef = 585 getPathsFromStateDefMap(StateDef, SwitchPhi, VB); 586 if (SwitchPhiDefBB == SwitchBlock) { 587 TPaths = std::move(PathsToPhiDef); 588 return; 589 } 590 591 // Find and append paths from SwitchPhiDefBB to SwitchBlock. 592 PathsType PathsToSwitchBB = 593 paths(SwitchPhiDefBB, SwitchBlock, VB, /* PathDepth = */ 1); 594 if (PathsToSwitchBB.empty()) 595 return; 596 597 std::vector<ThreadingPath> TempList; 598 for (const ThreadingPath &Path : PathsToPhiDef) { 599 for (const PathType &PathToSw : PathsToSwitchBB) { 600 ThreadingPath PathCopy(Path); 601 PathCopy.appendExcludingFirst(PathToSw); 602 TempList.push_back(PathCopy); 603 } 604 } 605 TPaths = std::move(TempList); 606 } 607 608 private: 609 // Value: an instruction that defines a switch state; 610 // Key: the parent basic block of that instruction. 611 typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap; 612 std::vector<ThreadingPath> getPathsFromStateDefMap(StateDefMap &StateDef, 613 PHINode *Phi, 614 VisitedBlocks &VB) { 615 std::vector<ThreadingPath> Res; 616 auto *PhiBB = Phi->getParent(); 617 VB.insert(PhiBB); 618 619 VisitedBlocks UniqueBlocks; 620 for (auto *IncomingBB : Phi->blocks()) { 621 if (!UniqueBlocks.insert(IncomingBB).second) 622 continue; 623 if (!SwitchOuterLoop->contains(IncomingBB)) 624 continue; 625 626 Value *IncomingValue = Phi->getIncomingValueForBlock(IncomingBB); 627 // We found the determinator. This is the start of our path. 628 if (auto *C = dyn_cast<ConstantInt>(IncomingValue)) { 629 // SwitchBlock is the determinator, unsupported unless its also the def. 630 if (PhiBB == SwitchBlock && 631 SwitchBlock != cast<PHINode>(Switch->getOperand(0))->getParent()) 632 continue; 633 ThreadingPath NewPath; 634 NewPath.setDeterminator(PhiBB); 635 NewPath.setExitValue(C); 636 // Don't add SwitchBlock at the start, this is handled later. 637 if (IncomingBB != SwitchBlock) 638 NewPath.push_back(IncomingBB); 639 NewPath.push_back(PhiBB); 640 Res.push_back(NewPath); 641 continue; 642 } 643 // Don't get into a cycle. 644 if (VB.contains(IncomingBB) || IncomingBB == SwitchBlock) 645 continue; 646 // Recurse up the PHI chain. 647 auto *IncomingPhi = dyn_cast<PHINode>(IncomingValue); 648 if (!IncomingPhi) 649 continue; 650 auto *IncomingPhiDefBB = IncomingPhi->getParent(); 651 if (!StateDef.contains(IncomingPhiDefBB)) 652 continue; 653 654 // Direct predecessor, just add to the path. 655 if (IncomingPhiDefBB == IncomingBB) { 656 std::vector<ThreadingPath> PredPaths = 657 getPathsFromStateDefMap(StateDef, IncomingPhi, VB); 658 for (ThreadingPath &Path : PredPaths) { 659 Path.push_back(PhiBB); 660 Res.push_back(std::move(Path)); 661 } 662 continue; 663 } 664 // Not a direct predecessor, find intermediate paths to append to the 665 // existing path. 666 if (VB.contains(IncomingPhiDefBB)) 667 continue; 668 669 PathsType IntermediatePaths; 670 IntermediatePaths = 671 paths(IncomingPhiDefBB, IncomingBB, VB, /* PathDepth = */ 1); 672 if (IntermediatePaths.empty()) 673 continue; 674 675 std::vector<ThreadingPath> PredPaths = 676 getPathsFromStateDefMap(StateDef, IncomingPhi, VB); 677 for (const ThreadingPath &Path : PredPaths) { 678 for (const PathType &IPath : IntermediatePaths) { 679 ThreadingPath NewPath(Path); 680 NewPath.appendExcludingFirst(IPath); 681 NewPath.push_back(PhiBB); 682 Res.push_back(NewPath); 683 } 684 } 685 } 686 VB.erase(PhiBB); 687 return Res; 688 } 689 690 PathsType paths(BasicBlock *BB, BasicBlock *ToBB, VisitedBlocks &Visited, 691 unsigned PathDepth) { 692 PathsType Res; 693 694 // Stop exploring paths after visiting MaxPathLength blocks 695 if (PathDepth > MaxPathLength) { 696 ORE->emit([&]() { 697 return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached", 698 Switch) 699 << "Exploration stopped after visiting MaxPathLength=" 700 << ore::NV("MaxPathLength", MaxPathLength) << " blocks."; 701 }); 702 return Res; 703 } 704 705 Visited.insert(BB); 706 if (++NumVisited > MaxNumVisitiedPaths) 707 return Res; 708 709 // Stop if we have reached the BB out of loop, since its successors have no 710 // impact on the DFA. 711 if (!SwitchOuterLoop->contains(BB)) 712 return Res; 713 714 // Some blocks have multiple edges to the same successor, and this set 715 // is used to prevent a duplicate path from being generated 716 SmallSet<BasicBlock *, 4> Successors; 717 for (BasicBlock *Succ : successors(BB)) { 718 if (!Successors.insert(Succ).second) 719 continue; 720 721 // Found a cycle through the final block. 722 if (Succ == ToBB) { 723 Res.push_back({BB, ToBB}); 724 continue; 725 } 726 727 // We have encountered a cycle, do not get caught in it 728 if (Visited.contains(Succ)) 729 continue; 730 731 auto *CurrLoop = LI->getLoopFor(BB); 732 // Unlikely to be beneficial. 733 if (Succ == CurrLoop->getHeader()) 734 continue; 735 // Skip for now, revisit this condition later to see the impact on 736 // coverage and compile time. 737 if (LI->getLoopFor(Succ) != CurrLoop) 738 continue; 739 740 PathsType SuccPaths = paths(Succ, ToBB, Visited, PathDepth + 1); 741 for (PathType &Path : SuccPaths) { 742 Path.push_front(BB); 743 Res.push_back(Path); 744 if (Res.size() >= MaxNumPaths) { 745 return Res; 746 } 747 } 748 } 749 // This block could now be visited again from a different predecessor. Note 750 // that this will result in exponential runtime. Subpaths could possibly be 751 // cached but it takes a lot of memory to store them. 752 Visited.erase(BB); 753 return Res; 754 } 755 756 /// Walk the use-def chain and collect all the state-defining blocks and the 757 /// PHI nodes in those blocks that define the state. 758 StateDefMap getStateDefMap() const { 759 StateDefMap Res; 760 PHINode *FirstDef = dyn_cast<PHINode>(Switch->getOperand(0)); 761 assert(FirstDef && "The first definition must be a phi."); 762 763 SmallVector<PHINode *, 8> Stack; 764 Stack.push_back(FirstDef); 765 SmallSet<Value *, 16> SeenValues; 766 767 while (!Stack.empty()) { 768 PHINode *CurPhi = Stack.pop_back_val(); 769 770 Res[CurPhi->getParent()] = CurPhi; 771 SeenValues.insert(CurPhi); 772 773 for (BasicBlock *IncomingBB : CurPhi->blocks()) { 774 PHINode *IncomingPhi = 775 dyn_cast<PHINode>(CurPhi->getIncomingValueForBlock(IncomingBB)); 776 if (!IncomingPhi) 777 continue; 778 bool IsOutsideLoops = !SwitchOuterLoop->contains(IncomingBB); 779 if (SeenValues.contains(IncomingPhi) || IsOutsideLoops) 780 continue; 781 782 Stack.push_back(IncomingPhi); 783 } 784 } 785 786 return Res; 787 } 788 789 unsigned NumVisited = 0; 790 SwitchInst *Switch; 791 BasicBlock *SwitchBlock; 792 OptimizationRemarkEmitter *ORE; 793 std::vector<ThreadingPath> TPaths; 794 LoopInfo *LI; 795 Loop *SwitchOuterLoop; 796 }; 797 798 struct TransformDFA { 799 TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT, 800 AssumptionCache *AC, TargetTransformInfo *TTI, 801 OptimizationRemarkEmitter *ORE, 802 SmallPtrSet<const Value *, 32> EphValues) 803 : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE), 804 EphValues(EphValues) {} 805 806 void run() { 807 if (isLegalAndProfitableToTransform()) { 808 createAllExitPaths(); 809 NumTransforms++; 810 } 811 } 812 813 private: 814 /// This function performs both a legality check and profitability check at 815 /// the same time since it is convenient to do so. It iterates through all 816 /// blocks that will be cloned, and keeps track of the duplication cost. It 817 /// also returns false if it is illegal to clone some required block. 818 bool isLegalAndProfitableToTransform() { 819 CodeMetrics Metrics; 820 SwitchInst *Switch = SwitchPaths->getSwitchInst(); 821 822 // Don't thread switch without multiple successors. 823 if (Switch->getNumSuccessors() <= 1) 824 return false; 825 826 // Note that DuplicateBlockMap is not being used as intended here. It is 827 // just being used to ensure (BB, State) pairs are only counted once. 828 DuplicateBlockMap DuplicateMap; 829 830 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 831 PathType PathBBs = TPath.getPath(); 832 APInt NextState = TPath.getExitValue(); 833 const BasicBlock *Determinator = TPath.getDeterminatorBB(); 834 835 // Update Metrics for the Switch block, this is always cloned 836 BasicBlock *BB = SwitchPaths->getSwitchBlock(); 837 BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 838 if (!VisitedBB) { 839 Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 840 DuplicateMap[BB].push_back({BB, NextState}); 841 } 842 843 // If the Switch block is the Determinator, then we can continue since 844 // this is the only block that is cloned and we already counted for it. 845 if (PathBBs.front() == Determinator) 846 continue; 847 848 // Otherwise update Metrics for all blocks that will be cloned. If any 849 // block is already cloned and would be reused, don't double count it. 850 auto DetIt = llvm::find(PathBBs, Determinator); 851 for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 852 BB = *BBIt; 853 VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 854 if (VisitedBB) 855 continue; 856 Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 857 DuplicateMap[BB].push_back({BB, NextState}); 858 } 859 860 if (Metrics.notDuplicatable) { 861 LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 862 << "non-duplicatable instructions.\n"); 863 ORE->emit([&]() { 864 return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst", 865 Switch) 866 << "Contains non-duplicatable instructions."; 867 }); 868 return false; 869 } 870 871 // FIXME: Allow jump threading with controlled convergence. 872 if (Metrics.Convergence != ConvergenceKind::None) { 873 LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 874 << "convergent instructions.\n"); 875 ORE->emit([&]() { 876 return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) 877 << "Contains convergent instructions."; 878 }); 879 return false; 880 } 881 882 if (!Metrics.NumInsts.isValid()) { 883 LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 884 << "instructions with invalid cost.\n"); 885 ORE->emit([&]() { 886 return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) 887 << "Contains instructions with invalid cost."; 888 }); 889 return false; 890 } 891 } 892 893 InstructionCost DuplicationCost = 0; 894 895 unsigned JumpTableSize = 0; 896 TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr, 897 nullptr); 898 if (JumpTableSize == 0) { 899 // Factor in the number of conditional branches reduced from jump 900 // threading. Assume that lowering the switch block is implemented by 901 // using binary search, hence the LogBase2(). 902 unsigned CondBranches = 903 APInt(32, Switch->getNumSuccessors()).ceilLogBase2(); 904 assert(CondBranches > 0 && 905 "The threaded switch must have multiple branches"); 906 DuplicationCost = Metrics.NumInsts / CondBranches; 907 } else { 908 // Compared with jump tables, the DFA optimizer removes an indirect branch 909 // on each loop iteration, thus making branch prediction more precise. The 910 // more branch targets there are, the more likely it is for the branch 911 // predictor to make a mistake, and the more benefit there is in the DFA 912 // optimizer. Thus, the more branch targets there are, the lower is the 913 // cost of the DFA opt. 914 DuplicationCost = Metrics.NumInsts / JumpTableSize; 915 } 916 917 LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block " 918 << SwitchPaths->getSwitchBlock()->getName() 919 << " is: " << DuplicationCost << "\n\n"); 920 921 if (DuplicationCost > CostThreshold) { 922 LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the " 923 << "cost threshold.\n"); 924 ORE->emit([&]() { 925 return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch) 926 << "Duplication cost exceeds the cost threshold (cost=" 927 << ore::NV("Cost", DuplicationCost) 928 << ", threshold=" << ore::NV("Threshold", CostThreshold) << ")."; 929 }); 930 return false; 931 } 932 933 ORE->emit([&]() { 934 return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch) 935 << "Switch statement jump-threaded."; 936 }); 937 938 return true; 939 } 940 941 /// Transform each threading path to effectively jump thread the DFA. 942 void createAllExitPaths() { 943 DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager); 944 945 // Move the switch block to the end of the path, since it will be duplicated 946 BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock(); 947 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 948 LLVM_DEBUG(dbgs() << TPath << "\n"); 949 // TODO: Fix exit path creation logic so that we dont need this 950 // placeholder. 951 TPath.push_front(SwitchBlock); 952 } 953 954 // Transform the ThreadingPaths and keep track of the cloned values 955 DuplicateBlockMap DuplicateMap; 956 DefMap NewDefs; 957 958 SmallSet<BasicBlock *, 16> BlocksToClean; 959 BlocksToClean.insert_range(successors(SwitchBlock)); 960 961 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 962 createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); 963 NumPaths++; 964 } 965 966 // After all paths are cloned, now update the last successor of the cloned 967 // path so it skips over the switch statement 968 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) 969 updateLastSuccessor(TPath, DuplicateMap, &DTU); 970 971 // For each instruction that was cloned and used outside, update its uses 972 updateSSA(NewDefs); 973 974 // Clean PHI Nodes for the newly created blocks 975 for (BasicBlock *BB : BlocksToClean) 976 cleanPhiNodes(BB); 977 } 978 979 /// For a specific ThreadingPath \p Path, create an exit path starting from 980 /// the determinator block. 981 /// 982 /// To remember the correct destination, we have to duplicate blocks 983 /// corresponding to each state. Also update the terminating instruction of 984 /// the predecessors, and phis in the successor blocks. 985 void createExitPath(DefMap &NewDefs, ThreadingPath &Path, 986 DuplicateBlockMap &DuplicateMap, 987 SmallSet<BasicBlock *, 16> &BlocksToClean, 988 DomTreeUpdater *DTU) { 989 APInt NextState = Path.getExitValue(); 990 const BasicBlock *Determinator = Path.getDeterminatorBB(); 991 PathType PathBBs = Path.getPath(); 992 993 // Don't select the placeholder block in front 994 if (PathBBs.front() == Determinator) 995 PathBBs.pop_front(); 996 997 auto DetIt = llvm::find(PathBBs, Determinator); 998 // When there is only one BB in PathBBs, the determinator takes itself as a 999 // direct predecessor. 1000 BasicBlock *PrevBB = PathBBs.size() == 1 ? *DetIt : *std::prev(DetIt); 1001 for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 1002 BasicBlock *BB = *BBIt; 1003 BlocksToClean.insert(BB); 1004 1005 // We already cloned BB for this NextState, now just update the branch 1006 // and continue. 1007 BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap); 1008 if (NextBB) { 1009 updatePredecessor(PrevBB, BB, NextBB, DTU); 1010 PrevBB = NextBB; 1011 continue; 1012 } 1013 1014 // Clone the BB and update the successor of Prev to jump to the new block 1015 BasicBlock *NewBB = cloneBlockAndUpdatePredecessor( 1016 BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU); 1017 DuplicateMap[BB].push_back({NewBB, NextState}); 1018 BlocksToClean.insert(NewBB); 1019 PrevBB = NewBB; 1020 } 1021 } 1022 1023 /// Restore SSA form after cloning blocks. 1024 /// 1025 /// Each cloned block creates new defs for a variable, and the uses need to be 1026 /// updated to reflect this. The uses may be replaced with a cloned value, or 1027 /// some derived phi instruction. Note that all uses of a value defined in the 1028 /// same block were already remapped when cloning the block. 1029 void updateSSA(DefMap &NewDefs) { 1030 SSAUpdaterBulk SSAUpdate; 1031 SmallVector<Use *, 16> UsesToRename; 1032 1033 for (const auto &KV : NewDefs) { 1034 Instruction *I = KV.first; 1035 BasicBlock *BB = I->getParent(); 1036 std::vector<Instruction *> Cloned = KV.second; 1037 1038 // Scan all uses of this instruction to see if it is used outside of its 1039 // block, and if so, record them in UsesToRename. 1040 for (Use &U : I->uses()) { 1041 Instruction *User = cast<Instruction>(U.getUser()); 1042 if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 1043 if (UserPN->getIncomingBlock(U) == BB) 1044 continue; 1045 } else if (User->getParent() == BB) { 1046 continue; 1047 } 1048 1049 UsesToRename.push_back(&U); 1050 } 1051 1052 // If there are no uses outside the block, we're done with this 1053 // instruction. 1054 if (UsesToRename.empty()) 1055 continue; 1056 LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I 1057 << "\n"); 1058 1059 // We found a use of I outside of BB. Rename all uses of I that are 1060 // outside its block to be uses of the appropriate PHI node etc. See 1061 // ValuesInBlocks with the values we know. 1062 unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType()); 1063 SSAUpdate.AddAvailableValue(VarNum, BB, I); 1064 for (Instruction *New : Cloned) 1065 SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New); 1066 1067 while (!UsesToRename.empty()) 1068 SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val()); 1069 1070 LLVM_DEBUG(dbgs() << "\n"); 1071 } 1072 // SSAUpdater handles phi placement and renaming uses with the appropriate 1073 // value. 1074 SSAUpdate.RewriteAllUses(DT); 1075 } 1076 1077 /// Clones a basic block, and adds it to the CFG. 1078 /// 1079 /// This function also includes updating phi nodes in the successors of the 1080 /// BB, and remapping uses that were defined locally in the cloned BB. 1081 BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB, 1082 const APInt &NextState, 1083 DuplicateBlockMap &DuplicateMap, 1084 DefMap &NewDefs, 1085 DomTreeUpdater *DTU) { 1086 ValueToValueMapTy VMap; 1087 BasicBlock *NewBB = CloneBasicBlock( 1088 BB, VMap, ".jt" + std::to_string(NextState.getLimitedValue()), 1089 BB->getParent()); 1090 NewBB->moveAfter(BB); 1091 NumCloned++; 1092 1093 for (Instruction &I : *NewBB) { 1094 // Do not remap operands of PHINode in case a definition in BB is an 1095 // incoming value to a phi in the same block. This incoming value will 1096 // be renamed later while restoring SSA. 1097 if (isa<PHINode>(&I)) 1098 continue; 1099 RemapInstruction(&I, VMap, 1100 RF_IgnoreMissingLocals | RF_NoModuleLevelChanges); 1101 if (AssumeInst *II = dyn_cast<AssumeInst>(&I)) 1102 AC->registerAssumption(II); 1103 } 1104 1105 updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap); 1106 updatePredecessor(PrevBB, BB, NewBB, DTU); 1107 updateDefMap(NewDefs, VMap); 1108 1109 // Add all successors to the DominatorTree 1110 SmallPtrSet<BasicBlock *, 4> SuccSet; 1111 for (auto *SuccBB : successors(NewBB)) { 1112 if (SuccSet.insert(SuccBB).second) 1113 DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}}); 1114 } 1115 SuccSet.clear(); 1116 return NewBB; 1117 } 1118 1119 /// Update the phi nodes in BB's successors. 1120 /// 1121 /// This means creating a new incoming value from NewBB with the new 1122 /// instruction wherever there is an incoming value from BB. 1123 void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB, 1124 const APInt &NextState, ValueToValueMapTy &VMap, 1125 DuplicateBlockMap &DuplicateMap) { 1126 std::vector<BasicBlock *> BlocksToUpdate; 1127 1128 // If BB is the last block in the path, we can simply update the one case 1129 // successor that will be reached. 1130 if (BB == SwitchPaths->getSwitchBlock()) { 1131 SwitchInst *Switch = SwitchPaths->getSwitchInst(); 1132 BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 1133 BlocksToUpdate.push_back(NextCase); 1134 BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap); 1135 if (ClonedSucc) 1136 BlocksToUpdate.push_back(ClonedSucc); 1137 } 1138 // Otherwise update phis in all successors. 1139 else { 1140 for (BasicBlock *Succ : successors(BB)) { 1141 BlocksToUpdate.push_back(Succ); 1142 1143 // Check if a successor has already been cloned for the particular exit 1144 // value. In this case if a successor was already cloned, the phi nodes 1145 // in the cloned block should be updated directly. 1146 BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap); 1147 if (ClonedSucc) 1148 BlocksToUpdate.push_back(ClonedSucc); 1149 } 1150 } 1151 1152 // If there is a phi with an incoming value from BB, create a new incoming 1153 // value for the new predecessor ClonedBB. The value will either be the same 1154 // value from BB or a cloned value. 1155 for (BasicBlock *Succ : BlocksToUpdate) { 1156 for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II); 1157 ++II) { 1158 Value *Incoming = Phi->getIncomingValueForBlock(BB); 1159 if (Incoming) { 1160 if (isa<Constant>(Incoming)) { 1161 Phi->addIncoming(Incoming, ClonedBB); 1162 continue; 1163 } 1164 Value *ClonedVal = VMap[Incoming]; 1165 if (ClonedVal) 1166 Phi->addIncoming(ClonedVal, ClonedBB); 1167 else 1168 Phi->addIncoming(Incoming, ClonedBB); 1169 } 1170 } 1171 } 1172 } 1173 1174 /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all 1175 /// other successors are kept as well. 1176 void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB, 1177 BasicBlock *NewBB, DomTreeUpdater *DTU) { 1178 // When a path is reused, there is a chance that predecessors were already 1179 // updated before. Check if the predecessor needs to be updated first. 1180 if (!isPredecessor(OldBB, PrevBB)) 1181 return; 1182 1183 Instruction *PrevTerm = PrevBB->getTerminator(); 1184 for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) { 1185 if (PrevTerm->getSuccessor(Idx) == OldBB) { 1186 OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true); 1187 PrevTerm->setSuccessor(Idx, NewBB); 1188 } 1189 } 1190 DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB}, 1191 {DominatorTree::Insert, PrevBB, NewBB}}); 1192 } 1193 1194 /// Add new value mappings to the DefMap to keep track of all new definitions 1195 /// for a particular instruction. These will be used while updating SSA form. 1196 void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) { 1197 SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector; 1198 NewDefsVector.reserve(VMap.size()); 1199 1200 for (auto Entry : VMap) { 1201 Instruction *Inst = 1202 dyn_cast<Instruction>(const_cast<Value *>(Entry.first)); 1203 if (!Inst || !Entry.second || isa<BranchInst>(Inst) || 1204 isa<SwitchInst>(Inst)) { 1205 continue; 1206 } 1207 1208 Instruction *Cloned = dyn_cast<Instruction>(Entry.second); 1209 if (!Cloned) 1210 continue; 1211 1212 NewDefsVector.push_back({Inst, Cloned}); 1213 } 1214 1215 // Sort the defs to get deterministic insertion order into NewDefs. 1216 sort(NewDefsVector, [](const auto &LHS, const auto &RHS) { 1217 if (LHS.first == RHS.first) 1218 return LHS.second->comesBefore(RHS.second); 1219 return LHS.first->comesBefore(RHS.first); 1220 }); 1221 1222 for (const auto &KV : NewDefsVector) 1223 NewDefs[KV.first].push_back(KV.second); 1224 } 1225 1226 /// Update the last branch of a particular cloned path to point to the correct 1227 /// case successor. 1228 /// 1229 /// Note that this is an optional step and would have been done in later 1230 /// optimizations, but it makes the CFG significantly easier to work with. 1231 void updateLastSuccessor(ThreadingPath &TPath, 1232 DuplicateBlockMap &DuplicateMap, 1233 DomTreeUpdater *DTU) { 1234 APInt NextState = TPath.getExitValue(); 1235 BasicBlock *BB = TPath.getPath().back(); 1236 BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap); 1237 1238 // Note multiple paths can end at the same block so check that it is not 1239 // updated yet 1240 if (!isa<SwitchInst>(LastBlock->getTerminator())) 1241 return; 1242 SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator()); 1243 BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 1244 1245 std::vector<DominatorTree::UpdateType> DTUpdates; 1246 SmallPtrSet<BasicBlock *, 4> SuccSet; 1247 for (BasicBlock *Succ : successors(LastBlock)) { 1248 if (Succ != NextCase && SuccSet.insert(Succ).second) 1249 DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ}); 1250 } 1251 1252 Switch->eraseFromParent(); 1253 BranchInst::Create(NextCase, LastBlock); 1254 1255 DTU->applyUpdates(DTUpdates); 1256 } 1257 1258 /// After cloning blocks, some of the phi nodes have extra incoming values 1259 /// that are no longer used. This function removes them. 1260 void cleanPhiNodes(BasicBlock *BB) { 1261 // If BB is no longer reachable, remove any remaining phi nodes 1262 if (pred_empty(BB)) { 1263 std::vector<PHINode *> PhiToRemove; 1264 for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 1265 PhiToRemove.push_back(Phi); 1266 } 1267 for (PHINode *PN : PhiToRemove) { 1268 PN->replaceAllUsesWith(PoisonValue::get(PN->getType())); 1269 PN->eraseFromParent(); 1270 } 1271 return; 1272 } 1273 1274 // Remove any incoming values that come from an invalid predecessor 1275 for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 1276 std::vector<BasicBlock *> BlocksToRemove; 1277 for (BasicBlock *IncomingBB : Phi->blocks()) { 1278 if (!isPredecessor(BB, IncomingBB)) 1279 BlocksToRemove.push_back(IncomingBB); 1280 } 1281 for (BasicBlock *BB : BlocksToRemove) 1282 Phi->removeIncomingValue(BB); 1283 } 1284 } 1285 1286 /// Checks if BB was already cloned for a particular next state value. If it 1287 /// was then it returns this cloned block, and otherwise null. 1288 BasicBlock *getClonedBB(BasicBlock *BB, const APInt &NextState, 1289 DuplicateBlockMap &DuplicateMap) { 1290 CloneList ClonedBBs = DuplicateMap[BB]; 1291 1292 // Find an entry in the CloneList with this NextState. If it exists then 1293 // return the corresponding BB 1294 auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) { 1295 return C.State == NextState; 1296 }); 1297 return It != ClonedBBs.end() ? (*It).BB : nullptr; 1298 } 1299 1300 /// Helper to get the successor corresponding to a particular case value for 1301 /// a switch statement. 1302 BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, const APInt &NextState) { 1303 BasicBlock *NextCase = nullptr; 1304 for (auto Case : Switch->cases()) { 1305 if (Case.getCaseValue()->getValue() == NextState) { 1306 NextCase = Case.getCaseSuccessor(); 1307 break; 1308 } 1309 } 1310 if (!NextCase) 1311 NextCase = Switch->getDefaultDest(); 1312 return NextCase; 1313 } 1314 1315 /// Returns true if IncomingBB is a predecessor of BB. 1316 bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { 1317 return llvm::is_contained(predecessors(BB), IncomingBB); 1318 } 1319 1320 AllSwitchPaths *SwitchPaths; 1321 DominatorTree *DT; 1322 AssumptionCache *AC; 1323 TargetTransformInfo *TTI; 1324 OptimizationRemarkEmitter *ORE; 1325 SmallPtrSet<const Value *, 32> EphValues; 1326 std::vector<ThreadingPath> TPaths; 1327 }; 1328 1329 bool DFAJumpThreading::run(Function &F) { 1330 LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n"); 1331 1332 if (F.hasOptSize()) { 1333 LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n"); 1334 return false; 1335 } 1336 1337 if (ClViewCfgBefore) 1338 F.viewCFG(); 1339 1340 SmallVector<AllSwitchPaths, 2> ThreadableLoops; 1341 bool MadeChanges = false; 1342 LoopInfoBroken = false; 1343 1344 for (BasicBlock &BB : F) { 1345 auto *SI = dyn_cast<SwitchInst>(BB.getTerminator()); 1346 if (!SI) 1347 continue; 1348 1349 LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName() 1350 << " is a candidate\n"); 1351 MainSwitch Switch(SI, LI, ORE); 1352 1353 if (!Switch.getInstr()) { 1354 LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is not a " 1355 << "candidate for jump threading\n"); 1356 continue; 1357 } 1358 1359 LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a " 1360 << "candidate for jump threading\n"); 1361 LLVM_DEBUG(SI->dump()); 1362 1363 unfoldSelectInstrs(DT, Switch.getSelectInsts()); 1364 if (!Switch.getSelectInsts().empty()) 1365 MadeChanges = true; 1366 1367 AllSwitchPaths SwitchPaths(&Switch, ORE, LI, 1368 LI->getLoopFor(&BB)->getOutermostLoop()); 1369 SwitchPaths.run(); 1370 1371 if (SwitchPaths.getNumThreadingPaths() > 0) { 1372 ThreadableLoops.push_back(SwitchPaths); 1373 1374 // For the time being limit this optimization to occurring once in a 1375 // function since it can change the CFG significantly. This is not a 1376 // strict requirement but it can cause buggy behavior if there is an 1377 // overlap of blocks in different opportunities. There is a lot of room to 1378 // experiment with catching more opportunities here. 1379 // NOTE: To release this contraint, we must handle LoopInfo invalidation 1380 break; 1381 } 1382 } 1383 1384 #ifdef NDEBUG 1385 LI->verify(*DT); 1386 #endif 1387 1388 SmallPtrSet<const Value *, 32> EphValues; 1389 if (ThreadableLoops.size() > 0) 1390 CodeMetrics::collectEphemeralValues(&F, AC, EphValues); 1391 1392 for (AllSwitchPaths SwitchPaths : ThreadableLoops) { 1393 TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); 1394 Transform.run(); 1395 MadeChanges = true; 1396 LoopInfoBroken = true; 1397 } 1398 1399 #ifdef EXPENSIVE_CHECKS 1400 assert(DT->verify(DominatorTree::VerificationLevel::Full)); 1401 verifyFunction(F, &dbgs()); 1402 #endif 1403 1404 return MadeChanges; 1405 } 1406 1407 } // end anonymous namespace 1408 1409 /// Integrate with the new Pass Manager 1410 PreservedAnalyses DFAJumpThreadingPass::run(Function &F, 1411 FunctionAnalysisManager &AM) { 1412 AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F); 1413 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); 1414 LoopInfo &LI = AM.getResult<LoopAnalysis>(F); 1415 TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); 1416 OptimizationRemarkEmitter ORE(&F); 1417 DFAJumpThreading ThreadImpl(&AC, &DT, &LI, &TTI, &ORE); 1418 if (!ThreadImpl.run(F)) 1419 return PreservedAnalyses::all(); 1420 1421 PreservedAnalyses PA; 1422 PA.preserve<DominatorTreeAnalysis>(); 1423 if (!ThreadImpl.LoopInfoBroken) 1424 PA.preserve<LoopAnalysis>(); 1425 return PA; 1426 } 1427