1 //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Transform each threading path to effectively jump thread the DFA. For 11 // example, the CFG below could be transformed as follows, where the cloned 12 // blocks unconditionally branch to the next correct case based on what is 13 // identified in the analysis. 14 // 15 // sw.bb sw.bb 16 // / | \ / | \ 17 // case1 case2 case3 case1 case2 case3 18 // \ | / | | | 19 // determinator det.2 det.3 det.1 20 // br sw.bb / | \ 21 // sw.bb.2 sw.bb.3 sw.bb.1 22 // br case2 br case3 br case1§ 23 // 24 // Definitions and Terminology: 25 // 26 // * Threading path: 27 // a list of basic blocks, the exit state, and the block that determines 28 // the next state, for which the following notation will be used: 29 // < path of BBs that form a cycle > [ state, determinator ] 30 // 31 // * Predictable switch: 32 // The switch variable is always a known constant so that all conditional 33 // jumps based on switch variable can be converted to unconditional jump. 34 // 35 // * Determinator: 36 // The basic block that determines the next state of the DFA. 37 // 38 // Representing the optimization in C-like pseudocode: the code pattern on the 39 // left could functionally be transformed to the right pattern if the switch 40 // condition is predictable. 41 // 42 // X = A goto A 43 // for (...) A: 44 // switch (X) ... 45 // case A goto B 46 // X = B B: 47 // case B ... 48 // X = C goto C 49 // 50 // The pass first checks that switch variable X is decided by the control flow 51 // path taken in the loop; for example, in case B, the next value of X is 52 // decided to be C. It then enumerates through all paths in the loop and labels 53 // the basic blocks where the next state is decided. 54 // 55 // Using this information it creates new paths that unconditionally branch to 56 // the next case. This involves cloning code, so it only gets triggered if the 57 // amount of code duplicated is below a threshold. 58 // 59 //===----------------------------------------------------------------------===// 60 61 #include "llvm/Transforms/Scalar/DFAJumpThreading.h" 62 #include "llvm/ADT/APInt.h" 63 #include "llvm/ADT/DenseMap.h" 64 #include "llvm/ADT/DepthFirstIterator.h" 65 #include "llvm/ADT/SmallSet.h" 66 #include "llvm/ADT/Statistic.h" 67 #include "llvm/Analysis/AssumptionCache.h" 68 #include "llvm/Analysis/CodeMetrics.h" 69 #include "llvm/Analysis/LoopIterator.h" 70 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 71 #include "llvm/Analysis/TargetTransformInfo.h" 72 #include "llvm/IR/CFG.h" 73 #include "llvm/IR/Constants.h" 74 #include "llvm/IR/IntrinsicInst.h" 75 #include "llvm/IR/Verifier.h" 76 #include "llvm/InitializePasses.h" 77 #include "llvm/Pass.h" 78 #include "llvm/Support/CommandLine.h" 79 #include "llvm/Support/Debug.h" 80 #include "llvm/Transforms/Scalar.h" 81 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 82 #include "llvm/Transforms/Utils/Cloning.h" 83 #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" 84 #include "llvm/Transforms/Utils/ValueMapper.h" 85 #include <algorithm> 86 #include <deque> 87 #include <unordered_map> 88 #include <unordered_set> 89 90 using namespace llvm; 91 92 #define DEBUG_TYPE "dfa-jump-threading" 93 94 STATISTIC(NumTransforms, "Number of transformations done"); 95 STATISTIC(NumCloned, "Number of blocks cloned"); 96 STATISTIC(NumPaths, "Number of individual paths threaded"); 97 98 static cl::opt<bool> 99 ClViewCfgBefore("dfa-jump-view-cfg-before", 100 cl::desc("View the CFG before DFA Jump Threading"), 101 cl::Hidden, cl::init(false)); 102 103 static cl::opt<unsigned> MaxPathLength( 104 "dfa-max-path-length", 105 cl::desc("Max number of blocks searched to find a threading path"), 106 cl::Hidden, cl::init(20)); 107 108 static cl::opt<unsigned> 109 CostThreshold("dfa-cost-threshold", 110 cl::desc("Maximum cost accepted for the transformation"), 111 cl::Hidden, cl::init(50)); 112 113 namespace { 114 115 class SelectInstToUnfold { 116 SelectInst *SI; 117 PHINode *SIUse; 118 119 public: 120 SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {} 121 122 SelectInst *getInst() { return SI; } 123 PHINode *getUse() { return SIUse; } 124 125 explicit operator bool() const { return SI && SIUse; } 126 }; 127 128 void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, 129 std::vector<SelectInstToUnfold> *NewSIsToUnfold, 130 std::vector<BasicBlock *> *NewBBs); 131 132 class DFAJumpThreading { 133 public: 134 DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, 135 TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE) 136 : AC(AC), DT(DT), TTI(TTI), ORE(ORE) {} 137 138 bool run(Function &F); 139 140 private: 141 void 142 unfoldSelectInstrs(DominatorTree *DT, 143 const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { 144 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 145 SmallVector<SelectInstToUnfold, 4> Stack; 146 for (SelectInstToUnfold SIToUnfold : SelectInsts) 147 Stack.push_back(SIToUnfold); 148 149 while (!Stack.empty()) { 150 SelectInstToUnfold SIToUnfold = Stack.back(); 151 Stack.pop_back(); 152 153 std::vector<SelectInstToUnfold> NewSIsToUnfold; 154 std::vector<BasicBlock *> NewBBs; 155 unfold(&DTU, SIToUnfold, &NewSIsToUnfold, &NewBBs); 156 157 // Put newly discovered select instructions into the work list. 158 for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold) 159 Stack.push_back(NewSIToUnfold); 160 } 161 } 162 163 AssumptionCache *AC; 164 DominatorTree *DT; 165 TargetTransformInfo *TTI; 166 OptimizationRemarkEmitter *ORE; 167 }; 168 169 class DFAJumpThreadingLegacyPass : public FunctionPass { 170 public: 171 static char ID; // Pass identification 172 DFAJumpThreadingLegacyPass() : FunctionPass(ID) {} 173 174 void getAnalysisUsage(AnalysisUsage &AU) const override { 175 AU.addRequired<AssumptionCacheTracker>(); 176 AU.addRequired<DominatorTreeWrapperPass>(); 177 AU.addRequired<TargetTransformInfoWrapperPass>(); 178 AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 179 } 180 181 bool runOnFunction(Function &F) override { 182 if (skipFunction(F)) 183 return false; 184 185 AssumptionCache *AC = 186 &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 187 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 188 TargetTransformInfo *TTI = 189 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 190 OptimizationRemarkEmitter *ORE = 191 &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 192 193 return DFAJumpThreading(AC, DT, TTI, ORE).run(F); 194 } 195 }; 196 } // end anonymous namespace 197 198 char DFAJumpThreadingLegacyPass::ID = 0; 199 INITIALIZE_PASS_BEGIN(DFAJumpThreadingLegacyPass, "dfa-jump-threading", 200 "DFA Jump Threading", false, false) 201 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 202 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 203 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 204 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 205 INITIALIZE_PASS_END(DFAJumpThreadingLegacyPass, "dfa-jump-threading", 206 "DFA Jump Threading", false, false) 207 208 // Public interface to the DFA Jump Threading pass 209 FunctionPass *llvm::createDFAJumpThreadingPass() { 210 return new DFAJumpThreadingLegacyPass(); 211 } 212 213 namespace { 214 215 /// Create a new basic block and sink \p SIToSink into it. 216 void createBasicBlockAndSinkSelectInst( 217 DomTreeUpdater *DTU, SelectInst *SI, PHINode *SIUse, SelectInst *SIToSink, 218 BasicBlock *EndBlock, StringRef NewBBName, BasicBlock **NewBlock, 219 BranchInst **NewBranch, std::vector<SelectInstToUnfold> *NewSIsToUnfold, 220 std::vector<BasicBlock *> *NewBBs) { 221 assert(SIToSink->hasOneUse()); 222 assert(NewBlock); 223 assert(NewBranch); 224 *NewBlock = BasicBlock::Create(SI->getContext(), NewBBName, 225 EndBlock->getParent(), EndBlock); 226 NewBBs->push_back(*NewBlock); 227 *NewBranch = BranchInst::Create(EndBlock, *NewBlock); 228 SIToSink->moveBefore(*NewBranch); 229 NewSIsToUnfold->push_back(SelectInstToUnfold(SIToSink, SIUse)); 230 DTU->applyUpdates({{DominatorTree::Insert, *NewBlock, EndBlock}}); 231 } 232 233 /// Unfold the select instruction held in \p SIToUnfold by replacing it with 234 /// control flow. 235 /// 236 /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly 237 /// created basic blocks into \p NewBBs. 238 /// 239 /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. 240 void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, 241 std::vector<SelectInstToUnfold> *NewSIsToUnfold, 242 std::vector<BasicBlock *> *NewBBs) { 243 SelectInst *SI = SIToUnfold.getInst(); 244 PHINode *SIUse = SIToUnfold.getUse(); 245 BasicBlock *StartBlock = SI->getParent(); 246 BasicBlock *EndBlock = SIUse->getParent(); 247 BranchInst *StartBlockTerm = 248 dyn_cast<BranchInst>(StartBlock->getTerminator()); 249 250 assert(StartBlockTerm && StartBlockTerm->isUnconditional()); 251 assert(SI->hasOneUse()); 252 253 // These are the new basic blocks for the conditional branch. 254 // At least one will become an actual new basic block. 255 BasicBlock *TrueBlock = nullptr; 256 BasicBlock *FalseBlock = nullptr; 257 BranchInst *TrueBranch = nullptr; 258 BranchInst *FalseBranch = nullptr; 259 260 // Sink select instructions to be able to unfold them later. 261 if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getTrueValue())) { 262 createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock, 263 "si.unfold.true", &TrueBlock, &TrueBranch, 264 NewSIsToUnfold, NewBBs); 265 } 266 if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getFalseValue())) { 267 createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock, 268 "si.unfold.false", &FalseBlock, 269 &FalseBranch, NewSIsToUnfold, NewBBs); 270 } 271 272 // If there was nothing to sink, then arbitrarily choose the 'false' side 273 // for a new input value to the PHI. 274 if (!TrueBlock && !FalseBlock) { 275 FalseBlock = BasicBlock::Create(SI->getContext(), "si.unfold.false", 276 EndBlock->getParent(), EndBlock); 277 NewBBs->push_back(FalseBlock); 278 BranchInst::Create(EndBlock, FalseBlock); 279 DTU->applyUpdates({{DominatorTree::Insert, FalseBlock, EndBlock}}); 280 } 281 282 // Insert the real conditional branch based on the original condition. 283 // If we did not create a new block for one of the 'true' or 'false' paths 284 // of the condition, it means that side of the branch goes to the end block 285 // directly and the path originates from the start block from the point of 286 // view of the new PHI. 287 BasicBlock *TT = EndBlock; 288 BasicBlock *FT = EndBlock; 289 if (TrueBlock && FalseBlock) { 290 // A diamond. 291 TT = TrueBlock; 292 FT = FalseBlock; 293 294 // Update the phi node of SI. 295 SIUse->removeIncomingValue(StartBlock, /* DeletePHIIfEmpty = */ false); 296 SIUse->addIncoming(SI->getTrueValue(), TrueBlock); 297 SIUse->addIncoming(SI->getFalseValue(), FalseBlock); 298 299 // Update any other PHI nodes in EndBlock. 300 for (PHINode &Phi : EndBlock->phis()) { 301 if (&Phi != SIUse) { 302 Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), TrueBlock); 303 Phi.addIncoming(Phi.getIncomingValueForBlock(StartBlock), FalseBlock); 304 } 305 } 306 } else { 307 BasicBlock *NewBlock = nullptr; 308 Value *SIOp1 = SI->getTrueValue(); 309 Value *SIOp2 = SI->getFalseValue(); 310 311 // A triangle pointing right. 312 if (!TrueBlock) { 313 NewBlock = FalseBlock; 314 FT = FalseBlock; 315 } 316 // A triangle pointing left. 317 else { 318 NewBlock = TrueBlock; 319 TT = TrueBlock; 320 std::swap(SIOp1, SIOp2); 321 } 322 323 // Update the phi node of SI. 324 for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) { 325 if (SIUse->getIncomingBlock(Idx) == StartBlock) 326 SIUse->setIncomingValue(Idx, SIOp1); 327 } 328 SIUse->addIncoming(SIOp2, NewBlock); 329 330 // Update any other PHI nodes in EndBlock. 331 for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast<PHINode>(II); 332 ++II) { 333 if (Phi != SIUse) 334 Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), NewBlock); 335 } 336 } 337 StartBlockTerm->eraseFromParent(); 338 BranchInst::Create(TT, FT, SI->getCondition(), StartBlock); 339 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT}, 340 {DominatorTree::Insert, StartBlock, FT}}); 341 342 // The select is now dead. 343 SI->eraseFromParent(); 344 } 345 346 struct ClonedBlock { 347 BasicBlock *BB; 348 uint64_t State; ///< \p State corresponds to the next value of a switch stmnt. 349 }; 350 351 typedef std::deque<BasicBlock *> PathType; 352 typedef std::vector<PathType> PathsType; 353 typedef std::set<const BasicBlock *> VisitedBlocks; 354 typedef std::vector<ClonedBlock> CloneList; 355 356 // This data structure keeps track of all blocks that have been cloned. If two 357 // different ThreadingPaths clone the same block for a certain state it should 358 // be reused, and it can be looked up in this map. 359 typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap; 360 361 // This map keeps track of all the new definitions for an instruction. This 362 // information is needed when restoring SSA form after cloning blocks. 363 typedef DenseMap<Instruction *, std::vector<Instruction *>> DefMap; 364 365 inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { 366 OS << "< "; 367 for (const BasicBlock *BB : Path) { 368 std::string BBName; 369 if (BB->hasName()) 370 raw_string_ostream(BBName) << BB->getName(); 371 else 372 raw_string_ostream(BBName) << BB; 373 OS << BBName << " "; 374 } 375 OS << ">"; 376 return OS; 377 } 378 379 /// ThreadingPath is a path in the control flow of a loop that can be threaded 380 /// by cloning necessary basic blocks and replacing conditional branches with 381 /// unconditional ones. A threading path includes a list of basic blocks, the 382 /// exit state, and the block that determines the next state. 383 struct ThreadingPath { 384 /// Exit value is DFA's exit state for the given path. 385 uint64_t getExitValue() const { return ExitVal; } 386 void setExitValue(const ConstantInt *V) { 387 ExitVal = V->getZExtValue(); 388 IsExitValSet = true; 389 } 390 bool isExitValueSet() const { return IsExitValSet; } 391 392 /// Determinator is the basic block that determines the next state of the DFA. 393 const BasicBlock *getDeterminatorBB() const { return DBB; } 394 void setDeterminator(const BasicBlock *BB) { DBB = BB; } 395 396 /// Path is a list of basic blocks. 397 const PathType &getPath() const { return Path; } 398 void setPath(const PathType &NewPath) { Path = NewPath; } 399 400 void print(raw_ostream &OS) const { 401 OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]"; 402 } 403 404 private: 405 PathType Path; 406 uint64_t ExitVal; 407 const BasicBlock *DBB = nullptr; 408 bool IsExitValSet = false; 409 }; 410 411 #ifndef NDEBUG 412 inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) { 413 TPath.print(OS); 414 return OS; 415 } 416 #endif 417 418 struct MainSwitch { 419 MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) { 420 if (isPredictable(SI)) { 421 Instr = SI; 422 } else { 423 ORE->emit([&]() { 424 return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI) 425 << "Switch instruction is not predictable."; 426 }); 427 } 428 } 429 430 virtual ~MainSwitch() = default; 431 432 SwitchInst *getInstr() const { return Instr; } 433 const SmallVector<SelectInstToUnfold, 4> getSelectInsts() { 434 return SelectInsts; 435 } 436 437 private: 438 /// Do a use-def chain traversal. Make sure the value of the switch variable 439 /// is always a known constant. This means that all conditional jumps based on 440 /// switch variable can be converted to unconditional jumps. 441 bool isPredictable(const SwitchInst *SI) { 442 std::deque<Instruction *> Q; 443 SmallSet<Value *, 16> SeenValues; 444 SelectInsts.clear(); 445 446 Value *FirstDef = SI->getOperand(0); 447 auto *Inst = dyn_cast<Instruction>(FirstDef); 448 449 // If this is a function argument or another non-instruction, then give up. 450 // We are interested in loop local variables. 451 if (!Inst) 452 return false; 453 454 // Require the first definition to be a PHINode 455 if (!isa<PHINode>(Inst)) 456 return false; 457 458 LLVM_DEBUG(dbgs() << "\tisPredictable() FirstDef: " << *Inst << "\n"); 459 460 Q.push_back(Inst); 461 SeenValues.insert(FirstDef); 462 463 while (!Q.empty()) { 464 Instruction *Current = Q.front(); 465 Q.pop_front(); 466 467 if (auto *Phi = dyn_cast<PHINode>(Current)) { 468 for (Value *Incoming : Phi->incoming_values()) { 469 if (!isPredictableValue(Incoming, SeenValues)) 470 return false; 471 addInstToQueue(Incoming, Q, SeenValues); 472 } 473 LLVM_DEBUG(dbgs() << "\tisPredictable() phi: " << *Phi << "\n"); 474 } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) { 475 if (!isValidSelectInst(SelI)) 476 return false; 477 if (!isPredictableValue(SelI->getTrueValue(), SeenValues) || 478 !isPredictableValue(SelI->getFalseValue(), SeenValues)) { 479 return false; 480 } 481 addInstToQueue(SelI->getTrueValue(), Q, SeenValues); 482 addInstToQueue(SelI->getFalseValue(), Q, SeenValues); 483 LLVM_DEBUG(dbgs() << "\tisPredictable() select: " << *SelI << "\n"); 484 if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back())) 485 SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse)); 486 } else { 487 // If it is neither a phi nor a select, then we give up. 488 return false; 489 } 490 } 491 492 return true; 493 } 494 495 bool isPredictableValue(Value *InpVal, SmallSet<Value *, 16> &SeenValues) { 496 if (SeenValues.find(InpVal) != SeenValues.end()) 497 return true; 498 499 if (isa<ConstantInt>(InpVal)) 500 return true; 501 502 // If this is a function argument or another non-instruction, then give up. 503 if (!isa<Instruction>(InpVal)) 504 return false; 505 506 return true; 507 } 508 509 void addInstToQueue(Value *Val, std::deque<Instruction *> &Q, 510 SmallSet<Value *, 16> &SeenValues) { 511 if (SeenValues.find(Val) != SeenValues.end()) 512 return; 513 if (Instruction *I = dyn_cast<Instruction>(Val)) 514 Q.push_back(I); 515 SeenValues.insert(Val); 516 } 517 518 bool isValidSelectInst(SelectInst *SI) { 519 if (!SI->hasOneUse()) 520 return false; 521 522 Instruction *SIUse = dyn_cast<Instruction>(SI->user_back()); 523 // The use of the select inst should be either a phi or another select. 524 if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse))) 525 return false; 526 527 BasicBlock *SIBB = SI->getParent(); 528 529 // Currently, we can only expand select instructions in basic blocks with 530 // one successor. 531 BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator()); 532 if (!SITerm || !SITerm->isUnconditional()) 533 return false; 534 535 if (isa<PHINode>(SIUse) && 536 SIBB->getSingleSuccessor() != dyn_cast<Instruction>(SIUse)->getParent()) 537 return false; 538 539 // If select will not be sunk during unfolding, and it is in the same basic 540 // block as another state defining select, then cannot unfold both. 541 for (SelectInstToUnfold SIToUnfold : SelectInsts) { 542 SelectInst *PrevSI = SIToUnfold.getInst(); 543 if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI && 544 PrevSI->getParent() == SI->getParent()) 545 return false; 546 } 547 548 return true; 549 } 550 551 SwitchInst *Instr = nullptr; 552 SmallVector<SelectInstToUnfold, 4> SelectInsts; 553 }; 554 555 struct AllSwitchPaths { 556 AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE) 557 : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), 558 ORE(ORE) {} 559 560 std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; } 561 unsigned getNumThreadingPaths() { return TPaths.size(); } 562 SwitchInst *getSwitchInst() { return Switch; } 563 BasicBlock *getSwitchBlock() { return SwitchBlock; } 564 565 void run() { 566 VisitedBlocks Visited; 567 PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1); 568 StateDefMap StateDef = getStateDefMap(); 569 570 for (PathType Path : LoopPaths) { 571 ThreadingPath TPath; 572 573 const BasicBlock *PrevBB = Path.back(); 574 for (const BasicBlock *BB : Path) { 575 if (StateDef.count(BB) != 0) { 576 const PHINode *Phi = dyn_cast<PHINode>(StateDef[BB]); 577 assert(Phi && "Expected a state-defining instr to be a phi node."); 578 579 const Value *V = Phi->getIncomingValueForBlock(PrevBB); 580 if (const ConstantInt *C = dyn_cast<const ConstantInt>(V)) { 581 TPath.setExitValue(C); 582 TPath.setDeterminator(BB); 583 TPath.setPath(Path); 584 } 585 } 586 587 // Switch block is the determinator, this is the final exit value. 588 if (TPath.isExitValueSet() && BB == Path.front()) 589 break; 590 591 PrevBB = BB; 592 } 593 594 if (TPath.isExitValueSet()) 595 TPaths.push_back(TPath); 596 } 597 } 598 599 private: 600 // Value: an instruction that defines a switch state; 601 // Key: the parent basic block of that instruction. 602 typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap; 603 604 PathsType paths(BasicBlock *BB, VisitedBlocks &Visited, 605 unsigned PathDepth) const { 606 PathsType Res; 607 608 // Stop exploring paths after visiting MaxPathLength blocks 609 if (PathDepth > MaxPathLength) { 610 ORE->emit([&]() { 611 return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached", 612 Switch) 613 << "Exploration stopped after visiting MaxPathLength=" 614 << ore::NV("MaxPathLength", MaxPathLength) << " blocks."; 615 }); 616 return Res; 617 } 618 619 Visited.insert(BB); 620 621 // Some blocks have multiple edges to the same successor, and this set 622 // is used to prevent a duplicate path from being generated 623 SmallSet<BasicBlock *, 4> Successors; 624 625 for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) { 626 BasicBlock *Succ = *SI; 627 628 if (Successors.find(Succ) != Successors.end()) 629 continue; 630 Successors.insert(Succ); 631 632 // Found a cycle through the SwitchBlock 633 if (Succ == SwitchBlock) { 634 Res.push_back({BB}); 635 continue; 636 } 637 638 // We have encountered a cycle, do not get caught in it 639 if (Visited.find(Succ) != Visited.end()) 640 continue; 641 642 PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1); 643 for (PathType Path : SuccPaths) { 644 PathType NewPath(Path); 645 NewPath.push_front(BB); 646 Res.push_back(NewPath); 647 } 648 } 649 // This block could now be visited again from a different predecessor. Note 650 // that this will result in exponential runtime. Subpaths could possibly be 651 // cached but it takes a lot of memory to store them. 652 Visited.erase(BB); 653 return Res; 654 } 655 656 /// Walk the use-def chain and collect all the state-defining instructions. 657 StateDefMap getStateDefMap() const { 658 StateDefMap Res; 659 660 Value *FirstDef = Switch->getOperand(0); 661 662 assert(isa<PHINode>(FirstDef) && "After select unfolding, all state " 663 "definitions are expected to be phi " 664 "nodes."); 665 666 SmallVector<PHINode *, 8> Stack; 667 Stack.push_back(dyn_cast<PHINode>(FirstDef)); 668 SmallSet<Value *, 16> SeenValues; 669 670 while (!Stack.empty()) { 671 PHINode *CurPhi = Stack.back(); 672 Stack.pop_back(); 673 674 Res[CurPhi->getParent()] = CurPhi; 675 SeenValues.insert(CurPhi); 676 677 for (Value *Incoming : CurPhi->incoming_values()) { 678 if (Incoming == FirstDef || isa<ConstantInt>(Incoming) || 679 SeenValues.find(Incoming) != SeenValues.end()) { 680 continue; 681 } 682 683 assert(isa<PHINode>(Incoming) && "After select unfolding, all state " 684 "definitions are expected to be phi " 685 "nodes."); 686 687 Stack.push_back(cast<PHINode>(Incoming)); 688 } 689 } 690 691 return Res; 692 } 693 694 SwitchInst *Switch; 695 BasicBlock *SwitchBlock; 696 OptimizationRemarkEmitter *ORE; 697 std::vector<ThreadingPath> TPaths; 698 }; 699 700 struct TransformDFA { 701 TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT, 702 AssumptionCache *AC, TargetTransformInfo *TTI, 703 OptimizationRemarkEmitter *ORE, 704 SmallPtrSet<const Value *, 32> EphValues) 705 : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE), 706 EphValues(EphValues) {} 707 708 void run() { 709 if (isLegalAndProfitableToTransform()) { 710 createAllExitPaths(); 711 NumTransforms++; 712 } 713 } 714 715 private: 716 /// This function performs both a legality check and profitability check at 717 /// the same time since it is convenient to do so. It iterates through all 718 /// blocks that will be cloned, and keeps track of the duplication cost. It 719 /// also returns false if it is illegal to clone some required block. 720 bool isLegalAndProfitableToTransform() { 721 CodeMetrics Metrics; 722 SwitchInst *Switch = SwitchPaths->getSwitchInst(); 723 724 // Note that DuplicateBlockMap is not being used as intended here. It is 725 // just being used to ensure (BB, State) pairs are only counted once. 726 DuplicateBlockMap DuplicateMap; 727 728 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 729 PathType PathBBs = TPath.getPath(); 730 uint64_t NextState = TPath.getExitValue(); 731 const BasicBlock *Determinator = TPath.getDeterminatorBB(); 732 733 // Update Metrics for the Switch block, this is always cloned 734 BasicBlock *BB = SwitchPaths->getSwitchBlock(); 735 BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 736 if (!VisitedBB) { 737 Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 738 DuplicateMap[BB].push_back({BB, NextState}); 739 } 740 741 // If the Switch block is the Determinator, then we can continue since 742 // this is the only block that is cloned and we already counted for it. 743 if (PathBBs.front() == Determinator) 744 continue; 745 746 // Otherwise update Metrics for all blocks that will be cloned. If any 747 // block is already cloned and would be reused, don't double count it. 748 auto DetIt = std::find(PathBBs.begin(), PathBBs.end(), Determinator); 749 for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 750 BB = *BBIt; 751 VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 752 if (VisitedBB) 753 continue; 754 Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 755 DuplicateMap[BB].push_back({BB, NextState}); 756 } 757 758 if (Metrics.notDuplicatable) { 759 LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 760 << "non-duplicatable instructions.\n"); 761 ORE->emit([&]() { 762 return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst", 763 Switch) 764 << "Contains non-duplicatable instructions."; 765 }); 766 return false; 767 } 768 769 if (Metrics.convergent) { 770 LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 771 << "convergent instructions.\n"); 772 ORE->emit([&]() { 773 return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) 774 << "Contains convergent instructions."; 775 }); 776 return false; 777 } 778 } 779 780 unsigned DuplicationCost = 0; 781 782 unsigned JumpTableSize = 0; 783 TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr, 784 nullptr); 785 if (JumpTableSize == 0) { 786 // Factor in the number of conditional branches reduced from jump 787 // threading. Assume that lowering the switch block is implemented by 788 // using binary search, hence the LogBase2(). 789 unsigned CondBranches = 790 APInt(32, Switch->getNumSuccessors()).ceilLogBase2(); 791 DuplicationCost = Metrics.NumInsts / CondBranches; 792 } else { 793 // Compared with jump tables, the DFA optimizer removes an indirect branch 794 // on each loop iteration, thus making branch prediction more precise. The 795 // more branch targets there are, the more likely it is for the branch 796 // predictor to make a mistake, and the more benefit there is in the DFA 797 // optimizer. Thus, the more branch targets there are, the lower is the 798 // cost of the DFA opt. 799 DuplicationCost = Metrics.NumInsts / JumpTableSize; 800 } 801 802 LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block " 803 << SwitchPaths->getSwitchBlock()->getName() 804 << " is: " << DuplicationCost << "\n\n"); 805 806 if (DuplicationCost > CostThreshold) { 807 LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the " 808 << "cost threshold.\n"); 809 ORE->emit([&]() { 810 return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch) 811 << "Duplication cost exceeds the cost threshold (cost=" 812 << ore::NV("Cost", DuplicationCost) 813 << ", threshold=" << ore::NV("Threshold", CostThreshold) << ")."; 814 }); 815 return false; 816 } 817 818 ORE->emit([&]() { 819 return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch) 820 << "Switch statement jump-threaded."; 821 }); 822 823 return true; 824 } 825 826 /// Transform each threading path to effectively jump thread the DFA. 827 void createAllExitPaths() { 828 DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager); 829 830 // Move the switch block to the end of the path, since it will be duplicated 831 BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock(); 832 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 833 LLVM_DEBUG(dbgs() << TPath << "\n"); 834 PathType NewPath(TPath.getPath()); 835 NewPath.push_back(SwitchBlock); 836 TPath.setPath(NewPath); 837 } 838 839 // Transform the ThreadingPaths and keep track of the cloned values 840 DuplicateBlockMap DuplicateMap; 841 DefMap NewDefs; 842 843 SmallSet<BasicBlock *, 16> BlocksToClean; 844 for (BasicBlock *BB : successors(SwitchBlock)) 845 BlocksToClean.insert(BB); 846 847 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 848 createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); 849 NumPaths++; 850 } 851 852 // After all paths are cloned, now update the last successor of the cloned 853 // path so it skips over the switch statement 854 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) 855 updateLastSuccessor(TPath, DuplicateMap, &DTU); 856 857 // For each instruction that was cloned and used outside, update its uses 858 updateSSA(NewDefs); 859 860 // Clean PHI Nodes for the newly created blocks 861 for (BasicBlock *BB : BlocksToClean) 862 cleanPhiNodes(BB); 863 } 864 865 /// For a specific ThreadingPath \p Path, create an exit path starting from 866 /// the determinator block. 867 /// 868 /// To remember the correct destination, we have to duplicate blocks 869 /// corresponding to each state. Also update the terminating instruction of 870 /// the predecessors, and phis in the successor blocks. 871 void createExitPath(DefMap &NewDefs, ThreadingPath &Path, 872 DuplicateBlockMap &DuplicateMap, 873 SmallSet<BasicBlock *, 16> &BlocksToClean, 874 DomTreeUpdater *DTU) { 875 uint64_t NextState = Path.getExitValue(); 876 const BasicBlock *Determinator = Path.getDeterminatorBB(); 877 PathType PathBBs = Path.getPath(); 878 879 // Don't select the placeholder block in front 880 if (PathBBs.front() == Determinator) 881 PathBBs.pop_front(); 882 883 auto DetIt = std::find(PathBBs.begin(), PathBBs.end(), Determinator); 884 auto Prev = std::prev(DetIt); 885 BasicBlock *PrevBB = *Prev; 886 for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 887 BasicBlock *BB = *BBIt; 888 BlocksToClean.insert(BB); 889 890 // We already cloned BB for this NextState, now just update the branch 891 // and continue. 892 BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap); 893 if (NextBB) { 894 updatePredecessor(PrevBB, BB, NextBB, DTU); 895 PrevBB = NextBB; 896 continue; 897 } 898 899 // Clone the BB and update the successor of Prev to jump to the new block 900 BasicBlock *NewBB = cloneBlockAndUpdatePredecessor( 901 BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU); 902 DuplicateMap[BB].push_back({NewBB, NextState}); 903 BlocksToClean.insert(NewBB); 904 PrevBB = NewBB; 905 } 906 } 907 908 /// Restore SSA form after cloning blocks. 909 /// 910 /// Each cloned block creates new defs for a variable, and the uses need to be 911 /// updated to reflect this. The uses may be replaced with a cloned value, or 912 /// some derived phi instruction. Note that all uses of a value defined in the 913 /// same block were already remapped when cloning the block. 914 void updateSSA(DefMap &NewDefs) { 915 SSAUpdaterBulk SSAUpdate; 916 SmallVector<Use *, 16> UsesToRename; 917 918 for (auto KV : NewDefs) { 919 Instruction *I = KV.first; 920 BasicBlock *BB = I->getParent(); 921 std::vector<Instruction *> Cloned = KV.second; 922 923 // Scan all uses of this instruction to see if it is used outside of its 924 // block, and if so, record them in UsesToRename. 925 for (Use &U : I->uses()) { 926 Instruction *User = cast<Instruction>(U.getUser()); 927 if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 928 if (UserPN->getIncomingBlock(U) == BB) 929 continue; 930 } else if (User->getParent() == BB) { 931 continue; 932 } 933 934 UsesToRename.push_back(&U); 935 } 936 937 // If there are no uses outside the block, we're done with this 938 // instruction. 939 if (UsesToRename.empty()) 940 continue; 941 LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I 942 << "\n"); 943 944 // We found a use of I outside of BB. Rename all uses of I that are 945 // outside its block to be uses of the appropriate PHI node etc. See 946 // ValuesInBlocks with the values we know. 947 unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType()); 948 SSAUpdate.AddAvailableValue(VarNum, BB, I); 949 for (Instruction *New : Cloned) 950 SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New); 951 952 while (!UsesToRename.empty()) 953 SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val()); 954 955 LLVM_DEBUG(dbgs() << "\n"); 956 } 957 // SSAUpdater handles phi placement and renaming uses with the appropriate 958 // value. 959 SSAUpdate.RewriteAllUses(DT); 960 } 961 962 /// Clones a basic block, and adds it to the CFG. 963 /// 964 /// This function also includes updating phi nodes in the successors of the 965 /// BB, and remapping uses that were defined locally in the cloned BB. 966 BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB, 967 uint64_t NextState, 968 DuplicateBlockMap &DuplicateMap, 969 DefMap &NewDefs, 970 DomTreeUpdater *DTU) { 971 ValueToValueMapTy VMap; 972 BasicBlock *NewBB = CloneBasicBlock( 973 BB, VMap, ".jt" + std::to_string(NextState), BB->getParent()); 974 NewBB->moveAfter(BB); 975 NumCloned++; 976 977 for (Instruction &I : *NewBB) { 978 // Do not remap operands of PHINode in case a definition in BB is an 979 // incoming value to a phi in the same block. This incoming value will 980 // be renamed later while restoring SSA. 981 if (isa<PHINode>(&I)) 982 continue; 983 RemapInstruction(&I, VMap, 984 RF_IgnoreMissingLocals | RF_NoModuleLevelChanges); 985 if (AssumeInst *II = dyn_cast<AssumeInst>(&I)) 986 AC->registerAssumption(II); 987 } 988 989 updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap); 990 updatePredecessor(PrevBB, BB, NewBB, DTU); 991 updateDefMap(NewDefs, VMap); 992 993 // Add all successors to the DominatorTree 994 SmallPtrSet<BasicBlock *, 4> SuccSet; 995 for (auto *SuccBB : successors(NewBB)) { 996 if (SuccSet.insert(SuccBB).second) 997 DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}}); 998 } 999 SuccSet.clear(); 1000 return NewBB; 1001 } 1002 1003 /// Update the phi nodes in BB's successors. 1004 /// 1005 /// This means creating a new incoming value from NewBB with the new 1006 /// instruction wherever there is an incoming value from BB. 1007 void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB, 1008 uint64_t NextState, ValueToValueMapTy &VMap, 1009 DuplicateBlockMap &DuplicateMap) { 1010 std::vector<BasicBlock *> BlocksToUpdate; 1011 1012 // If BB is the last block in the path, we can simply update the one case 1013 // successor that will be reached. 1014 if (BB == SwitchPaths->getSwitchBlock()) { 1015 SwitchInst *Switch = SwitchPaths->getSwitchInst(); 1016 BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 1017 BlocksToUpdate.push_back(NextCase); 1018 BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap); 1019 if (ClonedSucc) 1020 BlocksToUpdate.push_back(ClonedSucc); 1021 } 1022 // Otherwise update phis in all successors. 1023 else { 1024 for (BasicBlock *Succ : successors(BB)) { 1025 BlocksToUpdate.push_back(Succ); 1026 1027 // Check if a successor has already been cloned for the particular exit 1028 // value. In this case if a successor was already cloned, the phi nodes 1029 // in the cloned block should be updated directly. 1030 BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap); 1031 if (ClonedSucc) 1032 BlocksToUpdate.push_back(ClonedSucc); 1033 } 1034 } 1035 1036 // If there is a phi with an incoming value from BB, create a new incoming 1037 // value for the new predecessor ClonedBB. The value will either be the same 1038 // value from BB or a cloned value. 1039 for (BasicBlock *Succ : BlocksToUpdate) { 1040 for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II); 1041 ++II) { 1042 Value *Incoming = Phi->getIncomingValueForBlock(BB); 1043 if (Incoming) { 1044 if (isa<Constant>(Incoming)) { 1045 Phi->addIncoming(Incoming, ClonedBB); 1046 continue; 1047 } 1048 Value *ClonedVal = VMap[Incoming]; 1049 if (ClonedVal) 1050 Phi->addIncoming(ClonedVal, ClonedBB); 1051 else 1052 Phi->addIncoming(Incoming, ClonedBB); 1053 } 1054 } 1055 } 1056 } 1057 1058 /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all 1059 /// other successors are kept as well. 1060 void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB, 1061 BasicBlock *NewBB, DomTreeUpdater *DTU) { 1062 // When a path is reused, there is a chance that predecessors were already 1063 // updated before. Check if the predecessor needs to be updated first. 1064 if (!isPredecessor(OldBB, PrevBB)) 1065 return; 1066 1067 Instruction *PrevTerm = PrevBB->getTerminator(); 1068 for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) { 1069 if (PrevTerm->getSuccessor(Idx) == OldBB) { 1070 OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true); 1071 PrevTerm->setSuccessor(Idx, NewBB); 1072 } 1073 } 1074 DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB}, 1075 {DominatorTree::Insert, PrevBB, NewBB}}); 1076 } 1077 1078 /// Add new value mappings to the DefMap to keep track of all new definitions 1079 /// for a particular instruction. These will be used while updating SSA form. 1080 void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) { 1081 for (auto Entry : VMap) { 1082 Instruction *Inst = 1083 dyn_cast<Instruction>(const_cast<Value *>(Entry.first)); 1084 if (!Inst || !Entry.second || isa<BranchInst>(Inst) || 1085 isa<SwitchInst>(Inst)) { 1086 continue; 1087 } 1088 1089 Instruction *Cloned = dyn_cast<Instruction>(Entry.second); 1090 if (!Cloned) 1091 continue; 1092 1093 if (NewDefs.find(Inst) == NewDefs.end()) 1094 NewDefs[Inst] = {Cloned}; 1095 else 1096 NewDefs[Inst].push_back(Cloned); 1097 } 1098 } 1099 1100 /// Update the last branch of a particular cloned path to point to the correct 1101 /// case successor. 1102 /// 1103 /// Note that this is an optional step and would have been done in later 1104 /// optimizations, but it makes the CFG significantly easier to work with. 1105 void updateLastSuccessor(ThreadingPath &TPath, 1106 DuplicateBlockMap &DuplicateMap, 1107 DomTreeUpdater *DTU) { 1108 uint64_t NextState = TPath.getExitValue(); 1109 BasicBlock *BB = TPath.getPath().back(); 1110 BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap); 1111 1112 // Note multiple paths can end at the same block so check that it is not 1113 // updated yet 1114 if (!isa<SwitchInst>(LastBlock->getTerminator())) 1115 return; 1116 SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator()); 1117 BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 1118 1119 std::vector<DominatorTree::UpdateType> DTUpdates; 1120 SmallPtrSet<BasicBlock *, 4> SuccSet; 1121 for (BasicBlock *Succ : successors(LastBlock)) { 1122 if (Succ != NextCase && SuccSet.insert(Succ).second) 1123 DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ}); 1124 } 1125 1126 Switch->eraseFromParent(); 1127 BranchInst::Create(NextCase, LastBlock); 1128 1129 DTU->applyUpdates(DTUpdates); 1130 } 1131 1132 /// After cloning blocks, some of the phi nodes have extra incoming values 1133 /// that are no longer used. This function removes them. 1134 void cleanPhiNodes(BasicBlock *BB) { 1135 // If BB is no longer reachable, remove any remaining phi nodes 1136 if (pred_empty(BB)) { 1137 std::vector<PHINode *> PhiToRemove; 1138 for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 1139 PhiToRemove.push_back(Phi); 1140 } 1141 for (PHINode *PN : PhiToRemove) { 1142 PN->replaceAllUsesWith(UndefValue::get(PN->getType())); 1143 PN->eraseFromParent(); 1144 } 1145 return; 1146 } 1147 1148 // Remove any incoming values that come from an invalid predecessor 1149 for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 1150 std::vector<BasicBlock *> BlocksToRemove; 1151 for (BasicBlock *IncomingBB : Phi->blocks()) { 1152 if (!isPredecessor(BB, IncomingBB)) 1153 BlocksToRemove.push_back(IncomingBB); 1154 } 1155 for (BasicBlock *BB : BlocksToRemove) 1156 Phi->removeIncomingValue(BB); 1157 } 1158 } 1159 1160 /// Checks if BB was already cloned for a particular next state value. If it 1161 /// was then it returns this cloned block, and otherwise null. 1162 BasicBlock *getClonedBB(BasicBlock *BB, uint64_t NextState, 1163 DuplicateBlockMap &DuplicateMap) { 1164 CloneList ClonedBBs = DuplicateMap[BB]; 1165 1166 // Find an entry in the CloneList with this NextState. If it exists then 1167 // return the corresponding BB 1168 auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) { 1169 return C.State == NextState; 1170 }); 1171 return It != ClonedBBs.end() ? (*It).BB : nullptr; 1172 } 1173 1174 /// Helper to get the successor corresponding to a particular case value for 1175 /// a switch statement. 1176 BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, uint64_t NextState) { 1177 BasicBlock *NextCase = nullptr; 1178 for (auto Case : Switch->cases()) { 1179 if (Case.getCaseValue()->getZExtValue() == NextState) { 1180 NextCase = Case.getCaseSuccessor(); 1181 break; 1182 } 1183 } 1184 if (!NextCase) 1185 NextCase = Switch->getDefaultDest(); 1186 return NextCase; 1187 } 1188 1189 /// Returns true if IncomingBB is a predecessor of BB. 1190 bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { 1191 return llvm::find(predecessors(BB), IncomingBB) != pred_end(BB); 1192 } 1193 1194 AllSwitchPaths *SwitchPaths; 1195 DominatorTree *DT; 1196 AssumptionCache *AC; 1197 TargetTransformInfo *TTI; 1198 OptimizationRemarkEmitter *ORE; 1199 SmallPtrSet<const Value *, 32> EphValues; 1200 std::vector<ThreadingPath> TPaths; 1201 }; 1202 1203 bool DFAJumpThreading::run(Function &F) { 1204 LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n"); 1205 1206 if (F.hasOptSize()) { 1207 LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n"); 1208 return false; 1209 } 1210 1211 if (ClViewCfgBefore) 1212 F.viewCFG(); 1213 1214 SmallVector<AllSwitchPaths, 2> ThreadableLoops; 1215 bool MadeChanges = false; 1216 1217 for (BasicBlock &BB : F) { 1218 auto *SI = dyn_cast<SwitchInst>(BB.getTerminator()); 1219 if (!SI) 1220 continue; 1221 1222 LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName() 1223 << " is predictable\n"); 1224 MainSwitch Switch(SI, ORE); 1225 1226 if (!Switch.getInstr()) 1227 continue; 1228 1229 LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a " 1230 << "candidate for jump threading\n"); 1231 LLVM_DEBUG(SI->dump()); 1232 1233 unfoldSelectInstrs(DT, Switch.getSelectInsts()); 1234 if (!Switch.getSelectInsts().empty()) 1235 MadeChanges = true; 1236 1237 AllSwitchPaths SwitchPaths(&Switch, ORE); 1238 SwitchPaths.run(); 1239 1240 if (SwitchPaths.getNumThreadingPaths() > 0) { 1241 ThreadableLoops.push_back(SwitchPaths); 1242 1243 // For the time being limit this optimization to occurring once in a 1244 // function since it can change the CFG significantly. This is not a 1245 // strict requirement but it can cause buggy behavior if there is an 1246 // overlap of blocks in different opportunities. There is a lot of room to 1247 // experiment with catching more opportunities here. 1248 break; 1249 } 1250 } 1251 1252 SmallPtrSet<const Value *, 32> EphValues; 1253 if (ThreadableLoops.size() > 0) 1254 CodeMetrics::collectEphemeralValues(&F, AC, EphValues); 1255 1256 for (AllSwitchPaths SwitchPaths : ThreadableLoops) { 1257 TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); 1258 Transform.run(); 1259 MadeChanges = true; 1260 } 1261 1262 #ifdef EXPENSIVE_CHECKS 1263 assert(DT->verify(DominatorTree::VerificationLevel::Full)); 1264 verifyFunction(F, &dbgs()); 1265 #endif 1266 1267 return MadeChanges; 1268 } 1269 1270 } // end anonymous namespace 1271 1272 /// Integrate with the new Pass Manager 1273 PreservedAnalyses DFAJumpThreadingPass::run(Function &F, 1274 FunctionAnalysisManager &AM) { 1275 AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F); 1276 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); 1277 TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); 1278 OptimizationRemarkEmitter ORE(&F); 1279 1280 if (!DFAJumpThreading(&AC, &DT, &TTI, &ORE).run(F)) 1281 return PreservedAnalyses::all(); 1282 1283 PreservedAnalyses PA; 1284 PA.preserve<DominatorTreeAnalysis>(); 1285 return PA; 1286 } 1287