1 //===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Transform each threading path to effectively jump thread the DFA. For 10 // example, the CFG below could be transformed as follows, where the cloned 11 // blocks unconditionally branch to the next correct case based on what is 12 // identified in the analysis. 13 // 14 // sw.bb sw.bb 15 // / | \ / | \ 16 // case1 case2 case3 case1 case2 case3 17 // \ | / | | | 18 // determinator det.2 det.3 det.1 19 // br sw.bb / | \ 20 // sw.bb.2 sw.bb.3 sw.bb.1 21 // br case2 br case3 br case1§ 22 // 23 // Definitions and Terminology: 24 // 25 // * Threading path: 26 // a list of basic blocks, the exit state, and the block that determines 27 // the next state, for which the following notation will be used: 28 // < path of BBs that form a cycle > [ state, determinator ] 29 // 30 // * Predictable switch: 31 // The switch variable is always a known constant so that all conditional 32 // jumps based on switch variable can be converted to unconditional jump. 33 // 34 // * Determinator: 35 // The basic block that determines the next state of the DFA. 36 // 37 // Representing the optimization in C-like pseudocode: the code pattern on the 38 // left could functionally be transformed to the right pattern if the switch 39 // condition is predictable. 40 // 41 // X = A goto A 42 // for (...) A: 43 // switch (X) ... 44 // case A goto B 45 // X = B B: 46 // case B ... 47 // X = C goto C 48 // 49 // The pass first checks that switch variable X is decided by the control flow 50 // path taken in the loop; for example, in case B, the next value of X is 51 // decided to be C. It then enumerates through all paths in the loop and labels 52 // the basic blocks where the next state is decided. 53 // 54 // Using this information it creates new paths that unconditionally branch to 55 // the next case. This involves cloning code, so it only gets triggered if the 56 // amount of code duplicated is below a threshold. 57 // 58 //===----------------------------------------------------------------------===// 59 60 #include "llvm/Transforms/Scalar/DFAJumpThreading.h" 61 #include "llvm/ADT/APInt.h" 62 #include "llvm/ADT/DenseMap.h" 63 #include "llvm/ADT/SmallSet.h" 64 #include "llvm/ADT/Statistic.h" 65 #include "llvm/Analysis/AssumptionCache.h" 66 #include "llvm/Analysis/CodeMetrics.h" 67 #include "llvm/Analysis/DomTreeUpdater.h" 68 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 69 #include "llvm/Analysis/TargetTransformInfo.h" 70 #include "llvm/IR/CFG.h" 71 #include "llvm/IR/Constants.h" 72 #include "llvm/IR/IntrinsicInst.h" 73 #include "llvm/Support/CommandLine.h" 74 #include "llvm/Support/Debug.h" 75 #include "llvm/Transforms/Utils/Cloning.h" 76 #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" 77 #include "llvm/Transforms/Utils/ValueMapper.h" 78 #include <algorithm> 79 #include <deque> 80 81 #ifdef EXPENSIVE_CHECKS 82 #include "llvm/IR/Verifier.h" 83 #endif 84 85 using namespace llvm; 86 87 #define DEBUG_TYPE "dfa-jump-threading" 88 89 STATISTIC(NumTransforms, "Number of transformations done"); 90 STATISTIC(NumCloned, "Number of blocks cloned"); 91 STATISTIC(NumPaths, "Number of individual paths threaded"); 92 93 static cl::opt<bool> 94 ClViewCfgBefore("dfa-jump-view-cfg-before", 95 cl::desc("View the CFG before DFA Jump Threading"), 96 cl::Hidden, cl::init(false)); 97 98 static cl::opt<unsigned> MaxPathLength( 99 "dfa-max-path-length", 100 cl::desc("Max number of blocks searched to find a threading path"), 101 cl::Hidden, cl::init(20)); 102 103 static cl::opt<unsigned> 104 MaxNumPaths("dfa-max-num-paths", 105 cl::desc("Max number of paths enumerated around a switch"), 106 cl::Hidden, cl::init(200)); 107 108 static cl::opt<unsigned> 109 CostThreshold("dfa-cost-threshold", 110 cl::desc("Maximum cost accepted for the transformation"), 111 cl::Hidden, cl::init(50)); 112 113 namespace { 114 115 class SelectInstToUnfold { 116 SelectInst *SI; 117 PHINode *SIUse; 118 119 public: 120 SelectInstToUnfold(SelectInst *SI, PHINode *SIUse) : SI(SI), SIUse(SIUse) {} 121 122 SelectInst *getInst() { return SI; } 123 PHINode *getUse() { return SIUse; } 124 125 explicit operator bool() const { return SI && SIUse; } 126 }; 127 128 void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, 129 std::vector<SelectInstToUnfold> *NewSIsToUnfold, 130 std::vector<BasicBlock *> *NewBBs); 131 132 class DFAJumpThreading { 133 public: 134 DFAJumpThreading(AssumptionCache *AC, DominatorTree *DT, 135 TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE) 136 : AC(AC), DT(DT), TTI(TTI), ORE(ORE) {} 137 138 bool run(Function &F); 139 140 private: 141 void 142 unfoldSelectInstrs(DominatorTree *DT, 143 const SmallVector<SelectInstToUnfold, 4> &SelectInsts) { 144 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 145 SmallVector<SelectInstToUnfold, 4> Stack; 146 for (SelectInstToUnfold SIToUnfold : SelectInsts) 147 Stack.push_back(SIToUnfold); 148 149 while (!Stack.empty()) { 150 SelectInstToUnfold SIToUnfold = Stack.pop_back_val(); 151 152 std::vector<SelectInstToUnfold> NewSIsToUnfold; 153 std::vector<BasicBlock *> NewBBs; 154 unfold(&DTU, SIToUnfold, &NewSIsToUnfold, &NewBBs); 155 156 // Put newly discovered select instructions into the work list. 157 for (const SelectInstToUnfold &NewSIToUnfold : NewSIsToUnfold) 158 Stack.push_back(NewSIToUnfold); 159 } 160 } 161 162 AssumptionCache *AC; 163 DominatorTree *DT; 164 TargetTransformInfo *TTI; 165 OptimizationRemarkEmitter *ORE; 166 }; 167 168 } // end anonymous namespace 169 170 namespace { 171 172 /// Create a new basic block and sink \p SIToSink into it. 173 void createBasicBlockAndSinkSelectInst( 174 DomTreeUpdater *DTU, SelectInst *SI, PHINode *SIUse, SelectInst *SIToSink, 175 BasicBlock *EndBlock, StringRef NewBBName, BasicBlock **NewBlock, 176 BranchInst **NewBranch, std::vector<SelectInstToUnfold> *NewSIsToUnfold, 177 std::vector<BasicBlock *> *NewBBs) { 178 assert(SIToSink->hasOneUse()); 179 assert(NewBlock); 180 assert(NewBranch); 181 *NewBlock = BasicBlock::Create(SI->getContext(), NewBBName, 182 EndBlock->getParent(), EndBlock); 183 NewBBs->push_back(*NewBlock); 184 *NewBranch = BranchInst::Create(EndBlock, *NewBlock); 185 SIToSink->moveBefore(*NewBranch); 186 NewSIsToUnfold->push_back(SelectInstToUnfold(SIToSink, SIUse)); 187 DTU->applyUpdates({{DominatorTree::Insert, *NewBlock, EndBlock}}); 188 } 189 190 /// Unfold the select instruction held in \p SIToUnfold by replacing it with 191 /// control flow. 192 /// 193 /// Put newly discovered select instructions into \p NewSIsToUnfold. Put newly 194 /// created basic blocks into \p NewBBs. 195 /// 196 /// TODO: merge it with CodeGenPrepare::optimizeSelectInst() if possible. 197 void unfold(DomTreeUpdater *DTU, SelectInstToUnfold SIToUnfold, 198 std::vector<SelectInstToUnfold> *NewSIsToUnfold, 199 std::vector<BasicBlock *> *NewBBs) { 200 SelectInst *SI = SIToUnfold.getInst(); 201 PHINode *SIUse = SIToUnfold.getUse(); 202 BasicBlock *StartBlock = SI->getParent(); 203 BasicBlock *EndBlock = SIUse->getParent(); 204 BranchInst *StartBlockTerm = 205 dyn_cast<BranchInst>(StartBlock->getTerminator()); 206 207 assert(StartBlockTerm && StartBlockTerm->isUnconditional()); 208 assert(SI->hasOneUse()); 209 210 // These are the new basic blocks for the conditional branch. 211 // At least one will become an actual new basic block. 212 BasicBlock *TrueBlock = nullptr; 213 BasicBlock *FalseBlock = nullptr; 214 BranchInst *TrueBranch = nullptr; 215 BranchInst *FalseBranch = nullptr; 216 217 // Sink select instructions to be able to unfold them later. 218 if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getTrueValue())) { 219 createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock, 220 "si.unfold.true", &TrueBlock, &TrueBranch, 221 NewSIsToUnfold, NewBBs); 222 } 223 if (SelectInst *SIOp = dyn_cast<SelectInst>(SI->getFalseValue())) { 224 createBasicBlockAndSinkSelectInst(DTU, SI, SIUse, SIOp, EndBlock, 225 "si.unfold.false", &FalseBlock, 226 &FalseBranch, NewSIsToUnfold, NewBBs); 227 } 228 229 // If there was nothing to sink, then arbitrarily choose the 'false' side 230 // for a new input value to the PHI. 231 if (!TrueBlock && !FalseBlock) { 232 FalseBlock = BasicBlock::Create(SI->getContext(), "si.unfold.false", 233 EndBlock->getParent(), EndBlock); 234 NewBBs->push_back(FalseBlock); 235 BranchInst::Create(EndBlock, FalseBlock); 236 DTU->applyUpdates({{DominatorTree::Insert, FalseBlock, EndBlock}}); 237 } 238 239 // Insert the real conditional branch based on the original condition. 240 // If we did not create a new block for one of the 'true' or 'false' paths 241 // of the condition, it means that side of the branch goes to the end block 242 // directly and the path originates from the start block from the point of 243 // view of the new PHI. 244 BasicBlock *TT = EndBlock; 245 BasicBlock *FT = EndBlock; 246 if (TrueBlock && FalseBlock) { 247 // A diamond. 248 TT = TrueBlock; 249 FT = FalseBlock; 250 251 // Update the phi node of SI. 252 SIUse->addIncoming(SI->getTrueValue(), TrueBlock); 253 SIUse->addIncoming(SI->getFalseValue(), FalseBlock); 254 255 // Update any other PHI nodes in EndBlock. 256 for (PHINode &Phi : EndBlock->phis()) { 257 if (&Phi != SIUse) { 258 Value *OrigValue = Phi.getIncomingValueForBlock(StartBlock); 259 Phi.addIncoming(OrigValue, TrueBlock); 260 Phi.addIncoming(OrigValue, FalseBlock); 261 } 262 263 // Remove incoming place of original StartBlock, which comes in a indirect 264 // way (through TrueBlock and FalseBlock) now. 265 Phi.removeIncomingValue(StartBlock, /* DeletePHIIfEmpty = */ false); 266 } 267 } else { 268 BasicBlock *NewBlock = nullptr; 269 Value *SIOp1 = SI->getTrueValue(); 270 Value *SIOp2 = SI->getFalseValue(); 271 272 // A triangle pointing right. 273 if (!TrueBlock) { 274 NewBlock = FalseBlock; 275 FT = FalseBlock; 276 } 277 // A triangle pointing left. 278 else { 279 NewBlock = TrueBlock; 280 TT = TrueBlock; 281 std::swap(SIOp1, SIOp2); 282 } 283 284 // Update the phi node of SI. 285 for (unsigned Idx = 0; Idx < SIUse->getNumIncomingValues(); ++Idx) { 286 if (SIUse->getIncomingBlock(Idx) == StartBlock) 287 SIUse->setIncomingValue(Idx, SIOp1); 288 } 289 SIUse->addIncoming(SIOp2, NewBlock); 290 291 // Update any other PHI nodes in EndBlock. 292 for (auto II = EndBlock->begin(); PHINode *Phi = dyn_cast<PHINode>(II); 293 ++II) { 294 if (Phi != SIUse) 295 Phi->addIncoming(Phi->getIncomingValueForBlock(StartBlock), NewBlock); 296 } 297 } 298 StartBlockTerm->eraseFromParent(); 299 BranchInst::Create(TT, FT, SI->getCondition(), StartBlock); 300 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, TT}, 301 {DominatorTree::Insert, StartBlock, FT}}); 302 303 // The select is now dead. 304 assert(SI->use_empty() && "Select must be dead now"); 305 SI->eraseFromParent(); 306 } 307 308 struct ClonedBlock { 309 BasicBlock *BB; 310 uint64_t State; ///< \p State corresponds to the next value of a switch stmnt. 311 }; 312 313 typedef std::deque<BasicBlock *> PathType; 314 typedef std::vector<PathType> PathsType; 315 typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks; 316 typedef std::vector<ClonedBlock> CloneList; 317 318 // This data structure keeps track of all blocks that have been cloned. If two 319 // different ThreadingPaths clone the same block for a certain state it should 320 // be reused, and it can be looked up in this map. 321 typedef DenseMap<BasicBlock *, CloneList> DuplicateBlockMap; 322 323 // This map keeps track of all the new definitions for an instruction. This 324 // information is needed when restoring SSA form after cloning blocks. 325 typedef MapVector<Instruction *, std::vector<Instruction *>> DefMap; 326 327 inline raw_ostream &operator<<(raw_ostream &OS, const PathType &Path) { 328 OS << "< "; 329 for (const BasicBlock *BB : Path) { 330 std::string BBName; 331 if (BB->hasName()) 332 raw_string_ostream(BBName) << BB->getName(); 333 else 334 raw_string_ostream(BBName) << BB; 335 OS << BBName << " "; 336 } 337 OS << ">"; 338 return OS; 339 } 340 341 /// ThreadingPath is a path in the control flow of a loop that can be threaded 342 /// by cloning necessary basic blocks and replacing conditional branches with 343 /// unconditional ones. A threading path includes a list of basic blocks, the 344 /// exit state, and the block that determines the next state. 345 struct ThreadingPath { 346 /// Exit value is DFA's exit state for the given path. 347 uint64_t getExitValue() const { return ExitVal; } 348 void setExitValue(const ConstantInt *V) { 349 ExitVal = V->getZExtValue(); 350 IsExitValSet = true; 351 } 352 bool isExitValueSet() const { return IsExitValSet; } 353 354 /// Determinator is the basic block that determines the next state of the DFA. 355 const BasicBlock *getDeterminatorBB() const { return DBB; } 356 void setDeterminator(const BasicBlock *BB) { DBB = BB; } 357 358 /// Path is a list of basic blocks. 359 const PathType &getPath() const { return Path; } 360 void setPath(const PathType &NewPath) { Path = NewPath; } 361 362 void print(raw_ostream &OS) const { 363 OS << Path << " [ " << ExitVal << ", " << DBB->getName() << " ]"; 364 } 365 366 private: 367 PathType Path; 368 uint64_t ExitVal; 369 const BasicBlock *DBB = nullptr; 370 bool IsExitValSet = false; 371 }; 372 373 #ifndef NDEBUG 374 inline raw_ostream &operator<<(raw_ostream &OS, const ThreadingPath &TPath) { 375 TPath.print(OS); 376 return OS; 377 } 378 #endif 379 380 struct MainSwitch { 381 MainSwitch(SwitchInst *SI, OptimizationRemarkEmitter *ORE) { 382 if (isCandidate(SI)) { 383 Instr = SI; 384 } else { 385 ORE->emit([&]() { 386 return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", SI) 387 << "Switch instruction is not predictable."; 388 }); 389 } 390 } 391 392 virtual ~MainSwitch() = default; 393 394 SwitchInst *getInstr() const { return Instr; } 395 const SmallVector<SelectInstToUnfold, 4> getSelectInsts() { 396 return SelectInsts; 397 } 398 399 private: 400 /// Do a use-def chain traversal starting from the switch condition to see if 401 /// \p SI is a potential condidate. 402 /// 403 /// Also, collect select instructions to unfold. 404 bool isCandidate(const SwitchInst *SI) { 405 std::deque<Value *> Q; 406 SmallSet<Value *, 16> SeenValues; 407 SelectInsts.clear(); 408 409 Value *SICond = SI->getCondition(); 410 LLVM_DEBUG(dbgs() << "\tSICond: " << *SICond << "\n"); 411 if (!isa<PHINode>(SICond)) 412 return false; 413 414 addToQueue(SICond, Q, SeenValues); 415 416 while (!Q.empty()) { 417 Value *Current = Q.front(); 418 Q.pop_front(); 419 420 if (auto *Phi = dyn_cast<PHINode>(Current)) { 421 for (Value *Incoming : Phi->incoming_values()) { 422 addToQueue(Incoming, Q, SeenValues); 423 } 424 LLVM_DEBUG(dbgs() << "\tphi: " << *Phi << "\n"); 425 } else if (SelectInst *SelI = dyn_cast<SelectInst>(Current)) { 426 if (!isValidSelectInst(SelI)) 427 return false; 428 addToQueue(SelI->getTrueValue(), Q, SeenValues); 429 addToQueue(SelI->getFalseValue(), Q, SeenValues); 430 LLVM_DEBUG(dbgs() << "\tselect: " << *SelI << "\n"); 431 if (auto *SelIUse = dyn_cast<PHINode>(SelI->user_back())) 432 SelectInsts.push_back(SelectInstToUnfold(SelI, SelIUse)); 433 } else if (isa<Constant>(Current)) { 434 LLVM_DEBUG(dbgs() << "\tconst: " << *Current << "\n"); 435 continue; 436 } else { 437 LLVM_DEBUG(dbgs() << "\tother: " << *Current << "\n"); 438 // Allow unpredictable values. The hope is that those will be the 439 // initial switch values that can be ignored (they will hit the 440 // unthreaded switch) but this assumption will get checked later after 441 // paths have been enumerated (in function getStateDefMap). 442 continue; 443 } 444 } 445 446 return true; 447 } 448 449 void addToQueue(Value *Val, std::deque<Value *> &Q, 450 SmallSet<Value *, 16> &SeenValues) { 451 if (SeenValues.contains(Val)) 452 return; 453 Q.push_back(Val); 454 SeenValues.insert(Val); 455 } 456 457 bool isValidSelectInst(SelectInst *SI) { 458 if (!SI->hasOneUse()) 459 return false; 460 461 Instruction *SIUse = dyn_cast<Instruction>(SI->user_back()); 462 // The use of the select inst should be either a phi or another select. 463 if (!SIUse && !(isa<PHINode>(SIUse) || isa<SelectInst>(SIUse))) 464 return false; 465 466 BasicBlock *SIBB = SI->getParent(); 467 468 // Currently, we can only expand select instructions in basic blocks with 469 // one successor. 470 BranchInst *SITerm = dyn_cast<BranchInst>(SIBB->getTerminator()); 471 if (!SITerm || !SITerm->isUnconditional()) 472 return false; 473 474 // Only fold the select coming from directly where it is defined. 475 PHINode *PHIUser = dyn_cast<PHINode>(SIUse); 476 if (PHIUser && PHIUser->getIncomingBlock(*SI->use_begin()) != SIBB) 477 return false; 478 479 // If select will not be sunk during unfolding, and it is in the same basic 480 // block as another state defining select, then cannot unfold both. 481 for (SelectInstToUnfold SIToUnfold : SelectInsts) { 482 SelectInst *PrevSI = SIToUnfold.getInst(); 483 if (PrevSI->getTrueValue() != SI && PrevSI->getFalseValue() != SI && 484 PrevSI->getParent() == SI->getParent()) 485 return false; 486 } 487 488 return true; 489 } 490 491 SwitchInst *Instr = nullptr; 492 SmallVector<SelectInstToUnfold, 4> SelectInsts; 493 }; 494 495 struct AllSwitchPaths { 496 AllSwitchPaths(const MainSwitch *MSwitch, OptimizationRemarkEmitter *ORE) 497 : Switch(MSwitch->getInstr()), SwitchBlock(Switch->getParent()), 498 ORE(ORE) {} 499 500 std::vector<ThreadingPath> &getThreadingPaths() { return TPaths; } 501 unsigned getNumThreadingPaths() { return TPaths.size(); } 502 SwitchInst *getSwitchInst() { return Switch; } 503 BasicBlock *getSwitchBlock() { return SwitchBlock; } 504 505 void run() { 506 VisitedBlocks Visited; 507 PathsType LoopPaths = paths(SwitchBlock, Visited, /* PathDepth = */ 1); 508 StateDefMap StateDef = getStateDefMap(LoopPaths); 509 510 if (StateDef.empty()) { 511 ORE->emit([&]() { 512 return OptimizationRemarkMissed(DEBUG_TYPE, "SwitchNotPredictable", 513 Switch) 514 << "Switch instruction is not predictable."; 515 }); 516 return; 517 } 518 519 for (PathType Path : LoopPaths) { 520 ThreadingPath TPath; 521 522 const BasicBlock *PrevBB = Path.back(); 523 for (const BasicBlock *BB : Path) { 524 if (StateDef.contains(BB)) { 525 const PHINode *Phi = dyn_cast<PHINode>(StateDef[BB]); 526 assert(Phi && "Expected a state-defining instr to be a phi node."); 527 528 const Value *V = Phi->getIncomingValueForBlock(PrevBB); 529 if (const ConstantInt *C = dyn_cast<const ConstantInt>(V)) { 530 TPath.setExitValue(C); 531 TPath.setDeterminator(BB); 532 TPath.setPath(Path); 533 } 534 } 535 536 // Switch block is the determinator, this is the final exit value. 537 if (TPath.isExitValueSet() && BB == Path.front()) 538 break; 539 540 PrevBB = BB; 541 } 542 543 if (TPath.isExitValueSet() && isSupported(TPath)) 544 TPaths.push_back(TPath); 545 } 546 } 547 548 private: 549 // Value: an instruction that defines a switch state; 550 // Key: the parent basic block of that instruction. 551 typedef DenseMap<const BasicBlock *, const PHINode *> StateDefMap; 552 553 PathsType paths(BasicBlock *BB, VisitedBlocks &Visited, 554 unsigned PathDepth) const { 555 PathsType Res; 556 557 // Stop exploring paths after visiting MaxPathLength blocks 558 if (PathDepth > MaxPathLength) { 559 ORE->emit([&]() { 560 return OptimizationRemarkAnalysis(DEBUG_TYPE, "MaxPathLengthReached", 561 Switch) 562 << "Exploration stopped after visiting MaxPathLength=" 563 << ore::NV("MaxPathLength", MaxPathLength) << " blocks."; 564 }); 565 return Res; 566 } 567 568 Visited.insert(BB); 569 570 // Some blocks have multiple edges to the same successor, and this set 571 // is used to prevent a duplicate path from being generated 572 SmallSet<BasicBlock *, 4> Successors; 573 for (BasicBlock *Succ : successors(BB)) { 574 if (!Successors.insert(Succ).second) 575 continue; 576 577 // Found a cycle through the SwitchBlock 578 if (Succ == SwitchBlock) { 579 Res.push_back({BB}); 580 continue; 581 } 582 583 // We have encountered a cycle, do not get caught in it 584 if (Visited.contains(Succ)) 585 continue; 586 587 PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1); 588 for (const PathType &Path : SuccPaths) { 589 PathType NewPath(Path); 590 NewPath.push_front(BB); 591 Res.push_back(NewPath); 592 if (Res.size() >= MaxNumPaths) { 593 return Res; 594 } 595 } 596 } 597 // This block could now be visited again from a different predecessor. Note 598 // that this will result in exponential runtime. Subpaths could possibly be 599 // cached but it takes a lot of memory to store them. 600 Visited.erase(BB); 601 return Res; 602 } 603 604 /// Walk the use-def chain and collect all the state-defining instructions. 605 /// 606 /// Return an empty map if unpredictable values encountered inside the basic 607 /// blocks of \p LoopPaths. 608 StateDefMap getStateDefMap(const PathsType &LoopPaths) const { 609 StateDefMap Res; 610 611 // Basic blocks belonging to any of the loops around the switch statement. 612 SmallPtrSet<BasicBlock *, 16> LoopBBs; 613 for (const PathType &Path : LoopPaths) { 614 for (BasicBlock *BB : Path) 615 LoopBBs.insert(BB); 616 } 617 618 Value *FirstDef = Switch->getOperand(0); 619 620 assert(isa<PHINode>(FirstDef) && "The first definition must be a phi."); 621 622 SmallVector<PHINode *, 8> Stack; 623 Stack.push_back(dyn_cast<PHINode>(FirstDef)); 624 SmallSet<Value *, 16> SeenValues; 625 626 while (!Stack.empty()) { 627 PHINode *CurPhi = Stack.pop_back_val(); 628 629 Res[CurPhi->getParent()] = CurPhi; 630 SeenValues.insert(CurPhi); 631 632 for (BasicBlock *IncomingBB : CurPhi->blocks()) { 633 Value *Incoming = CurPhi->getIncomingValueForBlock(IncomingBB); 634 bool IsOutsideLoops = LoopBBs.count(IncomingBB) == 0; 635 if (Incoming == FirstDef || isa<ConstantInt>(Incoming) || 636 SeenValues.contains(Incoming) || IsOutsideLoops) { 637 continue; 638 } 639 640 // Any unpredictable value inside the loops means we must bail out. 641 if (!isa<PHINode>(Incoming)) 642 return StateDefMap(); 643 644 Stack.push_back(cast<PHINode>(Incoming)); 645 } 646 } 647 648 return Res; 649 } 650 651 /// The determinator BB should precede the switch-defining BB. 652 /// 653 /// Otherwise, it is possible that the state defined in the determinator block 654 /// defines the state for the next iteration of the loop, rather than for the 655 /// current one. 656 /// 657 /// Currently supported paths: 658 /// \code 659 /// < switch bb1 determ def > [ 42, determ ] 660 /// < switch_and_def bb1 determ > [ 42, determ ] 661 /// < switch_and_def_and_determ bb1 > [ 42, switch_and_def_and_determ ] 662 /// \endcode 663 /// 664 /// Unsupported paths: 665 /// \code 666 /// < switch bb1 def determ > [ 43, determ ] 667 /// < switch_and_determ bb1 def > [ 43, switch_and_determ ] 668 /// \endcode 669 bool isSupported(const ThreadingPath &TPath) { 670 Instruction *SwitchCondI = dyn_cast<Instruction>(Switch->getCondition()); 671 assert(SwitchCondI); 672 if (!SwitchCondI) 673 return false; 674 675 const BasicBlock *SwitchCondDefBB = SwitchCondI->getParent(); 676 const BasicBlock *SwitchCondUseBB = Switch->getParent(); 677 const BasicBlock *DeterminatorBB = TPath.getDeterminatorBB(); 678 679 assert( 680 SwitchCondUseBB == TPath.getPath().front() && 681 "The first BB in a threading path should have the switch instruction"); 682 if (SwitchCondUseBB != TPath.getPath().front()) 683 return false; 684 685 // Make DeterminatorBB the first element in Path. 686 PathType Path = TPath.getPath(); 687 auto ItDet = llvm::find(Path, DeterminatorBB); 688 std::rotate(Path.begin(), ItDet, Path.end()); 689 690 bool IsDetBBSeen = false; 691 bool IsDefBBSeen = false; 692 bool IsUseBBSeen = false; 693 for (BasicBlock *BB : Path) { 694 if (BB == DeterminatorBB) 695 IsDetBBSeen = true; 696 if (BB == SwitchCondDefBB) 697 IsDefBBSeen = true; 698 if (BB == SwitchCondUseBB) 699 IsUseBBSeen = true; 700 if (IsDetBBSeen && IsUseBBSeen && !IsDefBBSeen) 701 return false; 702 } 703 704 return true; 705 } 706 707 SwitchInst *Switch; 708 BasicBlock *SwitchBlock; 709 OptimizationRemarkEmitter *ORE; 710 std::vector<ThreadingPath> TPaths; 711 }; 712 713 struct TransformDFA { 714 TransformDFA(AllSwitchPaths *SwitchPaths, DominatorTree *DT, 715 AssumptionCache *AC, TargetTransformInfo *TTI, 716 OptimizationRemarkEmitter *ORE, 717 SmallPtrSet<const Value *, 32> EphValues) 718 : SwitchPaths(SwitchPaths), DT(DT), AC(AC), TTI(TTI), ORE(ORE), 719 EphValues(EphValues) {} 720 721 void run() { 722 if (isLegalAndProfitableToTransform()) { 723 createAllExitPaths(); 724 NumTransforms++; 725 } 726 } 727 728 private: 729 /// This function performs both a legality check and profitability check at 730 /// the same time since it is convenient to do so. It iterates through all 731 /// blocks that will be cloned, and keeps track of the duplication cost. It 732 /// also returns false if it is illegal to clone some required block. 733 bool isLegalAndProfitableToTransform() { 734 CodeMetrics Metrics; 735 SwitchInst *Switch = SwitchPaths->getSwitchInst(); 736 737 // Don't thread switch without multiple successors. 738 if (Switch->getNumSuccessors() <= 1) 739 return false; 740 741 // Note that DuplicateBlockMap is not being used as intended here. It is 742 // just being used to ensure (BB, State) pairs are only counted once. 743 DuplicateBlockMap DuplicateMap; 744 745 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 746 PathType PathBBs = TPath.getPath(); 747 uint64_t NextState = TPath.getExitValue(); 748 const BasicBlock *Determinator = TPath.getDeterminatorBB(); 749 750 // Update Metrics for the Switch block, this is always cloned 751 BasicBlock *BB = SwitchPaths->getSwitchBlock(); 752 BasicBlock *VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 753 if (!VisitedBB) { 754 Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 755 DuplicateMap[BB].push_back({BB, NextState}); 756 } 757 758 // If the Switch block is the Determinator, then we can continue since 759 // this is the only block that is cloned and we already counted for it. 760 if (PathBBs.front() == Determinator) 761 continue; 762 763 // Otherwise update Metrics for all blocks that will be cloned. If any 764 // block is already cloned and would be reused, don't double count it. 765 auto DetIt = llvm::find(PathBBs, Determinator); 766 for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 767 BB = *BBIt; 768 VisitedBB = getClonedBB(BB, NextState, DuplicateMap); 769 if (VisitedBB) 770 continue; 771 Metrics.analyzeBasicBlock(BB, *TTI, EphValues); 772 DuplicateMap[BB].push_back({BB, NextState}); 773 } 774 775 if (Metrics.notDuplicatable) { 776 LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 777 << "non-duplicatable instructions.\n"); 778 ORE->emit([&]() { 779 return OptimizationRemarkMissed(DEBUG_TYPE, "NonDuplicatableInst", 780 Switch) 781 << "Contains non-duplicatable instructions."; 782 }); 783 return false; 784 } 785 786 if (Metrics.convergent) { 787 LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 788 << "convergent instructions.\n"); 789 ORE->emit([&]() { 790 return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) 791 << "Contains convergent instructions."; 792 }); 793 return false; 794 } 795 796 if (!Metrics.NumInsts.isValid()) { 797 LLVM_DEBUG(dbgs() << "DFA Jump Threading: Not jump threading, contains " 798 << "instructions with invalid cost.\n"); 799 ORE->emit([&]() { 800 return OptimizationRemarkMissed(DEBUG_TYPE, "ConvergentInst", Switch) 801 << "Contains instructions with invalid cost."; 802 }); 803 return false; 804 } 805 } 806 807 InstructionCost DuplicationCost = 0; 808 809 unsigned JumpTableSize = 0; 810 TTI->getEstimatedNumberOfCaseClusters(*Switch, JumpTableSize, nullptr, 811 nullptr); 812 if (JumpTableSize == 0) { 813 // Factor in the number of conditional branches reduced from jump 814 // threading. Assume that lowering the switch block is implemented by 815 // using binary search, hence the LogBase2(). 816 unsigned CondBranches = 817 APInt(32, Switch->getNumSuccessors()).ceilLogBase2(); 818 assert(CondBranches > 0 && 819 "The threaded switch must have multiple branches"); 820 DuplicationCost = Metrics.NumInsts / CondBranches; 821 } else { 822 // Compared with jump tables, the DFA optimizer removes an indirect branch 823 // on each loop iteration, thus making branch prediction more precise. The 824 // more branch targets there are, the more likely it is for the branch 825 // predictor to make a mistake, and the more benefit there is in the DFA 826 // optimizer. Thus, the more branch targets there are, the lower is the 827 // cost of the DFA opt. 828 DuplicationCost = Metrics.NumInsts / JumpTableSize; 829 } 830 831 LLVM_DEBUG(dbgs() << "\nDFA Jump Threading: Cost to jump thread block " 832 << SwitchPaths->getSwitchBlock()->getName() 833 << " is: " << DuplicationCost << "\n\n"); 834 835 if (DuplicationCost > CostThreshold) { 836 LLVM_DEBUG(dbgs() << "Not jump threading, duplication cost exceeds the " 837 << "cost threshold.\n"); 838 ORE->emit([&]() { 839 return OptimizationRemarkMissed(DEBUG_TYPE, "NotProfitable", Switch) 840 << "Duplication cost exceeds the cost threshold (cost=" 841 << ore::NV("Cost", DuplicationCost) 842 << ", threshold=" << ore::NV("Threshold", CostThreshold) << ")."; 843 }); 844 return false; 845 } 846 847 ORE->emit([&]() { 848 return OptimizationRemark(DEBUG_TYPE, "JumpThreaded", Switch) 849 << "Switch statement jump-threaded."; 850 }); 851 852 return true; 853 } 854 855 /// Transform each threading path to effectively jump thread the DFA. 856 void createAllExitPaths() { 857 DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Eager); 858 859 // Move the switch block to the end of the path, since it will be duplicated 860 BasicBlock *SwitchBlock = SwitchPaths->getSwitchBlock(); 861 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 862 LLVM_DEBUG(dbgs() << TPath << "\n"); 863 PathType NewPath(TPath.getPath()); 864 NewPath.push_back(SwitchBlock); 865 TPath.setPath(NewPath); 866 } 867 868 // Transform the ThreadingPaths and keep track of the cloned values 869 DuplicateBlockMap DuplicateMap; 870 DefMap NewDefs; 871 872 SmallSet<BasicBlock *, 16> BlocksToClean; 873 for (BasicBlock *BB : successors(SwitchBlock)) 874 BlocksToClean.insert(BB); 875 876 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) { 877 createExitPath(NewDefs, TPath, DuplicateMap, BlocksToClean, &DTU); 878 NumPaths++; 879 } 880 881 // After all paths are cloned, now update the last successor of the cloned 882 // path so it skips over the switch statement 883 for (ThreadingPath &TPath : SwitchPaths->getThreadingPaths()) 884 updateLastSuccessor(TPath, DuplicateMap, &DTU); 885 886 // For each instruction that was cloned and used outside, update its uses 887 updateSSA(NewDefs); 888 889 // Clean PHI Nodes for the newly created blocks 890 for (BasicBlock *BB : BlocksToClean) 891 cleanPhiNodes(BB); 892 } 893 894 /// For a specific ThreadingPath \p Path, create an exit path starting from 895 /// the determinator block. 896 /// 897 /// To remember the correct destination, we have to duplicate blocks 898 /// corresponding to each state. Also update the terminating instruction of 899 /// the predecessors, and phis in the successor blocks. 900 void createExitPath(DefMap &NewDefs, ThreadingPath &Path, 901 DuplicateBlockMap &DuplicateMap, 902 SmallSet<BasicBlock *, 16> &BlocksToClean, 903 DomTreeUpdater *DTU) { 904 uint64_t NextState = Path.getExitValue(); 905 const BasicBlock *Determinator = Path.getDeterminatorBB(); 906 PathType PathBBs = Path.getPath(); 907 908 // Don't select the placeholder block in front 909 if (PathBBs.front() == Determinator) 910 PathBBs.pop_front(); 911 912 auto DetIt = llvm::find(PathBBs, Determinator); 913 auto Prev = std::prev(DetIt); 914 BasicBlock *PrevBB = *Prev; 915 for (auto BBIt = DetIt; BBIt != PathBBs.end(); BBIt++) { 916 BasicBlock *BB = *BBIt; 917 BlocksToClean.insert(BB); 918 919 // We already cloned BB for this NextState, now just update the branch 920 // and continue. 921 BasicBlock *NextBB = getClonedBB(BB, NextState, DuplicateMap); 922 if (NextBB) { 923 updatePredecessor(PrevBB, BB, NextBB, DTU); 924 PrevBB = NextBB; 925 continue; 926 } 927 928 // Clone the BB and update the successor of Prev to jump to the new block 929 BasicBlock *NewBB = cloneBlockAndUpdatePredecessor( 930 BB, PrevBB, NextState, DuplicateMap, NewDefs, DTU); 931 DuplicateMap[BB].push_back({NewBB, NextState}); 932 BlocksToClean.insert(NewBB); 933 PrevBB = NewBB; 934 } 935 } 936 937 /// Restore SSA form after cloning blocks. 938 /// 939 /// Each cloned block creates new defs for a variable, and the uses need to be 940 /// updated to reflect this. The uses may be replaced with a cloned value, or 941 /// some derived phi instruction. Note that all uses of a value defined in the 942 /// same block were already remapped when cloning the block. 943 void updateSSA(DefMap &NewDefs) { 944 SSAUpdaterBulk SSAUpdate; 945 SmallVector<Use *, 16> UsesToRename; 946 947 for (const auto &KV : NewDefs) { 948 Instruction *I = KV.first; 949 BasicBlock *BB = I->getParent(); 950 std::vector<Instruction *> Cloned = KV.second; 951 952 // Scan all uses of this instruction to see if it is used outside of its 953 // block, and if so, record them in UsesToRename. 954 for (Use &U : I->uses()) { 955 Instruction *User = cast<Instruction>(U.getUser()); 956 if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 957 if (UserPN->getIncomingBlock(U) == BB) 958 continue; 959 } else if (User->getParent() == BB) { 960 continue; 961 } 962 963 UsesToRename.push_back(&U); 964 } 965 966 // If there are no uses outside the block, we're done with this 967 // instruction. 968 if (UsesToRename.empty()) 969 continue; 970 LLVM_DEBUG(dbgs() << "DFA-JT: Renaming non-local uses of: " << *I 971 << "\n"); 972 973 // We found a use of I outside of BB. Rename all uses of I that are 974 // outside its block to be uses of the appropriate PHI node etc. See 975 // ValuesInBlocks with the values we know. 976 unsigned VarNum = SSAUpdate.AddVariable(I->getName(), I->getType()); 977 SSAUpdate.AddAvailableValue(VarNum, BB, I); 978 for (Instruction *New : Cloned) 979 SSAUpdate.AddAvailableValue(VarNum, New->getParent(), New); 980 981 while (!UsesToRename.empty()) 982 SSAUpdate.AddUse(VarNum, UsesToRename.pop_back_val()); 983 984 LLVM_DEBUG(dbgs() << "\n"); 985 } 986 // SSAUpdater handles phi placement and renaming uses with the appropriate 987 // value. 988 SSAUpdate.RewriteAllUses(DT); 989 } 990 991 /// Clones a basic block, and adds it to the CFG. 992 /// 993 /// This function also includes updating phi nodes in the successors of the 994 /// BB, and remapping uses that were defined locally in the cloned BB. 995 BasicBlock *cloneBlockAndUpdatePredecessor(BasicBlock *BB, BasicBlock *PrevBB, 996 uint64_t NextState, 997 DuplicateBlockMap &DuplicateMap, 998 DefMap &NewDefs, 999 DomTreeUpdater *DTU) { 1000 ValueToValueMapTy VMap; 1001 BasicBlock *NewBB = CloneBasicBlock( 1002 BB, VMap, ".jt" + std::to_string(NextState), BB->getParent()); 1003 NewBB->moveAfter(BB); 1004 NumCloned++; 1005 1006 for (Instruction &I : *NewBB) { 1007 // Do not remap operands of PHINode in case a definition in BB is an 1008 // incoming value to a phi in the same block. This incoming value will 1009 // be renamed later while restoring SSA. 1010 if (isa<PHINode>(&I)) 1011 continue; 1012 RemapInstruction(&I, VMap, 1013 RF_IgnoreMissingLocals | RF_NoModuleLevelChanges); 1014 if (AssumeInst *II = dyn_cast<AssumeInst>(&I)) 1015 AC->registerAssumption(II); 1016 } 1017 1018 updateSuccessorPhis(BB, NewBB, NextState, VMap, DuplicateMap); 1019 updatePredecessor(PrevBB, BB, NewBB, DTU); 1020 updateDefMap(NewDefs, VMap); 1021 1022 // Add all successors to the DominatorTree 1023 SmallPtrSet<BasicBlock *, 4> SuccSet; 1024 for (auto *SuccBB : successors(NewBB)) { 1025 if (SuccSet.insert(SuccBB).second) 1026 DTU->applyUpdates({{DominatorTree::Insert, NewBB, SuccBB}}); 1027 } 1028 SuccSet.clear(); 1029 return NewBB; 1030 } 1031 1032 /// Update the phi nodes in BB's successors. 1033 /// 1034 /// This means creating a new incoming value from NewBB with the new 1035 /// instruction wherever there is an incoming value from BB. 1036 void updateSuccessorPhis(BasicBlock *BB, BasicBlock *ClonedBB, 1037 uint64_t NextState, ValueToValueMapTy &VMap, 1038 DuplicateBlockMap &DuplicateMap) { 1039 std::vector<BasicBlock *> BlocksToUpdate; 1040 1041 // If BB is the last block in the path, we can simply update the one case 1042 // successor that will be reached. 1043 if (BB == SwitchPaths->getSwitchBlock()) { 1044 SwitchInst *Switch = SwitchPaths->getSwitchInst(); 1045 BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 1046 BlocksToUpdate.push_back(NextCase); 1047 BasicBlock *ClonedSucc = getClonedBB(NextCase, NextState, DuplicateMap); 1048 if (ClonedSucc) 1049 BlocksToUpdate.push_back(ClonedSucc); 1050 } 1051 // Otherwise update phis in all successors. 1052 else { 1053 for (BasicBlock *Succ : successors(BB)) { 1054 BlocksToUpdate.push_back(Succ); 1055 1056 // Check if a successor has already been cloned for the particular exit 1057 // value. In this case if a successor was already cloned, the phi nodes 1058 // in the cloned block should be updated directly. 1059 BasicBlock *ClonedSucc = getClonedBB(Succ, NextState, DuplicateMap); 1060 if (ClonedSucc) 1061 BlocksToUpdate.push_back(ClonedSucc); 1062 } 1063 } 1064 1065 // If there is a phi with an incoming value from BB, create a new incoming 1066 // value for the new predecessor ClonedBB. The value will either be the same 1067 // value from BB or a cloned value. 1068 for (BasicBlock *Succ : BlocksToUpdate) { 1069 for (auto II = Succ->begin(); PHINode *Phi = dyn_cast<PHINode>(II); 1070 ++II) { 1071 Value *Incoming = Phi->getIncomingValueForBlock(BB); 1072 if (Incoming) { 1073 if (isa<Constant>(Incoming)) { 1074 Phi->addIncoming(Incoming, ClonedBB); 1075 continue; 1076 } 1077 Value *ClonedVal = VMap[Incoming]; 1078 if (ClonedVal) 1079 Phi->addIncoming(ClonedVal, ClonedBB); 1080 else 1081 Phi->addIncoming(Incoming, ClonedBB); 1082 } 1083 } 1084 } 1085 } 1086 1087 /// Sets the successor of PrevBB to be NewBB instead of OldBB. Note that all 1088 /// other successors are kept as well. 1089 void updatePredecessor(BasicBlock *PrevBB, BasicBlock *OldBB, 1090 BasicBlock *NewBB, DomTreeUpdater *DTU) { 1091 // When a path is reused, there is a chance that predecessors were already 1092 // updated before. Check if the predecessor needs to be updated first. 1093 if (!isPredecessor(OldBB, PrevBB)) 1094 return; 1095 1096 Instruction *PrevTerm = PrevBB->getTerminator(); 1097 for (unsigned Idx = 0; Idx < PrevTerm->getNumSuccessors(); Idx++) { 1098 if (PrevTerm->getSuccessor(Idx) == OldBB) { 1099 OldBB->removePredecessor(PrevBB, /* KeepOneInputPHIs = */ true); 1100 PrevTerm->setSuccessor(Idx, NewBB); 1101 } 1102 } 1103 DTU->applyUpdates({{DominatorTree::Delete, PrevBB, OldBB}, 1104 {DominatorTree::Insert, PrevBB, NewBB}}); 1105 } 1106 1107 /// Add new value mappings to the DefMap to keep track of all new definitions 1108 /// for a particular instruction. These will be used while updating SSA form. 1109 void updateDefMap(DefMap &NewDefs, ValueToValueMapTy &VMap) { 1110 SmallVector<std::pair<Instruction *, Instruction *>> NewDefsVector; 1111 NewDefsVector.reserve(VMap.size()); 1112 1113 for (auto Entry : VMap) { 1114 Instruction *Inst = 1115 dyn_cast<Instruction>(const_cast<Value *>(Entry.first)); 1116 if (!Inst || !Entry.second || isa<BranchInst>(Inst) || 1117 isa<SwitchInst>(Inst)) { 1118 continue; 1119 } 1120 1121 Instruction *Cloned = dyn_cast<Instruction>(Entry.second); 1122 if (!Cloned) 1123 continue; 1124 1125 NewDefsVector.push_back({Inst, Cloned}); 1126 } 1127 1128 // Sort the defs to get deterministic insertion order into NewDefs. 1129 sort(NewDefsVector, [](const auto &LHS, const auto &RHS) { 1130 if (LHS.first == RHS.first) 1131 return LHS.second->comesBefore(RHS.second); 1132 return LHS.first->comesBefore(RHS.first); 1133 }); 1134 1135 for (const auto &KV : NewDefsVector) 1136 NewDefs[KV.first].push_back(KV.second); 1137 } 1138 1139 /// Update the last branch of a particular cloned path to point to the correct 1140 /// case successor. 1141 /// 1142 /// Note that this is an optional step and would have been done in later 1143 /// optimizations, but it makes the CFG significantly easier to work with. 1144 void updateLastSuccessor(ThreadingPath &TPath, 1145 DuplicateBlockMap &DuplicateMap, 1146 DomTreeUpdater *DTU) { 1147 uint64_t NextState = TPath.getExitValue(); 1148 BasicBlock *BB = TPath.getPath().back(); 1149 BasicBlock *LastBlock = getClonedBB(BB, NextState, DuplicateMap); 1150 1151 // Note multiple paths can end at the same block so check that it is not 1152 // updated yet 1153 if (!isa<SwitchInst>(LastBlock->getTerminator())) 1154 return; 1155 SwitchInst *Switch = cast<SwitchInst>(LastBlock->getTerminator()); 1156 BasicBlock *NextCase = getNextCaseSuccessor(Switch, NextState); 1157 1158 std::vector<DominatorTree::UpdateType> DTUpdates; 1159 SmallPtrSet<BasicBlock *, 4> SuccSet; 1160 for (BasicBlock *Succ : successors(LastBlock)) { 1161 if (Succ != NextCase && SuccSet.insert(Succ).second) 1162 DTUpdates.push_back({DominatorTree::Delete, LastBlock, Succ}); 1163 } 1164 1165 Switch->eraseFromParent(); 1166 BranchInst::Create(NextCase, LastBlock); 1167 1168 DTU->applyUpdates(DTUpdates); 1169 } 1170 1171 /// After cloning blocks, some of the phi nodes have extra incoming values 1172 /// that are no longer used. This function removes them. 1173 void cleanPhiNodes(BasicBlock *BB) { 1174 // If BB is no longer reachable, remove any remaining phi nodes 1175 if (pred_empty(BB)) { 1176 std::vector<PHINode *> PhiToRemove; 1177 for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 1178 PhiToRemove.push_back(Phi); 1179 } 1180 for (PHINode *PN : PhiToRemove) { 1181 PN->replaceAllUsesWith(PoisonValue::get(PN->getType())); 1182 PN->eraseFromParent(); 1183 } 1184 return; 1185 } 1186 1187 // Remove any incoming values that come from an invalid predecessor 1188 for (auto II = BB->begin(); PHINode *Phi = dyn_cast<PHINode>(II); ++II) { 1189 std::vector<BasicBlock *> BlocksToRemove; 1190 for (BasicBlock *IncomingBB : Phi->blocks()) { 1191 if (!isPredecessor(BB, IncomingBB)) 1192 BlocksToRemove.push_back(IncomingBB); 1193 } 1194 for (BasicBlock *BB : BlocksToRemove) 1195 Phi->removeIncomingValue(BB); 1196 } 1197 } 1198 1199 /// Checks if BB was already cloned for a particular next state value. If it 1200 /// was then it returns this cloned block, and otherwise null. 1201 BasicBlock *getClonedBB(BasicBlock *BB, uint64_t NextState, 1202 DuplicateBlockMap &DuplicateMap) { 1203 CloneList ClonedBBs = DuplicateMap[BB]; 1204 1205 // Find an entry in the CloneList with this NextState. If it exists then 1206 // return the corresponding BB 1207 auto It = llvm::find_if(ClonedBBs, [NextState](const ClonedBlock &C) { 1208 return C.State == NextState; 1209 }); 1210 return It != ClonedBBs.end() ? (*It).BB : nullptr; 1211 } 1212 1213 /// Helper to get the successor corresponding to a particular case value for 1214 /// a switch statement. 1215 BasicBlock *getNextCaseSuccessor(SwitchInst *Switch, uint64_t NextState) { 1216 BasicBlock *NextCase = nullptr; 1217 for (auto Case : Switch->cases()) { 1218 if (Case.getCaseValue()->getZExtValue() == NextState) { 1219 NextCase = Case.getCaseSuccessor(); 1220 break; 1221 } 1222 } 1223 if (!NextCase) 1224 NextCase = Switch->getDefaultDest(); 1225 return NextCase; 1226 } 1227 1228 /// Returns true if IncomingBB is a predecessor of BB. 1229 bool isPredecessor(BasicBlock *BB, BasicBlock *IncomingBB) { 1230 return llvm::is_contained(predecessors(BB), IncomingBB); 1231 } 1232 1233 AllSwitchPaths *SwitchPaths; 1234 DominatorTree *DT; 1235 AssumptionCache *AC; 1236 TargetTransformInfo *TTI; 1237 OptimizationRemarkEmitter *ORE; 1238 SmallPtrSet<const Value *, 32> EphValues; 1239 std::vector<ThreadingPath> TPaths; 1240 }; 1241 1242 bool DFAJumpThreading::run(Function &F) { 1243 LLVM_DEBUG(dbgs() << "\nDFA Jump threading: " << F.getName() << "\n"); 1244 1245 if (F.hasOptSize()) { 1246 LLVM_DEBUG(dbgs() << "Skipping due to the 'minsize' attribute\n"); 1247 return false; 1248 } 1249 1250 if (ClViewCfgBefore) 1251 F.viewCFG(); 1252 1253 SmallVector<AllSwitchPaths, 2> ThreadableLoops; 1254 bool MadeChanges = false; 1255 1256 for (BasicBlock &BB : F) { 1257 auto *SI = dyn_cast<SwitchInst>(BB.getTerminator()); 1258 if (!SI) 1259 continue; 1260 1261 LLVM_DEBUG(dbgs() << "\nCheck if SwitchInst in BB " << BB.getName() 1262 << " is a candidate\n"); 1263 MainSwitch Switch(SI, ORE); 1264 1265 if (!Switch.getInstr()) 1266 continue; 1267 1268 LLVM_DEBUG(dbgs() << "\nSwitchInst in BB " << BB.getName() << " is a " 1269 << "candidate for jump threading\n"); 1270 LLVM_DEBUG(SI->dump()); 1271 1272 unfoldSelectInstrs(DT, Switch.getSelectInsts()); 1273 if (!Switch.getSelectInsts().empty()) 1274 MadeChanges = true; 1275 1276 AllSwitchPaths SwitchPaths(&Switch, ORE); 1277 SwitchPaths.run(); 1278 1279 if (SwitchPaths.getNumThreadingPaths() > 0) { 1280 ThreadableLoops.push_back(SwitchPaths); 1281 1282 // For the time being limit this optimization to occurring once in a 1283 // function since it can change the CFG significantly. This is not a 1284 // strict requirement but it can cause buggy behavior if there is an 1285 // overlap of blocks in different opportunities. There is a lot of room to 1286 // experiment with catching more opportunities here. 1287 break; 1288 } 1289 } 1290 1291 SmallPtrSet<const Value *, 32> EphValues; 1292 if (ThreadableLoops.size() > 0) 1293 CodeMetrics::collectEphemeralValues(&F, AC, EphValues); 1294 1295 for (AllSwitchPaths SwitchPaths : ThreadableLoops) { 1296 TransformDFA Transform(&SwitchPaths, DT, AC, TTI, ORE, EphValues); 1297 Transform.run(); 1298 MadeChanges = true; 1299 } 1300 1301 #ifdef EXPENSIVE_CHECKS 1302 assert(DT->verify(DominatorTree::VerificationLevel::Full)); 1303 verifyFunction(F, &dbgs()); 1304 #endif 1305 1306 return MadeChanges; 1307 } 1308 1309 } // end anonymous namespace 1310 1311 /// Integrate with the new Pass Manager 1312 PreservedAnalyses DFAJumpThreadingPass::run(Function &F, 1313 FunctionAnalysisManager &AM) { 1314 AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F); 1315 DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); 1316 TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F); 1317 OptimizationRemarkEmitter ORE(&F); 1318 1319 if (!DFAJumpThreading(&AC, &DT, &TTI, &ORE).run(F)) 1320 return PreservedAnalyses::all(); 1321 1322 PreservedAnalyses PA; 1323 PA.preserve<DominatorTreeAnalysis>(); 1324 return PA; 1325 } 1326