1 //===- StructurizeCFG.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Transforms/Scalar/StructurizeCFG.h" 10 #include "llvm/ADT/DenseMap.h" 11 #include "llvm/ADT/MapVector.h" 12 #include "llvm/ADT/SCCIterator.h" 13 #include "llvm/ADT/STLExtras.h" 14 #include "llvm/ADT/SmallPtrSet.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/Analysis/InstructionSimplify.h" 17 #include "llvm/Analysis/LegacyDivergenceAnalysis.h" 18 #include "llvm/Analysis/RegionInfo.h" 19 #include "llvm/Analysis/RegionIterator.h" 20 #include "llvm/Analysis/RegionPass.h" 21 #include "llvm/IR/Argument.h" 22 #include "llvm/IR/BasicBlock.h" 23 #include "llvm/IR/CFG.h" 24 #include "llvm/IR/Constant.h" 25 #include "llvm/IR/Constants.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/IR/InstrTypes.h" 29 #include "llvm/IR/Instruction.h" 30 #include "llvm/IR/Instructions.h" 31 #include "llvm/IR/Metadata.h" 32 #include "llvm/IR/PassManager.h" 33 #include "llvm/IR/PatternMatch.h" 34 #include "llvm/IR/Type.h" 35 #include "llvm/IR/Use.h" 36 #include "llvm/IR/User.h" 37 #include "llvm/IR/Value.h" 38 #include "llvm/IR/ValueHandle.h" 39 #include "llvm/InitializePasses.h" 40 #include "llvm/Pass.h" 41 #include "llvm/Support/Casting.h" 42 #include "llvm/Support/CommandLine.h" 43 #include "llvm/Support/Debug.h" 44 #include "llvm/Support/ErrorHandling.h" 45 #include "llvm/Support/raw_ostream.h" 46 #include "llvm/Transforms/Scalar.h" 47 #include "llvm/Transforms/Utils.h" 48 #include "llvm/Transforms/Utils/Local.h" 49 #include "llvm/Transforms/Utils/SSAUpdater.h" 50 #include <algorithm> 51 #include <cassert> 52 #include <utility> 53 54 using namespace llvm; 55 using namespace llvm::PatternMatch; 56 57 #define DEBUG_TYPE "structurizecfg" 58 59 // The name for newly created blocks. 60 const char FlowBlockName[] = "Flow"; 61 62 namespace { 63 64 static cl::opt<bool> ForceSkipUniformRegions( 65 "structurizecfg-skip-uniform-regions", 66 cl::Hidden, 67 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"), 68 cl::init(false)); 69 70 static cl::opt<bool> 71 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden, 72 cl::desc("Allow relaxed uniform region checks"), 73 cl::init(true)); 74 75 // Definition of the complex types used in this pass. 76 77 using BBValuePair = std::pair<BasicBlock *, Value *>; 78 79 using RNVector = SmallVector<RegionNode *, 8>; 80 using BBVector = SmallVector<BasicBlock *, 8>; 81 using BranchVector = SmallVector<BranchInst *, 8>; 82 using BBValueVector = SmallVector<BBValuePair, 2>; 83 84 using BBSet = SmallPtrSet<BasicBlock *, 8>; 85 86 using PhiMap = MapVector<PHINode *, BBValueVector>; 87 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>; 88 89 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>; 90 using BBPredicates = DenseMap<BasicBlock *, Value *>; 91 using PredMap = DenseMap<BasicBlock *, BBPredicates>; 92 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>; 93 94 // A traits type that is intended to be used in graph algorithms. The graph 95 // traits starts at an entry node, and traverses the RegionNodes that are in 96 // the Nodes set. 97 struct SubGraphTraits { 98 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>; 99 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType; 100 101 // This wraps a set of Nodes into the iterator, so we know which edges to 102 // filter out. 103 class WrappedSuccIterator 104 : public iterator_adaptor_base< 105 WrappedSuccIterator, BaseSuccIterator, 106 typename std::iterator_traits<BaseSuccIterator>::iterator_category, 107 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { 108 SmallDenseSet<RegionNode *> *Nodes; 109 110 public: 111 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes) 112 : iterator_adaptor_base(It), Nodes(Nodes) {} 113 114 NodeRef operator*() const { return {*I, Nodes}; } 115 }; 116 117 static bool filterAll(const NodeRef &N) { return true; } 118 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); } 119 120 using ChildIteratorType = 121 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>; 122 123 static NodeRef getEntryNode(Region *R) { 124 return {GraphTraits<Region *>::getEntryNode(R), nullptr}; 125 } 126 127 static NodeRef getEntryNode(NodeRef N) { return N; } 128 129 static iterator_range<ChildIteratorType> children(const NodeRef &N) { 130 auto *filter = N.second ? &filterSet : &filterAll; 131 return make_filter_range( 132 make_range<WrappedSuccIterator>( 133 {GraphTraits<RegionNode *>::child_begin(N.first), N.second}, 134 {GraphTraits<RegionNode *>::child_end(N.first), N.second}), 135 filter); 136 } 137 138 static ChildIteratorType child_begin(const NodeRef &N) { 139 return children(N).begin(); 140 } 141 142 static ChildIteratorType child_end(const NodeRef &N) { 143 return children(N).end(); 144 } 145 }; 146 147 /// Finds the nearest common dominator of a set of BasicBlocks. 148 /// 149 /// For every BB you add to the set, you can specify whether we "remember" the 150 /// block. When you get the common dominator, you can also ask whether it's one 151 /// of the blocks we remembered. 152 class NearestCommonDominator { 153 DominatorTree *DT; 154 BasicBlock *Result = nullptr; 155 bool ResultIsRemembered = false; 156 157 /// Add BB to the resulting dominator. 158 void addBlock(BasicBlock *BB, bool Remember) { 159 if (!Result) { 160 Result = BB; 161 ResultIsRemembered = Remember; 162 return; 163 } 164 165 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB); 166 if (NewResult != Result) 167 ResultIsRemembered = false; 168 if (NewResult == BB) 169 ResultIsRemembered |= Remember; 170 Result = NewResult; 171 } 172 173 public: 174 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {} 175 176 void addBlock(BasicBlock *BB) { 177 addBlock(BB, /* Remember = */ false); 178 } 179 180 void addAndRememberBlock(BasicBlock *BB) { 181 addBlock(BB, /* Remember = */ true); 182 } 183 184 /// Get the nearest common dominator of all the BBs added via addBlock() and 185 /// addAndRememberBlock(). 186 BasicBlock *result() { return Result; } 187 188 /// Is the BB returned by getResult() one of the blocks we added to the set 189 /// with addAndRememberBlock()? 190 bool resultIsRememberedBlock() { return ResultIsRemembered; } 191 }; 192 193 /// Transforms the control flow graph on one single entry/exit region 194 /// at a time. 195 /// 196 /// After the transform all "If"/"Then"/"Else" style control flow looks like 197 /// this: 198 /// 199 /// \verbatim 200 /// 1 201 /// || 202 /// | | 203 /// 2 | 204 /// | / 205 /// |/ 206 /// 3 207 /// || Where: 208 /// | | 1 = "If" block, calculates the condition 209 /// 4 | 2 = "Then" subregion, runs if the condition is true 210 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow 211 /// |/ 4 = "Else" optional subregion, runs if the condition is false 212 /// 5 5 = "End" block, also rejoins the control flow 213 /// \endverbatim 214 /// 215 /// Control flow is expressed as a branch where the true exit goes into the 216 /// "Then"/"Else" region, while the false exit skips the region 217 /// The condition for the optional "Else" region is expressed as a PHI node. 218 /// The incoming values of the PHI node are true for the "If" edge and false 219 /// for the "Then" edge. 220 /// 221 /// Additionally to that even complicated loops look like this: 222 /// 223 /// \verbatim 224 /// 1 225 /// || 226 /// | | 227 /// 2 ^ Where: 228 /// | / 1 = "Entry" block 229 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block 230 /// 3 3 = "Flow" block, with back edge to entry block 231 /// | 232 /// \endverbatim 233 /// 234 /// The back edge of the "Flow" block is always on the false side of the branch 235 /// while the true side continues the general flow. So the loop condition 236 /// consist of a network of PHI nodes where the true incoming values expresses 237 /// breaks and the false values expresses continue states. 238 239 class StructurizeCFG { 240 Type *Boolean; 241 ConstantInt *BoolTrue; 242 ConstantInt *BoolFalse; 243 UndefValue *BoolUndef; 244 245 Function *Func; 246 Region *ParentRegion; 247 248 LegacyDivergenceAnalysis *DA = nullptr; 249 DominatorTree *DT; 250 251 SmallVector<RegionNode *, 8> Order; 252 BBSet Visited; 253 254 SmallVector<WeakVH, 8> AffectedPhis; 255 BBPhiMap DeletedPhis; 256 BB2BBVecMap AddedPhis; 257 258 PredMap Predicates; 259 BranchVector Conditions; 260 261 BB2BBMap Loops; 262 PredMap LoopPreds; 263 BranchVector LoopConds; 264 265 RegionNode *PrevNode; 266 267 void orderNodes(); 268 269 void analyzeLoops(RegionNode *N); 270 271 Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); 272 273 void gatherPredicates(RegionNode *N); 274 275 void collectInfos(); 276 277 void insertConditions(bool Loops); 278 279 void simplifyConditions(); 280 281 void delPhiValues(BasicBlock *From, BasicBlock *To); 282 283 void addPhiValues(BasicBlock *From, BasicBlock *To); 284 285 void setPhiValues(); 286 287 void simplifyAffectedPhis(); 288 289 void killTerminator(BasicBlock *BB); 290 291 void changeExit(RegionNode *Node, BasicBlock *NewExit, 292 bool IncludeDominator); 293 294 BasicBlock *getNextFlow(BasicBlock *Dominator); 295 296 BasicBlock *needPrefix(bool NeedEmpty); 297 298 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); 299 300 void setPrevNode(BasicBlock *BB); 301 302 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); 303 304 bool isPredictableTrue(RegionNode *Node); 305 306 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd); 307 308 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd); 309 310 void createFlow(); 311 312 void rebuildSSA(); 313 314 public: 315 void init(Region *R); 316 bool run(Region *R, DominatorTree *DT); 317 bool makeUniformRegion(Region *R, LegacyDivergenceAnalysis *DA); 318 }; 319 320 class StructurizeCFGLegacyPass : public RegionPass { 321 bool SkipUniformRegions; 322 323 public: 324 static char ID; 325 326 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false) 327 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) { 328 if (ForceSkipUniformRegions.getNumOccurrences()) 329 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 330 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry()); 331 } 332 333 bool runOnRegion(Region *R, RGPassManager &RGM) override { 334 StructurizeCFG SCFG; 335 SCFG.init(R); 336 if (SkipUniformRegions) { 337 LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>(); 338 if (SCFG.makeUniformRegion(R, DA)) 339 return false; 340 } 341 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 342 return SCFG.run(R, DT); 343 } 344 345 StringRef getPassName() const override { return "Structurize control flow"; } 346 347 void getAnalysisUsage(AnalysisUsage &AU) const override { 348 if (SkipUniformRegions) 349 AU.addRequired<LegacyDivergenceAnalysis>(); 350 AU.addRequiredID(LowerSwitchID); 351 AU.addRequired<DominatorTreeWrapperPass>(); 352 353 AU.addPreserved<DominatorTreeWrapperPass>(); 354 RegionPass::getAnalysisUsage(AU); 355 } 356 }; 357 358 } // end anonymous namespace 359 360 char StructurizeCFGLegacyPass::ID = 0; 361 362 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg", 363 "Structurize the CFG", false, false) 364 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) 365 INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) 366 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 367 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 368 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg", 369 "Structurize the CFG", false, false) 370 371 /// Build up the general order of nodes, by performing a topological sort of the 372 /// parent region's nodes, while ensuring that there is no outer cycle node 373 /// between any two inner cycle nodes. 374 void StructurizeCFG::orderNodes() { 375 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion), 376 GraphTraits<Region *>::nodes_end(ParentRegion))); 377 if (Order.empty()) 378 return; 379 380 SmallDenseSet<RegionNode *> Nodes; 381 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion); 382 383 // A list of range indices of SCCs in Order, to be processed. 384 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList; 385 unsigned I = 0, E = Order.size(); 386 while (true) { 387 // Run through all the SCCs in the subgraph starting with Entry. 388 for (auto SCCI = 389 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin( 390 EntryNode); 391 !SCCI.isAtEnd(); ++SCCI) { 392 auto &SCC = *SCCI; 393 394 // An SCC up to the size of 2, can be reduced to an entry (the last node), 395 // and a possible additional node. Therefore, it is already in order, and 396 // there is no need to add it to the work-list. 397 unsigned Size = SCC.size(); 398 if (Size > 2) 399 WorkList.emplace_back(I, I + Size); 400 401 // Add the SCC nodes to the Order array. 402 for (auto &N : SCC) { 403 assert(I < E && "SCC size mismatch!"); 404 Order[I++] = N.first; 405 } 406 } 407 assert(I == E && "SCC size mismatch!"); 408 409 // If there are no more SCCs to order, then we are done. 410 if (WorkList.empty()) 411 break; 412 413 std::tie(I, E) = WorkList.pop_back_val(); 414 415 // Collect the set of nodes in the SCC's subgraph. These are only the 416 // possible child nodes; we do not add the entry (last node) otherwise we 417 // will have the same exact SCC all over again. 418 Nodes.clear(); 419 Nodes.insert(Order.begin() + I, Order.begin() + E - 1); 420 421 // Update the entry node. 422 EntryNode.first = Order[E - 1]; 423 EntryNode.second = &Nodes; 424 } 425 } 426 427 /// Determine the end of the loops 428 void StructurizeCFG::analyzeLoops(RegionNode *N) { 429 if (N->isSubRegion()) { 430 // Test for exit as back edge 431 BasicBlock *Exit = N->getNodeAs<Region>()->getExit(); 432 if (Visited.count(Exit)) 433 Loops[Exit] = N->getEntry(); 434 435 } else { 436 // Test for successors as back edge 437 BasicBlock *BB = N->getNodeAs<BasicBlock>(); 438 BranchInst *Term = cast<BranchInst>(BB->getTerminator()); 439 440 for (BasicBlock *Succ : Term->successors()) 441 if (Visited.count(Succ)) 442 Loops[Succ] = BB; 443 } 444 } 445 446 /// Build the condition for one edge 447 Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, 448 bool Invert) { 449 Value *Cond = Invert ? BoolFalse : BoolTrue; 450 if (Term->isConditional()) { 451 Cond = Term->getCondition(); 452 453 if (Idx != (unsigned)Invert) 454 Cond = invertCondition(Cond); 455 } 456 return Cond; 457 } 458 459 /// Analyze the predecessors of each block and build up predicates 460 void StructurizeCFG::gatherPredicates(RegionNode *N) { 461 RegionInfo *RI = ParentRegion->getRegionInfo(); 462 BasicBlock *BB = N->getEntry(); 463 BBPredicates &Pred = Predicates[BB]; 464 BBPredicates &LPred = LoopPreds[BB]; 465 466 for (BasicBlock *P : predecessors(BB)) { 467 // Ignore it if it's a branch from outside into our region entry 468 if (!ParentRegion->contains(P)) 469 continue; 470 471 Region *R = RI->getRegionFor(P); 472 if (R == ParentRegion) { 473 // It's a top level block in our region 474 BranchInst *Term = cast<BranchInst>(P->getTerminator()); 475 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { 476 BasicBlock *Succ = Term->getSuccessor(i); 477 if (Succ != BB) 478 continue; 479 480 if (Visited.count(P)) { 481 // Normal forward edge 482 if (Term->isConditional()) { 483 // Try to treat it like an ELSE block 484 BasicBlock *Other = Term->getSuccessor(!i); 485 if (Visited.count(Other) && !Loops.count(Other) && 486 !Pred.count(Other) && !Pred.count(P)) { 487 488 Pred[Other] = BoolFalse; 489 Pred[P] = BoolTrue; 490 continue; 491 } 492 } 493 Pred[P] = buildCondition(Term, i, false); 494 } else { 495 // Back edge 496 LPred[P] = buildCondition(Term, i, true); 497 } 498 } 499 } else { 500 // It's an exit from a sub region 501 while (R->getParent() != ParentRegion) 502 R = R->getParent(); 503 504 // Edge from inside a subregion to its entry, ignore it 505 if (*R == *N) 506 continue; 507 508 BasicBlock *Entry = R->getEntry(); 509 if (Visited.count(Entry)) 510 Pred[Entry] = BoolTrue; 511 else 512 LPred[Entry] = BoolFalse; 513 } 514 } 515 } 516 517 /// Collect various loop and predicate infos 518 void StructurizeCFG::collectInfos() { 519 // Reset predicate 520 Predicates.clear(); 521 522 // and loop infos 523 Loops.clear(); 524 LoopPreds.clear(); 525 526 // Reset the visited nodes 527 Visited.clear(); 528 529 for (RegionNode *RN : reverse(Order)) { 530 LLVM_DEBUG(dbgs() << "Visiting: " 531 << (RN->isSubRegion() ? "SubRegion with entry: " : "") 532 << RN->getEntry()->getName() << "\n"); 533 534 // Analyze all the conditions leading to a node 535 gatherPredicates(RN); 536 537 // Remember that we've seen this node 538 Visited.insert(RN->getEntry()); 539 540 // Find the last back edges 541 analyzeLoops(RN); 542 } 543 } 544 545 /// Insert the missing branch conditions 546 void StructurizeCFG::insertConditions(bool Loops) { 547 BranchVector &Conds = Loops ? LoopConds : Conditions; 548 Value *Default = Loops ? BoolTrue : BoolFalse; 549 SSAUpdater PhiInserter; 550 551 for (BranchInst *Term : Conds) { 552 assert(Term->isConditional()); 553 554 BasicBlock *Parent = Term->getParent(); 555 BasicBlock *SuccTrue = Term->getSuccessor(0); 556 BasicBlock *SuccFalse = Term->getSuccessor(1); 557 558 PhiInserter.Initialize(Boolean, ""); 559 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); 560 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); 561 562 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; 563 564 NearestCommonDominator Dominator(DT); 565 Dominator.addBlock(Parent); 566 567 Value *ParentValue = nullptr; 568 for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) { 569 BasicBlock *BB = BBAndPred.first; 570 Value *Pred = BBAndPred.second; 571 572 if (BB == Parent) { 573 ParentValue = Pred; 574 break; 575 } 576 PhiInserter.AddAvailableValue(BB, Pred); 577 Dominator.addAndRememberBlock(BB); 578 } 579 580 if (ParentValue) { 581 Term->setCondition(ParentValue); 582 } else { 583 if (!Dominator.resultIsRememberedBlock()) 584 PhiInserter.AddAvailableValue(Dominator.result(), Default); 585 586 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); 587 } 588 } 589 } 590 591 /// Simplify any inverted conditions that were built by buildConditions. 592 void StructurizeCFG::simplifyConditions() { 593 SmallVector<Instruction *> InstToErase; 594 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) { 595 auto &Preds = I.second; 596 for (auto &J : Preds) { 597 auto &Cond = J.second; 598 Instruction *Inverted; 599 if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) && 600 !Cond->use_empty()) { 601 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) { 602 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate()); 603 Cond->replaceAllUsesWith(InvertedCmp); 604 InstToErase.push_back(cast<Instruction>(Cond)); 605 } 606 } 607 } 608 } 609 for (auto *I : InstToErase) 610 I->eraseFromParent(); 611 } 612 613 /// Remove all PHI values coming from "From" into "To" and remember 614 /// them in DeletedPhis 615 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { 616 PhiMap &Map = DeletedPhis[To]; 617 for (PHINode &Phi : To->phis()) { 618 bool Recorded = false; 619 while (Phi.getBasicBlockIndex(From) != -1) { 620 Value *Deleted = Phi.removeIncomingValue(From, false); 621 Map[&Phi].push_back(std::make_pair(From, Deleted)); 622 if (!Recorded) { 623 AffectedPhis.push_back(&Phi); 624 Recorded = true; 625 } 626 } 627 } 628 } 629 630 /// Add a dummy PHI value as soon as we knew the new predecessor 631 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { 632 for (PHINode &Phi : To->phis()) { 633 Value *Undef = UndefValue::get(Phi.getType()); 634 Phi.addIncoming(Undef, From); 635 } 636 AddedPhis[To].push_back(From); 637 } 638 639 /// Add the real PHI value as soon as everything is set up 640 void StructurizeCFG::setPhiValues() { 641 SmallVector<PHINode *, 8> InsertedPhis; 642 SSAUpdater Updater(&InsertedPhis); 643 for (const auto &AddedPhi : AddedPhis) { 644 BasicBlock *To = AddedPhi.first; 645 const BBVector &From = AddedPhi.second; 646 647 if (!DeletedPhis.count(To)) 648 continue; 649 650 PhiMap &Map = DeletedPhis[To]; 651 for (const auto &PI : Map) { 652 PHINode *Phi = PI.first; 653 Value *Undef = UndefValue::get(Phi->getType()); 654 Updater.Initialize(Phi->getType(), ""); 655 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 656 Updater.AddAvailableValue(To, Undef); 657 658 NearestCommonDominator Dominator(DT); 659 Dominator.addBlock(To); 660 for (const auto &VI : PI.second) { 661 Updater.AddAvailableValue(VI.first, VI.second); 662 Dominator.addAndRememberBlock(VI.first); 663 } 664 665 if (!Dominator.resultIsRememberedBlock()) 666 Updater.AddAvailableValue(Dominator.result(), Undef); 667 668 for (BasicBlock *FI : From) 669 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI)); 670 AffectedPhis.push_back(Phi); 671 } 672 673 DeletedPhis.erase(To); 674 } 675 assert(DeletedPhis.empty()); 676 677 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); 678 } 679 680 void StructurizeCFG::simplifyAffectedPhis() { 681 bool Changed; 682 do { 683 Changed = false; 684 SimplifyQuery Q(Func->getParent()->getDataLayout()); 685 Q.DT = DT; 686 for (WeakVH VH : AffectedPhis) { 687 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) { 688 if (auto NewValue = SimplifyInstruction(Phi, Q)) { 689 Phi->replaceAllUsesWith(NewValue); 690 Phi->eraseFromParent(); 691 Changed = true; 692 } 693 } 694 } 695 } while (Changed); 696 } 697 698 /// Remove phi values from all successors and then remove the terminator. 699 void StructurizeCFG::killTerminator(BasicBlock *BB) { 700 Instruction *Term = BB->getTerminator(); 701 if (!Term) 702 return; 703 704 for (BasicBlock *Succ : successors(BB)) 705 delPhiValues(BB, Succ); 706 707 if (DA) 708 DA->removeValue(Term); 709 Term->eraseFromParent(); 710 } 711 712 /// Let node exit(s) point to NewExit 713 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, 714 bool IncludeDominator) { 715 if (Node->isSubRegion()) { 716 Region *SubRegion = Node->getNodeAs<Region>(); 717 BasicBlock *OldExit = SubRegion->getExit(); 718 BasicBlock *Dominator = nullptr; 719 720 // Find all the edges from the sub region to the exit. 721 // We use make_early_inc_range here because we modify BB's terminator. 722 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) { 723 if (!SubRegion->contains(BB)) 724 continue; 725 726 // Modify the edges to point to the new exit 727 delPhiValues(BB, OldExit); 728 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); 729 addPhiValues(BB, NewExit); 730 731 // Find the new dominator (if requested) 732 if (IncludeDominator) { 733 if (!Dominator) 734 Dominator = BB; 735 else 736 Dominator = DT->findNearestCommonDominator(Dominator, BB); 737 } 738 } 739 740 // Change the dominator (if requested) 741 if (Dominator) 742 DT->changeImmediateDominator(NewExit, Dominator); 743 744 // Update the region info 745 SubRegion->replaceExit(NewExit); 746 } else { 747 BasicBlock *BB = Node->getNodeAs<BasicBlock>(); 748 killTerminator(BB); 749 BranchInst::Create(NewExit, BB); 750 addPhiValues(BB, NewExit); 751 if (IncludeDominator) 752 DT->changeImmediateDominator(NewExit, BB); 753 } 754 } 755 756 /// Create a new flow node and update dominator tree and region info 757 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { 758 LLVMContext &Context = Func->getContext(); 759 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : 760 Order.back()->getEntry(); 761 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, 762 Func, Insert); 763 DT->addNewBlock(Flow, Dominator); 764 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); 765 return Flow; 766 } 767 768 /// Create a new or reuse the previous node as flow node 769 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) { 770 BasicBlock *Entry = PrevNode->getEntry(); 771 772 if (!PrevNode->isSubRegion()) { 773 killTerminator(Entry); 774 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) 775 return Entry; 776 } 777 778 // create a new flow node 779 BasicBlock *Flow = getNextFlow(Entry); 780 781 // and wire it up 782 changeExit(PrevNode, Flow, true); 783 PrevNode = ParentRegion->getBBNode(Flow); 784 return Flow; 785 } 786 787 /// Returns the region exit if possible, otherwise just a new flow node 788 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow, 789 bool ExitUseAllowed) { 790 if (!Order.empty() || !ExitUseAllowed) 791 return getNextFlow(Flow); 792 793 BasicBlock *Exit = ParentRegion->getExit(); 794 DT->changeImmediateDominator(Exit, Flow); 795 addPhiValues(Flow, Exit); 796 return Exit; 797 } 798 799 /// Set the previous node 800 void StructurizeCFG::setPrevNode(BasicBlock *BB) { 801 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) 802 : nullptr; 803 } 804 805 /// Does BB dominate all the predicates of Node? 806 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { 807 BBPredicates &Preds = Predicates[Node->getEntry()]; 808 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) { 809 return DT->dominates(BB, Pred.first); 810 }); 811 } 812 813 /// Can we predict that this node will always be called? 814 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { 815 BBPredicates &Preds = Predicates[Node->getEntry()]; 816 bool Dominated = false; 817 818 // Regionentry is always true 819 if (!PrevNode) 820 return true; 821 822 for (std::pair<BasicBlock*, Value*> Pred : Preds) { 823 BasicBlock *BB = Pred.first; 824 Value *V = Pred.second; 825 826 if (V != BoolTrue) 827 return false; 828 829 if (!Dominated && DT->dominates(BB, PrevNode->getEntry())) 830 Dominated = true; 831 } 832 833 // TODO: The dominator check is too strict 834 return Dominated; 835 } 836 837 /// Take one node from the order vector and wire it up 838 void StructurizeCFG::wireFlow(bool ExitUseAllowed, 839 BasicBlock *LoopEnd) { 840 RegionNode *Node = Order.pop_back_val(); 841 Visited.insert(Node->getEntry()); 842 843 if (isPredictableTrue(Node)) { 844 // Just a linear flow 845 if (PrevNode) { 846 changeExit(PrevNode, Node->getEntry(), true); 847 } 848 PrevNode = Node; 849 } else { 850 // Insert extra prefix node (or reuse last one) 851 BasicBlock *Flow = needPrefix(false); 852 853 // Insert extra postfix node (or use exit instead) 854 BasicBlock *Entry = Node->getEntry(); 855 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); 856 857 // let it point to entry and next block 858 Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow)); 859 addPhiValues(Flow, Entry); 860 DT->changeImmediateDominator(Entry, Flow); 861 862 PrevNode = Node; 863 while (!Order.empty() && !Visited.count(LoopEnd) && 864 dominatesPredicates(Entry, Order.back())) { 865 handleLoops(false, LoopEnd); 866 } 867 868 changeExit(PrevNode, Next, false); 869 setPrevNode(Next); 870 } 871 } 872 873 void StructurizeCFG::handleLoops(bool ExitUseAllowed, 874 BasicBlock *LoopEnd) { 875 RegionNode *Node = Order.back(); 876 BasicBlock *LoopStart = Node->getEntry(); 877 878 if (!Loops.count(LoopStart)) { 879 wireFlow(ExitUseAllowed, LoopEnd); 880 return; 881 } 882 883 if (!isPredictableTrue(Node)) 884 LoopStart = needPrefix(true); 885 886 LoopEnd = Loops[Node->getEntry()]; 887 wireFlow(false, LoopEnd); 888 while (!Visited.count(LoopEnd)) { 889 handleLoops(false, LoopEnd); 890 } 891 892 // If the start of the loop is the entry block, we can't branch to it so 893 // insert a new dummy entry block. 894 Function *LoopFunc = LoopStart->getParent(); 895 if (LoopStart == &LoopFunc->getEntryBlock()) { 896 LoopStart->setName("entry.orig"); 897 898 BasicBlock *NewEntry = 899 BasicBlock::Create(LoopStart->getContext(), 900 "entry", 901 LoopFunc, 902 LoopStart); 903 BranchInst::Create(LoopStart, NewEntry); 904 DT->setNewRoot(NewEntry); 905 } 906 907 // Create an extra loop end node 908 LoopEnd = needPrefix(false); 909 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); 910 LoopConds.push_back(BranchInst::Create(Next, LoopStart, 911 BoolUndef, LoopEnd)); 912 addPhiValues(LoopEnd, LoopStart); 913 setPrevNode(Next); 914 } 915 916 /// After this function control flow looks like it should be, but 917 /// branches and PHI nodes only have undefined conditions. 918 void StructurizeCFG::createFlow() { 919 BasicBlock *Exit = ParentRegion->getExit(); 920 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); 921 922 AffectedPhis.clear(); 923 DeletedPhis.clear(); 924 AddedPhis.clear(); 925 Conditions.clear(); 926 LoopConds.clear(); 927 928 PrevNode = nullptr; 929 Visited.clear(); 930 931 while (!Order.empty()) { 932 handleLoops(EntryDominatesExit, nullptr); 933 } 934 935 if (PrevNode) 936 changeExit(PrevNode, Exit, EntryDominatesExit); 937 else 938 assert(EntryDominatesExit); 939 } 940 941 /// Handle a rare case where the disintegrated nodes instructions 942 /// no longer dominate all their uses. Not sure if this is really necessary 943 void StructurizeCFG::rebuildSSA() { 944 SSAUpdater Updater; 945 for (BasicBlock *BB : ParentRegion->blocks()) 946 for (Instruction &I : *BB) { 947 bool Initialized = false; 948 // We may modify the use list as we iterate over it, so we use 949 // make_early_inc_range. 950 for (Use &U : llvm::make_early_inc_range(I.uses())) { 951 Instruction *User = cast<Instruction>(U.getUser()); 952 if (User->getParent() == BB) { 953 continue; 954 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 955 if (UserPN->getIncomingBlock(U) == BB) 956 continue; 957 } 958 959 if (DT->dominates(&I, User)) 960 continue; 961 962 if (!Initialized) { 963 Value *Undef = UndefValue::get(I.getType()); 964 Updater.Initialize(I.getType(), ""); 965 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 966 Updater.AddAvailableValue(BB, &I); 967 Initialized = true; 968 } 969 Updater.RewriteUseAfterInsertions(U); 970 } 971 } 972 } 973 974 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, 975 const LegacyDivergenceAnalysis &DA) { 976 // Bool for if all sub-regions are uniform. 977 bool SubRegionsAreUniform = true; 978 // Count of how many direct children are conditional. 979 unsigned ConditionalDirectChildren = 0; 980 981 for (auto E : R->elements()) { 982 if (!E->isSubRegion()) { 983 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator()); 984 if (!Br || !Br->isConditional()) 985 continue; 986 987 if (!DA.isUniform(Br)) 988 return false; 989 990 // One of our direct children is conditional. 991 ConditionalDirectChildren++; 992 993 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName() 994 << " has uniform terminator\n"); 995 } else { 996 // Explicitly refuse to treat regions as uniform if they have non-uniform 997 // subregions. We cannot rely on DivergenceAnalysis for branches in 998 // subregions because those branches may have been removed and re-created, 999 // so we look for our metadata instead. 1000 // 1001 // Warning: It would be nice to treat regions as uniform based only on 1002 // their direct child basic blocks' terminators, regardless of whether 1003 // subregions are uniform or not. However, this requires a very careful 1004 // look at SIAnnotateControlFlow to make sure nothing breaks there. 1005 for (auto BB : E->getNodeAs<Region>()->blocks()) { 1006 auto Br = dyn_cast<BranchInst>(BB->getTerminator()); 1007 if (!Br || !Br->isConditional()) 1008 continue; 1009 1010 if (!Br->getMetadata(UniformMDKindID)) { 1011 // Early exit if we cannot have relaxed uniform regions. 1012 if (!RelaxedUniformRegions) 1013 return false; 1014 1015 SubRegionsAreUniform = false; 1016 break; 1017 } 1018 } 1019 } 1020 } 1021 1022 // Our region is uniform if: 1023 // 1. All conditional branches that are direct children are uniform (checked 1024 // above). 1025 // 2. And either: 1026 // a. All sub-regions are uniform. 1027 // b. There is one or less conditional branches among the direct children. 1028 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1); 1029 } 1030 1031 void StructurizeCFG::init(Region *R) { 1032 LLVMContext &Context = R->getEntry()->getContext(); 1033 1034 Boolean = Type::getInt1Ty(Context); 1035 BoolTrue = ConstantInt::getTrue(Context); 1036 BoolFalse = ConstantInt::getFalse(Context); 1037 BoolUndef = UndefValue::get(Boolean); 1038 1039 this->DA = nullptr; 1040 } 1041 1042 bool StructurizeCFG::makeUniformRegion(Region *R, 1043 LegacyDivergenceAnalysis *DA) { 1044 if (R->isTopLevelRegion()) 1045 return false; 1046 1047 this->DA = DA; 1048 // TODO: We could probably be smarter here with how we handle sub-regions. 1049 // We currently rely on the fact that metadata is set by earlier invocations 1050 // of the pass on sub-regions, and that this metadata doesn't get lost -- 1051 // but we shouldn't rely on metadata for correctness! 1052 unsigned UniformMDKindID = 1053 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform"); 1054 1055 if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) { 1056 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R 1057 << '\n'); 1058 1059 // Mark all direct child block terminators as having been treated as 1060 // uniform. To account for a possible future in which non-uniform 1061 // sub-regions are treated more cleverly, indirect children are not 1062 // marked as uniform. 1063 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {}); 1064 for (RegionNode *E : R->elements()) { 1065 if (E->isSubRegion()) 1066 continue; 1067 1068 if (Instruction *Term = E->getEntry()->getTerminator()) 1069 Term->setMetadata(UniformMDKindID, MD); 1070 } 1071 1072 return true; 1073 } 1074 return false; 1075 } 1076 1077 /// Run the transformation for each region found 1078 bool StructurizeCFG::run(Region *R, DominatorTree *DT) { 1079 if (R->isTopLevelRegion()) 1080 return false; 1081 1082 this->DT = DT; 1083 1084 Func = R->getEntry()->getParent(); 1085 ParentRegion = R; 1086 1087 orderNodes(); 1088 collectInfos(); 1089 createFlow(); 1090 insertConditions(false); 1091 insertConditions(true); 1092 simplifyConditions(); 1093 setPhiValues(); 1094 simplifyAffectedPhis(); 1095 rebuildSSA(); 1096 1097 // Cleanup 1098 Order.clear(); 1099 Visited.clear(); 1100 DeletedPhis.clear(); 1101 AddedPhis.clear(); 1102 Predicates.clear(); 1103 Conditions.clear(); 1104 Loops.clear(); 1105 LoopPreds.clear(); 1106 LoopConds.clear(); 1107 1108 return true; 1109 } 1110 1111 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) { 1112 return new StructurizeCFGLegacyPass(SkipUniformRegions); 1113 } 1114 1115 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) { 1116 Regions.push_back(&R); 1117 for (const auto &E : R) 1118 addRegionIntoQueue(*E, Regions); 1119 } 1120 1121 PreservedAnalyses StructurizeCFGPass::run(Function &F, 1122 FunctionAnalysisManager &AM) { 1123 1124 bool Changed = false; 1125 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1126 auto &RI = AM.getResult<RegionInfoAnalysis>(F); 1127 std::vector<Region *> Regions; 1128 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions); 1129 while (!Regions.empty()) { 1130 Region *R = Regions.back(); 1131 StructurizeCFG SCFG; 1132 SCFG.init(R); 1133 Changed |= SCFG.run(R, DT); 1134 Regions.pop_back(); 1135 } 1136 if (!Changed) 1137 return PreservedAnalyses::all(); 1138 PreservedAnalyses PA; 1139 PA.preserve<DominatorTreeAnalysis>(); 1140 return PA; 1141 } 1142