1 //===- StructurizeCFG.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Transforms/Scalar/StructurizeCFG.h" 10 #include "llvm/ADT/DenseMap.h" 11 #include "llvm/ADT/MapVector.h" 12 #include "llvm/ADT/SCCIterator.h" 13 #include "llvm/ADT/STLExtras.h" 14 #include "llvm/ADT/SmallPtrSet.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/Analysis/InstructionSimplify.h" 17 #include "llvm/Analysis/LegacyDivergenceAnalysis.h" 18 #include "llvm/Analysis/RegionInfo.h" 19 #include "llvm/Analysis/RegionIterator.h" 20 #include "llvm/Analysis/RegionPass.h" 21 #include "llvm/IR/BasicBlock.h" 22 #include "llvm/IR/CFG.h" 23 #include "llvm/IR/Constants.h" 24 #include "llvm/IR/Dominators.h" 25 #include "llvm/IR/Function.h" 26 #include "llvm/IR/InstrTypes.h" 27 #include "llvm/IR/Instruction.h" 28 #include "llvm/IR/Instructions.h" 29 #include "llvm/IR/Metadata.h" 30 #include "llvm/IR/PassManager.h" 31 #include "llvm/IR/PatternMatch.h" 32 #include "llvm/IR/Type.h" 33 #include "llvm/IR/Use.h" 34 #include "llvm/IR/Value.h" 35 #include "llvm/IR/ValueHandle.h" 36 #include "llvm/InitializePasses.h" 37 #include "llvm/Pass.h" 38 #include "llvm/Support/Casting.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include "llvm/Transforms/Scalar.h" 43 #include "llvm/Transforms/Utils.h" 44 #include "llvm/Transforms/Utils/Local.h" 45 #include "llvm/Transforms/Utils/SSAUpdater.h" 46 #include <algorithm> 47 #include <cassert> 48 #include <utility> 49 50 using namespace llvm; 51 using namespace llvm::PatternMatch; 52 53 #define DEBUG_TYPE "structurizecfg" 54 55 // The name for newly created blocks. 56 const char FlowBlockName[] = "Flow"; 57 58 namespace { 59 60 static cl::opt<bool> ForceSkipUniformRegions( 61 "structurizecfg-skip-uniform-regions", 62 cl::Hidden, 63 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"), 64 cl::init(false)); 65 66 static cl::opt<bool> 67 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden, 68 cl::desc("Allow relaxed uniform region checks"), 69 cl::init(true)); 70 71 // Definition of the complex types used in this pass. 72 73 using BBValuePair = std::pair<BasicBlock *, Value *>; 74 75 using RNVector = SmallVector<RegionNode *, 8>; 76 using BBVector = SmallVector<BasicBlock *, 8>; 77 using BranchVector = SmallVector<BranchInst *, 8>; 78 using BBValueVector = SmallVector<BBValuePair, 2>; 79 80 using BBSet = SmallPtrSet<BasicBlock *, 8>; 81 82 using PhiMap = MapVector<PHINode *, BBValueVector>; 83 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>; 84 85 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>; 86 using BBPredicates = DenseMap<BasicBlock *, Value *>; 87 using PredMap = DenseMap<BasicBlock *, BBPredicates>; 88 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>; 89 90 // A traits type that is intended to be used in graph algorithms. The graph 91 // traits starts at an entry node, and traverses the RegionNodes that are in 92 // the Nodes set. 93 struct SubGraphTraits { 94 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>; 95 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType; 96 97 // This wraps a set of Nodes into the iterator, so we know which edges to 98 // filter out. 99 class WrappedSuccIterator 100 : public iterator_adaptor_base< 101 WrappedSuccIterator, BaseSuccIterator, 102 typename std::iterator_traits<BaseSuccIterator>::iterator_category, 103 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { 104 SmallDenseSet<RegionNode *> *Nodes; 105 106 public: 107 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes) 108 : iterator_adaptor_base(It), Nodes(Nodes) {} 109 110 NodeRef operator*() const { return {*I, Nodes}; } 111 }; 112 113 static bool filterAll(const NodeRef &N) { return true; } 114 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); } 115 116 using ChildIteratorType = 117 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>; 118 119 static NodeRef getEntryNode(Region *R) { 120 return {GraphTraits<Region *>::getEntryNode(R), nullptr}; 121 } 122 123 static NodeRef getEntryNode(NodeRef N) { return N; } 124 125 static iterator_range<ChildIteratorType> children(const NodeRef &N) { 126 auto *filter = N.second ? &filterSet : &filterAll; 127 return make_filter_range( 128 make_range<WrappedSuccIterator>( 129 {GraphTraits<RegionNode *>::child_begin(N.first), N.second}, 130 {GraphTraits<RegionNode *>::child_end(N.first), N.second}), 131 filter); 132 } 133 134 static ChildIteratorType child_begin(const NodeRef &N) { 135 return children(N).begin(); 136 } 137 138 static ChildIteratorType child_end(const NodeRef &N) { 139 return children(N).end(); 140 } 141 }; 142 143 /// Finds the nearest common dominator of a set of BasicBlocks. 144 /// 145 /// For every BB you add to the set, you can specify whether we "remember" the 146 /// block. When you get the common dominator, you can also ask whether it's one 147 /// of the blocks we remembered. 148 class NearestCommonDominator { 149 DominatorTree *DT; 150 BasicBlock *Result = nullptr; 151 bool ResultIsRemembered = false; 152 153 /// Add BB to the resulting dominator. 154 void addBlock(BasicBlock *BB, bool Remember) { 155 if (!Result) { 156 Result = BB; 157 ResultIsRemembered = Remember; 158 return; 159 } 160 161 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB); 162 if (NewResult != Result) 163 ResultIsRemembered = false; 164 if (NewResult == BB) 165 ResultIsRemembered |= Remember; 166 Result = NewResult; 167 } 168 169 public: 170 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {} 171 172 void addBlock(BasicBlock *BB) { 173 addBlock(BB, /* Remember = */ false); 174 } 175 176 void addAndRememberBlock(BasicBlock *BB) { 177 addBlock(BB, /* Remember = */ true); 178 } 179 180 /// Get the nearest common dominator of all the BBs added via addBlock() and 181 /// addAndRememberBlock(). 182 BasicBlock *result() { return Result; } 183 184 /// Is the BB returned by getResult() one of the blocks we added to the set 185 /// with addAndRememberBlock()? 186 bool resultIsRememberedBlock() { return ResultIsRemembered; } 187 }; 188 189 /// Transforms the control flow graph on one single entry/exit region 190 /// at a time. 191 /// 192 /// After the transform all "If"/"Then"/"Else" style control flow looks like 193 /// this: 194 /// 195 /// \verbatim 196 /// 1 197 /// || 198 /// | | 199 /// 2 | 200 /// | / 201 /// |/ 202 /// 3 203 /// || Where: 204 /// | | 1 = "If" block, calculates the condition 205 /// 4 | 2 = "Then" subregion, runs if the condition is true 206 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow 207 /// |/ 4 = "Else" optional subregion, runs if the condition is false 208 /// 5 5 = "End" block, also rejoins the control flow 209 /// \endverbatim 210 /// 211 /// Control flow is expressed as a branch where the true exit goes into the 212 /// "Then"/"Else" region, while the false exit skips the region 213 /// The condition for the optional "Else" region is expressed as a PHI node. 214 /// The incoming values of the PHI node are true for the "If" edge and false 215 /// for the "Then" edge. 216 /// 217 /// Additionally to that even complicated loops look like this: 218 /// 219 /// \verbatim 220 /// 1 221 /// || 222 /// | | 223 /// 2 ^ Where: 224 /// | / 1 = "Entry" block 225 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block 226 /// 3 3 = "Flow" block, with back edge to entry block 227 /// | 228 /// \endverbatim 229 /// 230 /// The back edge of the "Flow" block is always on the false side of the branch 231 /// while the true side continues the general flow. So the loop condition 232 /// consist of a network of PHI nodes where the true incoming values expresses 233 /// breaks and the false values expresses continue states. 234 235 class StructurizeCFG { 236 Type *Boolean; 237 ConstantInt *BoolTrue; 238 ConstantInt *BoolFalse; 239 UndefValue *BoolUndef; 240 241 Function *Func; 242 Region *ParentRegion; 243 244 LegacyDivergenceAnalysis *DA = nullptr; 245 DominatorTree *DT; 246 247 SmallVector<RegionNode *, 8> Order; 248 BBSet Visited; 249 250 SmallVector<WeakVH, 8> AffectedPhis; 251 BBPhiMap DeletedPhis; 252 BB2BBVecMap AddedPhis; 253 254 PredMap Predicates; 255 BranchVector Conditions; 256 257 BB2BBMap Loops; 258 PredMap LoopPreds; 259 BranchVector LoopConds; 260 261 RegionNode *PrevNode; 262 263 void orderNodes(); 264 265 void analyzeLoops(RegionNode *N); 266 267 Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); 268 269 void gatherPredicates(RegionNode *N); 270 271 void collectInfos(); 272 273 void insertConditions(bool Loops); 274 275 void simplifyConditions(); 276 277 void delPhiValues(BasicBlock *From, BasicBlock *To); 278 279 void addPhiValues(BasicBlock *From, BasicBlock *To); 280 281 void setPhiValues(); 282 283 void simplifyAffectedPhis(); 284 285 void killTerminator(BasicBlock *BB); 286 287 void changeExit(RegionNode *Node, BasicBlock *NewExit, 288 bool IncludeDominator); 289 290 BasicBlock *getNextFlow(BasicBlock *Dominator); 291 292 BasicBlock *needPrefix(bool NeedEmpty); 293 294 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); 295 296 void setPrevNode(BasicBlock *BB); 297 298 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); 299 300 bool isPredictableTrue(RegionNode *Node); 301 302 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd); 303 304 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd); 305 306 void createFlow(); 307 308 void rebuildSSA(); 309 310 public: 311 void init(Region *R); 312 bool run(Region *R, DominatorTree *DT); 313 bool makeUniformRegion(Region *R, LegacyDivergenceAnalysis *DA); 314 }; 315 316 class StructurizeCFGLegacyPass : public RegionPass { 317 bool SkipUniformRegions; 318 319 public: 320 static char ID; 321 322 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false) 323 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) { 324 if (ForceSkipUniformRegions.getNumOccurrences()) 325 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 326 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry()); 327 } 328 329 bool runOnRegion(Region *R, RGPassManager &RGM) override { 330 StructurizeCFG SCFG; 331 SCFG.init(R); 332 if (SkipUniformRegions) { 333 LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>(); 334 if (SCFG.makeUniformRegion(R, DA)) 335 return false; 336 } 337 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 338 return SCFG.run(R, DT); 339 } 340 341 StringRef getPassName() const override { return "Structurize control flow"; } 342 343 void getAnalysisUsage(AnalysisUsage &AU) const override { 344 if (SkipUniformRegions) 345 AU.addRequired<LegacyDivergenceAnalysis>(); 346 AU.addRequiredID(LowerSwitchID); 347 AU.addRequired<DominatorTreeWrapperPass>(); 348 349 AU.addPreserved<DominatorTreeWrapperPass>(); 350 RegionPass::getAnalysisUsage(AU); 351 } 352 }; 353 354 } // end anonymous namespace 355 356 char StructurizeCFGLegacyPass::ID = 0; 357 358 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg", 359 "Structurize the CFG", false, false) 360 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) 361 INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) 362 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 363 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 364 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg", 365 "Structurize the CFG", false, false) 366 367 /// Build up the general order of nodes, by performing a topological sort of the 368 /// parent region's nodes, while ensuring that there is no outer cycle node 369 /// between any two inner cycle nodes. 370 void StructurizeCFG::orderNodes() { 371 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion), 372 GraphTraits<Region *>::nodes_end(ParentRegion))); 373 if (Order.empty()) 374 return; 375 376 SmallDenseSet<RegionNode *> Nodes; 377 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion); 378 379 // A list of range indices of SCCs in Order, to be processed. 380 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList; 381 unsigned I = 0, E = Order.size(); 382 while (true) { 383 // Run through all the SCCs in the subgraph starting with Entry. 384 for (auto SCCI = 385 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin( 386 EntryNode); 387 !SCCI.isAtEnd(); ++SCCI) { 388 auto &SCC = *SCCI; 389 390 // An SCC up to the size of 2, can be reduced to an entry (the last node), 391 // and a possible additional node. Therefore, it is already in order, and 392 // there is no need to add it to the work-list. 393 unsigned Size = SCC.size(); 394 if (Size > 2) 395 WorkList.emplace_back(I, I + Size); 396 397 // Add the SCC nodes to the Order array. 398 for (auto &N : SCC) { 399 assert(I < E && "SCC size mismatch!"); 400 Order[I++] = N.first; 401 } 402 } 403 assert(I == E && "SCC size mismatch!"); 404 405 // If there are no more SCCs to order, then we are done. 406 if (WorkList.empty()) 407 break; 408 409 std::tie(I, E) = WorkList.pop_back_val(); 410 411 // Collect the set of nodes in the SCC's subgraph. These are only the 412 // possible child nodes; we do not add the entry (last node) otherwise we 413 // will have the same exact SCC all over again. 414 Nodes.clear(); 415 Nodes.insert(Order.begin() + I, Order.begin() + E - 1); 416 417 // Update the entry node. 418 EntryNode.first = Order[E - 1]; 419 EntryNode.second = &Nodes; 420 } 421 } 422 423 /// Determine the end of the loops 424 void StructurizeCFG::analyzeLoops(RegionNode *N) { 425 if (N->isSubRegion()) { 426 // Test for exit as back edge 427 BasicBlock *Exit = N->getNodeAs<Region>()->getExit(); 428 if (Visited.count(Exit)) 429 Loops[Exit] = N->getEntry(); 430 431 } else { 432 // Test for successors as back edge 433 BasicBlock *BB = N->getNodeAs<BasicBlock>(); 434 BranchInst *Term = cast<BranchInst>(BB->getTerminator()); 435 436 for (BasicBlock *Succ : Term->successors()) 437 if (Visited.count(Succ)) 438 Loops[Succ] = BB; 439 } 440 } 441 442 /// Build the condition for one edge 443 Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, 444 bool Invert) { 445 Value *Cond = Invert ? BoolFalse : BoolTrue; 446 if (Term->isConditional()) { 447 Cond = Term->getCondition(); 448 449 if (Idx != (unsigned)Invert) 450 Cond = invertCondition(Cond); 451 } 452 return Cond; 453 } 454 455 /// Analyze the predecessors of each block and build up predicates 456 void StructurizeCFG::gatherPredicates(RegionNode *N) { 457 RegionInfo *RI = ParentRegion->getRegionInfo(); 458 BasicBlock *BB = N->getEntry(); 459 BBPredicates &Pred = Predicates[BB]; 460 BBPredicates &LPred = LoopPreds[BB]; 461 462 for (BasicBlock *P : predecessors(BB)) { 463 // Ignore it if it's a branch from outside into our region entry 464 if (!ParentRegion->contains(P)) 465 continue; 466 467 Region *R = RI->getRegionFor(P); 468 if (R == ParentRegion) { 469 // It's a top level block in our region 470 BranchInst *Term = cast<BranchInst>(P->getTerminator()); 471 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { 472 BasicBlock *Succ = Term->getSuccessor(i); 473 if (Succ != BB) 474 continue; 475 476 if (Visited.count(P)) { 477 // Normal forward edge 478 if (Term->isConditional()) { 479 // Try to treat it like an ELSE block 480 BasicBlock *Other = Term->getSuccessor(!i); 481 if (Visited.count(Other) && !Loops.count(Other) && 482 !Pred.count(Other) && !Pred.count(P)) { 483 484 Pred[Other] = BoolFalse; 485 Pred[P] = BoolTrue; 486 continue; 487 } 488 } 489 Pred[P] = buildCondition(Term, i, false); 490 } else { 491 // Back edge 492 LPred[P] = buildCondition(Term, i, true); 493 } 494 } 495 } else { 496 // It's an exit from a sub region 497 while (R->getParent() != ParentRegion) 498 R = R->getParent(); 499 500 // Edge from inside a subregion to its entry, ignore it 501 if (*R == *N) 502 continue; 503 504 BasicBlock *Entry = R->getEntry(); 505 if (Visited.count(Entry)) 506 Pred[Entry] = BoolTrue; 507 else 508 LPred[Entry] = BoolFalse; 509 } 510 } 511 } 512 513 /// Collect various loop and predicate infos 514 void StructurizeCFG::collectInfos() { 515 // Reset predicate 516 Predicates.clear(); 517 518 // and loop infos 519 Loops.clear(); 520 LoopPreds.clear(); 521 522 // Reset the visited nodes 523 Visited.clear(); 524 525 for (RegionNode *RN : reverse(Order)) { 526 LLVM_DEBUG(dbgs() << "Visiting: " 527 << (RN->isSubRegion() ? "SubRegion with entry: " : "") 528 << RN->getEntry()->getName() << "\n"); 529 530 // Analyze all the conditions leading to a node 531 gatherPredicates(RN); 532 533 // Remember that we've seen this node 534 Visited.insert(RN->getEntry()); 535 536 // Find the last back edges 537 analyzeLoops(RN); 538 } 539 } 540 541 /// Insert the missing branch conditions 542 void StructurizeCFG::insertConditions(bool Loops) { 543 BranchVector &Conds = Loops ? LoopConds : Conditions; 544 Value *Default = Loops ? BoolTrue : BoolFalse; 545 SSAUpdater PhiInserter; 546 547 for (BranchInst *Term : Conds) { 548 assert(Term->isConditional()); 549 550 BasicBlock *Parent = Term->getParent(); 551 BasicBlock *SuccTrue = Term->getSuccessor(0); 552 BasicBlock *SuccFalse = Term->getSuccessor(1); 553 554 PhiInserter.Initialize(Boolean, ""); 555 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); 556 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); 557 558 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; 559 560 NearestCommonDominator Dominator(DT); 561 Dominator.addBlock(Parent); 562 563 Value *ParentValue = nullptr; 564 for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) { 565 BasicBlock *BB = BBAndPred.first; 566 Value *Pred = BBAndPred.second; 567 568 if (BB == Parent) { 569 ParentValue = Pred; 570 break; 571 } 572 PhiInserter.AddAvailableValue(BB, Pred); 573 Dominator.addAndRememberBlock(BB); 574 } 575 576 if (ParentValue) { 577 Term->setCondition(ParentValue); 578 } else { 579 if (!Dominator.resultIsRememberedBlock()) 580 PhiInserter.AddAvailableValue(Dominator.result(), Default); 581 582 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); 583 } 584 } 585 } 586 587 /// Simplify any inverted conditions that were built by buildConditions. 588 void StructurizeCFG::simplifyConditions() { 589 SmallVector<Instruction *> InstToErase; 590 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) { 591 auto &Preds = I.second; 592 for (auto &J : Preds) { 593 auto &Cond = J.second; 594 Instruction *Inverted; 595 if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) && 596 !Cond->use_empty()) { 597 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) { 598 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate()); 599 Cond->replaceAllUsesWith(InvertedCmp); 600 InstToErase.push_back(cast<Instruction>(Cond)); 601 } 602 } 603 } 604 } 605 for (auto *I : InstToErase) 606 I->eraseFromParent(); 607 } 608 609 /// Remove all PHI values coming from "From" into "To" and remember 610 /// them in DeletedPhis 611 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { 612 PhiMap &Map = DeletedPhis[To]; 613 for (PHINode &Phi : To->phis()) { 614 bool Recorded = false; 615 while (Phi.getBasicBlockIndex(From) != -1) { 616 Value *Deleted = Phi.removeIncomingValue(From, false); 617 Map[&Phi].push_back(std::make_pair(From, Deleted)); 618 if (!Recorded) { 619 AffectedPhis.push_back(&Phi); 620 Recorded = true; 621 } 622 } 623 } 624 } 625 626 /// Add a dummy PHI value as soon as we knew the new predecessor 627 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { 628 for (PHINode &Phi : To->phis()) { 629 Value *Undef = UndefValue::get(Phi.getType()); 630 Phi.addIncoming(Undef, From); 631 } 632 AddedPhis[To].push_back(From); 633 } 634 635 /// Add the real PHI value as soon as everything is set up 636 void StructurizeCFG::setPhiValues() { 637 SmallVector<PHINode *, 8> InsertedPhis; 638 SSAUpdater Updater(&InsertedPhis); 639 for (const auto &AddedPhi : AddedPhis) { 640 BasicBlock *To = AddedPhi.first; 641 const BBVector &From = AddedPhi.second; 642 643 if (!DeletedPhis.count(To)) 644 continue; 645 646 PhiMap &Map = DeletedPhis[To]; 647 for (const auto &PI : Map) { 648 PHINode *Phi = PI.first; 649 Value *Undef = UndefValue::get(Phi->getType()); 650 Updater.Initialize(Phi->getType(), ""); 651 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 652 Updater.AddAvailableValue(To, Undef); 653 654 NearestCommonDominator Dominator(DT); 655 Dominator.addBlock(To); 656 for (const auto &VI : PI.second) { 657 Updater.AddAvailableValue(VI.first, VI.second); 658 Dominator.addAndRememberBlock(VI.first); 659 } 660 661 if (!Dominator.resultIsRememberedBlock()) 662 Updater.AddAvailableValue(Dominator.result(), Undef); 663 664 for (BasicBlock *FI : From) 665 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI)); 666 AffectedPhis.push_back(Phi); 667 } 668 669 DeletedPhis.erase(To); 670 } 671 assert(DeletedPhis.empty()); 672 673 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); 674 } 675 676 void StructurizeCFG::simplifyAffectedPhis() { 677 bool Changed; 678 do { 679 Changed = false; 680 SimplifyQuery Q(Func->getParent()->getDataLayout()); 681 Q.DT = DT; 682 for (WeakVH VH : AffectedPhis) { 683 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) { 684 if (auto NewValue = simplifyInstruction(Phi, Q)) { 685 Phi->replaceAllUsesWith(NewValue); 686 Phi->eraseFromParent(); 687 Changed = true; 688 } 689 } 690 } 691 } while (Changed); 692 } 693 694 /// Remove phi values from all successors and then remove the terminator. 695 void StructurizeCFG::killTerminator(BasicBlock *BB) { 696 Instruction *Term = BB->getTerminator(); 697 if (!Term) 698 return; 699 700 for (BasicBlock *Succ : successors(BB)) 701 delPhiValues(BB, Succ); 702 703 if (DA) 704 DA->removeValue(Term); 705 Term->eraseFromParent(); 706 } 707 708 /// Let node exit(s) point to NewExit 709 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, 710 bool IncludeDominator) { 711 if (Node->isSubRegion()) { 712 Region *SubRegion = Node->getNodeAs<Region>(); 713 BasicBlock *OldExit = SubRegion->getExit(); 714 BasicBlock *Dominator = nullptr; 715 716 // Find all the edges from the sub region to the exit. 717 // We use make_early_inc_range here because we modify BB's terminator. 718 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) { 719 if (!SubRegion->contains(BB)) 720 continue; 721 722 // Modify the edges to point to the new exit 723 delPhiValues(BB, OldExit); 724 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); 725 addPhiValues(BB, NewExit); 726 727 // Find the new dominator (if requested) 728 if (IncludeDominator) { 729 if (!Dominator) 730 Dominator = BB; 731 else 732 Dominator = DT->findNearestCommonDominator(Dominator, BB); 733 } 734 } 735 736 // Change the dominator (if requested) 737 if (Dominator) 738 DT->changeImmediateDominator(NewExit, Dominator); 739 740 // Update the region info 741 SubRegion->replaceExit(NewExit); 742 } else { 743 BasicBlock *BB = Node->getNodeAs<BasicBlock>(); 744 killTerminator(BB); 745 BranchInst::Create(NewExit, BB); 746 addPhiValues(BB, NewExit); 747 if (IncludeDominator) 748 DT->changeImmediateDominator(NewExit, BB); 749 } 750 } 751 752 /// Create a new flow node and update dominator tree and region info 753 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { 754 LLVMContext &Context = Func->getContext(); 755 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : 756 Order.back()->getEntry(); 757 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, 758 Func, Insert); 759 DT->addNewBlock(Flow, Dominator); 760 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); 761 return Flow; 762 } 763 764 /// Create a new or reuse the previous node as flow node 765 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) { 766 BasicBlock *Entry = PrevNode->getEntry(); 767 768 if (!PrevNode->isSubRegion()) { 769 killTerminator(Entry); 770 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) 771 return Entry; 772 } 773 774 // create a new flow node 775 BasicBlock *Flow = getNextFlow(Entry); 776 777 // and wire it up 778 changeExit(PrevNode, Flow, true); 779 PrevNode = ParentRegion->getBBNode(Flow); 780 return Flow; 781 } 782 783 /// Returns the region exit if possible, otherwise just a new flow node 784 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow, 785 bool ExitUseAllowed) { 786 if (!Order.empty() || !ExitUseAllowed) 787 return getNextFlow(Flow); 788 789 BasicBlock *Exit = ParentRegion->getExit(); 790 DT->changeImmediateDominator(Exit, Flow); 791 addPhiValues(Flow, Exit); 792 return Exit; 793 } 794 795 /// Set the previous node 796 void StructurizeCFG::setPrevNode(BasicBlock *BB) { 797 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) 798 : nullptr; 799 } 800 801 /// Does BB dominate all the predicates of Node? 802 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { 803 BBPredicates &Preds = Predicates[Node->getEntry()]; 804 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) { 805 return DT->dominates(BB, Pred.first); 806 }); 807 } 808 809 /// Can we predict that this node will always be called? 810 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { 811 BBPredicates &Preds = Predicates[Node->getEntry()]; 812 bool Dominated = false; 813 814 // Regionentry is always true 815 if (!PrevNode) 816 return true; 817 818 for (std::pair<BasicBlock*, Value*> Pred : Preds) { 819 BasicBlock *BB = Pred.first; 820 Value *V = Pred.second; 821 822 if (V != BoolTrue) 823 return false; 824 825 if (!Dominated && DT->dominates(BB, PrevNode->getEntry())) 826 Dominated = true; 827 } 828 829 // TODO: The dominator check is too strict 830 return Dominated; 831 } 832 833 /// Take one node from the order vector and wire it up 834 void StructurizeCFG::wireFlow(bool ExitUseAllowed, 835 BasicBlock *LoopEnd) { 836 RegionNode *Node = Order.pop_back_val(); 837 Visited.insert(Node->getEntry()); 838 839 if (isPredictableTrue(Node)) { 840 // Just a linear flow 841 if (PrevNode) { 842 changeExit(PrevNode, Node->getEntry(), true); 843 } 844 PrevNode = Node; 845 } else { 846 // Insert extra prefix node (or reuse last one) 847 BasicBlock *Flow = needPrefix(false); 848 849 // Insert extra postfix node (or use exit instead) 850 BasicBlock *Entry = Node->getEntry(); 851 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); 852 853 // let it point to entry and next block 854 Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow)); 855 addPhiValues(Flow, Entry); 856 DT->changeImmediateDominator(Entry, Flow); 857 858 PrevNode = Node; 859 while (!Order.empty() && !Visited.count(LoopEnd) && 860 dominatesPredicates(Entry, Order.back())) { 861 handleLoops(false, LoopEnd); 862 } 863 864 changeExit(PrevNode, Next, false); 865 setPrevNode(Next); 866 } 867 } 868 869 void StructurizeCFG::handleLoops(bool ExitUseAllowed, 870 BasicBlock *LoopEnd) { 871 RegionNode *Node = Order.back(); 872 BasicBlock *LoopStart = Node->getEntry(); 873 874 if (!Loops.count(LoopStart)) { 875 wireFlow(ExitUseAllowed, LoopEnd); 876 return; 877 } 878 879 if (!isPredictableTrue(Node)) 880 LoopStart = needPrefix(true); 881 882 LoopEnd = Loops[Node->getEntry()]; 883 wireFlow(false, LoopEnd); 884 while (!Visited.count(LoopEnd)) { 885 handleLoops(false, LoopEnd); 886 } 887 888 // If the start of the loop is the entry block, we can't branch to it so 889 // insert a new dummy entry block. 890 Function *LoopFunc = LoopStart->getParent(); 891 if (LoopStart == &LoopFunc->getEntryBlock()) { 892 LoopStart->setName("entry.orig"); 893 894 BasicBlock *NewEntry = 895 BasicBlock::Create(LoopStart->getContext(), 896 "entry", 897 LoopFunc, 898 LoopStart); 899 BranchInst::Create(LoopStart, NewEntry); 900 DT->setNewRoot(NewEntry); 901 } 902 903 // Create an extra loop end node 904 LoopEnd = needPrefix(false); 905 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); 906 LoopConds.push_back(BranchInst::Create(Next, LoopStart, 907 BoolUndef, LoopEnd)); 908 addPhiValues(LoopEnd, LoopStart); 909 setPrevNode(Next); 910 } 911 912 /// After this function control flow looks like it should be, but 913 /// branches and PHI nodes only have undefined conditions. 914 void StructurizeCFG::createFlow() { 915 BasicBlock *Exit = ParentRegion->getExit(); 916 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); 917 918 AffectedPhis.clear(); 919 DeletedPhis.clear(); 920 AddedPhis.clear(); 921 Conditions.clear(); 922 LoopConds.clear(); 923 924 PrevNode = nullptr; 925 Visited.clear(); 926 927 while (!Order.empty()) { 928 handleLoops(EntryDominatesExit, nullptr); 929 } 930 931 if (PrevNode) 932 changeExit(PrevNode, Exit, EntryDominatesExit); 933 else 934 assert(EntryDominatesExit); 935 } 936 937 /// Handle a rare case where the disintegrated nodes instructions 938 /// no longer dominate all their uses. Not sure if this is really necessary 939 void StructurizeCFG::rebuildSSA() { 940 SSAUpdater Updater; 941 for (BasicBlock *BB : ParentRegion->blocks()) 942 for (Instruction &I : *BB) { 943 bool Initialized = false; 944 // We may modify the use list as we iterate over it, so we use 945 // make_early_inc_range. 946 for (Use &U : llvm::make_early_inc_range(I.uses())) { 947 Instruction *User = cast<Instruction>(U.getUser()); 948 if (User->getParent() == BB) { 949 continue; 950 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 951 if (UserPN->getIncomingBlock(U) == BB) 952 continue; 953 } 954 955 if (DT->dominates(&I, User)) 956 continue; 957 958 if (!Initialized) { 959 Value *Undef = UndefValue::get(I.getType()); 960 Updater.Initialize(I.getType(), ""); 961 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 962 Updater.AddAvailableValue(BB, &I); 963 Initialized = true; 964 } 965 Updater.RewriteUseAfterInsertions(U); 966 } 967 } 968 } 969 970 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, 971 const LegacyDivergenceAnalysis &DA) { 972 // Bool for if all sub-regions are uniform. 973 bool SubRegionsAreUniform = true; 974 // Count of how many direct children are conditional. 975 unsigned ConditionalDirectChildren = 0; 976 977 for (auto E : R->elements()) { 978 if (!E->isSubRegion()) { 979 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator()); 980 if (!Br || !Br->isConditional()) 981 continue; 982 983 if (!DA.isUniform(Br)) 984 return false; 985 986 // One of our direct children is conditional. 987 ConditionalDirectChildren++; 988 989 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName() 990 << " has uniform terminator\n"); 991 } else { 992 // Explicitly refuse to treat regions as uniform if they have non-uniform 993 // subregions. We cannot rely on DivergenceAnalysis for branches in 994 // subregions because those branches may have been removed and re-created, 995 // so we look for our metadata instead. 996 // 997 // Warning: It would be nice to treat regions as uniform based only on 998 // their direct child basic blocks' terminators, regardless of whether 999 // subregions are uniform or not. However, this requires a very careful 1000 // look at SIAnnotateControlFlow to make sure nothing breaks there. 1001 for (auto BB : E->getNodeAs<Region>()->blocks()) { 1002 auto Br = dyn_cast<BranchInst>(BB->getTerminator()); 1003 if (!Br || !Br->isConditional()) 1004 continue; 1005 1006 if (!Br->getMetadata(UniformMDKindID)) { 1007 // Early exit if we cannot have relaxed uniform regions. 1008 if (!RelaxedUniformRegions) 1009 return false; 1010 1011 SubRegionsAreUniform = false; 1012 break; 1013 } 1014 } 1015 } 1016 } 1017 1018 // Our region is uniform if: 1019 // 1. All conditional branches that are direct children are uniform (checked 1020 // above). 1021 // 2. And either: 1022 // a. All sub-regions are uniform. 1023 // b. There is one or less conditional branches among the direct children. 1024 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1); 1025 } 1026 1027 void StructurizeCFG::init(Region *R) { 1028 LLVMContext &Context = R->getEntry()->getContext(); 1029 1030 Boolean = Type::getInt1Ty(Context); 1031 BoolTrue = ConstantInt::getTrue(Context); 1032 BoolFalse = ConstantInt::getFalse(Context); 1033 BoolUndef = UndefValue::get(Boolean); 1034 1035 this->DA = nullptr; 1036 } 1037 1038 bool StructurizeCFG::makeUniformRegion(Region *R, 1039 LegacyDivergenceAnalysis *DA) { 1040 if (R->isTopLevelRegion()) 1041 return false; 1042 1043 this->DA = DA; 1044 // TODO: We could probably be smarter here with how we handle sub-regions. 1045 // We currently rely on the fact that metadata is set by earlier invocations 1046 // of the pass on sub-regions, and that this metadata doesn't get lost -- 1047 // but we shouldn't rely on metadata for correctness! 1048 unsigned UniformMDKindID = 1049 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform"); 1050 1051 if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) { 1052 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R 1053 << '\n'); 1054 1055 // Mark all direct child block terminators as having been treated as 1056 // uniform. To account for a possible future in which non-uniform 1057 // sub-regions are treated more cleverly, indirect children are not 1058 // marked as uniform. 1059 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {}); 1060 for (RegionNode *E : R->elements()) { 1061 if (E->isSubRegion()) 1062 continue; 1063 1064 if (Instruction *Term = E->getEntry()->getTerminator()) 1065 Term->setMetadata(UniformMDKindID, MD); 1066 } 1067 1068 return true; 1069 } 1070 return false; 1071 } 1072 1073 /// Run the transformation for each region found 1074 bool StructurizeCFG::run(Region *R, DominatorTree *DT) { 1075 if (R->isTopLevelRegion()) 1076 return false; 1077 1078 this->DT = DT; 1079 1080 Func = R->getEntry()->getParent(); 1081 ParentRegion = R; 1082 1083 orderNodes(); 1084 collectInfos(); 1085 createFlow(); 1086 insertConditions(false); 1087 insertConditions(true); 1088 setPhiValues(); 1089 simplifyConditions(); 1090 simplifyAffectedPhis(); 1091 rebuildSSA(); 1092 1093 // Cleanup 1094 Order.clear(); 1095 Visited.clear(); 1096 DeletedPhis.clear(); 1097 AddedPhis.clear(); 1098 Predicates.clear(); 1099 Conditions.clear(); 1100 Loops.clear(); 1101 LoopPreds.clear(); 1102 LoopConds.clear(); 1103 1104 return true; 1105 } 1106 1107 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) { 1108 return new StructurizeCFGLegacyPass(SkipUniformRegions); 1109 } 1110 1111 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) { 1112 Regions.push_back(&R); 1113 for (const auto &E : R) 1114 addRegionIntoQueue(*E, Regions); 1115 } 1116 1117 PreservedAnalyses StructurizeCFGPass::run(Function &F, 1118 FunctionAnalysisManager &AM) { 1119 1120 bool Changed = false; 1121 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1122 auto &RI = AM.getResult<RegionInfoAnalysis>(F); 1123 std::vector<Region *> Regions; 1124 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions); 1125 while (!Regions.empty()) { 1126 Region *R = Regions.back(); 1127 StructurizeCFG SCFG; 1128 SCFG.init(R); 1129 Changed |= SCFG.run(R, DT); 1130 Regions.pop_back(); 1131 } 1132 if (!Changed) 1133 return PreservedAnalyses::all(); 1134 PreservedAnalyses PA; 1135 PA.preserve<DominatorTreeAnalysis>(); 1136 return PA; 1137 } 1138