1 //===- StructurizeCFG.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Transforms/Scalar/StructurizeCFG.h" 10 #include "llvm/ADT/DenseMap.h" 11 #include "llvm/ADT/MapVector.h" 12 #include "llvm/ADT/SCCIterator.h" 13 #include "llvm/ADT/STLExtras.h" 14 #include "llvm/ADT/SmallPtrSet.h" 15 #include "llvm/ADT/SmallSet.h" 16 #include "llvm/ADT/SmallVector.h" 17 #include "llvm/Analysis/InstructionSimplify.h" 18 #include "llvm/Analysis/RegionInfo.h" 19 #include "llvm/Analysis/RegionIterator.h" 20 #include "llvm/Analysis/RegionPass.h" 21 #include "llvm/Analysis/UniformityAnalysis.h" 22 #include "llvm/IR/BasicBlock.h" 23 #include "llvm/IR/CFG.h" 24 #include "llvm/IR/Constants.h" 25 #include "llvm/IR/Dominators.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/IR/InstrTypes.h" 28 #include "llvm/IR/Instruction.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/Metadata.h" 31 #include "llvm/IR/PassManager.h" 32 #include "llvm/IR/PatternMatch.h" 33 #include "llvm/IR/Type.h" 34 #include "llvm/IR/Use.h" 35 #include "llvm/IR/Value.h" 36 #include "llvm/IR/ValueHandle.h" 37 #include "llvm/InitializePasses.h" 38 #include "llvm/Pass.h" 39 #include "llvm/Support/Casting.h" 40 #include "llvm/Support/CommandLine.h" 41 #include "llvm/Support/Debug.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include "llvm/Transforms/Scalar.h" 44 #include "llvm/Transforms/Utils.h" 45 #include "llvm/Transforms/Utils/Local.h" 46 #include "llvm/Transforms/Utils/SSAUpdater.h" 47 #include <algorithm> 48 #include <cassert> 49 #include <utility> 50 51 using namespace llvm; 52 using namespace llvm::PatternMatch; 53 54 #define DEBUG_TYPE "structurizecfg" 55 56 // The name for newly created blocks. 57 const char FlowBlockName[] = "Flow"; 58 59 namespace { 60 61 static cl::opt<bool> ForceSkipUniformRegions( 62 "structurizecfg-skip-uniform-regions", 63 cl::Hidden, 64 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"), 65 cl::init(false)); 66 67 static cl::opt<bool> 68 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden, 69 cl::desc("Allow relaxed uniform region checks"), 70 cl::init(true)); 71 72 // Definition of the complex types used in this pass. 73 74 using BBValuePair = std::pair<BasicBlock *, Value *>; 75 76 using RNVector = SmallVector<RegionNode *, 8>; 77 using BBVector = SmallVector<BasicBlock *, 8>; 78 using BranchVector = SmallVector<BranchInst *, 8>; 79 using BBValueVector = SmallVector<BBValuePair, 2>; 80 81 using BBSet = SmallPtrSet<BasicBlock *, 8>; 82 83 using PhiMap = MapVector<PHINode *, BBValueVector>; 84 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>; 85 86 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>; 87 using BBPredicates = DenseMap<BasicBlock *, Value *>; 88 using PredMap = DenseMap<BasicBlock *, BBPredicates>; 89 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>; 90 91 using BranchDebugLocMap = DenseMap<BasicBlock *, DebugLoc>; 92 93 // A traits type that is intended to be used in graph algorithms. The graph 94 // traits starts at an entry node, and traverses the RegionNodes that are in 95 // the Nodes set. 96 struct SubGraphTraits { 97 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>; 98 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType; 99 100 // This wraps a set of Nodes into the iterator, so we know which edges to 101 // filter out. 102 class WrappedSuccIterator 103 : public iterator_adaptor_base< 104 WrappedSuccIterator, BaseSuccIterator, 105 typename std::iterator_traits<BaseSuccIterator>::iterator_category, 106 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { 107 SmallDenseSet<RegionNode *> *Nodes; 108 109 public: 110 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes) 111 : iterator_adaptor_base(It), Nodes(Nodes) {} 112 113 NodeRef operator*() const { return {*I, Nodes}; } 114 }; 115 116 static bool filterAll(const NodeRef &N) { return true; } 117 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); } 118 119 using ChildIteratorType = 120 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>; 121 122 static NodeRef getEntryNode(Region *R) { 123 return {GraphTraits<Region *>::getEntryNode(R), nullptr}; 124 } 125 126 static NodeRef getEntryNode(NodeRef N) { return N; } 127 128 static iterator_range<ChildIteratorType> children(const NodeRef &N) { 129 auto *filter = N.second ? &filterSet : &filterAll; 130 return make_filter_range( 131 make_range<WrappedSuccIterator>( 132 {GraphTraits<RegionNode *>::child_begin(N.first), N.second}, 133 {GraphTraits<RegionNode *>::child_end(N.first), N.second}), 134 filter); 135 } 136 137 static ChildIteratorType child_begin(const NodeRef &N) { 138 return children(N).begin(); 139 } 140 141 static ChildIteratorType child_end(const NodeRef &N) { 142 return children(N).end(); 143 } 144 }; 145 146 /// Finds the nearest common dominator of a set of BasicBlocks. 147 /// 148 /// For every BB you add to the set, you can specify whether we "remember" the 149 /// block. When you get the common dominator, you can also ask whether it's one 150 /// of the blocks we remembered. 151 class NearestCommonDominator { 152 DominatorTree *DT; 153 BasicBlock *Result = nullptr; 154 bool ResultIsRemembered = false; 155 156 /// Add BB to the resulting dominator. 157 void addBlock(BasicBlock *BB, bool Remember) { 158 if (!Result) { 159 Result = BB; 160 ResultIsRemembered = Remember; 161 return; 162 } 163 164 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB); 165 if (NewResult != Result) 166 ResultIsRemembered = false; 167 if (NewResult == BB) 168 ResultIsRemembered |= Remember; 169 Result = NewResult; 170 } 171 172 public: 173 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {} 174 175 void addBlock(BasicBlock *BB) { 176 addBlock(BB, /* Remember = */ false); 177 } 178 179 void addAndRememberBlock(BasicBlock *BB) { 180 addBlock(BB, /* Remember = */ true); 181 } 182 183 /// Get the nearest common dominator of all the BBs added via addBlock() and 184 /// addAndRememberBlock(). 185 BasicBlock *result() { return Result; } 186 187 /// Is the BB returned by getResult() one of the blocks we added to the set 188 /// with addAndRememberBlock()? 189 bool resultIsRememberedBlock() { return ResultIsRemembered; } 190 }; 191 192 /// Transforms the control flow graph on one single entry/exit region 193 /// at a time. 194 /// 195 /// After the transform all "If"/"Then"/"Else" style control flow looks like 196 /// this: 197 /// 198 /// \verbatim 199 /// 1 200 /// || 201 /// | | 202 /// 2 | 203 /// | / 204 /// |/ 205 /// 3 206 /// || Where: 207 /// | | 1 = "If" block, calculates the condition 208 /// 4 | 2 = "Then" subregion, runs if the condition is true 209 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow 210 /// |/ 4 = "Else" optional subregion, runs if the condition is false 211 /// 5 5 = "End" block, also rejoins the control flow 212 /// \endverbatim 213 /// 214 /// Control flow is expressed as a branch where the true exit goes into the 215 /// "Then"/"Else" region, while the false exit skips the region 216 /// The condition for the optional "Else" region is expressed as a PHI node. 217 /// The incoming values of the PHI node are true for the "If" edge and false 218 /// for the "Then" edge. 219 /// 220 /// Additionally to that even complicated loops look like this: 221 /// 222 /// \verbatim 223 /// 1 224 /// || 225 /// | | 226 /// 2 ^ Where: 227 /// | / 1 = "Entry" block 228 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block 229 /// 3 3 = "Flow" block, with back edge to entry block 230 /// | 231 /// \endverbatim 232 /// 233 /// The back edge of the "Flow" block is always on the false side of the branch 234 /// while the true side continues the general flow. So the loop condition 235 /// consist of a network of PHI nodes where the true incoming values expresses 236 /// breaks and the false values expresses continue states. 237 238 class StructurizeCFG { 239 Type *Boolean; 240 ConstantInt *BoolTrue; 241 ConstantInt *BoolFalse; 242 Value *BoolPoison; 243 244 Function *Func; 245 Region *ParentRegion; 246 247 UniformityInfo *UA = nullptr; 248 DominatorTree *DT; 249 250 SmallVector<RegionNode *, 8> Order; 251 BBSet Visited; 252 BBSet FlowSet; 253 254 SmallVector<WeakVH, 8> AffectedPhis; 255 BBPhiMap DeletedPhis; 256 BB2BBVecMap AddedPhis; 257 258 PredMap Predicates; 259 BranchVector Conditions; 260 261 BB2BBMap Loops; 262 PredMap LoopPreds; 263 BranchVector LoopConds; 264 265 BranchDebugLocMap TermDL; 266 267 RegionNode *PrevNode; 268 269 void orderNodes(); 270 271 void analyzeLoops(RegionNode *N); 272 273 Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); 274 275 void gatherPredicates(RegionNode *N); 276 277 void collectInfos(); 278 279 void insertConditions(bool Loops); 280 281 void simplifyConditions(); 282 283 void delPhiValues(BasicBlock *From, BasicBlock *To); 284 285 void addPhiValues(BasicBlock *From, BasicBlock *To); 286 287 void findUndefBlocks(BasicBlock *PHIBlock, 288 const SmallSet<BasicBlock *, 8> &Incomings, 289 SmallVector<BasicBlock *> &UndefBlks) const; 290 void setPhiValues(); 291 292 void simplifyAffectedPhis(); 293 294 void killTerminator(BasicBlock *BB); 295 296 void changeExit(RegionNode *Node, BasicBlock *NewExit, 297 bool IncludeDominator); 298 299 BasicBlock *getNextFlow(BasicBlock *Dominator); 300 301 BasicBlock *needPrefix(bool NeedEmpty); 302 303 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); 304 305 void setPrevNode(BasicBlock *BB); 306 307 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); 308 309 bool isPredictableTrue(RegionNode *Node); 310 311 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd); 312 313 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd); 314 315 void createFlow(); 316 317 void rebuildSSA(); 318 319 public: 320 void init(Region *R); 321 bool run(Region *R, DominatorTree *DT); 322 bool makeUniformRegion(Region *R, UniformityInfo &UA); 323 }; 324 325 class StructurizeCFGLegacyPass : public RegionPass { 326 bool SkipUniformRegions; 327 328 public: 329 static char ID; 330 331 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false) 332 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) { 333 if (ForceSkipUniformRegions.getNumOccurrences()) 334 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 335 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry()); 336 } 337 338 bool runOnRegion(Region *R, RGPassManager &RGM) override { 339 StructurizeCFG SCFG; 340 SCFG.init(R); 341 if (SkipUniformRegions) { 342 UniformityInfo &UA = 343 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo(); 344 if (SCFG.makeUniformRegion(R, UA)) 345 return false; 346 } 347 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 348 return SCFG.run(R, DT); 349 } 350 351 StringRef getPassName() const override { return "Structurize control flow"; } 352 353 void getAnalysisUsage(AnalysisUsage &AU) const override { 354 if (SkipUniformRegions) 355 AU.addRequired<UniformityInfoWrapperPass>(); 356 AU.addRequiredID(LowerSwitchID); 357 AU.addRequired<DominatorTreeWrapperPass>(); 358 359 AU.addPreserved<DominatorTreeWrapperPass>(); 360 RegionPass::getAnalysisUsage(AU); 361 } 362 }; 363 364 } // end anonymous namespace 365 366 char StructurizeCFGLegacyPass::ID = 0; 367 368 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg", 369 "Structurize the CFG", false, false) 370 INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) 371 INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) 372 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 373 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 374 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg", 375 "Structurize the CFG", false, false) 376 377 /// Build up the general order of nodes, by performing a topological sort of the 378 /// parent region's nodes, while ensuring that there is no outer cycle node 379 /// between any two inner cycle nodes. 380 void StructurizeCFG::orderNodes() { 381 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion), 382 GraphTraits<Region *>::nodes_end(ParentRegion))); 383 if (Order.empty()) 384 return; 385 386 SmallDenseSet<RegionNode *> Nodes; 387 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion); 388 389 // A list of range indices of SCCs in Order, to be processed. 390 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList; 391 unsigned I = 0, E = Order.size(); 392 while (true) { 393 // Run through all the SCCs in the subgraph starting with Entry. 394 for (auto SCCI = 395 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin( 396 EntryNode); 397 !SCCI.isAtEnd(); ++SCCI) { 398 auto &SCC = *SCCI; 399 400 // An SCC up to the size of 2, can be reduced to an entry (the last node), 401 // and a possible additional node. Therefore, it is already in order, and 402 // there is no need to add it to the work-list. 403 unsigned Size = SCC.size(); 404 if (Size > 2) 405 WorkList.emplace_back(I, I + Size); 406 407 // Add the SCC nodes to the Order array. 408 for (const auto &N : SCC) { 409 assert(I < E && "SCC size mismatch!"); 410 Order[I++] = N.first; 411 } 412 } 413 assert(I == E && "SCC size mismatch!"); 414 415 // If there are no more SCCs to order, then we are done. 416 if (WorkList.empty()) 417 break; 418 419 std::tie(I, E) = WorkList.pop_back_val(); 420 421 // Collect the set of nodes in the SCC's subgraph. These are only the 422 // possible child nodes; we do not add the entry (last node) otherwise we 423 // will have the same exact SCC all over again. 424 Nodes.clear(); 425 Nodes.insert(Order.begin() + I, Order.begin() + E - 1); 426 427 // Update the entry node. 428 EntryNode.first = Order[E - 1]; 429 EntryNode.second = &Nodes; 430 } 431 } 432 433 /// Determine the end of the loops 434 void StructurizeCFG::analyzeLoops(RegionNode *N) { 435 if (N->isSubRegion()) { 436 // Test for exit as back edge 437 BasicBlock *Exit = N->getNodeAs<Region>()->getExit(); 438 if (Visited.count(Exit)) 439 Loops[Exit] = N->getEntry(); 440 441 } else { 442 // Test for successors as back edge 443 BasicBlock *BB = N->getNodeAs<BasicBlock>(); 444 BranchInst *Term = cast<BranchInst>(BB->getTerminator()); 445 446 for (BasicBlock *Succ : Term->successors()) 447 if (Visited.count(Succ)) 448 Loops[Succ] = BB; 449 } 450 } 451 452 /// Build the condition for one edge 453 Value *StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, 454 bool Invert) { 455 Value *Cond = Invert ? BoolFalse : BoolTrue; 456 if (Term->isConditional()) { 457 Cond = Term->getCondition(); 458 459 if (Idx != (unsigned)Invert) 460 Cond = invertCondition(Cond); 461 } 462 return Cond; 463 } 464 465 /// Analyze the predecessors of each block and build up predicates 466 void StructurizeCFG::gatherPredicates(RegionNode *N) { 467 RegionInfo *RI = ParentRegion->getRegionInfo(); 468 BasicBlock *BB = N->getEntry(); 469 BBPredicates &Pred = Predicates[BB]; 470 BBPredicates &LPred = LoopPreds[BB]; 471 472 for (BasicBlock *P : predecessors(BB)) { 473 // Ignore it if it's a branch from outside into our region entry 474 if (!ParentRegion->contains(P)) 475 continue; 476 477 Region *R = RI->getRegionFor(P); 478 if (R == ParentRegion) { 479 // It's a top level block in our region 480 BranchInst *Term = cast<BranchInst>(P->getTerminator()); 481 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { 482 BasicBlock *Succ = Term->getSuccessor(i); 483 if (Succ != BB) 484 continue; 485 486 if (Visited.count(P)) { 487 // Normal forward edge 488 if (Term->isConditional()) { 489 // Try to treat it like an ELSE block 490 BasicBlock *Other = Term->getSuccessor(!i); 491 if (Visited.count(Other) && !Loops.count(Other) && 492 !Pred.count(Other) && !Pred.count(P)) { 493 494 Pred[Other] = BoolFalse; 495 Pred[P] = BoolTrue; 496 continue; 497 } 498 } 499 Pred[P] = buildCondition(Term, i, false); 500 } else { 501 // Back edge 502 LPred[P] = buildCondition(Term, i, true); 503 } 504 } 505 } else { 506 // It's an exit from a sub region 507 while (R->getParent() != ParentRegion) 508 R = R->getParent(); 509 510 // Edge from inside a subregion to its entry, ignore it 511 if (*R == *N) 512 continue; 513 514 BasicBlock *Entry = R->getEntry(); 515 if (Visited.count(Entry)) 516 Pred[Entry] = BoolTrue; 517 else 518 LPred[Entry] = BoolFalse; 519 } 520 } 521 } 522 523 /// Collect various loop and predicate infos 524 void StructurizeCFG::collectInfos() { 525 // Reset predicate 526 Predicates.clear(); 527 528 // and loop infos 529 Loops.clear(); 530 LoopPreds.clear(); 531 532 // Reset the visited nodes 533 Visited.clear(); 534 535 for (RegionNode *RN : reverse(Order)) { 536 LLVM_DEBUG(dbgs() << "Visiting: " 537 << (RN->isSubRegion() ? "SubRegion with entry: " : "") 538 << RN->getEntry()->getName() << "\n"); 539 540 // Analyze all the conditions leading to a node 541 gatherPredicates(RN); 542 543 // Remember that we've seen this node 544 Visited.insert(RN->getEntry()); 545 546 // Find the last back edges 547 analyzeLoops(RN); 548 } 549 550 // Reset the collected term debug locations 551 TermDL.clear(); 552 553 for (BasicBlock &BB : *Func) { 554 if (const DebugLoc &DL = BB.getTerminator()->getDebugLoc()) 555 TermDL[&BB] = DL; 556 } 557 } 558 559 /// Insert the missing branch conditions 560 void StructurizeCFG::insertConditions(bool Loops) { 561 BranchVector &Conds = Loops ? LoopConds : Conditions; 562 Value *Default = Loops ? BoolTrue : BoolFalse; 563 SSAUpdater PhiInserter; 564 565 for (BranchInst *Term : Conds) { 566 assert(Term->isConditional()); 567 568 BasicBlock *Parent = Term->getParent(); 569 BasicBlock *SuccTrue = Term->getSuccessor(0); 570 BasicBlock *SuccFalse = Term->getSuccessor(1); 571 572 PhiInserter.Initialize(Boolean, ""); 573 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); 574 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); 575 576 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; 577 578 NearestCommonDominator Dominator(DT); 579 Dominator.addBlock(Parent); 580 581 Value *ParentValue = nullptr; 582 for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) { 583 BasicBlock *BB = BBAndPred.first; 584 Value *Pred = BBAndPred.second; 585 586 if (BB == Parent) { 587 ParentValue = Pred; 588 break; 589 } 590 PhiInserter.AddAvailableValue(BB, Pred); 591 Dominator.addAndRememberBlock(BB); 592 } 593 594 if (ParentValue) { 595 Term->setCondition(ParentValue); 596 } else { 597 if (!Dominator.resultIsRememberedBlock()) 598 PhiInserter.AddAvailableValue(Dominator.result(), Default); 599 600 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); 601 } 602 } 603 } 604 605 /// Simplify any inverted conditions that were built by buildConditions. 606 void StructurizeCFG::simplifyConditions() { 607 SmallVector<Instruction *> InstToErase; 608 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) { 609 auto &Preds = I.second; 610 for (auto &J : Preds) { 611 auto &Cond = J.second; 612 Instruction *Inverted; 613 if (match(Cond, m_Not(m_OneUse(m_Instruction(Inverted)))) && 614 !Cond->use_empty()) { 615 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) { 616 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate()); 617 Cond->replaceAllUsesWith(InvertedCmp); 618 InstToErase.push_back(cast<Instruction>(Cond)); 619 } 620 } 621 } 622 } 623 for (auto *I : InstToErase) 624 I->eraseFromParent(); 625 } 626 627 /// Remove all PHI values coming from "From" into "To" and remember 628 /// them in DeletedPhis 629 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { 630 PhiMap &Map = DeletedPhis[To]; 631 for (PHINode &Phi : To->phis()) { 632 bool Recorded = false; 633 while (Phi.getBasicBlockIndex(From) != -1) { 634 Value *Deleted = Phi.removeIncomingValue(From, false); 635 Map[&Phi].push_back(std::make_pair(From, Deleted)); 636 if (!Recorded) { 637 AffectedPhis.push_back(&Phi); 638 Recorded = true; 639 } 640 } 641 } 642 } 643 644 /// Add a dummy PHI value as soon as we knew the new predecessor 645 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { 646 for (PHINode &Phi : To->phis()) { 647 Value *Undef = UndefValue::get(Phi.getType()); 648 Phi.addIncoming(Undef, From); 649 } 650 AddedPhis[To].push_back(From); 651 } 652 653 /// When we are reconstructing a PHI inside \p PHIBlock with incoming values 654 /// from predecessors \p Incomings, we have a chance to mark the available value 655 /// from some blocks as undefined. The function will find out all such blocks 656 /// and return in \p UndefBlks. 657 void StructurizeCFG::findUndefBlocks( 658 BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings, 659 SmallVector<BasicBlock *> &UndefBlks) const { 660 // We may get a post-structured CFG like below: 661 // 662 // | P1 663 // |/ 664 // F1 665 // |\ 666 // | N 667 // |/ 668 // F2 669 // |\ 670 // | P2 671 // |/ 672 // F3 673 // |\ 674 // B 675 // 676 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors 677 // of B before structurization. F1/F2/F3 are flow blocks inserted during 678 // structurization process. Block N is not a predecessor of B before 679 // structurization, but are placed between the predecessors(P1/P2) of B after 680 // structurization. This usually means that threads went to N never take the 681 // path N->F2->F3->B. For example, the threads take the branch F1->N may 682 // always take the branch F2->P2. So, when we are reconstructing a PHI 683 // originally in B, we can safely say the incoming value from N is undefined. 684 SmallSet<BasicBlock *, 8> VisitedBlock; 685 SmallVector<BasicBlock *, 8> Stack; 686 if (PHIBlock == ParentRegion->getExit()) { 687 for (auto P : predecessors(PHIBlock)) { 688 if (ParentRegion->contains(P)) 689 Stack.push_back(P); 690 } 691 } else { 692 append_range(Stack, predecessors(PHIBlock)); 693 } 694 695 // Do a backward traversal over the CFG, and stop further searching if 696 // the block is not a Flow. If a block is neither flow block nor the 697 // incoming predecessor, then the incoming value from the block is 698 // undefined value for the PHI being reconstructed. 699 while (!Stack.empty()) { 700 BasicBlock *Current = Stack.pop_back_val(); 701 if (VisitedBlock.contains(Current)) 702 continue; 703 704 VisitedBlock.insert(Current); 705 if (FlowSet.contains(Current)) { 706 for (auto P : predecessors(Current)) 707 Stack.push_back(P); 708 } else if (!Incomings.contains(Current)) { 709 UndefBlks.push_back(Current); 710 } 711 } 712 } 713 714 /// Add the real PHI value as soon as everything is set up 715 void StructurizeCFG::setPhiValues() { 716 SmallVector<PHINode *, 8> InsertedPhis; 717 SSAUpdater Updater(&InsertedPhis); 718 for (const auto &AddedPhi : AddedPhis) { 719 BasicBlock *To = AddedPhi.first; 720 const BBVector &From = AddedPhi.second; 721 722 if (!DeletedPhis.count(To)) 723 continue; 724 725 SmallVector<BasicBlock *> UndefBlks; 726 bool CachedUndefs = false; 727 PhiMap &Map = DeletedPhis[To]; 728 for (const auto &PI : Map) { 729 PHINode *Phi = PI.first; 730 Value *Undef = UndefValue::get(Phi->getType()); 731 Updater.Initialize(Phi->getType(), ""); 732 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 733 Updater.AddAvailableValue(To, Undef); 734 735 SmallSet<BasicBlock *, 8> Incomings; 736 SmallVector<BasicBlock *> ConstantPreds; 737 for (const auto &VI : PI.second) { 738 Incomings.insert(VI.first); 739 Updater.AddAvailableValue(VI.first, VI.second); 740 if (isa<Constant>(VI.second)) 741 ConstantPreds.push_back(VI.first); 742 } 743 744 if (!CachedUndefs) { 745 findUndefBlocks(To, Incomings, UndefBlks); 746 CachedUndefs = true; 747 } 748 749 for (auto UB : UndefBlks) { 750 // If this undef block is dominated by any predecessor(before 751 // structurization) of reconstructed PHI with constant incoming value, 752 // don't mark the available value as undefined. Setting undef to such 753 // block will stop us from getting optimal phi insertion. 754 if (any_of(ConstantPreds, 755 [&](BasicBlock *CP) { return DT->dominates(CP, UB); })) 756 continue; 757 Updater.AddAvailableValue(UB, Undef); 758 } 759 760 for (BasicBlock *FI : From) 761 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI)); 762 AffectedPhis.push_back(Phi); 763 } 764 765 DeletedPhis.erase(To); 766 } 767 assert(DeletedPhis.empty()); 768 769 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); 770 } 771 772 void StructurizeCFG::simplifyAffectedPhis() { 773 bool Changed; 774 do { 775 Changed = false; 776 SimplifyQuery Q(Func->getParent()->getDataLayout()); 777 Q.DT = DT; 778 // Setting CanUseUndef to true might extend value liveness, set it to false 779 // to achieve better register pressure. 780 Q.CanUseUndef = false; 781 for (WeakVH VH : AffectedPhis) { 782 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) { 783 if (auto NewValue = simplifyInstruction(Phi, Q)) { 784 Phi->replaceAllUsesWith(NewValue); 785 Phi->eraseFromParent(); 786 Changed = true; 787 } 788 } 789 } 790 } while (Changed); 791 } 792 793 /// Remove phi values from all successors and then remove the terminator. 794 void StructurizeCFG::killTerminator(BasicBlock *BB) { 795 Instruction *Term = BB->getTerminator(); 796 if (!Term) 797 return; 798 799 for (BasicBlock *Succ : successors(BB)) 800 delPhiValues(BB, Succ); 801 802 Term->eraseFromParent(); 803 } 804 805 /// Let node exit(s) point to NewExit 806 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, 807 bool IncludeDominator) { 808 if (Node->isSubRegion()) { 809 Region *SubRegion = Node->getNodeAs<Region>(); 810 BasicBlock *OldExit = SubRegion->getExit(); 811 BasicBlock *Dominator = nullptr; 812 813 // Find all the edges from the sub region to the exit. 814 // We use make_early_inc_range here because we modify BB's terminator. 815 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) { 816 if (!SubRegion->contains(BB)) 817 continue; 818 819 // Modify the edges to point to the new exit 820 delPhiValues(BB, OldExit); 821 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); 822 addPhiValues(BB, NewExit); 823 824 // Find the new dominator (if requested) 825 if (IncludeDominator) { 826 if (!Dominator) 827 Dominator = BB; 828 else 829 Dominator = DT->findNearestCommonDominator(Dominator, BB); 830 } 831 } 832 833 // Change the dominator (if requested) 834 if (Dominator) 835 DT->changeImmediateDominator(NewExit, Dominator); 836 837 // Update the region info 838 SubRegion->replaceExit(NewExit); 839 } else { 840 BasicBlock *BB = Node->getNodeAs<BasicBlock>(); 841 killTerminator(BB); 842 BranchInst *Br = BranchInst::Create(NewExit, BB); 843 Br->setDebugLoc(TermDL[BB]); 844 addPhiValues(BB, NewExit); 845 if (IncludeDominator) 846 DT->changeImmediateDominator(NewExit, BB); 847 } 848 } 849 850 /// Create a new flow node and update dominator tree and region info 851 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { 852 LLVMContext &Context = Func->getContext(); 853 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : 854 Order.back()->getEntry(); 855 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, 856 Func, Insert); 857 FlowSet.insert(Flow); 858 859 // use a temporary variable to avoid a use-after-free if the map's storage is 860 // reallocated 861 DebugLoc DL = TermDL[Dominator]; 862 TermDL[Flow] = std::move(DL); 863 864 DT->addNewBlock(Flow, Dominator); 865 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); 866 return Flow; 867 } 868 869 /// Create a new or reuse the previous node as flow node 870 BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) { 871 BasicBlock *Entry = PrevNode->getEntry(); 872 873 if (!PrevNode->isSubRegion()) { 874 killTerminator(Entry); 875 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) 876 return Entry; 877 } 878 879 // create a new flow node 880 BasicBlock *Flow = getNextFlow(Entry); 881 882 // and wire it up 883 changeExit(PrevNode, Flow, true); 884 PrevNode = ParentRegion->getBBNode(Flow); 885 return Flow; 886 } 887 888 /// Returns the region exit if possible, otherwise just a new flow node 889 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow, 890 bool ExitUseAllowed) { 891 if (!Order.empty() || !ExitUseAllowed) 892 return getNextFlow(Flow); 893 894 BasicBlock *Exit = ParentRegion->getExit(); 895 DT->changeImmediateDominator(Exit, Flow); 896 addPhiValues(Flow, Exit); 897 return Exit; 898 } 899 900 /// Set the previous node 901 void StructurizeCFG::setPrevNode(BasicBlock *BB) { 902 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) 903 : nullptr; 904 } 905 906 /// Does BB dominate all the predicates of Node? 907 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { 908 BBPredicates &Preds = Predicates[Node->getEntry()]; 909 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) { 910 return DT->dominates(BB, Pred.first); 911 }); 912 } 913 914 /// Can we predict that this node will always be called? 915 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { 916 BBPredicates &Preds = Predicates[Node->getEntry()]; 917 bool Dominated = false; 918 919 // Regionentry is always true 920 if (!PrevNode) 921 return true; 922 923 for (std::pair<BasicBlock*, Value*> Pred : Preds) { 924 BasicBlock *BB = Pred.first; 925 Value *V = Pred.second; 926 927 if (V != BoolTrue) 928 return false; 929 930 if (!Dominated && DT->dominates(BB, PrevNode->getEntry())) 931 Dominated = true; 932 } 933 934 // TODO: The dominator check is too strict 935 return Dominated; 936 } 937 938 /// Take one node from the order vector and wire it up 939 void StructurizeCFG::wireFlow(bool ExitUseAllowed, 940 BasicBlock *LoopEnd) { 941 RegionNode *Node = Order.pop_back_val(); 942 Visited.insert(Node->getEntry()); 943 944 if (isPredictableTrue(Node)) { 945 // Just a linear flow 946 if (PrevNode) { 947 changeExit(PrevNode, Node->getEntry(), true); 948 } 949 PrevNode = Node; 950 } else { 951 // Insert extra prefix node (or reuse last one) 952 BasicBlock *Flow = needPrefix(false); 953 954 // Insert extra postfix node (or use exit instead) 955 BasicBlock *Entry = Node->getEntry(); 956 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); 957 958 // let it point to entry and next block 959 BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow); 960 Br->setDebugLoc(TermDL[Flow]); 961 Conditions.push_back(Br); 962 addPhiValues(Flow, Entry); 963 DT->changeImmediateDominator(Entry, Flow); 964 965 PrevNode = Node; 966 while (!Order.empty() && !Visited.count(LoopEnd) && 967 dominatesPredicates(Entry, Order.back())) { 968 handleLoops(false, LoopEnd); 969 } 970 971 changeExit(PrevNode, Next, false); 972 setPrevNode(Next); 973 } 974 } 975 976 void StructurizeCFG::handleLoops(bool ExitUseAllowed, 977 BasicBlock *LoopEnd) { 978 RegionNode *Node = Order.back(); 979 BasicBlock *LoopStart = Node->getEntry(); 980 981 if (!Loops.count(LoopStart)) { 982 wireFlow(ExitUseAllowed, LoopEnd); 983 return; 984 } 985 986 if (!isPredictableTrue(Node)) 987 LoopStart = needPrefix(true); 988 989 LoopEnd = Loops[Node->getEntry()]; 990 wireFlow(false, LoopEnd); 991 while (!Visited.count(LoopEnd)) { 992 handleLoops(false, LoopEnd); 993 } 994 995 assert(LoopStart != &LoopStart->getParent()->getEntryBlock()); 996 997 // Create an extra loop end node 998 LoopEnd = needPrefix(false); 999 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); 1000 BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd); 1001 Br->setDebugLoc(TermDL[LoopEnd]); 1002 LoopConds.push_back(Br); 1003 addPhiValues(LoopEnd, LoopStart); 1004 setPrevNode(Next); 1005 } 1006 1007 /// After this function control flow looks like it should be, but 1008 /// branches and PHI nodes only have undefined conditions. 1009 void StructurizeCFG::createFlow() { 1010 BasicBlock *Exit = ParentRegion->getExit(); 1011 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); 1012 1013 AffectedPhis.clear(); 1014 DeletedPhis.clear(); 1015 AddedPhis.clear(); 1016 Conditions.clear(); 1017 LoopConds.clear(); 1018 1019 PrevNode = nullptr; 1020 Visited.clear(); 1021 1022 while (!Order.empty()) { 1023 handleLoops(EntryDominatesExit, nullptr); 1024 } 1025 1026 if (PrevNode) 1027 changeExit(PrevNode, Exit, EntryDominatesExit); 1028 else 1029 assert(EntryDominatesExit); 1030 } 1031 1032 /// Handle a rare case where the disintegrated nodes instructions 1033 /// no longer dominate all their uses. Not sure if this is really necessary 1034 void StructurizeCFG::rebuildSSA() { 1035 SSAUpdater Updater; 1036 for (BasicBlock *BB : ParentRegion->blocks()) 1037 for (Instruction &I : *BB) { 1038 bool Initialized = false; 1039 // We may modify the use list as we iterate over it, so we use 1040 // make_early_inc_range. 1041 for (Use &U : llvm::make_early_inc_range(I.uses())) { 1042 Instruction *User = cast<Instruction>(U.getUser()); 1043 if (User->getParent() == BB) { 1044 continue; 1045 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 1046 if (UserPN->getIncomingBlock(U) == BB) 1047 continue; 1048 } 1049 1050 if (DT->dominates(&I, User)) 1051 continue; 1052 1053 if (!Initialized) { 1054 Value *Undef = UndefValue::get(I.getType()); 1055 Updater.Initialize(I.getType(), ""); 1056 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 1057 Updater.AddAvailableValue(BB, &I); 1058 Initialized = true; 1059 } 1060 Updater.RewriteUseAfterInsertions(U); 1061 } 1062 } 1063 } 1064 1065 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, 1066 const UniformityInfo &UA) { 1067 // Bool for if all sub-regions are uniform. 1068 bool SubRegionsAreUniform = true; 1069 // Count of how many direct children are conditional. 1070 unsigned ConditionalDirectChildren = 0; 1071 1072 for (auto *E : R->elements()) { 1073 if (!E->isSubRegion()) { 1074 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator()); 1075 if (!Br || !Br->isConditional()) 1076 continue; 1077 1078 if (!UA.isUniform(Br)) 1079 return false; 1080 1081 // One of our direct children is conditional. 1082 ConditionalDirectChildren++; 1083 1084 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName() 1085 << " has uniform terminator\n"); 1086 } else { 1087 // Explicitly refuse to treat regions as uniform if they have non-uniform 1088 // subregions. We cannot rely on UniformityAnalysis for branches in 1089 // subregions because those branches may have been removed and re-created, 1090 // so we look for our metadata instead. 1091 // 1092 // Warning: It would be nice to treat regions as uniform based only on 1093 // their direct child basic blocks' terminators, regardless of whether 1094 // subregions are uniform or not. However, this requires a very careful 1095 // look at SIAnnotateControlFlow to make sure nothing breaks there. 1096 for (auto *BB : E->getNodeAs<Region>()->blocks()) { 1097 auto Br = dyn_cast<BranchInst>(BB->getTerminator()); 1098 if (!Br || !Br->isConditional()) 1099 continue; 1100 1101 if (!Br->getMetadata(UniformMDKindID)) { 1102 // Early exit if we cannot have relaxed uniform regions. 1103 if (!RelaxedUniformRegions) 1104 return false; 1105 1106 SubRegionsAreUniform = false; 1107 break; 1108 } 1109 } 1110 } 1111 } 1112 1113 // Our region is uniform if: 1114 // 1. All conditional branches that are direct children are uniform (checked 1115 // above). 1116 // 2. And either: 1117 // a. All sub-regions are uniform. 1118 // b. There is one or less conditional branches among the direct children. 1119 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1); 1120 } 1121 1122 void StructurizeCFG::init(Region *R) { 1123 LLVMContext &Context = R->getEntry()->getContext(); 1124 1125 Boolean = Type::getInt1Ty(Context); 1126 BoolTrue = ConstantInt::getTrue(Context); 1127 BoolFalse = ConstantInt::getFalse(Context); 1128 BoolPoison = PoisonValue::get(Boolean); 1129 1130 this->UA = nullptr; 1131 } 1132 1133 bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) { 1134 if (R->isTopLevelRegion()) 1135 return false; 1136 1137 this->UA = &UA; 1138 1139 // TODO: We could probably be smarter here with how we handle sub-regions. 1140 // We currently rely on the fact that metadata is set by earlier invocations 1141 // of the pass on sub-regions, and that this metadata doesn't get lost -- 1142 // but we shouldn't rely on metadata for correctness! 1143 unsigned UniformMDKindID = 1144 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform"); 1145 1146 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) { 1147 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R 1148 << '\n'); 1149 1150 // Mark all direct child block terminators as having been treated as 1151 // uniform. To account for a possible future in which non-uniform 1152 // sub-regions are treated more cleverly, indirect children are not 1153 // marked as uniform. 1154 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {}); 1155 for (RegionNode *E : R->elements()) { 1156 if (E->isSubRegion()) 1157 continue; 1158 1159 if (Instruction *Term = E->getEntry()->getTerminator()) 1160 Term->setMetadata(UniformMDKindID, MD); 1161 } 1162 1163 return true; 1164 } 1165 return false; 1166 } 1167 1168 /// Run the transformation for each region found 1169 bool StructurizeCFG::run(Region *R, DominatorTree *DT) { 1170 if (R->isTopLevelRegion()) 1171 return false; 1172 1173 this->DT = DT; 1174 1175 Func = R->getEntry()->getParent(); 1176 ParentRegion = R; 1177 1178 orderNodes(); 1179 collectInfos(); 1180 createFlow(); 1181 insertConditions(false); 1182 insertConditions(true); 1183 setPhiValues(); 1184 simplifyConditions(); 1185 simplifyAffectedPhis(); 1186 rebuildSSA(); 1187 1188 // Cleanup 1189 Order.clear(); 1190 Visited.clear(); 1191 DeletedPhis.clear(); 1192 AddedPhis.clear(); 1193 Predicates.clear(); 1194 Conditions.clear(); 1195 Loops.clear(); 1196 LoopPreds.clear(); 1197 LoopConds.clear(); 1198 FlowSet.clear(); 1199 TermDL.clear(); 1200 1201 return true; 1202 } 1203 1204 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) { 1205 return new StructurizeCFGLegacyPass(SkipUniformRegions); 1206 } 1207 1208 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) { 1209 Regions.push_back(&R); 1210 for (const auto &E : R) 1211 addRegionIntoQueue(*E, Regions); 1212 } 1213 1214 PreservedAnalyses StructurizeCFGPass::run(Function &F, 1215 FunctionAnalysisManager &AM) { 1216 1217 bool Changed = false; 1218 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1219 auto &RI = AM.getResult<RegionInfoAnalysis>(F); 1220 std::vector<Region *> Regions; 1221 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions); 1222 while (!Regions.empty()) { 1223 Region *R = Regions.back(); 1224 StructurizeCFG SCFG; 1225 SCFG.init(R); 1226 Changed |= SCFG.run(R, DT); 1227 Regions.pop_back(); 1228 } 1229 if (!Changed) 1230 return PreservedAnalyses::all(); 1231 PreservedAnalyses PA; 1232 PA.preserve<DominatorTreeAnalysis>(); 1233 return PA; 1234 } 1235