1 //===- StructurizeCFG.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Transforms/Scalar/StructurizeCFG.h" 10 #include "llvm/ADT/DenseMap.h" 11 #include "llvm/ADT/EquivalenceClasses.h" 12 #include "llvm/ADT/MapVector.h" 13 #include "llvm/ADT/SCCIterator.h" 14 #include "llvm/ADT/STLExtras.h" 15 #include "llvm/ADT/SmallPtrSet.h" 16 #include "llvm/ADT/SmallSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/Analysis/InstructionSimplify.h" 19 #include "llvm/Analysis/RegionInfo.h" 20 #include "llvm/Analysis/RegionIterator.h" 21 #include "llvm/Analysis/RegionPass.h" 22 #include "llvm/Analysis/UniformityAnalysis.h" 23 #include "llvm/IR/BasicBlock.h" 24 #include "llvm/IR/CFG.h" 25 #include "llvm/IR/Constants.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/IR/InstrTypes.h" 29 #include "llvm/IR/Instruction.h" 30 #include "llvm/IR/Instructions.h" 31 #include "llvm/IR/Metadata.h" 32 #include "llvm/IR/PassManager.h" 33 #include "llvm/IR/PatternMatch.h" 34 #include "llvm/IR/ProfDataUtils.h" 35 #include "llvm/IR/Type.h" 36 #include "llvm/IR/Use.h" 37 #include "llvm/IR/Value.h" 38 #include "llvm/IR/ValueHandle.h" 39 #include "llvm/InitializePasses.h" 40 #include "llvm/Pass.h" 41 #include "llvm/Support/Casting.h" 42 #include "llvm/Support/CommandLine.h" 43 #include "llvm/Support/Debug.h" 44 #include "llvm/Support/raw_ostream.h" 45 #include "llvm/Transforms/Scalar.h" 46 #include "llvm/Transforms/Utils.h" 47 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 48 #include "llvm/Transforms/Utils/Local.h" 49 #include "llvm/Transforms/Utils/SSAUpdater.h" 50 #include <cassert> 51 #include <utility> 52 53 using namespace llvm; 54 using namespace llvm::PatternMatch; 55 56 #define DEBUG_TYPE "structurizecfg" 57 58 // The name for newly created blocks. 59 const char FlowBlockName[] = "Flow"; 60 61 namespace { 62 63 static cl::opt<bool> ForceSkipUniformRegions( 64 "structurizecfg-skip-uniform-regions", 65 cl::Hidden, 66 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"), 67 cl::init(false)); 68 69 static cl::opt<bool> 70 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden, 71 cl::desc("Allow relaxed uniform region checks"), 72 cl::init(true)); 73 74 // Definition of the complex types used in this pass. 75 76 using BBValuePair = std::pair<BasicBlock *, Value *>; 77 78 using RNVector = SmallVector<RegionNode *, 8>; 79 using BBVector = SmallVector<BasicBlock *, 8>; 80 using BranchVector = SmallVector<BranchInst *, 8>; 81 using BBValueVector = SmallVector<BBValuePair, 2>; 82 83 using BBSet = SmallPtrSet<BasicBlock *, 8>; 84 85 using PhiMap = MapVector<PHINode *, BBValueVector>; 86 using BB2BBVecMap = MapVector<BasicBlock *, BBVector>; 87 88 using BBPhiMap = DenseMap<BasicBlock *, PhiMap>; 89 90 using MaybeCondBranchWeights = std::optional<class CondBranchWeights>; 91 92 class CondBranchWeights { 93 uint32_t TrueWeight; 94 uint32_t FalseWeight; 95 96 CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {} 97 98 public: 99 static MaybeCondBranchWeights tryParse(const BranchInst &Br) { 100 assert(Br.isConditional()); 101 102 uint64_t T, F; 103 if (!extractBranchWeights(Br, T, F)) 104 return std::nullopt; 105 106 return CondBranchWeights(T, F); 107 } 108 109 static void setMetadata(BranchInst &Br, 110 const MaybeCondBranchWeights &Weights) { 111 assert(Br.isConditional()); 112 if (!Weights) 113 return; 114 uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight}; 115 setBranchWeights(Br, Arr, false); 116 } 117 118 CondBranchWeights invert() const { 119 return CondBranchWeights{FalseWeight, TrueWeight}; 120 } 121 }; 122 123 struct PredInfo { 124 Value *Pred; 125 MaybeCondBranchWeights Weights; 126 }; 127 128 using BBPredicates = DenseMap<BasicBlock *, PredInfo>; 129 using PredMap = DenseMap<BasicBlock *, BBPredicates>; 130 using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>; 131 132 // A traits type that is intended to be used in graph algorithms. The graph 133 // traits starts at an entry node, and traverses the RegionNodes that are in 134 // the Nodes set. 135 struct SubGraphTraits { 136 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>; 137 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType; 138 139 // This wraps a set of Nodes into the iterator, so we know which edges to 140 // filter out. 141 class WrappedSuccIterator 142 : public iterator_adaptor_base< 143 WrappedSuccIterator, BaseSuccIterator, 144 typename std::iterator_traits<BaseSuccIterator>::iterator_category, 145 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> { 146 SmallDenseSet<RegionNode *> *Nodes; 147 148 public: 149 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes) 150 : iterator_adaptor_base(It), Nodes(Nodes) {} 151 152 NodeRef operator*() const { return {*I, Nodes}; } 153 }; 154 155 static bool filterAll(const NodeRef &N) { return true; } 156 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); } 157 158 using ChildIteratorType = 159 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>; 160 161 static NodeRef getEntryNode(Region *R) { 162 return {GraphTraits<Region *>::getEntryNode(R), nullptr}; 163 } 164 165 static NodeRef getEntryNode(NodeRef N) { return N; } 166 167 static iterator_range<ChildIteratorType> children(const NodeRef &N) { 168 auto *filter = N.second ? &filterSet : &filterAll; 169 return make_filter_range( 170 make_range<WrappedSuccIterator>( 171 {GraphTraits<RegionNode *>::child_begin(N.first), N.second}, 172 {GraphTraits<RegionNode *>::child_end(N.first), N.second}), 173 filter); 174 } 175 176 static ChildIteratorType child_begin(const NodeRef &N) { 177 return children(N).begin(); 178 } 179 180 static ChildIteratorType child_end(const NodeRef &N) { 181 return children(N).end(); 182 } 183 }; 184 185 /// Finds the nearest common dominator of a set of BasicBlocks. 186 /// 187 /// For every BB you add to the set, you can specify whether we "remember" the 188 /// block. When you get the common dominator, you can also ask whether it's one 189 /// of the blocks we remembered. 190 class NearestCommonDominator { 191 DominatorTree *DT; 192 BasicBlock *Result = nullptr; 193 bool ResultIsRemembered = false; 194 195 /// Add BB to the resulting dominator. 196 void addBlock(BasicBlock *BB, bool Remember) { 197 if (!Result) { 198 Result = BB; 199 ResultIsRemembered = Remember; 200 return; 201 } 202 203 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB); 204 if (NewResult != Result) 205 ResultIsRemembered = false; 206 if (NewResult == BB) 207 ResultIsRemembered |= Remember; 208 Result = NewResult; 209 } 210 211 public: 212 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {} 213 214 void addBlock(BasicBlock *BB) { 215 addBlock(BB, /* Remember = */ false); 216 } 217 218 void addAndRememberBlock(BasicBlock *BB) { 219 addBlock(BB, /* Remember = */ true); 220 } 221 222 /// Get the nearest common dominator of all the BBs added via addBlock() and 223 /// addAndRememberBlock(). 224 BasicBlock *result() { return Result; } 225 226 /// Is the BB returned by getResult() one of the blocks we added to the set 227 /// with addAndRememberBlock()? 228 bool resultIsRememberedBlock() { return ResultIsRemembered; } 229 }; 230 231 /// Transforms the control flow graph on one single entry/exit region 232 /// at a time. 233 /// 234 /// After the transform all "If"/"Then"/"Else" style control flow looks like 235 /// this: 236 /// 237 /// \verbatim 238 /// 1 239 /// || 240 /// | | 241 /// 2 | 242 /// | / 243 /// |/ 244 /// 3 245 /// || Where: 246 /// | | 1 = "If" block, calculates the condition 247 /// 4 | 2 = "Then" subregion, runs if the condition is true 248 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow 249 /// |/ 4 = "Else" optional subregion, runs if the condition is false 250 /// 5 5 = "End" block, also rejoins the control flow 251 /// \endverbatim 252 /// 253 /// Control flow is expressed as a branch where the true exit goes into the 254 /// "Then"/"Else" region, while the false exit skips the region 255 /// The condition for the optional "Else" region is expressed as a PHI node. 256 /// The incoming values of the PHI node are true for the "If" edge and false 257 /// for the "Then" edge. 258 /// 259 /// Additionally to that even complicated loops look like this: 260 /// 261 /// \verbatim 262 /// 1 263 /// || 264 /// | | 265 /// 2 ^ Where: 266 /// | / 1 = "Entry" block 267 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block 268 /// 3 3 = "Flow" block, with back edge to entry block 269 /// | 270 /// \endverbatim 271 /// 272 /// The back edge of the "Flow" block is always on the false side of the branch 273 /// while the true side continues the general flow. So the loop condition 274 /// consist of a network of PHI nodes where the true incoming values expresses 275 /// breaks and the false values expresses continue states. 276 277 class StructurizeCFG { 278 Type *Boolean; 279 ConstantInt *BoolTrue; 280 ConstantInt *BoolFalse; 281 Value *BoolPoison; 282 283 Function *Func; 284 Region *ParentRegion; 285 286 UniformityInfo *UA = nullptr; 287 DominatorTree *DT; 288 289 SmallVector<RegionNode *, 8> Order; 290 BBSet Visited; 291 BBSet FlowSet; 292 293 SmallVector<WeakVH, 8> AffectedPhis; 294 BBPhiMap DeletedPhis; 295 BB2BBVecMap AddedPhis; 296 297 PredMap Predicates; 298 BranchVector Conditions; 299 300 BB2BBMap Loops; 301 PredMap LoopPreds; 302 BranchVector LoopConds; 303 304 RegionNode *PrevNode; 305 306 void orderNodes(); 307 308 void analyzeLoops(RegionNode *N); 309 310 PredInfo buildCondition(BranchInst *Term, unsigned Idx, bool Invert); 311 312 void gatherPredicates(RegionNode *N); 313 314 void collectInfos(); 315 316 void insertConditions(bool Loops); 317 318 void simplifyConditions(); 319 320 void delPhiValues(BasicBlock *From, BasicBlock *To); 321 322 void addPhiValues(BasicBlock *From, BasicBlock *To); 323 324 void findUndefBlocks(BasicBlock *PHIBlock, 325 const SmallSet<BasicBlock *, 8> &Incomings, 326 SmallVector<BasicBlock *> &UndefBlks) const; 327 328 void mergeIfCompatible(EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, 329 PHINode *B); 330 331 void setPhiValues(); 332 333 void simplifyAffectedPhis(); 334 335 DebugLoc killTerminator(BasicBlock *BB); 336 337 void changeExit(RegionNode *Node, BasicBlock *NewExit, 338 bool IncludeDominator); 339 340 BasicBlock *getNextFlow(BasicBlock *Dominator); 341 342 std::pair<BasicBlock *, DebugLoc> needPrefix(bool NeedEmpty); 343 344 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); 345 346 void setPrevNode(BasicBlock *BB); 347 348 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); 349 350 bool isPredictableTrue(RegionNode *Node); 351 352 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd); 353 354 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd); 355 356 void createFlow(); 357 358 void rebuildSSA(); 359 360 public: 361 void init(Region *R); 362 bool run(Region *R, DominatorTree *DT); 363 bool makeUniformRegion(Region *R, UniformityInfo &UA); 364 }; 365 366 class StructurizeCFGLegacyPass : public RegionPass { 367 bool SkipUniformRegions; 368 369 public: 370 static char ID; 371 372 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false) 373 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) { 374 if (ForceSkipUniformRegions.getNumOccurrences()) 375 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 376 initializeStructurizeCFGLegacyPassPass(*PassRegistry::getPassRegistry()); 377 } 378 379 bool runOnRegion(Region *R, RGPassManager &RGM) override { 380 StructurizeCFG SCFG; 381 SCFG.init(R); 382 if (SkipUniformRegions) { 383 UniformityInfo &UA = 384 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo(); 385 if (SCFG.makeUniformRegion(R, UA)) 386 return false; 387 } 388 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 389 return SCFG.run(R, DT); 390 } 391 392 StringRef getPassName() const override { return "Structurize control flow"; } 393 394 void getAnalysisUsage(AnalysisUsage &AU) const override { 395 if (SkipUniformRegions) 396 AU.addRequired<UniformityInfoWrapperPass>(); 397 AU.addRequired<DominatorTreeWrapperPass>(); 398 399 AU.addPreserved<DominatorTreeWrapperPass>(); 400 RegionPass::getAnalysisUsage(AU); 401 } 402 }; 403 404 } // end anonymous namespace 405 406 char StructurizeCFGLegacyPass::ID = 0; 407 408 INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg", 409 "Structurize the CFG", false, false) 410 INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) 411 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 412 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 413 INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg", 414 "Structurize the CFG", false, false) 415 416 /// Build up the general order of nodes, by performing a topological sort of the 417 /// parent region's nodes, while ensuring that there is no outer cycle node 418 /// between any two inner cycle nodes. 419 void StructurizeCFG::orderNodes() { 420 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion), 421 GraphTraits<Region *>::nodes_end(ParentRegion))); 422 if (Order.empty()) 423 return; 424 425 SmallDenseSet<RegionNode *> Nodes; 426 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion); 427 428 // A list of range indices of SCCs in Order, to be processed. 429 SmallVector<std::pair<unsigned, unsigned>, 8> WorkList; 430 unsigned I = 0, E = Order.size(); 431 while (true) { 432 // Run through all the SCCs in the subgraph starting with Entry. 433 for (auto SCCI = 434 scc_iterator<SubGraphTraits::NodeRef, SubGraphTraits>::begin( 435 EntryNode); 436 !SCCI.isAtEnd(); ++SCCI) { 437 auto &SCC = *SCCI; 438 439 // An SCC up to the size of 2, can be reduced to an entry (the last node), 440 // and a possible additional node. Therefore, it is already in order, and 441 // there is no need to add it to the work-list. 442 unsigned Size = SCC.size(); 443 if (Size > 2) 444 WorkList.emplace_back(I, I + Size); 445 446 // Add the SCC nodes to the Order array. 447 for (const auto &N : SCC) { 448 assert(I < E && "SCC size mismatch!"); 449 Order[I++] = N.first; 450 } 451 } 452 assert(I == E && "SCC size mismatch!"); 453 454 // If there are no more SCCs to order, then we are done. 455 if (WorkList.empty()) 456 break; 457 458 std::tie(I, E) = WorkList.pop_back_val(); 459 460 // Collect the set of nodes in the SCC's subgraph. These are only the 461 // possible child nodes; we do not add the entry (last node) otherwise we 462 // will have the same exact SCC all over again. 463 Nodes.clear(); 464 Nodes.insert(Order.begin() + I, Order.begin() + E - 1); 465 466 // Update the entry node. 467 EntryNode.first = Order[E - 1]; 468 EntryNode.second = &Nodes; 469 } 470 } 471 472 /// Determine the end of the loops 473 void StructurizeCFG::analyzeLoops(RegionNode *N) { 474 if (N->isSubRegion()) { 475 // Test for exit as back edge 476 BasicBlock *Exit = N->getNodeAs<Region>()->getExit(); 477 if (Visited.count(Exit)) 478 Loops[Exit] = N->getEntry(); 479 480 } else { 481 // Test for successors as back edge 482 BasicBlock *BB = N->getNodeAs<BasicBlock>(); 483 BranchInst *Term = cast<BranchInst>(BB->getTerminator()); 484 485 for (BasicBlock *Succ : Term->successors()) 486 if (Visited.count(Succ)) 487 Loops[Succ] = BB; 488 } 489 } 490 491 /// Build the condition for one edge 492 PredInfo StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, 493 bool Invert) { 494 Value *Cond = Invert ? BoolFalse : BoolTrue; 495 MaybeCondBranchWeights Weights; 496 497 if (Term->isConditional()) { 498 Cond = Term->getCondition(); 499 Weights = CondBranchWeights::tryParse(*Term); 500 501 if (Idx != (unsigned)Invert) { 502 Cond = invertCondition(Cond); 503 if (Weights) 504 Weights = Weights->invert(); 505 } 506 } 507 return {Cond, Weights}; 508 } 509 510 /// Analyze the predecessors of each block and build up predicates 511 void StructurizeCFG::gatherPredicates(RegionNode *N) { 512 RegionInfo *RI = ParentRegion->getRegionInfo(); 513 BasicBlock *BB = N->getEntry(); 514 BBPredicates &Pred = Predicates[BB]; 515 BBPredicates &LPred = LoopPreds[BB]; 516 517 for (BasicBlock *P : predecessors(BB)) { 518 // Ignore it if it's a branch from outside into our region entry 519 if (!ParentRegion->contains(P)) 520 continue; 521 522 Region *R = RI->getRegionFor(P); 523 if (R == ParentRegion) { 524 // It's a top level block in our region 525 BranchInst *Term = cast<BranchInst>(P->getTerminator()); 526 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { 527 BasicBlock *Succ = Term->getSuccessor(i); 528 if (Succ != BB) 529 continue; 530 531 if (Visited.count(P)) { 532 // Normal forward edge 533 if (Term->isConditional()) { 534 // Try to treat it like an ELSE block 535 BasicBlock *Other = Term->getSuccessor(!i); 536 if (Visited.count(Other) && !Loops.count(Other) && 537 !Pred.count(Other) && !Pred.count(P)) { 538 539 Pred[Other] = {BoolFalse, std::nullopt}; 540 Pred[P] = {BoolTrue, std::nullopt}; 541 continue; 542 } 543 } 544 Pred[P] = buildCondition(Term, i, false); 545 } else { 546 // Back edge 547 LPred[P] = buildCondition(Term, i, true); 548 } 549 } 550 } else { 551 // It's an exit from a sub region 552 while (R->getParent() != ParentRegion) 553 R = R->getParent(); 554 555 // Edge from inside a subregion to its entry, ignore it 556 if (*R == *N) 557 continue; 558 559 BasicBlock *Entry = R->getEntry(); 560 if (Visited.count(Entry)) 561 Pred[Entry] = {BoolTrue, std::nullopt}; 562 else 563 LPred[Entry] = {BoolFalse, std::nullopt}; 564 } 565 } 566 } 567 568 /// Collect various loop and predicate infos 569 void StructurizeCFG::collectInfos() { 570 // Reset predicate 571 Predicates.clear(); 572 573 // and loop infos 574 Loops.clear(); 575 LoopPreds.clear(); 576 577 // Reset the visited nodes 578 Visited.clear(); 579 580 for (RegionNode *RN : reverse(Order)) { 581 LLVM_DEBUG(dbgs() << "Visiting: " 582 << (RN->isSubRegion() ? "SubRegion with entry: " : "") 583 << RN->getEntry()->getName() << "\n"); 584 585 // Analyze all the conditions leading to a node 586 gatherPredicates(RN); 587 588 // Remember that we've seen this node 589 Visited.insert(RN->getEntry()); 590 591 // Find the last back edges 592 analyzeLoops(RN); 593 } 594 } 595 596 /// Insert the missing branch conditions 597 void StructurizeCFG::insertConditions(bool Loops) { 598 BranchVector &Conds = Loops ? LoopConds : Conditions; 599 Value *Default = Loops ? BoolTrue : BoolFalse; 600 SSAUpdater PhiInserter; 601 602 for (BranchInst *Term : Conds) { 603 assert(Term->isConditional()); 604 605 BasicBlock *Parent = Term->getParent(); 606 BasicBlock *SuccTrue = Term->getSuccessor(0); 607 BasicBlock *SuccFalse = Term->getSuccessor(1); 608 609 PhiInserter.Initialize(Boolean, ""); 610 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); 611 612 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; 613 614 NearestCommonDominator Dominator(DT); 615 Dominator.addBlock(Parent); 616 617 PredInfo ParentInfo{nullptr, std::nullopt}; 618 for (auto [BB, PI] : Preds) { 619 if (BB == Parent) { 620 ParentInfo = PI; 621 break; 622 } 623 PhiInserter.AddAvailableValue(BB, PI.Pred); 624 Dominator.addAndRememberBlock(BB); 625 } 626 627 if (ParentInfo.Pred) { 628 Term->setCondition(ParentInfo.Pred); 629 CondBranchWeights::setMetadata(*Term, ParentInfo.Weights); 630 } else { 631 if (!Dominator.resultIsRememberedBlock()) 632 PhiInserter.AddAvailableValue(Dominator.result(), Default); 633 634 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); 635 } 636 } 637 } 638 639 /// Simplify any inverted conditions that were built by buildConditions. 640 void StructurizeCFG::simplifyConditions() { 641 SmallVector<Instruction *> InstToErase; 642 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) { 643 auto &Preds = I.second; 644 for (auto [BB, PI] : Preds) { 645 Instruction *Inverted; 646 if (match(PI.Pred, m_Not(m_OneUse(m_Instruction(Inverted)))) && 647 !PI.Pred->use_empty()) { 648 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) { 649 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate()); 650 PI.Pred->replaceAllUsesWith(InvertedCmp); 651 InstToErase.push_back(cast<Instruction>(PI.Pred)); 652 } 653 } 654 } 655 } 656 for (auto *I : InstToErase) 657 I->eraseFromParent(); 658 } 659 660 /// Remove all PHI values coming from "From" into "To" and remember 661 /// them in DeletedPhis 662 void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { 663 PhiMap &Map = DeletedPhis[To]; 664 for (PHINode &Phi : To->phis()) { 665 bool Recorded = false; 666 while (Phi.getBasicBlockIndex(From) != -1) { 667 Value *Deleted = Phi.removeIncomingValue(From, false); 668 Map[&Phi].push_back(std::make_pair(From, Deleted)); 669 if (!Recorded) { 670 AffectedPhis.push_back(&Phi); 671 Recorded = true; 672 } 673 } 674 } 675 } 676 677 /// Add a dummy PHI value as soon as we knew the new predecessor 678 void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { 679 for (PHINode &Phi : To->phis()) { 680 Value *Poison = PoisonValue::get(Phi.getType()); 681 Phi.addIncoming(Poison, From); 682 } 683 AddedPhis[To].push_back(From); 684 } 685 686 /// When we are reconstructing a PHI inside \p PHIBlock with incoming values 687 /// from predecessors \p Incomings, we have a chance to mark the available value 688 /// from some blocks as undefined. The function will find out all such blocks 689 /// and return in \p UndefBlks. 690 void StructurizeCFG::findUndefBlocks( 691 BasicBlock *PHIBlock, const SmallSet<BasicBlock *, 8> &Incomings, 692 SmallVector<BasicBlock *> &UndefBlks) const { 693 // We may get a post-structured CFG like below: 694 // 695 // | P1 696 // |/ 697 // F1 698 // |\ 699 // | N 700 // |/ 701 // F2 702 // |\ 703 // | P2 704 // |/ 705 // F3 706 // |\ 707 // B 708 // 709 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors 710 // of B before structurization. F1/F2/F3 are flow blocks inserted during 711 // structurization process. Block N is not a predecessor of B before 712 // structurization, but are placed between the predecessors(P1/P2) of B after 713 // structurization. This usually means that threads went to N never take the 714 // path N->F2->F3->B. For example, the threads take the branch F1->N may 715 // always take the branch F2->P2. So, when we are reconstructing a PHI 716 // originally in B, we can safely say the incoming value from N is undefined. 717 SmallSet<BasicBlock *, 8> VisitedBlock; 718 SmallVector<BasicBlock *, 8> Stack; 719 if (PHIBlock == ParentRegion->getExit()) { 720 for (auto P : predecessors(PHIBlock)) { 721 if (ParentRegion->contains(P)) 722 Stack.push_back(P); 723 } 724 } else { 725 append_range(Stack, predecessors(PHIBlock)); 726 } 727 728 // Do a backward traversal over the CFG, and stop further searching if 729 // the block is not a Flow. If a block is neither flow block nor the 730 // incoming predecessor, then the incoming value from the block is 731 // undefined value for the PHI being reconstructed. 732 while (!Stack.empty()) { 733 BasicBlock *Current = Stack.pop_back_val(); 734 if (!VisitedBlock.insert(Current).second) 735 continue; 736 737 if (FlowSet.contains(Current)) 738 llvm::append_range(Stack, predecessors(Current)); 739 else if (!Incomings.contains(Current)) 740 UndefBlks.push_back(Current); 741 } 742 } 743 744 // If two phi nodes have compatible incoming values (for each 745 // incoming block, either they have the same incoming value or only one phi 746 // node has an incoming value), let them share the merged incoming values. The 747 // merge process is guided by the equivalence information from \p PhiClasses. 748 // The function will possibly update the incoming values of leader phi in 749 // DeletedPhis. 750 void StructurizeCFG::mergeIfCompatible( 751 EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, PHINode *B) { 752 auto ItA = PhiClasses.findLeader(PhiClasses.insert(A)); 753 auto ItB = PhiClasses.findLeader(PhiClasses.insert(B)); 754 // They are already in the same class, no work needed. 755 if (ItA == ItB) 756 return; 757 758 PHINode *LeaderA = *ItA; 759 PHINode *LeaderB = *ItB; 760 BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA]; 761 BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB]; 762 763 DenseMap<BasicBlock *, Value *> Mergeable(IncomingA.begin(), IncomingA.end()); 764 for (auto [BB, V] : IncomingB) { 765 auto BBIt = Mergeable.find(BB); 766 if (BBIt != Mergeable.end() && BBIt->second != V) 767 return; 768 // Either IncomingA does not have this value or IncomingA has the same 769 // value. 770 Mergeable.insert({BB, V}); 771 } 772 773 // Update the incoming value of leaderA. 774 IncomingA.assign(Mergeable.begin(), Mergeable.end()); 775 PhiClasses.unionSets(ItA, ItB); 776 } 777 778 /// Add the real PHI value as soon as everything is set up 779 void StructurizeCFG::setPhiValues() { 780 SmallVector<PHINode *, 8> InsertedPhis; 781 SSAUpdater Updater(&InsertedPhis); 782 DenseMap<BasicBlock *, SmallVector<BasicBlock *>> UndefBlksMap; 783 784 // Find phi nodes that have compatible incoming values (either they have 785 // the same value for the same block or only one phi node has an incoming 786 // value, see example below). We only search again the phi's that are 787 // referenced by another phi, which is the case we care about. 788 // 789 // For example (-- means no incoming value): 790 // phi1 : BB1:phi2 BB2:v BB3:-- 791 // phi2: BB1:-- BB2:v BB3:w 792 // 793 // Then we can merge these incoming values and let phi1, phi2 use the 794 // same set of incoming values: 795 // 796 // phi1&phi2: BB1:phi2 BB2:v BB3:w 797 // 798 // By doing this, phi1 and phi2 would share more intermediate phi nodes. 799 // This would help reduce the number of phi nodes during SSA reconstruction 800 // and ultimately result in fewer COPY instructions. 801 // 802 // This should be correct, because if a phi node does not have incoming 803 // value from certain block, this means the block is not the predecessor 804 // of the parent block, so we actually don't care about its incoming value. 805 EquivalenceClasses<PHINode *> PhiClasses; 806 for (const auto &[To, From] : AddedPhis) { 807 auto OldPhiIt = DeletedPhis.find(To); 808 if (OldPhiIt == DeletedPhis.end()) 809 continue; 810 811 PhiMap &BlkPhis = OldPhiIt->second; 812 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To]; 813 SmallSet<BasicBlock *, 8> Incomings; 814 815 // Get the undefined blocks shared by all the phi nodes. 816 if (!BlkPhis.empty()) { 817 Incomings.insert_range(llvm::make_first_range(BlkPhis.front().second)); 818 findUndefBlocks(To, Incomings, UndefBlks); 819 } 820 821 for (const auto &[Phi, Incomings] : OldPhiIt->second) { 822 SmallVector<PHINode *> IncomingPHIs; 823 for (const auto &[BB, V] : Incomings) { 824 // First, for each phi, check whether it has incoming value which is 825 // another phi. 826 if (PHINode *P = dyn_cast<PHINode>(V)) 827 IncomingPHIs.push_back(P); 828 } 829 830 for (auto *OtherPhi : IncomingPHIs) { 831 // Skip phis that are unrelated to the phi reconstruction for now. 832 if (!DeletedPhis.contains(OtherPhi->getParent())) 833 continue; 834 mergeIfCompatible(PhiClasses, Phi, OtherPhi); 835 } 836 } 837 } 838 839 for (const auto &AddedPhi : AddedPhis) { 840 BasicBlock *To = AddedPhi.first; 841 const BBVector &From = AddedPhi.second; 842 843 auto It = DeletedPhis.find(To); 844 if (It == DeletedPhis.end()) 845 continue; 846 847 PhiMap &Map = It->second; 848 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To]; 849 for (const auto &[Phi, Incoming] : Map) { 850 Value *Poison = PoisonValue::get(Phi->getType()); 851 Updater.Initialize(Phi->getType(), ""); 852 Updater.AddAvailableValue(&Func->getEntryBlock(), Poison); 853 Updater.AddAvailableValue(To, Poison); 854 855 // Use leader phi's incoming if there is. 856 auto LeaderIt = PhiClasses.findLeader(Phi); 857 bool UseIncomingOfLeader = 858 LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi; 859 const auto &IncomingMap = 860 UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt] 861 : Incoming; 862 863 SmallVector<BasicBlock *> ConstantPreds; 864 for (const auto &[BB, V] : IncomingMap) { 865 Updater.AddAvailableValue(BB, V); 866 if (isa<Constant>(V)) 867 ConstantPreds.push_back(BB); 868 } 869 870 for (auto UB : UndefBlks) { 871 // If this undef block is dominated by any predecessor(before 872 // structurization) of reconstructed PHI with constant incoming value, 873 // don't mark the available value as undefined. Setting undef to such 874 // block will stop us from getting optimal phi insertion. 875 if (any_of(ConstantPreds, 876 [&](BasicBlock *CP) { return DT->dominates(CP, UB); })) 877 continue; 878 // Maybe already get a value through sharing with other phi nodes. 879 if (Updater.HasValueForBlock(UB)) 880 continue; 881 882 Updater.AddAvailableValue(UB, Poison); 883 } 884 885 for (BasicBlock *FI : From) 886 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI)); 887 AffectedPhis.push_back(Phi); 888 } 889 } 890 891 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end()); 892 } 893 894 void StructurizeCFG::simplifyAffectedPhis() { 895 bool Changed; 896 do { 897 Changed = false; 898 SimplifyQuery Q(Func->getDataLayout()); 899 Q.DT = DT; 900 // Setting CanUseUndef to true might extend value liveness, set it to false 901 // to achieve better register pressure. 902 Q.CanUseUndef = false; 903 for (WeakVH VH : AffectedPhis) { 904 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) { 905 if (auto NewValue = simplifyInstruction(Phi, Q)) { 906 Phi->replaceAllUsesWith(NewValue); 907 Phi->eraseFromParent(); 908 Changed = true; 909 } 910 } 911 } 912 } while (Changed); 913 } 914 915 /// Remove phi values from all successors and then remove the terminator. 916 DebugLoc StructurizeCFG::killTerminator(BasicBlock *BB) { 917 Instruction *Term = BB->getTerminator(); 918 if (!Term) 919 return DebugLoc(); 920 921 for (BasicBlock *Succ : successors(BB)) 922 delPhiValues(BB, Succ); 923 924 DebugLoc DL = Term->getDebugLoc(); 925 Term->eraseFromParent(); 926 return DL; 927 } 928 929 /// Let node exit(s) point to NewExit 930 void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, 931 bool IncludeDominator) { 932 if (Node->isSubRegion()) { 933 Region *SubRegion = Node->getNodeAs<Region>(); 934 BasicBlock *OldExit = SubRegion->getExit(); 935 BasicBlock *Dominator = nullptr; 936 937 // Find all the edges from the sub region to the exit. 938 // We use make_early_inc_range here because we modify BB's terminator. 939 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) { 940 if (!SubRegion->contains(BB)) 941 continue; 942 943 // Modify the edges to point to the new exit 944 delPhiValues(BB, OldExit); 945 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); 946 addPhiValues(BB, NewExit); 947 948 // Find the new dominator (if requested) 949 if (IncludeDominator) { 950 if (!Dominator) 951 Dominator = BB; 952 else 953 Dominator = DT->findNearestCommonDominator(Dominator, BB); 954 } 955 } 956 957 // Change the dominator (if requested) 958 if (Dominator) 959 DT->changeImmediateDominator(NewExit, Dominator); 960 961 // Update the region info 962 SubRegion->replaceExit(NewExit); 963 } else { 964 BasicBlock *BB = Node->getNodeAs<BasicBlock>(); 965 DebugLoc DL = killTerminator(BB); 966 BranchInst *Br = BranchInst::Create(NewExit, BB); 967 Br->setDebugLoc(DL); 968 addPhiValues(BB, NewExit); 969 if (IncludeDominator) 970 DT->changeImmediateDominator(NewExit, BB); 971 } 972 } 973 974 /// Create a new flow node and update dominator tree and region info 975 BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) { 976 LLVMContext &Context = Func->getContext(); 977 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : 978 Order.back()->getEntry(); 979 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, 980 Func, Insert); 981 FlowSet.insert(Flow); 982 DT->addNewBlock(Flow, Dominator); 983 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); 984 return Flow; 985 } 986 987 /// Create a new or reuse the previous node as flow node. Returns a block and a 988 /// debug location to be used for new instructions in that block. 989 std::pair<BasicBlock *, DebugLoc> StructurizeCFG::needPrefix(bool NeedEmpty) { 990 BasicBlock *Entry = PrevNode->getEntry(); 991 992 if (!PrevNode->isSubRegion()) { 993 DebugLoc DL = killTerminator(Entry); 994 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) 995 return {Entry, DL}; 996 } 997 998 // create a new flow node 999 BasicBlock *Flow = getNextFlow(Entry); 1000 1001 // and wire it up 1002 changeExit(PrevNode, Flow, true); 1003 PrevNode = ParentRegion->getBBNode(Flow); 1004 return {Flow, DebugLoc()}; 1005 } 1006 1007 /// Returns the region exit if possible, otherwise just a new flow node 1008 BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow, 1009 bool ExitUseAllowed) { 1010 if (!Order.empty() || !ExitUseAllowed) 1011 return getNextFlow(Flow); 1012 1013 BasicBlock *Exit = ParentRegion->getExit(); 1014 DT->changeImmediateDominator(Exit, Flow); 1015 addPhiValues(Flow, Exit); 1016 return Exit; 1017 } 1018 1019 /// Set the previous node 1020 void StructurizeCFG::setPrevNode(BasicBlock *BB) { 1021 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) 1022 : nullptr; 1023 } 1024 1025 /// Does BB dominate all the predicates of Node? 1026 bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { 1027 BBPredicates &Preds = Predicates[Node->getEntry()]; 1028 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, PredInfo> Pred) { 1029 return DT->dominates(BB, Pred.first); 1030 }); 1031 } 1032 1033 /// Can we predict that this node will always be called? 1034 bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { 1035 BBPredicates &Preds = Predicates[Node->getEntry()]; 1036 bool Dominated = false; 1037 1038 // Regionentry is always true 1039 if (!PrevNode) 1040 return true; 1041 1042 for (auto [BB, PI] : Preds) { 1043 if (PI.Pred != BoolTrue) 1044 return false; 1045 1046 if (!Dominated && DT->dominates(BB, PrevNode->getEntry())) 1047 Dominated = true; 1048 } 1049 1050 // TODO: The dominator check is too strict 1051 return Dominated; 1052 } 1053 1054 /// Take one node from the order vector and wire it up 1055 void StructurizeCFG::wireFlow(bool ExitUseAllowed, 1056 BasicBlock *LoopEnd) { 1057 RegionNode *Node = Order.pop_back_val(); 1058 Visited.insert(Node->getEntry()); 1059 1060 if (isPredictableTrue(Node)) { 1061 // Just a linear flow 1062 if (PrevNode) { 1063 changeExit(PrevNode, Node->getEntry(), true); 1064 } 1065 PrevNode = Node; 1066 } else { 1067 // Insert extra prefix node (or reuse last one) 1068 auto [Flow, DL] = needPrefix(false); 1069 1070 // Insert extra postfix node (or use exit instead) 1071 BasicBlock *Entry = Node->getEntry(); 1072 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); 1073 1074 // let it point to entry and next block 1075 BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow); 1076 Br->setDebugLoc(DL); 1077 Conditions.push_back(Br); 1078 addPhiValues(Flow, Entry); 1079 DT->changeImmediateDominator(Entry, Flow); 1080 1081 PrevNode = Node; 1082 while (!Order.empty() && !Visited.count(LoopEnd) && 1083 dominatesPredicates(Entry, Order.back())) { 1084 handleLoops(false, LoopEnd); 1085 } 1086 1087 changeExit(PrevNode, Next, false); 1088 setPrevNode(Next); 1089 } 1090 } 1091 1092 void StructurizeCFG::handleLoops(bool ExitUseAllowed, 1093 BasicBlock *LoopEnd) { 1094 RegionNode *Node = Order.back(); 1095 BasicBlock *LoopStart = Node->getEntry(); 1096 1097 if (!Loops.count(LoopStart)) { 1098 wireFlow(ExitUseAllowed, LoopEnd); 1099 return; 1100 } 1101 1102 if (!isPredictableTrue(Node)) 1103 LoopStart = needPrefix(true).first; 1104 1105 LoopEnd = Loops[Node->getEntry()]; 1106 wireFlow(false, LoopEnd); 1107 while (!Visited.count(LoopEnd)) { 1108 handleLoops(false, LoopEnd); 1109 } 1110 1111 assert(LoopStart != &LoopStart->getParent()->getEntryBlock()); 1112 1113 // Create an extra loop end node 1114 DebugLoc DL; 1115 std::tie(LoopEnd, DL) = needPrefix(false); 1116 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); 1117 BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd); 1118 Br->setDebugLoc(DL); 1119 LoopConds.push_back(Br); 1120 addPhiValues(LoopEnd, LoopStart); 1121 setPrevNode(Next); 1122 } 1123 1124 /// After this function control flow looks like it should be, but 1125 /// branches and PHI nodes only have undefined conditions. 1126 void StructurizeCFG::createFlow() { 1127 BasicBlock *Exit = ParentRegion->getExit(); 1128 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); 1129 1130 AffectedPhis.clear(); 1131 DeletedPhis.clear(); 1132 AddedPhis.clear(); 1133 Conditions.clear(); 1134 LoopConds.clear(); 1135 1136 PrevNode = nullptr; 1137 Visited.clear(); 1138 1139 while (!Order.empty()) { 1140 handleLoops(EntryDominatesExit, nullptr); 1141 } 1142 1143 if (PrevNode) 1144 changeExit(PrevNode, Exit, EntryDominatesExit); 1145 else 1146 assert(EntryDominatesExit); 1147 } 1148 1149 /// Handle a rare case where the disintegrated nodes instructions 1150 /// no longer dominate all their uses. Not sure if this is really necessary 1151 void StructurizeCFG::rebuildSSA() { 1152 SSAUpdater Updater; 1153 for (BasicBlock *BB : ParentRegion->blocks()) 1154 for (Instruction &I : *BB) { 1155 bool Initialized = false; 1156 // We may modify the use list as we iterate over it, so we use 1157 // make_early_inc_range. 1158 for (Use &U : llvm::make_early_inc_range(I.uses())) { 1159 Instruction *User = cast<Instruction>(U.getUser()); 1160 if (User->getParent() == BB) { 1161 continue; 1162 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 1163 if (UserPN->getIncomingBlock(U) == BB) 1164 continue; 1165 } 1166 1167 if (DT->dominates(&I, User)) 1168 continue; 1169 1170 if (!Initialized) { 1171 Value *Poison = PoisonValue::get(I.getType()); 1172 Updater.Initialize(I.getType(), ""); 1173 Updater.AddAvailableValue(&Func->getEntryBlock(), Poison); 1174 Updater.AddAvailableValue(BB, &I); 1175 Initialized = true; 1176 } 1177 Updater.RewriteUseAfterInsertions(U); 1178 } 1179 } 1180 } 1181 1182 static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, 1183 const UniformityInfo &UA) { 1184 // Bool for if all sub-regions are uniform. 1185 bool SubRegionsAreUniform = true; 1186 // Count of how many direct children are conditional. 1187 unsigned ConditionalDirectChildren = 0; 1188 1189 for (auto *E : R->elements()) { 1190 if (!E->isSubRegion()) { 1191 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator()); 1192 if (!Br || !Br->isConditional()) 1193 continue; 1194 1195 if (!UA.isUniform(Br)) 1196 return false; 1197 1198 // One of our direct children is conditional. 1199 ConditionalDirectChildren++; 1200 1201 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName() 1202 << " has uniform terminator\n"); 1203 } else { 1204 // Explicitly refuse to treat regions as uniform if they have non-uniform 1205 // subregions. We cannot rely on UniformityAnalysis for branches in 1206 // subregions because those branches may have been removed and re-created, 1207 // so we look for our metadata instead. 1208 // 1209 // Warning: It would be nice to treat regions as uniform based only on 1210 // their direct child basic blocks' terminators, regardless of whether 1211 // subregions are uniform or not. However, this requires a very careful 1212 // look at SIAnnotateControlFlow to make sure nothing breaks there. 1213 for (auto *BB : E->getNodeAs<Region>()->blocks()) { 1214 auto Br = dyn_cast<BranchInst>(BB->getTerminator()); 1215 if (!Br || !Br->isConditional()) 1216 continue; 1217 1218 if (!Br->getMetadata(UniformMDKindID)) { 1219 // Early exit if we cannot have relaxed uniform regions. 1220 if (!RelaxedUniformRegions) 1221 return false; 1222 1223 SubRegionsAreUniform = false; 1224 break; 1225 } 1226 } 1227 } 1228 } 1229 1230 // Our region is uniform if: 1231 // 1. All conditional branches that are direct children are uniform (checked 1232 // above). 1233 // 2. And either: 1234 // a. All sub-regions are uniform. 1235 // b. There is one or less conditional branches among the direct children. 1236 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1); 1237 } 1238 1239 void StructurizeCFG::init(Region *R) { 1240 LLVMContext &Context = R->getEntry()->getContext(); 1241 1242 Boolean = Type::getInt1Ty(Context); 1243 BoolTrue = ConstantInt::getTrue(Context); 1244 BoolFalse = ConstantInt::getFalse(Context); 1245 BoolPoison = PoisonValue::get(Boolean); 1246 1247 this->UA = nullptr; 1248 } 1249 1250 bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) { 1251 if (R->isTopLevelRegion()) 1252 return false; 1253 1254 this->UA = &UA; 1255 1256 // TODO: We could probably be smarter here with how we handle sub-regions. 1257 // We currently rely on the fact that metadata is set by earlier invocations 1258 // of the pass on sub-regions, and that this metadata doesn't get lost -- 1259 // but we shouldn't rely on metadata for correctness! 1260 unsigned UniformMDKindID = 1261 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform"); 1262 1263 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) { 1264 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R 1265 << '\n'); 1266 1267 // Mark all direct child block terminators as having been treated as 1268 // uniform. To account for a possible future in which non-uniform 1269 // sub-regions are treated more cleverly, indirect children are not 1270 // marked as uniform. 1271 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {}); 1272 for (RegionNode *E : R->elements()) { 1273 if (E->isSubRegion()) 1274 continue; 1275 1276 if (Instruction *Term = E->getEntry()->getTerminator()) 1277 Term->setMetadata(UniformMDKindID, MD); 1278 } 1279 1280 return true; 1281 } 1282 return false; 1283 } 1284 1285 /// Run the transformation for each region found 1286 bool StructurizeCFG::run(Region *R, DominatorTree *DT) { 1287 if (R->isTopLevelRegion()) 1288 return false; 1289 1290 this->DT = DT; 1291 1292 Func = R->getEntry()->getParent(); 1293 assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator."); 1294 1295 ParentRegion = R; 1296 1297 orderNodes(); 1298 collectInfos(); 1299 createFlow(); 1300 insertConditions(false); 1301 insertConditions(true); 1302 setPhiValues(); 1303 simplifyConditions(); 1304 simplifyAffectedPhis(); 1305 rebuildSSA(); 1306 1307 // Cleanup 1308 Order.clear(); 1309 Visited.clear(); 1310 DeletedPhis.clear(); 1311 AddedPhis.clear(); 1312 Predicates.clear(); 1313 Conditions.clear(); 1314 Loops.clear(); 1315 LoopPreds.clear(); 1316 LoopConds.clear(); 1317 FlowSet.clear(); 1318 1319 return true; 1320 } 1321 1322 Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) { 1323 return new StructurizeCFGLegacyPass(SkipUniformRegions); 1324 } 1325 1326 static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) { 1327 Regions.push_back(&R); 1328 for (const auto &E : R) 1329 addRegionIntoQueue(*E, Regions); 1330 } 1331 1332 StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_) 1333 : SkipUniformRegions(SkipUniformRegions_) { 1334 if (ForceSkipUniformRegions.getNumOccurrences()) 1335 SkipUniformRegions = ForceSkipUniformRegions.getValue(); 1336 } 1337 1338 void StructurizeCFGPass::printPipeline( 1339 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) { 1340 static_cast<PassInfoMixin<StructurizeCFGPass> *>(this)->printPipeline( 1341 OS, MapClassName2PassName); 1342 if (SkipUniformRegions) 1343 OS << "<skip-uniform-regions>"; 1344 } 1345 1346 PreservedAnalyses StructurizeCFGPass::run(Function &F, 1347 FunctionAnalysisManager &AM) { 1348 1349 bool Changed = false; 1350 DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F); 1351 auto &RI = AM.getResult<RegionInfoAnalysis>(F); 1352 1353 UniformityInfo *UI = nullptr; 1354 if (SkipUniformRegions) 1355 UI = &AM.getResult<UniformityInfoAnalysis>(F); 1356 1357 std::vector<Region *> Regions; 1358 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions); 1359 while (!Regions.empty()) { 1360 Region *R = Regions.back(); 1361 Regions.pop_back(); 1362 1363 StructurizeCFG SCFG; 1364 SCFG.init(R); 1365 1366 if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) { 1367 Changed = true; // May have added metadata. 1368 continue; 1369 } 1370 1371 Changed |= SCFG.run(R, DT); 1372 } 1373 if (!Changed) 1374 return PreservedAnalyses::all(); 1375 PreservedAnalyses PA; 1376 PA.preserve<DominatorTreeAnalysis>(); 1377 return PA; 1378 } 1379