1 //===--------- LoopSimplifyCFG.cpp - Loop CFG Simplification Pass ---------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the Loop SimplifyCFG Pass. This pass is responsible for 10 // basic loop CFG cleanup, primarily to assist other loop passes. If you 11 // encounter a noncanonical CFG construct that causes another loop pass to 12 // perform suboptimally, this is the place to fix it up. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/Analysis/DependenceAnalysis.h" 20 #include "llvm/Analysis/DomTreeUpdater.h" 21 #include "llvm/Analysis/LoopInfo.h" 22 #include "llvm/Analysis/LoopIterator.h" 23 #include "llvm/Analysis/MemorySSA.h" 24 #include "llvm/Analysis/MemorySSAUpdater.h" 25 #include "llvm/Analysis/ScalarEvolution.h" 26 #include "llvm/IR/Dominators.h" 27 #include "llvm/IR/IRBuilder.h" 28 #include "llvm/Support/CommandLine.h" 29 #include "llvm/Transforms/Scalar.h" 30 #include "llvm/Transforms/Scalar/LoopPassManager.h" 31 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 32 #include "llvm/Transforms/Utils/LoopUtils.h" 33 #include <optional> 34 using namespace llvm; 35 36 #define DEBUG_TYPE "loop-simplifycfg" 37 38 static cl::opt<bool> EnableTermFolding("enable-loop-simplifycfg-term-folding", 39 cl::init(true)); 40 41 STATISTIC(NumTerminatorsFolded, 42 "Number of terminators folded to unconditional branches"); 43 STATISTIC(NumLoopBlocksDeleted, 44 "Number of loop blocks deleted"); 45 STATISTIC(NumLoopExitsDeleted, 46 "Number of loop exiting edges deleted"); 47 48 /// If \p BB is a switch or a conditional branch, but only one of its successors 49 /// can be reached from this block in runtime, return this successor. Otherwise, 50 /// return nullptr. 51 static BasicBlock *getOnlyLiveSuccessor(BasicBlock *BB) { 52 Instruction *TI = BB->getTerminator(); 53 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 54 if (BI->isUnconditional()) 55 return nullptr; 56 if (BI->getSuccessor(0) == BI->getSuccessor(1)) 57 return BI->getSuccessor(0); 58 ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition()); 59 if (!Cond) 60 return nullptr; 61 return Cond->isZero() ? BI->getSuccessor(1) : BI->getSuccessor(0); 62 } 63 64 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { 65 auto *CI = dyn_cast<ConstantInt>(SI->getCondition()); 66 if (!CI) 67 return nullptr; 68 for (auto Case : SI->cases()) 69 if (Case.getCaseValue() == CI) 70 return Case.getCaseSuccessor(); 71 return SI->getDefaultDest(); 72 } 73 74 return nullptr; 75 } 76 77 /// Removes \p BB from all loops from [FirstLoop, LastLoop) in parent chain. 78 static void removeBlockFromLoops(BasicBlock *BB, Loop *FirstLoop, 79 Loop *LastLoop = nullptr) { 80 assert((!LastLoop || LastLoop->contains(FirstLoop->getHeader())) && 81 "First loop is supposed to be inside of last loop!"); 82 assert(FirstLoop->contains(BB) && "Must be a loop block!"); 83 for (Loop *Current = FirstLoop; Current != LastLoop; 84 Current = Current->getParentLoop()) 85 Current->removeBlockFromLoop(BB); 86 } 87 88 /// Find innermost loop that contains at least one block from \p BBs and 89 /// contains the header of loop \p L. 90 static Loop *getInnermostLoopFor(SmallPtrSetImpl<BasicBlock *> &BBs, 91 Loop &L, LoopInfo &LI) { 92 Loop *Innermost = nullptr; 93 for (BasicBlock *BB : BBs) { 94 Loop *BBL = LI.getLoopFor(BB); 95 while (BBL && !BBL->contains(L.getHeader())) 96 BBL = BBL->getParentLoop(); 97 if (BBL == &L) 98 BBL = BBL->getParentLoop(); 99 if (!BBL) 100 continue; 101 if (!Innermost || BBL->getLoopDepth() > Innermost->getLoopDepth()) 102 Innermost = BBL; 103 } 104 return Innermost; 105 } 106 107 namespace { 108 /// Helper class that can turn branches and switches with constant conditions 109 /// into unconditional branches. 110 class ConstantTerminatorFoldingImpl { 111 private: 112 Loop &L; 113 LoopInfo &LI; 114 DominatorTree &DT; 115 ScalarEvolution &SE; 116 MemorySSAUpdater *MSSAU; 117 LoopBlocksDFS DFS; 118 DomTreeUpdater DTU; 119 SmallVector<DominatorTree::UpdateType, 16> DTUpdates; 120 121 // Whether or not the current loop has irreducible CFG. 122 bool HasIrreducibleCFG = false; 123 // Whether or not the current loop will still exist after terminator constant 124 // folding will be done. In theory, there are two ways how it can happen: 125 // 1. Loop's latch(es) become unreachable from loop header; 126 // 2. Loop's header becomes unreachable from method entry. 127 // In practice, the second situation is impossible because we only modify the 128 // current loop and its preheader and do not affect preheader's reachibility 129 // from any other block. So this variable set to true means that loop's latch 130 // has become unreachable from loop header. 131 bool DeleteCurrentLoop = false; 132 133 // The blocks of the original loop that will still be reachable from entry 134 // after the constant folding. 135 SmallPtrSet<BasicBlock *, 8> LiveLoopBlocks; 136 // The blocks of the original loop that will become unreachable from entry 137 // after the constant folding. 138 SmallVector<BasicBlock *, 8> DeadLoopBlocks; 139 // The exits of the original loop that will still be reachable from entry 140 // after the constant folding. 141 SmallPtrSet<BasicBlock *, 8> LiveExitBlocks; 142 // The exits of the original loop that will become unreachable from entry 143 // after the constant folding. 144 SmallVector<BasicBlock *, 8> DeadExitBlocks; 145 // The blocks that will still be a part of the current loop after folding. 146 SmallPtrSet<BasicBlock *, 8> BlocksInLoopAfterFolding; 147 // The blocks that have terminators with constant condition that can be 148 // folded. Note: fold candidates should be in L but not in any of its 149 // subloops to avoid complex LI updates. 150 SmallVector<BasicBlock *, 8> FoldCandidates; 151 152 void dump() const { 153 dbgs() << "Constant terminator folding for loop " << L << "\n"; 154 dbgs() << "After terminator constant-folding, the loop will"; 155 if (!DeleteCurrentLoop) 156 dbgs() << " not"; 157 dbgs() << " be destroyed\n"; 158 auto PrintOutVector = [&](const char *Message, 159 const SmallVectorImpl<BasicBlock *> &S) { 160 dbgs() << Message << "\n"; 161 for (const BasicBlock *BB : S) 162 dbgs() << "\t" << BB->getName() << "\n"; 163 }; 164 auto PrintOutSet = [&](const char *Message, 165 const SmallPtrSetImpl<BasicBlock *> &S) { 166 dbgs() << Message << "\n"; 167 for (const BasicBlock *BB : S) 168 dbgs() << "\t" << BB->getName() << "\n"; 169 }; 170 PrintOutVector("Blocks in which we can constant-fold terminator:", 171 FoldCandidates); 172 PrintOutSet("Live blocks from the original loop:", LiveLoopBlocks); 173 PrintOutVector("Dead blocks from the original loop:", DeadLoopBlocks); 174 PrintOutSet("Live exit blocks:", LiveExitBlocks); 175 PrintOutVector("Dead exit blocks:", DeadExitBlocks); 176 if (!DeleteCurrentLoop) 177 PrintOutSet("The following blocks will still be part of the loop:", 178 BlocksInLoopAfterFolding); 179 } 180 181 /// Whether or not the current loop has irreducible CFG. 182 bool hasIrreducibleCFG(LoopBlocksDFS &DFS) { 183 assert(DFS.isComplete() && "DFS is expected to be finished"); 184 // Index of a basic block in RPO traversal. 185 DenseMap<const BasicBlock *, unsigned> RPO; 186 unsigned Current = 0; 187 for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) 188 RPO[*I] = Current++; 189 190 for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) { 191 BasicBlock *BB = *I; 192 for (auto *Succ : successors(BB)) 193 if (L.contains(Succ) && !LI.isLoopHeader(Succ) && RPO[BB] > RPO[Succ]) 194 // If an edge goes from a block with greater order number into a block 195 // with lesses number, and it is not a loop backedge, then it can only 196 // be a part of irreducible non-loop cycle. 197 return true; 198 } 199 return false; 200 } 201 202 /// Fill all information about status of blocks and exits of the current loop 203 /// if constant folding of all branches will be done. 204 void analyze() { 205 DFS.perform(&LI); 206 assert(DFS.isComplete() && "DFS is expected to be finished"); 207 208 // TODO: The algorithm below relies on both RPO and Postorder traversals. 209 // When the loop has only reducible CFG inside, then the invariant "all 210 // predecessors of X are processed before X in RPO" is preserved. However 211 // an irreducible loop can break this invariant (e.g. latch does not have to 212 // be the last block in the traversal in this case, and the algorithm relies 213 // on this). We can later decide to support such cases by altering the 214 // algorithms, but so far we just give up analyzing them. 215 if (hasIrreducibleCFG(DFS)) { 216 HasIrreducibleCFG = true; 217 return; 218 } 219 220 // Collect live and dead loop blocks and exits. 221 LiveLoopBlocks.insert(L.getHeader()); 222 for (auto I = DFS.beginRPO(), E = DFS.endRPO(); I != E; ++I) { 223 BasicBlock *BB = *I; 224 225 // If a loop block wasn't marked as live so far, then it's dead. 226 if (!LiveLoopBlocks.count(BB)) { 227 DeadLoopBlocks.push_back(BB); 228 continue; 229 } 230 231 BasicBlock *TheOnlySucc = getOnlyLiveSuccessor(BB); 232 233 // If a block has only one live successor, it's a candidate on constant 234 // folding. Only handle blocks from current loop: branches in child loops 235 // are skipped because if they can be folded, they should be folded during 236 // the processing of child loops. 237 bool TakeFoldCandidate = TheOnlySucc && LI.getLoopFor(BB) == &L; 238 if (TakeFoldCandidate) 239 FoldCandidates.push_back(BB); 240 241 // Handle successors. 242 for (BasicBlock *Succ : successors(BB)) 243 if (!TakeFoldCandidate || TheOnlySucc == Succ) { 244 if (L.contains(Succ)) 245 LiveLoopBlocks.insert(Succ); 246 else 247 LiveExitBlocks.insert(Succ); 248 } 249 } 250 251 // Amount of dead and live loop blocks should match the total number of 252 // blocks in loop. 253 assert(L.getNumBlocks() == LiveLoopBlocks.size() + DeadLoopBlocks.size() && 254 "Malformed block sets?"); 255 256 // Now, all exit blocks that are not marked as live are dead, if all their 257 // predecessors are in the loop. This may not be the case, as the input loop 258 // may not by in loop-simplify/canonical form. 259 SmallVector<BasicBlock *, 8> ExitBlocks; 260 L.getExitBlocks(ExitBlocks); 261 SmallPtrSet<BasicBlock *, 8> UniqueDeadExits; 262 for (auto *ExitBlock : ExitBlocks) 263 if (!LiveExitBlocks.count(ExitBlock) && 264 UniqueDeadExits.insert(ExitBlock).second && 265 all_of(predecessors(ExitBlock), 266 [this](BasicBlock *Pred) { return L.contains(Pred); })) 267 DeadExitBlocks.push_back(ExitBlock); 268 269 // Whether or not the edge From->To will still be present in graph after the 270 // folding. 271 auto IsEdgeLive = [&](BasicBlock *From, BasicBlock *To) { 272 if (!LiveLoopBlocks.count(From)) 273 return false; 274 BasicBlock *TheOnlySucc = getOnlyLiveSuccessor(From); 275 return !TheOnlySucc || TheOnlySucc == To || LI.getLoopFor(From) != &L; 276 }; 277 278 // The loop will not be destroyed if its latch is live. 279 DeleteCurrentLoop = !IsEdgeLive(L.getLoopLatch(), L.getHeader()); 280 281 // If we are going to delete the current loop completely, no extra analysis 282 // is needed. 283 if (DeleteCurrentLoop) 284 return; 285 286 // Otherwise, we should check which blocks will still be a part of the 287 // current loop after the transform. 288 BlocksInLoopAfterFolding.insert(L.getLoopLatch()); 289 // If the loop is live, then we should compute what blocks are still in 290 // loop after all branch folding has been done. A block is in loop if 291 // it has a live edge to another block that is in the loop; by definition, 292 // latch is in the loop. 293 auto BlockIsInLoop = [&](BasicBlock *BB) { 294 return any_of(successors(BB), [&](BasicBlock *Succ) { 295 return BlocksInLoopAfterFolding.count(Succ) && IsEdgeLive(BB, Succ); 296 }); 297 }; 298 for (auto I = DFS.beginPostorder(), E = DFS.endPostorder(); I != E; ++I) { 299 BasicBlock *BB = *I; 300 if (BlockIsInLoop(BB)) 301 BlocksInLoopAfterFolding.insert(BB); 302 } 303 304 assert(BlocksInLoopAfterFolding.count(L.getHeader()) && 305 "Header not in loop?"); 306 assert(BlocksInLoopAfterFolding.size() <= LiveLoopBlocks.size() && 307 "All blocks that stay in loop should be live!"); 308 } 309 310 /// We need to preserve static reachibility of all loop exit blocks (this is) 311 /// required by loop pass manager. In order to do it, we make the following 312 /// trick: 313 /// 314 /// preheader: 315 /// <preheader code> 316 /// br label %loop_header 317 /// 318 /// loop_header: 319 /// ... 320 /// br i1 false, label %dead_exit, label %loop_block 321 /// ... 322 /// 323 /// We cannot simply remove edge from the loop to dead exit because in this 324 /// case dead_exit (and its successors) may become unreachable. To avoid that, 325 /// we insert the following fictive preheader: 326 /// 327 /// preheader: 328 /// <preheader code> 329 /// switch i32 0, label %preheader-split, 330 /// [i32 1, label %dead_exit_1], 331 /// [i32 2, label %dead_exit_2], 332 /// ... 333 /// [i32 N, label %dead_exit_N], 334 /// 335 /// preheader-split: 336 /// br label %loop_header 337 /// 338 /// loop_header: 339 /// ... 340 /// br i1 false, label %dead_exit_N, label %loop_block 341 /// ... 342 /// 343 /// Doing so, we preserve static reachibility of all dead exits and can later 344 /// remove edges from the loop to these blocks. 345 void handleDeadExits() { 346 // If no dead exits, nothing to do. 347 if (DeadExitBlocks.empty()) 348 return; 349 350 // Construct split preheader and the dummy switch to thread edges from it to 351 // dead exits. 352 BasicBlock *Preheader = L.getLoopPreheader(); 353 BasicBlock *NewPreheader = llvm::SplitBlock( 354 Preheader, Preheader->getTerminator(), &DT, &LI, MSSAU); 355 356 IRBuilder<> Builder(Preheader->getTerminator()); 357 SwitchInst *DummySwitch = 358 Builder.CreateSwitch(Builder.getInt32(0), NewPreheader); 359 Preheader->getTerminator()->eraseFromParent(); 360 361 unsigned DummyIdx = 1; 362 for (BasicBlock *BB : DeadExitBlocks) { 363 // Eliminate all Phis and LandingPads from dead exits. 364 // TODO: Consider removing all instructions in this dead block. 365 SmallVector<Instruction *, 4> DeadInstructions; 366 for (auto &PN : BB->phis()) 367 DeadInstructions.push_back(&PN); 368 369 if (auto *LandingPad = dyn_cast<LandingPadInst>(BB->getFirstNonPHI())) 370 DeadInstructions.emplace_back(LandingPad); 371 372 for (Instruction *I : DeadInstructions) { 373 SE.forgetBlockAndLoopDispositions(I); 374 I->replaceAllUsesWith(PoisonValue::get(I->getType())); 375 I->eraseFromParent(); 376 } 377 378 assert(DummyIdx != 0 && "Too many dead exits!"); 379 DummySwitch->addCase(Builder.getInt32(DummyIdx++), BB); 380 DTUpdates.push_back({DominatorTree::Insert, Preheader, BB}); 381 ++NumLoopExitsDeleted; 382 } 383 384 assert(L.getLoopPreheader() == NewPreheader && "Malformed CFG?"); 385 if (Loop *OuterLoop = LI.getLoopFor(Preheader)) { 386 // When we break dead edges, the outer loop may become unreachable from 387 // the current loop. We need to fix loop info accordingly. For this, we 388 // find the most nested loop that still contains L and remove L from all 389 // loops that are inside of it. 390 Loop *StillReachable = getInnermostLoopFor(LiveExitBlocks, L, LI); 391 392 // Okay, our loop is no longer in the outer loop (and maybe not in some of 393 // its parents as well). Make the fixup. 394 if (StillReachable != OuterLoop) { 395 LI.changeLoopFor(NewPreheader, StillReachable); 396 removeBlockFromLoops(NewPreheader, OuterLoop, StillReachable); 397 for (auto *BB : L.blocks()) 398 removeBlockFromLoops(BB, OuterLoop, StillReachable); 399 OuterLoop->removeChildLoop(&L); 400 if (StillReachable) 401 StillReachable->addChildLoop(&L); 402 else 403 LI.addTopLevelLoop(&L); 404 405 // Some values from loops in [OuterLoop, StillReachable) could be used 406 // in the current loop. Now it is not their child anymore, so such uses 407 // require LCSSA Phis. 408 Loop *FixLCSSALoop = OuterLoop; 409 while (FixLCSSALoop->getParentLoop() != StillReachable) 410 FixLCSSALoop = FixLCSSALoop->getParentLoop(); 411 assert(FixLCSSALoop && "Should be a loop!"); 412 // We need all DT updates to be done before forming LCSSA. 413 if (MSSAU) 414 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true); 415 else 416 DTU.applyUpdates(DTUpdates); 417 DTUpdates.clear(); 418 formLCSSARecursively(*FixLCSSALoop, DT, &LI, &SE); 419 SE.forgetBlockAndLoopDispositions(); 420 } 421 } 422 423 if (MSSAU) { 424 // Clear all updates now. Facilitates deletes that follow. 425 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true); 426 DTUpdates.clear(); 427 if (VerifyMemorySSA) 428 MSSAU->getMemorySSA()->verifyMemorySSA(); 429 } 430 } 431 432 /// Delete loop blocks that have become unreachable after folding. Make all 433 /// relevant updates to DT and LI. 434 void deleteDeadLoopBlocks() { 435 if (MSSAU) { 436 SmallSetVector<BasicBlock *, 8> DeadLoopBlocksSet(DeadLoopBlocks.begin(), 437 DeadLoopBlocks.end()); 438 MSSAU->removeBlocks(DeadLoopBlocksSet); 439 } 440 441 // The function LI.erase has some invariants that need to be preserved when 442 // it tries to remove a loop which is not the top-level loop. In particular, 443 // it requires loop's preheader to be strictly in loop's parent. We cannot 444 // just remove blocks one by one, because after removal of preheader we may 445 // break this invariant for the dead loop. So we detatch and erase all dead 446 // loops beforehand. 447 for (auto *BB : DeadLoopBlocks) 448 if (LI.isLoopHeader(BB)) { 449 assert(LI.getLoopFor(BB) != &L && "Attempt to remove current loop!"); 450 Loop *DL = LI.getLoopFor(BB); 451 if (!DL->isOutermost()) { 452 for (auto *PL = DL->getParentLoop(); PL; PL = PL->getParentLoop()) 453 for (auto *BB : DL->getBlocks()) 454 PL->removeBlockFromLoop(BB); 455 DL->getParentLoop()->removeChildLoop(DL); 456 LI.addTopLevelLoop(DL); 457 } 458 LI.erase(DL); 459 } 460 461 for (auto *BB : DeadLoopBlocks) { 462 assert(BB != L.getHeader() && 463 "Header of the current loop cannot be dead!"); 464 LLVM_DEBUG(dbgs() << "Deleting dead loop block " << BB->getName() 465 << "\n"); 466 LI.removeBlock(BB); 467 } 468 469 detachDeadBlocks(DeadLoopBlocks, &DTUpdates, /*KeepOneInputPHIs*/true); 470 DTU.applyUpdates(DTUpdates); 471 DTUpdates.clear(); 472 for (auto *BB : DeadLoopBlocks) 473 DTU.deleteBB(BB); 474 475 NumLoopBlocksDeleted += DeadLoopBlocks.size(); 476 } 477 478 /// Constant-fold terminators of blocks accumulated in FoldCandidates into the 479 /// unconditional branches. 480 void foldTerminators() { 481 for (BasicBlock *BB : FoldCandidates) { 482 assert(LI.getLoopFor(BB) == &L && "Should be a loop block!"); 483 BasicBlock *TheOnlySucc = getOnlyLiveSuccessor(BB); 484 assert(TheOnlySucc && "Should have one live successor!"); 485 486 LLVM_DEBUG(dbgs() << "Replacing terminator of " << BB->getName() 487 << " with an unconditional branch to the block " 488 << TheOnlySucc->getName() << "\n"); 489 490 SmallPtrSet<BasicBlock *, 2> DeadSuccessors; 491 // Remove all BB's successors except for the live one. 492 unsigned TheOnlySuccDuplicates = 0; 493 for (auto *Succ : successors(BB)) 494 if (Succ != TheOnlySucc) { 495 DeadSuccessors.insert(Succ); 496 // If our successor lies in a different loop, we don't want to remove 497 // the one-input Phi because it is a LCSSA Phi. 498 bool PreserveLCSSAPhi = !L.contains(Succ); 499 Succ->removePredecessor(BB, PreserveLCSSAPhi); 500 if (MSSAU) 501 MSSAU->removeEdge(BB, Succ); 502 } else 503 ++TheOnlySuccDuplicates; 504 505 assert(TheOnlySuccDuplicates > 0 && "Should be!"); 506 // If TheOnlySucc was BB's successor more than once, after transform it 507 // will be its successor only once. Remove redundant inputs from 508 // TheOnlySucc's Phis. 509 bool PreserveLCSSAPhi = !L.contains(TheOnlySucc); 510 for (unsigned Dup = 1; Dup < TheOnlySuccDuplicates; ++Dup) 511 TheOnlySucc->removePredecessor(BB, PreserveLCSSAPhi); 512 if (MSSAU && TheOnlySuccDuplicates > 1) 513 MSSAU->removeDuplicatePhiEdgesBetween(BB, TheOnlySucc); 514 515 IRBuilder<> Builder(BB->getContext()); 516 Instruction *Term = BB->getTerminator(); 517 Builder.SetInsertPoint(Term); 518 Builder.CreateBr(TheOnlySucc); 519 Term->eraseFromParent(); 520 521 for (auto *DeadSucc : DeadSuccessors) 522 DTUpdates.push_back({DominatorTree::Delete, BB, DeadSucc}); 523 524 ++NumTerminatorsFolded; 525 } 526 } 527 528 public: 529 ConstantTerminatorFoldingImpl(Loop &L, LoopInfo &LI, DominatorTree &DT, 530 ScalarEvolution &SE, 531 MemorySSAUpdater *MSSAU) 532 : L(L), LI(LI), DT(DT), SE(SE), MSSAU(MSSAU), DFS(&L), 533 DTU(DT, DomTreeUpdater::UpdateStrategy::Eager) {} 534 bool run() { 535 assert(L.getLoopLatch() && "Should be single latch!"); 536 537 // Collect all available information about status of blocks after constant 538 // folding. 539 analyze(); 540 BasicBlock *Header = L.getHeader(); 541 (void)Header; 542 543 LLVM_DEBUG(dbgs() << "In function " << Header->getParent()->getName() 544 << ": "); 545 546 if (HasIrreducibleCFG) { 547 LLVM_DEBUG(dbgs() << "Loops with irreducible CFG are not supported!\n"); 548 return false; 549 } 550 551 // Nothing to constant-fold. 552 if (FoldCandidates.empty()) { 553 LLVM_DEBUG( 554 dbgs() << "No constant terminator folding candidates found in loop " 555 << Header->getName() << "\n"); 556 return false; 557 } 558 559 // TODO: Support deletion of the current loop. 560 if (DeleteCurrentLoop) { 561 LLVM_DEBUG( 562 dbgs() 563 << "Give up constant terminator folding in loop " << Header->getName() 564 << ": we don't currently support deletion of the current loop.\n"); 565 return false; 566 } 567 568 // TODO: Support blocks that are not dead, but also not in loop after the 569 // folding. 570 if (BlocksInLoopAfterFolding.size() + DeadLoopBlocks.size() != 571 L.getNumBlocks()) { 572 LLVM_DEBUG( 573 dbgs() << "Give up constant terminator folding in loop " 574 << Header->getName() << ": we don't currently" 575 " support blocks that are not dead, but will stop " 576 "being a part of the loop after constant-folding.\n"); 577 return false; 578 } 579 580 // TODO: Tokens may breach LCSSA form by default. However, the transform for 581 // dead exit blocks requires LCSSA form to be maintained for all values, 582 // tokens included, otherwise it may break use-def dominance (see PR56243). 583 if (!DeadExitBlocks.empty() && !L.isLCSSAForm(DT, /*IgnoreTokens*/ false)) { 584 assert(L.isLCSSAForm(DT, /*IgnoreTokens*/ true) && 585 "LCSSA broken not by tokens?"); 586 LLVM_DEBUG(dbgs() << "Give up constant terminator folding in loop " 587 << Header->getName() 588 << ": tokens uses potentially break LCSSA form.\n"); 589 return false; 590 } 591 592 SE.forgetTopmostLoop(&L); 593 // Dump analysis results. 594 LLVM_DEBUG(dump()); 595 596 LLVM_DEBUG(dbgs() << "Constant-folding " << FoldCandidates.size() 597 << " terminators in loop " << Header->getName() << "\n"); 598 599 if (!DeadLoopBlocks.empty()) 600 SE.forgetBlockAndLoopDispositions(); 601 602 // Make the actual transforms. 603 handleDeadExits(); 604 foldTerminators(); 605 606 if (!DeadLoopBlocks.empty()) { 607 LLVM_DEBUG(dbgs() << "Deleting " << DeadLoopBlocks.size() 608 << " dead blocks in loop " << Header->getName() << "\n"); 609 deleteDeadLoopBlocks(); 610 } else { 611 // If we didn't do updates inside deleteDeadLoopBlocks, do them here. 612 DTU.applyUpdates(DTUpdates); 613 DTUpdates.clear(); 614 } 615 616 if (MSSAU && VerifyMemorySSA) 617 MSSAU->getMemorySSA()->verifyMemorySSA(); 618 619 #ifndef NDEBUG 620 // Make sure that we have preserved all data structures after the transform. 621 #if defined(EXPENSIVE_CHECKS) 622 assert(DT.verify(DominatorTree::VerificationLevel::Full) && 623 "DT broken after transform!"); 624 #else 625 assert(DT.verify(DominatorTree::VerificationLevel::Fast) && 626 "DT broken after transform!"); 627 #endif 628 assert(DT.isReachableFromEntry(Header)); 629 LI.verify(DT); 630 #endif 631 632 return true; 633 } 634 635 bool foldingBreaksCurrentLoop() const { 636 return DeleteCurrentLoop; 637 } 638 }; 639 } // namespace 640 641 /// Turn branches and switches with known constant conditions into unconditional 642 /// branches. 643 static bool constantFoldTerminators(Loop &L, DominatorTree &DT, LoopInfo &LI, 644 ScalarEvolution &SE, 645 MemorySSAUpdater *MSSAU, 646 bool &IsLoopDeleted) { 647 if (!EnableTermFolding) 648 return false; 649 650 // To keep things simple, only process loops with single latch. We 651 // canonicalize most loops to this form. We can support multi-latch if needed. 652 if (!L.getLoopLatch()) 653 return false; 654 655 ConstantTerminatorFoldingImpl BranchFolder(L, LI, DT, SE, MSSAU); 656 bool Changed = BranchFolder.run(); 657 IsLoopDeleted = Changed && BranchFolder.foldingBreaksCurrentLoop(); 658 return Changed; 659 } 660 661 static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, 662 LoopInfo &LI, MemorySSAUpdater *MSSAU, 663 ScalarEvolution &SE) { 664 bool Changed = false; 665 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 666 // Copy blocks into a temporary array to avoid iterator invalidation issues 667 // as we remove them. 668 SmallVector<WeakTrackingVH, 16> Blocks(L.blocks()); 669 670 for (auto &Block : Blocks) { 671 // Attempt to merge blocks in the trivial case. Don't modify blocks which 672 // belong to other loops. 673 BasicBlock *Succ = cast_or_null<BasicBlock>(Block); 674 if (!Succ) 675 continue; 676 677 BasicBlock *Pred = Succ->getSinglePredecessor(); 678 if (!Pred || !Pred->getSingleSuccessor() || LI.getLoopFor(Pred) != &L) 679 continue; 680 681 // Merge Succ into Pred and delete it. 682 MergeBlockIntoPredecessor(Succ, &DTU, &LI, MSSAU); 683 684 if (MSSAU && VerifyMemorySSA) 685 MSSAU->getMemorySSA()->verifyMemorySSA(); 686 687 Changed = true; 688 } 689 690 if (Changed) 691 SE.forgetBlockAndLoopDispositions(); 692 693 return Changed; 694 } 695 696 static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI, 697 ScalarEvolution &SE, MemorySSAUpdater *MSSAU, 698 bool &IsLoopDeleted) { 699 bool Changed = false; 700 701 // Constant-fold terminators with known constant conditions. 702 Changed |= constantFoldTerminators(L, DT, LI, SE, MSSAU, IsLoopDeleted); 703 704 if (IsLoopDeleted) 705 return true; 706 707 // Eliminate unconditional branches by merging blocks into their predecessors. 708 Changed |= mergeBlocksIntoPredecessors(L, DT, LI, MSSAU, SE); 709 710 if (Changed) 711 SE.forgetTopmostLoop(&L); 712 713 return Changed; 714 } 715 716 PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM, 717 LoopStandardAnalysisResults &AR, 718 LPMUpdater &LPMU) { 719 std::optional<MemorySSAUpdater> MSSAU; 720 if (AR.MSSA) 721 MSSAU = MemorySSAUpdater(AR.MSSA); 722 bool DeleteCurrentLoop = false; 723 if (!simplifyLoopCFG(L, AR.DT, AR.LI, AR.SE, MSSAU ? &*MSSAU : nullptr, 724 DeleteCurrentLoop)) 725 return PreservedAnalyses::all(); 726 727 if (DeleteCurrentLoop) 728 LPMU.markLoopAsDeleted(L, "loop-simplifycfg"); 729 730 auto PA = getLoopPassPreservedAnalyses(); 731 if (AR.MSSA) 732 PA.preserve<MemorySSAAnalysis>(); 733 return PA; 734 } 735