xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopSimplify.cpp (revision ba94a95402f335c8e7aa8e28ebdad43361c65909)
1  //===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This pass performs several transformations to transform natural loops into a
10  // simpler form, which makes subsequent analyses and transformations simpler and
11  // more effective.
12  //
13  // Loop pre-header insertion guarantees that there is a single, non-critical
14  // entry edge from outside of the loop to the loop header.  This simplifies a
15  // number of analyses and transformations, such as LICM.
16  //
17  // Loop exit-block insertion guarantees that all exit blocks from the loop
18  // (blocks which are outside of the loop that have predecessors inside of the
19  // loop) only have predecessors from inside of the loop (and are thus dominated
20  // by the loop header).  This simplifies transformations such as store-sinking
21  // that are built into LICM.
22  //
23  // This pass also guarantees that loops will have exactly one backedge.
24  //
25  // Indirectbr instructions introduce several complications. If the loop
26  // contains or is entered by an indirectbr instruction, it may not be possible
27  // to transform the loop and make these guarantees. Client code should check
28  // that these conditions are true before relying on them.
29  //
30  // Similar complications arise from callbr instructions, particularly in
31  // asm-goto where blockaddress expressions are used.
32  //
33  // Note that the simplifycfg pass will clean up blocks which are split out but
34  // end up being unnecessary, so usage of this pass should not pessimize
35  // generated code.
36  //
37  // This pass obviously modifies the CFG, but updates loop information and
38  // dominator information.
39  //
40  //===----------------------------------------------------------------------===//
41  
42  #include "llvm/Transforms/Utils/LoopSimplify.h"
43  #include "llvm/ADT/DepthFirstIterator.h"
44  #include "llvm/ADT/SetOperations.h"
45  #include "llvm/ADT/SetVector.h"
46  #include "llvm/ADT/SmallVector.h"
47  #include "llvm/ADT/Statistic.h"
48  #include "llvm/Analysis/AliasAnalysis.h"
49  #include "llvm/Analysis/AssumptionCache.h"
50  #include "llvm/Analysis/BasicAliasAnalysis.h"
51  #include "llvm/Analysis/BranchProbabilityInfo.h"
52  #include "llvm/Analysis/DependenceAnalysis.h"
53  #include "llvm/Analysis/GlobalsModRef.h"
54  #include "llvm/Analysis/InstructionSimplify.h"
55  #include "llvm/Analysis/LoopInfo.h"
56  #include "llvm/Analysis/MemorySSA.h"
57  #include "llvm/Analysis/MemorySSAUpdater.h"
58  #include "llvm/Analysis/ScalarEvolution.h"
59  #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
60  #include "llvm/IR/CFG.h"
61  #include "llvm/IR/Constants.h"
62  #include "llvm/IR/DataLayout.h"
63  #include "llvm/IR/Dominators.h"
64  #include "llvm/IR/Function.h"
65  #include "llvm/IR/Instructions.h"
66  #include "llvm/IR/IntrinsicInst.h"
67  #include "llvm/IR/LLVMContext.h"
68  #include "llvm/IR/Module.h"
69  #include "llvm/IR/Type.h"
70  #include "llvm/InitializePasses.h"
71  #include "llvm/Support/Debug.h"
72  #include "llvm/Support/raw_ostream.h"
73  #include "llvm/Transforms/Utils.h"
74  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
75  #include "llvm/Transforms/Utils/Local.h"
76  #include "llvm/Transforms/Utils/LoopUtils.h"
77  using namespace llvm;
78  
79  #define DEBUG_TYPE "loop-simplify"
80  
81  STATISTIC(NumNested  , "Number of nested loops split out");
82  
83  // If the block isn't already, move the new block to right after some 'outside
84  // block' block.  This prevents the preheader from being placed inside the loop
85  // body, e.g. when the loop hasn't been rotated.
86  static void placeSplitBlockCarefully(BasicBlock *NewBB,
87                                       SmallVectorImpl<BasicBlock *> &SplitPreds,
88                                       Loop *L) {
89    // Check to see if NewBB is already well placed.
90    Function::iterator BBI = --NewBB->getIterator();
91    for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
92      if (&*BBI == SplitPreds[i])
93        return;
94    }
95  
96    // If it isn't already after an outside block, move it after one.  This is
97    // always good as it makes the uncond branch from the outside block into a
98    // fall-through.
99  
100    // Figure out *which* outside block to put this after.  Prefer an outside
101    // block that neighbors a BB actually in the loop.
102    BasicBlock *FoundBB = nullptr;
103    for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
104      Function::iterator BBI = SplitPreds[i]->getIterator();
105      if (++BBI != NewBB->getParent()->end() && L->contains(&*BBI)) {
106        FoundBB = SplitPreds[i];
107        break;
108      }
109    }
110  
111    // If our heuristic for a *good* bb to place this after doesn't find
112    // anything, just pick something.  It's likely better than leaving it within
113    // the loop.
114    if (!FoundBB)
115      FoundBB = SplitPreds[0];
116    NewBB->moveAfter(FoundBB);
117  }
118  
119  /// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
120  /// preheader, this method is called to insert one.  This method has two phases:
121  /// preheader insertion and analysis updating.
122  ///
123  BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
124                                           LoopInfo *LI, MemorySSAUpdater *MSSAU,
125                                           bool PreserveLCSSA) {
126    BasicBlock *Header = L->getHeader();
127  
128    // Compute the set of predecessors of the loop that are not in the loop.
129    SmallVector<BasicBlock*, 8> OutsideBlocks;
130    for (BasicBlock *P : predecessors(Header)) {
131      if (!L->contains(P)) {         // Coming in from outside the loop?
132        // If the loop is branched to from an indirect terminator, we won't
133        // be able to fully transform the loop, because it prohibits
134        // edge splitting.
135        if (P->getTerminator()->isIndirectTerminator())
136          return nullptr;
137  
138        // Keep track of it.
139        OutsideBlocks.push_back(P);
140      }
141    }
142  
143    // Split out the loop pre-header.
144    BasicBlock *PreheaderBB;
145    PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT,
146                                         LI, MSSAU, PreserveLCSSA);
147    if (!PreheaderBB)
148      return nullptr;
149  
150    LLVM_DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
151                      << PreheaderBB->getName() << "\n");
152  
153    // Make sure that NewBB is put someplace intelligent, which doesn't mess up
154    // code layout too horribly.
155    placeSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
156  
157    return PreheaderBB;
158  }
159  
160  /// Add the specified block, and all of its predecessors, to the specified set,
161  /// if it's not already in there.  Stop predecessor traversal when we reach
162  /// StopBlock.
163  static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
164                                    SmallPtrSetImpl<BasicBlock *> &Blocks) {
165    SmallVector<BasicBlock *, 8> Worklist;
166    Worklist.push_back(InputBB);
167    do {
168      BasicBlock *BB = Worklist.pop_back_val();
169      if (Blocks.insert(BB).second && BB != StopBlock)
170        // If BB is not already processed and it is not a stop block then
171        // insert its predecessor in the work list
172        append_range(Worklist, predecessors(BB));
173    } while (!Worklist.empty());
174  }
175  
176  /// The first part of loop-nestification is to find a PHI node that tells
177  /// us how to partition the loops.
178  static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
179                                          AssumptionCache *AC) {
180    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
181    for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
182      PHINode *PN = cast<PHINode>(I);
183      ++I;
184      if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
185        // This is a degenerate PHI already, don't modify it!
186        PN->replaceAllUsesWith(V);
187        PN->eraseFromParent();
188        continue;
189      }
190  
191      // Scan this PHI node looking for a use of the PHI node by itself.
192      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
193        if (PN->getIncomingValue(i) == PN &&
194            L->contains(PN->getIncomingBlock(i)))
195          // We found something tasty to remove.
196          return PN;
197    }
198    return nullptr;
199  }
200  
201  /// If this loop has multiple backedges, try to pull one of them out into
202  /// a nested loop.
203  ///
204  /// This is important for code that looks like
205  /// this:
206  ///
207  ///  Loop:
208  ///     ...
209  ///     br cond, Loop, Next
210  ///     ...
211  ///     br cond2, Loop, Out
212  ///
213  /// To identify this common case, we look at the PHI nodes in the header of the
214  /// loop.  PHI nodes with unchanging values on one backedge correspond to values
215  /// that change in the "outer" loop, but not in the "inner" loop.
216  ///
217  /// If we are able to separate out a loop, return the new outer loop that was
218  /// created.
219  ///
220  static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
221                                  DominatorTree *DT, LoopInfo *LI,
222                                  ScalarEvolution *SE, bool PreserveLCSSA,
223                                  AssumptionCache *AC, MemorySSAUpdater *MSSAU) {
224    // Don't try to separate loops without a preheader.
225    if (!Preheader)
226      return nullptr;
227  
228    // Treat the presence of convergent functions conservatively. The
229    // transformation is invalid if calls to certain convergent
230    // functions (like an AMDGPU barrier) get included in the resulting
231    // inner loop. But blocks meant for the inner loop will be
232    // identified later at a point where it's too late to abort the
233    // transformation. Also, the convergent attribute is not really
234    // sufficient to express the semantics of functions that are
235    // affected by this transformation. So we choose to back off if such
236    // a function call is present until a better alternative becomes
237    // available. This is similar to the conservative treatment of
238    // convergent function calls in GVNHoist and JumpThreading.
239    for (auto BB : L->blocks()) {
240      for (auto &II : *BB) {
241        if (auto CI = dyn_cast<CallBase>(&II)) {
242          if (CI->isConvergent()) {
243            return nullptr;
244          }
245        }
246      }
247    }
248  
249    // The header is not a landing pad; preheader insertion should ensure this.
250    BasicBlock *Header = L->getHeader();
251    assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
252  
253    PHINode *PN = findPHIToPartitionLoops(L, DT, AC);
254    if (!PN) return nullptr;  // No known way to partition.
255  
256    // Pull out all predecessors that have varying values in the loop.  This
257    // handles the case when a PHI node has multiple instances of itself as
258    // arguments.
259    SmallVector<BasicBlock*, 8> OuterLoopPreds;
260    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
261      if (PN->getIncomingValue(i) != PN ||
262          !L->contains(PN->getIncomingBlock(i))) {
263        // We can't split indirect control flow edges.
264        if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator())
265          return nullptr;
266        OuterLoopPreds.push_back(PN->getIncomingBlock(i));
267      }
268    }
269    LLVM_DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
270  
271    // If ScalarEvolution is around and knows anything about values in
272    // this loop, tell it to forget them, because we're about to
273    // substantially change it.
274    if (SE)
275      SE->forgetLoop(L);
276  
277    BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
278                                               DT, LI, MSSAU, PreserveLCSSA);
279  
280    // Make sure that NewBB is put someplace intelligent, which doesn't mess up
281    // code layout too horribly.
282    placeSplitBlockCarefully(NewBB, OuterLoopPreds, L);
283  
284    // Create the new outer loop.
285    Loop *NewOuter = LI->AllocateLoop();
286  
287    // Change the parent loop to use the outer loop as its child now.
288    if (Loop *Parent = L->getParentLoop())
289      Parent->replaceChildLoopWith(L, NewOuter);
290    else
291      LI->changeTopLevelLoop(L, NewOuter);
292  
293    // L is now a subloop of our outer loop.
294    NewOuter->addChildLoop(L);
295  
296    for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
297         I != E; ++I)
298      NewOuter->addBlockEntry(*I);
299  
300    // Now reset the header in L, which had been moved by
301    // SplitBlockPredecessors for the outer loop.
302    L->moveToHeader(Header);
303  
304    // Determine which blocks should stay in L and which should be moved out to
305    // the Outer loop now.
306    SmallPtrSet<BasicBlock *, 4> BlocksInL;
307    for (BasicBlock *P : predecessors(Header)) {
308      if (DT->dominates(Header, P))
309        addBlockAndPredsToSet(P, Header, BlocksInL);
310    }
311  
312    // Scan all of the loop children of L, moving them to OuterLoop if they are
313    // not part of the inner loop.
314    const std::vector<Loop*> &SubLoops = L->getSubLoops();
315    for (size_t I = 0; I != SubLoops.size(); )
316      if (BlocksInL.count(SubLoops[I]->getHeader()))
317        ++I;   // Loop remains in L
318      else
319        NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
320  
321    SmallVector<BasicBlock *, 8> OuterLoopBlocks;
322    OuterLoopBlocks.push_back(NewBB);
323    // Now that we know which blocks are in L and which need to be moved to
324    // OuterLoop, move any blocks that need it.
325    for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
326      BasicBlock *BB = L->getBlocks()[i];
327      if (!BlocksInL.count(BB)) {
328        // Move this block to the parent, updating the exit blocks sets
329        L->removeBlockFromLoop(BB);
330        if ((*LI)[BB] == L) {
331          LI->changeLoopFor(BB, NewOuter);
332          OuterLoopBlocks.push_back(BB);
333        }
334        --i;
335      }
336    }
337  
338    // Split edges to exit blocks from the inner loop, if they emerged in the
339    // process of separating the outer one.
340    formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA);
341  
342    if (PreserveLCSSA) {
343      // Fix LCSSA form for L. Some values, which previously were only used inside
344      // L, can now be used in NewOuter loop. We need to insert phi-nodes for them
345      // in corresponding exit blocks.
346      // We don't need to form LCSSA recursively, because there cannot be uses
347      // inside a newly created loop of defs from inner loops as those would
348      // already be a use of an LCSSA phi node.
349      formLCSSA(*L, *DT, LI, SE);
350  
351      assert(NewOuter->isRecursivelyLCSSAForm(*DT, *LI) &&
352             "LCSSA is broken after separating nested loops!");
353    }
354  
355    return NewOuter;
356  }
357  
358  /// This method is called when the specified loop has more than one
359  /// backedge in it.
360  ///
361  /// If this occurs, revector all of these backedges to target a new basic block
362  /// and have that block branch to the loop header.  This ensures that loops
363  /// have exactly one backedge.
364  static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
365                                               DominatorTree *DT, LoopInfo *LI,
366                                               MemorySSAUpdater *MSSAU) {
367    assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
368  
369    // Get information about the loop
370    BasicBlock *Header = L->getHeader();
371    Function *F = Header->getParent();
372  
373    // Unique backedge insertion currently depends on having a preheader.
374    if (!Preheader)
375      return nullptr;
376  
377    // The header is not an EH pad; preheader insertion should ensure this.
378    assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
379  
380    // Figure out which basic blocks contain back-edges to the loop header.
381    std::vector<BasicBlock*> BackedgeBlocks;
382    for (BasicBlock *P : predecessors(Header)) {
383      // Indirect edges cannot be split, so we must fail if we find one.
384      if (P->getTerminator()->isIndirectTerminator())
385        return nullptr;
386  
387      if (P != Preheader) BackedgeBlocks.push_back(P);
388    }
389  
390    // Create and insert the new backedge block...
391    BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
392                                             Header->getName() + ".backedge", F);
393    BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
394    BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
395  
396    LLVM_DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
397                      << BEBlock->getName() << "\n");
398  
399    // Move the new backedge block to right after the last backedge block.
400    Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator();
401    F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
402  
403    // Now that the block has been inserted into the function, create PHI nodes in
404    // the backedge block which correspond to any PHI nodes in the header block.
405    for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
406      PHINode *PN = cast<PHINode>(I);
407      PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
408                                       PN->getName()+".be", BETerminator);
409  
410      // Loop over the PHI node, moving all entries except the one for the
411      // preheader over to the new PHI node.
412      unsigned PreheaderIdx = ~0U;
413      bool HasUniqueIncomingValue = true;
414      Value *UniqueValue = nullptr;
415      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
416        BasicBlock *IBB = PN->getIncomingBlock(i);
417        Value *IV = PN->getIncomingValue(i);
418        if (IBB == Preheader) {
419          PreheaderIdx = i;
420        } else {
421          NewPN->addIncoming(IV, IBB);
422          if (HasUniqueIncomingValue) {
423            if (!UniqueValue)
424              UniqueValue = IV;
425            else if (UniqueValue != IV)
426              HasUniqueIncomingValue = false;
427          }
428        }
429      }
430  
431      // Delete all of the incoming values from the old PN except the preheader's
432      assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
433      if (PreheaderIdx != 0) {
434        PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
435        PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
436      }
437      // Nuke all entries except the zero'th.
438      for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
439        PN->removeIncomingValue(e-i, false);
440  
441      // Finally, add the newly constructed PHI node as the entry for the BEBlock.
442      PN->addIncoming(NewPN, BEBlock);
443  
444      // As an optimization, if all incoming values in the new PhiNode (which is a
445      // subset of the incoming values of the old PHI node) have the same value,
446      // eliminate the PHI Node.
447      if (HasUniqueIncomingValue) {
448        NewPN->replaceAllUsesWith(UniqueValue);
449        BEBlock->getInstList().erase(NewPN);
450      }
451    }
452  
453    // Now that all of the PHI nodes have been inserted and adjusted, modify the
454    // backedge blocks to jump to the BEBlock instead of the header.
455    // If one of the backedges has llvm.loop metadata attached, we remove
456    // it from the backedge and add it to BEBlock.
457    unsigned LoopMDKind = BEBlock->getContext().getMDKindID("llvm.loop");
458    MDNode *LoopMD = nullptr;
459    for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
460      Instruction *TI = BackedgeBlocks[i]->getTerminator();
461      if (!LoopMD)
462        LoopMD = TI->getMetadata(LoopMDKind);
463      TI->setMetadata(LoopMDKind, nullptr);
464      TI->replaceSuccessorWith(Header, BEBlock);
465    }
466    BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD);
467  
468    //===--- Update all analyses which we must preserve now -----------------===//
469  
470    // Update Loop Information - we know that this block is now in the current
471    // loop and all parent loops.
472    L->addBasicBlockToLoop(BEBlock, *LI);
473  
474    // Update dominator information
475    DT->splitBlock(BEBlock);
476  
477    if (MSSAU)
478      MSSAU->updatePhisWhenInsertingUniqueBackedgeBlock(Header, Preheader,
479                                                        BEBlock);
480  
481    return BEBlock;
482  }
483  
484  /// Simplify one loop and queue further loops for simplification.
485  static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
486                              DominatorTree *DT, LoopInfo *LI,
487                              ScalarEvolution *SE, AssumptionCache *AC,
488                              MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
489    bool Changed = false;
490    if (MSSAU && VerifyMemorySSA)
491      MSSAU->getMemorySSA()->verifyMemorySSA();
492  
493  ReprocessLoop:
494  
495    // Check to see that no blocks (other than the header) in this loop have
496    // predecessors that are not in the loop.  This is not valid for natural
497    // loops, but can occur if the blocks are unreachable.  Since they are
498    // unreachable we can just shamelessly delete those CFG edges!
499    for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
500         BB != E; ++BB) {
501      if (*BB == L->getHeader()) continue;
502  
503      SmallPtrSet<BasicBlock*, 4> BadPreds;
504      for (BasicBlock *P : predecessors(*BB))
505        if (!L->contains(P))
506          BadPreds.insert(P);
507  
508      // Delete each unique out-of-loop (and thus dead) predecessor.
509      for (BasicBlock *P : BadPreds) {
510  
511        LLVM_DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
512                          << P->getName() << "\n");
513  
514        // Zap the dead pred's terminator and replace it with unreachable.
515        Instruction *TI = P->getTerminator();
516        changeToUnreachable(TI, PreserveLCSSA,
517                            /*DTU=*/nullptr, MSSAU);
518        Changed = true;
519      }
520    }
521  
522    if (MSSAU && VerifyMemorySSA)
523      MSSAU->getMemorySSA()->verifyMemorySSA();
524  
525    // If there are exiting blocks with branches on undef, resolve the undef in
526    // the direction which will exit the loop. This will help simplify loop
527    // trip count computations.
528    SmallVector<BasicBlock*, 8> ExitingBlocks;
529    L->getExitingBlocks(ExitingBlocks);
530    for (BasicBlock *ExitingBlock : ExitingBlocks)
531      if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()))
532        if (BI->isConditional()) {
533          if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
534  
535            LLVM_DEBUG(dbgs()
536                       << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
537                       << ExitingBlock->getName() << "\n");
538  
539            BI->setCondition(ConstantInt::get(Cond->getType(),
540                                              !L->contains(BI->getSuccessor(0))));
541  
542            Changed = true;
543          }
544        }
545  
546    // Does the loop already have a preheader?  If so, don't insert one.
547    BasicBlock *Preheader = L->getLoopPreheader();
548    if (!Preheader) {
549      Preheader = InsertPreheaderForLoop(L, DT, LI, MSSAU, PreserveLCSSA);
550      if (Preheader)
551        Changed = true;
552    }
553  
554    // Next, check to make sure that all exit nodes of the loop only have
555    // predecessors that are inside of the loop.  This check guarantees that the
556    // loop preheader/header will dominate the exit blocks.  If the exit block has
557    // predecessors from outside of the loop, split the edge now.
558    if (formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA))
559      Changed = true;
560  
561    if (MSSAU && VerifyMemorySSA)
562      MSSAU->getMemorySSA()->verifyMemorySSA();
563  
564    // If the header has more than two predecessors at this point (from the
565    // preheader and from multiple backedges), we must adjust the loop.
566    BasicBlock *LoopLatch = L->getLoopLatch();
567    if (!LoopLatch) {
568      // If this is really a nested loop, rip it out into a child loop.  Don't do
569      // this for loops with a giant number of backedges, just factor them into a
570      // common backedge instead.
571      if (L->getNumBackEdges() < 8) {
572        if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE,
573                                              PreserveLCSSA, AC, MSSAU)) {
574          ++NumNested;
575          // Enqueue the outer loop as it should be processed next in our
576          // depth-first nest walk.
577          Worklist.push_back(OuterL);
578  
579          // This is a big restructuring change, reprocess the whole loop.
580          Changed = true;
581          // GCC doesn't tail recursion eliminate this.
582          // FIXME: It isn't clear we can't rely on LLVM to TRE this.
583          goto ReprocessLoop;
584        }
585      }
586  
587      // If we either couldn't, or didn't want to, identify nesting of the loops,
588      // insert a new block that all backedges target, then make it jump to the
589      // loop header.
590      LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI, MSSAU);
591      if (LoopLatch)
592        Changed = true;
593    }
594  
595    if (MSSAU && VerifyMemorySSA)
596      MSSAU->getMemorySSA()->verifyMemorySSA();
597  
598    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
599  
600    // Scan over the PHI nodes in the loop header.  Since they now have only two
601    // incoming values (the loop is canonicalized), we may have simplified the PHI
602    // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
603    PHINode *PN;
604    for (BasicBlock::iterator I = L->getHeader()->begin();
605         (PN = dyn_cast<PHINode>(I++)); )
606      if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
607        if (SE) SE->forgetValue(PN);
608        if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) {
609          PN->replaceAllUsesWith(V);
610          PN->eraseFromParent();
611          Changed = true;
612        }
613      }
614  
615    // If this loop has multiple exits and the exits all go to the same
616    // block, attempt to merge the exits. This helps several passes, such
617    // as LoopRotation, which do not support loops with multiple exits.
618    // SimplifyCFG also does this (and this code uses the same utility
619    // function), however this code is loop-aware, where SimplifyCFG is
620    // not. That gives it the advantage of being able to hoist
621    // loop-invariant instructions out of the way to open up more
622    // opportunities, and the disadvantage of having the responsibility
623    // to preserve dominator information.
624    auto HasUniqueExitBlock = [&]() {
625      BasicBlock *UniqueExit = nullptr;
626      for (auto *ExitingBB : ExitingBlocks)
627        for (auto *SuccBB : successors(ExitingBB)) {
628          if (L->contains(SuccBB))
629            continue;
630  
631          if (!UniqueExit)
632            UniqueExit = SuccBB;
633          else if (UniqueExit != SuccBB)
634            return false;
635        }
636  
637      return true;
638    };
639    if (HasUniqueExitBlock()) {
640      for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
641        BasicBlock *ExitingBlock = ExitingBlocks[i];
642        if (!ExitingBlock->getSinglePredecessor()) continue;
643        BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
644        if (!BI || !BI->isConditional()) continue;
645        CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
646        if (!CI || CI->getParent() != ExitingBlock) continue;
647  
648        // Attempt to hoist out all instructions except for the
649        // comparison and the branch.
650        bool AllInvariant = true;
651        bool AnyInvariant = false;
652        for (auto I = ExitingBlock->instructionsWithoutDebug().begin(); &*I != BI; ) {
653          Instruction *Inst = &*I++;
654          if (Inst == CI)
655            continue;
656          if (!L->makeLoopInvariant(
657                  Inst, AnyInvariant,
658                  Preheader ? Preheader->getTerminator() : nullptr, MSSAU)) {
659            AllInvariant = false;
660            break;
661          }
662        }
663        if (AnyInvariant) {
664          Changed = true;
665          // The loop disposition of all SCEV expressions that depend on any
666          // hoisted values have also changed.
667          if (SE)
668            SE->forgetLoopDispositions(L);
669        }
670        if (!AllInvariant) continue;
671  
672        // The block has now been cleared of all instructions except for
673        // a comparison and a conditional branch. SimplifyCFG may be able
674        // to fold it now.
675        if (!FoldBranchToCommonDest(BI, /*DTU=*/nullptr, MSSAU))
676          continue;
677  
678        // Success. The block is now dead, so remove it from the loop,
679        // update the dominator tree and delete it.
680        LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
681                          << ExitingBlock->getName() << "\n");
682  
683        assert(pred_empty(ExitingBlock));
684        Changed = true;
685        LI->removeBlock(ExitingBlock);
686  
687        DomTreeNode *Node = DT->getNode(ExitingBlock);
688        while (!Node->isLeaf()) {
689          DomTreeNode *Child = Node->back();
690          DT->changeImmediateDominator(Child, Node->getIDom());
691        }
692        DT->eraseNode(ExitingBlock);
693        if (MSSAU) {
694          SmallSetVector<BasicBlock *, 8> ExitBlockSet;
695          ExitBlockSet.insert(ExitingBlock);
696          MSSAU->removeBlocks(ExitBlockSet);
697        }
698  
699        BI->getSuccessor(0)->removePredecessor(
700            ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
701        BI->getSuccessor(1)->removePredecessor(
702            ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
703        ExitingBlock->eraseFromParent();
704      }
705    }
706  
707    // Changing exit conditions for blocks may affect exit counts of this loop and
708    // any of its paretns, so we must invalidate the entire subtree if we've made
709    // any changes.
710    if (Changed && SE)
711      SE->forgetTopmostLoop(L);
712  
713    if (MSSAU && VerifyMemorySSA)
714      MSSAU->getMemorySSA()->verifyMemorySSA();
715  
716    return Changed;
717  }
718  
719  bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
720                          ScalarEvolution *SE, AssumptionCache *AC,
721                          MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
722    bool Changed = false;
723  
724  #ifndef NDEBUG
725    // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA
726    // form.
727    if (PreserveLCSSA) {
728      assert(DT && "DT not available.");
729      assert(LI && "LI not available.");
730      assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
731             "Requested to preserve LCSSA, but it's already broken.");
732    }
733  #endif
734  
735    // Worklist maintains our depth-first queue of loops in this nest to process.
736    SmallVector<Loop *, 4> Worklist;
737    Worklist.push_back(L);
738  
739    // Walk the worklist from front to back, pushing newly found sub loops onto
740    // the back. This will let us process loops from back to front in depth-first
741    // order. We can use this simple process because loops form a tree.
742    for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
743      Loop *L2 = Worklist[Idx];
744      Worklist.append(L2->begin(), L2->end());
745    }
746  
747    while (!Worklist.empty())
748      Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE,
749                                 AC, MSSAU, PreserveLCSSA);
750  
751    return Changed;
752  }
753  
754  namespace {
755    struct LoopSimplify : public FunctionPass {
756      static char ID; // Pass identification, replacement for typeid
757      LoopSimplify() : FunctionPass(ID) {
758        initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
759      }
760  
761      bool runOnFunction(Function &F) override;
762  
763      void getAnalysisUsage(AnalysisUsage &AU) const override {
764        AU.addRequired<AssumptionCacheTracker>();
765  
766        // We need loop information to identify the loops...
767        AU.addRequired<DominatorTreeWrapperPass>();
768        AU.addPreserved<DominatorTreeWrapperPass>();
769  
770        AU.addRequired<LoopInfoWrapperPass>();
771        AU.addPreserved<LoopInfoWrapperPass>();
772  
773        AU.addPreserved<BasicAAWrapperPass>();
774        AU.addPreserved<AAResultsWrapperPass>();
775        AU.addPreserved<GlobalsAAWrapperPass>();
776        AU.addPreserved<ScalarEvolutionWrapperPass>();
777        AU.addPreserved<SCEVAAWrapperPass>();
778        AU.addPreservedID(LCSSAID);
779        AU.addPreserved<DependenceAnalysisWrapperPass>();
780        AU.addPreservedID(BreakCriticalEdgesID);  // No critical edges added.
781        AU.addPreserved<BranchProbabilityInfoWrapperPass>();
782        if (EnableMSSALoopDependency)
783          AU.addPreserved<MemorySSAWrapperPass>();
784      }
785  
786      /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
787      void verifyAnalysis() const override;
788    };
789  }
790  
791  char LoopSimplify::ID = 0;
792  INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
793                  "Canonicalize natural loops", false, false)
794  INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
795  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
796  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
797  INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
798                  "Canonicalize natural loops", false, false)
799  
800  // Publicly exposed interface to pass...
801  char &llvm::LoopSimplifyID = LoopSimplify::ID;
802  Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
803  
804  /// runOnFunction - Run down all loops in the CFG (recursively, but we could do
805  /// it in any convenient order) inserting preheaders...
806  ///
807  bool LoopSimplify::runOnFunction(Function &F) {
808    bool Changed = false;
809    LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
810    DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
811    auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
812    ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr;
813    AssumptionCache *AC =
814        &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
815    MemorySSA *MSSA = nullptr;
816    std::unique_ptr<MemorySSAUpdater> MSSAU;
817    if (EnableMSSALoopDependency) {
818      auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
819      if (MSSAAnalysis) {
820        MSSA = &MSSAAnalysis->getMSSA();
821        MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
822      }
823    }
824  
825    bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
826  
827    // Simplify each loop nest in the function.
828    for (auto *L : *LI)
829      Changed |= simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA);
830  
831  #ifndef NDEBUG
832    if (PreserveLCSSA) {
833      bool InLCSSA = all_of(
834          *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); });
835      assert(InLCSSA && "LCSSA is broken after loop-simplify.");
836    }
837  #endif
838    return Changed;
839  }
840  
841  PreservedAnalyses LoopSimplifyPass::run(Function &F,
842                                          FunctionAnalysisManager &AM) {
843    bool Changed = false;
844    LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
845    DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
846    ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
847    AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
848    auto *MSSAAnalysis = AM.getCachedResult<MemorySSAAnalysis>(F);
849    std::unique_ptr<MemorySSAUpdater> MSSAU;
850    if (MSSAAnalysis) {
851      auto *MSSA = &MSSAAnalysis->getMSSA();
852      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
853    }
854  
855  
856    // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA
857    // after simplifying the loops. MemorySSA is preserved if it exists.
858    for (auto *L : *LI)
859      Changed |=
860          simplifyLoop(L, DT, LI, SE, AC, MSSAU.get(), /*PreserveLCSSA*/ false);
861  
862    if (!Changed)
863      return PreservedAnalyses::all();
864  
865    PreservedAnalyses PA;
866    PA.preserve<DominatorTreeAnalysis>();
867    PA.preserve<LoopAnalysis>();
868    PA.preserve<ScalarEvolutionAnalysis>();
869    PA.preserve<DependenceAnalysis>();
870    if (MSSAAnalysis)
871      PA.preserve<MemorySSAAnalysis>();
872    // BPI maps conditional terminators to probabilities, LoopSimplify can insert
873    // blocks, but it does so only by splitting existing blocks and edges. This
874    // results in the interesting property that all new terminators inserted are
875    // unconditional branches which do not appear in BPI. All deletions are
876    // handled via ValueHandle callbacks w/in BPI.
877    PA.preserve<BranchProbabilityAnalysis>();
878    return PA;
879  }
880  
881  // FIXME: Restore this code when we re-enable verification in verifyAnalysis
882  // below.
883  #if 0
884  static void verifyLoop(Loop *L) {
885    // Verify subloops.
886    for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
887      verifyLoop(*I);
888  
889    // It used to be possible to just assert L->isLoopSimplifyForm(), however
890    // with the introduction of indirectbr, there are now cases where it's
891    // not possible to transform a loop as necessary. We can at least check
892    // that there is an indirectbr near any time there's trouble.
893  
894    // Indirectbr can interfere with preheader and unique backedge insertion.
895    if (!L->getLoopPreheader() || !L->getLoopLatch()) {
896      bool HasIndBrPred = false;
897      for (BasicBlock *Pred : predecessors(L->getHeader()))
898        if (isa<IndirectBrInst>(Pred->getTerminator())) {
899          HasIndBrPred = true;
900          break;
901        }
902      assert(HasIndBrPred &&
903             "LoopSimplify has no excuse for missing loop header info!");
904      (void)HasIndBrPred;
905    }
906  
907    // Indirectbr can interfere with exit block canonicalization.
908    if (!L->hasDedicatedExits()) {
909      bool HasIndBrExiting = false;
910      SmallVector<BasicBlock*, 8> ExitingBlocks;
911      L->getExitingBlocks(ExitingBlocks);
912      for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
913        if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
914          HasIndBrExiting = true;
915          break;
916        }
917      }
918  
919      assert(HasIndBrExiting &&
920             "LoopSimplify has no excuse for missing exit block info!");
921      (void)HasIndBrExiting;
922    }
923  }
924  #endif
925  
926  void LoopSimplify::verifyAnalysis() const {
927    // FIXME: This routine is being called mid-way through the loop pass manager
928    // as loop passes destroy this analysis. That's actually fine, but we have no
929    // way of expressing that here. Once all of the passes that destroy this are
930    // hoisted out of the loop pass manager we can add back verification here.
931  #if 0
932    for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
933      verifyLoop(*I);
934  #endif
935  }
936