xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp (revision 723da5d92f40a413585107f8455280ea575fe410)
1  //===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file implements the Dead Loop Deletion Pass. This pass is responsible
10  // for eliminating loops with non-infinite computable trip counts that have no
11  // side effects or volatile instructions, and do not contribute to the
12  // computation of the function's return value.
13  //
14  //===----------------------------------------------------------------------===//
15  
16  #include "llvm/Transforms/Scalar/LoopDeletion.h"
17  #include "llvm/ADT/SmallVector.h"
18  #include "llvm/ADT/Statistic.h"
19  #include "llvm/Analysis/CFG.h"
20  #include "llvm/Analysis/InstructionSimplify.h"
21  #include "llvm/Analysis/LoopIterator.h"
22  #include "llvm/Analysis/LoopPass.h"
23  #include "llvm/Analysis/MemorySSA.h"
24  #include "llvm/Analysis/OptimizationRemarkEmitter.h"
25  #include "llvm/Analysis/ScalarEvolution.h"
26  #include "llvm/IR/Dominators.h"
27  
28  #include "llvm/IR/PatternMatch.h"
29  #include "llvm/Transforms/Scalar/LoopPassManager.h"
30  #include "llvm/Transforms/Utils/LoopUtils.h"
31  
32  using namespace llvm;
33  
34  #define DEBUG_TYPE "loop-delete"
35  
36  STATISTIC(NumDeleted, "Number of loops deleted");
37  STATISTIC(NumBackedgesBroken,
38            "Number of loops for which we managed to break the backedge");
39  
40  static cl::opt<bool> EnableSymbolicExecution(
41      "loop-deletion-enable-symbolic-execution", cl::Hidden, cl::init(true),
42      cl::desc("Break backedge through symbolic execution of 1st iteration "
43               "attempting to prove that the backedge is never taken"));
44  
45  enum class LoopDeletionResult {
46    Unmodified,
47    Modified,
48    Deleted,
49  };
50  
51  static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) {
52    if (A == LoopDeletionResult::Deleted || B == LoopDeletionResult::Deleted)
53      return LoopDeletionResult::Deleted;
54    if (A == LoopDeletionResult::Modified || B == LoopDeletionResult::Modified)
55      return LoopDeletionResult::Modified;
56    return LoopDeletionResult::Unmodified;
57  }
58  
59  /// Determines if a loop is dead.
60  ///
61  /// This assumes that we've already checked for unique exit and exiting blocks,
62  /// and that the code is in LCSSA form.
63  static bool isLoopDead(Loop *L, ScalarEvolution &SE,
64                         SmallVectorImpl<BasicBlock *> &ExitingBlocks,
65                         BasicBlock *ExitBlock, bool &Changed,
66                         BasicBlock *Preheader, LoopInfo &LI) {
67    // Make sure that all PHI entries coming from the loop are loop invariant.
68    // Because the code is in LCSSA form, any values used outside of the loop
69    // must pass through a PHI in the exit block, meaning that this check is
70    // sufficient to guarantee that no loop-variant values are used outside
71    // of the loop.
72    bool AllEntriesInvariant = true;
73    bool AllOutgoingValuesSame = true;
74    if (ExitBlock) {
75      for (PHINode &P : ExitBlock->phis()) {
76        Value *incoming = P.getIncomingValueForBlock(ExitingBlocks[0]);
77  
78        // Make sure all exiting blocks produce the same incoming value for the
79        // block. If there are different incoming values for different exiting
80        // blocks, then it is impossible to statically determine which value
81        // should be used.
82        AllOutgoingValuesSame =
83            all_of(ArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) {
84              return incoming == P.getIncomingValueForBlock(BB);
85            });
86  
87        if (!AllOutgoingValuesSame)
88          break;
89  
90        if (Instruction *I = dyn_cast<Instruction>(incoming)) {
91          if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator(),
92                                    /*MSSAU=*/nullptr, &SE)) {
93            AllEntriesInvariant = false;
94            break;
95          }
96        }
97      }
98    }
99  
100    if (!AllEntriesInvariant || !AllOutgoingValuesSame)
101      return false;
102  
103    // Make sure that no instructions in the block have potential side-effects.
104    // This includes instructions that could write to memory, and loads that are
105    // marked volatile.
106    for (const auto &I : L->blocks())
107      if (any_of(*I, [](Instruction &I) {
108            return I.mayHaveSideEffects() && !I.isDroppable();
109          }))
110        return false;
111  
112    // The loop or any of its sub-loops looping infinitely is legal. The loop can
113    // only be considered dead if either
114    // a. the function is mustprogress.
115    // b. all (sub-)loops are mustprogress or have a known trip-count.
116    if (L->getHeader()->getParent()->mustProgress())
117      return true;
118  
119    LoopBlocksRPO RPOT(L);
120    RPOT.perform(&LI);
121    // If the loop contains an irreducible cycle, it may loop infinitely.
122    if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
123      return false;
124  
125    SmallVector<Loop *, 8> WorkList;
126    WorkList.push_back(L);
127    while (!WorkList.empty()) {
128      Loop *Current = WorkList.pop_back_val();
129      if (hasMustProgress(Current))
130        continue;
131  
132      const SCEV *S = SE.getConstantMaxBackedgeTakenCount(Current);
133      if (isa<SCEVCouldNotCompute>(S)) {
134        LLVM_DEBUG(
135            dbgs() << "Could not compute SCEV MaxBackedgeTakenCount and was "
136                      "not required to make progress.\n");
137        return false;
138      }
139      WorkList.append(Current->begin(), Current->end());
140    }
141    return true;
142  }
143  
144  /// This function returns true if there is no viable path from the
145  /// entry block to the header of \p L. Right now, it only does
146  /// a local search to save compile time.
147  static bool isLoopNeverExecuted(Loop *L) {
148    using namespace PatternMatch;
149  
150    auto *Preheader = L->getLoopPreheader();
151    // TODO: We can relax this constraint, since we just need a loop
152    // predecessor.
153    assert(Preheader && "Needs preheader!");
154  
155    if (Preheader->isEntryBlock())
156      return false;
157    // All predecessors of the preheader should have a constant conditional
158    // branch, with the loop's preheader as not-taken.
159    for (auto *Pred: predecessors(Preheader)) {
160      BasicBlock *Taken, *NotTaken;
161      ConstantInt *Cond;
162      if (!match(Pred->getTerminator(),
163                 m_Br(m_ConstantInt(Cond), Taken, NotTaken)))
164        return false;
165      if (!Cond->getZExtValue())
166        std::swap(Taken, NotTaken);
167      if (Taken == Preheader)
168        return false;
169    }
170    assert(!pred_empty(Preheader) &&
171           "Preheader should have predecessors at this point!");
172    // All the predecessors have the loop preheader as not-taken target.
173    return true;
174  }
175  
176  static Value *
177  getValueOnFirstIteration(Value *V, DenseMap<Value *, Value *> &FirstIterValue,
178                           const SimplifyQuery &SQ) {
179    // Quick hack: do not flood cache with non-instruction values.
180    if (!isa<Instruction>(V))
181      return V;
182    // Do we already know cached result?
183    auto Existing = FirstIterValue.find(V);
184    if (Existing != FirstIterValue.end())
185      return Existing->second;
186    Value *FirstIterV = nullptr;
187    if (auto *BO = dyn_cast<BinaryOperator>(V)) {
188      Value *LHS =
189          getValueOnFirstIteration(BO->getOperand(0), FirstIterValue, SQ);
190      Value *RHS =
191          getValueOnFirstIteration(BO->getOperand(1), FirstIterValue, SQ);
192      FirstIterV = simplifyBinOp(BO->getOpcode(), LHS, RHS, SQ);
193    } else if (auto *Cmp = dyn_cast<ICmpInst>(V)) {
194      Value *LHS =
195          getValueOnFirstIteration(Cmp->getOperand(0), FirstIterValue, SQ);
196      Value *RHS =
197          getValueOnFirstIteration(Cmp->getOperand(1), FirstIterValue, SQ);
198      FirstIterV = simplifyICmpInst(Cmp->getPredicate(), LHS, RHS, SQ);
199    } else if (auto *Select = dyn_cast<SelectInst>(V)) {
200      Value *Cond =
201          getValueOnFirstIteration(Select->getCondition(), FirstIterValue, SQ);
202      if (auto *C = dyn_cast<ConstantInt>(Cond)) {
203        auto *Selected = C->isAllOnesValue() ? Select->getTrueValue()
204                                             : Select->getFalseValue();
205        FirstIterV = getValueOnFirstIteration(Selected, FirstIterValue, SQ);
206      }
207    }
208    if (!FirstIterV)
209      FirstIterV = V;
210    FirstIterValue[V] = FirstIterV;
211    return FirstIterV;
212  }
213  
214  // Try to prove that one of conditions that dominates the latch must exit on 1st
215  // iteration.
216  static bool canProveExitOnFirstIteration(Loop *L, DominatorTree &DT,
217                                           LoopInfo &LI) {
218    // Disabled by option.
219    if (!EnableSymbolicExecution)
220      return false;
221  
222    BasicBlock *Predecessor = L->getLoopPredecessor();
223    BasicBlock *Latch = L->getLoopLatch();
224  
225    if (!Predecessor || !Latch)
226      return false;
227  
228    LoopBlocksRPO RPOT(L);
229    RPOT.perform(&LI);
230  
231    // For the optimization to be correct, we need RPOT to have a property that
232    // each block is processed after all its predecessors, which may only be
233    // violated for headers of the current loop and all nested loops. Irreducible
234    // CFG provides multiple ways to break this assumption, so we do not want to
235    // deal with it.
236    if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
237      return false;
238  
239    BasicBlock *Header = L->getHeader();
240    // Blocks that are reachable on the 1st iteration.
241    SmallPtrSet<BasicBlock *, 4> LiveBlocks;
242    // Edges that are reachable on the 1st iteration.
243    DenseSet<BasicBlockEdge> LiveEdges;
244    LiveBlocks.insert(Header);
245  
246    SmallPtrSet<BasicBlock *, 4> Visited;
247    auto MarkLiveEdge = [&](BasicBlock *From, BasicBlock *To) {
248      assert(LiveBlocks.count(From) && "Must be live!");
249      assert((LI.isLoopHeader(To) || !Visited.count(To)) &&
250             "Only canonical backedges are allowed. Irreducible CFG?");
251      assert((LiveBlocks.count(To) || !Visited.count(To)) &&
252             "We already discarded this block as dead!");
253      LiveBlocks.insert(To);
254      LiveEdges.insert({ From, To });
255    };
256  
257    auto MarkAllSuccessorsLive = [&](BasicBlock *BB) {
258      for (auto *Succ : successors(BB))
259        MarkLiveEdge(BB, Succ);
260    };
261  
262    // Check if there is only one value coming from all live predecessor blocks.
263    // Note that because we iterate in RPOT, we have already visited all its
264    // (non-latch) predecessors.
265    auto GetSoleInputOnFirstIteration = [&](PHINode & PN)->Value * {
266      BasicBlock *BB = PN.getParent();
267      bool HasLivePreds = false;
268      (void)HasLivePreds;
269      if (BB == Header)
270        return PN.getIncomingValueForBlock(Predecessor);
271      Value *OnlyInput = nullptr;
272      for (auto *Pred : predecessors(BB))
273        if (LiveEdges.count({ Pred, BB })) {
274          HasLivePreds = true;
275          Value *Incoming = PN.getIncomingValueForBlock(Pred);
276          // Skip poison. If they are present, we can assume they are equal to
277          // the non-poison input.
278          if (isa<PoisonValue>(Incoming))
279            continue;
280          // Two inputs.
281          if (OnlyInput && OnlyInput != Incoming)
282            return nullptr;
283          OnlyInput = Incoming;
284        }
285  
286      assert(HasLivePreds && "No live predecessors?");
287      // If all incoming live value were poison, return poison.
288      return OnlyInput ? OnlyInput : PoisonValue::get(PN.getType());
289    };
290    DenseMap<Value *, Value *> FirstIterValue;
291  
292    // Use the following algorithm to prove we never take the latch on the 1st
293    // iteration:
294    // 1. Traverse in topological order, so that whenever we visit a block, all
295    //    its predecessors are already visited.
296    // 2. If we can prove that the block may have only 1 predecessor on the 1st
297    //    iteration, map all its phis onto input from this predecessor.
298    // 3a. If we can prove which successor of out block is taken on the 1st
299    //     iteration, mark this successor live.
300    // 3b. If we cannot prove it, conservatively assume that all successors are
301    //     live.
302    auto &DL = Header->getDataLayout();
303    const SimplifyQuery SQ(DL);
304    for (auto *BB : RPOT) {
305      Visited.insert(BB);
306  
307      // This block is not reachable on the 1st iterations.
308      if (!LiveBlocks.count(BB))
309        continue;
310  
311      // Skip inner loops.
312      if (LI.getLoopFor(BB) != L) {
313        MarkAllSuccessorsLive(BB);
314        continue;
315      }
316  
317      // If Phi has only one input from all live input blocks, use it.
318      for (auto &PN : BB->phis()) {
319        if (!PN.getType()->isIntegerTy())
320          continue;
321        auto *Incoming = GetSoleInputOnFirstIteration(PN);
322        if (Incoming && DT.dominates(Incoming, BB->getTerminator())) {
323          Value *FirstIterV =
324              getValueOnFirstIteration(Incoming, FirstIterValue, SQ);
325          FirstIterValue[&PN] = FirstIterV;
326        }
327      }
328  
329      using namespace PatternMatch;
330      Value *Cond;
331      BasicBlock *IfTrue, *IfFalse;
332      auto *Term = BB->getTerminator();
333      if (match(Term, m_Br(m_Value(Cond),
334                           m_BasicBlock(IfTrue), m_BasicBlock(IfFalse)))) {
335        auto *ICmp = dyn_cast<ICmpInst>(Cond);
336        if (!ICmp || !ICmp->getType()->isIntegerTy()) {
337          MarkAllSuccessorsLive(BB);
338          continue;
339        }
340  
341        // Can we prove constant true or false for this condition?
342        auto *KnownCondition = getValueOnFirstIteration(ICmp, FirstIterValue, SQ);
343        if (KnownCondition == ICmp) {
344          // Failed to simplify.
345          MarkAllSuccessorsLive(BB);
346          continue;
347        }
348        if (isa<UndefValue>(KnownCondition)) {
349          // TODO: According to langref, branching by undef is undefined behavior.
350          // It means that, theoretically, we should be able to just continue
351          // without marking any successors as live. However, we are not certain
352          // how correct our compiler is at handling such cases. So we are being
353          // very conservative here.
354          //
355          // If there is a non-loop successor, always assume this branch leaves the
356          // loop. Otherwise, arbitrarily take IfTrue.
357          //
358          // Once we are certain that branching by undef is handled correctly by
359          // other transforms, we should not mark any successors live here.
360          if (L->contains(IfTrue) && L->contains(IfFalse))
361            MarkLiveEdge(BB, IfTrue);
362          continue;
363        }
364        auto *ConstCondition = dyn_cast<ConstantInt>(KnownCondition);
365        if (!ConstCondition) {
366          // Non-constant condition, cannot analyze any further.
367          MarkAllSuccessorsLive(BB);
368          continue;
369        }
370        if (ConstCondition->isAllOnesValue())
371          MarkLiveEdge(BB, IfTrue);
372        else
373          MarkLiveEdge(BB, IfFalse);
374      } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) {
375        auto *SwitchValue = SI->getCondition();
376        auto *SwitchValueOnFirstIter =
377            getValueOnFirstIteration(SwitchValue, FirstIterValue, SQ);
378        auto *ConstSwitchValue = dyn_cast<ConstantInt>(SwitchValueOnFirstIter);
379        if (!ConstSwitchValue) {
380          MarkAllSuccessorsLive(BB);
381          continue;
382        }
383        auto CaseIterator = SI->findCaseValue(ConstSwitchValue);
384        MarkLiveEdge(BB, CaseIterator->getCaseSuccessor());
385      } else {
386        MarkAllSuccessorsLive(BB);
387        continue;
388      }
389    }
390  
391    // We can break the latch if it wasn't live.
392    return !LiveEdges.count({ Latch, Header });
393  }
394  
395  /// If we can prove the backedge is untaken, remove it.  This destroys the
396  /// loop, but leaves the (now trivially loop invariant) control flow and
397  /// side effects (if any) in place.
398  static LoopDeletionResult
399  breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
400                          LoopInfo &LI, MemorySSA *MSSA,
401                          OptimizationRemarkEmitter &ORE) {
402    assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
403  
404    if (!L->getLoopLatch())
405      return LoopDeletionResult::Unmodified;
406  
407    auto *BTCMax = SE.getConstantMaxBackedgeTakenCount(L);
408    if (!BTCMax->isZero()) {
409      auto *BTC = SE.getBackedgeTakenCount(L);
410      if (!BTC->isZero()) {
411        if (!isa<SCEVCouldNotCompute>(BTC) && SE.isKnownNonZero(BTC))
412          return LoopDeletionResult::Unmodified;
413        if (!canProveExitOnFirstIteration(L, DT, LI))
414          return LoopDeletionResult::Unmodified;
415      }
416    }
417    ++NumBackedgesBroken;
418    breakLoopBackedge(L, DT, SE, LI, MSSA);
419    return LoopDeletionResult::Deleted;
420  }
421  
422  /// Remove a loop if it is dead.
423  ///
424  /// A loop is considered dead either if it does not impact the observable
425  /// behavior of the program other than finite running time, or if it is
426  /// required to make progress by an attribute such as 'mustprogress' or
427  /// 'llvm.loop.mustprogress' and does not make any. This may remove
428  /// infinite loops that have been required to make progress.
429  ///
430  /// This entire process relies pretty heavily on LoopSimplify form and LCSSA in
431  /// order to make various safety checks work.
432  ///
433  /// \returns true if any changes were made. This may mutate the loop even if it
434  /// is unable to delete it due to hoisting trivially loop invariant
435  /// instructions out of the loop.
436  static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
437                                             ScalarEvolution &SE, LoopInfo &LI,
438                                             MemorySSA *MSSA,
439                                             OptimizationRemarkEmitter &ORE) {
440    assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
441  
442    // We can only remove the loop if there is a preheader that we can branch from
443    // after removing it. Also, if LoopSimplify form is not available, stay out
444    // of trouble.
445    BasicBlock *Preheader = L->getLoopPreheader();
446    if (!Preheader || !L->hasDedicatedExits()) {
447      LLVM_DEBUG(
448          dbgs()
449          << "Deletion requires Loop with preheader and dedicated exits.\n");
450      return LoopDeletionResult::Unmodified;
451    }
452  
453    BasicBlock *ExitBlock = L->getUniqueExitBlock();
454  
455    // We can't directly branch to an EH pad. Don't bother handling this edge
456    // case.
457    if (ExitBlock && ExitBlock->isEHPad()) {
458      LLVM_DEBUG(dbgs() << "Cannot delete loop exiting to EH pad.\n");
459      return LoopDeletionResult::Unmodified;
460    }
461  
462    if (ExitBlock && isLoopNeverExecuted(L)) {
463      LLVM_DEBUG(dbgs() << "Loop is proven to never execute, delete it!\n");
464      // We need to forget the loop before setting the incoming values of the exit
465      // phis to poison, so we properly invalidate the SCEV expressions for those
466      // phis.
467      SE.forgetLoop(L);
468      // Set incoming value to poison for phi nodes in the exit block.
469      for (PHINode &P : ExitBlock->phis()) {
470        std::fill(P.incoming_values().begin(), P.incoming_values().end(),
471                  PoisonValue::get(P.getType()));
472      }
473      ORE.emit([&]() {
474        return OptimizationRemark(DEBUG_TYPE, "NeverExecutes", L->getStartLoc(),
475                                  L->getHeader())
476               << "Loop deleted because it never executes";
477      });
478      deleteDeadLoop(L, &DT, &SE, &LI, MSSA);
479      ++NumDeleted;
480      return LoopDeletionResult::Deleted;
481    }
482  
483    // The remaining checks below are for a loop being dead because all statements
484    // in the loop are invariant.
485    SmallVector<BasicBlock *, 4> ExitingBlocks;
486    L->getExitingBlocks(ExitingBlocks);
487  
488    // We require that the loop has at most one exit block. Otherwise, we'd be in
489    // the situation of needing to be able to solve statically which exit block
490    // will be branched to, or trying to preserve the branching logic in a loop
491    // invariant manner.
492    if (!ExitBlock && !L->hasNoExitBlocks()) {
493      LLVM_DEBUG(dbgs() << "Deletion requires at most one exit block.\n");
494      return LoopDeletionResult::Unmodified;
495    }
496  
497    // Finally, we have to check that the loop really is dead.
498    bool Changed = false;
499    if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader, LI)) {
500      LLVM_DEBUG(dbgs() << "Loop is not invariant, cannot delete.\n");
501      return Changed ? LoopDeletionResult::Modified
502                     : LoopDeletionResult::Unmodified;
503    }
504  
505    LLVM_DEBUG(dbgs() << "Loop is invariant, delete it!\n");
506    ORE.emit([&]() {
507      return OptimizationRemark(DEBUG_TYPE, "Invariant", L->getStartLoc(),
508                                L->getHeader())
509             << "Loop deleted because it is invariant";
510    });
511    deleteDeadLoop(L, &DT, &SE, &LI, MSSA);
512    ++NumDeleted;
513  
514    return LoopDeletionResult::Deleted;
515  }
516  
517  PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM,
518                                          LoopStandardAnalysisResults &AR,
519                                          LPMUpdater &Updater) {
520  
521    LLVM_DEBUG(dbgs() << "Analyzing Loop for deletion: ");
522    LLVM_DEBUG(L.dump());
523    std::string LoopName = std::string(L.getName());
524    // For the new PM, we can't use OptimizationRemarkEmitter as an analysis
525    // pass. Function analyses need to be preserved across loop transformations
526    // but ORE cannot be preserved (see comment before the pass definition).
527    OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
528    auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE);
529  
530    // If we can prove the backedge isn't taken, just break it and be done.  This
531    // leaves the loop structure in place which means it can handle dispatching
532    // to the right exit based on whatever loop invariant structure remains.
533    if (Result != LoopDeletionResult::Deleted)
534      Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI,
535                                                     AR.MSSA, ORE));
536  
537    if (Result == LoopDeletionResult::Unmodified)
538      return PreservedAnalyses::all();
539  
540    if (Result == LoopDeletionResult::Deleted)
541      Updater.markLoopAsDeleted(L, LoopName);
542  
543    auto PA = getLoopPassPreservedAnalyses();
544    if (AR.MSSA)
545      PA.preserve<MemorySSAAnalysis>();
546    return PA;
547  }
548