xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopDeletion.cpp (revision f81cdf24ba5436367377f7c8e8f51f6df2a75ca7)
1 //===- LoopDeletion.cpp - Dead Loop Deletion Pass ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the Dead Loop Deletion Pass. This pass is responsible
10 // for eliminating loops with non-infinite computable trip counts that have no
11 // side effects or volatile instructions, and do not contribute to the
12 // computation of the function's return value.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/Scalar/LoopDeletion.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/Analysis/CFG.h"
20 #include "llvm/Analysis/InstructionSimplify.h"
21 #include "llvm/Analysis/LoopIterator.h"
22 #include "llvm/Analysis/LoopPass.h"
23 #include "llvm/Analysis/MemorySSA.h"
24 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
25 #include "llvm/Analysis/ScalarEvolution.h"
26 #include "llvm/IR/Dominators.h"
27 
28 #include "llvm/IR/PatternMatch.h"
29 #include "llvm/Transforms/Scalar/LoopPassManager.h"
30 #include "llvm/Transforms/Utils/LoopUtils.h"
31 
32 using namespace llvm;
33 
34 #define DEBUG_TYPE "loop-delete"
35 
36 STATISTIC(NumDeleted, "Number of loops deleted");
37 STATISTIC(NumBackedgesBroken,
38           "Number of loops for which we managed to break the backedge");
39 
40 static cl::opt<bool> EnableSymbolicExecution(
41     "loop-deletion-enable-symbolic-execution", cl::Hidden, cl::init(true),
42     cl::desc("Break backedge through symbolic execution of 1st iteration "
43              "attempting to prove that the backedge is never taken"));
44 
45 enum class LoopDeletionResult {
46   Unmodified,
47   Modified,
48   Deleted,
49 };
50 
51 static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) {
52   if (A == LoopDeletionResult::Deleted || B == LoopDeletionResult::Deleted)
53     return LoopDeletionResult::Deleted;
54   if (A == LoopDeletionResult::Modified || B == LoopDeletionResult::Modified)
55     return LoopDeletionResult::Modified;
56   return LoopDeletionResult::Unmodified;
57 }
58 
59 /// Determines if a loop is dead.
60 ///
61 /// This assumes that we've already checked for unique exit and exiting blocks,
62 /// and that the code is in LCSSA form.
63 static bool isLoopDead(Loop *L, ScalarEvolution &SE,
64                        SmallVectorImpl<BasicBlock *> &ExitingBlocks,
65                        BasicBlock *ExitBlock, bool &Changed,
66                        BasicBlock *Preheader, LoopInfo &LI) {
67   // Make sure that all PHI entries coming from the loop are loop invariant.
68   // Because the code is in LCSSA form, any values used outside of the loop
69   // must pass through a PHI in the exit block, meaning that this check is
70   // sufficient to guarantee that no loop-variant values are used outside
71   // of the loop.
72   bool AllEntriesInvariant = true;
73   bool AllOutgoingValuesSame = true;
74   if (ExitBlock) {
75     for (PHINode &P : ExitBlock->phis()) {
76       Value *incoming = P.getIncomingValueForBlock(ExitingBlocks[0]);
77 
78       // Make sure all exiting blocks produce the same incoming value for the
79       // block. If there are different incoming values for different exiting
80       // blocks, then it is impossible to statically determine which value
81       // should be used.
82       AllOutgoingValuesSame =
83           all_of(ArrayRef(ExitingBlocks).slice(1), [&](BasicBlock *BB) {
84             return incoming == P.getIncomingValueForBlock(BB);
85           });
86 
87       if (!AllOutgoingValuesSame)
88         break;
89 
90       if (Instruction *I = dyn_cast<Instruction>(incoming)) {
91         if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator(),
92                                   /*MSSAU=*/nullptr, &SE)) {
93           AllEntriesInvariant = false;
94           break;
95         }
96       }
97     }
98   }
99 
100   if (!AllEntriesInvariant || !AllOutgoingValuesSame)
101     return false;
102 
103   // Make sure that no instructions in the block have potential side-effects.
104   // This includes instructions that could write to memory, and loads that are
105   // marked volatile.
106   for (const auto &I : L->blocks())
107     if (any_of(*I, [](Instruction &I) {
108           return I.mayHaveSideEffects() && !I.isDroppable();
109         }))
110       return false;
111 
112   // The loop or any of its sub-loops looping infinitely is legal. The loop can
113   // only be considered dead if either
114   // a. the function is mustprogress.
115   // b. all (sub-)loops are mustprogress or have a known trip-count.
116   if (L->getHeader()->getParent()->mustProgress())
117     return true;
118 
119   LoopBlocksRPO RPOT(L);
120   RPOT.perform(&LI);
121   // If the loop contains an irreducible cycle, it may loop infinitely.
122   if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
123     return false;
124 
125   SmallVector<Loop *, 8> WorkList;
126   WorkList.push_back(L);
127   while (!WorkList.empty()) {
128     Loop *Current = WorkList.pop_back_val();
129     if (hasMustProgress(Current))
130       continue;
131 
132     const SCEV *S = SE.getConstantMaxBackedgeTakenCount(Current);
133     if (isa<SCEVCouldNotCompute>(S)) {
134       LLVM_DEBUG(
135           dbgs() << "Could not compute SCEV MaxBackedgeTakenCount and was "
136                     "not required to make progress.\n");
137       return false;
138     }
139     WorkList.append(Current->begin(), Current->end());
140   }
141   return true;
142 }
143 
144 /// This function returns true if there is no viable path from the
145 /// entry block to the header of \p L. Right now, it only does
146 /// a local search to save compile time.
147 static bool isLoopNeverExecuted(Loop *L) {
148   using namespace PatternMatch;
149 
150   auto *Preheader = L->getLoopPreheader();
151   // TODO: We can relax this constraint, since we just need a loop
152   // predecessor.
153   assert(Preheader && "Needs preheader!");
154 
155   if (Preheader->isEntryBlock())
156     return false;
157   // All predecessors of the preheader should have a constant conditional
158   // branch, with the loop's preheader as not-taken.
159   for (auto *Pred: predecessors(Preheader)) {
160     BasicBlock *Taken, *NotTaken;
161     ConstantInt *Cond;
162     if (!match(Pred->getTerminator(),
163                m_Br(m_ConstantInt(Cond), Taken, NotTaken)))
164       return false;
165     if (!Cond->getZExtValue())
166       std::swap(Taken, NotTaken);
167     if (Taken == Preheader)
168       return false;
169   }
170   assert(!pred_empty(Preheader) &&
171          "Preheader should have predecessors at this point!");
172   // All the predecessors have the loop preheader as not-taken target.
173   return true;
174 }
175 
176 static Value *
177 getValueOnFirstIteration(Value *V, DenseMap<Value *, Value *> &FirstIterValue,
178                          const SimplifyQuery &SQ) {
179   // Quick hack: do not flood cache with non-instruction values.
180   if (!isa<Instruction>(V))
181     return V;
182   // Do we already know cached result?
183   auto Existing = FirstIterValue.find(V);
184   if (Existing != FirstIterValue.end())
185     return Existing->second;
186   Value *FirstIterV = nullptr;
187   if (auto *BO = dyn_cast<BinaryOperator>(V)) {
188     Value *LHS =
189         getValueOnFirstIteration(BO->getOperand(0), FirstIterValue, SQ);
190     Value *RHS =
191         getValueOnFirstIteration(BO->getOperand(1), FirstIterValue, SQ);
192     FirstIterV = simplifyBinOp(BO->getOpcode(), LHS, RHS, SQ);
193   } else if (auto *Cmp = dyn_cast<ICmpInst>(V)) {
194     Value *LHS =
195         getValueOnFirstIteration(Cmp->getOperand(0), FirstIterValue, SQ);
196     Value *RHS =
197         getValueOnFirstIteration(Cmp->getOperand(1), FirstIterValue, SQ);
198     FirstIterV = simplifyICmpInst(Cmp->getPredicate(), LHS, RHS, SQ);
199   } else if (auto *Select = dyn_cast<SelectInst>(V)) {
200     Value *Cond =
201         getValueOnFirstIteration(Select->getCondition(), FirstIterValue, SQ);
202     if (auto *C = dyn_cast<ConstantInt>(Cond)) {
203       auto *Selected = C->isAllOnesValue() ? Select->getTrueValue()
204                                            : Select->getFalseValue();
205       FirstIterV = getValueOnFirstIteration(Selected, FirstIterValue, SQ);
206     }
207   }
208   if (!FirstIterV)
209     FirstIterV = V;
210   FirstIterValue[V] = FirstIterV;
211   return FirstIterV;
212 }
213 
214 // Try to prove that one of conditions that dominates the latch must exit on 1st
215 // iteration.
216 static bool canProveExitOnFirstIteration(Loop *L, DominatorTree &DT,
217                                          LoopInfo &LI) {
218   // Disabled by option.
219   if (!EnableSymbolicExecution)
220     return false;
221 
222   BasicBlock *Predecessor = L->getLoopPredecessor();
223   BasicBlock *Latch = L->getLoopLatch();
224 
225   if (!Predecessor || !Latch)
226     return false;
227 
228   LoopBlocksRPO RPOT(L);
229   RPOT.perform(&LI);
230 
231   // For the optimization to be correct, we need RPOT to have a property that
232   // each block is processed after all its predecessors, which may only be
233   // violated for headers of the current loop and all nested loops. Irreducible
234   // CFG provides multiple ways to break this assumption, so we do not want to
235   // deal with it.
236   if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
237     return false;
238 
239   BasicBlock *Header = L->getHeader();
240   // Blocks that are reachable on the 1st iteration.
241   SmallPtrSet<BasicBlock *, 4> LiveBlocks;
242   // Edges that are reachable on the 1st iteration.
243   DenseSet<BasicBlockEdge> LiveEdges;
244   LiveBlocks.insert(Header);
245 
246   SmallPtrSet<BasicBlock *, 4> Visited;
247   auto MarkLiveEdge = [&](BasicBlock *From, BasicBlock *To) {
248     assert(LiveBlocks.count(From) && "Must be live!");
249     assert((LI.isLoopHeader(To) || !Visited.count(To)) &&
250            "Only canonical backedges are allowed. Irreducible CFG?");
251     assert((LiveBlocks.count(To) || !Visited.count(To)) &&
252            "We already discarded this block as dead!");
253     LiveBlocks.insert(To);
254     LiveEdges.insert({ From, To });
255   };
256 
257   auto MarkAllSuccessorsLive = [&](BasicBlock *BB) {
258     for (auto *Succ : successors(BB))
259       MarkLiveEdge(BB, Succ);
260   };
261 
262   // Check if there is only one value coming from all live predecessor blocks.
263   // Note that because we iterate in RPOT, we have already visited all its
264   // (non-latch) predecessors.
265   auto GetSoleInputOnFirstIteration = [&](PHINode & PN)->Value * {
266     BasicBlock *BB = PN.getParent();
267     bool HasLivePreds = false;
268     (void)HasLivePreds;
269     if (BB == Header)
270       return PN.getIncomingValueForBlock(Predecessor);
271     Value *OnlyInput = nullptr;
272     for (auto *Pred : predecessors(BB))
273       if (LiveEdges.count({ Pred, BB })) {
274         HasLivePreds = true;
275         Value *Incoming = PN.getIncomingValueForBlock(Pred);
276         // Skip undefs. If they are present, we can assume they are equal to
277         // the non-undef input.
278         if (isa<UndefValue>(Incoming))
279           continue;
280         // Two inputs.
281         if (OnlyInput && OnlyInput != Incoming)
282           return nullptr;
283         OnlyInput = Incoming;
284       }
285 
286     assert(HasLivePreds && "No live predecessors?");
287     // If all incoming live value were undefs, return undef.
288     return OnlyInput ? OnlyInput : UndefValue::get(PN.getType());
289   };
290   DenseMap<Value *, Value *> FirstIterValue;
291 
292   // Use the following algorithm to prove we never take the latch on the 1st
293   // iteration:
294   // 1. Traverse in topological order, so that whenever we visit a block, all
295   //    its predecessors are already visited.
296   // 2. If we can prove that the block may have only 1 predecessor on the 1st
297   //    iteration, map all its phis onto input from this predecessor.
298   // 3a. If we can prove which successor of out block is taken on the 1st
299   //     iteration, mark this successor live.
300   // 3b. If we cannot prove it, conservatively assume that all successors are
301   //     live.
302   auto &DL = Header->getModule()->getDataLayout();
303   const SimplifyQuery SQ(DL);
304   for (auto *BB : RPOT) {
305     Visited.insert(BB);
306 
307     // This block is not reachable on the 1st iterations.
308     if (!LiveBlocks.count(BB))
309       continue;
310 
311     // Skip inner loops.
312     if (LI.getLoopFor(BB) != L) {
313       MarkAllSuccessorsLive(BB);
314       continue;
315     }
316 
317     // If Phi has only one input from all live input blocks, use it.
318     for (auto &PN : BB->phis()) {
319       if (!PN.getType()->isIntegerTy())
320         continue;
321       auto *Incoming = GetSoleInputOnFirstIteration(PN);
322       if (Incoming && DT.dominates(Incoming, BB->getTerminator())) {
323         Value *FirstIterV =
324             getValueOnFirstIteration(Incoming, FirstIterValue, SQ);
325         FirstIterValue[&PN] = FirstIterV;
326       }
327     }
328 
329     using namespace PatternMatch;
330     Value *Cond;
331     BasicBlock *IfTrue, *IfFalse;
332     auto *Term = BB->getTerminator();
333     if (match(Term, m_Br(m_Value(Cond),
334                          m_BasicBlock(IfTrue), m_BasicBlock(IfFalse)))) {
335       auto *ICmp = dyn_cast<ICmpInst>(Cond);
336       if (!ICmp || !ICmp->getType()->isIntegerTy()) {
337         MarkAllSuccessorsLive(BB);
338         continue;
339       }
340 
341       // Can we prove constant true or false for this condition?
342       auto *KnownCondition = getValueOnFirstIteration(ICmp, FirstIterValue, SQ);
343       if (KnownCondition == ICmp) {
344         // Failed to simplify.
345         MarkAllSuccessorsLive(BB);
346         continue;
347       }
348       if (isa<UndefValue>(KnownCondition)) {
349         // TODO: According to langref, branching by undef is undefined behavior.
350         // It means that, theoretically, we should be able to just continue
351         // without marking any successors as live. However, we are not certain
352         // how correct our compiler is at handling such cases. So we are being
353         // very conservative here.
354         //
355         // If there is a non-loop successor, always assume this branch leaves the
356         // loop. Otherwise, arbitrarily take IfTrue.
357         //
358         // Once we are certain that branching by undef is handled correctly by
359         // other transforms, we should not mark any successors live here.
360         if (L->contains(IfTrue) && L->contains(IfFalse))
361           MarkLiveEdge(BB, IfTrue);
362         continue;
363       }
364       auto *ConstCondition = dyn_cast<ConstantInt>(KnownCondition);
365       if (!ConstCondition) {
366         // Non-constant condition, cannot analyze any further.
367         MarkAllSuccessorsLive(BB);
368         continue;
369       }
370       if (ConstCondition->isAllOnesValue())
371         MarkLiveEdge(BB, IfTrue);
372       else
373         MarkLiveEdge(BB, IfFalse);
374     } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) {
375       auto *SwitchValue = SI->getCondition();
376       auto *SwitchValueOnFirstIter =
377           getValueOnFirstIteration(SwitchValue, FirstIterValue, SQ);
378       auto *ConstSwitchValue = dyn_cast<ConstantInt>(SwitchValueOnFirstIter);
379       if (!ConstSwitchValue) {
380         MarkAllSuccessorsLive(BB);
381         continue;
382       }
383       auto CaseIterator = SI->findCaseValue(ConstSwitchValue);
384       MarkLiveEdge(BB, CaseIterator->getCaseSuccessor());
385     } else {
386       MarkAllSuccessorsLive(BB);
387       continue;
388     }
389   }
390 
391   // We can break the latch if it wasn't live.
392   return !LiveEdges.count({ Latch, Header });
393 }
394 
395 /// If we can prove the backedge is untaken, remove it.  This destroys the
396 /// loop, but leaves the (now trivially loop invariant) control flow and
397 /// side effects (if any) in place.
398 static LoopDeletionResult
399 breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
400                         LoopInfo &LI, MemorySSA *MSSA,
401                         OptimizationRemarkEmitter &ORE) {
402   assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
403 
404   if (!L->getLoopLatch())
405     return LoopDeletionResult::Unmodified;
406 
407   auto *BTCMax = SE.getConstantMaxBackedgeTakenCount(L);
408   if (!BTCMax->isZero()) {
409     auto *BTC = SE.getBackedgeTakenCount(L);
410     if (!BTC->isZero()) {
411       if (!isa<SCEVCouldNotCompute>(BTC) && SE.isKnownNonZero(BTC))
412         return LoopDeletionResult::Unmodified;
413       if (!canProveExitOnFirstIteration(L, DT, LI))
414         return LoopDeletionResult::Unmodified;
415     }
416   }
417   ++NumBackedgesBroken;
418   breakLoopBackedge(L, DT, SE, LI, MSSA);
419   return LoopDeletionResult::Deleted;
420 }
421 
422 /// Remove a loop if it is dead.
423 ///
424 /// A loop is considered dead either if it does not impact the observable
425 /// behavior of the program other than finite running time, or if it is
426 /// required to make progress by an attribute such as 'mustprogress' or
427 /// 'llvm.loop.mustprogress' and does not make any. This may remove
428 /// infinite loops that have been required to make progress.
429 ///
430 /// This entire process relies pretty heavily on LoopSimplify form and LCSSA in
431 /// order to make various safety checks work.
432 ///
433 /// \returns true if any changes were made. This may mutate the loop even if it
434 /// is unable to delete it due to hoisting trivially loop invariant
435 /// instructions out of the loop.
436 static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
437                                            ScalarEvolution &SE, LoopInfo &LI,
438                                            MemorySSA *MSSA,
439                                            OptimizationRemarkEmitter &ORE) {
440   assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
441 
442   // We can only remove the loop if there is a preheader that we can branch from
443   // after removing it. Also, if LoopSimplify form is not available, stay out
444   // of trouble.
445   BasicBlock *Preheader = L->getLoopPreheader();
446   if (!Preheader || !L->hasDedicatedExits()) {
447     LLVM_DEBUG(
448         dbgs()
449         << "Deletion requires Loop with preheader and dedicated exits.\n");
450     return LoopDeletionResult::Unmodified;
451   }
452 
453   BasicBlock *ExitBlock = L->getUniqueExitBlock();
454 
455   if (ExitBlock && isLoopNeverExecuted(L)) {
456     LLVM_DEBUG(dbgs() << "Loop is proven to never execute, delete it!\n");
457     // We need to forget the loop before setting the incoming values of the exit
458     // phis to poison, so we properly invalidate the SCEV expressions for those
459     // phis.
460     SE.forgetLoop(L);
461     // Set incoming value to poison for phi nodes in the exit block.
462     for (PHINode &P : ExitBlock->phis()) {
463       std::fill(P.incoming_values().begin(), P.incoming_values().end(),
464                 PoisonValue::get(P.getType()));
465     }
466     ORE.emit([&]() {
467       return OptimizationRemark(DEBUG_TYPE, "NeverExecutes", L->getStartLoc(),
468                                 L->getHeader())
469              << "Loop deleted because it never executes";
470     });
471     deleteDeadLoop(L, &DT, &SE, &LI, MSSA);
472     ++NumDeleted;
473     return LoopDeletionResult::Deleted;
474   }
475 
476   // The remaining checks below are for a loop being dead because all statements
477   // in the loop are invariant.
478   SmallVector<BasicBlock *, 4> ExitingBlocks;
479   L->getExitingBlocks(ExitingBlocks);
480 
481   // We require that the loop has at most one exit block. Otherwise, we'd be in
482   // the situation of needing to be able to solve statically which exit block
483   // will be branched to, or trying to preserve the branching logic in a loop
484   // invariant manner.
485   if (!ExitBlock && !L->hasNoExitBlocks()) {
486     LLVM_DEBUG(dbgs() << "Deletion requires at most one exit block.\n");
487     return LoopDeletionResult::Unmodified;
488   }
489 
490   // We can't directly branch to an EH pad. Don't bother handling this edge
491   // case.
492   if (ExitBlock && ExitBlock->isEHPad()) {
493     LLVM_DEBUG(dbgs() << "Cannot delete loop exiting to EH pad.\n");
494     return LoopDeletionResult::Unmodified;
495   }
496 
497   // Finally, we have to check that the loop really is dead.
498   bool Changed = false;
499   if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader, LI)) {
500     LLVM_DEBUG(dbgs() << "Loop is not invariant, cannot delete.\n");
501     return Changed ? LoopDeletionResult::Modified
502                    : LoopDeletionResult::Unmodified;
503   }
504 
505   LLVM_DEBUG(dbgs() << "Loop is invariant, delete it!\n");
506   ORE.emit([&]() {
507     return OptimizationRemark(DEBUG_TYPE, "Invariant", L->getStartLoc(),
508                               L->getHeader())
509            << "Loop deleted because it is invariant";
510   });
511   deleteDeadLoop(L, &DT, &SE, &LI, MSSA);
512   ++NumDeleted;
513 
514   return LoopDeletionResult::Deleted;
515 }
516 
517 PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM,
518                                         LoopStandardAnalysisResults &AR,
519                                         LPMUpdater &Updater) {
520 
521   LLVM_DEBUG(dbgs() << "Analyzing Loop for deletion: ");
522   LLVM_DEBUG(L.dump());
523   std::string LoopName = std::string(L.getName());
524   // For the new PM, we can't use OptimizationRemarkEmitter as an analysis
525   // pass. Function analyses need to be preserved across loop transformations
526   // but ORE cannot be preserved (see comment before the pass definition).
527   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
528   auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE);
529 
530   // If we can prove the backedge isn't taken, just break it and be done.  This
531   // leaves the loop structure in place which means it can handle dispatching
532   // to the right exit based on whatever loop invariant structure remains.
533   if (Result != LoopDeletionResult::Deleted)
534     Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI,
535                                                    AR.MSSA, ORE));
536 
537   if (Result == LoopDeletionResult::Unmodified)
538     return PreservedAnalyses::all();
539 
540   if (Result == LoopDeletionResult::Deleted)
541     Updater.markLoopAsDeleted(L, LoopName);
542 
543   auto PA = getLoopPassPreservedAnalyses();
544   if (AR.MSSA)
545     PA.preserve<MemorySSAAnalysis>();
546   return PA;
547 }
548