xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp (revision da759cfa320d5076b075d15ff3f00ab3ba5634fd)
1 //===----------------- LoopRotationUtils.cpp -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides utilities to convert a loop into a loop with bottom test.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/Utils/LoopRotationUtils.h"
14 #include "llvm/ADT/Statistic.h"
15 #include "llvm/Analysis/AliasAnalysis.h"
16 #include "llvm/Analysis/AssumptionCache.h"
17 #include "llvm/Analysis/BasicAliasAnalysis.h"
18 #include "llvm/Analysis/CodeMetrics.h"
19 #include "llvm/Analysis/DomTreeUpdater.h"
20 #include "llvm/Analysis/GlobalsModRef.h"
21 #include "llvm/Analysis/InstructionSimplify.h"
22 #include "llvm/Analysis/LoopPass.h"
23 #include "llvm/Analysis/MemorySSA.h"
24 #include "llvm/Analysis/MemorySSAUpdater.h"
25 #include "llvm/Analysis/ScalarEvolution.h"
26 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
27 #include "llvm/Analysis/TargetTransformInfo.h"
28 #include "llvm/Analysis/ValueTracking.h"
29 #include "llvm/IR/CFG.h"
30 #include "llvm/IR/DebugInfoMetadata.h"
31 #include "llvm/IR/Dominators.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/IntrinsicInst.h"
34 #include "llvm/IR/Module.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
39 #include "llvm/Transforms/Utils/Local.h"
40 #include "llvm/Transforms/Utils/LoopUtils.h"
41 #include "llvm/Transforms/Utils/SSAUpdater.h"
42 #include "llvm/Transforms/Utils/ValueMapper.h"
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "loop-rotate"
46 
47 STATISTIC(NumRotated, "Number of loops rotated");
48 
49 namespace {
50 /// A simple loop rotation transformation.
51 class LoopRotate {
52   const unsigned MaxHeaderSize;
53   LoopInfo *LI;
54   const TargetTransformInfo *TTI;
55   AssumptionCache *AC;
56   DominatorTree *DT;
57   ScalarEvolution *SE;
58   MemorySSAUpdater *MSSAU;
59   const SimplifyQuery &SQ;
60   bool RotationOnly;
61   bool IsUtilMode;
62 
63 public:
64   LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
65              const TargetTransformInfo *TTI, AssumptionCache *AC,
66              DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
67              const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode)
68       : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
69         MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
70         IsUtilMode(IsUtilMode) {}
71   bool processLoop(Loop *L);
72 
73 private:
74   bool rotateLoop(Loop *L, bool SimplifiedLatch);
75   bool simplifyLoopLatch(Loop *L);
76 };
77 } // end anonymous namespace
78 
79 /// Insert (K, V) pair into the ValueToValueMap, and verify the key did not
80 /// previously exist in the map, and the value was inserted.
81 static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value *K, Value *V) {
82   bool Inserted = VM.insert({K, V}).second;
83   assert(Inserted);
84   (void)Inserted;
85 }
86 /// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
87 /// old header into the preheader.  If there were uses of the values produced by
88 /// these instruction that were outside of the loop, we have to insert PHI nodes
89 /// to merge the two values.  Do this now.
90 static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
91                                             BasicBlock *OrigPreheader,
92                                             ValueToValueMapTy &ValueMap,
93                                 SmallVectorImpl<PHINode*> *InsertedPHIs) {
94   // Remove PHI node entries that are no longer live.
95   BasicBlock::iterator I, E = OrigHeader->end();
96   for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
97     PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
98 
99   // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
100   // as necessary.
101   SSAUpdater SSA(InsertedPHIs);
102   for (I = OrigHeader->begin(); I != E; ++I) {
103     Value *OrigHeaderVal = &*I;
104 
105     // If there are no uses of the value (e.g. because it returns void), there
106     // is nothing to rewrite.
107     if (OrigHeaderVal->use_empty())
108       continue;
109 
110     Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal);
111 
112     // The value now exits in two versions: the initial value in the preheader
113     // and the loop "next" value in the original header.
114     SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
115     SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
116     SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
117 
118     // Visit each use of the OrigHeader instruction.
119     for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
120                              UE = OrigHeaderVal->use_end();
121          UI != UE;) {
122       // Grab the use before incrementing the iterator.
123       Use &U = *UI;
124 
125       // Increment the iterator before removing the use from the list.
126       ++UI;
127 
128       // SSAUpdater can't handle a non-PHI use in the same block as an
129       // earlier def. We can easily handle those cases manually.
130       Instruction *UserInst = cast<Instruction>(U.getUser());
131       if (!isa<PHINode>(UserInst)) {
132         BasicBlock *UserBB = UserInst->getParent();
133 
134         // The original users in the OrigHeader are already using the
135         // original definitions.
136         if (UserBB == OrigHeader)
137           continue;
138 
139         // Users in the OrigPreHeader need to use the value to which the
140         // original definitions are mapped.
141         if (UserBB == OrigPreheader) {
142           U = OrigPreHeaderVal;
143           continue;
144         }
145       }
146 
147       // Anything else can be handled by SSAUpdater.
148       SSA.RewriteUse(U);
149     }
150 
151     // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug
152     // intrinsics.
153     SmallVector<DbgValueInst *, 1> DbgValues;
154     llvm::findDbgValues(DbgValues, OrigHeaderVal);
155     for (auto &DbgValue : DbgValues) {
156       // The original users in the OrigHeader are already using the original
157       // definitions.
158       BasicBlock *UserBB = DbgValue->getParent();
159       if (UserBB == OrigHeader)
160         continue;
161 
162       // Users in the OrigPreHeader need to use the value to which the
163       // original definitions are mapped and anything else can be handled by
164       // the SSAUpdater. To avoid adding PHINodes, check if the value is
165       // available in UserBB, if not substitute undef.
166       Value *NewVal;
167       if (UserBB == OrigPreheader)
168         NewVal = OrigPreHeaderVal;
169       else if (SSA.HasValueForBlock(UserBB))
170         NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
171       else
172         NewVal = UndefValue::get(OrigHeaderVal->getType());
173       DbgValue->setOperand(0,
174                            MetadataAsValue::get(OrigHeaderVal->getContext(),
175                                                 ValueAsMetadata::get(NewVal)));
176     }
177   }
178 }
179 
180 // Look for a phi which is only used outside the loop (via a LCSSA phi)
181 // in the exit from the header. This means that rotating the loop can
182 // remove the phi.
183 static bool shouldRotateLoopExitingLatch(Loop *L) {
184   BasicBlock *Header = L->getHeader();
185   BasicBlock *HeaderExit = Header->getTerminator()->getSuccessor(0);
186   if (L->contains(HeaderExit))
187     HeaderExit = Header->getTerminator()->getSuccessor(1);
188 
189   for (auto &Phi : Header->phis()) {
190     // Look for uses of this phi in the loop/via exits other than the header.
191     if (llvm::any_of(Phi.users(), [HeaderExit](const User *U) {
192           return cast<Instruction>(U)->getParent() != HeaderExit;
193         }))
194       continue;
195     return true;
196   }
197 
198   return false;
199 }
200 
201 /// Rotate loop LP. Return true if the loop is rotated.
202 ///
203 /// \param SimplifiedLatch is true if the latch was just folded into the final
204 /// loop exit. In this case we may want to rotate even though the new latch is
205 /// now an exiting branch. This rotation would have happened had the latch not
206 /// been simplified. However, if SimplifiedLatch is false, then we avoid
207 /// rotating loops in which the latch exits to avoid excessive or endless
208 /// rotation. LoopRotate should be repeatable and converge to a canonical
209 /// form. This property is satisfied because simplifying the loop latch can only
210 /// happen once across multiple invocations of the LoopRotate pass.
211 bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
212   // If the loop has only one block then there is not much to rotate.
213   if (L->getBlocks().size() == 1)
214     return false;
215 
216   BasicBlock *OrigHeader = L->getHeader();
217   BasicBlock *OrigLatch = L->getLoopLatch();
218 
219   BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
220   if (!BI || BI->isUnconditional())
221     return false;
222 
223   // If the loop header is not one of the loop exiting blocks then
224   // either this loop is already rotated or it is not
225   // suitable for loop rotation transformations.
226   if (!L->isLoopExiting(OrigHeader))
227     return false;
228 
229   // If the loop latch already contains a branch that leaves the loop then the
230   // loop is already rotated.
231   if (!OrigLatch)
232     return false;
233 
234   // Rotate if either the loop latch does *not* exit the loop, or if the loop
235   // latch was just simplified. Or if we think it will be profitable.
236   if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
237       !shouldRotateLoopExitingLatch(L))
238     return false;
239 
240   // Check size of original header and reject loop if it is very big or we can't
241   // duplicate blocks inside it.
242   {
243     SmallPtrSet<const Value *, 32> EphValues;
244     CodeMetrics::collectEphemeralValues(L, AC, EphValues);
245 
246     CodeMetrics Metrics;
247     Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
248     if (Metrics.notDuplicatable) {
249       LLVM_DEBUG(
250           dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
251                  << " instructions: ";
252           L->dump());
253       return false;
254     }
255     if (Metrics.convergent) {
256       LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
257                            "instructions: ";
258                  L->dump());
259       return false;
260     }
261     if (Metrics.NumInsts > MaxHeaderSize)
262       return false;
263   }
264 
265   // Now, this loop is suitable for rotation.
266   BasicBlock *OrigPreheader = L->getLoopPreheader();
267 
268   // If the loop could not be converted to canonical form, it must have an
269   // indirectbr in it, just give up.
270   if (!OrigPreheader || !L->hasDedicatedExits())
271     return false;
272 
273   // Anything ScalarEvolution may know about this loop or the PHI nodes
274   // in its header will soon be invalidated. We should also invalidate
275   // all outer loops because insertion and deletion of blocks that happens
276   // during the rotation may violate invariants related to backedge taken
277   // infos in them.
278   if (SE)
279     SE->forgetTopmostLoop(L);
280 
281   LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
282   if (MSSAU && VerifyMemorySSA)
283     MSSAU->getMemorySSA()->verifyMemorySSA();
284 
285   // Find new Loop header. NewHeader is a Header's one and only successor
286   // that is inside loop.  Header's other successor is outside the
287   // loop.  Otherwise loop is not suitable for rotation.
288   BasicBlock *Exit = BI->getSuccessor(0);
289   BasicBlock *NewHeader = BI->getSuccessor(1);
290   if (L->contains(Exit))
291     std::swap(Exit, NewHeader);
292   assert(NewHeader && "Unable to determine new loop header");
293   assert(L->contains(NewHeader) && !L->contains(Exit) &&
294          "Unable to determine loop header and exit blocks");
295 
296   // This code assumes that the new header has exactly one predecessor.
297   // Remove any single-entry PHI nodes in it.
298   assert(NewHeader->getSinglePredecessor() &&
299          "New header doesn't have one pred!");
300   FoldSingleEntryPHINodes(NewHeader);
301 
302   // Begin by walking OrigHeader and populating ValueMap with an entry for
303   // each Instruction.
304   BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
305   ValueToValueMapTy ValueMap, ValueMapMSSA;
306 
307   // For PHI nodes, the value available in OldPreHeader is just the
308   // incoming value from OldPreHeader.
309   for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
310     InsertNewValueIntoMap(ValueMap, PN,
311                           PN->getIncomingValueForBlock(OrigPreheader));
312 
313   // For the rest of the instructions, either hoist to the OrigPreheader if
314   // possible or create a clone in the OldPreHeader if not.
315   Instruction *LoopEntryBranch = OrigPreheader->getTerminator();
316 
317   // Record all debug intrinsics preceding LoopEntryBranch to avoid duplication.
318   using DbgIntrinsicHash =
319       std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>;
320   auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash {
321     return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()};
322   };
323   SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
324   for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend();
325        I != E; ++I) {
326     if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&*I))
327       DbgIntrinsics.insert(makeHash(DII));
328     else
329       break;
330   }
331 
332   while (I != E) {
333     Instruction *Inst = &*I++;
334 
335     // If the instruction's operands are invariant and it doesn't read or write
336     // memory, then it is safe to hoist.  Doing this doesn't change the order of
337     // execution in the preheader, but does prevent the instruction from
338     // executing in each iteration of the loop.  This means it is safe to hoist
339     // something that might trap, but isn't safe to hoist something that reads
340     // memory (without proving that the loop doesn't write).
341     if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
342         !Inst->mayWriteToMemory() && !Inst->isTerminator() &&
343         !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
344       Inst->moveBefore(LoopEntryBranch);
345       continue;
346     }
347 
348     // Otherwise, create a duplicate of the instruction.
349     Instruction *C = Inst->clone();
350 
351     // Eagerly remap the operands of the instruction.
352     RemapInstruction(C, ValueMap,
353                      RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
354 
355     // Avoid inserting the same intrinsic twice.
356     if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C))
357       if (DbgIntrinsics.count(makeHash(DII))) {
358         C->deleteValue();
359         continue;
360       }
361 
362     // With the operands remapped, see if the instruction constant folds or is
363     // otherwise simplifyable.  This commonly occurs because the entry from PHI
364     // nodes allows icmps and other instructions to fold.
365     Value *V = SimplifyInstruction(C, SQ);
366     if (V && LI->replacementPreservesLCSSAForm(C, V)) {
367       // If so, then delete the temporary instruction and stick the folded value
368       // in the map.
369       InsertNewValueIntoMap(ValueMap, Inst, V);
370       if (!C->mayHaveSideEffects()) {
371         C->deleteValue();
372         C = nullptr;
373       }
374     } else {
375       InsertNewValueIntoMap(ValueMap, Inst, C);
376     }
377     if (C) {
378       // Otherwise, stick the new instruction into the new block!
379       C->setName(Inst->getName());
380       C->insertBefore(LoopEntryBranch);
381 
382       if (auto *II = dyn_cast<IntrinsicInst>(C))
383         if (II->getIntrinsicID() == Intrinsic::assume)
384           AC->registerAssumption(II);
385       // MemorySSA cares whether the cloned instruction was inserted or not, and
386       // not whether it can be remapped to a simplified value.
387       if (MSSAU)
388         InsertNewValueIntoMap(ValueMapMSSA, Inst, C);
389     }
390   }
391 
392   // Along with all the other instructions, we just cloned OrigHeader's
393   // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
394   // successors by duplicating their incoming values for OrigHeader.
395   for (BasicBlock *SuccBB : successors(OrigHeader))
396     for (BasicBlock::iterator BI = SuccBB->begin();
397          PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
398       PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
399 
400   // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
401   // OrigPreHeader's old terminator (the original branch into the loop), and
402   // remove the corresponding incoming values from the PHI nodes in OrigHeader.
403   LoopEntryBranch->eraseFromParent();
404 
405   // Update MemorySSA before the rewrite call below changes the 1:1
406   // instruction:cloned_instruction_or_value mapping.
407   if (MSSAU) {
408     InsertNewValueIntoMap(ValueMapMSSA, OrigHeader, OrigPreheader);
409     MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
410                                         ValueMapMSSA);
411   }
412 
413   SmallVector<PHINode*, 2> InsertedPHIs;
414   // If there were any uses of instructions in the duplicated block outside the
415   // loop, update them, inserting PHI nodes as required
416   RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap,
417                                   &InsertedPHIs);
418 
419   // Attach dbg.value intrinsics to the new phis if that phi uses a value that
420   // previously had debug metadata attached. This keeps the debug info
421   // up-to-date in the loop body.
422   if (!InsertedPHIs.empty())
423     insertDebugValuesForPHIs(OrigHeader, InsertedPHIs);
424 
425   // NewHeader is now the header of the loop.
426   L->moveToHeader(NewHeader);
427   assert(L->getHeader() == NewHeader && "Latch block is our new header");
428 
429   // Inform DT about changes to the CFG.
430   if (DT) {
431     // The OrigPreheader branches to the NewHeader and Exit now. Then, inform
432     // the DT about the removed edge to the OrigHeader (that got removed).
433     SmallVector<DominatorTree::UpdateType, 3> Updates;
434     Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit});
435     Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
436     Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
437     DT->applyUpdates(Updates);
438 
439     if (MSSAU) {
440       MSSAU->applyUpdates(Updates, *DT);
441       if (VerifyMemorySSA)
442         MSSAU->getMemorySSA()->verifyMemorySSA();
443     }
444   }
445 
446   // At this point, we've finished our major CFG changes.  As part of cloning
447   // the loop into the preheader we've simplified instructions and the
448   // duplicated conditional branch may now be branching on a constant.  If it is
449   // branching on a constant and if that constant means that we enter the loop,
450   // then we fold away the cond branch to an uncond branch.  This simplifies the
451   // loop in cases important for nested loops, and it also means we don't have
452   // to split as many edges.
453   BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
454   assert(PHBI->isConditional() && "Should be clone of BI condbr!");
455   if (!isa<ConstantInt>(PHBI->getCondition()) ||
456       PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) !=
457           NewHeader) {
458     // The conditional branch can't be folded, handle the general case.
459     // Split edges as necessary to preserve LoopSimplify form.
460 
461     // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
462     // thus is not a preheader anymore.
463     // Split the edge to form a real preheader.
464     BasicBlock *NewPH = SplitCriticalEdge(
465         OrigPreheader, NewHeader,
466         CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
467     NewPH->setName(NewHeader->getName() + ".lr.ph");
468 
469     // Preserve canonical loop form, which means that 'Exit' should have only
470     // one predecessor. Note that Exit could be an exit block for multiple
471     // nested loops, causing both of the edges to now be critical and need to
472     // be split.
473     SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
474     bool SplitLatchEdge = false;
475     for (BasicBlock *ExitPred : ExitPreds) {
476       // We only need to split loop exit edges.
477       Loop *PredLoop = LI->getLoopFor(ExitPred);
478       if (!PredLoop || PredLoop->contains(Exit) ||
479           ExitPred->getTerminator()->isIndirectTerminator())
480         continue;
481       SplitLatchEdge |= L->getLoopLatch() == ExitPred;
482       BasicBlock *ExitSplit = SplitCriticalEdge(
483           ExitPred, Exit,
484           CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
485       ExitSplit->moveBefore(Exit);
486     }
487     assert(SplitLatchEdge &&
488            "Despite splitting all preds, failed to split latch exit?");
489   } else {
490     // We can fold the conditional branch in the preheader, this makes things
491     // simpler. The first step is to remove the extra edge to the Exit block.
492     Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
493     BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
494     NewBI->setDebugLoc(PHBI->getDebugLoc());
495     PHBI->eraseFromParent();
496 
497     // With our CFG finalized, update DomTree if it is available.
498     if (DT) DT->deleteEdge(OrigPreheader, Exit);
499 
500     // Update MSSA too, if available.
501     if (MSSAU)
502       MSSAU->removeEdge(OrigPreheader, Exit);
503   }
504 
505   assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
506   assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
507 
508   if (MSSAU && VerifyMemorySSA)
509     MSSAU->getMemorySSA()->verifyMemorySSA();
510 
511   // Now that the CFG and DomTree are in a consistent state again, try to merge
512   // the OrigHeader block into OrigLatch.  This will succeed if they are
513   // connected by an unconditional branch.  This is just a cleanup so the
514   // emitted code isn't too gross in this common case.
515   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
516   MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
517 
518   if (MSSAU && VerifyMemorySSA)
519     MSSAU->getMemorySSA()->verifyMemorySSA();
520 
521   LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
522 
523   ++NumRotated;
524   return true;
525 }
526 
527 /// Determine whether the instructions in this range may be safely and cheaply
528 /// speculated. This is not an important enough situation to develop complex
529 /// heuristics. We handle a single arithmetic instruction along with any type
530 /// conversions.
531 static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
532                                   BasicBlock::iterator End, Loop *L) {
533   bool seenIncrement = false;
534   bool MultiExitLoop = false;
535 
536   if (!L->getExitingBlock())
537     MultiExitLoop = true;
538 
539   for (BasicBlock::iterator I = Begin; I != End; ++I) {
540 
541     if (!isSafeToSpeculativelyExecute(&*I))
542       return false;
543 
544     if (isa<DbgInfoIntrinsic>(I))
545       continue;
546 
547     switch (I->getOpcode()) {
548     default:
549       return false;
550     case Instruction::GetElementPtr:
551       // GEPs are cheap if all indices are constant.
552       if (!cast<GEPOperator>(I)->hasAllConstantIndices())
553         return false;
554       // fall-thru to increment case
555       LLVM_FALLTHROUGH;
556     case Instruction::Add:
557     case Instruction::Sub:
558     case Instruction::And:
559     case Instruction::Or:
560     case Instruction::Xor:
561     case Instruction::Shl:
562     case Instruction::LShr:
563     case Instruction::AShr: {
564       Value *IVOpnd =
565           !isa<Constant>(I->getOperand(0))
566               ? I->getOperand(0)
567               : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr;
568       if (!IVOpnd)
569         return false;
570 
571       // If increment operand is used outside of the loop, this speculation
572       // could cause extra live range interference.
573       if (MultiExitLoop) {
574         for (User *UseI : IVOpnd->users()) {
575           auto *UserInst = cast<Instruction>(UseI);
576           if (!L->contains(UserInst))
577             return false;
578         }
579       }
580 
581       if (seenIncrement)
582         return false;
583       seenIncrement = true;
584       break;
585     }
586     case Instruction::Trunc:
587     case Instruction::ZExt:
588     case Instruction::SExt:
589       // ignore type conversions
590       break;
591     }
592   }
593   return true;
594 }
595 
596 /// Fold the loop tail into the loop exit by speculating the loop tail
597 /// instructions. Typically, this is a single post-increment. In the case of a
598 /// simple 2-block loop, hoisting the increment can be much better than
599 /// duplicating the entire loop header. In the case of loops with early exits,
600 /// rotation will not work anyway, but simplifyLoopLatch will put the loop in
601 /// canonical form so downstream passes can handle it.
602 ///
603 /// I don't believe this invalidates SCEV.
604 bool LoopRotate::simplifyLoopLatch(Loop *L) {
605   BasicBlock *Latch = L->getLoopLatch();
606   if (!Latch || Latch->hasAddressTaken())
607     return false;
608 
609   BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
610   if (!Jmp || !Jmp->isUnconditional())
611     return false;
612 
613   BasicBlock *LastExit = Latch->getSinglePredecessor();
614   if (!LastExit || !L->isLoopExiting(LastExit))
615     return false;
616 
617   BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
618   if (!BI)
619     return false;
620 
621   if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L))
622     return false;
623 
624   LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
625                     << LastExit->getName() << "\n");
626 
627   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
628   MergeBlockIntoPredecessor(Latch, &DTU, LI, MSSAU, nullptr,
629                             /*PredecessorWithTwoSuccessors=*/true);
630 
631   if (MSSAU && VerifyMemorySSA)
632     MSSAU->getMemorySSA()->verifyMemorySSA();
633 
634   return true;
635 }
636 
637 /// Rotate \c L, and return true if any modification was made.
638 bool LoopRotate::processLoop(Loop *L) {
639   // Save the loop metadata.
640   MDNode *LoopMD = L->getLoopID();
641 
642   bool SimplifiedLatch = false;
643 
644   // Simplify the loop latch before attempting to rotate the header
645   // upward. Rotation may not be needed if the loop tail can be folded into the
646   // loop exit.
647   if (!RotationOnly)
648     SimplifiedLatch = simplifyLoopLatch(L);
649 
650   bool MadeChange = rotateLoop(L, SimplifiedLatch);
651   assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) &&
652          "Loop latch should be exiting after loop-rotate.");
653 
654   // Restore the loop metadata.
655   // NB! We presume LoopRotation DOESN'T ADD its own metadata.
656   if ((MadeChange || SimplifiedLatch) && LoopMD)
657     L->setLoopID(LoopMD);
658 
659   return MadeChange || SimplifiedLatch;
660 }
661 
662 
663 /// The utility to convert a loop into a loop with bottom test.
664 bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
665                         AssumptionCache *AC, DominatorTree *DT,
666                         ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
667                         const SimplifyQuery &SQ, bool RotationOnly = true,
668                         unsigned Threshold = unsigned(-1),
669                         bool IsUtilMode = true) {
670   if (MSSAU && VerifyMemorySSA)
671     MSSAU->getMemorySSA()->verifyMemorySSA();
672   LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
673                 IsUtilMode);
674   if (MSSAU && VerifyMemorySSA)
675     MSSAU->getMemorySSA()->verifyMemorySSA();
676 
677   return LR.processLoop(L);
678 }
679