10b57cec5SDimitry Andric //===- LoopInterchange.cpp - Loop interchange pass-------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This Pass handles loop interchange transform.
100b57cec5SDimitry Andric // This pass interchanges loops to provide a more cache-friendly memory access
110b57cec5SDimitry Andric // patterns.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric
15e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar/LoopInterchange.h"
160b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
170b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
180b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
190b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
200b57cec5SDimitry Andric #include "llvm/Analysis/DependenceAnalysis.h"
2181ad6265SDimitry Andric #include "llvm/Analysis/LoopCacheAnalysis.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
23fe6060f1SDimitry Andric #include "llvm/Analysis/LoopNestAnalysis.h"
240b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h"
250b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
260b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
270b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h"
280b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
290b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
300b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
310b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
320b57cec5SDimitry Andric #include "llvm/IR/Function.h"
330b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h"
340b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
350b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
360b57cec5SDimitry Andric #include "llvm/IR/User.h"
370b57cec5SDimitry Andric #include "llvm/IR/Value.h"
380b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
390b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
400b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
410b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
420b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
43bdd1243dSDimitry Andric #include "llvm/Transforms/Scalar/LoopPassManager.h"
440b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
450b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
460b57cec5SDimitry Andric #include <cassert>
470b57cec5SDimitry Andric #include <utility>
480b57cec5SDimitry Andric #include <vector>
490b57cec5SDimitry Andric
500b57cec5SDimitry Andric using namespace llvm;
510b57cec5SDimitry Andric
520b57cec5SDimitry Andric #define DEBUG_TYPE "loop-interchange"
530b57cec5SDimitry Andric
540b57cec5SDimitry Andric STATISTIC(LoopsInterchanged, "Number of loops interchanged");
550b57cec5SDimitry Andric
560b57cec5SDimitry Andric static cl::opt<int> LoopInterchangeCostThreshold(
570b57cec5SDimitry Andric "loop-interchange-threshold", cl::init(0), cl::Hidden,
580b57cec5SDimitry Andric cl::desc("Interchange if you gain more than this number"));
590b57cec5SDimitry Andric
600b57cec5SDimitry Andric namespace {
610b57cec5SDimitry Andric
620b57cec5SDimitry Andric using LoopVector = SmallVector<Loop *, 8>;
630b57cec5SDimitry Andric
640b57cec5SDimitry Andric // TODO: Check if we can use a sparse matrix here.
650b57cec5SDimitry Andric using CharMatrix = std::vector<std::vector<char>>;
660b57cec5SDimitry Andric
670b57cec5SDimitry Andric } // end anonymous namespace
680b57cec5SDimitry Andric
690b57cec5SDimitry Andric // Maximum number of dependencies that can be handled in the dependency matrix.
700b57cec5SDimitry Andric static const unsigned MaxMemInstrCount = 100;
710b57cec5SDimitry Andric
720b57cec5SDimitry Andric // Maximum loop depth supported.
730b57cec5SDimitry Andric static const unsigned MaxLoopNestDepth = 10;
740b57cec5SDimitry Andric
750b57cec5SDimitry Andric #ifdef DUMP_DEP_MATRICIES
printDepMatrix(CharMatrix & DepMatrix)760b57cec5SDimitry Andric static void printDepMatrix(CharMatrix &DepMatrix) {
770b57cec5SDimitry Andric for (auto &Row : DepMatrix) {
780b57cec5SDimitry Andric for (auto D : Row)
790b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << D << " ");
800b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n");
810b57cec5SDimitry Andric }
820b57cec5SDimitry Andric }
830b57cec5SDimitry Andric #endif
840b57cec5SDimitry Andric
populateDependencyMatrix(CharMatrix & DepMatrix,unsigned Level,Loop * L,DependenceInfo * DI,ScalarEvolution * SE)850b57cec5SDimitry Andric static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
86bdd1243dSDimitry Andric Loop *L, DependenceInfo *DI,
87bdd1243dSDimitry Andric ScalarEvolution *SE) {
880b57cec5SDimitry Andric using ValueVector = SmallVector<Value *, 16>;
890b57cec5SDimitry Andric
900b57cec5SDimitry Andric ValueVector MemInstr;
910b57cec5SDimitry Andric
920b57cec5SDimitry Andric // For each block.
930b57cec5SDimitry Andric for (BasicBlock *BB : L->blocks()) {
940b57cec5SDimitry Andric // Scan the BB and collect legal loads and stores.
950b57cec5SDimitry Andric for (Instruction &I : *BB) {
960b57cec5SDimitry Andric if (!isa<Instruction>(I))
970b57cec5SDimitry Andric return false;
980b57cec5SDimitry Andric if (auto *Ld = dyn_cast<LoadInst>(&I)) {
990b57cec5SDimitry Andric if (!Ld->isSimple())
1000b57cec5SDimitry Andric return false;
1010b57cec5SDimitry Andric MemInstr.push_back(&I);
1020b57cec5SDimitry Andric } else if (auto *St = dyn_cast<StoreInst>(&I)) {
1030b57cec5SDimitry Andric if (!St->isSimple())
1040b57cec5SDimitry Andric return false;
1050b57cec5SDimitry Andric MemInstr.push_back(&I);
1060b57cec5SDimitry Andric }
1070b57cec5SDimitry Andric }
1080b57cec5SDimitry Andric }
1090b57cec5SDimitry Andric
1100b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found " << MemInstr.size()
1110b57cec5SDimitry Andric << " Loads and Stores to analyze\n");
1120b57cec5SDimitry Andric
1130b57cec5SDimitry Andric ValueVector::iterator I, IE, J, JE;
1140b57cec5SDimitry Andric
1150b57cec5SDimitry Andric for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
1160b57cec5SDimitry Andric for (J = I, JE = MemInstr.end(); J != JE; ++J) {
1170b57cec5SDimitry Andric std::vector<char> Dep;
1180b57cec5SDimitry Andric Instruction *Src = cast<Instruction>(*I);
1190b57cec5SDimitry Andric Instruction *Dst = cast<Instruction>(*J);
1200b57cec5SDimitry Andric // Ignore Input dependencies.
1210b57cec5SDimitry Andric if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
1220b57cec5SDimitry Andric continue;
1230b57cec5SDimitry Andric // Track Output, Flow, and Anti dependencies.
1240b57cec5SDimitry Andric if (auto D = DI->depends(Src, Dst, true)) {
1250b57cec5SDimitry Andric assert(D->isOrdered() && "Expected an output, flow or anti dep.");
126bdd1243dSDimitry Andric // If the direction vector is negative, normalize it to
127bdd1243dSDimitry Andric // make it non-negative.
128bdd1243dSDimitry Andric if (D->normalize(SE))
129bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
1300b57cec5SDimitry Andric LLVM_DEBUG(StringRef DepType =
1310b57cec5SDimitry Andric D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
1320b57cec5SDimitry Andric dbgs() << "Found " << DepType
1330b57cec5SDimitry Andric << " dependency between Src and Dst\n"
1340b57cec5SDimitry Andric << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
1350b57cec5SDimitry Andric unsigned Levels = D->getLevels();
1360b57cec5SDimitry Andric char Direction;
1370b57cec5SDimitry Andric for (unsigned II = 1; II <= Levels; ++II) {
138bdd1243dSDimitry Andric if (D->isScalar(II)) {
1390b57cec5SDimitry Andric Direction = 'S';
1400b57cec5SDimitry Andric Dep.push_back(Direction);
1410b57cec5SDimitry Andric } else {
1420b57cec5SDimitry Andric unsigned Dir = D->getDirection(II);
1430b57cec5SDimitry Andric if (Dir == Dependence::DVEntry::LT ||
1440b57cec5SDimitry Andric Dir == Dependence::DVEntry::LE)
1450b57cec5SDimitry Andric Direction = '<';
1460b57cec5SDimitry Andric else if (Dir == Dependence::DVEntry::GT ||
1470b57cec5SDimitry Andric Dir == Dependence::DVEntry::GE)
1480b57cec5SDimitry Andric Direction = '>';
1490b57cec5SDimitry Andric else if (Dir == Dependence::DVEntry::EQ)
1500b57cec5SDimitry Andric Direction = '=';
1510b57cec5SDimitry Andric else
1520b57cec5SDimitry Andric Direction = '*';
1530b57cec5SDimitry Andric Dep.push_back(Direction);
1540b57cec5SDimitry Andric }
1550b57cec5SDimitry Andric }
1560b57cec5SDimitry Andric while (Dep.size() != Level) {
1570b57cec5SDimitry Andric Dep.push_back('I');
1580b57cec5SDimitry Andric }
1590b57cec5SDimitry Andric
1600b57cec5SDimitry Andric DepMatrix.push_back(Dep);
1610b57cec5SDimitry Andric if (DepMatrix.size() > MaxMemInstrCount) {
1620b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
1630b57cec5SDimitry Andric << " dependencies inside loop\n");
1640b57cec5SDimitry Andric return false;
1650b57cec5SDimitry Andric }
1660b57cec5SDimitry Andric }
1670b57cec5SDimitry Andric }
1680b57cec5SDimitry Andric }
1690b57cec5SDimitry Andric
1700b57cec5SDimitry Andric return true;
1710b57cec5SDimitry Andric }
1720b57cec5SDimitry Andric
1730b57cec5SDimitry Andric // A loop is moved from index 'from' to an index 'to'. Update the Dependence
1740b57cec5SDimitry Andric // matrix by exchanging the two columns.
interChangeDependencies(CharMatrix & DepMatrix,unsigned FromIndx,unsigned ToIndx)1750b57cec5SDimitry Andric static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
1760b57cec5SDimitry Andric unsigned ToIndx) {
177fe6060f1SDimitry Andric for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I)
178fe6060f1SDimitry Andric std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]);
1790b57cec5SDimitry Andric }
1800b57cec5SDimitry Andric
181bdd1243dSDimitry Andric // After interchanging, check if the direction vector is valid.
1820b57cec5SDimitry Andric // [Theorem] A permutation of the loops in a perfect nest is legal if and only
1830b57cec5SDimitry Andric // if the direction matrix, after the same permutation is applied to its
1840b57cec5SDimitry Andric // columns, has no ">" direction as the leftmost non-"=" direction in any row.
isLexicographicallyPositive(std::vector<char> & DV)185bdd1243dSDimitry Andric static bool isLexicographicallyPositive(std::vector<char> &DV) {
18606c3fb27SDimitry Andric for (unsigned char Direction : DV) {
187bdd1243dSDimitry Andric if (Direction == '<')
188bdd1243dSDimitry Andric return true;
189bdd1243dSDimitry Andric if (Direction == '>' || Direction == '*')
190bdd1243dSDimitry Andric return false;
191bdd1243dSDimitry Andric }
192bdd1243dSDimitry Andric return true;
193bdd1243dSDimitry Andric }
194bdd1243dSDimitry Andric
195bdd1243dSDimitry Andric // Checks if it is legal to interchange 2 loops.
isLegalToInterChangeLoops(CharMatrix & DepMatrix,unsigned InnerLoopId,unsigned OuterLoopId)1960b57cec5SDimitry Andric static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
1970b57cec5SDimitry Andric unsigned InnerLoopId,
1980b57cec5SDimitry Andric unsigned OuterLoopId) {
1990b57cec5SDimitry Andric unsigned NumRows = DepMatrix.size();
200bdd1243dSDimitry Andric std::vector<char> Cur;
2010b57cec5SDimitry Andric // For each row check if it is valid to interchange.
2020b57cec5SDimitry Andric for (unsigned Row = 0; Row < NumRows; ++Row) {
203bdd1243dSDimitry Andric // Create temporary DepVector check its lexicographical order
204bdd1243dSDimitry Andric // before and after swapping OuterLoop vs InnerLoop
205bdd1243dSDimitry Andric Cur = DepMatrix[Row];
206bdd1243dSDimitry Andric if (!isLexicographicallyPositive(Cur))
2070b57cec5SDimitry Andric return false;
208bdd1243dSDimitry Andric std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
209bdd1243dSDimitry Andric if (!isLexicographicallyPositive(Cur))
2100b57cec5SDimitry Andric return false;
2110b57cec5SDimitry Andric }
2120b57cec5SDimitry Andric return true;
2130b57cec5SDimitry Andric }
2140b57cec5SDimitry Andric
populateWorklist(Loop & L,LoopVector & LoopList)21581ad6265SDimitry Andric static void populateWorklist(Loop &L, LoopVector &LoopList) {
2160b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Calling populateWorklist on Func: "
2170b57cec5SDimitry Andric << L.getHeader()->getParent()->getName() << " Loop: %"
2180b57cec5SDimitry Andric << L.getHeader()->getName() << '\n');
21981ad6265SDimitry Andric assert(LoopList.empty() && "LoopList should initially be empty!");
2200b57cec5SDimitry Andric Loop *CurrentLoop = &L;
2210b57cec5SDimitry Andric const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
2220b57cec5SDimitry Andric while (!Vec->empty()) {
2230b57cec5SDimitry Andric // The current loop has multiple subloops in it hence it is not tightly
2240b57cec5SDimitry Andric // nested.
2250b57cec5SDimitry Andric // Discard all loops above it added into Worklist.
22681ad6265SDimitry Andric if (Vec->size() != 1) {
22781ad6265SDimitry Andric LoopList = {};
22881ad6265SDimitry Andric return;
22981ad6265SDimitry Andric }
2300b57cec5SDimitry Andric
2310b57cec5SDimitry Andric LoopList.push_back(CurrentLoop);
2320b57cec5SDimitry Andric CurrentLoop = Vec->front();
2330b57cec5SDimitry Andric Vec = &CurrentLoop->getSubLoops();
2340b57cec5SDimitry Andric }
2350b57cec5SDimitry Andric LoopList.push_back(CurrentLoop);
2360b57cec5SDimitry Andric }
2370b57cec5SDimitry Andric
2380b57cec5SDimitry Andric namespace {
2390b57cec5SDimitry Andric
2400b57cec5SDimitry Andric /// LoopInterchangeLegality checks if it is legal to interchange the loop.
2410b57cec5SDimitry Andric class LoopInterchangeLegality {
2420b57cec5SDimitry Andric public:
LoopInterchangeLegality(Loop * Outer,Loop * Inner,ScalarEvolution * SE,OptimizationRemarkEmitter * ORE)2430b57cec5SDimitry Andric LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
2440b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE)
2450b57cec5SDimitry Andric : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
2460b57cec5SDimitry Andric
2470b57cec5SDimitry Andric /// Check if the loops can be interchanged.
2480b57cec5SDimitry Andric bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId,
2490b57cec5SDimitry Andric CharMatrix &DepMatrix);
2500b57cec5SDimitry Andric
25104eeddc0SDimitry Andric /// Discover induction PHIs in the header of \p L. Induction
25204eeddc0SDimitry Andric /// PHIs are added to \p Inductions.
25304eeddc0SDimitry Andric bool findInductions(Loop *L, SmallVectorImpl<PHINode *> &Inductions);
25404eeddc0SDimitry Andric
2550b57cec5SDimitry Andric /// Check if the loop structure is understood. We do not handle triangular
2560b57cec5SDimitry Andric /// loops for now.
25704eeddc0SDimitry Andric bool isLoopStructureUnderstood();
2580b57cec5SDimitry Andric
2590b57cec5SDimitry Andric bool currentLimitations();
2600b57cec5SDimitry Andric
getOuterInnerReductions() const2610b57cec5SDimitry Andric const SmallPtrSetImpl<PHINode *> &getOuterInnerReductions() const {
2620b57cec5SDimitry Andric return OuterInnerReductions;
2630b57cec5SDimitry Andric }
2640b57cec5SDimitry Andric
getInnerLoopInductions() const26504eeddc0SDimitry Andric const SmallVectorImpl<PHINode *> &getInnerLoopInductions() const {
26604eeddc0SDimitry Andric return InnerLoopInductions;
26704eeddc0SDimitry Andric }
26804eeddc0SDimitry Andric
2690b57cec5SDimitry Andric private:
2700b57cec5SDimitry Andric bool tightlyNested(Loop *Outer, Loop *Inner);
2710b57cec5SDimitry Andric bool containsUnsafeInstructions(BasicBlock *BB);
2720b57cec5SDimitry Andric
2730b57cec5SDimitry Andric /// Discover induction and reduction PHIs in the header of \p L. Induction
2740b57cec5SDimitry Andric /// PHIs are added to \p Inductions, reductions are added to
2750b57cec5SDimitry Andric /// OuterInnerReductions. When the outer loop is passed, the inner loop needs
2760b57cec5SDimitry Andric /// to be passed as \p InnerLoop.
2770b57cec5SDimitry Andric bool findInductionAndReductions(Loop *L,
2780b57cec5SDimitry Andric SmallVector<PHINode *, 8> &Inductions,
2790b57cec5SDimitry Andric Loop *InnerLoop);
2800b57cec5SDimitry Andric
2810b57cec5SDimitry Andric Loop *OuterLoop;
2820b57cec5SDimitry Andric Loop *InnerLoop;
2830b57cec5SDimitry Andric
2840b57cec5SDimitry Andric ScalarEvolution *SE;
2850b57cec5SDimitry Andric
2860b57cec5SDimitry Andric /// Interface to emit optimization remarks.
2870b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE;
2880b57cec5SDimitry Andric
2890b57cec5SDimitry Andric /// Set of reduction PHIs taking part of a reduction across the inner and
2900b57cec5SDimitry Andric /// outer loop.
2910b57cec5SDimitry Andric SmallPtrSet<PHINode *, 4> OuterInnerReductions;
29204eeddc0SDimitry Andric
29304eeddc0SDimitry Andric /// Set of inner loop induction PHIs
29404eeddc0SDimitry Andric SmallVector<PHINode *, 8> InnerLoopInductions;
2950b57cec5SDimitry Andric };
2960b57cec5SDimitry Andric
2970b57cec5SDimitry Andric /// LoopInterchangeProfitability checks if it is profitable to interchange the
2980b57cec5SDimitry Andric /// loop.
2990b57cec5SDimitry Andric class LoopInterchangeProfitability {
3000b57cec5SDimitry Andric public:
LoopInterchangeProfitability(Loop * Outer,Loop * Inner,ScalarEvolution * SE,OptimizationRemarkEmitter * ORE)3010b57cec5SDimitry Andric LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
3020b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE)
3030b57cec5SDimitry Andric : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {}
3040b57cec5SDimitry Andric
3050b57cec5SDimitry Andric /// Check if the loop interchange is profitable.
30681ad6265SDimitry Andric bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
30781ad6265SDimitry Andric unsigned InnerLoopId, unsigned OuterLoopId,
30881ad6265SDimitry Andric CharMatrix &DepMatrix,
309bdd1243dSDimitry Andric const DenseMap<const Loop *, unsigned> &CostMap,
310bdd1243dSDimitry Andric std::unique_ptr<CacheCost> &CC);
3110b57cec5SDimitry Andric
3120b57cec5SDimitry Andric private:
3130b57cec5SDimitry Andric int getInstrOrderCost();
314bdd1243dSDimitry Andric std::optional<bool> isProfitablePerLoopCacheAnalysis(
315bdd1243dSDimitry Andric const DenseMap<const Loop *, unsigned> &CostMap,
316bdd1243dSDimitry Andric std::unique_ptr<CacheCost> &CC);
317bdd1243dSDimitry Andric std::optional<bool> isProfitablePerInstrOrderCost();
318bdd1243dSDimitry Andric std::optional<bool> isProfitableForVectorization(unsigned InnerLoopId,
319bdd1243dSDimitry Andric unsigned OuterLoopId,
320bdd1243dSDimitry Andric CharMatrix &DepMatrix);
3210b57cec5SDimitry Andric Loop *OuterLoop;
3220b57cec5SDimitry Andric Loop *InnerLoop;
3230b57cec5SDimitry Andric
3240b57cec5SDimitry Andric /// Scev analysis.
3250b57cec5SDimitry Andric ScalarEvolution *SE;
3260b57cec5SDimitry Andric
3270b57cec5SDimitry Andric /// Interface to emit optimization remarks.
3280b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE;
3290b57cec5SDimitry Andric };
3300b57cec5SDimitry Andric
3310b57cec5SDimitry Andric /// LoopInterchangeTransform interchanges the loop.
3320b57cec5SDimitry Andric class LoopInterchangeTransform {
3330b57cec5SDimitry Andric public:
LoopInterchangeTransform(Loop * Outer,Loop * Inner,ScalarEvolution * SE,LoopInfo * LI,DominatorTree * DT,const LoopInterchangeLegality & LIL)3340b57cec5SDimitry Andric LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE,
3350b57cec5SDimitry Andric LoopInfo *LI, DominatorTree *DT,
3360b57cec5SDimitry Andric const LoopInterchangeLegality &LIL)
337fe6060f1SDimitry Andric : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {}
3380b57cec5SDimitry Andric
3390b57cec5SDimitry Andric /// Interchange OuterLoop and InnerLoop.
3400b57cec5SDimitry Andric bool transform();
3410b57cec5SDimitry Andric void restructureLoops(Loop *NewInner, Loop *NewOuter,
3420b57cec5SDimitry Andric BasicBlock *OrigInnerPreHeader,
3430b57cec5SDimitry Andric BasicBlock *OrigOuterPreHeader);
3440b57cec5SDimitry Andric void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop);
3450b57cec5SDimitry Andric
3460b57cec5SDimitry Andric private:
3470b57cec5SDimitry Andric bool adjustLoopLinks();
3480b57cec5SDimitry Andric bool adjustLoopBranches();
3490b57cec5SDimitry Andric
3500b57cec5SDimitry Andric Loop *OuterLoop;
3510b57cec5SDimitry Andric Loop *InnerLoop;
3520b57cec5SDimitry Andric
3530b57cec5SDimitry Andric /// Scev analysis.
3540b57cec5SDimitry Andric ScalarEvolution *SE;
3550b57cec5SDimitry Andric
3560b57cec5SDimitry Andric LoopInfo *LI;
3570b57cec5SDimitry Andric DominatorTree *DT;
3580b57cec5SDimitry Andric
3590b57cec5SDimitry Andric const LoopInterchangeLegality &LIL;
3600b57cec5SDimitry Andric };
3610b57cec5SDimitry Andric
362e8d8bef9SDimitry Andric struct LoopInterchange {
3630b57cec5SDimitry Andric ScalarEvolution *SE = nullptr;
3640b57cec5SDimitry Andric LoopInfo *LI = nullptr;
3650b57cec5SDimitry Andric DependenceInfo *DI = nullptr;
3660b57cec5SDimitry Andric DominatorTree *DT = nullptr;
36781ad6265SDimitry Andric std::unique_ptr<CacheCost> CC = nullptr;
3680b57cec5SDimitry Andric
3690b57cec5SDimitry Andric /// Interface to emit optimization remarks.
3700b57cec5SDimitry Andric OptimizationRemarkEmitter *ORE;
3710b57cec5SDimitry Andric
LoopInterchange__anon815ea8750211::LoopInterchange372e8d8bef9SDimitry Andric LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
37381ad6265SDimitry Andric DominatorTree *DT, std::unique_ptr<CacheCost> &CC,
37481ad6265SDimitry Andric OptimizationRemarkEmitter *ORE)
37581ad6265SDimitry Andric : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC)), ORE(ORE) {}
3760b57cec5SDimitry Andric
run__anon815ea8750211::LoopInterchange377e8d8bef9SDimitry Andric bool run(Loop *L) {
378e8d8bef9SDimitry Andric if (L->getParentLoop())
3790b57cec5SDimitry Andric return false;
38081ad6265SDimitry Andric SmallVector<Loop *, 8> LoopList;
38181ad6265SDimitry Andric populateWorklist(*L, LoopList);
38281ad6265SDimitry Andric return processLoopList(LoopList);
3830b57cec5SDimitry Andric }
3840b57cec5SDimitry Andric
run__anon815ea8750211::LoopInterchange385fe6060f1SDimitry Andric bool run(LoopNest &LN) {
38681ad6265SDimitry Andric SmallVector<Loop *, 8> LoopList(LN.getLoops().begin(), LN.getLoops().end());
387fe6060f1SDimitry Andric for (unsigned I = 1; I < LoopList.size(); ++I)
388fe6060f1SDimitry Andric if (LoopList[I]->getParentLoop() != LoopList[I - 1])
389fe6060f1SDimitry Andric return false;
390fe6060f1SDimitry Andric return processLoopList(LoopList);
391fe6060f1SDimitry Andric }
392fe6060f1SDimitry Andric
isComputableLoopNest__anon815ea8750211::LoopInterchange393fe6060f1SDimitry Andric bool isComputableLoopNest(ArrayRef<Loop *> LoopList) {
3940b57cec5SDimitry Andric for (Loop *L : LoopList) {
3950b57cec5SDimitry Andric const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
396e8d8bef9SDimitry Andric if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
3970b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
3980b57cec5SDimitry Andric return false;
3990b57cec5SDimitry Andric }
4000b57cec5SDimitry Andric if (L->getNumBackEdges() != 1) {
4010b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
4020b57cec5SDimitry Andric return false;
4030b57cec5SDimitry Andric }
4040b57cec5SDimitry Andric if (!L->getExitingBlock()) {
4050b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
4060b57cec5SDimitry Andric return false;
4070b57cec5SDimitry Andric }
4080b57cec5SDimitry Andric }
4090b57cec5SDimitry Andric return true;
4100b57cec5SDimitry Andric }
4110b57cec5SDimitry Andric
selectLoopForInterchange__anon815ea8750211::LoopInterchange412fe6060f1SDimitry Andric unsigned selectLoopForInterchange(ArrayRef<Loop *> LoopList) {
4130b57cec5SDimitry Andric // TODO: Add a better heuristic to select the loop to be interchanged based
4140b57cec5SDimitry Andric // on the dependence matrix. Currently we select the innermost loop.
4150b57cec5SDimitry Andric return LoopList.size() - 1;
4160b57cec5SDimitry Andric }
4170b57cec5SDimitry Andric
processLoopList__anon815ea8750211::LoopInterchange41881ad6265SDimitry Andric bool processLoopList(SmallVectorImpl<Loop *> &LoopList) {
4190b57cec5SDimitry Andric bool Changed = false;
4200b57cec5SDimitry Andric unsigned LoopNestDepth = LoopList.size();
4210b57cec5SDimitry Andric if (LoopNestDepth < 2) {
4220b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
4230b57cec5SDimitry Andric return false;
4240b57cec5SDimitry Andric }
4250b57cec5SDimitry Andric if (LoopNestDepth > MaxLoopNestDepth) {
4260b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Cannot handle loops of depth greater than "
4270b57cec5SDimitry Andric << MaxLoopNestDepth << "\n");
4280b57cec5SDimitry Andric return false;
4290b57cec5SDimitry Andric }
4300b57cec5SDimitry Andric if (!isComputableLoopNest(LoopList)) {
4310b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
4320b57cec5SDimitry Andric return false;
4330b57cec5SDimitry Andric }
4340b57cec5SDimitry Andric
4350b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth
4360b57cec5SDimitry Andric << "\n");
4370b57cec5SDimitry Andric
4380b57cec5SDimitry Andric CharMatrix DependencyMatrix;
4390b57cec5SDimitry Andric Loop *OuterMostLoop = *(LoopList.begin());
4400b57cec5SDimitry Andric if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
441bdd1243dSDimitry Andric OuterMostLoop, DI, SE)) {
4420b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
4430b57cec5SDimitry Andric return false;
4440b57cec5SDimitry Andric }
4450b57cec5SDimitry Andric #ifdef DUMP_DEP_MATRICIES
4460b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
4470b57cec5SDimitry Andric printDepMatrix(DependencyMatrix);
4480b57cec5SDimitry Andric #endif
4490b57cec5SDimitry Andric
4500b57cec5SDimitry Andric // Get the Outermost loop exit.
4510b57cec5SDimitry Andric BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
4520b57cec5SDimitry Andric if (!LoopNestExit) {
4530b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "OuterMostLoop needs an unique exit block");
4540b57cec5SDimitry Andric return false;
4550b57cec5SDimitry Andric }
4560b57cec5SDimitry Andric
4570b57cec5SDimitry Andric unsigned SelecLoopId = selectLoopForInterchange(LoopList);
45881ad6265SDimitry Andric // Obtain the loop vector returned from loop cache analysis beforehand,
45981ad6265SDimitry Andric // and put each <Loop, index> pair into a map for constant time query
46081ad6265SDimitry Andric // later. Indices in loop vector reprsent the optimal order of the
46181ad6265SDimitry Andric // corresponding loop, e.g., given a loopnest with depth N, index 0
46281ad6265SDimitry Andric // indicates the loop should be placed as the outermost loop and index N
46381ad6265SDimitry Andric // indicates the loop should be placed as the innermost loop.
46481ad6265SDimitry Andric //
46581ad6265SDimitry Andric // For the old pass manager CacheCost would be null.
46681ad6265SDimitry Andric DenseMap<const Loop *, unsigned> CostMap;
46781ad6265SDimitry Andric if (CC != nullptr) {
46881ad6265SDimitry Andric const auto &LoopCosts = CC->getLoopCosts();
46981ad6265SDimitry Andric for (unsigned i = 0; i < LoopCosts.size(); i++) {
47081ad6265SDimitry Andric CostMap[LoopCosts[i].first] = i;
47181ad6265SDimitry Andric }
47281ad6265SDimitry Andric }
47381ad6265SDimitry Andric // We try to achieve the globally optimal memory access for the loopnest,
47481ad6265SDimitry Andric // and do interchange based on a bubble-sort fasion. We start from
47581ad6265SDimitry Andric // the innermost loop, move it outwards to the best possible position
47681ad6265SDimitry Andric // and repeat this process.
47781ad6265SDimitry Andric for (unsigned j = SelecLoopId; j > 0; j--) {
47881ad6265SDimitry Andric bool ChangedPerIter = false;
47981ad6265SDimitry Andric for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
48081ad6265SDimitry Andric bool Interchanged = processLoop(LoopList[i], LoopList[i - 1], i, i - 1,
48181ad6265SDimitry Andric DependencyMatrix, CostMap);
4820b57cec5SDimitry Andric if (!Interchanged)
48381ad6265SDimitry Andric continue;
48481ad6265SDimitry Andric // Loops interchanged, update LoopList accordingly.
48581ad6265SDimitry Andric std::swap(LoopList[i - 1], LoopList[i]);
4860b57cec5SDimitry Andric // Update the DependencyMatrix
4870b57cec5SDimitry Andric interChangeDependencies(DependencyMatrix, i, i - 1);
4880b57cec5SDimitry Andric #ifdef DUMP_DEP_MATRICIES
4890b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
4900b57cec5SDimitry Andric printDepMatrix(DependencyMatrix);
4910b57cec5SDimitry Andric #endif
49281ad6265SDimitry Andric ChangedPerIter |= Interchanged;
4930b57cec5SDimitry Andric Changed |= Interchanged;
4940b57cec5SDimitry Andric }
49581ad6265SDimitry Andric // Early abort if there was no interchange during an entire round of
49681ad6265SDimitry Andric // moving loops outwards.
49781ad6265SDimitry Andric if (!ChangedPerIter)
49881ad6265SDimitry Andric break;
49981ad6265SDimitry Andric }
5000b57cec5SDimitry Andric return Changed;
5010b57cec5SDimitry Andric }
5020b57cec5SDimitry Andric
processLoop__anon815ea8750211::LoopInterchange503fe6060f1SDimitry Andric bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId,
504fe6060f1SDimitry Andric unsigned OuterLoopId,
50581ad6265SDimitry Andric std::vector<std::vector<char>> &DependencyMatrix,
50681ad6265SDimitry Andric const DenseMap<const Loop *, unsigned> &CostMap) {
5070b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId
5080b57cec5SDimitry Andric << " and OuterLoopId = " << OuterLoopId << "\n");
5090b57cec5SDimitry Andric LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE);
5100b57cec5SDimitry Andric if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) {
5110b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n");
5120b57cec5SDimitry Andric return false;
5130b57cec5SDimitry Andric }
5140b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
5150b57cec5SDimitry Andric LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
51681ad6265SDimitry Andric if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
517bdd1243dSDimitry Andric DependencyMatrix, CostMap, CC)) {
5180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
5190b57cec5SDimitry Andric return false;
5200b57cec5SDimitry Andric }
5210b57cec5SDimitry Andric
5220b57cec5SDimitry Andric ORE->emit([&]() {
5230b57cec5SDimitry Andric return OptimizationRemark(DEBUG_TYPE, "Interchanged",
5240b57cec5SDimitry Andric InnerLoop->getStartLoc(),
5250b57cec5SDimitry Andric InnerLoop->getHeader())
5260b57cec5SDimitry Andric << "Loop interchanged with enclosing loop.";
5270b57cec5SDimitry Andric });
5280b57cec5SDimitry Andric
529fe6060f1SDimitry Andric LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL);
5300b57cec5SDimitry Andric LIT.transform();
5310b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loops interchanged.\n");
5320b57cec5SDimitry Andric LoopsInterchanged++;
5335ffd83dbSDimitry Andric
534bdd1243dSDimitry Andric llvm::formLCSSARecursively(*OuterLoop, *DT, LI, SE);
5350b57cec5SDimitry Andric return true;
5360b57cec5SDimitry Andric }
5370b57cec5SDimitry Andric };
5380b57cec5SDimitry Andric
5390b57cec5SDimitry Andric } // end anonymous namespace
5400b57cec5SDimitry Andric
containsUnsafeInstructions(BasicBlock * BB)5410b57cec5SDimitry Andric bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) {
5420b57cec5SDimitry Andric return any_of(*BB, [](const Instruction &I) {
5430b57cec5SDimitry Andric return I.mayHaveSideEffects() || I.mayReadFromMemory();
5440b57cec5SDimitry Andric });
5450b57cec5SDimitry Andric }
5460b57cec5SDimitry Andric
tightlyNested(Loop * OuterLoop,Loop * InnerLoop)5470b57cec5SDimitry Andric bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
5480b57cec5SDimitry Andric BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
5490b57cec5SDimitry Andric BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
5500b57cec5SDimitry Andric BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
5510b57cec5SDimitry Andric
5520b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Checking if loops are tightly nested\n");
5530b57cec5SDimitry Andric
5540b57cec5SDimitry Andric // A perfectly nested loop will not have any branch in between the outer and
5550b57cec5SDimitry Andric // inner block i.e. outer header will branch to either inner preheader and
5560b57cec5SDimitry Andric // outerloop latch.
5570b57cec5SDimitry Andric BranchInst *OuterLoopHeaderBI =
5580b57cec5SDimitry Andric dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
5590b57cec5SDimitry Andric if (!OuterLoopHeaderBI)
5600b57cec5SDimitry Andric return false;
5610b57cec5SDimitry Andric
5620b57cec5SDimitry Andric for (BasicBlock *Succ : successors(OuterLoopHeaderBI))
5630b57cec5SDimitry Andric if (Succ != InnerLoopPreHeader && Succ != InnerLoop->getHeader() &&
5640b57cec5SDimitry Andric Succ != OuterLoopLatch)
5650b57cec5SDimitry Andric return false;
5660b57cec5SDimitry Andric
5670b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
5680b57cec5SDimitry Andric // We do not have any basic block in between now make sure the outer header
5690b57cec5SDimitry Andric // and outer loop latch doesn't contain any unsafe instructions.
5700b57cec5SDimitry Andric if (containsUnsafeInstructions(OuterLoopHeader) ||
5710b57cec5SDimitry Andric containsUnsafeInstructions(OuterLoopLatch))
5720b57cec5SDimitry Andric return false;
5730b57cec5SDimitry Andric
574e8d8bef9SDimitry Andric // Also make sure the inner loop preheader does not contain any unsafe
575e8d8bef9SDimitry Andric // instructions. Note that all instructions in the preheader will be moved to
576e8d8bef9SDimitry Andric // the outer loop header when interchanging.
577e8d8bef9SDimitry Andric if (InnerLoopPreHeader != OuterLoopHeader &&
578e8d8bef9SDimitry Andric containsUnsafeInstructions(InnerLoopPreHeader))
579e8d8bef9SDimitry Andric return false;
580e8d8bef9SDimitry Andric
581fe6060f1SDimitry Andric BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
582fe6060f1SDimitry Andric // Ensure the inner loop exit block flows to the outer loop latch possibly
583fe6060f1SDimitry Andric // through empty blocks.
584fe6060f1SDimitry Andric const BasicBlock &SuccInner =
585fe6060f1SDimitry Andric LoopNest::skipEmptyBlockUntil(InnerLoopExit, OuterLoopLatch);
586fe6060f1SDimitry Andric if (&SuccInner != OuterLoopLatch) {
587fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Inner loop exit block " << *InnerLoopExit
588fe6060f1SDimitry Andric << " does not lead to the outer loop latch.\n";);
589fe6060f1SDimitry Andric return false;
590fe6060f1SDimitry Andric }
591fe6060f1SDimitry Andric // The inner loop exit block does flow to the outer loop latch and not some
592fe6060f1SDimitry Andric // other BBs, now make sure it contains safe instructions, since it will be
593fe6060f1SDimitry Andric // moved into the (new) inner loop after interchange.
594fe6060f1SDimitry Andric if (containsUnsafeInstructions(InnerLoopExit))
595fe6060f1SDimitry Andric return false;
596fe6060f1SDimitry Andric
5970b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n");
5980b57cec5SDimitry Andric // We have a perfect loop nest.
5990b57cec5SDimitry Andric return true;
6000b57cec5SDimitry Andric }
6010b57cec5SDimitry Andric
isLoopStructureUnderstood()60204eeddc0SDimitry Andric bool LoopInterchangeLegality::isLoopStructureUnderstood() {
6030b57cec5SDimitry Andric BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader();
60404eeddc0SDimitry Andric for (PHINode *InnerInduction : InnerLoopInductions) {
60504eeddc0SDimitry Andric unsigned Num = InnerInduction->getNumOperands();
6060b57cec5SDimitry Andric for (unsigned i = 0; i < Num; ++i) {
6070b57cec5SDimitry Andric Value *Val = InnerInduction->getOperand(i);
6080b57cec5SDimitry Andric if (isa<Constant>(Val))
6090b57cec5SDimitry Andric continue;
6100b57cec5SDimitry Andric Instruction *I = dyn_cast<Instruction>(Val);
6110b57cec5SDimitry Andric if (!I)
6120b57cec5SDimitry Andric return false;
6130b57cec5SDimitry Andric // TODO: Handle triangular loops.
6140b57cec5SDimitry Andric // e.g. for(int i=0;i<N;i++)
6150b57cec5SDimitry Andric // for(int j=i;j<N;j++)
6160b57cec5SDimitry Andric unsigned IncomBlockIndx = PHINode::getIncomingValueNumForOperand(i);
6170b57cec5SDimitry Andric if (InnerInduction->getIncomingBlock(IncomBlockIndx) ==
6180b57cec5SDimitry Andric InnerLoopPreheader &&
6190b57cec5SDimitry Andric !OuterLoop->isLoopInvariant(I)) {
6200b57cec5SDimitry Andric return false;
6210b57cec5SDimitry Andric }
6220b57cec5SDimitry Andric }
62304eeddc0SDimitry Andric }
624fe6060f1SDimitry Andric
625fe6060f1SDimitry Andric // TODO: Handle triangular loops of another form.
626fe6060f1SDimitry Andric // e.g. for(int i=0;i<N;i++)
627fe6060f1SDimitry Andric // for(int j=0;j<i;j++)
628fe6060f1SDimitry Andric // or,
629fe6060f1SDimitry Andric // for(int i=0;i<N;i++)
630fe6060f1SDimitry Andric // for(int j=0;j*i<N;j++)
631fe6060f1SDimitry Andric BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
632fe6060f1SDimitry Andric BranchInst *InnerLoopLatchBI =
633fe6060f1SDimitry Andric dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
634fe6060f1SDimitry Andric if (!InnerLoopLatchBI->isConditional())
635fe6060f1SDimitry Andric return false;
636fe6060f1SDimitry Andric if (CmpInst *InnerLoopCmp =
637fe6060f1SDimitry Andric dyn_cast<CmpInst>(InnerLoopLatchBI->getCondition())) {
638fe6060f1SDimitry Andric Value *Op0 = InnerLoopCmp->getOperand(0);
639fe6060f1SDimitry Andric Value *Op1 = InnerLoopCmp->getOperand(1);
640fe6060f1SDimitry Andric
641fe6060f1SDimitry Andric // LHS and RHS of the inner loop exit condition, e.g.,
642fe6060f1SDimitry Andric // in "for(int j=0;j<i;j++)", LHS is j and RHS is i.
643fe6060f1SDimitry Andric Value *Left = nullptr;
644fe6060f1SDimitry Andric Value *Right = nullptr;
645fe6060f1SDimitry Andric
646fe6060f1SDimitry Andric // Check if V only involves inner loop induction variable.
647fe6060f1SDimitry Andric // Return true if V is InnerInduction, or a cast from
648fe6060f1SDimitry Andric // InnerInduction, or a binary operator that involves
649fe6060f1SDimitry Andric // InnerInduction and a constant.
65004eeddc0SDimitry Andric std::function<bool(Value *)> IsPathToInnerIndVar;
65104eeddc0SDimitry Andric IsPathToInnerIndVar = [this, &IsPathToInnerIndVar](const Value *V) -> bool {
65204eeddc0SDimitry Andric if (llvm::is_contained(InnerLoopInductions, V))
653fe6060f1SDimitry Andric return true;
654fe6060f1SDimitry Andric if (isa<Constant>(V))
655fe6060f1SDimitry Andric return true;
65604eeddc0SDimitry Andric const Instruction *I = dyn_cast<Instruction>(V);
657fe6060f1SDimitry Andric if (!I)
658fe6060f1SDimitry Andric return false;
659fe6060f1SDimitry Andric if (isa<CastInst>(I))
66004eeddc0SDimitry Andric return IsPathToInnerIndVar(I->getOperand(0));
661fe6060f1SDimitry Andric if (isa<BinaryOperator>(I))
66204eeddc0SDimitry Andric return IsPathToInnerIndVar(I->getOperand(0)) &&
66304eeddc0SDimitry Andric IsPathToInnerIndVar(I->getOperand(1));
664fe6060f1SDimitry Andric return false;
665fe6060f1SDimitry Andric };
666fe6060f1SDimitry Andric
66704eeddc0SDimitry Andric // In case of multiple inner loop indvars, it is okay if LHS and RHS
66804eeddc0SDimitry Andric // are both inner indvar related variables.
66904eeddc0SDimitry Andric if (IsPathToInnerIndVar(Op0) && IsPathToInnerIndVar(Op1))
67004eeddc0SDimitry Andric return true;
67104eeddc0SDimitry Andric
67204eeddc0SDimitry Andric // Otherwise we check if the cmp instruction compares an inner indvar
67304eeddc0SDimitry Andric // related variable (Left) with a outer loop invariant (Right).
67404eeddc0SDimitry Andric if (IsPathToInnerIndVar(Op0) && !isa<Constant>(Op0)) {
675fe6060f1SDimitry Andric Left = Op0;
676fe6060f1SDimitry Andric Right = Op1;
67704eeddc0SDimitry Andric } else if (IsPathToInnerIndVar(Op1) && !isa<Constant>(Op1)) {
678fe6060f1SDimitry Andric Left = Op1;
679fe6060f1SDimitry Andric Right = Op0;
680fe6060f1SDimitry Andric }
681fe6060f1SDimitry Andric
682fe6060f1SDimitry Andric if (Left == nullptr)
683fe6060f1SDimitry Andric return false;
684fe6060f1SDimitry Andric
685fe6060f1SDimitry Andric const SCEV *S = SE->getSCEV(Right);
686fe6060f1SDimitry Andric if (!SE->isLoopInvariant(S, OuterLoop))
687fe6060f1SDimitry Andric return false;
688fe6060f1SDimitry Andric }
689fe6060f1SDimitry Andric
6900b57cec5SDimitry Andric return true;
6910b57cec5SDimitry Andric }
6920b57cec5SDimitry Andric
6930b57cec5SDimitry Andric // If SV is a LCSSA PHI node with a single incoming value, return the incoming
6940b57cec5SDimitry Andric // value.
followLCSSA(Value * SV)6950b57cec5SDimitry Andric static Value *followLCSSA(Value *SV) {
6960b57cec5SDimitry Andric PHINode *PHI = dyn_cast<PHINode>(SV);
6970b57cec5SDimitry Andric if (!PHI)
6980b57cec5SDimitry Andric return SV;
6990b57cec5SDimitry Andric
7000b57cec5SDimitry Andric if (PHI->getNumIncomingValues() != 1)
7010b57cec5SDimitry Andric return SV;
7020b57cec5SDimitry Andric return followLCSSA(PHI->getIncomingValue(0));
7030b57cec5SDimitry Andric }
7040b57cec5SDimitry Andric
7050b57cec5SDimitry Andric // Check V's users to see if it is involved in a reduction in L.
findInnerReductionPhi(Loop * L,Value * V)7060b57cec5SDimitry Andric static PHINode *findInnerReductionPhi(Loop *L, Value *V) {
707e8d8bef9SDimitry Andric // Reduction variables cannot be constants.
708e8d8bef9SDimitry Andric if (isa<Constant>(V))
709e8d8bef9SDimitry Andric return nullptr;
710e8d8bef9SDimitry Andric
7110b57cec5SDimitry Andric for (Value *User : V->users()) {
7120b57cec5SDimitry Andric if (PHINode *PHI = dyn_cast<PHINode>(User)) {
7130b57cec5SDimitry Andric if (PHI->getNumIncomingValues() == 1)
7140b57cec5SDimitry Andric continue;
7150b57cec5SDimitry Andric RecurrenceDescriptor RD;
71681ad6265SDimitry Andric if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) {
71781ad6265SDimitry Andric // Detect floating point reduction only when it can be reordered.
71881ad6265SDimitry Andric if (RD.getExactFPMathInst() != nullptr)
71981ad6265SDimitry Andric return nullptr;
7200b57cec5SDimitry Andric return PHI;
72181ad6265SDimitry Andric }
7220b57cec5SDimitry Andric return nullptr;
7230b57cec5SDimitry Andric }
7240b57cec5SDimitry Andric }
7250b57cec5SDimitry Andric
7260b57cec5SDimitry Andric return nullptr;
7270b57cec5SDimitry Andric }
7280b57cec5SDimitry Andric
findInductionAndReductions(Loop * L,SmallVector<PHINode *,8> & Inductions,Loop * InnerLoop)7290b57cec5SDimitry Andric bool LoopInterchangeLegality::findInductionAndReductions(
7300b57cec5SDimitry Andric Loop *L, SmallVector<PHINode *, 8> &Inductions, Loop *InnerLoop) {
7310b57cec5SDimitry Andric if (!L->getLoopLatch() || !L->getLoopPredecessor())
7320b57cec5SDimitry Andric return false;
7330b57cec5SDimitry Andric for (PHINode &PHI : L->getHeader()->phis()) {
7340b57cec5SDimitry Andric InductionDescriptor ID;
7350b57cec5SDimitry Andric if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
7360b57cec5SDimitry Andric Inductions.push_back(&PHI);
7370b57cec5SDimitry Andric else {
7380b57cec5SDimitry Andric // PHIs in inner loops need to be part of a reduction in the outer loop,
7390b57cec5SDimitry Andric // discovered when checking the PHIs of the outer loop earlier.
7400b57cec5SDimitry Andric if (!InnerLoop) {
7415ffd83dbSDimitry Andric if (!OuterInnerReductions.count(&PHI)) {
7420b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Inner loop PHI is not part of reductions "
7430b57cec5SDimitry Andric "across the outer loop.\n");
7440b57cec5SDimitry Andric return false;
7450b57cec5SDimitry Andric }
7460b57cec5SDimitry Andric } else {
7470b57cec5SDimitry Andric assert(PHI.getNumIncomingValues() == 2 &&
7480b57cec5SDimitry Andric "Phis in loop header should have exactly 2 incoming values");
7490b57cec5SDimitry Andric // Check if we have a PHI node in the outer loop that has a reduction
7500b57cec5SDimitry Andric // result from the inner loop as an incoming value.
7510b57cec5SDimitry Andric Value *V = followLCSSA(PHI.getIncomingValueForBlock(L->getLoopLatch()));
7520b57cec5SDimitry Andric PHINode *InnerRedPhi = findInnerReductionPhi(InnerLoop, V);
7530b57cec5SDimitry Andric if (!InnerRedPhi ||
754e8d8bef9SDimitry Andric !llvm::is_contained(InnerRedPhi->incoming_values(), &PHI)) {
7550b57cec5SDimitry Andric LLVM_DEBUG(
7560b57cec5SDimitry Andric dbgs()
7570b57cec5SDimitry Andric << "Failed to recognize PHI as an induction or reduction.\n");
7580b57cec5SDimitry Andric return false;
7590b57cec5SDimitry Andric }
7600b57cec5SDimitry Andric OuterInnerReductions.insert(&PHI);
7610b57cec5SDimitry Andric OuterInnerReductions.insert(InnerRedPhi);
7620b57cec5SDimitry Andric }
7630b57cec5SDimitry Andric }
7640b57cec5SDimitry Andric }
7650b57cec5SDimitry Andric return true;
7660b57cec5SDimitry Andric }
7670b57cec5SDimitry Andric
7680b57cec5SDimitry Andric // This function indicates the current limitations in the transform as a result
7690b57cec5SDimitry Andric // of which we do not proceed.
currentLimitations()7700b57cec5SDimitry Andric bool LoopInterchangeLegality::currentLimitations() {
7710b57cec5SDimitry Andric BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
7720b57cec5SDimitry Andric
7730b57cec5SDimitry Andric // transform currently expects the loop latches to also be the exiting
7740b57cec5SDimitry Andric // blocks.
7750b57cec5SDimitry Andric if (InnerLoop->getExitingBlock() != InnerLoopLatch ||
7760b57cec5SDimitry Andric OuterLoop->getExitingBlock() != OuterLoop->getLoopLatch() ||
7770b57cec5SDimitry Andric !isa<BranchInst>(InnerLoopLatch->getTerminator()) ||
7780b57cec5SDimitry Andric !isa<BranchInst>(OuterLoop->getLoopLatch()->getTerminator())) {
7790b57cec5SDimitry Andric LLVM_DEBUG(
7800b57cec5SDimitry Andric dbgs() << "Loops where the latch is not the exiting block are not"
7810b57cec5SDimitry Andric << " supported currently.\n");
7820b57cec5SDimitry Andric ORE->emit([&]() {
7830b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "ExitingNotLatch",
7840b57cec5SDimitry Andric OuterLoop->getStartLoc(),
7850b57cec5SDimitry Andric OuterLoop->getHeader())
7860b57cec5SDimitry Andric << "Loops where the latch is not the exiting block cannot be"
7870b57cec5SDimitry Andric " interchange currently.";
7880b57cec5SDimitry Andric });
7890b57cec5SDimitry Andric return true;
7900b57cec5SDimitry Andric }
7910b57cec5SDimitry Andric
7920b57cec5SDimitry Andric SmallVector<PHINode *, 8> Inductions;
7930b57cec5SDimitry Andric if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) {
7940b57cec5SDimitry Andric LLVM_DEBUG(
7950b57cec5SDimitry Andric dbgs() << "Only outer loops with induction or reduction PHI nodes "
7960b57cec5SDimitry Andric << "are supported currently.\n");
7970b57cec5SDimitry Andric ORE->emit([&]() {
7980b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIOuter",
7990b57cec5SDimitry Andric OuterLoop->getStartLoc(),
8000b57cec5SDimitry Andric OuterLoop->getHeader())
8010b57cec5SDimitry Andric << "Only outer loops with induction or reduction PHI nodes can be"
8020b57cec5SDimitry Andric " interchanged currently.";
8030b57cec5SDimitry Andric });
8040b57cec5SDimitry Andric return true;
8050b57cec5SDimitry Andric }
8060b57cec5SDimitry Andric
8070b57cec5SDimitry Andric Inductions.clear();
808bdd1243dSDimitry Andric // For multi-level loop nests, make sure that all phi nodes for inner loops
809bdd1243dSDimitry Andric // at all levels can be recognized as a induction or reduction phi. Bail out
810bdd1243dSDimitry Andric // if a phi node at a certain nesting level cannot be properly recognized.
811bdd1243dSDimitry Andric Loop *CurLevelLoop = OuterLoop;
812bdd1243dSDimitry Andric while (!CurLevelLoop->getSubLoops().empty()) {
813bdd1243dSDimitry Andric // We already made sure that the loop nest is tightly nested.
814bdd1243dSDimitry Andric CurLevelLoop = CurLevelLoop->getSubLoops().front();
815bdd1243dSDimitry Andric if (!findInductionAndReductions(CurLevelLoop, Inductions, nullptr)) {
8160b57cec5SDimitry Andric LLVM_DEBUG(
8170b57cec5SDimitry Andric dbgs() << "Only inner loops with induction or reduction PHI nodes "
8180b57cec5SDimitry Andric << "are supported currently.\n");
8190b57cec5SDimitry Andric ORE->emit([&]() {
8200b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedPHIInner",
821bdd1243dSDimitry Andric CurLevelLoop->getStartLoc(),
822bdd1243dSDimitry Andric CurLevelLoop->getHeader())
8230b57cec5SDimitry Andric << "Only inner loops with induction or reduction PHI nodes can be"
8240b57cec5SDimitry Andric " interchange currently.";
8250b57cec5SDimitry Andric });
8260b57cec5SDimitry Andric return true;
8270b57cec5SDimitry Andric }
828bdd1243dSDimitry Andric }
8290b57cec5SDimitry Andric
8300b57cec5SDimitry Andric // TODO: Triangular loops are not handled for now.
83104eeddc0SDimitry Andric if (!isLoopStructureUnderstood()) {
8320b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n");
8330b57cec5SDimitry Andric ORE->emit([&]() {
8340b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner",
8350b57cec5SDimitry Andric InnerLoop->getStartLoc(),
8360b57cec5SDimitry Andric InnerLoop->getHeader())
8370b57cec5SDimitry Andric << "Inner loop structure not understood currently.";
8380b57cec5SDimitry Andric });
8390b57cec5SDimitry Andric return true;
8400b57cec5SDimitry Andric }
8410b57cec5SDimitry Andric
8420b57cec5SDimitry Andric return false;
8430b57cec5SDimitry Andric }
8440b57cec5SDimitry Andric
findInductions(Loop * L,SmallVectorImpl<PHINode * > & Inductions)84504eeddc0SDimitry Andric bool LoopInterchangeLegality::findInductions(
84604eeddc0SDimitry Andric Loop *L, SmallVectorImpl<PHINode *> &Inductions) {
84704eeddc0SDimitry Andric for (PHINode &PHI : L->getHeader()->phis()) {
84804eeddc0SDimitry Andric InductionDescriptor ID;
84904eeddc0SDimitry Andric if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
85004eeddc0SDimitry Andric Inductions.push_back(&PHI);
85104eeddc0SDimitry Andric }
85204eeddc0SDimitry Andric return !Inductions.empty();
85304eeddc0SDimitry Andric }
85404eeddc0SDimitry Andric
855480093f4SDimitry Andric // We currently only support LCSSA PHI nodes in the inner loop exit, if their
856480093f4SDimitry Andric // users are either reduction PHIs or PHIs outside the outer loop (which means
857480093f4SDimitry Andric // the we are only interested in the final value after the loop).
858480093f4SDimitry Andric static bool
areInnerLoopExitPHIsSupported(Loop * InnerL,Loop * OuterL,SmallPtrSetImpl<PHINode * > & Reductions)859480093f4SDimitry Andric areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
860480093f4SDimitry Andric SmallPtrSetImpl<PHINode *> &Reductions) {
861480093f4SDimitry Andric BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
862480093f4SDimitry Andric for (PHINode &PHI : InnerExit->phis()) {
863480093f4SDimitry Andric // Reduction lcssa phi will have only 1 incoming block that from loop latch.
864480093f4SDimitry Andric if (PHI.getNumIncomingValues() > 1)
865480093f4SDimitry Andric return false;
866480093f4SDimitry Andric if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
867480093f4SDimitry Andric PHINode *PN = dyn_cast<PHINode>(U);
8685ffd83dbSDimitry Andric return !PN ||
8695ffd83dbSDimitry Andric (!Reductions.count(PN) && OuterL->contains(PN->getParent()));
870480093f4SDimitry Andric })) {
871480093f4SDimitry Andric return false;
872480093f4SDimitry Andric }
873480093f4SDimitry Andric }
874480093f4SDimitry Andric return true;
875480093f4SDimitry Andric }
876480093f4SDimitry Andric
8770b57cec5SDimitry Andric // We currently support LCSSA PHI nodes in the outer loop exit, if their
8780b57cec5SDimitry Andric // incoming values do not come from the outer loop latch or if the
8790b57cec5SDimitry Andric // outer loop latch has a single predecessor. In that case, the value will
8800b57cec5SDimitry Andric // be available if both the inner and outer loop conditions are true, which
8810b57cec5SDimitry Andric // will still be true after interchanging. If we have multiple predecessor,
8820b57cec5SDimitry Andric // that may not be the case, e.g. because the outer loop latch may be executed
8830b57cec5SDimitry Andric // if the inner loop is not executed.
areOuterLoopExitPHIsSupported(Loop * OuterLoop,Loop * InnerLoop)884480093f4SDimitry Andric static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
8850b57cec5SDimitry Andric BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
8860b57cec5SDimitry Andric for (PHINode &PHI : LoopNestExit->phis()) {
8870b57cec5SDimitry Andric for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
8880b57cec5SDimitry Andric Instruction *IncomingI = dyn_cast<Instruction>(PHI.getIncomingValue(i));
8890b57cec5SDimitry Andric if (!IncomingI || IncomingI->getParent() != OuterLoop->getLoopLatch())
8900b57cec5SDimitry Andric continue;
8910b57cec5SDimitry Andric
8920b57cec5SDimitry Andric // The incoming value is defined in the outer loop latch. Currently we
8930b57cec5SDimitry Andric // only support that in case the outer loop latch has a single predecessor.
8940b57cec5SDimitry Andric // This guarantees that the outer loop latch is executed if and only if
8950b57cec5SDimitry Andric // the inner loop is executed (because tightlyNested() guarantees that the
8960b57cec5SDimitry Andric // outer loop header only branches to the inner loop or the outer loop
8970b57cec5SDimitry Andric // latch).
8980b57cec5SDimitry Andric // FIXME: We could weaken this logic and allow multiple predecessors,
8990b57cec5SDimitry Andric // if the values are produced outside the loop latch. We would need
9000b57cec5SDimitry Andric // additional logic to update the PHI nodes in the exit block as
9010b57cec5SDimitry Andric // well.
9020b57cec5SDimitry Andric if (OuterLoop->getLoopLatch()->getUniquePredecessor() == nullptr)
9030b57cec5SDimitry Andric return false;
9040b57cec5SDimitry Andric }
9050b57cec5SDimitry Andric }
9060b57cec5SDimitry Andric return true;
9070b57cec5SDimitry Andric }
9080b57cec5SDimitry Andric
909fe6060f1SDimitry Andric // In case of multi-level nested loops, it may occur that lcssa phis exist in
910fe6060f1SDimitry Andric // the latch of InnerLoop, i.e., when defs of the incoming values are further
911fe6060f1SDimitry Andric // inside the loopnest. Sometimes those incoming values are not available
912fe6060f1SDimitry Andric // after interchange, since the original inner latch will become the new outer
913fe6060f1SDimitry Andric // latch which may have predecessor paths that do not include those incoming
914fe6060f1SDimitry Andric // values.
915fe6060f1SDimitry Andric // TODO: Handle transformation of lcssa phis in the InnerLoop latch in case of
916fe6060f1SDimitry Andric // multi-level loop nests.
areInnerLoopLatchPHIsSupported(Loop * OuterLoop,Loop * InnerLoop)917fe6060f1SDimitry Andric static bool areInnerLoopLatchPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
918fe6060f1SDimitry Andric if (InnerLoop->getSubLoops().empty())
919fe6060f1SDimitry Andric return true;
920fe6060f1SDimitry Andric // If the original outer latch has only one predecessor, then values defined
921fe6060f1SDimitry Andric // further inside the looploop, e.g., in the innermost loop, will be available
922fe6060f1SDimitry Andric // at the new outer latch after interchange.
923fe6060f1SDimitry Andric if (OuterLoop->getLoopLatch()->getUniquePredecessor() != nullptr)
924fe6060f1SDimitry Andric return true;
925fe6060f1SDimitry Andric
926fe6060f1SDimitry Andric // The outer latch has more than one predecessors, i.e., the inner
927fe6060f1SDimitry Andric // exit and the inner header.
928fe6060f1SDimitry Andric // PHI nodes in the inner latch are lcssa phis where the incoming values
929fe6060f1SDimitry Andric // are defined further inside the loopnest. Check if those phis are used
930fe6060f1SDimitry Andric // in the original inner latch. If that is the case then bail out since
931fe6060f1SDimitry Andric // those incoming values may not be available at the new outer latch.
932fe6060f1SDimitry Andric BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
933fe6060f1SDimitry Andric for (PHINode &PHI : InnerLoopLatch->phis()) {
934fe6060f1SDimitry Andric for (auto *U : PHI.users()) {
935fe6060f1SDimitry Andric Instruction *UI = cast<Instruction>(U);
936fe6060f1SDimitry Andric if (InnerLoopLatch == UI->getParent())
937fe6060f1SDimitry Andric return false;
938fe6060f1SDimitry Andric }
939fe6060f1SDimitry Andric }
940fe6060f1SDimitry Andric return true;
941fe6060f1SDimitry Andric }
942fe6060f1SDimitry Andric
canInterchangeLoops(unsigned InnerLoopId,unsigned OuterLoopId,CharMatrix & DepMatrix)9430b57cec5SDimitry Andric bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
9440b57cec5SDimitry Andric unsigned OuterLoopId,
9450b57cec5SDimitry Andric CharMatrix &DepMatrix) {
9460b57cec5SDimitry Andric if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
9470b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
9480b57cec5SDimitry Andric << " and OuterLoopId = " << OuterLoopId
9490b57cec5SDimitry Andric << " due to dependence\n");
9500b57cec5SDimitry Andric ORE->emit([&]() {
9510b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "Dependence",
9520b57cec5SDimitry Andric InnerLoop->getStartLoc(),
9530b57cec5SDimitry Andric InnerLoop->getHeader())
9540b57cec5SDimitry Andric << "Cannot interchange loops due to dependences.";
9550b57cec5SDimitry Andric });
9560b57cec5SDimitry Andric return false;
9570b57cec5SDimitry Andric }
9580b57cec5SDimitry Andric // Check if outer and inner loop contain legal instructions only.
9590b57cec5SDimitry Andric for (auto *BB : OuterLoop->blocks())
9600b57cec5SDimitry Andric for (Instruction &I : BB->instructionsWithoutDebug())
9610b57cec5SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(&I)) {
9620b57cec5SDimitry Andric // readnone functions do not prevent interchanging.
96304eeddc0SDimitry Andric if (CI->onlyWritesMemory())
9640b57cec5SDimitry Andric continue;
9650b57cec5SDimitry Andric LLVM_DEBUG(
9660b57cec5SDimitry Andric dbgs() << "Loops with call instructions cannot be interchanged "
9670b57cec5SDimitry Andric << "safely.");
9680b57cec5SDimitry Andric ORE->emit([&]() {
9690b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "CallInst",
9700b57cec5SDimitry Andric CI->getDebugLoc(),
9710b57cec5SDimitry Andric CI->getParent())
9720b57cec5SDimitry Andric << "Cannot interchange loops due to call instruction.";
9730b57cec5SDimitry Andric });
9740b57cec5SDimitry Andric
9750b57cec5SDimitry Andric return false;
9760b57cec5SDimitry Andric }
9770b57cec5SDimitry Andric
97804eeddc0SDimitry Andric if (!findInductions(InnerLoop, InnerLoopInductions)) {
979*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Could not find inner loop induction variables.\n");
98004eeddc0SDimitry Andric return false;
98104eeddc0SDimitry Andric }
98204eeddc0SDimitry Andric
983fe6060f1SDimitry Andric if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) {
984fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n");
985fe6060f1SDimitry Andric ORE->emit([&]() {
986fe6060f1SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedInnerLatchPHI",
987fe6060f1SDimitry Andric InnerLoop->getStartLoc(),
988fe6060f1SDimitry Andric InnerLoop->getHeader())
989fe6060f1SDimitry Andric << "Cannot interchange loops because unsupported PHI nodes found "
990fe6060f1SDimitry Andric "in inner loop latch.";
991fe6060f1SDimitry Andric });
992fe6060f1SDimitry Andric return false;
993fe6060f1SDimitry Andric }
994fe6060f1SDimitry Andric
9950b57cec5SDimitry Andric // TODO: The loops could not be interchanged due to current limitations in the
9960b57cec5SDimitry Andric // transform module.
9970b57cec5SDimitry Andric if (currentLimitations()) {
9980b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Not legal because of current transform limitation\n");
9990b57cec5SDimitry Andric return false;
10000b57cec5SDimitry Andric }
10010b57cec5SDimitry Andric
10020b57cec5SDimitry Andric // Check if the loops are tightly nested.
10030b57cec5SDimitry Andric if (!tightlyNested(OuterLoop, InnerLoop)) {
10040b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Loops not tightly nested\n");
10050b57cec5SDimitry Andric ORE->emit([&]() {
10060b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "NotTightlyNested",
10070b57cec5SDimitry Andric InnerLoop->getStartLoc(),
10080b57cec5SDimitry Andric InnerLoop->getHeader())
10090b57cec5SDimitry Andric << "Cannot interchange loops because they are not tightly "
10100b57cec5SDimitry Andric "nested.";
10110b57cec5SDimitry Andric });
10120b57cec5SDimitry Andric return false;
10130b57cec5SDimitry Andric }
10140b57cec5SDimitry Andric
1015480093f4SDimitry Andric if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
1016480093f4SDimitry Andric OuterInnerReductions)) {
1017480093f4SDimitry Andric LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
1018480093f4SDimitry Andric ORE->emit([&]() {
1019480093f4SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
1020480093f4SDimitry Andric InnerLoop->getStartLoc(),
1021480093f4SDimitry Andric InnerLoop->getHeader())
1022480093f4SDimitry Andric << "Found unsupported PHI node in loop exit.";
1023480093f4SDimitry Andric });
1024480093f4SDimitry Andric return false;
1025480093f4SDimitry Andric }
1026480093f4SDimitry Andric
1027480093f4SDimitry Andric if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
10280b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
10290b57cec5SDimitry Andric ORE->emit([&]() {
10300b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
10310b57cec5SDimitry Andric OuterLoop->getStartLoc(),
10320b57cec5SDimitry Andric OuterLoop->getHeader())
10330b57cec5SDimitry Andric << "Found unsupported PHI node in loop exit.";
10340b57cec5SDimitry Andric });
10350b57cec5SDimitry Andric return false;
10360b57cec5SDimitry Andric }
10370b57cec5SDimitry Andric
10380b57cec5SDimitry Andric return true;
10390b57cec5SDimitry Andric }
10400b57cec5SDimitry Andric
getInstrOrderCost()10410b57cec5SDimitry Andric int LoopInterchangeProfitability::getInstrOrderCost() {
10420b57cec5SDimitry Andric unsigned GoodOrder, BadOrder;
10430b57cec5SDimitry Andric BadOrder = GoodOrder = 0;
10440b57cec5SDimitry Andric for (BasicBlock *BB : InnerLoop->blocks()) {
10450b57cec5SDimitry Andric for (Instruction &Ins : *BB) {
10460b57cec5SDimitry Andric if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Ins)) {
10470b57cec5SDimitry Andric unsigned NumOp = GEP->getNumOperands();
10480b57cec5SDimitry Andric bool FoundInnerInduction = false;
10490b57cec5SDimitry Andric bool FoundOuterInduction = false;
10500b57cec5SDimitry Andric for (unsigned i = 0; i < NumOp; ++i) {
1051e8d8bef9SDimitry Andric // Skip operands that are not SCEV-able.
1052e8d8bef9SDimitry Andric if (!SE->isSCEVable(GEP->getOperand(i)->getType()))
1053e8d8bef9SDimitry Andric continue;
1054e8d8bef9SDimitry Andric
10550b57cec5SDimitry Andric const SCEV *OperandVal = SE->getSCEV(GEP->getOperand(i));
10560b57cec5SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(OperandVal);
10570b57cec5SDimitry Andric if (!AR)
10580b57cec5SDimitry Andric continue;
10590b57cec5SDimitry Andric
10600b57cec5SDimitry Andric // If we find the inner induction after an outer induction e.g.
10610b57cec5SDimitry Andric // for(int i=0;i<N;i++)
10620b57cec5SDimitry Andric // for(int j=0;j<N;j++)
10630b57cec5SDimitry Andric // A[i][j] = A[i-1][j-1]+k;
10640b57cec5SDimitry Andric // then it is a good order.
10650b57cec5SDimitry Andric if (AR->getLoop() == InnerLoop) {
10660b57cec5SDimitry Andric // We found an InnerLoop induction after OuterLoop induction. It is
10670b57cec5SDimitry Andric // a good order.
10680b57cec5SDimitry Andric FoundInnerInduction = true;
10690b57cec5SDimitry Andric if (FoundOuterInduction) {
10700b57cec5SDimitry Andric GoodOrder++;
10710b57cec5SDimitry Andric break;
10720b57cec5SDimitry Andric }
10730b57cec5SDimitry Andric }
10740b57cec5SDimitry Andric // If we find the outer induction after an inner induction e.g.
10750b57cec5SDimitry Andric // for(int i=0;i<N;i++)
10760b57cec5SDimitry Andric // for(int j=0;j<N;j++)
10770b57cec5SDimitry Andric // A[j][i] = A[j-1][i-1]+k;
10780b57cec5SDimitry Andric // then it is a bad order.
10790b57cec5SDimitry Andric if (AR->getLoop() == OuterLoop) {
10800b57cec5SDimitry Andric // We found an OuterLoop induction after InnerLoop induction. It is
10810b57cec5SDimitry Andric // a bad order.
10820b57cec5SDimitry Andric FoundOuterInduction = true;
10830b57cec5SDimitry Andric if (FoundInnerInduction) {
10840b57cec5SDimitry Andric BadOrder++;
10850b57cec5SDimitry Andric break;
10860b57cec5SDimitry Andric }
10870b57cec5SDimitry Andric }
10880b57cec5SDimitry Andric }
10890b57cec5SDimitry Andric }
10900b57cec5SDimitry Andric }
10910b57cec5SDimitry Andric }
10920b57cec5SDimitry Andric return GoodOrder - BadOrder;
10930b57cec5SDimitry Andric }
10940b57cec5SDimitry Andric
1095bdd1243dSDimitry Andric std::optional<bool>
isProfitablePerLoopCacheAnalysis(const DenseMap<const Loop *,unsigned> & CostMap,std::unique_ptr<CacheCost> & CC)1096bdd1243dSDimitry Andric LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
1097bdd1243dSDimitry Andric const DenseMap<const Loop *, unsigned> &CostMap,
1098bdd1243dSDimitry Andric std::unique_ptr<CacheCost> &CC) {
109981ad6265SDimitry Andric // This is the new cost model returned from loop cache analysis.
110081ad6265SDimitry Andric // A smaller index means the loop should be placed an outer loop, and vice
110181ad6265SDimitry Andric // versa.
110206c3fb27SDimitry Andric if (CostMap.contains(InnerLoop) && CostMap.contains(OuterLoop)) {
110381ad6265SDimitry Andric unsigned InnerIndex = 0, OuterIndex = 0;
110481ad6265SDimitry Andric InnerIndex = CostMap.find(InnerLoop)->second;
110581ad6265SDimitry Andric OuterIndex = CostMap.find(OuterLoop)->second;
110681ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "InnerIndex = " << InnerIndex
110781ad6265SDimitry Andric << ", OuterIndex = " << OuterIndex << "\n");
110881ad6265SDimitry Andric if (InnerIndex < OuterIndex)
1109bdd1243dSDimitry Andric return std::optional<bool>(true);
1110bdd1243dSDimitry Andric assert(InnerIndex != OuterIndex && "CostMap should assign unique "
1111bdd1243dSDimitry Andric "numbers to each loop");
1112bdd1243dSDimitry Andric if (CC->getLoopCost(*OuterLoop) == CC->getLoopCost(*InnerLoop))
1113bdd1243dSDimitry Andric return std::nullopt;
1114bdd1243dSDimitry Andric return std::optional<bool>(false);
1115bdd1243dSDimitry Andric }
1116bdd1243dSDimitry Andric return std::nullopt;
1117bdd1243dSDimitry Andric }
1118bdd1243dSDimitry Andric
1119bdd1243dSDimitry Andric std::optional<bool>
isProfitablePerInstrOrderCost()1120bdd1243dSDimitry Andric LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
112181ad6265SDimitry Andric // Legacy cost model: this is rough cost estimation algorithm. It counts the
112281ad6265SDimitry Andric // good and bad order of induction variables in the instruction and allows
112381ad6265SDimitry Andric // reordering if number of bad orders is more than good.
11240b57cec5SDimitry Andric int Cost = getInstrOrderCost();
11250b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Cost = " << Cost << "\n");
1126bdd1243dSDimitry Andric if (Cost < 0 && Cost < LoopInterchangeCostThreshold)
1127bdd1243dSDimitry Andric return std::optional<bool>(true);
1128bdd1243dSDimitry Andric
1129bdd1243dSDimitry Andric return std::nullopt;
113081ad6265SDimitry Andric }
11310b57cec5SDimitry Andric
isProfitableForVectorization(unsigned InnerLoopId,unsigned OuterLoopId,CharMatrix & DepMatrix)1132bdd1243dSDimitry Andric std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
1133bdd1243dSDimitry Andric unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) {
1134bdd1243dSDimitry Andric for (auto &Row : DepMatrix) {
1135bdd1243dSDimitry Andric // If the inner loop is loop independent or doesn't carry any dependency
1136bdd1243dSDimitry Andric // it is not profitable to move this to outer position, since we are
1137bdd1243dSDimitry Andric // likely able to do inner loop vectorization already.
1138bdd1243dSDimitry Andric if (Row[InnerLoopId] == 'I' || Row[InnerLoopId] == '=')
1139bdd1243dSDimitry Andric return std::optional<bool>(false);
11400b57cec5SDimitry Andric
1141bdd1243dSDimitry Andric // If the outer loop is not loop independent it is not profitable to move
1142bdd1243dSDimitry Andric // this to inner position, since doing so would not enable inner loop
1143bdd1243dSDimitry Andric // parallelism.
1144bdd1243dSDimitry Andric if (Row[OuterLoopId] != 'I' && Row[OuterLoopId] != '=')
1145bdd1243dSDimitry Andric return std::optional<bool>(false);
1146bdd1243dSDimitry Andric }
1147bdd1243dSDimitry Andric // If inner loop has dependence and outer loop is loop independent then it
1148bdd1243dSDimitry Andric // is/ profitable to interchange to enable inner loop parallelism.
1149bdd1243dSDimitry Andric // If there are no dependences, interchanging will not improve anything.
1150bdd1243dSDimitry Andric return std::optional<bool>(!DepMatrix.empty());
1151bdd1243dSDimitry Andric }
1152bdd1243dSDimitry Andric
isProfitable(const Loop * InnerLoop,const Loop * OuterLoop,unsigned InnerLoopId,unsigned OuterLoopId,CharMatrix & DepMatrix,const DenseMap<const Loop *,unsigned> & CostMap,std::unique_ptr<CacheCost> & CC)1153bdd1243dSDimitry Andric bool LoopInterchangeProfitability::isProfitable(
1154bdd1243dSDimitry Andric const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1155bdd1243dSDimitry Andric unsigned OuterLoopId, CharMatrix &DepMatrix,
1156bdd1243dSDimitry Andric const DenseMap<const Loop *, unsigned> &CostMap,
1157bdd1243dSDimitry Andric std::unique_ptr<CacheCost> &CC) {
1158bdd1243dSDimitry Andric // isProfitable() is structured to avoid endless loop interchange.
1159bdd1243dSDimitry Andric // If loop cache analysis could decide the profitability then,
1160bdd1243dSDimitry Andric // profitability check will stop and return the analysis result.
1161bdd1243dSDimitry Andric // If cache analysis failed to analyze the loopnest (e.g.,
1162bdd1243dSDimitry Andric // due to delinearization issues) then only check whether it is
1163bdd1243dSDimitry Andric // profitable for InstrOrderCost. Likewise, if InstrOrderCost failed to
1164bdd1243dSDimitry Andric // analysis the profitability then only, isProfitableForVectorization
1165bdd1243dSDimitry Andric // will decide.
1166bdd1243dSDimitry Andric std::optional<bool> shouldInterchange =
1167bdd1243dSDimitry Andric isProfitablePerLoopCacheAnalysis(CostMap, CC);
1168bdd1243dSDimitry Andric if (!shouldInterchange.has_value()) {
1169bdd1243dSDimitry Andric shouldInterchange = isProfitablePerInstrOrderCost();
1170bdd1243dSDimitry Andric if (!shouldInterchange.has_value())
1171bdd1243dSDimitry Andric shouldInterchange =
1172bdd1243dSDimitry Andric isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
1173bdd1243dSDimitry Andric }
1174bdd1243dSDimitry Andric if (!shouldInterchange.has_value()) {
11750b57cec5SDimitry Andric ORE->emit([&]() {
11760b57cec5SDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
11770b57cec5SDimitry Andric InnerLoop->getStartLoc(),
11780b57cec5SDimitry Andric InnerLoop->getHeader())
1179bdd1243dSDimitry Andric << "Insufficient information to calculate the cost of loop for "
1180bdd1243dSDimitry Andric "interchange.";
11810b57cec5SDimitry Andric });
11820b57cec5SDimitry Andric return false;
1183bdd1243dSDimitry Andric } else if (!shouldInterchange.value()) {
1184bdd1243dSDimitry Andric ORE->emit([&]() {
1185bdd1243dSDimitry Andric return OptimizationRemarkMissed(DEBUG_TYPE, "InterchangeNotProfitable",
1186bdd1243dSDimitry Andric InnerLoop->getStartLoc(),
1187bdd1243dSDimitry Andric InnerLoop->getHeader())
1188bdd1243dSDimitry Andric << "Interchanging loops is not considered to improve cache "
1189bdd1243dSDimitry Andric "locality nor vectorization.";
1190bdd1243dSDimitry Andric });
1191bdd1243dSDimitry Andric return false;
1192bdd1243dSDimitry Andric }
1193bdd1243dSDimitry Andric return true;
11940b57cec5SDimitry Andric }
11950b57cec5SDimitry Andric
removeChildLoop(Loop * OuterLoop,Loop * InnerLoop)11960b57cec5SDimitry Andric void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop,
11970b57cec5SDimitry Andric Loop *InnerLoop) {
11980b57cec5SDimitry Andric for (Loop *L : *OuterLoop)
11990b57cec5SDimitry Andric if (L == InnerLoop) {
12000b57cec5SDimitry Andric OuterLoop->removeChildLoop(L);
12010b57cec5SDimitry Andric return;
12020b57cec5SDimitry Andric }
12030b57cec5SDimitry Andric llvm_unreachable("Couldn't find loop");
12040b57cec5SDimitry Andric }
12050b57cec5SDimitry Andric
12060b57cec5SDimitry Andric /// Update LoopInfo, after interchanging. NewInner and NewOuter refer to the
12070b57cec5SDimitry Andric /// new inner and outer loop after interchanging: NewInner is the original
12080b57cec5SDimitry Andric /// outer loop and NewOuter is the original inner loop.
12090b57cec5SDimitry Andric ///
12100b57cec5SDimitry Andric /// Before interchanging, we have the following structure
12110b57cec5SDimitry Andric /// Outer preheader
12120b57cec5SDimitry Andric // Outer header
12130b57cec5SDimitry Andric // Inner preheader
12140b57cec5SDimitry Andric // Inner header
12150b57cec5SDimitry Andric // Inner body
12160b57cec5SDimitry Andric // Inner latch
12170b57cec5SDimitry Andric // outer bbs
12180b57cec5SDimitry Andric // Outer latch
12190b57cec5SDimitry Andric //
12200b57cec5SDimitry Andric // After interchanging:
12210b57cec5SDimitry Andric // Inner preheader
12220b57cec5SDimitry Andric // Inner header
12230b57cec5SDimitry Andric // Outer preheader
12240b57cec5SDimitry Andric // Outer header
12250b57cec5SDimitry Andric // Inner body
12260b57cec5SDimitry Andric // outer bbs
12270b57cec5SDimitry Andric // Outer latch
12280b57cec5SDimitry Andric // Inner latch
restructureLoops(Loop * NewInner,Loop * NewOuter,BasicBlock * OrigInnerPreHeader,BasicBlock * OrigOuterPreHeader)12290b57cec5SDimitry Andric void LoopInterchangeTransform::restructureLoops(
12300b57cec5SDimitry Andric Loop *NewInner, Loop *NewOuter, BasicBlock *OrigInnerPreHeader,
12310b57cec5SDimitry Andric BasicBlock *OrigOuterPreHeader) {
12320b57cec5SDimitry Andric Loop *OuterLoopParent = OuterLoop->getParentLoop();
12330b57cec5SDimitry Andric // The original inner loop preheader moves from the new inner loop to
12340b57cec5SDimitry Andric // the parent loop, if there is one.
12350b57cec5SDimitry Andric NewInner->removeBlockFromLoop(OrigInnerPreHeader);
12360b57cec5SDimitry Andric LI->changeLoopFor(OrigInnerPreHeader, OuterLoopParent);
12370b57cec5SDimitry Andric
12380b57cec5SDimitry Andric // Switch the loop levels.
12390b57cec5SDimitry Andric if (OuterLoopParent) {
12400b57cec5SDimitry Andric // Remove the loop from its parent loop.
12410b57cec5SDimitry Andric removeChildLoop(OuterLoopParent, NewInner);
12420b57cec5SDimitry Andric removeChildLoop(NewInner, NewOuter);
12430b57cec5SDimitry Andric OuterLoopParent->addChildLoop(NewOuter);
12440b57cec5SDimitry Andric } else {
12450b57cec5SDimitry Andric removeChildLoop(NewInner, NewOuter);
12460b57cec5SDimitry Andric LI->changeTopLevelLoop(NewInner, NewOuter);
12470b57cec5SDimitry Andric }
1248e8d8bef9SDimitry Andric while (!NewOuter->isInnermost())
12490b57cec5SDimitry Andric NewInner->addChildLoop(NewOuter->removeChildLoop(NewOuter->begin()));
12500b57cec5SDimitry Andric NewOuter->addChildLoop(NewInner);
12510b57cec5SDimitry Andric
12520b57cec5SDimitry Andric // BBs from the original inner loop.
12530b57cec5SDimitry Andric SmallVector<BasicBlock *, 8> OrigInnerBBs(NewOuter->blocks());
12540b57cec5SDimitry Andric
12550b57cec5SDimitry Andric // Add BBs from the original outer loop to the original inner loop (excluding
12560b57cec5SDimitry Andric // BBs already in inner loop)
12570b57cec5SDimitry Andric for (BasicBlock *BB : NewInner->blocks())
12580b57cec5SDimitry Andric if (LI->getLoopFor(BB) == NewInner)
12590b57cec5SDimitry Andric NewOuter->addBlockEntry(BB);
12600b57cec5SDimitry Andric
12610b57cec5SDimitry Andric // Now remove inner loop header and latch from the new inner loop and move
12620b57cec5SDimitry Andric // other BBs (the loop body) to the new inner loop.
12630b57cec5SDimitry Andric BasicBlock *OuterHeader = NewOuter->getHeader();
12640b57cec5SDimitry Andric BasicBlock *OuterLatch = NewOuter->getLoopLatch();
12650b57cec5SDimitry Andric for (BasicBlock *BB : OrigInnerBBs) {
12660b57cec5SDimitry Andric // Nothing will change for BBs in child loops.
12670b57cec5SDimitry Andric if (LI->getLoopFor(BB) != NewOuter)
12680b57cec5SDimitry Andric continue;
12690b57cec5SDimitry Andric // Remove the new outer loop header and latch from the new inner loop.
12700b57cec5SDimitry Andric if (BB == OuterHeader || BB == OuterLatch)
12710b57cec5SDimitry Andric NewInner->removeBlockFromLoop(BB);
12720b57cec5SDimitry Andric else
12730b57cec5SDimitry Andric LI->changeLoopFor(BB, NewInner);
12740b57cec5SDimitry Andric }
12750b57cec5SDimitry Andric
12760b57cec5SDimitry Andric // The preheader of the original outer loop becomes part of the new
12770b57cec5SDimitry Andric // outer loop.
12780b57cec5SDimitry Andric NewOuter->addBlockEntry(OrigOuterPreHeader);
12790b57cec5SDimitry Andric LI->changeLoopFor(OrigOuterPreHeader, NewOuter);
12800b57cec5SDimitry Andric
12810b57cec5SDimitry Andric // Tell SE that we move the loops around.
12820b57cec5SDimitry Andric SE->forgetLoop(NewOuter);
12830b57cec5SDimitry Andric }
12840b57cec5SDimitry Andric
transform()12850b57cec5SDimitry Andric bool LoopInterchangeTransform::transform() {
12860b57cec5SDimitry Andric bool Transformed = false;
12870b57cec5SDimitry Andric
12880b57cec5SDimitry Andric if (InnerLoop->getSubLoops().empty()) {
12890b57cec5SDimitry Andric BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
12908bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n");
129104eeddc0SDimitry Andric auto &InductionPHIs = LIL.getInnerLoopInductions();
129204eeddc0SDimitry Andric if (InductionPHIs.empty()) {
12930b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n");
12940b57cec5SDimitry Andric return false;
12950b57cec5SDimitry Andric }
12960b57cec5SDimitry Andric
129704eeddc0SDimitry Andric SmallVector<Instruction *, 8> InnerIndexVarList;
129804eeddc0SDimitry Andric for (PHINode *CurInductionPHI : InductionPHIs) {
129904eeddc0SDimitry Andric if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader)
130004eeddc0SDimitry Andric InnerIndexVarList.push_back(
130104eeddc0SDimitry Andric dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(1)));
13020b57cec5SDimitry Andric else
130304eeddc0SDimitry Andric InnerIndexVarList.push_back(
130404eeddc0SDimitry Andric dyn_cast<Instruction>(CurInductionPHI->getIncomingValue(0)));
130504eeddc0SDimitry Andric }
13060b57cec5SDimitry Andric
13078bcb0991SDimitry Andric // Create a new latch block for the inner loop. We split at the
13088bcb0991SDimitry Andric // current latch's terminator and then move the condition and all
13098bcb0991SDimitry Andric // operands that are not either loop-invariant or the induction PHI into the
13108bcb0991SDimitry Andric // new latch block.
13118bcb0991SDimitry Andric BasicBlock *NewLatch =
13128bcb0991SDimitry Andric SplitBlock(InnerLoop->getLoopLatch(),
13138bcb0991SDimitry Andric InnerLoop->getLoopLatch()->getTerminator(), DT, LI);
13148bcb0991SDimitry Andric
13158bcb0991SDimitry Andric SmallSetVector<Instruction *, 4> WorkList;
13168bcb0991SDimitry Andric unsigned i = 0;
131704eeddc0SDimitry Andric auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() {
13188bcb0991SDimitry Andric for (; i < WorkList.size(); i++) {
13198bcb0991SDimitry Andric // Duplicate instruction and move it the new latch. Update uses that
13208bcb0991SDimitry Andric // have been moved.
13218bcb0991SDimitry Andric Instruction *NewI = WorkList[i]->clone();
13228bcb0991SDimitry Andric NewI->insertBefore(NewLatch->getFirstNonPHI());
13238bcb0991SDimitry Andric assert(!NewI->mayHaveSideEffects() &&
13248bcb0991SDimitry Andric "Moving instructions with side-effects may change behavior of "
13258bcb0991SDimitry Andric "the loop nest!");
1326fe6060f1SDimitry Andric for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) {
13278bcb0991SDimitry Andric Instruction *UserI = cast<Instruction>(U.getUser());
13288bcb0991SDimitry Andric if (!InnerLoop->contains(UserI->getParent()) ||
132904eeddc0SDimitry Andric UserI->getParent() == NewLatch ||
133004eeddc0SDimitry Andric llvm::is_contained(InductionPHIs, UserI))
13318bcb0991SDimitry Andric U.set(NewI);
13328bcb0991SDimitry Andric }
13338bcb0991SDimitry Andric // Add operands of moved instruction to the worklist, except if they are
13348bcb0991SDimitry Andric // outside the inner loop or are the induction PHI.
13358bcb0991SDimitry Andric for (Value *Op : WorkList[i]->operands()) {
13368bcb0991SDimitry Andric Instruction *OpI = dyn_cast<Instruction>(Op);
13378bcb0991SDimitry Andric if (!OpI ||
13388bcb0991SDimitry Andric this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop ||
133904eeddc0SDimitry Andric llvm::is_contained(InductionPHIs, OpI))
13408bcb0991SDimitry Andric continue;
13418bcb0991SDimitry Andric WorkList.insert(OpI);
13428bcb0991SDimitry Andric }
13438bcb0991SDimitry Andric }
13448bcb0991SDimitry Andric };
13458bcb0991SDimitry Andric
13468bcb0991SDimitry Andric // FIXME: Should we interchange when we have a constant condition?
13478bcb0991SDimitry Andric Instruction *CondI = dyn_cast<Instruction>(
13488bcb0991SDimitry Andric cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator())
13498bcb0991SDimitry Andric ->getCondition());
13508bcb0991SDimitry Andric if (CondI)
13518bcb0991SDimitry Andric WorkList.insert(CondI);
13528bcb0991SDimitry Andric MoveInstructions();
135304eeddc0SDimitry Andric for (Instruction *InnerIndexVar : InnerIndexVarList)
13548bcb0991SDimitry Andric WorkList.insert(cast<Instruction>(InnerIndexVar));
13558bcb0991SDimitry Andric MoveInstructions();
1356bdd1243dSDimitry Andric }
13570b57cec5SDimitry Andric
1358bdd1243dSDimitry Andric // Ensure the inner loop phi nodes have a separate basic block.
13590b57cec5SDimitry Andric BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
1360bdd1243dSDimitry Andric if (InnerLoopHeader->getFirstNonPHI() != InnerLoopHeader->getTerminator()) {
13610b57cec5SDimitry Andric SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI);
13620b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n");
13630b57cec5SDimitry Andric }
13640b57cec5SDimitry Andric
1365e8d8bef9SDimitry Andric // Instructions in the original inner loop preheader may depend on values
1366e8d8bef9SDimitry Andric // defined in the outer loop header. Move them there, because the original
1367e8d8bef9SDimitry Andric // inner loop preheader will become the entry into the interchanged loop nest.
1368e8d8bef9SDimitry Andric // Currently we move all instructions and rely on LICM to move invariant
1369e8d8bef9SDimitry Andric // instructions outside the loop nest.
1370e8d8bef9SDimitry Andric BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
1371e8d8bef9SDimitry Andric BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
1372e8d8bef9SDimitry Andric if (InnerLoopPreHeader != OuterLoopHeader) {
1373e8d8bef9SDimitry Andric SmallPtrSet<Instruction *, 4> NeedsMoving;
1374e8d8bef9SDimitry Andric for (Instruction &I :
1375e8d8bef9SDimitry Andric make_early_inc_range(make_range(InnerLoopPreHeader->begin(),
1376e8d8bef9SDimitry Andric std::prev(InnerLoopPreHeader->end()))))
13775f757f3fSDimitry Andric I.moveBeforePreserving(OuterLoopHeader->getTerminator());
1378e8d8bef9SDimitry Andric }
1379e8d8bef9SDimitry Andric
13800b57cec5SDimitry Andric Transformed |= adjustLoopLinks();
13810b57cec5SDimitry Andric if (!Transformed) {
13820b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "adjustLoopLinks failed\n");
13830b57cec5SDimitry Andric return false;
13840b57cec5SDimitry Andric }
13850b57cec5SDimitry Andric
13860b57cec5SDimitry Andric return true;
13870b57cec5SDimitry Andric }
13880b57cec5SDimitry Andric
13890b57cec5SDimitry Andric /// \brief Move all instructions except the terminator from FromBB right before
13900b57cec5SDimitry Andric /// InsertBefore
moveBBContents(BasicBlock * FromBB,Instruction * InsertBefore)13910b57cec5SDimitry Andric static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) {
1392bdd1243dSDimitry Andric BasicBlock *ToBB = InsertBefore->getParent();
13930b57cec5SDimitry Andric
1394bdd1243dSDimitry Andric ToBB->splice(InsertBefore->getIterator(), FromBB, FromBB->begin(),
13950b57cec5SDimitry Andric FromBB->getTerminator()->getIterator());
13960b57cec5SDimitry Andric }
13970b57cec5SDimitry Andric
13985ffd83dbSDimitry Andric /// Swap instructions between \p BB1 and \p BB2 but keep terminators intact.
swapBBContents(BasicBlock * BB1,BasicBlock * BB2)13995ffd83dbSDimitry Andric static void swapBBContents(BasicBlock *BB1, BasicBlock *BB2) {
14005ffd83dbSDimitry Andric // Save all non-terminator instructions of BB1 into TempInstrs and unlink them
14015ffd83dbSDimitry Andric // from BB1 afterwards.
14025ffd83dbSDimitry Andric auto Iter = map_range(*BB1, [](Instruction &I) { return &I; });
14035ffd83dbSDimitry Andric SmallVector<Instruction *, 4> TempInstrs(Iter.begin(), std::prev(Iter.end()));
14045ffd83dbSDimitry Andric for (Instruction *I : TempInstrs)
14055ffd83dbSDimitry Andric I->removeFromParent();
14065ffd83dbSDimitry Andric
14075ffd83dbSDimitry Andric // Move instructions from BB2 to BB1.
14085ffd83dbSDimitry Andric moveBBContents(BB2, BB1->getTerminator());
14095ffd83dbSDimitry Andric
14105ffd83dbSDimitry Andric // Move instructions from TempInstrs to BB2.
14115ffd83dbSDimitry Andric for (Instruction *I : TempInstrs)
14125ffd83dbSDimitry Andric I->insertBefore(BB2->getTerminator());
14135ffd83dbSDimitry Andric }
14145ffd83dbSDimitry Andric
1415480093f4SDimitry Andric // Update BI to jump to NewBB instead of OldBB. Records updates to the
1416480093f4SDimitry Andric // dominator tree in DTUpdates. If \p MustUpdateOnce is true, assert that
1417480093f4SDimitry Andric // \p OldBB is exactly once in BI's successor list.
updateSuccessor(BranchInst * BI,BasicBlock * OldBB,BasicBlock * NewBB,std::vector<DominatorTree::UpdateType> & DTUpdates,bool MustUpdateOnce=true)14180b57cec5SDimitry Andric static void updateSuccessor(BranchInst *BI, BasicBlock *OldBB,
14190b57cec5SDimitry Andric BasicBlock *NewBB,
1420480093f4SDimitry Andric std::vector<DominatorTree::UpdateType> &DTUpdates,
1421480093f4SDimitry Andric bool MustUpdateOnce = true) {
1422480093f4SDimitry Andric assert((!MustUpdateOnce ||
1423480093f4SDimitry Andric llvm::count_if(successors(BI),
1424480093f4SDimitry Andric [OldBB](BasicBlock *BB) {
1425480093f4SDimitry Andric return BB == OldBB;
1426480093f4SDimitry Andric }) == 1) && "BI must jump to OldBB exactly once.");
1427480093f4SDimitry Andric bool Changed = false;
1428480093f4SDimitry Andric for (Use &Op : BI->operands())
1429480093f4SDimitry Andric if (Op == OldBB) {
1430480093f4SDimitry Andric Op.set(NewBB);
1431480093f4SDimitry Andric Changed = true;
1432480093f4SDimitry Andric }
14330b57cec5SDimitry Andric
1434480093f4SDimitry Andric if (Changed) {
14350b57cec5SDimitry Andric DTUpdates.push_back(
14360b57cec5SDimitry Andric {DominatorTree::UpdateKind::Insert, BI->getParent(), NewBB});
14370b57cec5SDimitry Andric DTUpdates.push_back(
14380b57cec5SDimitry Andric {DominatorTree::UpdateKind::Delete, BI->getParent(), OldBB});
14390b57cec5SDimitry Andric }
1440480093f4SDimitry Andric assert(Changed && "Expected a successor to be updated");
14410b57cec5SDimitry Andric }
14420b57cec5SDimitry Andric
14430b57cec5SDimitry Andric // Move Lcssa PHIs to the right place.
moveLCSSAPhis(BasicBlock * InnerExit,BasicBlock * InnerHeader,BasicBlock * InnerLatch,BasicBlock * OuterHeader,BasicBlock * OuterLatch,BasicBlock * OuterExit,Loop * InnerLoop,LoopInfo * LI)14440b57cec5SDimitry Andric static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
14450b57cec5SDimitry Andric BasicBlock *InnerLatch, BasicBlock *OuterHeader,
1446480093f4SDimitry Andric BasicBlock *OuterLatch, BasicBlock *OuterExit,
1447480093f4SDimitry Andric Loop *InnerLoop, LoopInfo *LI) {
14480b57cec5SDimitry Andric
14490b57cec5SDimitry Andric // Deal with LCSSA PHI nodes in the exit block of the inner loop, that are
14500b57cec5SDimitry Andric // defined either in the header or latch. Those blocks will become header and
14510b57cec5SDimitry Andric // latch of the new outer loop, and the only possible users can PHI nodes
14520b57cec5SDimitry Andric // in the exit block of the loop nest or the outer loop header (reduction
14530b57cec5SDimitry Andric // PHIs, in that case, the incoming value must be defined in the inner loop
14540b57cec5SDimitry Andric // header). We can just substitute the user with the incoming value and remove
14550b57cec5SDimitry Andric // the PHI.
14560b57cec5SDimitry Andric for (PHINode &P : make_early_inc_range(InnerExit->phis())) {
14570b57cec5SDimitry Andric assert(P.getNumIncomingValues() == 1 &&
14580b57cec5SDimitry Andric "Only loops with a single exit are supported!");
14590b57cec5SDimitry Andric
14600b57cec5SDimitry Andric // Incoming values are guaranteed be instructions currently.
14610b57cec5SDimitry Andric auto IncI = cast<Instruction>(P.getIncomingValueForBlock(InnerLatch));
146281ad6265SDimitry Andric // In case of multi-level nested loops, follow LCSSA to find the incoming
146381ad6265SDimitry Andric // value defined from the innermost loop.
146481ad6265SDimitry Andric auto IncIInnerMost = cast<Instruction>(followLCSSA(IncI));
14650b57cec5SDimitry Andric // Skip phis with incoming values from the inner loop body, excluding the
14660b57cec5SDimitry Andric // header and latch.
146781ad6265SDimitry Andric if (IncIInnerMost->getParent() != InnerLatch &&
146881ad6265SDimitry Andric IncIInnerMost->getParent() != InnerHeader)
14690b57cec5SDimitry Andric continue;
14700b57cec5SDimitry Andric
14710b57cec5SDimitry Andric assert(all_of(P.users(),
14720b57cec5SDimitry Andric [OuterHeader, OuterExit, IncI, InnerHeader](User *U) {
14730b57cec5SDimitry Andric return (cast<PHINode>(U)->getParent() == OuterHeader &&
14740b57cec5SDimitry Andric IncI->getParent() == InnerHeader) ||
14750b57cec5SDimitry Andric cast<PHINode>(U)->getParent() == OuterExit;
14760b57cec5SDimitry Andric }) &&
14770b57cec5SDimitry Andric "Can only replace phis iff the uses are in the loop nest exit or "
14780b57cec5SDimitry Andric "the incoming value is defined in the inner header (it will "
14790b57cec5SDimitry Andric "dominate all loop blocks after interchanging)");
14800b57cec5SDimitry Andric P.replaceAllUsesWith(IncI);
14810b57cec5SDimitry Andric P.eraseFromParent();
14820b57cec5SDimitry Andric }
14830b57cec5SDimitry Andric
14840b57cec5SDimitry Andric SmallVector<PHINode *, 8> LcssaInnerExit;
14850b57cec5SDimitry Andric for (PHINode &P : InnerExit->phis())
14860b57cec5SDimitry Andric LcssaInnerExit.push_back(&P);
14870b57cec5SDimitry Andric
14880b57cec5SDimitry Andric SmallVector<PHINode *, 8> LcssaInnerLatch;
14890b57cec5SDimitry Andric for (PHINode &P : InnerLatch->phis())
14900b57cec5SDimitry Andric LcssaInnerLatch.push_back(&P);
14910b57cec5SDimitry Andric
14920b57cec5SDimitry Andric // Lcssa PHIs for values used outside the inner loop are in InnerExit.
14930b57cec5SDimitry Andric // If a PHI node has users outside of InnerExit, it has a use outside the
14940b57cec5SDimitry Andric // interchanged loop and we have to preserve it. We move these to
14950b57cec5SDimitry Andric // InnerLatch, which will become the new exit block for the innermost
14960b57cec5SDimitry Andric // loop after interchanging.
14970b57cec5SDimitry Andric for (PHINode *P : LcssaInnerExit)
14980b57cec5SDimitry Andric P->moveBefore(InnerLatch->getFirstNonPHI());
14990b57cec5SDimitry Andric
15000b57cec5SDimitry Andric // If the inner loop latch contains LCSSA PHIs, those come from a child loop
15010b57cec5SDimitry Andric // and we have to move them to the new inner latch.
15020b57cec5SDimitry Andric for (PHINode *P : LcssaInnerLatch)
15030b57cec5SDimitry Andric P->moveBefore(InnerExit->getFirstNonPHI());
15040b57cec5SDimitry Andric
15050b57cec5SDimitry Andric // Deal with LCSSA PHI nodes in the loop nest exit block. For PHIs that have
1506480093f4SDimitry Andric // incoming values defined in the outer loop, we have to add a new PHI
15070b57cec5SDimitry Andric // in the inner loop latch, which became the exit block of the outer loop,
15080b57cec5SDimitry Andric // after interchanging.
15090b57cec5SDimitry Andric if (OuterExit) {
15100b57cec5SDimitry Andric for (PHINode &P : OuterExit->phis()) {
15110b57cec5SDimitry Andric if (P.getNumIncomingValues() != 1)
15120b57cec5SDimitry Andric continue;
1513480093f4SDimitry Andric // Skip Phis with incoming values defined in the inner loop. Those should
15140b57cec5SDimitry Andric // already have been updated.
15150b57cec5SDimitry Andric auto I = dyn_cast<Instruction>(P.getIncomingValue(0));
1516480093f4SDimitry Andric if (!I || LI->getLoopFor(I->getParent()) == InnerLoop)
15170b57cec5SDimitry Andric continue;
15180b57cec5SDimitry Andric
15190b57cec5SDimitry Andric PHINode *NewPhi = dyn_cast<PHINode>(P.clone());
15200b57cec5SDimitry Andric NewPhi->setIncomingValue(0, P.getIncomingValue(0));
15210b57cec5SDimitry Andric NewPhi->setIncomingBlock(0, OuterLatch);
1522fe6060f1SDimitry Andric // We might have incoming edges from other BBs, i.e., the original outer
1523fe6060f1SDimitry Andric // header.
1524fe6060f1SDimitry Andric for (auto *Pred : predecessors(InnerLatch)) {
1525fe6060f1SDimitry Andric if (Pred == OuterLatch)
1526fe6060f1SDimitry Andric continue;
1527fe6060f1SDimitry Andric NewPhi->addIncoming(P.getIncomingValue(0), Pred);
1528fe6060f1SDimitry Andric }
15290b57cec5SDimitry Andric NewPhi->insertBefore(InnerLatch->getFirstNonPHI());
15300b57cec5SDimitry Andric P.setIncomingValue(0, NewPhi);
15310b57cec5SDimitry Andric }
15320b57cec5SDimitry Andric }
15330b57cec5SDimitry Andric
15340b57cec5SDimitry Andric // Now adjust the incoming blocks for the LCSSA PHIs.
15350b57cec5SDimitry Andric // For PHIs moved from Inner's exit block, we need to replace Inner's latch
15360b57cec5SDimitry Andric // with the new latch.
15370b57cec5SDimitry Andric InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
15380b57cec5SDimitry Andric }
15390b57cec5SDimitry Andric
adjustLoopBranches()15400b57cec5SDimitry Andric bool LoopInterchangeTransform::adjustLoopBranches() {
15410b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
15420b57cec5SDimitry Andric std::vector<DominatorTree::UpdateType> DTUpdates;
15430b57cec5SDimitry Andric
15440b57cec5SDimitry Andric BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
15450b57cec5SDimitry Andric BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
15460b57cec5SDimitry Andric
15470b57cec5SDimitry Andric assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
15480b57cec5SDimitry Andric InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
15490b57cec5SDimitry Andric InnerLoopPreHeader && "Guaranteed by loop-simplify form");
15500b57cec5SDimitry Andric // Ensure that both preheaders do not contain PHI nodes and have single
15510b57cec5SDimitry Andric // predecessors. This allows us to move them easily. We use
15520b57cec5SDimitry Andric // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
15530b57cec5SDimitry Andric // preheaders do not satisfy those conditions.
15540b57cec5SDimitry Andric if (isa<PHINode>(OuterLoopPreHeader->begin()) ||
15550b57cec5SDimitry Andric !OuterLoopPreHeader->getUniquePredecessor())
15560b57cec5SDimitry Andric OuterLoopPreHeader =
15570b57cec5SDimitry Andric InsertPreheaderForLoop(OuterLoop, DT, LI, nullptr, true);
15580b57cec5SDimitry Andric if (InnerLoopPreHeader == OuterLoop->getHeader())
15590b57cec5SDimitry Andric InnerLoopPreHeader =
15600b57cec5SDimitry Andric InsertPreheaderForLoop(InnerLoop, DT, LI, nullptr, true);
15610b57cec5SDimitry Andric
15620b57cec5SDimitry Andric // Adjust the loop preheader
15630b57cec5SDimitry Andric BasicBlock *InnerLoopHeader = InnerLoop->getHeader();
15640b57cec5SDimitry Andric BasicBlock *OuterLoopHeader = OuterLoop->getHeader();
15650b57cec5SDimitry Andric BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
15660b57cec5SDimitry Andric BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
15670b57cec5SDimitry Andric BasicBlock *OuterLoopPredecessor = OuterLoopPreHeader->getUniquePredecessor();
15680b57cec5SDimitry Andric BasicBlock *InnerLoopLatchPredecessor =
15690b57cec5SDimitry Andric InnerLoopLatch->getUniquePredecessor();
15700b57cec5SDimitry Andric BasicBlock *InnerLoopLatchSuccessor;
15710b57cec5SDimitry Andric BasicBlock *OuterLoopLatchSuccessor;
15720b57cec5SDimitry Andric
15730b57cec5SDimitry Andric BranchInst *OuterLoopLatchBI =
15740b57cec5SDimitry Andric dyn_cast<BranchInst>(OuterLoopLatch->getTerminator());
15750b57cec5SDimitry Andric BranchInst *InnerLoopLatchBI =
15760b57cec5SDimitry Andric dyn_cast<BranchInst>(InnerLoopLatch->getTerminator());
15770b57cec5SDimitry Andric BranchInst *OuterLoopHeaderBI =
15780b57cec5SDimitry Andric dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
15790b57cec5SDimitry Andric BranchInst *InnerLoopHeaderBI =
15800b57cec5SDimitry Andric dyn_cast<BranchInst>(InnerLoopHeader->getTerminator());
15810b57cec5SDimitry Andric
15820b57cec5SDimitry Andric if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor ||
15830b57cec5SDimitry Andric !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI ||
15840b57cec5SDimitry Andric !InnerLoopHeaderBI)
15850b57cec5SDimitry Andric return false;
15860b57cec5SDimitry Andric
15870b57cec5SDimitry Andric BranchInst *InnerLoopLatchPredecessorBI =
15880b57cec5SDimitry Andric dyn_cast<BranchInst>(InnerLoopLatchPredecessor->getTerminator());
15890b57cec5SDimitry Andric BranchInst *OuterLoopPredecessorBI =
15900b57cec5SDimitry Andric dyn_cast<BranchInst>(OuterLoopPredecessor->getTerminator());
15910b57cec5SDimitry Andric
15920b57cec5SDimitry Andric if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI)
15930b57cec5SDimitry Andric return false;
15940b57cec5SDimitry Andric BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor();
15950b57cec5SDimitry Andric if (!InnerLoopHeaderSuccessor)
15960b57cec5SDimitry Andric return false;
15970b57cec5SDimitry Andric
1598480093f4SDimitry Andric // Adjust Loop Preheader and headers.
1599480093f4SDimitry Andric // The branches in the outer loop predecessor and the outer loop header can
1600480093f4SDimitry Andric // be unconditional branches or conditional branches with duplicates. Consider
1601480093f4SDimitry Andric // this when updating the successors.
16020b57cec5SDimitry Andric updateSuccessor(OuterLoopPredecessorBI, OuterLoopPreHeader,
1603480093f4SDimitry Andric InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
1604480093f4SDimitry Andric // The outer loop header might or might not branch to the outer latch.
1605480093f4SDimitry Andric // We are guaranteed to branch to the inner loop preheader.
1606fe6060f1SDimitry Andric if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
1607fe6060f1SDimitry Andric // In this case the outerLoopHeader should branch to the InnerLoopLatch.
1608fe6060f1SDimitry Andric updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
1609fe6060f1SDimitry Andric DTUpdates,
1610480093f4SDimitry Andric /*MustUpdateOnce=*/false);
1611fe6060f1SDimitry Andric }
16120b57cec5SDimitry Andric updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
1613480093f4SDimitry Andric InnerLoopHeaderSuccessor, DTUpdates,
1614480093f4SDimitry Andric /*MustUpdateOnce=*/false);
16150b57cec5SDimitry Andric
16160b57cec5SDimitry Andric // Adjust reduction PHI's now that the incoming block has changed.
16170b57cec5SDimitry Andric InnerLoopHeaderSuccessor->replacePhiUsesWith(InnerLoopHeader,
16180b57cec5SDimitry Andric OuterLoopHeader);
16190b57cec5SDimitry Andric
16200b57cec5SDimitry Andric updateSuccessor(InnerLoopHeaderBI, InnerLoopHeaderSuccessor,
16210b57cec5SDimitry Andric OuterLoopPreHeader, DTUpdates);
16220b57cec5SDimitry Andric
16230b57cec5SDimitry Andric // -------------Adjust loop latches-----------
16240b57cec5SDimitry Andric if (InnerLoopLatchBI->getSuccessor(0) == InnerLoopHeader)
16250b57cec5SDimitry Andric InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(1);
16260b57cec5SDimitry Andric else
16270b57cec5SDimitry Andric InnerLoopLatchSuccessor = InnerLoopLatchBI->getSuccessor(0);
16280b57cec5SDimitry Andric
16290b57cec5SDimitry Andric updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch,
16300b57cec5SDimitry Andric InnerLoopLatchSuccessor, DTUpdates);
16310b57cec5SDimitry Andric
16320b57cec5SDimitry Andric if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader)
16330b57cec5SDimitry Andric OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1);
16340b57cec5SDimitry Andric else
16350b57cec5SDimitry Andric OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(0);
16360b57cec5SDimitry Andric
16370b57cec5SDimitry Andric updateSuccessor(InnerLoopLatchBI, InnerLoopLatchSuccessor,
16380b57cec5SDimitry Andric OuterLoopLatchSuccessor, DTUpdates);
16390b57cec5SDimitry Andric updateSuccessor(OuterLoopLatchBI, OuterLoopLatchSuccessor, InnerLoopLatch,
16400b57cec5SDimitry Andric DTUpdates);
16410b57cec5SDimitry Andric
16420b57cec5SDimitry Andric DT->applyUpdates(DTUpdates);
16430b57cec5SDimitry Andric restructureLoops(OuterLoop, InnerLoop, InnerLoopPreHeader,
16440b57cec5SDimitry Andric OuterLoopPreHeader);
16450b57cec5SDimitry Andric
16460b57cec5SDimitry Andric moveLCSSAPhis(InnerLoopLatchSuccessor, InnerLoopHeader, InnerLoopLatch,
1647480093f4SDimitry Andric OuterLoopHeader, OuterLoopLatch, InnerLoop->getExitBlock(),
1648480093f4SDimitry Andric InnerLoop, LI);
16490b57cec5SDimitry Andric // For PHIs in the exit block of the outer loop, outer's latch has been
16500b57cec5SDimitry Andric // replaced by Inners'.
16510b57cec5SDimitry Andric OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
16520b57cec5SDimitry Andric
1653fe6060f1SDimitry Andric auto &OuterInnerReductions = LIL.getOuterInnerReductions();
16540b57cec5SDimitry Andric // Now update the reduction PHIs in the inner and outer loop headers.
16550b57cec5SDimitry Andric SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
1656349cc55cSDimitry Andric for (PHINode &PHI : InnerLoopHeader->phis())
1657349cc55cSDimitry Andric if (OuterInnerReductions.contains(&PHI))
165804eeddc0SDimitry Andric InnerLoopPHIs.push_back(&PHI);
165904eeddc0SDimitry Andric
1660349cc55cSDimitry Andric for (PHINode &PHI : OuterLoopHeader->phis())
1661349cc55cSDimitry Andric if (OuterInnerReductions.contains(&PHI))
166204eeddc0SDimitry Andric OuterLoopPHIs.push_back(&PHI);
16630b57cec5SDimitry Andric
16640b57cec5SDimitry Andric // Now move the remaining reduction PHIs from outer to inner loop header and
16650b57cec5SDimitry Andric // vice versa. The PHI nodes must be part of a reduction across the inner and
16660b57cec5SDimitry Andric // outer loop and all the remains to do is and updating the incoming blocks.
16670b57cec5SDimitry Andric for (PHINode *PHI : OuterLoopPHIs) {
166804eeddc0SDimitry Andric LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump(););
16690b57cec5SDimitry Andric PHI->moveBefore(InnerLoopHeader->getFirstNonPHI());
16705ffd83dbSDimitry Andric assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
16710b57cec5SDimitry Andric }
16720b57cec5SDimitry Andric for (PHINode *PHI : InnerLoopPHIs) {
167304eeddc0SDimitry Andric LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump(););
16740b57cec5SDimitry Andric PHI->moveBefore(OuterLoopHeader->getFirstNonPHI());
16755ffd83dbSDimitry Andric assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node");
16760b57cec5SDimitry Andric }
16770b57cec5SDimitry Andric
16780b57cec5SDimitry Andric // Update the incoming blocks for moved PHI nodes.
16790b57cec5SDimitry Andric OuterLoopHeader->replacePhiUsesWith(InnerLoopPreHeader, OuterLoopPreHeader);
16800b57cec5SDimitry Andric OuterLoopHeader->replacePhiUsesWith(InnerLoopLatch, OuterLoopLatch);
16810b57cec5SDimitry Andric InnerLoopHeader->replacePhiUsesWith(OuterLoopPreHeader, InnerLoopPreHeader);
16820b57cec5SDimitry Andric InnerLoopHeader->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch);
16830b57cec5SDimitry Andric
1684e8d8bef9SDimitry Andric // Values defined in the outer loop header could be used in the inner loop
1685e8d8bef9SDimitry Andric // latch. In that case, we need to create LCSSA phis for them, because after
1686e8d8bef9SDimitry Andric // interchanging they will be defined in the new inner loop and used in the
1687e8d8bef9SDimitry Andric // new outer loop.
1688e8d8bef9SDimitry Andric SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
1689e8d8bef9SDimitry Andric for (Instruction &I :
1690e8d8bef9SDimitry Andric make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
1691e8d8bef9SDimitry Andric MayNeedLCSSAPhis.push_back(&I);
169206c3fb27SDimitry Andric formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE);
1693e8d8bef9SDimitry Andric
16940b57cec5SDimitry Andric return true;
16950b57cec5SDimitry Andric }
16960b57cec5SDimitry Andric
adjustLoopLinks()16970b57cec5SDimitry Andric bool LoopInterchangeTransform::adjustLoopLinks() {
16980b57cec5SDimitry Andric // Adjust all branches in the inner and outer loop.
16990b57cec5SDimitry Andric bool Changed = adjustLoopBranches();
17005ffd83dbSDimitry Andric if (Changed) {
17015ffd83dbSDimitry Andric // We have interchanged the preheaders so we need to interchange the data in
17025ffd83dbSDimitry Andric // the preheaders as well. This is because the content of the inner
17035ffd83dbSDimitry Andric // preheader was previously executed inside the outer loop.
17045ffd83dbSDimitry Andric BasicBlock *OuterLoopPreHeader = OuterLoop->getLoopPreheader();
17055ffd83dbSDimitry Andric BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
17065ffd83dbSDimitry Andric swapBBContents(OuterLoopPreHeader, InnerLoopPreHeader);
17075ffd83dbSDimitry Andric }
17080b57cec5SDimitry Andric return Changed;
17090b57cec5SDimitry Andric }
17100b57cec5SDimitry Andric
run(LoopNest & LN,LoopAnalysisManager & AM,LoopStandardAnalysisResults & AR,LPMUpdater & U)1711fe6060f1SDimitry Andric PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
1712fe6060f1SDimitry Andric LoopAnalysisManager &AM,
1713e8d8bef9SDimitry Andric LoopStandardAnalysisResults &AR,
1714e8d8bef9SDimitry Andric LPMUpdater &U) {
1715fe6060f1SDimitry Andric Function &F = *LN.getParent();
1716e8d8bef9SDimitry Andric
1717e8d8bef9SDimitry Andric DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
171881ad6265SDimitry Andric std::unique_ptr<CacheCost> CC =
171981ad6265SDimitry Andric CacheCost::getCacheCost(LN.getOutermostLoop(), AR, DI);
1720e8d8bef9SDimitry Andric OptimizationRemarkEmitter ORE(&F);
172181ad6265SDimitry Andric if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, CC, &ORE).run(LN))
1722e8d8bef9SDimitry Andric return PreservedAnalyses::all();
1723bdd1243dSDimitry Andric U.markLoopNestChanged(true);
1724e8d8bef9SDimitry Andric return getLoopPassPreservedAnalyses();
1725e8d8bef9SDimitry Andric }
1726