xref: /freebsd/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp (revision b4af4f93c682e445bf159f0d1ec90b636296c946)
1 //===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass removes the computation of provably redundant expressions that have
10 // been computed earlier in a previous iteration. It relies on the use of PHIs
11 // to identify loop carried dependences. This is scalar replacement for vector
12 // types.
13 //
14 //-----------------------------------------------------------------------------
15 // Motivation: Consider the case where we have the following loop structure.
16 //
17 // Loop:
18 //  t0 = a[i];
19 //  t1 = f(t0);
20 //  t2 = g(t1);
21 //  ...
22 //  t3 = a[i+1];
23 //  t4 = f(t3);
24 //  t5 = g(t4);
25 //  t6 = op(t2, t5)
26 //  cond_branch <Loop>
27 //
28 // This can be converted to
29 //  t00 = a[0];
30 //  t10 = f(t00);
31 //  t20 = g(t10);
32 // Loop:
33 //  t2 = t20;
34 //  t3 = a[i+1];
35 //  t4 = f(t3);
36 //  t5 = g(t4);
37 //  t6 = op(t2, t5)
38 //  t20 = t5
39 //  cond_branch <Loop>
40 //
41 // SROA does a good job of reusing a[i+1] as a[i] in the next iteration.
42 // Such a loop comes to this pass in the following form.
43 //
44 // LoopPreheader:
45 //  X0 = a[0];
46 // Loop:
47 //  X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
48 //  t1 = f(X2)   <-- I1
49 //  t2 = g(t1)
50 //  ...
51 //  X1 = a[i+1]
52 //  t4 = f(X1)   <-- I2
53 //  t5 = g(t4)
54 //  t6 = op(t2, t5)
55 //  cond_branch <Loop>
56 //
57 // In this pass, we look for PHIs such as X2 whose incoming values come only
58 // from the Loop Preheader and over the backedge and additionally, both these
59 // values are the results of the same operation in terms of opcode. We call such
60 // a PHI node a dependence chain or DepChain. In this case, the dependence of X2
61 // over X1 is carried over only one iteration and so the DepChain is only one
62 // PHI node long.
63 //
64 // Then, we traverse the uses of the PHI (X2) and the uses of the value of the
65 // PHI coming over the backedge (X1). We stop at the first pair of such users
66 // I1 (of X2) and I2 (of X1) that meet the following conditions.
67 // 1. I1 and I2 are the same operation, but with different operands.
68 // 2. X2 and X1 are used at the same operand number in the two instructions.
69 // 3. All other operands Op1 of I1 and Op2 of I2 are also such that there is
70 //    a DepChain from Op1 to Op2 of the same length as that between X2 and X1.
71 //
72 // We then make the following transformation
73 // LoopPreheader:
74 //  X0 = a[0];
75 //  Y0 = f(X0);
76 // Loop:
77 //  X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
78 //  Y2 = PHI<(Y0, LoopPreheader), (t4, Loop)>
79 //  t1 = f(X2)   <-- Will be removed by DCE.
80 //  t2 = g(Y2)
81 //  ...
82 //  X1 = a[i+1]
83 //  t4 = f(X1)
84 //  t5 = g(t4)
85 //  t6 = op(t2, t5)
86 //  cond_branch <Loop>
87 //
88 // We proceed until we cannot find any more such instructions I1 and I2.
89 //
90 // --- DepChains & Loop carried dependences ---
91 // Consider a single basic block loop such as
92 //
93 // LoopPreheader:
94 //  X0 = ...
95 //  Y0 = ...
96 // Loop:
97 //  X2 = PHI<(X0, LoopPreheader), (X1, Loop)>
98 //  Y2 = PHI<(Y0, LoopPreheader), (X2, Loop)>
99 //  ...
100 //  X1 = ...
101 //  ...
102 //  cond_branch <Loop>
103 //
104 // Then there is a dependence between X2 and X1 that goes back one iteration,
105 // i.e. X1 is used as X2 in the very next iteration. We represent this as a
106 // DepChain from X2 to X1 (X2->X1).
107 // Similarly, there is a dependence between Y2 and X1 that goes back two
108 // iterations. X1 is used as Y2 two iterations after it is computed. This is
109 // represented by a DepChain as (Y2->X2->X1).
110 //
111 // A DepChain has the following properties.
112 // 1. Number of Instructions in DepChain = Num of edges in DepChain + 1 =
113 //    Number of iterations of carried dependence + 1.
114 // 2. All instructions in the DepChain except the last are PHIs.
115 //
116 //===----------------------------------------------------------------------===//
117 
118 #include "llvm/ADT/SetVector.h"
119 #include "llvm/ADT/SmallVector.h"
120 #include "llvm/ADT/Statistic.h"
121 #include "llvm/Analysis/LoopInfo.h"
122 #include "llvm/Analysis/LoopPass.h"
123 #include "llvm/IR/BasicBlock.h"
124 #include "llvm/IR/DerivedTypes.h"
125 #include "llvm/IR/IRBuilder.h"
126 #include "llvm/IR/Instruction.h"
127 #include "llvm/IR/Instructions.h"
128 #include "llvm/IR/IntrinsicInst.h"
129 #include "llvm/IR/Intrinsics.h"
130 #include "llvm/IR/IntrinsicsHexagon.h"
131 #include "llvm/IR/Use.h"
132 #include "llvm/IR/User.h"
133 #include "llvm/IR/Value.h"
134 #include "llvm/InitializePasses.h"
135 #include "llvm/Pass.h"
136 #include "llvm/Support/Casting.h"
137 #include "llvm/Support/CommandLine.h"
138 #include "llvm/Support/Compiler.h"
139 #include "llvm/Support/Debug.h"
140 #include "llvm/Support/raw_ostream.h"
141 #include "llvm/Transforms/Scalar.h"
142 #include "llvm/Transforms/Utils.h"
143 #include <algorithm>
144 #include <cassert>
145 #include <cstddef>
146 #include <map>
147 #include <memory>
148 #include <set>
149 
150 using namespace llvm;
151 
152 #define DEBUG_TYPE "hexagon-vlcr"
153 
154 STATISTIC(HexagonNumVectorLoopCarriedReuse,
155           "Number of values that were reused from a previous iteration.");
156 
157 static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
158     cl::Hidden,
159     cl::desc("Maximum distance of loop carried dependences that are handled"),
160     cl::init(2), cl::ZeroOrMore);
161 
162 namespace llvm {
163 
164 void initializeHexagonVectorLoopCarriedReusePass(PassRegistry&);
165 Pass *createHexagonVectorLoopCarriedReusePass();
166 
167 } // end namespace llvm
168 
169 namespace {
170 
171   // See info about DepChain in the comments at the top of this file.
172   using ChainOfDependences = SmallVector<Instruction *, 4>;
173 
174   class DepChain {
175     ChainOfDependences Chain;
176 
177   public:
178     bool isIdentical(DepChain &Other) const {
179       if (Other.size() != size())
180         return false;
181       ChainOfDependences &OtherChain = Other.getChain();
182       for (int i = 0; i < size(); ++i) {
183         if (Chain[i] != OtherChain[i])
184           return false;
185       }
186       return true;
187     }
188 
189     ChainOfDependences &getChain() {
190       return Chain;
191     }
192 
193     int size() const {
194       return Chain.size();
195     }
196 
197     void clear() {
198       Chain.clear();
199     }
200 
201     void push_back(Instruction *I) {
202       Chain.push_back(I);
203     }
204 
205     int iterations() const {
206       return size() - 1;
207     }
208 
209     Instruction *front() const {
210       return Chain.front();
211     }
212 
213     Instruction *back() const {
214       return Chain.back();
215     }
216 
217     Instruction *&operator[](const int index) {
218       return Chain[index];
219     }
220 
221    friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
222   };
223 
224   LLVM_ATTRIBUTE_UNUSED
225   raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
226     const ChainOfDependences &CD = D.Chain;
227     int ChainSize = CD.size();
228     OS << "**DepChain Start::**\n";
229     for (int i = 0; i < ChainSize -1; ++i) {
230       OS << *(CD[i]) << " -->\n";
231     }
232     OS << *CD[ChainSize-1] << "\n";
233     return OS;
234   }
235 
236   struct ReuseValue {
237     Instruction *Inst2Replace = nullptr;
238 
239     // In the new PHI node that we'll construct this is the value that'll be
240     // used over the backedge. This is the value that gets reused from a
241     // previous iteration.
242     Instruction *BackedgeInst = nullptr;
243     std::map<Instruction *, DepChain *> DepChains;
244     int Iterations = -1;
245 
246     ReuseValue() = default;
247 
248     void reset() {
249       Inst2Replace = nullptr;
250       BackedgeInst = nullptr;
251       DepChains.clear();
252       Iterations = -1;
253     }
254     bool isDefined() { return Inst2Replace != nullptr; }
255   };
256 
257   LLVM_ATTRIBUTE_UNUSED
258   raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
259     OS << "** ReuseValue ***\n";
260     OS << "Instruction to Replace: " << *(RU.Inst2Replace) << "\n";
261     OS << "Backedge Instruction: " << *(RU.BackedgeInst) << "\n";
262     return OS;
263   }
264 
265   class HexagonVectorLoopCarriedReuse : public LoopPass {
266   public:
267     static char ID;
268 
269     explicit HexagonVectorLoopCarriedReuse() : LoopPass(ID) {
270       PassRegistry *PR = PassRegistry::getPassRegistry();
271       initializeHexagonVectorLoopCarriedReusePass(*PR);
272     }
273 
274     StringRef getPassName() const override {
275       return "Hexagon-specific loop carried reuse for HVX vectors";
276     }
277 
278     void getAnalysisUsage(AnalysisUsage &AU) const override {
279       AU.addRequired<LoopInfoWrapperPass>();
280       AU.addRequiredID(LoopSimplifyID);
281       AU.addRequiredID(LCSSAID);
282       AU.addPreservedID(LCSSAID);
283       AU.setPreservesCFG();
284     }
285 
286     bool runOnLoop(Loop *L, LPPassManager &LPM) override;
287 
288   private:
289     SetVector<DepChain *> Dependences;
290     std::set<Instruction *> ReplacedInsts;
291     Loop *CurLoop;
292     ReuseValue ReuseCandidate;
293 
294     bool doVLCR();
295     void findLoopCarriedDeps();
296     void findValueToReuse();
297     void findDepChainFromPHI(Instruction *I, DepChain &D);
298     void reuseValue();
299     Value *findValueInBlock(Value *Op, BasicBlock *BB);
300     DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
301     bool isEquivalentOperation(Instruction *I1, Instruction *I2);
302     bool canReplace(Instruction *I);
303     bool isCallInstCommutative(CallInst *C);
304   };
305 
306 } // end anonymous namespace
307 
308 char HexagonVectorLoopCarriedReuse::ID = 0;
309 
310 INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
311     "Hexagon-specific predictive commoning for HVX vectors", false, false)
312 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
313 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
314 INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
315 INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuse, "hexagon-vlcr",
316     "Hexagon-specific predictive commoning for HVX vectors", false, false)
317 
318 bool HexagonVectorLoopCarriedReuse::runOnLoop(Loop *L, LPPassManager &LPM) {
319   if (skipLoop(L))
320     return false;
321 
322   if (!L->getLoopPreheader())
323     return false;
324 
325   // Work only on innermost loops.
326   if (!L->getSubLoops().empty())
327     return false;
328 
329   // Work only on single basic blocks loops.
330   if (L->getNumBlocks() != 1)
331     return false;
332 
333   CurLoop = L;
334 
335   return doVLCR();
336 }
337 
338 bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
339   switch (C->getCalledFunction()->getIntrinsicID()) {
340     case Intrinsic::hexagon_V6_vaddb:
341     case Intrinsic::hexagon_V6_vaddb_128B:
342     case Intrinsic::hexagon_V6_vaddh:
343     case Intrinsic::hexagon_V6_vaddh_128B:
344     case Intrinsic::hexagon_V6_vaddw:
345     case Intrinsic::hexagon_V6_vaddw_128B:
346     case Intrinsic::hexagon_V6_vaddubh:
347     case Intrinsic::hexagon_V6_vaddubh_128B:
348     case Intrinsic::hexagon_V6_vadduhw:
349     case Intrinsic::hexagon_V6_vadduhw_128B:
350     case Intrinsic::hexagon_V6_vaddhw:
351     case Intrinsic::hexagon_V6_vaddhw_128B:
352     case Intrinsic::hexagon_V6_vmaxb:
353     case Intrinsic::hexagon_V6_vmaxb_128B:
354     case Intrinsic::hexagon_V6_vmaxh:
355     case Intrinsic::hexagon_V6_vmaxh_128B:
356     case Intrinsic::hexagon_V6_vmaxw:
357     case Intrinsic::hexagon_V6_vmaxw_128B:
358     case Intrinsic::hexagon_V6_vmaxub:
359     case Intrinsic::hexagon_V6_vmaxub_128B:
360     case Intrinsic::hexagon_V6_vmaxuh:
361     case Intrinsic::hexagon_V6_vmaxuh_128B:
362     case Intrinsic::hexagon_V6_vminub:
363     case Intrinsic::hexagon_V6_vminub_128B:
364     case Intrinsic::hexagon_V6_vminuh:
365     case Intrinsic::hexagon_V6_vminuh_128B:
366     case Intrinsic::hexagon_V6_vminb:
367     case Intrinsic::hexagon_V6_vminb_128B:
368     case Intrinsic::hexagon_V6_vminh:
369     case Intrinsic::hexagon_V6_vminh_128B:
370     case Intrinsic::hexagon_V6_vminw:
371     case Intrinsic::hexagon_V6_vminw_128B:
372     case Intrinsic::hexagon_V6_vmpyub:
373     case Intrinsic::hexagon_V6_vmpyub_128B:
374     case Intrinsic::hexagon_V6_vmpyuh:
375     case Intrinsic::hexagon_V6_vmpyuh_128B:
376     case Intrinsic::hexagon_V6_vavgub:
377     case Intrinsic::hexagon_V6_vavgub_128B:
378     case Intrinsic::hexagon_V6_vavgh:
379     case Intrinsic::hexagon_V6_vavgh_128B:
380     case Intrinsic::hexagon_V6_vavguh:
381     case Intrinsic::hexagon_V6_vavguh_128B:
382     case Intrinsic::hexagon_V6_vavgw:
383     case Intrinsic::hexagon_V6_vavgw_128B:
384     case Intrinsic::hexagon_V6_vavgb:
385     case Intrinsic::hexagon_V6_vavgb_128B:
386     case Intrinsic::hexagon_V6_vavguw:
387     case Intrinsic::hexagon_V6_vavguw_128B:
388     case Intrinsic::hexagon_V6_vabsdiffh:
389     case Intrinsic::hexagon_V6_vabsdiffh_128B:
390     case Intrinsic::hexagon_V6_vabsdiffub:
391     case Intrinsic::hexagon_V6_vabsdiffub_128B:
392     case Intrinsic::hexagon_V6_vabsdiffuh:
393     case Intrinsic::hexagon_V6_vabsdiffuh_128B:
394     case Intrinsic::hexagon_V6_vabsdiffw:
395     case Intrinsic::hexagon_V6_vabsdiffw_128B:
396       return true;
397     default:
398       return false;
399   }
400 }
401 
402 bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
403                                                           Instruction *I2) {
404   if (!I1->isSameOperationAs(I2))
405     return false;
406   // This check is in place specifically for intrinsics. isSameOperationAs will
407   // return two for any two hexagon intrinsics because they are essentially the
408   // same instruciton (CallInst). We need to scratch the surface to see if they
409   // are calls to the same function.
410   if (CallInst *C1 = dyn_cast<CallInst>(I1)) {
411     if (CallInst *C2 = dyn_cast<CallInst>(I2)) {
412       if (C1->getCalledFunction() != C2->getCalledFunction())
413         return false;
414     }
415   }
416 
417   // If both the Instructions are of Vector Type and any of the element
418   // is integer constant, check their values too for equivalence.
419   if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) {
420     unsigned NumOperands = I1->getNumOperands();
421     for (unsigned i = 0; i < NumOperands; ++i) {
422       ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i));
423       ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i));
424       if(!C1) continue;
425       assert(C2);
426       if (C1->getSExtValue() != C2->getSExtValue())
427         return false;
428     }
429   }
430 
431   return true;
432 }
433 
434 bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
435   const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
436   if (!II)
437     return true;
438 
439   switch (II->getIntrinsicID()) {
440   case Intrinsic::hexagon_V6_hi:
441   case Intrinsic::hexagon_V6_lo:
442   case Intrinsic::hexagon_V6_hi_128B:
443   case Intrinsic::hexagon_V6_lo_128B:
444     LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
445     return false;
446   default:
447     return true;
448   }
449 }
450 void HexagonVectorLoopCarriedReuse::findValueToReuse() {
451   for (auto *D : Dependences) {
452     LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
453     if (D->iterations() > HexagonVLCRIterationLim) {
454       LLVM_DEBUG(
455           dbgs()
456           << ".. Skipping because number of iterations > than the limit\n");
457       continue;
458     }
459 
460     PHINode *PN = cast<PHINode>(D->front());
461     Instruction *BEInst = D->back();
462     int Iters = D->iterations();
463     BasicBlock *BB = PN->getParent();
464     LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
465                       << " can be reused\n");
466 
467     SmallVector<Instruction *, 4> PNUsers;
468     for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) {
469       Use &U = *UI;
470       Instruction *User = cast<Instruction>(U.getUser());
471 
472       if (User->getParent() != BB)
473         continue;
474       if (ReplacedInsts.count(User)) {
475         LLVM_DEBUG(dbgs() << *User
476                           << " has already been replaced. Skipping...\n");
477         continue;
478       }
479       if (isa<PHINode>(User))
480         continue;
481       if (User->mayHaveSideEffects())
482         continue;
483       if (!canReplace(User))
484         continue;
485 
486       PNUsers.push_back(User);
487     }
488     LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
489 
490     // For each interesting use I of PN, find an Instruction BEUser that
491     // performs the same operation as I on BEInst and whose other operands,
492     // if any, can also be rematerialized in OtherBB. We stop when we find the
493     // first such Instruction BEUser. This is because once BEUser is
494     // rematerialized in OtherBB, we may find more such "fixup" opportunities
495     // in this block. So, we'll start over again.
496     for (Instruction *I : PNUsers) {
497       for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E;
498            ++UI) {
499         Use &U = *UI;
500         Instruction *BEUser = cast<Instruction>(U.getUser());
501 
502         if (BEUser->getParent() != BB)
503           continue;
504         if (!isEquivalentOperation(I, BEUser))
505           continue;
506 
507         int NumOperands = I->getNumOperands();
508 
509         // Take operands of each PNUser one by one and try to find DepChain
510         // with every operand of the BEUser. If any of the operands of BEUser
511         // has DepChain with current operand of the PNUser, break the matcher
512         // loop. Keep doing this for Every PNUser operand. If PNUser operand
513         // does not have DepChain with any of the BEUser operand, break the
514         // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
515         // This ensures that DepChain exist for all the PNUser operand with
516         // BEUser operand. This also ensures that DepChains are independent of
517         // the positions in PNUser and BEUser.
518         std::map<Instruction *, DepChain *> DepChains;
519         CallInst *C1 = dyn_cast<CallInst>(I);
520         if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
521           bool Found = false;
522           for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
523             Value *Op = I->getOperand(OpNo);
524             Instruction *OpInst = dyn_cast<Instruction>(Op);
525             Found = false;
526             for (int T = 0; T < NumOperands; ++T) {
527               Value *BEOp = BEUser->getOperand(T);
528               Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
529               if (!OpInst && !BEOpInst) {
530                 if (Op == BEOp) {
531                   Found = true;
532                   break;
533                 }
534               }
535 
536               if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
537                 continue;
538 
539               DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
540 
541               if (D) {
542                 Found = true;
543                 DepChains[OpInst] = D;
544                 break;
545               }
546             }
547             if (!Found) {
548               BEUser = nullptr;
549               break;
550             }
551           }
552         } else {
553 
554           for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
555             Value *Op = I->getOperand(OpNo);
556             Value *BEOp = BEUser->getOperand(OpNo);
557 
558             Instruction *OpInst = dyn_cast<Instruction>(Op);
559             if (!OpInst) {
560               if (Op == BEOp)
561                 continue;
562               // Do not allow reuse to occur when the operands may be different
563               // values.
564               BEUser = nullptr;
565               break;
566             }
567 
568             Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
569             DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
570 
571             if (D) {
572               DepChains[OpInst] = D;
573             } else {
574               BEUser = nullptr;
575               break;
576             }
577           }
578         }
579         if (BEUser) {
580           LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
581           ReuseCandidate.Inst2Replace = I;
582           ReuseCandidate.BackedgeInst = BEUser;
583           ReuseCandidate.DepChains = DepChains;
584           ReuseCandidate.Iterations = Iters;
585           return;
586         }
587         ReuseCandidate.reset();
588       }
589     }
590   }
591   ReuseCandidate.reset();
592 }
593 
594 Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
595                                                        BasicBlock *BB) {
596   PHINode *PN = dyn_cast<PHINode>(Op);
597   assert(PN);
598   Value *ValueInBlock = PN->getIncomingValueForBlock(BB);
599   return ValueInBlock;
600 }
601 
602 void HexagonVectorLoopCarriedReuse::reuseValue() {
603   LLVM_DEBUG(dbgs() << ReuseCandidate);
604   Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
605   Instruction *BEInst = ReuseCandidate.BackedgeInst;
606   int NumOperands = Inst2Replace->getNumOperands();
607   std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
608   int Iterations = ReuseCandidate.Iterations;
609   BasicBlock *LoopPH = CurLoop->getLoopPreheader();
610   assert(!DepChains.empty() && "No DepChains");
611   LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
612 
613   SmallVector<Instruction *, 4> InstsInPreheader;
614   for (int i = 0; i < Iterations; ++i) {
615     Instruction *InstInPreheader = Inst2Replace->clone();
616     SmallVector<Value *, 4> Ops;
617     for (int j = 0; j < NumOperands; ++j) {
618       Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j));
619       if (!I)
620         continue;
621       // Get the DepChain corresponding to this operand.
622       DepChain &D = *DepChains[I];
623       // Get the PHI for the iteration number and find
624       // the incoming value from the Loop Preheader for
625       // that PHI.
626       Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
627       InstInPreheader->setOperand(j, ValInPreheader);
628     }
629     InstsInPreheader.push_back(InstInPreheader);
630     InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
631     InstInPreheader->insertBefore(LoopPH->getTerminator());
632     LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
633                       << LoopPH->getName() << "\n");
634   }
635   BasicBlock *BB = BEInst->getParent();
636   IRBuilder<> IRB(BB);
637   IRB.SetInsertPoint(BB->getFirstNonPHI());
638   Value *BEVal = BEInst;
639   PHINode *NewPhi;
640   for (int i = Iterations-1; i >=0 ; --i) {
641     Instruction *InstInPreheader = InstsInPreheader[i];
642     NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
643     NewPhi->addIncoming(InstInPreheader, LoopPH);
644     NewPhi->addIncoming(BEVal, BB);
645     LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()
646                       << "\n");
647     BEVal = NewPhi;
648   }
649   // We are in LCSSA form. So, a value defined inside the Loop is used only
650   // inside the loop. So, the following is safe.
651   Inst2Replace->replaceAllUsesWith(NewPhi);
652   ReplacedInsts.insert(Inst2Replace);
653   ++HexagonNumVectorLoopCarriedReuse;
654 }
655 
656 bool HexagonVectorLoopCarriedReuse::doVLCR() {
657   assert(CurLoop->getSubLoops().empty() &&
658          "Can do VLCR on the innermost loop only");
659   assert((CurLoop->getNumBlocks() == 1) &&
660          "Can do VLCR only on single block loops");
661 
662   bool Changed = false;
663   bool Continue;
664 
665   LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
666   do {
667     // Reset datastructures.
668     Dependences.clear();
669     Continue = false;
670 
671     findLoopCarriedDeps();
672     findValueToReuse();
673     if (ReuseCandidate.isDefined()) {
674       reuseValue();
675       Changed = true;
676       Continue = true;
677     }
678     llvm::for_each(Dependences, std::default_delete<DepChain>());
679   } while (Continue);
680   return Changed;
681 }
682 
683 void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
684                                                         DepChain &D) {
685   PHINode *PN = dyn_cast<PHINode>(I);
686   if (!PN) {
687     D.push_back(I);
688     return;
689   } else {
690     auto NumIncomingValues = PN->getNumIncomingValues();
691     if (NumIncomingValues != 2) {
692       D.clear();
693       return;
694     }
695 
696     BasicBlock *BB = PN->getParent();
697     if (BB != CurLoop->getHeader()) {
698       D.clear();
699       return;
700     }
701 
702     Value *BEVal = PN->getIncomingValueForBlock(BB);
703     Instruction *BEInst = dyn_cast<Instruction>(BEVal);
704     // This is a single block loop with a preheader, so at least
705     // one value should come over the backedge.
706     assert(BEInst && "There should be a value over the backedge");
707 
708     Value *PreHdrVal =
709       PN->getIncomingValueForBlock(CurLoop->getLoopPreheader());
710     if(!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
711       D.clear();
712       return;
713     }
714     D.push_back(PN);
715     findDepChainFromPHI(BEInst, D);
716   }
717 }
718 
719 DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
720                                                          Instruction *I2,
721                                                          int Iters) {
722   for (auto *D : Dependences) {
723     if (D->front() == I1 && D->back() == I2 && D->iterations() == Iters)
724       return D;
725   }
726   return nullptr;
727 }
728 
729 void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
730   BasicBlock *BB = CurLoop->getHeader();
731   for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
732     auto *PN = cast<PHINode>(I);
733     if (!isa<VectorType>(PN->getType()))
734       continue;
735 
736     DepChain *D = new DepChain();
737     findDepChainFromPHI(PN, *D);
738     if (D->size() != 0)
739       Dependences.insert(D);
740     else
741       delete D;
742   }
743   LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
744   LLVM_DEBUG(for (size_t i = 0; i < Dependences.size();
745                   ++i) { dbgs() << *Dependences[i] << "\n"; });
746 }
747 
748 Pass *llvm::createHexagonVectorLoopCarriedReusePass() {
749   return new HexagonVectorLoopCarriedReuse();
750 }
751