xref: /freebsd/contrib/llvm-project/llvm/lib/Analysis/LoopCacheAnalysis.cpp (revision f4beb2edcde327a49f034da26bb2e5aadcec922a)
1  //===- LoopCacheAnalysis.cpp - Loop Cache Analysis -------------------------==//
2  //
3  //                     The LLVM Compiler Infrastructure
4  //
5  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6  // See https://llvm.org/LICENSE.txt for license information.
7  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8  //
9  //===----------------------------------------------------------------------===//
10  ///
11  /// \file
12  /// This file defines the implementation for the loop cache analysis.
13  /// The implementation is largely based on the following paper:
14  ///
15  ///       Compiler Optimizations for Improving Data Locality
16  ///       By: Steve Carr, Katherine S. McKinley, Chau-Wen Tseng
17  ///       http://www.cs.utexas.edu/users/mckinley/papers/asplos-1994.pdf
18  ///
19  /// The general approach taken to estimate the number of cache lines used by the
20  /// memory references in an inner loop is:
21  ///    1. Partition memory references that exhibit temporal or spacial reuse
22  ///       into reference groups.
23  ///    2. For each loop L in the a loop nest LN:
24  ///       a. Compute the cost of the reference group
25  ///       b. Compute the loop cost by summing up the reference groups costs
26  //===----------------------------------------------------------------------===//
27  
28  #include "llvm/Analysis/LoopCacheAnalysis.h"
29  #include "llvm/ADT/BreadthFirstIterator.h"
30  #include "llvm/ADT/Sequence.h"
31  #include "llvm/ADT/SmallVector.h"
32  #include "llvm/Support/CommandLine.h"
33  #include "llvm/Support/Debug.h"
34  
35  using namespace llvm;
36  
37  #define DEBUG_TYPE "loop-cache-cost"
38  
39  static cl::opt<unsigned> DefaultTripCount(
40      "default-trip-count", cl::init(100), cl::Hidden,
41      cl::desc("Use this to specify the default trip count of a loop"));
42  
43  // In this analysis two array references are considered to exhibit temporal
44  // reuse if they access either the same memory location, or a memory location
45  // with distance smaller than a configurable threshold.
46  static cl::opt<unsigned> TemporalReuseThreshold(
47      "temporal-reuse-threshold", cl::init(2), cl::Hidden,
48      cl::desc("Use this to specify the max. distance between array elements "
49               "accessed in a loop so that the elements are classified to have "
50               "temporal reuse"));
51  
52  /// Retrieve the innermost loop in the given loop nest \p Loops. It returns a
53  /// nullptr if any loops in the loop vector supplied has more than one sibling.
54  /// The loop vector is expected to contain loops collected in breadth-first
55  /// order.
56  static Loop *getInnerMostLoop(const LoopVectorTy &Loops) {
57    assert(!Loops.empty() && "Expecting a non-empy loop vector");
58  
59    Loop *LastLoop = Loops.back();
60    Loop *ParentLoop = LastLoop->getParentLoop();
61  
62    if (ParentLoop == nullptr) {
63      assert(Loops.size() == 1 && "Expecting a single loop");
64      return LastLoop;
65    }
66  
67    return (std::is_sorted(Loops.begin(), Loops.end(),
68                           [](const Loop *L1, const Loop *L2) {
69                             return L1->getLoopDepth() < L2->getLoopDepth();
70                           }))
71               ? LastLoop
72               : nullptr;
73  }
74  
75  static bool isOneDimensionalArray(const SCEV &AccessFn, const SCEV &ElemSize,
76                                    const Loop &L, ScalarEvolution &SE) {
77    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&AccessFn);
78    if (!AR || !AR->isAffine())
79      return false;
80  
81    assert(AR->getLoop() && "AR should have a loop");
82  
83    // Check that start and increment are not add recurrences.
84    const SCEV *Start = AR->getStart();
85    const SCEV *Step = AR->getStepRecurrence(SE);
86    if (isa<SCEVAddRecExpr>(Start) || isa<SCEVAddRecExpr>(Step))
87      return false;
88  
89    // Check that start and increment are both invariant in the loop.
90    if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
91      return false;
92  
93    return AR->getStepRecurrence(SE) == &ElemSize;
94  }
95  
96  /// Compute the trip count for the given loop \p L. Return the SCEV expression
97  /// for the trip count or nullptr if it cannot be computed.
98  static const SCEV *computeTripCount(const Loop &L, ScalarEvolution &SE) {
99    const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(&L);
100    if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
101        !isa<SCEVConstant>(BackedgeTakenCount))
102      return nullptr;
103  
104    return SE.getAddExpr(BackedgeTakenCount,
105                         SE.getOne(BackedgeTakenCount->getType()));
106  }
107  
108  //===----------------------------------------------------------------------===//
109  // IndexedReference implementation
110  //
111  raw_ostream &llvm::operator<<(raw_ostream &OS, const IndexedReference &R) {
112    if (!R.IsValid) {
113      OS << R.StoreOrLoadInst;
114      OS << ", IsValid=false.";
115      return OS;
116    }
117  
118    OS << *R.BasePointer;
119    for (const SCEV *Subscript : R.Subscripts)
120      OS << "[" << *Subscript << "]";
121  
122    OS << ", Sizes: ";
123    for (const SCEV *Size : R.Sizes)
124      OS << "[" << *Size << "]";
125  
126    return OS;
127  }
128  
129  IndexedReference::IndexedReference(Instruction &StoreOrLoadInst,
130                                     const LoopInfo &LI, ScalarEvolution &SE)
131      : StoreOrLoadInst(StoreOrLoadInst), SE(SE) {
132    assert((isa<StoreInst>(StoreOrLoadInst) || isa<LoadInst>(StoreOrLoadInst)) &&
133           "Expecting a load or store instruction");
134  
135    IsValid = delinearize(LI);
136    if (IsValid)
137      LLVM_DEBUG(dbgs().indent(2) << "Succesfully delinearized: " << *this
138                                  << "\n");
139  }
140  
141  Optional<bool> IndexedReference::hasSpacialReuse(const IndexedReference &Other,
142                                                   unsigned CLS,
143                                                   AliasAnalysis &AA) const {
144    assert(IsValid && "Expecting a valid reference");
145  
146    if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
147      LLVM_DEBUG(dbgs().indent(2)
148                 << "No spacial reuse: different base pointers\n");
149      return false;
150    }
151  
152    unsigned NumSubscripts = getNumSubscripts();
153    if (NumSubscripts != Other.getNumSubscripts()) {
154      LLVM_DEBUG(dbgs().indent(2)
155                 << "No spacial reuse: different number of subscripts\n");
156      return false;
157    }
158  
159    // all subscripts must be equal, except the leftmost one (the last one).
160    for (auto SubNum : seq<unsigned>(0, NumSubscripts - 1)) {
161      if (getSubscript(SubNum) != Other.getSubscript(SubNum)) {
162        LLVM_DEBUG(dbgs().indent(2) << "No spacial reuse, different subscripts: "
163                                    << "\n\t" << *getSubscript(SubNum) << "\n\t"
164                                    << *Other.getSubscript(SubNum) << "\n");
165        return false;
166      }
167    }
168  
169    // the difference between the last subscripts must be less than the cache line
170    // size.
171    const SCEV *LastSubscript = getLastSubscript();
172    const SCEV *OtherLastSubscript = Other.getLastSubscript();
173    const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
174        SE.getMinusSCEV(LastSubscript, OtherLastSubscript));
175  
176    if (Diff == nullptr) {
177      LLVM_DEBUG(dbgs().indent(2)
178                 << "No spacial reuse, difference between subscript:\n\t"
179                 << *LastSubscript << "\n\t" << OtherLastSubscript
180                 << "\nis not constant.\n");
181      return None;
182    }
183  
184    bool InSameCacheLine = (Diff->getValue()->getSExtValue() < CLS);
185  
186    LLVM_DEBUG({
187      if (InSameCacheLine)
188        dbgs().indent(2) << "Found spacial reuse.\n";
189      else
190        dbgs().indent(2) << "No spacial reuse.\n";
191    });
192  
193    return InSameCacheLine;
194  }
195  
196  Optional<bool> IndexedReference::hasTemporalReuse(const IndexedReference &Other,
197                                                    unsigned MaxDistance,
198                                                    const Loop &L,
199                                                    DependenceInfo &DI,
200                                                    AliasAnalysis &AA) const {
201    assert(IsValid && "Expecting a valid reference");
202  
203    if (BasePointer != Other.getBasePointer() && !isAliased(Other, AA)) {
204      LLVM_DEBUG(dbgs().indent(2)
205                 << "No temporal reuse: different base pointer\n");
206      return false;
207    }
208  
209    std::unique_ptr<Dependence> D =
210        DI.depends(&StoreOrLoadInst, &Other.StoreOrLoadInst, true);
211  
212    if (D == nullptr) {
213      LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: no dependence\n");
214      return false;
215    }
216  
217    if (D->isLoopIndependent()) {
218      LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
219      return true;
220    }
221  
222    // Check the dependence distance at every loop level. There is temporal reuse
223    // if the distance at the given loop's depth is small (|d| <= MaxDistance) and
224    // it is zero at every other loop level.
225    int LoopDepth = L.getLoopDepth();
226    int Levels = D->getLevels();
227    for (int Level = 1; Level <= Levels; ++Level) {
228      const SCEV *Distance = D->getDistance(Level);
229      const SCEVConstant *SCEVConst = dyn_cast_or_null<SCEVConstant>(Distance);
230  
231      if (SCEVConst == nullptr) {
232        LLVM_DEBUG(dbgs().indent(2) << "No temporal reuse: distance unknown\n");
233        return None;
234      }
235  
236      const ConstantInt &CI = *SCEVConst->getValue();
237      if (Level != LoopDepth && !CI.isZero()) {
238        LLVM_DEBUG(dbgs().indent(2)
239                   << "No temporal reuse: distance is not zero at depth=" << Level
240                   << "\n");
241        return false;
242      } else if (Level == LoopDepth && CI.getSExtValue() > MaxDistance) {
243        LLVM_DEBUG(
244            dbgs().indent(2)
245            << "No temporal reuse: distance is greater than MaxDistance at depth="
246            << Level << "\n");
247        return false;
248      }
249    }
250  
251    LLVM_DEBUG(dbgs().indent(2) << "Found temporal reuse\n");
252    return true;
253  }
254  
255  CacheCostTy IndexedReference::computeRefCost(const Loop &L,
256                                               unsigned CLS) const {
257    assert(IsValid && "Expecting a valid reference");
258    LLVM_DEBUG({
259      dbgs().indent(2) << "Computing cache cost for:\n";
260      dbgs().indent(4) << *this << "\n";
261    });
262  
263    // If the indexed reference is loop invariant the cost is one.
264    if (isLoopInvariant(L)) {
265      LLVM_DEBUG(dbgs().indent(4) << "Reference is loop invariant: RefCost=1\n");
266      return 1;
267    }
268  
269    const SCEV *TripCount = computeTripCount(L, SE);
270    if (!TripCount) {
271      LLVM_DEBUG(dbgs() << "Trip count of loop " << L.getName()
272                        << " could not be computed, using DefaultTripCount\n");
273      const SCEV *ElemSize = Sizes.back();
274      TripCount = SE.getConstant(ElemSize->getType(), DefaultTripCount);
275    }
276    LLVM_DEBUG(dbgs() << "TripCount=" << *TripCount << "\n");
277  
278    // If the indexed reference is 'consecutive' the cost is
279    // (TripCount*Stride)/CLS, otherwise the cost is TripCount.
280    const SCEV *RefCost = TripCount;
281  
282    if (isConsecutive(L, CLS)) {
283      const SCEV *Coeff = getLastCoefficient();
284      const SCEV *ElemSize = Sizes.back();
285      const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
286      const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
287      Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
288      Stride = SE.getNoopOrSignExtend(Stride, WiderType);
289      TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType);
290      const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
291      RefCost = SE.getUDivExpr(Numerator, CacheLineSize);
292      LLVM_DEBUG(dbgs().indent(4)
293                 << "Access is consecutive: RefCost=(TripCount*Stride)/CLS="
294                 << *RefCost << "\n");
295    } else
296      LLVM_DEBUG(dbgs().indent(4)
297                 << "Access is not consecutive: RefCost=TripCount=" << *RefCost
298                 << "\n");
299  
300    // Attempt to fold RefCost into a constant.
301    if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
302      return ConstantCost->getValue()->getSExtValue();
303  
304    LLVM_DEBUG(dbgs().indent(4)
305               << "RefCost is not a constant! Setting to RefCost=InvalidCost "
306                  "(invalid value).\n");
307  
308    return CacheCost::InvalidCost;
309  }
310  
311  bool IndexedReference::delinearize(const LoopInfo &LI) {
312    assert(Subscripts.empty() && "Subscripts should be empty");
313    assert(Sizes.empty() && "Sizes should be empty");
314    assert(!IsValid && "Should be called once from the constructor");
315    LLVM_DEBUG(dbgs() << "Delinearizing: " << StoreOrLoadInst << "\n");
316  
317    const SCEV *ElemSize = SE.getElementSize(&StoreOrLoadInst);
318    const BasicBlock *BB = StoreOrLoadInst.getParent();
319  
320    if (Loop *L = LI.getLoopFor(BB)) {
321      const SCEV *AccessFn =
322          SE.getSCEVAtScope(getPointerOperand(&StoreOrLoadInst), L);
323  
324      BasePointer = dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFn));
325      if (BasePointer == nullptr) {
326        LLVM_DEBUG(
327            dbgs().indent(2)
328            << "ERROR: failed to delinearize, can't identify base pointer\n");
329        return false;
330      }
331  
332      AccessFn = SE.getMinusSCEV(AccessFn, BasePointer);
333  
334      LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
335                                  << "', AccessFn: " << *AccessFn << "\n");
336  
337      SE.delinearize(AccessFn, Subscripts, Sizes,
338                     SE.getElementSize(&StoreOrLoadInst));
339  
340      if (Subscripts.empty() || Sizes.empty() ||
341          Subscripts.size() != Sizes.size()) {
342        // Attempt to determine whether we have a single dimensional array access.
343        // before giving up.
344        if (!isOneDimensionalArray(*AccessFn, *ElemSize, *L, SE)) {
345          LLVM_DEBUG(dbgs().indent(2)
346                     << "ERROR: failed to delinearize reference\n");
347          Subscripts.clear();
348          Sizes.clear();
349          return false;
350        }
351  
352        const SCEV *Div = SE.getUDivExactExpr(AccessFn, ElemSize);
353        Subscripts.push_back(Div);
354        Sizes.push_back(ElemSize);
355      }
356  
357      return all_of(Subscripts, [&](const SCEV *Subscript) {
358        return isSimpleAddRecurrence(*Subscript, *L);
359      });
360    }
361  
362    return false;
363  }
364  
365  bool IndexedReference::isLoopInvariant(const Loop &L) const {
366    Value *Addr = getPointerOperand(&StoreOrLoadInst);
367    assert(Addr != nullptr && "Expecting either a load or a store instruction");
368    assert(SE.isSCEVable(Addr->getType()) && "Addr should be SCEVable");
369  
370    if (SE.isLoopInvariant(SE.getSCEV(Addr), &L))
371      return true;
372  
373    // The indexed reference is loop invariant if none of the coefficients use
374    // the loop induction variable.
375    bool allCoeffForLoopAreZero = all_of(Subscripts, [&](const SCEV *Subscript) {
376      return isCoeffForLoopZeroOrInvariant(*Subscript, L);
377    });
378  
379    return allCoeffForLoopAreZero;
380  }
381  
382  bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
383    // The indexed reference is 'consecutive' if the only coefficient that uses
384    // the loop induction variable is the last one...
385    const SCEV *LastSubscript = Subscripts.back();
386    for (const SCEV *Subscript : Subscripts) {
387      if (Subscript == LastSubscript)
388        continue;
389      if (!isCoeffForLoopZeroOrInvariant(*Subscript, L))
390        return false;
391    }
392  
393    // ...and the access stride is less than the cache line size.
394    const SCEV *Coeff = getLastCoefficient();
395    const SCEV *ElemSize = Sizes.back();
396    const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
397    const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
398  
399    return SE.isKnownPredicate(ICmpInst::ICMP_ULT, Stride, CacheLineSize);
400  }
401  
402  const SCEV *IndexedReference::getLastCoefficient() const {
403    const SCEV *LastSubscript = getLastSubscript();
404    assert(isa<SCEVAddRecExpr>(LastSubscript) &&
405           "Expecting a SCEV add recurrence expression");
406    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LastSubscript);
407    return AR->getStepRecurrence(SE);
408  }
409  
410  bool IndexedReference::isCoeffForLoopZeroOrInvariant(const SCEV &Subscript,
411                                                       const Loop &L) const {
412    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(&Subscript);
413    return (AR != nullptr) ? AR->getLoop() != &L
414                           : SE.isLoopInvariant(&Subscript, &L);
415  }
416  
417  bool IndexedReference::isSimpleAddRecurrence(const SCEV &Subscript,
418                                               const Loop &L) const {
419    if (!isa<SCEVAddRecExpr>(Subscript))
420      return false;
421  
422    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(&Subscript);
423    assert(AR->getLoop() && "AR should have a loop");
424  
425    if (!AR->isAffine())
426      return false;
427  
428    const SCEV *Start = AR->getStart();
429    const SCEV *Step = AR->getStepRecurrence(SE);
430  
431    if (!SE.isLoopInvariant(Start, &L) || !SE.isLoopInvariant(Step, &L))
432      return false;
433  
434    return true;
435  }
436  
437  bool IndexedReference::isAliased(const IndexedReference &Other,
438                                   AliasAnalysis &AA) const {
439    const auto &Loc1 = MemoryLocation::get(&StoreOrLoadInst);
440    const auto &Loc2 = MemoryLocation::get(&Other.StoreOrLoadInst);
441    return AA.isMustAlias(Loc1, Loc2);
442  }
443  
444  //===----------------------------------------------------------------------===//
445  // CacheCost implementation
446  //
447  raw_ostream &llvm::operator<<(raw_ostream &OS, const CacheCost &CC) {
448    for (const auto &LC : CC.LoopCosts) {
449      const Loop *L = LC.first;
450      OS << "Loop '" << L->getName() << "' has cost = " << LC.second << "\n";
451    }
452    return OS;
453  }
454  
455  CacheCost::CacheCost(const LoopVectorTy &Loops, const LoopInfo &LI,
456                       ScalarEvolution &SE, TargetTransformInfo &TTI,
457                       AliasAnalysis &AA, DependenceInfo &DI,
458                       Optional<unsigned> TRT)
459      : Loops(Loops), TripCounts(), LoopCosts(),
460        TRT((TRT == None) ? Optional<unsigned>(TemporalReuseThreshold) : TRT),
461        LI(LI), SE(SE), TTI(TTI), AA(AA), DI(DI) {
462    assert(!Loops.empty() && "Expecting a non-empty loop vector.");
463  
464    for (const Loop *L : Loops) {
465      unsigned TripCount = SE.getSmallConstantTripCount(L);
466      TripCount = (TripCount == 0) ? DefaultTripCount : TripCount;
467      TripCounts.push_back({L, TripCount});
468    }
469  
470    calculateCacheFootprint();
471  }
472  
473  std::unique_ptr<CacheCost>
474  CacheCost::getCacheCost(Loop &Root, LoopStandardAnalysisResults &AR,
475                          DependenceInfo &DI, Optional<unsigned> TRT) {
476    if (Root.getParentLoop()) {
477      LLVM_DEBUG(dbgs() << "Expecting the outermost loop in a loop nest\n");
478      return nullptr;
479    }
480  
481    LoopVectorTy Loops;
482    for (Loop *L : breadth_first(&Root))
483      Loops.push_back(L);
484  
485    if (!getInnerMostLoop(Loops)) {
486      LLVM_DEBUG(dbgs() << "Cannot compute cache cost of loop nest with more "
487                           "than one innermost loop\n");
488      return nullptr;
489    }
490  
491    return std::make_unique<CacheCost>(Loops, AR.LI, AR.SE, AR.TTI, AR.AA, DI, TRT);
492  }
493  
494  void CacheCost::calculateCacheFootprint() {
495    LLVM_DEBUG(dbgs() << "POPULATING REFERENCE GROUPS\n");
496    ReferenceGroupsTy RefGroups;
497    if (!populateReferenceGroups(RefGroups))
498      return;
499  
500    LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n");
501    for (const Loop *L : Loops) {
502      assert((std::find_if(LoopCosts.begin(), LoopCosts.end(),
503                           [L](const LoopCacheCostTy &LCC) {
504                             return LCC.first == L;
505                           }) == LoopCosts.end()) &&
506             "Should not add duplicate element");
507      CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
508      LoopCosts.push_back(std::make_pair(L, LoopCost));
509    }
510  
511    sortLoopCosts();
512    RefGroups.clear();
513  }
514  
515  bool CacheCost::populateReferenceGroups(ReferenceGroupsTy &RefGroups) const {
516    assert(RefGroups.empty() && "Reference groups should be empty");
517  
518    unsigned CLS = TTI.getCacheLineSize();
519    Loop *InnerMostLoop = getInnerMostLoop(Loops);
520    assert(InnerMostLoop != nullptr && "Expecting a valid innermost loop");
521  
522    for (BasicBlock *BB : InnerMostLoop->getBlocks()) {
523      for (Instruction &I : *BB) {
524        if (!isa<StoreInst>(I) && !isa<LoadInst>(I))
525          continue;
526  
527        std::unique_ptr<IndexedReference> R(new IndexedReference(I, LI, SE));
528        if (!R->isValid())
529          continue;
530  
531        bool Added = false;
532        for (ReferenceGroupTy &RefGroup : RefGroups) {
533          const IndexedReference &Representative = *RefGroup.front().get();
534          LLVM_DEBUG({
535            dbgs() << "References:\n";
536            dbgs().indent(2) << *R << "\n";
537            dbgs().indent(2) << Representative << "\n";
538          });
539  
540          Optional<bool> HasTemporalReuse =
541              R->hasTemporalReuse(Representative, *TRT, *InnerMostLoop, DI, AA);
542          Optional<bool> HasSpacialReuse =
543              R->hasSpacialReuse(Representative, CLS, AA);
544  
545          if ((HasTemporalReuse.hasValue() && *HasTemporalReuse) ||
546              (HasSpacialReuse.hasValue() && *HasSpacialReuse)) {
547            RefGroup.push_back(std::move(R));
548            Added = true;
549            break;
550          }
551        }
552  
553        if (!Added) {
554          ReferenceGroupTy RG;
555          RG.push_back(std::move(R));
556          RefGroups.push_back(std::move(RG));
557        }
558      }
559    }
560  
561    if (RefGroups.empty())
562      return false;
563  
564    LLVM_DEBUG({
565      dbgs() << "\nIDENTIFIED REFERENCE GROUPS:\n";
566      int n = 1;
567      for (const ReferenceGroupTy &RG : RefGroups) {
568        dbgs().indent(2) << "RefGroup " << n << ":\n";
569        for (const auto &IR : RG)
570          dbgs().indent(4) << *IR << "\n";
571        n++;
572      }
573      dbgs() << "\n";
574    });
575  
576    return true;
577  }
578  
579  CacheCostTy
580  CacheCost::computeLoopCacheCost(const Loop &L,
581                                  const ReferenceGroupsTy &RefGroups) const {
582    if (!L.isLoopSimplifyForm())
583      return InvalidCost;
584  
585    LLVM_DEBUG(dbgs() << "Considering loop '" << L.getName()
586                      << "' as innermost loop.\n");
587  
588    // Compute the product of the trip counts of each other loop in the nest.
589    CacheCostTy TripCountsProduct = 1;
590    for (const auto &TC : TripCounts) {
591      if (TC.first == &L)
592        continue;
593      TripCountsProduct *= TC.second;
594    }
595  
596    CacheCostTy LoopCost = 0;
597    for (const ReferenceGroupTy &RG : RefGroups) {
598      CacheCostTy RefGroupCost = computeRefGroupCacheCost(RG, L);
599      LoopCost += RefGroupCost * TripCountsProduct;
600    }
601  
602    LLVM_DEBUG(dbgs().indent(2) << "Loop '" << L.getName()
603                                << "' has cost=" << LoopCost << "\n");
604  
605    return LoopCost;
606  }
607  
608  CacheCostTy CacheCost::computeRefGroupCacheCost(const ReferenceGroupTy &RG,
609                                                  const Loop &L) const {
610    assert(!RG.empty() && "Reference group should have at least one member.");
611  
612    const IndexedReference *Representative = RG.front().get();
613    return Representative->computeRefCost(L, TTI.getCacheLineSize());
614  }
615  
616  //===----------------------------------------------------------------------===//
617  // LoopCachePrinterPass implementation
618  //
619  PreservedAnalyses LoopCachePrinterPass::run(Loop &L, LoopAnalysisManager &AM,
620                                              LoopStandardAnalysisResults &AR,
621                                              LPMUpdater &U) {
622    Function *F = L.getHeader()->getParent();
623    DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
624  
625    if (auto CC = CacheCost::getCacheCost(L, AR, DI))
626      OS << *CC;
627  
628    return PreservedAnalyses::all();
629  }
630