xref: /freebsd/contrib/llvm-project/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp (revision cfd6422a5217410fbd66f7a7a8a64d9d85e61229)
1 //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the functionalities used by the BottleneckAnalysis
11 /// to report bottleneck info.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "Views/BottleneckAnalysis.h"
16 #include "llvm/MC/MCInst.h"
17 #include "llvm/MCA/Support.h"
18 #include "llvm/Support/Format.h"
19 #include "llvm/Support/FormattedStream.h"
20 
21 namespace llvm {
22 namespace mca {
23 
24 #define DEBUG_TYPE "llvm-mca"
25 
26 PressureTracker::PressureTracker(const MCSchedModel &Model)
27     : SM(Model),
28       ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
29       ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
30       ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
31       ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
32   computeProcResourceMasks(SM, ProcResID2Mask);
33 
34   // Ignore the invalid resource at index zero.
35   unsigned NextResourceUsersIdx = 0;
36   for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
37     const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
38     ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
39     NextResourceUsersIdx += ProcResource.NumUnits;
40     uint64_t ResourceMask = ProcResID2Mask[I];
41     ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
42   }
43 
44   ResourceUsers.resize(NextResourceUsersIdx);
45   std::fill(ResourceUsers.begin(), ResourceUsers.end(),
46             std::make_pair<unsigned, unsigned>(~0U, 0U));
47 }
48 
49 void PressureTracker::getResourceUsers(uint64_t ResourceMask,
50                                        SmallVectorImpl<User> &Users) const {
51   unsigned Index = getResourceStateIndex(ResourceMask);
52   unsigned ProcResID = ResIdx2ProcResID[Index];
53   const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
54   for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
55     const User U = getResourceUser(ProcResID, I);
56     if (U.second && IPI.find(U.first) != IPI.end())
57       Users.emplace_back(U);
58   }
59 }
60 
61 void PressureTracker::onInstructionDispatched(unsigned IID) {
62   IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
63 }
64 
65 void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }
66 
67 void PressureTracker::handleInstructionIssuedEvent(
68     const HWInstructionIssuedEvent &Event) {
69   unsigned IID = Event.IR.getSourceIndex();
70   using ResourceRef = HWInstructionIssuedEvent::ResourceRef;
71   using ResourceUse = std::pair<ResourceRef, ResourceCycles>;
72   for (const ResourceUse &Use : Event.UsedResources) {
73     const ResourceRef &RR = Use.first;
74     unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
75     Index += countTrailingZeros(RR.second);
76     ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
77   }
78 }
79 
80 void PressureTracker::updateResourcePressureDistribution(
81     uint64_t CumulativeMask) {
82   while (CumulativeMask) {
83     uint64_t Current = CumulativeMask & (-CumulativeMask);
84     unsigned ResIdx = getResourceStateIndex(Current);
85     unsigned ProcResID = ResIdx2ProcResID[ResIdx];
86     uint64_t Mask = ProcResID2Mask[ProcResID];
87 
88     if (Mask == Current) {
89       ResourcePressureDistribution[ProcResID]++;
90       CumulativeMask ^= Current;
91       continue;
92     }
93 
94     Mask ^= Current;
95     while (Mask) {
96       uint64_t SubUnit = Mask & (-Mask);
97       ResIdx = getResourceStateIndex(SubUnit);
98       ProcResID = ResIdx2ProcResID[ResIdx];
99       ResourcePressureDistribution[ProcResID]++;
100       Mask ^= SubUnit;
101     }
102 
103     CumulativeMask ^= Current;
104   }
105 }
106 
107 void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
108   assert(Event.Reason != HWPressureEvent::INVALID &&
109          "Unexpected invalid event!");
110 
111   switch (Event.Reason) {
112   default:
113     break;
114 
115   case HWPressureEvent::RESOURCES: {
116     const uint64_t ResourceMask = Event.ResourceMask;
117     updateResourcePressureDistribution(Event.ResourceMask);
118 
119     for (const InstRef &IR : Event.AffectedInstructions) {
120       const Instruction &IS = *IR.getInstruction();
121       unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
122       if (!BusyResources)
123         continue;
124 
125       unsigned IID = IR.getSourceIndex();
126       IPI[IID].ResourcePressureCycles++;
127     }
128     break;
129   }
130 
131   case HWPressureEvent::REGISTER_DEPS:
132     for (const InstRef &IR : Event.AffectedInstructions) {
133       unsigned IID = IR.getSourceIndex();
134       IPI[IID].RegisterPressureCycles++;
135     }
136     break;
137 
138   case HWPressureEvent::MEMORY_DEPS:
139     for (const InstRef &IR : Event.AffectedInstructions) {
140       unsigned IID = IR.getSourceIndex();
141       IPI[IID].MemoryPressureCycles++;
142     }
143   }
144 }
145 
146 #ifndef NDEBUG
147 void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
148                                          const DependencyEdge &DepEdge,
149                                          MCInstPrinter &MCIP) const {
150   unsigned FromIID = DepEdge.FromIID;
151   unsigned ToIID = DepEdge.ToIID;
152   assert(FromIID < ToIID && "Graph should be acyclic!");
153 
154   const DependencyEdge::Dependency &DE = DepEdge.Dep;
155   assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");
156 
157   OS << " FROM: " << FromIID << " TO: " << ToIID << "             ";
158   if (DE.Type == DependencyEdge::DT_REGISTER) {
159     OS << " - REGISTER: ";
160     MCIP.printRegName(OS, DE.ResourceOrRegID);
161   } else if (DE.Type == DependencyEdge::DT_MEMORY) {
162     OS << " - MEMORY";
163   } else {
164     assert(DE.Type == DependencyEdge::DT_RESOURCE &&
165            "Unsupported dependency type!");
166     OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
167   }
168   OS << " - COST: " << DE.Cost << '\n';
169 }
170 #endif // NDEBUG
171 
172 void DependencyGraph::pruneEdges(unsigned Iterations) {
173   for (DGNode &N : Nodes) {
174     unsigned NumPruned = 0;
175     const unsigned Size = N.OutgoingEdges.size();
176     // Use a cut-off threshold to prune edges with a low frequency.
177     for (unsigned I = 0, E = Size; I < E; ++I) {
178       DependencyEdge &Edge = N.OutgoingEdges[I];
179       if (Edge.Frequency == Iterations)
180         continue;
181       double Factor = (double)Edge.Frequency / Iterations;
182       if (0.10 < Factor)
183         continue;
184       Nodes[Edge.ToIID].NumPredecessors--;
185       std::swap(Edge, N.OutgoingEdges[E - 1]);
186       --E;
187       ++NumPruned;
188     }
189 
190     if (NumPruned)
191       N.OutgoingEdges.resize(Size - NumPruned);
192   }
193 }
194 
195 void DependencyGraph::initializeRootSet(
196     SmallVectorImpl<unsigned> &RootSet) const {
197   for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
198     const DGNode &N = Nodes[I];
199     if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
200       RootSet.emplace_back(I);
201   }
202 }
203 
void DependencyGraph::propagateThroughEdges(
    SmallVectorImpl<unsigned> &RootSet, unsigned Iterations) {
  SmallVector<unsigned, 8> ToVisit;

  // A critical sequence is computed as the longest path from a node of the
  // RootSet to a leaf node (i.e. a node with no successors).  The RootSet is
  // composed of nodes with at least one successor, and no predecessors.
  //
  // Each node of the graph starts with an initial default cost of zero.  The
  // cost of a node is a measure of criticality: the higher the cost, the bigger
  // is the performance impact.
  // For register and memory dependencies, the cost is a function of the write
  // latency as well as the actual delay (in cycles) caused to users.
  // For processor resource dependencies, the cost is a function of the resource
  // pressure. Resource interferences with low frequency values are ignored.
  //
  // This algorithm is very similar to a (reverse) Dijkstra.  Every iteration of
  // the inner loop selects (i.e. visits) a node N from a set of `unvisited
  // nodes`, and then propagates the cost of N to all its neighbors.
  //
  // The `unvisited nodes` set initially contains all the nodes from the
  // RootSet.  A node N is added to the `unvisited nodes` if all its
  // predecessors have been visited already.
  //
  // For simplicity, every node counts how many of its incoming edges have
  // been propagated so far, in field `NumVisitedPredecessors`.  When that
  // counter reaches `NumPredecessors` (i.e. every incoming edge has been
  // seen), the node becomes ready and is added to the `ToVisit` set.
  //
  // At the end of every iteration of the outer loop, set `ToVisit` becomes our
  // new `unvisited nodes` set.
  //
  // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
  // is empty. This algorithm works under the assumption that the graph is
  // acyclic.
  do {
    for (unsigned IID : RootSet) {
      const DGNode &N = Nodes[IID];
      for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
        unsigned ToIID = DepEdge.ToIID;
        DGNode &To = Nodes[ToIID];
        // Cost of reaching ToIID through this particular edge.
        uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
        // Check if this is the most expensive incoming edge seen so far.  In
        // case, update the total cost of the destination node (ToIID), as well
        // its field `CriticalPredecessor`.
        if (Cost > To.Cost) {
          To.CriticalPredecessor = DepEdge;
          To.Cost = Cost;
          To.Depth = N.Depth + 1;
        }
        To.NumVisitedPredecessors++;
        // Once every incoming edge of ToIID has been propagated, the node is
        // ready to be visited in the next round.
        if (To.NumVisitedPredecessors == To.NumPredecessors)
          ToVisit.emplace_back(ToIID);
      }
    }

    // The freshly readied nodes become the frontier for the next round.
    std::swap(RootSet, ToVisit);
    ToVisit.clear();
  } while (!RootSet.empty());
}
263 
264 void DependencyGraph::getCriticalSequence(
265     SmallVectorImpl<const DependencyEdge *> &Seq) const {
266   // At this stage, nodes of the graph have been already visited, and costs have
267   // been propagated through the edges (see method `propagateThroughEdges()`).
268 
269   // Identify the node N with the highest cost in the graph. By construction,
270   // that node is the last instruction of our critical sequence.
271   // Field N.Depth would tell us the total length of the sequence.
272   //
273   // To obtain the sequence of critical edges, we simply follow the chain of critical
274   // predecessors starting from node N (field DGNode::CriticalPredecessor).
275   const auto It = std::max_element(
276       Nodes.begin(), Nodes.end(),
277       [](const DGNode &Lhs, const DGNode &Rhs) { return Lhs.Cost < Rhs.Cost; });
278   unsigned IID = std::distance(Nodes.begin(), It);
279   Seq.resize(Nodes[IID].Depth);
280   for (unsigned I = Seq.size(), E = 0; I > E; --I) {
281     const DGNode &N = Nodes[IID];
282     Seq[I - 1] = &N.CriticalPredecessor;
283     IID = N.CriticalPredecessor.FromIID;
284   }
285 }
286 
287 static void printInstruction(formatted_raw_ostream &FOS,
288                              const MCSubtargetInfo &STI, MCInstPrinter &MCIP,
289                              const MCInst &MCI,
290                              bool UseDifferentColor = false) {
291   std::string Instruction;
292   raw_string_ostream InstrStream(Instruction);
293 
294   FOS.PadToColumn(14);
295 
296   MCIP.printInst(&MCI, 0, "", STI, InstrStream);
297   InstrStream.flush();
298 
299   if (UseDifferentColor)
300     FOS.changeColor(raw_ostream::CYAN, true, false);
301   FOS << StringRef(Instruction).ltrim();
302   if (UseDifferentColor)
303     FOS.resetColor();
304 }
305 
// Print the critical sequence of instructions as an ASCII-art listing: the
// full source block is shown, with '+----<'/'+---->' arrows marking the
// endpoints of each critical dependency edge and '< loop carried >' gaps for
// edges that wrap around the iteration boundary.  Dependency details are
// printed in a second column starting at column 58.
void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
  // Early exit if no bottlenecks were found during the simulation.
  if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
    return;

  SmallVector<const DependencyEdge *, 16> Seq;
  DG.getCriticalSequence(Seq);
  if (Seq.empty())
    return;

  OS << "\nCritical sequence based on the simulation:\n\n";

  // Edge endpoints are node IDs in the expanded (multi-iteration) graph;
  // reduce them modulo Source.size() to recover source instruction indices.
  const DependencyEdge &FirstEdge = *Seq[0];
  unsigned FromIID = FirstEdge.FromIID % Source.size();
  unsigned ToIID = FirstEdge.ToIID % Source.size();
  // A dependency whose producer does not precede its consumer in the block
  // must cross the loop back-edge.
  bool IsLoopCarried = FromIID >= ToIID;

  formatted_raw_ostream FOS(OS);
  FOS.PadToColumn(14);
  FOS << "Instruction";
  FOS.PadToColumn(58);
  FOS << "Dependency Information";

  bool HasColors = FOS.has_colors();

  // CurrentIID tracks how far down the source listing we have printed.
  unsigned CurrentIID = 0;
  if (IsLoopCarried) {
    FOS << "\n +----< " << FromIID << ".";
    printInstruction(FOS, STI, MCIP, Source[FromIID], HasColors);
    FOS << "\n |\n |    < loop carried > \n |";
  } else {
    // Print the instructions preceding the first edge's producer, unmarked.
    while (CurrentIID < FromIID) {
      FOS << "\n        " << CurrentIID << ".";
      printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
      CurrentIID++;
    }

    FOS << "\n +----< " << CurrentIID << ".";
    printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
    CurrentIID++;
  }

  // Walk the critical edges in order; for each, print the intervening
  // instructions and then the edge's consumer with its dependency info.
  for (const DependencyEdge *&DE : Seq) {
    ToIID = DE->ToIID % Source.size();
    // If the consumer index is behind us, the edge wraps: print to the end
    // of the block first.
    unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;

    while (CurrentIID < LastIID) {
      FOS << "\n |      " << CurrentIID << ".";
      printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
      CurrentIID++;
    }

    if (CurrentIID == ToIID) {
      FOS << "\n +----> " << ToIID << ".";
      printInstruction(FOS, STI, MCIP, Source[CurrentIID], HasColors);
    } else {
      // Loop-carried edge: show the wrap-around gap before the consumer.
      FOS << "\n |\n |    < loop carried > \n |"
          << "\n +----> " << ToIID << ".";
      printInstruction(FOS, STI, MCIP, Source[ToIID], HasColors);
    }
    FOS.PadToColumn(58);

    // Describe the dependency in the right-hand column.
    const DependencyEdge::Dependency &Dep = DE->Dep;
    if (HasColors)
      FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);

    if (Dep.Type == DependencyEdge::DT_REGISTER) {
      FOS << "## REGISTER dependency:  ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      MCIP.printRegName(FOS, Dep.ResourceOrRegID);
    } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
      FOS << "## MEMORY dependency.";
    } else {
      assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
             "Unsupported dependency type!");
      FOS << "## RESOURCE interference:  ";
      if (HasColors)
        FOS.changeColor(raw_ostream::MAGENTA, true, false);
      FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
      if (HasColors) {
        FOS.resetColor();
        FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
      }
      // Edge frequency over the number of iterations, as a percentage.
      FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
          << "% ]";
    }
    if (HasColors)
      FOS.resetColor();
    ++CurrentIID;
  }

  // Print any remaining instructions after the last edge's consumer.
  while (CurrentIID < Source.size()) {
    FOS << "\n        " << CurrentIID << ".";
    printInstruction(FOS, STI, MCIP, Source[CurrentIID]);
    CurrentIID++;
  }

  FOS << '\n';
  FOS.flush();
}
407 
408 #ifndef NDEBUG
409 void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
410   OS << "\nREG DEPS\n";
411   for (const DGNode &Node : Nodes)
412     for (const DependencyEdge &DE : Node.OutgoingEdges)
413       if (DE.Dep.Type == DependencyEdge::DT_REGISTER)
414         dumpDependencyEdge(OS, DE, MCIP);
415 
416   OS << "\nMEM DEPS\n";
417   for (const DGNode &Node : Nodes)
418     for (const DependencyEdge &DE : Node.OutgoingEdges)
419       if (DE.Dep.Type == DependencyEdge::DT_MEMORY)
420         dumpDependencyEdge(OS, DE, MCIP);
421 
422   OS << "\nRESOURCE DEPS\n";
423   for (const DGNode &Node : Nodes)
424     for (const DependencyEdge &DE : Node.OutgoingEdges)
425       if (DE.Dep.Type == DependencyEdge::DT_RESOURCE)
426         dumpDependencyEdge(OS, DE, MCIP);
427 }
428 #endif // NDEBUG
429 
430 void DependencyGraph::addDependency(unsigned From, unsigned To,
431                                     DependencyEdge::Dependency &&Dep) {
432   DGNode &NodeFrom = Nodes[From];
433   DGNode &NodeTo = Nodes[To];
434   SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;
435 
436   auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) {
437     return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
438   });
439 
440   if (It != Vec.end()) {
441     It->Dep.Cost += Dep.Cost;
442     It->Frequency++;
443     return;
444   }
445 
446   DependencyEdge DE = {Dep, From, To, 1};
447   Vec.emplace_back(DE);
448   NodeTo.NumPredecessors++;
449 }
450 
// Construct the analysis for source block S simulated NumIter times.  The
// dependency graph is sized at 3 * S.size(): dependencies are recorded
// against three back-to-back copies of the block (see addRegisterDep /
// addMemoryDep / addResourceDep), which lets loop-carried edges be expressed
// without creating cycles.
BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
                                       MCInstPrinter &Printer,
                                       ArrayRef<MCInst> S, unsigned NumIter)
    : STI(sti), MCIP(Printer), Tracker(STI.getSchedModel()), DG(S.size() * 3),
      Source(S), Iterations(NumIter), TotalCycles(0),
      PressureIncreasedBecauseOfResources(false),
      PressureIncreasedBecauseOfRegisterDependencies(false),
      PressureIncreasedBecauseOfMemoryDependencies(false),
      SeenStallCycles(false), BPI() {}
460 
461 void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
462                                         unsigned RegID, unsigned Cost) {
463   bool IsLoopCarried = From >= To;
464   unsigned SourceSize = Source.size();
465   if (IsLoopCarried) {
466     DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
467     DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
468     return;
469   }
470   DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
471 }
472 
473 void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
474                                       unsigned Cost) {
475   bool IsLoopCarried = From >= To;
476   unsigned SourceSize = Source.size();
477   if (IsLoopCarried) {
478     DG.addMemoryDep(From, To + SourceSize, Cost);
479     DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
480     return;
481   }
482   DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
483 }
484 
485 void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
486                                         uint64_t Mask, unsigned Cost) {
487   bool IsLoopCarried = From >= To;
488   unsigned SourceSize = Source.size();
489   if (IsLoopCarried) {
490     DG.addResourceDep(From, To + SourceSize, Mask, Cost);
491     DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
492     return;
493   }
494   DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
495 }
496 
// Instruction lifecycle hook.  Dispatch/Executed events only update the
// pressure tracker; Issued events additionally record this instruction's
// critical resource, register, and memory dependencies into the dependency
// graph.  Pressure-cycle counts are doubled when folded into edge costs
// (a weighting choice; the raw dependency latency is added on top).
void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
  const unsigned IID = Event.IR.getSourceIndex();
  if (Event.Type == HWInstructionEvent::Dispatched) {
    Tracker.onInstructionDispatched(IID);
    return;
  }
  if (Event.Type == HWInstructionEvent::Executed) {
    Tracker.onInstructionExecuted(IID);
    return;
  }

  // Everything below only applies to Issue events.
  if (Event.Type != HWInstructionEvent::Issued)
    return;

  const Instruction &IS = *Event.IR.getInstruction();
  // Reduce the running instruction index to a source-block index.
  unsigned To = IID % Source.size();

  // Resource interference: for each bit of the critical resource mask, add an
  // edge from every instruction currently using that resource to this one.
  unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
  uint64_t ResourceMask = IS.getCriticalResourceMask();
  SmallVector<std::pair<unsigned, unsigned>, 4> Users;
  while (ResourceMask) {
    // Isolate the lowest set bit.
    uint64_t Current = ResourceMask & (-ResourceMask);
    Tracker.getResourceUsers(Current, Users);
    for (const std::pair<unsigned, unsigned> &U : Users)
      addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
    Users.clear();
    ResourceMask ^= Current;
  }

  // Critical register dependency, if any (Cycles == 0 means none).
  const CriticalDependency &RegDep = IS.getCriticalRegDep();
  if (RegDep.Cycles) {
    Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
    unsigned From = RegDep.IID % Source.size();
    addRegisterDep(From, To, RegDep.RegID, Cycles);
  }

  // Critical memory dependency, if any.
  const CriticalDependency &MemDep = IS.getCriticalMemDep();
  if (MemDep.Cycles) {
    Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
    unsigned From = MemDep.IID % Source.size();
    addMemoryDep(From, To, Cycles);
  }

  // Update the per-resource-unit user table AFTER the dependencies above have
  // been collected against the previous users.
  Tracker.handleInstructionIssuedEvent(
      static_cast<const HWInstructionIssuedEvent &>(Event));

  // Check if this is the last simulated instruction.
  if (IID == ((Iterations * Source.size()) - 1))
    DG.finalizeGraph(Iterations);
}
547 
548 void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
549   assert(Event.Reason != HWPressureEvent::INVALID &&
550          "Unexpected invalid event!");
551 
552   Tracker.handlePressureEvent(Event);
553 
554   switch (Event.Reason) {
555   default:
556     break;
557 
558   case HWPressureEvent::RESOURCES:
559     PressureIncreasedBecauseOfResources = true;
560     break;
561   case HWPressureEvent::REGISTER_DEPS:
562     PressureIncreasedBecauseOfRegisterDependencies = true;
563     break;
564   case HWPressureEvent::MEMORY_DEPS:
565     PressureIncreasedBecauseOfMemoryDependencies = true;
566     break;
567   }
568 }
569 
570 void BottleneckAnalysis::onCycleEnd() {
571   ++TotalCycles;
572 
573   bool PressureIncreasedBecauseOfDataDependencies =
574       PressureIncreasedBecauseOfRegisterDependencies ||
575       PressureIncreasedBecauseOfMemoryDependencies;
576   if (!PressureIncreasedBecauseOfResources &&
577       !PressureIncreasedBecauseOfDataDependencies)
578     return;
579 
580   ++BPI.PressureIncreaseCycles;
581   if (PressureIncreasedBecauseOfRegisterDependencies)
582     ++BPI.RegisterDependencyCycles;
583   if (PressureIncreasedBecauseOfMemoryDependencies)
584     ++BPI.MemoryDependencyCycles;
585   if (PressureIncreasedBecauseOfDataDependencies)
586     ++BPI.DataDependencyCycles;
587   if (PressureIncreasedBecauseOfResources)
588     ++BPI.ResourcePressureCycles;
589   PressureIncreasedBecauseOfResources = false;
590   PressureIncreasedBecauseOfRegisterDependencies = false;
591   PressureIncreasedBecauseOfMemoryDependencies = false;
592 }
593 
594 void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
595   if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
596     OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
597     return;
598   }
599 
600   double PressurePerCycle =
601       (double)BPI.PressureIncreaseCycles * 100 / TotalCycles;
602   double ResourcePressurePerCycle =
603       (double)BPI.ResourcePressureCycles * 100 / TotalCycles;
604   double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles;
605   double RegDepPressurePerCycle =
606       (double)BPI.RegisterDependencyCycles * 100 / TotalCycles;
607   double MemDepPressurePerCycle =
608       (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;
609 
610   OS << "\n\nCycles with backend pressure increase [ "
611      << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";
612 
613   OS << "\nThroughput Bottlenecks: "
614      << "\n  Resource Pressure       [ "
615      << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100)
616      << "% ]";
617 
618   if (BPI.PressureIncreaseCycles) {
619     ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
620     const MCSchedModel &SM = STI.getSchedModel();
621     for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
622       unsigned ResourceCycles = Distribution[I];
623       if (ResourceCycles) {
624         double Frequency = (double)ResourceCycles * 100 / TotalCycles;
625         const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
626         OS << "\n  - " << PRDesc.Name << "  [ "
627            << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
628       }
629     }
630   }
631 
632   OS << "\n  Data Dependencies:      [ "
633      << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
634   OS << "\n  - Register Dependencies [ "
635      << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
636      << "% ]";
637   OS << "\n  - Memory Dependencies   [ "
638      << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
639      << "% ]\n";
640 }
641 
642 void BottleneckAnalysis::printView(raw_ostream &OS) const {
643   std::string Buffer;
644   raw_string_ostream TempStream(Buffer);
645   printBottleneckHints(TempStream);
646   TempStream.flush();
647   OS << Buffer;
648   printCriticalSequence(OS);
649 }
650 
651 } // namespace mca.
652 } // namespace llvm
653