1 //===--------------------- BottleneckAnalysis.cpp ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the functionalities used by the BottleneckAnalysis
11 /// to report bottleneck info.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "Views/BottleneckAnalysis.h"
16 #include "llvm/MC/MCInst.h"
17 #include "llvm/MCA/Support.h"
18 #include "llvm/Support/Format.h"
19
20 namespace llvm {
21 namespace mca {
22
23 #define DEBUG_TYPE "llvm-mca"
24
PressureTracker(const MCSchedModel & Model)25 PressureTracker::PressureTracker(const MCSchedModel &Model)
26 : SM(Model),
27 ResourcePressureDistribution(Model.getNumProcResourceKinds(), 0),
28 ProcResID2Mask(Model.getNumProcResourceKinds(), 0),
29 ResIdx2ProcResID(Model.getNumProcResourceKinds(), 0),
30 ProcResID2ResourceUsersIndex(Model.getNumProcResourceKinds(), 0) {
31 computeProcResourceMasks(SM, ProcResID2Mask);
32
33 // Ignore the invalid resource at index zero.
34 unsigned NextResourceUsersIdx = 0;
35 for (unsigned I = 1, E = Model.getNumProcResourceKinds(); I < E; ++I) {
36 const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
37 ProcResID2ResourceUsersIndex[I] = NextResourceUsersIdx;
38 NextResourceUsersIdx += ProcResource.NumUnits;
39 uint64_t ResourceMask = ProcResID2Mask[I];
40 ResIdx2ProcResID[getResourceStateIndex(ResourceMask)] = I;
41 }
42
43 ResourceUsers.resize(NextResourceUsersIdx);
44 std::fill(ResourceUsers.begin(), ResourceUsers.end(),
45 std::make_pair<unsigned, unsigned>(~0U, 0U));
46 }
47
getResourceUsers(uint64_t ResourceMask,SmallVectorImpl<User> & Users) const48 void PressureTracker::getResourceUsers(uint64_t ResourceMask,
49 SmallVectorImpl<User> &Users) const {
50 unsigned Index = getResourceStateIndex(ResourceMask);
51 unsigned ProcResID = ResIdx2ProcResID[Index];
52 const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
53 for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
54 const User U = getResourceUser(ProcResID, I);
55 if (U.second && IPI.contains(U.first))
56 Users.emplace_back(U);
57 }
58 }
59
onInstructionDispatched(unsigned IID)60 void PressureTracker::onInstructionDispatched(unsigned IID) {
61 IPI.insert(std::make_pair(IID, InstructionPressureInfo()));
62 }
63
onInstructionExecuted(unsigned IID)64 void PressureTracker::onInstructionExecuted(unsigned IID) { IPI.erase(IID); }
65
handleInstructionIssuedEvent(const HWInstructionIssuedEvent & Event)66 void PressureTracker::handleInstructionIssuedEvent(
67 const HWInstructionIssuedEvent &Event) {
68 unsigned IID = Event.IR.getSourceIndex();
69 for (const ResourceUse &Use : Event.UsedResources) {
70 const ResourceRef &RR = Use.first;
71 unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
72 Index += llvm::countr_zero(RR.second);
73 ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
74 }
75 }
76
updateResourcePressureDistribution(uint64_t CumulativeMask)77 void PressureTracker::updateResourcePressureDistribution(
78 uint64_t CumulativeMask) {
79 while (CumulativeMask) {
80 uint64_t Current = CumulativeMask & (-CumulativeMask);
81 unsigned ResIdx = getResourceStateIndex(Current);
82 unsigned ProcResID = ResIdx2ProcResID[ResIdx];
83 uint64_t Mask = ProcResID2Mask[ProcResID];
84
85 if (Mask == Current) {
86 ResourcePressureDistribution[ProcResID]++;
87 CumulativeMask ^= Current;
88 continue;
89 }
90
91 Mask ^= Current;
92 while (Mask) {
93 uint64_t SubUnit = Mask & (-Mask);
94 ResIdx = getResourceStateIndex(SubUnit);
95 ProcResID = ResIdx2ProcResID[ResIdx];
96 ResourcePressureDistribution[ProcResID]++;
97 Mask ^= SubUnit;
98 }
99
100 CumulativeMask ^= Current;
101 }
102 }
103
handlePressureEvent(const HWPressureEvent & Event)104 void PressureTracker::handlePressureEvent(const HWPressureEvent &Event) {
105 assert(Event.Reason != HWPressureEvent::INVALID &&
106 "Unexpected invalid event!");
107
108 switch (Event.Reason) {
109 default:
110 break;
111
112 case HWPressureEvent::RESOURCES: {
113 const uint64_t ResourceMask = Event.ResourceMask;
114 updateResourcePressureDistribution(Event.ResourceMask);
115
116 for (const InstRef &IR : Event.AffectedInstructions) {
117 const Instruction &IS = *IR.getInstruction();
118 unsigned BusyResources = IS.getCriticalResourceMask() & ResourceMask;
119 if (!BusyResources)
120 continue;
121
122 unsigned IID = IR.getSourceIndex();
123 IPI[IID].ResourcePressureCycles++;
124 }
125 break;
126 }
127
128 case HWPressureEvent::REGISTER_DEPS:
129 for (const InstRef &IR : Event.AffectedInstructions) {
130 unsigned IID = IR.getSourceIndex();
131 IPI[IID].RegisterPressureCycles++;
132 }
133 break;
134
135 case HWPressureEvent::MEMORY_DEPS:
136 for (const InstRef &IR : Event.AffectedInstructions) {
137 unsigned IID = IR.getSourceIndex();
138 IPI[IID].MemoryPressureCycles++;
139 }
140 }
141 }
142
143 #ifndef NDEBUG
dumpDependencyEdge(raw_ostream & OS,const DependencyEdge & DepEdge,MCInstPrinter & MCIP) const144 void DependencyGraph::dumpDependencyEdge(raw_ostream &OS,
145 const DependencyEdge &DepEdge,
146 MCInstPrinter &MCIP) const {
147 unsigned FromIID = DepEdge.FromIID;
148 unsigned ToIID = DepEdge.ToIID;
149 assert(FromIID < ToIID && "Graph should be acyclic!");
150
151 const DependencyEdge::Dependency &DE = DepEdge.Dep;
152 assert(DE.Type != DependencyEdge::DT_INVALID && "Unexpected invalid edge!");
153
154 OS << " FROM: " << FromIID << " TO: " << ToIID << " ";
155 if (DE.Type == DependencyEdge::DT_REGISTER) {
156 OS << " - REGISTER: ";
157 MCIP.printRegName(OS, DE.ResourceOrRegID);
158 } else if (DE.Type == DependencyEdge::DT_MEMORY) {
159 OS << " - MEMORY";
160 } else {
161 assert(DE.Type == DependencyEdge::DT_RESOURCE &&
162 "Unsupported dependency type!");
163 OS << " - RESOURCE MASK: " << DE.ResourceOrRegID;
164 }
165 OS << " - COST: " << DE.Cost << '\n';
166 }
167 #endif // NDEBUG
168
pruneEdges(unsigned Iterations)169 void DependencyGraph::pruneEdges(unsigned Iterations) {
170 for (DGNode &N : Nodes) {
171 unsigned NumPruned = 0;
172 const unsigned Size = N.OutgoingEdges.size();
173 // Use a cut-off threshold to prune edges with a low frequency.
174 for (unsigned I = 0, E = Size; I < E; ++I) {
175 DependencyEdge &Edge = N.OutgoingEdges[I];
176 if (Edge.Frequency == Iterations)
177 continue;
178 double Factor = (double)Edge.Frequency / Iterations;
179 if (0.10 < Factor)
180 continue;
181 Nodes[Edge.ToIID].NumPredecessors--;
182 std::swap(Edge, N.OutgoingEdges[E - 1]);
183 --E;
184 ++NumPruned;
185 }
186
187 if (NumPruned)
188 N.OutgoingEdges.resize(Size - NumPruned);
189 }
190 }
191
initializeRootSet(SmallVectorImpl<unsigned> & RootSet) const192 void DependencyGraph::initializeRootSet(
193 SmallVectorImpl<unsigned> &RootSet) const {
194 for (unsigned I = 0, E = Nodes.size(); I < E; ++I) {
195 const DGNode &N = Nodes[I];
196 if (N.NumPredecessors == 0 && !N.OutgoingEdges.empty())
197 RootSet.emplace_back(I);
198 }
199 }
200
propagateThroughEdges(SmallVectorImpl<unsigned> & RootSet,unsigned Iterations)201 void DependencyGraph::propagateThroughEdges(SmallVectorImpl<unsigned> &RootSet,
202 unsigned Iterations) {
203 SmallVector<unsigned, 8> ToVisit;
204
205 // A critical sequence is computed as the longest path from a node of the
206 // RootSet to a leaf node (i.e. a node with no successors). The RootSet is
207 // composed of nodes with at least one successor, and no predecessors.
208 //
209 // Each node of the graph starts with an initial default cost of zero. The
210 // cost of a node is a measure of criticality: the higher the cost, the bigger
211 // is the performance impact.
212 // For register and memory dependencies, the cost is a function of the write
213 // latency as well as the actual delay (in cycles) caused to users.
214 // For processor resource dependencies, the cost is a function of the resource
215 // pressure. Resource interferences with low frequency values are ignored.
216 //
217 // This algorithm is very similar to a (reverse) Dijkstra. Every iteration of
218 // the inner loop selects (i.e. visits) a node N from a set of `unvisited
219 // nodes`, and then propagates the cost of N to all its neighbors.
220 //
221 // The `unvisited nodes` set initially contains all the nodes from the
222 // RootSet. A node N is added to the `unvisited nodes` if all its
223 // predecessors have been visited already.
224 //
225 // For simplicity, every node tracks the number of unvisited incoming edges in
226 // field `NumVisitedPredecessors`. When the value of that field drops to
227 // zero, then the corresponding node is added to a `ToVisit` set.
228 //
229 // At the end of every iteration of the outer loop, set `ToVisit` becomes our
230 // new `unvisited nodes` set.
231 //
232 // The algorithm terminates when the set of unvisited nodes (i.e. our RootSet)
233 // is empty. This algorithm works under the assumption that the graph is
234 // acyclic.
235 do {
236 for (unsigned IID : RootSet) {
237 const DGNode &N = Nodes[IID];
238 for (const DependencyEdge &DepEdge : N.OutgoingEdges) {
239 unsigned ToIID = DepEdge.ToIID;
240 DGNode &To = Nodes[ToIID];
241 uint64_t Cost = N.Cost + DepEdge.Dep.Cost;
242 // Check if this is the most expensive incoming edge seen so far. In
243 // case, update the total cost of the destination node (ToIID), as well
244 // its field `CriticalPredecessor`.
245 if (Cost > To.Cost) {
246 To.CriticalPredecessor = DepEdge;
247 To.Cost = Cost;
248 To.Depth = N.Depth + 1;
249 }
250 To.NumVisitedPredecessors++;
251 if (To.NumVisitedPredecessors == To.NumPredecessors)
252 ToVisit.emplace_back(ToIID);
253 }
254 }
255
256 std::swap(RootSet, ToVisit);
257 ToVisit.clear();
258 } while (!RootSet.empty());
259 }
260
getCriticalSequence(SmallVectorImpl<const DependencyEdge * > & Seq) const261 void DependencyGraph::getCriticalSequence(
262 SmallVectorImpl<const DependencyEdge *> &Seq) const {
263 // At this stage, nodes of the graph have been already visited, and costs have
264 // been propagated through the edges (see method `propagateThroughEdges()`).
265
266 // Identify the node N with the highest cost in the graph. By construction,
267 // that node is the last instruction of our critical sequence.
268 // Field N.Depth would tell us the total length of the sequence.
269 //
270 // To obtain the sequence of critical edges, we simply follow the chain of
271 // critical predecessors starting from node N (field
272 // DGNode::CriticalPredecessor).
273 const auto It =
274 llvm::max_element(Nodes, [](const DGNode &Lhs, const DGNode &Rhs) {
275 return Lhs.Cost < Rhs.Cost;
276 });
277 unsigned IID = std::distance(Nodes.begin(), It);
278 Seq.resize(Nodes[IID].Depth);
279 for (const DependencyEdge *&DE : llvm::reverse(Seq)) {
280 const DGNode &N = Nodes[IID];
281 DE = &N.CriticalPredecessor;
282 IID = N.CriticalPredecessor.FromIID;
283 }
284 }
285
printInstruction(formatted_raw_ostream & FOS,const MCInst & MCI,bool UseDifferentColor) const286 void BottleneckAnalysis::printInstruction(formatted_raw_ostream &FOS,
287 const MCInst &MCI,
288 bool UseDifferentColor) const {
289 FOS.PadToColumn(14);
290 if (UseDifferentColor)
291 FOS.changeColor(raw_ostream::CYAN, true, false);
292 FOS << printInstructionString(MCI);
293 if (UseDifferentColor)
294 FOS.resetColor();
295 }
296
printCriticalSequence(raw_ostream & OS) const297 void BottleneckAnalysis::printCriticalSequence(raw_ostream &OS) const {
298 // Early exit if no bottlenecks were found during the simulation.
299 if (!SeenStallCycles || !BPI.PressureIncreaseCycles)
300 return;
301
302 SmallVector<const DependencyEdge *, 16> Seq;
303 DG.getCriticalSequence(Seq);
304 if (Seq.empty())
305 return;
306
307 OS << "\nCritical sequence based on the simulation:\n\n";
308
309 const DependencyEdge &FirstEdge = *Seq[0];
310 ArrayRef<llvm::MCInst> Source = getSource();
311 unsigned FromIID = FirstEdge.FromIID % Source.size();
312 unsigned ToIID = FirstEdge.ToIID % Source.size();
313 bool IsLoopCarried = FromIID >= ToIID;
314
315 formatted_raw_ostream FOS(OS);
316 FOS.PadToColumn(14);
317 FOS << "Instruction";
318 FOS.PadToColumn(58);
319 FOS << "Dependency Information";
320
321 bool HasColors = FOS.has_colors();
322
323 unsigned CurrentIID = 0;
324 if (IsLoopCarried) {
325 FOS << "\n +----< " << FromIID << ".";
326 printInstruction(FOS, Source[FromIID], HasColors);
327 FOS << "\n |\n | < loop carried > \n |";
328 } else {
329 while (CurrentIID < FromIID) {
330 FOS << "\n " << CurrentIID << ".";
331 printInstruction(FOS, Source[CurrentIID]);
332 CurrentIID++;
333 }
334
335 FOS << "\n +----< " << CurrentIID << ".";
336 printInstruction(FOS, Source[CurrentIID], HasColors);
337 CurrentIID++;
338 }
339
340 for (const DependencyEdge *&DE : Seq) {
341 ToIID = DE->ToIID % Source.size();
342 unsigned LastIID = CurrentIID > ToIID ? Source.size() : ToIID;
343
344 while (CurrentIID < LastIID) {
345 FOS << "\n | " << CurrentIID << ".";
346 printInstruction(FOS, Source[CurrentIID]);
347 CurrentIID++;
348 }
349
350 if (CurrentIID == ToIID) {
351 FOS << "\n +----> " << ToIID << ".";
352 printInstruction(FOS, Source[CurrentIID], HasColors);
353 } else {
354 FOS << "\n |\n | < loop carried > \n |"
355 << "\n +----> " << ToIID << ".";
356 printInstruction(FOS, Source[ToIID], HasColors);
357 }
358 FOS.PadToColumn(58);
359
360 const DependencyEdge::Dependency &Dep = DE->Dep;
361 if (HasColors)
362 FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
363
364 if (Dep.Type == DependencyEdge::DT_REGISTER) {
365 FOS << "## REGISTER dependency: ";
366 if (HasColors)
367 FOS.changeColor(raw_ostream::MAGENTA, true, false);
368 getInstPrinter().printRegName(FOS, Dep.ResourceOrRegID);
369 } else if (Dep.Type == DependencyEdge::DT_MEMORY) {
370 FOS << "## MEMORY dependency.";
371 } else {
372 assert(Dep.Type == DependencyEdge::DT_RESOURCE &&
373 "Unsupported dependency type!");
374 FOS << "## RESOURCE interference: ";
375 if (HasColors)
376 FOS.changeColor(raw_ostream::MAGENTA, true, false);
377 FOS << Tracker.resolveResourceName(Dep.ResourceOrRegID);
378 if (HasColors) {
379 FOS.resetColor();
380 FOS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
381 }
382 FOS << " [ probability: " << ((DE->Frequency * 100) / Iterations)
383 << "% ]";
384 }
385 if (HasColors)
386 FOS.resetColor();
387 ++CurrentIID;
388 }
389
390 while (CurrentIID < Source.size()) {
391 FOS << "\n " << CurrentIID << ".";
392 printInstruction(FOS, Source[CurrentIID]);
393 CurrentIID++;
394 }
395
396 FOS << '\n';
397 FOS.flush();
398 }
399
400 #ifndef NDEBUG
dump(raw_ostream & OS,MCInstPrinter & MCIP) const401 void DependencyGraph::dump(raw_ostream &OS, MCInstPrinter &MCIP) const {
402 OS << "\nREG DEPS\n";
403 for (const DGNode &Node : Nodes)
404 for (const DependencyEdge &DE : Node.OutgoingEdges)
405 if (DE.Dep.Type == DependencyEdge::DT_REGISTER)
406 dumpDependencyEdge(OS, DE, MCIP);
407
408 OS << "\nMEM DEPS\n";
409 for (const DGNode &Node : Nodes)
410 for (const DependencyEdge &DE : Node.OutgoingEdges)
411 if (DE.Dep.Type == DependencyEdge::DT_MEMORY)
412 dumpDependencyEdge(OS, DE, MCIP);
413
414 OS << "\nRESOURCE DEPS\n";
415 for (const DGNode &Node : Nodes)
416 for (const DependencyEdge &DE : Node.OutgoingEdges)
417 if (DE.Dep.Type == DependencyEdge::DT_RESOURCE)
418 dumpDependencyEdge(OS, DE, MCIP);
419 }
420 #endif // NDEBUG
421
addDependency(unsigned From,unsigned To,DependencyEdge::Dependency && Dep)422 void DependencyGraph::addDependency(unsigned From, unsigned To,
423 DependencyEdge::Dependency &&Dep) {
424 DGNode &NodeFrom = Nodes[From];
425 DGNode &NodeTo = Nodes[To];
426 SmallVectorImpl<DependencyEdge> &Vec = NodeFrom.OutgoingEdges;
427
428 auto It = find_if(Vec, [To, Dep](DependencyEdge &DE) {
429 return DE.ToIID == To && DE.Dep.ResourceOrRegID == Dep.ResourceOrRegID;
430 });
431
432 if (It != Vec.end()) {
433 It->Dep.Cost += Dep.Cost;
434 It->Frequency++;
435 return;
436 }
437
438 DependencyEdge DE = {Dep, From, To, 1};
439 Vec.emplace_back(DE);
440 NodeTo.NumPredecessors++;
441 }
442
BottleneckAnalysis(const MCSubtargetInfo & sti,MCInstPrinter & Printer,ArrayRef<MCInst> S,unsigned NumIter)443 BottleneckAnalysis::BottleneckAnalysis(const MCSubtargetInfo &sti,
444 MCInstPrinter &Printer,
445 ArrayRef<MCInst> S, unsigned NumIter)
446 : InstructionView(sti, Printer, S), Tracker(sti.getSchedModel()),
447 DG(S.size() * 3), Iterations(NumIter), TotalCycles(0),
448 PressureIncreasedBecauseOfResources(false),
449 PressureIncreasedBecauseOfRegisterDependencies(false),
450 PressureIncreasedBecauseOfMemoryDependencies(false),
451 SeenStallCycles(false), BPI() {}
452
addRegisterDep(unsigned From,unsigned To,unsigned RegID,unsigned Cost)453 void BottleneckAnalysis::addRegisterDep(unsigned From, unsigned To,
454 unsigned RegID, unsigned Cost) {
455 bool IsLoopCarried = From >= To;
456 unsigned SourceSize = getSource().size();
457 if (IsLoopCarried) {
458 DG.addRegisterDep(From, To + SourceSize, RegID, Cost);
459 DG.addRegisterDep(From + SourceSize, To + (SourceSize * 2), RegID, Cost);
460 return;
461 }
462 DG.addRegisterDep(From + SourceSize, To + SourceSize, RegID, Cost);
463 }
464
addMemoryDep(unsigned From,unsigned To,unsigned Cost)465 void BottleneckAnalysis::addMemoryDep(unsigned From, unsigned To,
466 unsigned Cost) {
467 bool IsLoopCarried = From >= To;
468 unsigned SourceSize = getSource().size();
469 if (IsLoopCarried) {
470 DG.addMemoryDep(From, To + SourceSize, Cost);
471 DG.addMemoryDep(From + SourceSize, To + (SourceSize * 2), Cost);
472 return;
473 }
474 DG.addMemoryDep(From + SourceSize, To + SourceSize, Cost);
475 }
476
addResourceDep(unsigned From,unsigned To,uint64_t Mask,unsigned Cost)477 void BottleneckAnalysis::addResourceDep(unsigned From, unsigned To,
478 uint64_t Mask, unsigned Cost) {
479 bool IsLoopCarried = From >= To;
480 unsigned SourceSize = getSource().size();
481 if (IsLoopCarried) {
482 DG.addResourceDep(From, To + SourceSize, Mask, Cost);
483 DG.addResourceDep(From + SourceSize, To + (SourceSize * 2), Mask, Cost);
484 return;
485 }
486 DG.addResourceDep(From + SourceSize, To + SourceSize, Mask, Cost);
487 }
488
onEvent(const HWInstructionEvent & Event)489 void BottleneckAnalysis::onEvent(const HWInstructionEvent &Event) {
490 const unsigned IID = Event.IR.getSourceIndex();
491 if (Event.Type == HWInstructionEvent::Dispatched) {
492 Tracker.onInstructionDispatched(IID);
493 return;
494 }
495 if (Event.Type == HWInstructionEvent::Executed) {
496 Tracker.onInstructionExecuted(IID);
497 return;
498 }
499
500 if (Event.Type != HWInstructionEvent::Issued)
501 return;
502
503 ArrayRef<llvm::MCInst> Source = getSource();
504 const Instruction &IS = *Event.IR.getInstruction();
505 unsigned To = IID % Source.size();
506
507 unsigned Cycles = 2 * Tracker.getResourcePressureCycles(IID);
508 uint64_t ResourceMask = IS.getCriticalResourceMask();
509 SmallVector<std::pair<unsigned, unsigned>, 4> Users;
510 while (ResourceMask) {
511 uint64_t Current = ResourceMask & (-ResourceMask);
512 Tracker.getResourceUsers(Current, Users);
513 for (const std::pair<unsigned, unsigned> &U : Users)
514 addResourceDep(U.first % Source.size(), To, Current, U.second + Cycles);
515 Users.clear();
516 ResourceMask ^= Current;
517 }
518
519 const CriticalDependency &RegDep = IS.getCriticalRegDep();
520 if (RegDep.Cycles) {
521 Cycles = RegDep.Cycles + 2 * Tracker.getRegisterPressureCycles(IID);
522 unsigned From = RegDep.IID % Source.size();
523 addRegisterDep(From, To, RegDep.RegID, Cycles);
524 }
525
526 const CriticalDependency &MemDep = IS.getCriticalMemDep();
527 if (MemDep.Cycles) {
528 Cycles = MemDep.Cycles + 2 * Tracker.getMemoryPressureCycles(IID);
529 unsigned From = MemDep.IID % Source.size();
530 addMemoryDep(From, To, Cycles);
531 }
532
533 Tracker.handleInstructionIssuedEvent(
534 static_cast<const HWInstructionIssuedEvent &>(Event));
535
536 // Check if this is the last simulated instruction.
537 if (IID == ((Iterations * Source.size()) - 1))
538 DG.finalizeGraph(Iterations);
539 }
540
onEvent(const HWPressureEvent & Event)541 void BottleneckAnalysis::onEvent(const HWPressureEvent &Event) {
542 assert(Event.Reason != HWPressureEvent::INVALID &&
543 "Unexpected invalid event!");
544
545 Tracker.handlePressureEvent(Event);
546
547 switch (Event.Reason) {
548 default:
549 break;
550
551 case HWPressureEvent::RESOURCES:
552 PressureIncreasedBecauseOfResources = true;
553 break;
554 case HWPressureEvent::REGISTER_DEPS:
555 PressureIncreasedBecauseOfRegisterDependencies = true;
556 break;
557 case HWPressureEvent::MEMORY_DEPS:
558 PressureIncreasedBecauseOfMemoryDependencies = true;
559 break;
560 }
561 }
562
onCycleEnd()563 void BottleneckAnalysis::onCycleEnd() {
564 ++TotalCycles;
565
566 bool PressureIncreasedBecauseOfDataDependencies =
567 PressureIncreasedBecauseOfRegisterDependencies ||
568 PressureIncreasedBecauseOfMemoryDependencies;
569 if (!PressureIncreasedBecauseOfResources &&
570 !PressureIncreasedBecauseOfDataDependencies)
571 return;
572
573 ++BPI.PressureIncreaseCycles;
574 if (PressureIncreasedBecauseOfRegisterDependencies)
575 ++BPI.RegisterDependencyCycles;
576 if (PressureIncreasedBecauseOfMemoryDependencies)
577 ++BPI.MemoryDependencyCycles;
578 if (PressureIncreasedBecauseOfDataDependencies)
579 ++BPI.DataDependencyCycles;
580 if (PressureIncreasedBecauseOfResources)
581 ++BPI.ResourcePressureCycles;
582 PressureIncreasedBecauseOfResources = false;
583 PressureIncreasedBecauseOfRegisterDependencies = false;
584 PressureIncreasedBecauseOfMemoryDependencies = false;
585 }
586
printBottleneckHints(raw_ostream & OS) const587 void BottleneckAnalysis::printBottleneckHints(raw_ostream &OS) const {
588 if (!SeenStallCycles || !BPI.PressureIncreaseCycles) {
589 OS << "\n\nNo resource or data dependency bottlenecks discovered.\n";
590 return;
591 }
592
593 double PressurePerCycle =
594 (double)BPI.PressureIncreaseCycles * 100 / TotalCycles;
595 double ResourcePressurePerCycle =
596 (double)BPI.ResourcePressureCycles * 100 / TotalCycles;
597 double DDPerCycle = (double)BPI.DataDependencyCycles * 100 / TotalCycles;
598 double RegDepPressurePerCycle =
599 (double)BPI.RegisterDependencyCycles * 100 / TotalCycles;
600 double MemDepPressurePerCycle =
601 (double)BPI.MemoryDependencyCycles * 100 / TotalCycles;
602
603 OS << "\n\nCycles with backend pressure increase [ "
604 << format("%.2f", floor((PressurePerCycle * 100) + 0.5) / 100) << "% ]";
605
606 OS << "\nThroughput Bottlenecks: "
607 << "\n Resource Pressure [ "
608 << format("%.2f", floor((ResourcePressurePerCycle * 100) + 0.5) / 100)
609 << "% ]";
610
611 if (BPI.PressureIncreaseCycles) {
612 ArrayRef<unsigned> Distribution = Tracker.getResourcePressureDistribution();
613 const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
614 for (unsigned I = 0, E = Distribution.size(); I < E; ++I) {
615 unsigned ReleaseAtCycles = Distribution[I];
616 if (ReleaseAtCycles) {
617 double Frequency = (double)ReleaseAtCycles * 100 / TotalCycles;
618 const MCProcResourceDesc &PRDesc = *SM.getProcResource(I);
619 OS << "\n - " << PRDesc.Name << " [ "
620 << format("%.2f", floor((Frequency * 100) + 0.5) / 100) << "% ]";
621 }
622 }
623 }
624
625 OS << "\n Data Dependencies: [ "
626 << format("%.2f", floor((DDPerCycle * 100) + 0.5) / 100) << "% ]";
627 OS << "\n - Register Dependencies [ "
628 << format("%.2f", floor((RegDepPressurePerCycle * 100) + 0.5) / 100)
629 << "% ]";
630 OS << "\n - Memory Dependencies [ "
631 << format("%.2f", floor((MemDepPressurePerCycle * 100) + 0.5) / 100)
632 << "% ]\n";
633 }
634
printView(raw_ostream & OS) const635 void BottleneckAnalysis::printView(raw_ostream &OS) const {
636 std::string Buffer;
637 raw_string_ostream TempStream(Buffer);
638 printBottleneckHints(TempStream);
639 TempStream.flush();
640 OS << Buffer;
641 printCriticalSequence(OS);
642 }
643
644 } // namespace mca.
645 } // namespace llvm
646