xref: /freebsd/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp (revision 5956d97f4b3204318ceb6aa9c77bd0bc6ea87a41)
1 //===--------------------- TimelineView.cpp ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \brief
9 ///
10 /// This file implements the TimelineView interface.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "Views/TimelineView.h"
15 #include <numeric>
16 
17 namespace llvm {
18 namespace mca {
19 
20 TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer,
21                            llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations,
22                            unsigned Cycles)
23     : InstructionView(sti, Printer, S), CurrentCycle(0),
24       MaxCycle(Cycles == 0 ? std::numeric_limits<unsigned>::max() : Cycles),
25       LastCycle(0), WaitTime(S.size()), UsedBuffer(S.size()) {
26   unsigned NumInstructions = getSource().size();
27   assert(Iterations && "Invalid number of iterations specified!");
28   NumInstructions *= Iterations;
29   Timeline.resize(NumInstructions);
30   TimelineViewEntry InvalidTVEntry = {-1, 0, 0, 0, 0};
31   std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry);
32 
33   WaitTimeEntry NullWTEntry = {0, 0, 0};
34   std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry);
35 
36   std::pair<unsigned, int> NullUsedBufferEntry = {/* Invalid resource ID*/ 0,
37                                                   /* unknown buffer size */ -1};
38   std::fill(UsedBuffer.begin(), UsedBuffer.end(), NullUsedBufferEntry);
39 }
40 
41 void TimelineView::onReservedBuffers(const InstRef &IR,
42                                      ArrayRef<unsigned> Buffers) {
43   if (IR.getSourceIndex() >= getSource().size())
44     return;
45 
46   const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
47   std::pair<unsigned, int> BufferInfo = {0, -1};
48   for (const unsigned Buffer : Buffers) {
49     const MCProcResourceDesc &MCDesc = *SM.getProcResource(Buffer);
50     if (!BufferInfo.first || BufferInfo.second > MCDesc.BufferSize) {
51       BufferInfo.first = Buffer;
52       BufferInfo.second = MCDesc.BufferSize;
53     }
54   }
55 
56   UsedBuffer[IR.getSourceIndex()] = BufferInfo;
57 }
58 
59 void TimelineView::onEvent(const HWInstructionEvent &Event) {
60   const unsigned Index = Event.IR.getSourceIndex();
61   if (Index >= Timeline.size())
62     return;
63 
64   switch (Event.Type) {
65   case HWInstructionEvent::Retired: {
66     TimelineViewEntry &TVEntry = Timeline[Index];
67     if (CurrentCycle < MaxCycle)
68       TVEntry.CycleRetired = CurrentCycle;
69 
70     // Update the WaitTime entry which corresponds to this Index.
71     assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!");
72     unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched);
73     WaitTimeEntry &WTEntry = WaitTime[Index % getSource().size()];
74     WTEntry.CyclesSpentInSchedulerQueue +=
75         TVEntry.CycleIssued - CycleDispatched;
76     assert(CycleDispatched <= TVEntry.CycleReady &&
77            "Instruction cannot be ready if it hasn't been dispatched yet!");
78     WTEntry.CyclesSpentInSQWhileReady +=
79         TVEntry.CycleIssued - TVEntry.CycleReady;
80     if (CurrentCycle > TVEntry.CycleExecuted) {
81       WTEntry.CyclesSpentAfterWBAndBeforeRetire +=
82           (CurrentCycle - 1) - TVEntry.CycleExecuted;
83     }
84     break;
85   }
86   case HWInstructionEvent::Ready:
87     Timeline[Index].CycleReady = CurrentCycle;
88     break;
89   case HWInstructionEvent::Issued:
90     Timeline[Index].CycleIssued = CurrentCycle;
91     break;
92   case HWInstructionEvent::Executed:
93     Timeline[Index].CycleExecuted = CurrentCycle;
94     break;
95   case HWInstructionEvent::Dispatched:
96     // There may be multiple dispatch events. Microcoded instructions that are
97     // expanded into multiple uOps may require multiple dispatch cycles. Here,
98     // we want to capture the first dispatch cycle.
99     if (Timeline[Index].CycleDispatched == -1)
100       Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle);
101     break;
102   default:
103     return;
104   }
105   if (CurrentCycle < MaxCycle)
106     LastCycle = std::max(LastCycle, CurrentCycle);
107 }
108 
109 static raw_ostream::Colors chooseColor(unsigned CumulativeCycles,
110                                        unsigned Executions, int BufferSize) {
111   if (CumulativeCycles && BufferSize < 0)
112     return raw_ostream::MAGENTA;
113   unsigned Size = static_cast<unsigned>(BufferSize);
114   if (CumulativeCycles >= Size * Executions)
115     return raw_ostream::RED;
116   if ((CumulativeCycles * 2) >= Size * Executions)
117     return raw_ostream::YELLOW;
118   return raw_ostream::SAVEDCOLOR;
119 }
120 
121 static void tryChangeColor(raw_ostream &OS, unsigned Cycles,
122                            unsigned Executions, int BufferSize) {
123   if (!OS.has_colors())
124     return;
125 
126   raw_ostream::Colors Color = chooseColor(Cycles, Executions, BufferSize);
127   if (Color == raw_ostream::SAVEDCOLOR) {
128     OS.resetColor();
129     return;
130   }
131   OS.changeColor(Color, /* bold */ true, /* BG */ false);
132 }
133 
134 void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
135                                       const WaitTimeEntry &Entry,
136                                       unsigned SourceIndex,
137                                       unsigned Executions) const {
138   bool PrintingTotals = SourceIndex == getSource().size();
139   unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions;
140 
141   if (!PrintingTotals)
142     OS << SourceIndex << '.';
143 
144   OS.PadToColumn(7);
145 
146   double AverageTime1, AverageTime2, AverageTime3;
147   AverageTime1 =
148       (double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions;
149   AverageTime2 =
150       (double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions;
151   AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) /
152                  CumulativeExecutions;
153 
154   OS << Executions;
155   OS.PadToColumn(13);
156 
157   int BufferSize = PrintingTotals ? 0 : UsedBuffer[SourceIndex].second;
158   if (!PrintingTotals)
159     tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions,
160                    BufferSize);
161   OS << format("%.1f", floor(AverageTime1 + 0.5) / 10);
162   OS.PadToColumn(20);
163   if (!PrintingTotals)
164     tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions,
165                    BufferSize);
166   OS << format("%.1f", floor(AverageTime2 + 0.5) / 10);
167   OS.PadToColumn(27);
168   if (!PrintingTotals)
169     tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire,
170                    CumulativeExecutions,
171                    getSubTargetInfo().getSchedModel().MicroOpBufferSize);
172   OS << format("%.1f", floor(AverageTime3 + 0.5) / 10);
173 
174   if (OS.has_colors())
175     OS.resetColor();
176   OS.PadToColumn(34);
177 }
178 
179 void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {
180   std::string Header =
181       "\n\nAverage Wait times (based on the timeline view):\n"
182       "[0]: Executions\n"
183       "[1]: Average time spent waiting in a scheduler's queue\n"
184       "[2]: Average time spent waiting in a scheduler's queue while ready\n"
185       "[3]: Average time elapsed from WB until retire stage\n\n"
186       "      [0]    [1]    [2]    [3]\n";
187   OS << Header;
188   formatted_raw_ostream FOS(OS);
189   unsigned Executions = Timeline.size() / getSource().size();
190   unsigned IID = 0;
191   for (const MCInst &Inst : getSource()) {
192     printWaitTimeEntry(FOS, WaitTime[IID], IID, Executions);
193     FOS << "   " << printInstructionString(Inst) << '\n';
194     FOS.flush();
195     ++IID;
196   }
197 
198   // If the timeline contains more than one instruction,
199   // let's also print global averages.
200   if (getSource().size() != 1) {
201     WaitTimeEntry TotalWaitTime = std::accumulate(
202         WaitTime.begin(), WaitTime.end(), WaitTimeEntry{0, 0, 0},
203         [](const WaitTimeEntry &A, const WaitTimeEntry &B) {
204           return WaitTimeEntry{
205               A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue,
206               A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady,
207               A.CyclesSpentAfterWBAndBeforeRetire +
208                   B.CyclesSpentAfterWBAndBeforeRetire};
209         });
210     printWaitTimeEntry(FOS, TotalWaitTime, IID, Executions);
211     FOS << "   "
212         << "<total>" << '\n';
213     FOS.flush();
214   }
215 }
216 
217 void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,
218                                           const TimelineViewEntry &Entry,
219                                           unsigned Iteration,
220                                           unsigned SourceIndex) const {
221   if (Iteration == 0 && SourceIndex == 0)
222     OS << '\n';
223   OS << '[' << Iteration << ',' << SourceIndex << ']';
224   OS.PadToColumn(10);
225   assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!");
226   unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched);
227   for (unsigned I = 0, E = CycleDispatched; I < E; ++I)
228     OS << ((I % 5 == 0) ? '.' : ' ');
229   OS << TimelineView::DisplayChar::Dispatched;
230   if (CycleDispatched != Entry.CycleExecuted) {
231     // Zero latency instructions have the same value for CycleDispatched,
232     // CycleIssued and CycleExecuted.
233     for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I)
234       OS << TimelineView::DisplayChar::Waiting;
235     if (Entry.CycleIssued == Entry.CycleExecuted)
236       OS << TimelineView::DisplayChar::DisplayChar::Executed;
237     else {
238       if (CycleDispatched != Entry.CycleIssued)
239         OS << TimelineView::DisplayChar::Executing;
240       for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E;
241            ++I)
242         OS << TimelineView::DisplayChar::Executing;
243       OS << TimelineView::DisplayChar::Executed;
244     }
245   }
246 
247   for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I)
248     OS << TimelineView::DisplayChar::RetireLag;
249   if (Entry.CycleExecuted < Entry.CycleRetired)
250     OS << TimelineView::DisplayChar::Retired;
251 
252   // Skip other columns.
253   for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I)
254     OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' ');
255 }
256 
257 static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) {
258   OS << "\n\nTimeline view:\n";
259   if (Cycles >= 10) {
260     OS.PadToColumn(10);
261     for (unsigned I = 0; I <= Cycles; ++I) {
262       if (((I / 10) & 1) == 0)
263         OS << ' ';
264       else
265         OS << I % 10;
266     }
267     OS << '\n';
268   }
269 
270   OS << "Index";
271   OS.PadToColumn(10);
272   for (unsigned I = 0; I <= Cycles; ++I) {
273     if (((I / 10) & 1) == 0)
274       OS << I % 10;
275     else
276       OS << ' ';
277   }
278   OS << '\n';
279 }
280 
281 void TimelineView::printTimeline(raw_ostream &OS) const {
282   formatted_raw_ostream FOS(OS);
283   printTimelineHeader(FOS, LastCycle);
284   FOS.flush();
285 
286   unsigned IID = 0;
287   ArrayRef<llvm::MCInst> Source = getSource();
288   const unsigned Iterations = Timeline.size() / Source.size();
289   for (unsigned Iteration = 0; Iteration < Iterations; ++Iteration) {
290     for (const MCInst &Inst : Source) {
291       const TimelineViewEntry &Entry = Timeline[IID];
292       // When an instruction is retired after timeline-max-cycles,
293       // its CycleRetired is left at 0. However, it's possible for
294       // a 0 latency instruction to be retired during cycle 0 and we
295       // don't want to early exit in that case. The CycleExecuted
296       // attribute is set correctly whether or not it is greater
297       // than timeline-max-cycles so we can use that to ensure
298       // we don't early exit because of a 0 latency instruction.
299       if (Entry.CycleRetired == 0 && Entry.CycleExecuted != 0) {
300         FOS << "Truncated display due to cycle limit\n";
301         return;
302       }
303 
304       unsigned SourceIndex = IID % Source.size();
305       printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex);
306       FOS << "   " << printInstructionString(Inst) << '\n';
307       FOS.flush();
308 
309       ++IID;
310     }
311   }
312 }
313 
314 json::Value TimelineView::toJSON() const {
315   json::Array TimelineInfo;
316 
317   for (const TimelineViewEntry &TLE : Timeline) {
318     TimelineInfo.push_back(
319         json::Object({{"CycleDispatched", TLE.CycleDispatched},
320                       {"CycleReady", TLE.CycleReady},
321                       {"CycleIssued", TLE.CycleIssued},
322                       {"CycleExecuted", TLE.CycleExecuted},
323                       {"CycleRetired", TLE.CycleRetired}}));
324   }
325   return json::Object({{"TimelineInfo", std::move(TimelineInfo)}});
326 }
327 } // namespace mca
328 } // namespace llvm
329