1 //===--------------------- TimelineView.cpp ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \brief 9 /// 10 /// This file implements the TimelineView interface. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "Views/TimelineView.h" 15 #include <numeric> 16 17 namespace llvm { 18 namespace mca { 19 20 TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer, 21 llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations, 22 unsigned Cycles) 23 : InstructionView(sti, Printer, S), CurrentCycle(0), 24 MaxCycle(Cycles == 0 ? std::numeric_limits<unsigned>::max() : Cycles), 25 LastCycle(0), WaitTime(S.size()), UsedBuffer(S.size()) { 26 unsigned NumInstructions = getSource().size(); 27 assert(Iterations && "Invalid number of iterations specified!"); 28 NumInstructions *= Iterations; 29 Timeline.resize(NumInstructions); 30 TimelineViewEntry InvalidTVEntry = {-1, 0, 0, 0, 0}; 31 std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry); 32 33 WaitTimeEntry NullWTEntry = {0, 0, 0}; 34 std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry); 35 36 std::pair<unsigned, int> NullUsedBufferEntry = {/* Invalid resource ID*/ 0, 37 /* unknown buffer size */ -1}; 38 std::fill(UsedBuffer.begin(), UsedBuffer.end(), NullUsedBufferEntry); 39 } 40 41 void TimelineView::onReservedBuffers(const InstRef &IR, 42 ArrayRef<unsigned> Buffers) { 43 if (IR.getSourceIndex() >= getSource().size()) 44 return; 45 46 const MCSchedModel &SM = getSubTargetInfo().getSchedModel(); 47 std::pair<unsigned, int> BufferInfo = {0, -1}; 48 for (const unsigned Buffer : Buffers) { 49 const MCProcResourceDesc &MCDesc = *SM.getProcResource(Buffer); 50 if (!BufferInfo.first || BufferInfo.second > MCDesc.BufferSize) { 51 BufferInfo.first = Buffer; 52 BufferInfo.second = MCDesc.BufferSize; 53 } 54 } 55 56 UsedBuffer[IR.getSourceIndex()] = BufferInfo; 57 } 58 59 void TimelineView::onEvent(const HWInstructionEvent &Event) { 60 const unsigned Index = Event.IR.getSourceIndex(); 61 if (Index >= Timeline.size()) 62 return; 63 64 switch (Event.Type) { 65 case HWInstructionEvent::Retired: { 66 TimelineViewEntry &TVEntry = Timeline[Index]; 67 if (CurrentCycle < MaxCycle) 68 TVEntry.CycleRetired = CurrentCycle; 69 70 // Update the WaitTime entry which corresponds to this Index. 71 assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!"); 72 unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched); 73 WaitTimeEntry &WTEntry = WaitTime[Index % getSource().size()]; 74 WTEntry.CyclesSpentInSchedulerQueue += 75 TVEntry.CycleIssued - CycleDispatched; 76 assert(CycleDispatched <= TVEntry.CycleReady && 77 "Instruction cannot be ready if it hasn't been dispatched yet!"); 78 WTEntry.CyclesSpentInSQWhileReady += 79 TVEntry.CycleIssued - TVEntry.CycleReady; 80 if (CurrentCycle > TVEntry.CycleExecuted) { 81 WTEntry.CyclesSpentAfterWBAndBeforeRetire += 82 (CurrentCycle - 1) - TVEntry.CycleExecuted; 83 } 84 break; 85 } 86 case HWInstructionEvent::Ready: 87 Timeline[Index].CycleReady = CurrentCycle; 88 break; 89 case HWInstructionEvent::Issued: 90 Timeline[Index].CycleIssued = CurrentCycle; 91 break; 92 case HWInstructionEvent::Executed: 93 Timeline[Index].CycleExecuted = CurrentCycle; 94 break; 95 case HWInstructionEvent::Dispatched: 96 // There may be multiple dispatch events. Microcoded instructions that are 97 // expanded into multiple uOps may require multiple dispatch cycles. Here, 98 // we want to capture the first dispatch cycle. 99 if (Timeline[Index].CycleDispatched == -1) 100 Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle); 101 break; 102 default: 103 return; 104 } 105 if (CurrentCycle < MaxCycle) 106 LastCycle = std::max(LastCycle, CurrentCycle); 107 } 108 109 static raw_ostream::Colors chooseColor(unsigned CumulativeCycles, 110 unsigned Executions, int BufferSize) { 111 if (CumulativeCycles && BufferSize < 0) 112 return raw_ostream::MAGENTA; 113 unsigned Size = static_cast<unsigned>(BufferSize); 114 if (CumulativeCycles >= Size * Executions) 115 return raw_ostream::RED; 116 if ((CumulativeCycles * 2) >= Size * Executions) 117 return raw_ostream::YELLOW; 118 return raw_ostream::SAVEDCOLOR; 119 } 120 121 static void tryChangeColor(raw_ostream &OS, unsigned Cycles, 122 unsigned Executions, int BufferSize) { 123 if (!OS.has_colors()) 124 return; 125 126 raw_ostream::Colors Color = chooseColor(Cycles, Executions, BufferSize); 127 if (Color == raw_ostream::SAVEDCOLOR) { 128 OS.resetColor(); 129 return; 130 } 131 OS.changeColor(Color, /* bold */ true, /* BG */ false); 132 } 133 134 void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS, 135 const WaitTimeEntry &Entry, 136 unsigned SourceIndex, 137 unsigned Executions) const { 138 bool PrintingTotals = SourceIndex == getSource().size(); 139 unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions; 140 141 if (!PrintingTotals) 142 OS << SourceIndex << '.'; 143 144 OS.PadToColumn(7); 145 146 double AverageTime1, AverageTime2, AverageTime3; 147 AverageTime1 = 148 (double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions; 149 AverageTime2 = 150 (double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions; 151 AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) / 152 CumulativeExecutions; 153 154 OS << Executions; 155 OS.PadToColumn(13); 156 157 int BufferSize = PrintingTotals ? 0 : UsedBuffer[SourceIndex].second; 158 if (!PrintingTotals) 159 tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions, 160 BufferSize); 161 OS << format("%.1f", floor(AverageTime1 + 0.5) / 10); 162 OS.PadToColumn(20); 163 if (!PrintingTotals) 164 tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions, 165 BufferSize); 166 OS << format("%.1f", floor(AverageTime2 + 0.5) / 10); 167 OS.PadToColumn(27); 168 if (!PrintingTotals) 169 tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire, 170 CumulativeExecutions, 171 getSubTargetInfo().getSchedModel().MicroOpBufferSize); 172 OS << format("%.1f", floor(AverageTime3 + 0.5) / 10); 173 174 if (OS.has_colors()) 175 OS.resetColor(); 176 OS.PadToColumn(34); 177 } 178 179 void TimelineView::printAverageWaitTimes(raw_ostream &OS) const { 180 std::string Header = 181 "\n\nAverage Wait times (based on the timeline view):\n" 182 "[0]: Executions\n" 183 "[1]: Average time spent waiting in a scheduler's queue\n" 184 "[2]: Average time spent waiting in a scheduler's queue while ready\n" 185 "[3]: Average time elapsed from WB until retire stage\n\n" 186 " [0] [1] [2] [3]\n"; 187 OS << Header; 188 formatted_raw_ostream FOS(OS); 189 unsigned Executions = Timeline.size() / getSource().size(); 190 unsigned IID = 0; 191 for (const MCInst &Inst : getSource()) { 192 printWaitTimeEntry(FOS, WaitTime[IID], IID, Executions); 193 FOS << " " << printInstructionString(Inst) << '\n'; 194 FOS.flush(); 195 ++IID; 196 } 197 198 // If the timeline contains more than one instruction, 199 // let's also print global averages. 200 if (getSource().size() != 1) { 201 WaitTimeEntry TotalWaitTime = std::accumulate( 202 WaitTime.begin(), WaitTime.end(), WaitTimeEntry{0, 0, 0}, 203 [](const WaitTimeEntry &A, const WaitTimeEntry &B) { 204 return WaitTimeEntry{ 205 A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue, 206 A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady, 207 A.CyclesSpentAfterWBAndBeforeRetire + 208 B.CyclesSpentAfterWBAndBeforeRetire}; 209 }); 210 printWaitTimeEntry(FOS, TotalWaitTime, IID, Executions); 211 FOS << " " 212 << "<total>" << '\n'; 213 FOS.flush(); 214 } 215 } 216 217 void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS, 218 const TimelineViewEntry &Entry, 219 unsigned Iteration, 220 unsigned SourceIndex) const { 221 if (Iteration == 0 && SourceIndex == 0) 222 OS << '\n'; 223 OS << '[' << Iteration << ',' << SourceIndex << ']'; 224 OS.PadToColumn(10); 225 assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!"); 226 unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched); 227 for (unsigned I = 0, E = CycleDispatched; I < E; ++I) 228 OS << ((I % 5 == 0) ? '.' : ' '); 229 OS << TimelineView::DisplayChar::Dispatched; 230 if (CycleDispatched != Entry.CycleExecuted) { 231 // Zero latency instructions have the same value for CycleDispatched, 232 // CycleIssued and CycleExecuted. 233 for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I) 234 OS << TimelineView::DisplayChar::Waiting; 235 if (Entry.CycleIssued == Entry.CycleExecuted) 236 OS << TimelineView::DisplayChar::DisplayChar::Executed; 237 else { 238 if (CycleDispatched != Entry.CycleIssued) 239 OS << TimelineView::DisplayChar::Executing; 240 for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E; 241 ++I) 242 OS << TimelineView::DisplayChar::Executing; 243 OS << TimelineView::DisplayChar::Executed; 244 } 245 } 246 247 for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I) 248 OS << TimelineView::DisplayChar::RetireLag; 249 if (Entry.CycleExecuted < Entry.CycleRetired) 250 OS << TimelineView::DisplayChar::Retired; 251 252 // Skip other columns. 253 for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I) 254 OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' '); 255 } 256 257 static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) { 258 OS << "\n\nTimeline view:\n"; 259 if (Cycles >= 10) { 260 OS.PadToColumn(10); 261 for (unsigned I = 0; I <= Cycles; ++I) { 262 if (((I / 10) & 1) == 0) 263 OS << ' '; 264 else 265 OS << I % 10; 266 } 267 OS << '\n'; 268 } 269 270 OS << "Index"; 271 OS.PadToColumn(10); 272 for (unsigned I = 0; I <= Cycles; ++I) { 273 if (((I / 10) & 1) == 0) 274 OS << I % 10; 275 else 276 OS << ' '; 277 } 278 OS << '\n'; 279 } 280 281 void TimelineView::printTimeline(raw_ostream &OS) const { 282 formatted_raw_ostream FOS(OS); 283 printTimelineHeader(FOS, LastCycle); 284 FOS.flush(); 285 286 unsigned IID = 0; 287 ArrayRef<llvm::MCInst> Source = getSource(); 288 const unsigned Iterations = Timeline.size() / Source.size(); 289 for (unsigned Iteration = 0; Iteration < Iterations; ++Iteration) { 290 for (const MCInst &Inst : Source) { 291 const TimelineViewEntry &Entry = Timeline[IID]; 292 // When an instruction is retired after timeline-max-cycles, 293 // its CycleRetired is left at 0. However, it's possible for 294 // a 0 latency instruction to be retired during cycle 0 and we 295 // don't want to early exit in that case. The CycleExecuted 296 // attribute is set correctly whether or not it is greater 297 // than timeline-max-cycles so we can use that to ensure 298 // we don't early exit because of a 0 latency instruction. 299 if (Entry.CycleRetired == 0 && Entry.CycleExecuted != 0) { 300 FOS << "Truncated display due to cycle limit\n"; 301 return; 302 } 303 304 unsigned SourceIndex = IID % Source.size(); 305 printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex); 306 FOS << " " << printInstructionString(Inst) << '\n'; 307 FOS.flush(); 308 309 ++IID; 310 } 311 } 312 } 313 314 json::Value TimelineView::toJSON() const { 315 json::Array TimelineInfo; 316 317 for (const TimelineViewEntry &TLE : Timeline) { 318 // Check if the timeline-max-cycles has been reached. 319 if (!TLE.CycleRetired && TLE.CycleExecuted) 320 break; 321 322 TimelineInfo.push_back( 323 json::Object({{"CycleDispatched", TLE.CycleDispatched}, 324 {"CycleReady", TLE.CycleReady}, 325 {"CycleIssued", TLE.CycleIssued}, 326 {"CycleExecuted", TLE.CycleExecuted}, 327 {"CycleRetired", TLE.CycleRetired}})); 328 } 329 return json::Object({{"TimelineInfo", std::move(TimelineInfo)}}); 330 } 331 } // namespace mca 332 } // namespace llvm 333