xref: /freebsd/contrib/llvm-project/llvm/tools/llvm-xray/xray-account.cpp (revision 6be3386466ab79a84b48429ae66244f21526d3df)
1 //===- xray-account.h - XRay Function Call Accounting ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements basic function call accounting from an XRay trace.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <algorithm>
14 #include <cassert>
15 #include <numeric>
16 #include <system_error>
17 #include <utility>
18 
19 #include "xray-account.h"
20 #include "xray-registry.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/FormatVariadic.h"
23 #include "llvm/XRay/InstrumentationMap.h"
24 #include "llvm/XRay/Trace.h"
25 
26 using namespace llvm;
27 using namespace llvm::xray;
28 
29 static cl::SubCommand Account("account", "Function call accounting");
30 static cl::opt<std::string> AccountInput(cl::Positional,
31                                          cl::desc("<xray log file>"),
32                                          cl::Required, cl::sub(Account));
33 static cl::opt<bool>
34     AccountKeepGoing("keep-going", cl::desc("Keep going on errors encountered"),
35                      cl::sub(Account), cl::init(false));
36 static cl::alias AccountKeepGoing2("k", cl::aliasopt(AccountKeepGoing),
37                                    cl::desc("Alias for -keep_going"));
38 static cl::opt<bool> AccountDeduceSiblingCalls(
39     "deduce-sibling-calls",
40     cl::desc("Deduce sibling calls when unrolling function call stacks"),
41     cl::sub(Account), cl::init(false));
42 static cl::alias
43     AccountDeduceSiblingCalls2("d", cl::aliasopt(AccountDeduceSiblingCalls),
44                                cl::desc("Alias for -deduce_sibling_calls"));
45 static cl::opt<std::string>
46     AccountOutput("output", cl::value_desc("output file"), cl::init("-"),
47                   cl::desc("output file; use '-' for stdout"),
48                   cl::sub(Account));
49 static cl::alias AccountOutput2("o", cl::aliasopt(AccountOutput),
50                                 cl::desc("Alias for -output"));
51 enum class AccountOutputFormats { TEXT, CSV };
52 static cl::opt<AccountOutputFormats>
53     AccountOutputFormat("format", cl::desc("output format"),
54                         cl::values(clEnumValN(AccountOutputFormats::TEXT,
55                                               "text", "report stats in text"),
56                                    clEnumValN(AccountOutputFormats::CSV, "csv",
57                                               "report stats in csv")),
58                         cl::sub(Account));
59 static cl::alias AccountOutputFormat2("f", cl::desc("Alias of -format"),
60                                       cl::aliasopt(AccountOutputFormat));
61 
62 enum class SortField {
63   FUNCID,
64   COUNT,
65   MIN,
66   MED,
67   PCT90,
68   PCT99,
69   MAX,
70   SUM,
71   FUNC,
72 };
73 
74 static cl::opt<SortField> AccountSortOutput(
75     "sort", cl::desc("sort output by this field"), cl::value_desc("field"),
76     cl::sub(Account), cl::init(SortField::FUNCID),
77     cl::values(clEnumValN(SortField::FUNCID, "funcid", "function id"),
78                clEnumValN(SortField::COUNT, "count", "funciton call counts"),
79                clEnumValN(SortField::MIN, "min", "minimum function durations"),
80                clEnumValN(SortField::MED, "med", "median function durations"),
81                clEnumValN(SortField::PCT90, "90p", "90th percentile durations"),
82                clEnumValN(SortField::PCT99, "99p", "99th percentile durations"),
83                clEnumValN(SortField::MAX, "max", "maximum function durations"),
84                clEnumValN(SortField::SUM, "sum", "sum of call durations"),
85                clEnumValN(SortField::FUNC, "func", "function names")));
86 static cl::alias AccountSortOutput2("s", cl::aliasopt(AccountSortOutput),
87                                     cl::desc("Alias for -sort"));
88 
89 enum class SortDirection {
90   ASCENDING,
91   DESCENDING,
92 };
93 static cl::opt<SortDirection> AccountSortOrder(
94     "sortorder", cl::desc("sort ordering"), cl::init(SortDirection::ASCENDING),
95     cl::values(clEnumValN(SortDirection::ASCENDING, "asc", "ascending"),
96                clEnumValN(SortDirection::DESCENDING, "dsc", "descending")),
97     cl::sub(Account));
98 static cl::alias AccountSortOrder2("r", cl::aliasopt(AccountSortOrder),
99                                    cl::desc("Alias for -sortorder"));
100 
101 static cl::opt<int> AccountTop("top", cl::desc("only show the top N results"),
102                                cl::value_desc("N"), cl::sub(Account),
103                                cl::init(-1));
104 static cl::alias AccountTop2("p", cl::desc("Alias for -top"),
105                              cl::aliasopt(AccountTop));
106 
107 static cl::opt<std::string>
108     AccountInstrMap("instr_map",
109                     cl::desc("binary with the instrumentation map, or "
110                              "a separate instrumentation map"),
111                     cl::value_desc("binary with xray_instr_map"),
112                     cl::sub(Account), cl::init(""));
113 static cl::alias AccountInstrMap2("m", cl::aliasopt(AccountInstrMap),
114                                   cl::desc("Alias for -instr_map"));
115 
116 namespace {
117 
118 template <class T, class U> void setMinMax(std::pair<T, T> &MM, U &&V) {
119   if (MM.first == 0 || MM.second == 0)
120     MM = std::make_pair(std::forward<U>(V), std::forward<U>(V));
121   else
122     MM = std::make_pair(std::min(MM.first, V), std::max(MM.second, V));
123 }
124 
125 template <class T> T diff(T L, T R) { return std::max(L, R) - std::min(L, R); }
126 
127 } // namespace
128 
129 bool LatencyAccountant::accountRecord(const XRayRecord &Record) {
130   setMinMax(PerThreadMinMaxTSC[Record.TId], Record.TSC);
131   setMinMax(PerCPUMinMaxTSC[Record.CPU], Record.TSC);
132 
133   if (CurrentMaxTSC == 0)
134     CurrentMaxTSC = Record.TSC;
135 
136   if (Record.TSC < CurrentMaxTSC)
137     return false;
138 
139   auto &ThreadStack = PerThreadFunctionStack[Record.TId];
140   switch (Record.Type) {
141   case RecordTypes::CUSTOM_EVENT:
142   case RecordTypes::TYPED_EVENT:
143     // TODO: Support custom and typed event accounting in the future.
144     return true;
145   case RecordTypes::ENTER:
146   case RecordTypes::ENTER_ARG: {
147     ThreadStack.emplace_back(Record.FuncId, Record.TSC);
148     break;
149   }
150   case RecordTypes::EXIT:
151   case RecordTypes::TAIL_EXIT: {
152     if (ThreadStack.empty())
153       return false;
154 
155     if (ThreadStack.back().first == Record.FuncId) {
156       const auto &Top = ThreadStack.back();
157       recordLatency(Top.first, diff(Top.second, Record.TSC));
158       ThreadStack.pop_back();
159       break;
160     }
161 
162     if (!DeduceSiblingCalls)
163       return false;
164 
165     // Look for the parent up the stack.
166     auto Parent =
167         std::find_if(ThreadStack.rbegin(), ThreadStack.rend(),
168                      [&](const std::pair<const int32_t, uint64_t> &E) {
169                        return E.first == Record.FuncId;
170                      });
171     if (Parent == ThreadStack.rend())
172       return false;
173 
174     // Account time for this apparently sibling call exit up the stack.
175     // Considering the following case:
176     //
177     //   f()
178     //    g()
179     //      h()
180     //
181     // We might only ever see the following entries:
182     //
183     //   -> f()
184     //   -> g()
185     //   -> h()
186     //   <- h()
187     //   <- f()
188     //
189     // Now we don't see the exit to g() because some older version of the XRay
190     // runtime wasn't instrumenting tail exits. If we don't deduce tail calls,
191     // we may potentially never account time for g() -- and this code would have
192     // already bailed out, because `<- f()` doesn't match the current "top" of
193     // stack where we're waiting for the exit to `g()` instead. This is not
194     // ideal and brittle -- so instead we provide a potentially inaccurate
195     // accounting of g() instead, computing it from the exit of f().
196     //
197     // While it might be better that we account the time between `-> g()` and
198     // `-> h()` as the proper accounting of time for g() here, this introduces
199     // complexity to do correctly (need to backtrack, etc.).
200     //
201     // FIXME: Potentially implement the more complex deduction algorithm?
202     auto I = std::next(Parent).base();
203     for (auto &E : make_range(I, ThreadStack.end())) {
204       recordLatency(E.first, diff(E.second, Record.TSC));
205     }
206     ThreadStack.erase(I, ThreadStack.end());
207     break;
208   }
209   }
210 
211   return true;
212 }
213 
214 namespace {
215 
216 // We consolidate the data into a struct which we can output in various forms.
217 struct ResultRow {
218   uint64_t Count;
219   double Min;
220   double Median;
221   double Pct90;
222   double Pct99;
223   double Max;
224   double Sum;
225   std::string DebugInfo;
226   std::string Function;
227 };
228 
229 ResultRow getStats(std::vector<uint64_t> &Timings) {
230   assert(!Timings.empty());
231   ResultRow R;
232   R.Sum = std::accumulate(Timings.begin(), Timings.end(), 0.0);
233   auto MinMax = std::minmax_element(Timings.begin(), Timings.end());
234   R.Min = *MinMax.first;
235   R.Max = *MinMax.second;
236   R.Count = Timings.size();
237 
238   auto MedianOff = Timings.size() / 2;
239   std::nth_element(Timings.begin(), Timings.begin() + MedianOff, Timings.end());
240   R.Median = Timings[MedianOff];
241 
242   auto Pct90Off = std::floor(Timings.size() * 0.9);
243   std::nth_element(Timings.begin(), Timings.begin() + Pct90Off, Timings.end());
244   R.Pct90 = Timings[Pct90Off];
245 
246   auto Pct99Off = std::floor(Timings.size() * 0.99);
247   std::nth_element(Timings.begin(), Timings.begin() + Pct99Off, Timings.end());
248   R.Pct99 = Timings[Pct99Off];
249   return R;
250 }
251 
252 } // namespace
253 
254 using TupleType = std::tuple<int32_t, uint64_t, ResultRow>;
255 
256 template <typename F>
257 static void sortByKey(std::vector<TupleType> &Results, F Fn) {
258   bool ASC = AccountSortOrder == SortDirection::ASCENDING;
259   llvm::sort(Results, [=](const TupleType &L, const TupleType &R) {
260     return ASC ? Fn(L) < Fn(R) : Fn(L) > Fn(R);
261   });
262 }
263 
264 template <class F>
265 void LatencyAccountant::exportStats(const XRayFileHeader &Header, F Fn) const {
266   std::vector<TupleType> Results;
267   Results.reserve(FunctionLatencies.size());
268   for (auto FT : FunctionLatencies) {
269     const auto &FuncId = FT.first;
270     auto &Timings = FT.second;
271     Results.emplace_back(FuncId, Timings.size(), getStats(Timings));
272     auto &Row = std::get<2>(Results.back());
273     if (Header.CycleFrequency) {
274       double CycleFrequency = Header.CycleFrequency;
275       Row.Min /= CycleFrequency;
276       Row.Median /= CycleFrequency;
277       Row.Pct90 /= CycleFrequency;
278       Row.Pct99 /= CycleFrequency;
279       Row.Max /= CycleFrequency;
280       Row.Sum /= CycleFrequency;
281     }
282 
283     Row.Function = FuncIdHelper.SymbolOrNumber(FuncId);
284     Row.DebugInfo = FuncIdHelper.FileLineAndColumn(FuncId);
285   }
286 
287   // Sort the data according to user-provided flags.
288   switch (AccountSortOutput) {
289   case SortField::FUNCID:
290     sortByKey(Results, [](const TupleType &X) { return std::get<0>(X); });
291     break;
292   case SortField::COUNT:
293     sortByKey(Results, [](const TupleType &X) { return std::get<1>(X); });
294     break;
295   case SortField::MIN:
296     sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Min; });
297     break;
298   case SortField::MED:
299     sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Median; });
300     break;
301   case SortField::PCT90:
302     sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Pct90; });
303     break;
304   case SortField::PCT99:
305     sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Pct99; });
306     break;
307   case SortField::MAX:
308     sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Max; });
309     break;
310   case SortField::SUM:
311     sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Sum; });
312     break;
313   case SortField::FUNC:
314     llvm_unreachable("Not implemented");
315   }
316 
317   if (AccountTop > 0) {
318     auto MaxTop =
319         std::min(AccountTop.getValue(), static_cast<int>(Results.size()));
320     Results.erase(Results.begin() + MaxTop, Results.end());
321   }
322 
323   for (const auto &R : Results)
324     Fn(std::get<0>(R), std::get<1>(R), std::get<2>(R));
325 }
326 
327 void LatencyAccountant::exportStatsAsText(raw_ostream &OS,
328                                           const XRayFileHeader &Header) const {
329   OS << "Functions with latencies: " << FunctionLatencies.size() << "\n";
330 
331   // We spend some effort to make the text output more readable, so we do the
332   // following formatting decisions for each of the fields:
333   //
334   //   - funcid: 32-bit, but we can determine the largest number and be
335   //   between
336   //     a minimum of 5 characters, up to 9 characters, right aligned.
337   //   - count:  64-bit, but we can determine the largest number and be
338   //   between
339   //     a minimum of 5 characters, up to 9 characters, right aligned.
340   //   - min, median, 90pct, 99pct, max: double precision, but we want to keep
341   //     the values in seconds, with microsecond precision (0.000'001), so we
342   //     have at most 6 significant digits, with the whole number part to be
343   //     at
344   //     least 1 character. For readability we'll right-align, with full 9
345   //     characters each.
346   //   - debug info, function name: we format this as a concatenation of the
347   //     debug info and the function name.
348   //
349   static constexpr char StatsHeaderFormat[] =
350       "{0,+9} {1,+10} [{2,+9}, {3,+9}, {4,+9}, {5,+9}, {6,+9}] {7,+9}";
351   static constexpr char StatsFormat[] =
352       R"({0,+9} {1,+10} [{2,+9:f6}, {3,+9:f6}, {4,+9:f6}, {5,+9:f6}, {6,+9:f6}] {7,+9:f6})";
353   OS << llvm::formatv(StatsHeaderFormat, "funcid", "count", "min", "med", "90p",
354                       "99p", "max", "sum")
355      << llvm::formatv("  {0,-12}\n", "function");
356   exportStats(Header, [&](int32_t FuncId, size_t Count, const ResultRow &Row) {
357     OS << llvm::formatv(StatsFormat, FuncId, Count, Row.Min, Row.Median,
358                         Row.Pct90, Row.Pct99, Row.Max, Row.Sum)
359        << "  " << Row.DebugInfo << ": " << Row.Function << "\n";
360   });
361 }
362 
363 void LatencyAccountant::exportStatsAsCSV(raw_ostream &OS,
364                                          const XRayFileHeader &Header) const {
365   OS << "funcid,count,min,median,90%ile,99%ile,max,sum,debug,function\n";
366   exportStats(Header, [&](int32_t FuncId, size_t Count, const ResultRow &Row) {
367     OS << FuncId << ',' << Count << ',' << Row.Min << ',' << Row.Median << ','
368        << Row.Pct90 << ',' << Row.Pct99 << ',' << Row.Max << "," << Row.Sum
369        << ",\"" << Row.DebugInfo << "\",\"" << Row.Function << "\"\n";
370   });
371 }
372 
373 using namespace llvm::xray;
374 
375 namespace llvm {
376 template <> struct format_provider<llvm::xray::RecordTypes> {
377   static void format(const llvm::xray::RecordTypes &T, raw_ostream &Stream,
378                      StringRef Style) {
379     switch (T) {
380     case RecordTypes::ENTER:
381       Stream << "enter";
382       break;
383     case RecordTypes::ENTER_ARG:
384       Stream << "enter-arg";
385       break;
386     case RecordTypes::EXIT:
387       Stream << "exit";
388       break;
389     case RecordTypes::TAIL_EXIT:
390       Stream << "tail-exit";
391       break;
392     case RecordTypes::CUSTOM_EVENT:
393       Stream << "custom-event";
394       break;
395     case RecordTypes::TYPED_EVENT:
396       Stream << "typed-event";
397       break;
398     }
399   }
400 };
401 } // namespace llvm
402 
403 static CommandRegistration Unused(&Account, []() -> Error {
404   InstrumentationMap Map;
405   if (!AccountInstrMap.empty()) {
406     auto InstrumentationMapOrError = loadInstrumentationMap(AccountInstrMap);
407     if (!InstrumentationMapOrError)
408       return joinErrors(make_error<StringError>(
409                             Twine("Cannot open instrumentation map '") +
410                                 AccountInstrMap + "'",
411                             std::make_error_code(std::errc::invalid_argument)),
412                         InstrumentationMapOrError.takeError());
413     Map = std::move(*InstrumentationMapOrError);
414   }
415 
416   std::error_code EC;
417   raw_fd_ostream OS(AccountOutput, EC, sys::fs::OpenFlags::OF_Text);
418   if (EC)
419     return make_error<StringError>(
420         Twine("Cannot open file '") + AccountOutput + "' for writing.", EC);
421 
422   const auto &FunctionAddresses = Map.getFunctionAddresses();
423   symbolize::LLVMSymbolizer Symbolizer;
424   llvm::xray::FuncIdConversionHelper FuncIdHelper(AccountInstrMap, Symbolizer,
425                                                   FunctionAddresses);
426   xray::LatencyAccountant FCA(FuncIdHelper, AccountDeduceSiblingCalls);
427   auto TraceOrErr = loadTraceFile(AccountInput);
428   if (!TraceOrErr)
429     return joinErrors(
430         make_error<StringError>(
431             Twine("Failed loading input file '") + AccountInput + "'",
432             std::make_error_code(std::errc::executable_format_error)),
433         TraceOrErr.takeError());
434 
435   auto &T = *TraceOrErr;
436   for (const auto &Record : T) {
437     if (FCA.accountRecord(Record))
438       continue;
439     errs()
440         << "Error processing record: "
441         << llvm::formatv(
442                R"({{type: {0}; cpu: {1}; record-type: {2}; function-id: {3}; tsc: {4}; thread-id: {5}; process-id: {6}}})",
443                Record.RecordType, Record.CPU, Record.Type, Record.FuncId,
444                Record.TSC, Record.TId, Record.PId)
445         << '\n';
446     for (const auto &ThreadStack : FCA.getPerThreadFunctionStack()) {
447       errs() << "Thread ID: " << ThreadStack.first << "\n";
448       if (ThreadStack.second.empty()) {
449         errs() << "  (empty stack)\n";
450         continue;
451       }
452       auto Level = ThreadStack.second.size();
453       for (const auto &Entry : llvm::reverse(ThreadStack.second))
454         errs() << "  #" << Level-- << "\t"
455                << FuncIdHelper.SymbolOrNumber(Entry.first) << '\n';
456     }
457     if (!AccountKeepGoing)
458       return make_error<StringError>(
459           Twine("Failed accounting function calls in file '") + AccountInput +
460               "'.",
461           std::make_error_code(std::errc::executable_format_error));
462   }
463   switch (AccountOutputFormat) {
464   case AccountOutputFormats::TEXT:
465     FCA.exportStatsAsText(OS, T.getFileHeader());
466     break;
467   case AccountOutputFormats::CSV:
468     FCA.exportStatsAsCSV(OS, T.getFileHeader());
469     break;
470   }
471 
472   return Error::success();
473 });
474