xref: /freebsd/contrib/llvm-project/llvm/lib/Support/TimeProfiler.cpp (revision b2d2a78ad80ec68d4a17f5aef97d21686cb1e29b)
1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements hierarchical time profiler.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/TimeProfiler.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/STLFunctionalExtras.h"
16 #include "llvm/ADT/StringMap.h"
17 #include "llvm/Support/JSON.h"
18 #include "llvm/Support/Path.h"
19 #include "llvm/Support/Process.h"
20 #include "llvm/Support/Threading.h"
21 #include <algorithm>
22 #include <cassert>
23 #include <chrono>
24 #include <memory>
25 #include <mutex>
26 #include <string>
27 #include <vector>
28 
29 using namespace llvm;
30 
31 namespace {
32 
33 using std::chrono::duration;
34 using std::chrono::duration_cast;
35 using std::chrono::microseconds;
36 using std::chrono::steady_clock;
37 using std::chrono::system_clock;
38 using std::chrono::time_point;
39 using std::chrono::time_point_cast;
40 
41 struct TimeTraceProfilerInstances {
42   std::mutex Lock;
43   std::vector<TimeTraceProfiler *> List;
44 };
45 
46 TimeTraceProfilerInstances &getTimeTraceProfilerInstances() {
47   static TimeTraceProfilerInstances Instances;
48   return Instances;
49 }
50 
51 } // anonymous namespace
52 
53 // Per Thread instance
54 static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
55 
56 TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
57   return TimeTraceProfilerInstance;
58 }
59 
60 namespace {
61 
62 using ClockType = steady_clock;
63 using TimePointType = time_point<ClockType>;
64 using DurationType = duration<ClockType::rep, ClockType::period>;
65 using CountAndDurationType = std::pair<size_t, DurationType>;
66 using NameAndCountAndDurationType =
67     std::pair<std::string, CountAndDurationType>;
68 
69 } // anonymous namespace
70 
71 /// Represents an open or completed time section entry to be captured.
72 struct llvm::TimeTraceProfilerEntry {
73   const TimePointType Start;
74   TimePointType End;
75   const std::string Name;
76   TimeTraceMetadata Metadata;
77 
78   const bool AsyncEvent = false;
79   TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
80                          std::string &&Dt, bool Ae)
81       : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Metadata(),
82         AsyncEvent(Ae) {
83     Metadata.Detail = std::move(Dt);
84   }
85 
86   TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
87                          TimeTraceMetadata &&Mt, bool Ae)
88       : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
89         Metadata(std::move(Mt)), AsyncEvent(Ae) {}
90 
91   // Calculate timings for FlameGraph. Cast time points to microsecond precision
92   // rather than casting duration. This avoids truncation issues causing inner
93   // scopes overruning outer scopes.
94   ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const {
95     return (time_point_cast<microseconds>(Start) -
96             time_point_cast<microseconds>(StartTime))
97         .count();
98   }
99 
100   ClockType::rep getFlameGraphDurUs() const {
101     return (time_point_cast<microseconds>(End) -
102             time_point_cast<microseconds>(Start))
103         .count();
104   }
105 };
106 
107 struct llvm::TimeTraceProfiler {
108   TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "",
109                     bool TimeTraceVerbose = false)
110       : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()),
111         ProcName(ProcName), Pid(sys::Process::getProcessId()),
112         Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity),
113         TimeTraceVerbose(TimeTraceVerbose) {
114     llvm::get_thread_name(ThreadName);
115   }
116 
117   TimeTraceProfilerEntry *begin(std::string Name,
118                                 llvm::function_ref<std::string()> Detail,
119                                 bool AsyncEvent = false) {
120     Stack.emplace_back(std::make_unique<TimeTraceProfilerEntry>(
121         ClockType::now(), TimePointType(), std::move(Name), Detail(),
122         AsyncEvent));
123     return Stack.back().get();
124   }
125 
126   TimeTraceProfilerEntry *
127   begin(std::string Name, llvm::function_ref<TimeTraceMetadata()> Metadata,
128         bool AsyncEvent = false) {
129     Stack.emplace_back(std::make_unique<TimeTraceProfilerEntry>(
130         ClockType::now(), TimePointType(), std::move(Name), Metadata(),
131         AsyncEvent));
132     return Stack.back().get();
133   }
134 
135   void end() {
136     assert(!Stack.empty() && "Must call begin() first");
137     end(*Stack.back());
138   }
139 
140   void end(TimeTraceProfilerEntry &E) {
141     assert(!Stack.empty() && "Must call begin() first");
142     E.End = ClockType::now();
143 
144     // Calculate duration at full precision for overall counts.
145     DurationType Duration = E.End - E.Start;
146 
147     // Only include sections longer or equal to TimeTraceGranularity msec.
148     if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
149       Entries.emplace_back(E);
150 
151     // Track total time taken by each "name", but only the topmost levels of
152     // them; e.g. if there's a template instantiation that instantiates other
153     // templates from within, we only want to add the topmost one. "topmost"
154     // happens to be the ones that don't have any currently open entries above
155     // itself.
156     if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)),
157                       [&](const std::unique_ptr<TimeTraceProfilerEntry> &Val) {
158                         return Val->Name == E.Name;
159                       })) {
160       auto &CountAndTotal = CountAndTotalPerName[E.Name];
161       CountAndTotal.first++;
162       CountAndTotal.second += Duration;
163     };
164 
165     llvm::erase_if(Stack,
166                    [&](const std::unique_ptr<TimeTraceProfilerEntry> &Val) {
167                      return Val.get() == &E;
168                    });
169   }
170 
171   // Write events from this TimeTraceProfilerInstance and
172   // ThreadTimeTraceProfilerInstances.
173   void write(raw_pwrite_stream &OS) {
174     // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
175     auto &Instances = getTimeTraceProfilerInstances();
176     std::lock_guard<std::mutex> Lock(Instances.Lock);
177     assert(Stack.empty() &&
178            "All profiler sections should be ended when calling write");
179     assert(llvm::all_of(Instances.List,
180                         [](const auto &TTP) { return TTP->Stack.empty(); }) &&
181            "All profiler sections should be ended when calling write");
182 
183     json::OStream J(OS);
184     J.objectBegin();
185     J.attributeBegin("traceEvents");
186     J.arrayBegin();
187 
188     // Emit all events for the main flame graph.
189     auto writeEvent = [&](const auto &E, uint64_t Tid) {
190       auto StartUs = E.getFlameGraphStartUs(StartTime);
191       auto DurUs = E.getFlameGraphDurUs();
192 
193       J.object([&] {
194         J.attribute("pid", Pid);
195         J.attribute("tid", int64_t(Tid));
196         J.attribute("ts", StartUs);
197         if (E.AsyncEvent) {
198           J.attribute("cat", E.Name);
199           J.attribute("ph", "b");
200           J.attribute("id", 0);
201         } else {
202           J.attribute("ph", "X");
203           J.attribute("dur", DurUs);
204         }
205         J.attribute("name", E.Name);
206         if (!E.Metadata.isEmpty()) {
207           J.attributeObject("args", [&] {
208             if (!E.Metadata.Detail.empty())
209               J.attribute("detail", E.Metadata.Detail);
210             if (!E.Metadata.File.empty())
211               J.attribute("file", E.Metadata.File);
212             if (E.Metadata.Line > 0)
213               J.attribute("line", E.Metadata.Line);
214           });
215         }
216       });
217 
218       if (E.AsyncEvent) {
219         J.object([&] {
220           J.attribute("pid", Pid);
221           J.attribute("tid", int64_t(Tid));
222           J.attribute("ts", StartUs + DurUs);
223           J.attribute("cat", E.Name);
224           J.attribute("ph", "e");
225           J.attribute("id", 0);
226           J.attribute("name", E.Name);
227         });
228       }
229     };
230     for (const TimeTraceProfilerEntry &E : Entries)
231       writeEvent(E, this->Tid);
232     for (const TimeTraceProfiler *TTP : Instances.List)
233       for (const TimeTraceProfilerEntry &E : TTP->Entries)
234         writeEvent(E, TTP->Tid);
235 
236     // Emit totals by section name as additional "thread" events, sorted from
237     // longest one.
238     // Find highest used thread id.
239     uint64_t MaxTid = this->Tid;
240     for (const TimeTraceProfiler *TTP : Instances.List)
241       MaxTid = std::max(MaxTid, TTP->Tid);
242 
243     // Combine all CountAndTotalPerName from threads into one.
244     StringMap<CountAndDurationType> AllCountAndTotalPerName;
245     auto combineStat = [&](const auto &Stat) {
246       StringRef Key = Stat.getKey();
247       auto Value = Stat.getValue();
248       auto &CountAndTotal = AllCountAndTotalPerName[Key];
249       CountAndTotal.first += Value.first;
250       CountAndTotal.second += Value.second;
251     };
252     for (const auto &Stat : CountAndTotalPerName)
253       combineStat(Stat);
254     for (const TimeTraceProfiler *TTP : Instances.List)
255       for (const auto &Stat : TTP->CountAndTotalPerName)
256         combineStat(Stat);
257 
258     std::vector<NameAndCountAndDurationType> SortedTotals;
259     SortedTotals.reserve(AllCountAndTotalPerName.size());
260     for (const auto &Total : AllCountAndTotalPerName)
261       SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
262 
263     llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
264                                 const NameAndCountAndDurationType &B) {
265       return A.second.second > B.second.second;
266     });
267 
268     // Report totals on separate threads of tracing file.
269     uint64_t TotalTid = MaxTid + 1;
270     for (const NameAndCountAndDurationType &Total : SortedTotals) {
271       auto DurUs = duration_cast<microseconds>(Total.second.second).count();
272       auto Count = AllCountAndTotalPerName[Total.first].first;
273 
274       J.object([&] {
275         J.attribute("pid", Pid);
276         J.attribute("tid", int64_t(TotalTid));
277         J.attribute("ph", "X");
278         J.attribute("ts", 0);
279         J.attribute("dur", DurUs);
280         J.attribute("name", "Total " + Total.first);
281         J.attributeObject("args", [&] {
282           J.attribute("count", int64_t(Count));
283           J.attribute("avg ms", int64_t(DurUs / Count / 1000));
284         });
285       });
286 
287       ++TotalTid;
288     }
289 
290     auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
291                                   StringRef arg) {
292       J.object([&] {
293         J.attribute("cat", "");
294         J.attribute("pid", Pid);
295         J.attribute("tid", int64_t(Tid));
296         J.attribute("ts", 0);
297         J.attribute("ph", "M");
298         J.attribute("name", Name);
299         J.attributeObject("args", [&] { J.attribute("name", arg); });
300       });
301     };
302 
303     writeMetadataEvent("process_name", Tid, ProcName);
304     writeMetadataEvent("thread_name", Tid, ThreadName);
305     for (const TimeTraceProfiler *TTP : Instances.List)
306       writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
307 
308     J.arrayEnd();
309     J.attributeEnd();
310 
311     // Emit the absolute time when this TimeProfiler started.
312     // This can be used to combine the profiling data from
313     // multiple processes and preserve actual time intervals.
314     J.attribute("beginningOfTime",
315                 time_point_cast<microseconds>(BeginningOfTime)
316                     .time_since_epoch()
317                     .count());
318 
319     J.objectEnd();
320   }
321 
322   SmallVector<std::unique_ptr<TimeTraceProfilerEntry>, 16> Stack;
323   SmallVector<TimeTraceProfilerEntry, 128> Entries;
324   StringMap<CountAndDurationType> CountAndTotalPerName;
325   // System clock time when the session was begun.
326   const time_point<system_clock> BeginningOfTime;
327   // Profiling clock time when the session was begun.
328   const TimePointType StartTime;
329   const std::string ProcName;
330   const sys::Process::Pid Pid;
331   SmallString<0> ThreadName;
332   const uint64_t Tid;
333 
334   // Minimum time granularity (in microseconds)
335   const unsigned TimeTraceGranularity;
336 
337   // Make time trace capture verbose event details (e.g. source filenames). This
338   // can increase the size of the output by 2-3 times.
339   const bool TimeTraceVerbose;
340 };
341 
342 bool llvm::isTimeTraceVerbose() {
343   return getTimeTraceProfilerInstance() &&
344          getTimeTraceProfilerInstance()->TimeTraceVerbose;
345 }
346 
347 void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
348                                        StringRef ProcName,
349                                        bool TimeTraceVerbose) {
350   assert(TimeTraceProfilerInstance == nullptr &&
351          "Profiler should not be initialized");
352   TimeTraceProfilerInstance = new TimeTraceProfiler(
353       TimeTraceGranularity, llvm::sys::path::filename(ProcName),
354       TimeTraceVerbose);
355 }
356 
357 // Removes all TimeTraceProfilerInstances.
358 // Called from main thread.
359 void llvm::timeTraceProfilerCleanup() {
360   delete TimeTraceProfilerInstance;
361   TimeTraceProfilerInstance = nullptr;
362 
363   auto &Instances = getTimeTraceProfilerInstances();
364   std::lock_guard<std::mutex> Lock(Instances.Lock);
365   for (auto *TTP : Instances.List)
366     delete TTP;
367   Instances.List.clear();
368 }
369 
370 // Finish TimeTraceProfilerInstance on a worker thread.
371 // This doesn't remove the instance, just moves the pointer to global vector.
372 void llvm::timeTraceProfilerFinishThread() {
373   auto &Instances = getTimeTraceProfilerInstances();
374   std::lock_guard<std::mutex> Lock(Instances.Lock);
375   Instances.List.push_back(TimeTraceProfilerInstance);
376   TimeTraceProfilerInstance = nullptr;
377 }
378 
379 void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
380   assert(TimeTraceProfilerInstance != nullptr &&
381          "Profiler object can't be null");
382   TimeTraceProfilerInstance->write(OS);
383 }
384 
385 Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
386                                    StringRef FallbackFileName) {
387   assert(TimeTraceProfilerInstance != nullptr &&
388          "Profiler object can't be null");
389 
390   std::string Path = PreferredFileName.str();
391   if (Path.empty()) {
392     Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
393     Path += ".time-trace";
394   }
395 
396   std::error_code EC;
397   raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF);
398   if (EC)
399     return createStringError(EC, "Could not open " + Path);
400 
401   timeTraceProfilerWrite(OS);
402   return Error::success();
403 }
404 
405 TimeTraceProfilerEntry *llvm::timeTraceProfilerBegin(StringRef Name,
406                                                      StringRef Detail) {
407   if (TimeTraceProfilerInstance != nullptr)
408     return TimeTraceProfilerInstance->begin(
409         std::string(Name), [&]() { return std::string(Detail); }, false);
410   return nullptr;
411 }
412 
413 TimeTraceProfilerEntry *
414 llvm::timeTraceProfilerBegin(StringRef Name,
415                              llvm::function_ref<std::string()> Detail) {
416   if (TimeTraceProfilerInstance != nullptr)
417     return TimeTraceProfilerInstance->begin(std::string(Name), Detail, false);
418   return nullptr;
419 }
420 
421 TimeTraceProfilerEntry *
422 llvm::timeTraceProfilerBegin(StringRef Name,
423                              llvm::function_ref<TimeTraceMetadata()> Metadata) {
424   if (TimeTraceProfilerInstance != nullptr)
425     return TimeTraceProfilerInstance->begin(std::string(Name), Metadata, false);
426   return nullptr;
427 }
428 
429 TimeTraceProfilerEntry *llvm::timeTraceAsyncProfilerBegin(StringRef Name,
430                                                           StringRef Detail) {
431   if (TimeTraceProfilerInstance != nullptr)
432     return TimeTraceProfilerInstance->begin(
433         std::string(Name), [&]() { return std::string(Detail); }, true);
434   return nullptr;
435 }
436 
437 void llvm::timeTraceProfilerEnd() {
438   if (TimeTraceProfilerInstance != nullptr)
439     TimeTraceProfilerInstance->end();
440 }
441 
442 void llvm::timeTraceProfilerEnd(TimeTraceProfilerEntry *E) {
443   if (TimeTraceProfilerInstance != nullptr)
444     TimeTraceProfilerInstance->end(*E);
445 }
446