1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements hierarchical time profiler.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/Support/TimeProfiler.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/STLFunctionalExtras.h"
16 #include "llvm/ADT/StringMap.h"
17 #include "llvm/Support/JSON.h"
18 #include "llvm/Support/Path.h"
19 #include "llvm/Support/Process.h"
20 #include "llvm/Support/Threading.h"
21 #include <algorithm>
22 #include <cassert>
23 #include <chrono>
24 #include <memory>
25 #include <mutex>
26 #include <string>
27 #include <vector>
28
29 using namespace llvm;
30
31 namespace {
32
33 using std::chrono::duration;
34 using std::chrono::duration_cast;
35 using std::chrono::microseconds;
36 using std::chrono::steady_clock;
37 using std::chrono::system_clock;
38 using std::chrono::time_point;
39 using std::chrono::time_point_cast;
40
41 struct TimeTraceProfilerInstances {
42 std::mutex Lock;
43 std::vector<TimeTraceProfiler *> List;
44 };
45
getTimeTraceProfilerInstances()46 TimeTraceProfilerInstances &getTimeTraceProfilerInstances() {
47 static TimeTraceProfilerInstances Instances;
48 return Instances;
49 }
50
51 } // anonymous namespace
52
53 // Per Thread instance
54 static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
55
getTimeTraceProfilerInstance()56 TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
57 return TimeTraceProfilerInstance;
58 }
59
60 namespace {
61
62 using ClockType = steady_clock;
63 using TimePointType = time_point<ClockType>;
64 using DurationType = duration<ClockType::rep, ClockType::period>;
65 using CountAndDurationType = std::pair<size_t, DurationType>;
66 using NameAndCountAndDurationType =
67 std::pair<std::string, CountAndDurationType>;
68
69 } // anonymous namespace
70
71 /// Represents an open or completed time section entry to be captured.
72 struct llvm::TimeTraceProfilerEntry {
73 const TimePointType Start;
74 TimePointType End;
75 const std::string Name;
76 TimeTraceMetadata Metadata;
77
78 const bool AsyncEvent = false;
TimeTraceProfilerEntryllvm::TimeTraceProfilerEntry79 TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
80 std::string &&Dt, bool Ae)
81 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Metadata(),
82 AsyncEvent(Ae) {
83 Metadata.Detail = std::move(Dt);
84 }
85
TimeTraceProfilerEntryllvm::TimeTraceProfilerEntry86 TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N,
87 TimeTraceMetadata &&Mt, bool Ae)
88 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
89 Metadata(std::move(Mt)), AsyncEvent(Ae) {}
90
91 // Calculate timings for FlameGraph. Cast time points to microsecond precision
92 // rather than casting duration. This avoids truncation issues causing inner
93 // scopes overruning outer scopes.
getFlameGraphStartUsllvm::TimeTraceProfilerEntry94 ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const {
95 return (time_point_cast<microseconds>(Start) -
96 time_point_cast<microseconds>(StartTime))
97 .count();
98 }
99
getFlameGraphDurUsllvm::TimeTraceProfilerEntry100 ClockType::rep getFlameGraphDurUs() const {
101 return (time_point_cast<microseconds>(End) -
102 time_point_cast<microseconds>(Start))
103 .count();
104 }
105 };
106
107 struct llvm::TimeTraceProfiler {
TimeTraceProfilerllvm::TimeTraceProfiler108 TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "",
109 bool TimeTraceVerbose = false)
110 : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()),
111 ProcName(ProcName), Pid(sys::Process::getProcessId()),
112 Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity),
113 TimeTraceVerbose(TimeTraceVerbose) {
114 llvm::get_thread_name(ThreadName);
115 }
116
beginllvm::TimeTraceProfiler117 TimeTraceProfilerEntry *begin(std::string Name,
118 llvm::function_ref<std::string()> Detail,
119 bool AsyncEvent = false) {
120 Stack.emplace_back(std::make_unique<TimeTraceProfilerEntry>(
121 ClockType::now(), TimePointType(), std::move(Name), Detail(),
122 AsyncEvent));
123 return Stack.back().get();
124 }
125
126 TimeTraceProfilerEntry *
beginllvm::TimeTraceProfiler127 begin(std::string Name, llvm::function_ref<TimeTraceMetadata()> Metadata,
128 bool AsyncEvent = false) {
129 Stack.emplace_back(std::make_unique<TimeTraceProfilerEntry>(
130 ClockType::now(), TimePointType(), std::move(Name), Metadata(),
131 AsyncEvent));
132 return Stack.back().get();
133 }
134
endllvm::TimeTraceProfiler135 void end() {
136 assert(!Stack.empty() && "Must call begin() first");
137 end(*Stack.back());
138 }
139
endllvm::TimeTraceProfiler140 void end(TimeTraceProfilerEntry &E) {
141 assert(!Stack.empty() && "Must call begin() first");
142 E.End = ClockType::now();
143
144 // Calculate duration at full precision for overall counts.
145 DurationType Duration = E.End - E.Start;
146
147 // Only include sections longer or equal to TimeTraceGranularity msec.
148 if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
149 Entries.emplace_back(E);
150
151 // Track total time taken by each "name", but only the topmost levels of
152 // them; e.g. if there's a template instantiation that instantiates other
153 // templates from within, we only want to add the topmost one. "topmost"
154 // happens to be the ones that don't have any currently open entries above
155 // itself.
156 if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)),
157 [&](const std::unique_ptr<TimeTraceProfilerEntry> &Val) {
158 return Val->Name == E.Name;
159 })) {
160 auto &CountAndTotal = CountAndTotalPerName[E.Name];
161 CountAndTotal.first++;
162 CountAndTotal.second += Duration;
163 };
164
165 llvm::erase_if(Stack,
166 [&](const std::unique_ptr<TimeTraceProfilerEntry> &Val) {
167 return Val.get() == &E;
168 });
169 }
170
171 // Write events from this TimeTraceProfilerInstance and
172 // ThreadTimeTraceProfilerInstances.
writellvm::TimeTraceProfiler173 void write(raw_pwrite_stream &OS) {
174 // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
175 auto &Instances = getTimeTraceProfilerInstances();
176 std::lock_guard<std::mutex> Lock(Instances.Lock);
177 assert(Stack.empty() &&
178 "All profiler sections should be ended when calling write");
179 assert(llvm::all_of(Instances.List,
180 [](const auto &TTP) { return TTP->Stack.empty(); }) &&
181 "All profiler sections should be ended when calling write");
182
183 json::OStream J(OS);
184 J.objectBegin();
185 J.attributeBegin("traceEvents");
186 J.arrayBegin();
187
188 // Emit all events for the main flame graph.
189 auto writeEvent = [&](const auto &E, uint64_t Tid) {
190 auto StartUs = E.getFlameGraphStartUs(StartTime);
191 auto DurUs = E.getFlameGraphDurUs();
192
193 J.object([&] {
194 J.attribute("pid", Pid);
195 J.attribute("tid", int64_t(Tid));
196 J.attribute("ts", StartUs);
197 if (E.AsyncEvent) {
198 J.attribute("cat", E.Name);
199 J.attribute("ph", "b");
200 J.attribute("id", 0);
201 } else {
202 J.attribute("ph", "X");
203 J.attribute("dur", DurUs);
204 }
205 J.attribute("name", E.Name);
206 if (!E.Metadata.isEmpty()) {
207 J.attributeObject("args", [&] {
208 if (!E.Metadata.Detail.empty())
209 J.attribute("detail", E.Metadata.Detail);
210 if (!E.Metadata.File.empty())
211 J.attribute("file", E.Metadata.File);
212 if (E.Metadata.Line > 0)
213 J.attribute("line", E.Metadata.Line);
214 });
215 }
216 });
217
218 if (E.AsyncEvent) {
219 J.object([&] {
220 J.attribute("pid", Pid);
221 J.attribute("tid", int64_t(Tid));
222 J.attribute("ts", StartUs + DurUs);
223 J.attribute("cat", E.Name);
224 J.attribute("ph", "e");
225 J.attribute("id", 0);
226 J.attribute("name", E.Name);
227 });
228 }
229 };
230 for (const TimeTraceProfilerEntry &E : Entries)
231 writeEvent(E, this->Tid);
232 for (const TimeTraceProfiler *TTP : Instances.List)
233 for (const TimeTraceProfilerEntry &E : TTP->Entries)
234 writeEvent(E, TTP->Tid);
235
236 // Emit totals by section name as additional "thread" events, sorted from
237 // longest one.
238 // Find highest used thread id.
239 uint64_t MaxTid = this->Tid;
240 for (const TimeTraceProfiler *TTP : Instances.List)
241 MaxTid = std::max(MaxTid, TTP->Tid);
242
243 // Combine all CountAndTotalPerName from threads into one.
244 StringMap<CountAndDurationType> AllCountAndTotalPerName;
245 auto combineStat = [&](const auto &Stat) {
246 StringRef Key = Stat.getKey();
247 auto Value = Stat.getValue();
248 auto &CountAndTotal = AllCountAndTotalPerName[Key];
249 CountAndTotal.first += Value.first;
250 CountAndTotal.second += Value.second;
251 };
252 for (const auto &Stat : CountAndTotalPerName)
253 combineStat(Stat);
254 for (const TimeTraceProfiler *TTP : Instances.List)
255 for (const auto &Stat : TTP->CountAndTotalPerName)
256 combineStat(Stat);
257
258 std::vector<NameAndCountAndDurationType> SortedTotals;
259 SortedTotals.reserve(AllCountAndTotalPerName.size());
260 for (const auto &Total : AllCountAndTotalPerName)
261 SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
262
263 llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
264 const NameAndCountAndDurationType &B) {
265 return A.second.second > B.second.second;
266 });
267
268 // Report totals on separate threads of tracing file.
269 uint64_t TotalTid = MaxTid + 1;
270 for (const NameAndCountAndDurationType &Total : SortedTotals) {
271 auto DurUs = duration_cast<microseconds>(Total.second.second).count();
272 auto Count = AllCountAndTotalPerName[Total.first].first;
273
274 J.object([&] {
275 J.attribute("pid", Pid);
276 J.attribute("tid", int64_t(TotalTid));
277 J.attribute("ph", "X");
278 J.attribute("ts", 0);
279 J.attribute("dur", DurUs);
280 J.attribute("name", "Total " + Total.first);
281 J.attributeObject("args", [&] {
282 J.attribute("count", int64_t(Count));
283 J.attribute("avg ms", int64_t(DurUs / Count / 1000));
284 });
285 });
286
287 ++TotalTid;
288 }
289
290 auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
291 StringRef arg) {
292 J.object([&] {
293 J.attribute("cat", "");
294 J.attribute("pid", Pid);
295 J.attribute("tid", int64_t(Tid));
296 J.attribute("ts", 0);
297 J.attribute("ph", "M");
298 J.attribute("name", Name);
299 J.attributeObject("args", [&] { J.attribute("name", arg); });
300 });
301 };
302
303 writeMetadataEvent("process_name", Tid, ProcName);
304 writeMetadataEvent("thread_name", Tid, ThreadName);
305 for (const TimeTraceProfiler *TTP : Instances.List)
306 writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
307
308 J.arrayEnd();
309 J.attributeEnd();
310
311 // Emit the absolute time when this TimeProfiler started.
312 // This can be used to combine the profiling data from
313 // multiple processes and preserve actual time intervals.
314 J.attribute("beginningOfTime",
315 time_point_cast<microseconds>(BeginningOfTime)
316 .time_since_epoch()
317 .count());
318
319 J.objectEnd();
320 }
321
322 SmallVector<std::unique_ptr<TimeTraceProfilerEntry>, 16> Stack;
323 SmallVector<TimeTraceProfilerEntry, 128> Entries;
324 StringMap<CountAndDurationType> CountAndTotalPerName;
325 // System clock time when the session was begun.
326 const time_point<system_clock> BeginningOfTime;
327 // Profiling clock time when the session was begun.
328 const TimePointType StartTime;
329 const std::string ProcName;
330 const sys::Process::Pid Pid;
331 SmallString<0> ThreadName;
332 const uint64_t Tid;
333
334 // Minimum time granularity (in microseconds)
335 const unsigned TimeTraceGranularity;
336
337 // Make time trace capture verbose event details (e.g. source filenames). This
338 // can increase the size of the output by 2-3 times.
339 const bool TimeTraceVerbose;
340 };
341
isTimeTraceVerbose()342 bool llvm::isTimeTraceVerbose() {
343 return getTimeTraceProfilerInstance() &&
344 getTimeTraceProfilerInstance()->TimeTraceVerbose;
345 }
346
timeTraceProfilerInitialize(unsigned TimeTraceGranularity,StringRef ProcName,bool TimeTraceVerbose)347 void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
348 StringRef ProcName,
349 bool TimeTraceVerbose) {
350 assert(TimeTraceProfilerInstance == nullptr &&
351 "Profiler should not be initialized");
352 TimeTraceProfilerInstance = new TimeTraceProfiler(
353 TimeTraceGranularity, llvm::sys::path::filename(ProcName),
354 TimeTraceVerbose);
355 }
356
357 // Removes all TimeTraceProfilerInstances.
358 // Called from main thread.
timeTraceProfilerCleanup()359 void llvm::timeTraceProfilerCleanup() {
360 delete TimeTraceProfilerInstance;
361 TimeTraceProfilerInstance = nullptr;
362
363 auto &Instances = getTimeTraceProfilerInstances();
364 std::lock_guard<std::mutex> Lock(Instances.Lock);
365 for (auto *TTP : Instances.List)
366 delete TTP;
367 Instances.List.clear();
368 }
369
370 // Finish TimeTraceProfilerInstance on a worker thread.
371 // This doesn't remove the instance, just moves the pointer to global vector.
timeTraceProfilerFinishThread()372 void llvm::timeTraceProfilerFinishThread() {
373 auto &Instances = getTimeTraceProfilerInstances();
374 std::lock_guard<std::mutex> Lock(Instances.Lock);
375 Instances.List.push_back(TimeTraceProfilerInstance);
376 TimeTraceProfilerInstance = nullptr;
377 }
378
timeTraceProfilerWrite(raw_pwrite_stream & OS)379 void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
380 assert(TimeTraceProfilerInstance != nullptr &&
381 "Profiler object can't be null");
382 TimeTraceProfilerInstance->write(OS);
383 }
384
timeTraceProfilerWrite(StringRef PreferredFileName,StringRef FallbackFileName)385 Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
386 StringRef FallbackFileName) {
387 assert(TimeTraceProfilerInstance != nullptr &&
388 "Profiler object can't be null");
389
390 std::string Path = PreferredFileName.str();
391 if (Path.empty()) {
392 Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
393 Path += ".time-trace";
394 }
395
396 std::error_code EC;
397 raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF);
398 if (EC)
399 return createStringError(EC, "Could not open " + Path);
400
401 timeTraceProfilerWrite(OS);
402 return Error::success();
403 }
404
timeTraceProfilerBegin(StringRef Name,StringRef Detail)405 TimeTraceProfilerEntry *llvm::timeTraceProfilerBegin(StringRef Name,
406 StringRef Detail) {
407 if (TimeTraceProfilerInstance != nullptr)
408 return TimeTraceProfilerInstance->begin(
409 std::string(Name), [&]() { return std::string(Detail); }, false);
410 return nullptr;
411 }
412
413 TimeTraceProfilerEntry *
timeTraceProfilerBegin(StringRef Name,llvm::function_ref<std::string ()> Detail)414 llvm::timeTraceProfilerBegin(StringRef Name,
415 llvm::function_ref<std::string()> Detail) {
416 if (TimeTraceProfilerInstance != nullptr)
417 return TimeTraceProfilerInstance->begin(std::string(Name), Detail, false);
418 return nullptr;
419 }
420
421 TimeTraceProfilerEntry *
timeTraceProfilerBegin(StringRef Name,llvm::function_ref<TimeTraceMetadata ()> Metadata)422 llvm::timeTraceProfilerBegin(StringRef Name,
423 llvm::function_ref<TimeTraceMetadata()> Metadata) {
424 if (TimeTraceProfilerInstance != nullptr)
425 return TimeTraceProfilerInstance->begin(std::string(Name), Metadata, false);
426 return nullptr;
427 }
428
timeTraceAsyncProfilerBegin(StringRef Name,StringRef Detail)429 TimeTraceProfilerEntry *llvm::timeTraceAsyncProfilerBegin(StringRef Name,
430 StringRef Detail) {
431 if (TimeTraceProfilerInstance != nullptr)
432 return TimeTraceProfilerInstance->begin(
433 std::string(Name), [&]() { return std::string(Detail); }, true);
434 return nullptr;
435 }
436
timeTraceProfilerEnd()437 void llvm::timeTraceProfilerEnd() {
438 if (TimeTraceProfilerInstance != nullptr)
439 TimeTraceProfilerInstance->end();
440 }
441
timeTraceProfilerEnd(TimeTraceProfilerEntry * E)442 void llvm::timeTraceProfilerEnd(TimeTraceProfilerEntry *E) {
443 if (TimeTraceProfilerInstance != nullptr)
444 TimeTraceProfilerInstance->end(*E);
445 }
446