1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements hierarchical time profiler. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/TimeProfiler.h" 14 #include "llvm/ADT/STLExtras.h" 15 #include "llvm/ADT/STLFunctionalExtras.h" 16 #include "llvm/ADT/StringMap.h" 17 #include "llvm/Support/JSON.h" 18 #include "llvm/Support/Path.h" 19 #include "llvm/Support/Process.h" 20 #include "llvm/Support/Threading.h" 21 #include <algorithm> 22 #include <cassert> 23 #include <chrono> 24 #include <memory> 25 #include <mutex> 26 #include <string> 27 #include <vector> 28 29 using namespace llvm; 30 31 namespace { 32 33 using std::chrono::duration; 34 using std::chrono::duration_cast; 35 using std::chrono::microseconds; 36 using std::chrono::steady_clock; 37 using std::chrono::system_clock; 38 using std::chrono::time_point; 39 using std::chrono::time_point_cast; 40 41 struct TimeTraceProfilerInstances { 42 std::mutex Lock; 43 std::vector<TimeTraceProfiler *> List; 44 }; 45 46 TimeTraceProfilerInstances &getTimeTraceProfilerInstances() { 47 static TimeTraceProfilerInstances Instances; 48 return Instances; 49 } 50 51 } // anonymous namespace 52 53 // Per Thread instance 54 static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; 55 56 TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() { 57 return TimeTraceProfilerInstance; 58 } 59 60 namespace { 61 62 using ClockType = steady_clock; 63 using TimePointType = time_point<ClockType>; 64 using DurationType = duration<ClockType::rep, ClockType::period>; 65 using CountAndDurationType = std::pair<size_t, DurationType>; 66 using NameAndCountAndDurationType = 67 std::pair<std::string, CountAndDurationType>; 68 69 } // anonymous namespace 70 71 /// Represents an open or completed time section entry to be captured. 72 struct llvm::TimeTraceProfilerEntry { 73 const TimePointType Start; 74 TimePointType End; 75 const std::string Name; 76 TimeTraceMetadata Metadata; 77 78 const bool AsyncEvent = false; 79 TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, 80 std::string &&Dt, bool Ae) 81 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), Metadata(), 82 AsyncEvent(Ae) { 83 Metadata.Detail = std::move(Dt); 84 } 85 86 TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, 87 TimeTraceMetadata &&Mt, bool Ae) 88 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), 89 Metadata(std::move(Mt)), AsyncEvent(Ae) {} 90 91 // Calculate timings for FlameGraph. Cast time points to microsecond precision 92 // rather than casting duration. This avoids truncation issues causing inner 93 // scopes overruning outer scopes. 94 ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const { 95 return (time_point_cast<microseconds>(Start) - 96 time_point_cast<microseconds>(StartTime)) 97 .count(); 98 } 99 100 ClockType::rep getFlameGraphDurUs() const { 101 return (time_point_cast<microseconds>(End) - 102 time_point_cast<microseconds>(Start)) 103 .count(); 104 } 105 }; 106 107 struct llvm::TimeTraceProfiler { 108 TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "", 109 bool TimeTraceVerbose = false) 110 : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()), 111 ProcName(ProcName), Pid(sys::Process::getProcessId()), 112 Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity), 113 TimeTraceVerbose(TimeTraceVerbose) { 114 llvm::get_thread_name(ThreadName); 115 } 116 117 TimeTraceProfilerEntry *begin(std::string Name, 118 llvm::function_ref<std::string()> Detail, 119 bool AsyncEvent = false) { 120 Stack.emplace_back(std::make_unique<TimeTraceProfilerEntry>( 121 ClockType::now(), TimePointType(), std::move(Name), Detail(), 122 AsyncEvent)); 123 return Stack.back().get(); 124 } 125 126 TimeTraceProfilerEntry * 127 begin(std::string Name, llvm::function_ref<TimeTraceMetadata()> Metadata, 128 bool AsyncEvent = false) { 129 Stack.emplace_back(std::make_unique<TimeTraceProfilerEntry>( 130 ClockType::now(), TimePointType(), std::move(Name), Metadata(), 131 AsyncEvent)); 132 return Stack.back().get(); 133 } 134 135 void end() { 136 assert(!Stack.empty() && "Must call begin() first"); 137 end(*Stack.back()); 138 } 139 140 void end(TimeTraceProfilerEntry &E) { 141 assert(!Stack.empty() && "Must call begin() first"); 142 E.End = ClockType::now(); 143 144 // Calculate duration at full precision for overall counts. 145 DurationType Duration = E.End - E.Start; 146 147 // Only include sections longer or equal to TimeTraceGranularity msec. 148 if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity) 149 Entries.emplace_back(E); 150 151 // Track total time taken by each "name", but only the topmost levels of 152 // them; e.g. if there's a template instantiation that instantiates other 153 // templates from within, we only want to add the topmost one. "topmost" 154 // happens to be the ones that don't have any currently open entries above 155 // itself. 156 if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)), 157 [&](const std::unique_ptr<TimeTraceProfilerEntry> &Val) { 158 return Val->Name == E.Name; 159 })) { 160 auto &CountAndTotal = CountAndTotalPerName[E.Name]; 161 CountAndTotal.first++; 162 CountAndTotal.second += Duration; 163 }; 164 165 llvm::erase_if(Stack, 166 [&](const std::unique_ptr<TimeTraceProfilerEntry> &Val) { 167 return Val.get() == &E; 168 }); 169 } 170 171 // Write events from this TimeTraceProfilerInstance and 172 // ThreadTimeTraceProfilerInstances. 173 void write(raw_pwrite_stream &OS) { 174 // Acquire Mutex as reading ThreadTimeTraceProfilerInstances. 175 auto &Instances = getTimeTraceProfilerInstances(); 176 std::lock_guard<std::mutex> Lock(Instances.Lock); 177 assert(Stack.empty() && 178 "All profiler sections should be ended when calling write"); 179 assert(llvm::all_of(Instances.List, 180 [](const auto &TTP) { return TTP->Stack.empty(); }) && 181 "All profiler sections should be ended when calling write"); 182 183 json::OStream J(OS); 184 J.objectBegin(); 185 J.attributeBegin("traceEvents"); 186 J.arrayBegin(); 187 188 // Emit all events for the main flame graph. 189 auto writeEvent = [&](const auto &E, uint64_t Tid) { 190 auto StartUs = E.getFlameGraphStartUs(StartTime); 191 auto DurUs = E.getFlameGraphDurUs(); 192 193 J.object([&] { 194 J.attribute("pid", Pid); 195 J.attribute("tid", int64_t(Tid)); 196 J.attribute("ts", StartUs); 197 if (E.AsyncEvent) { 198 J.attribute("cat", E.Name); 199 J.attribute("ph", "b"); 200 J.attribute("id", 0); 201 } else { 202 J.attribute("ph", "X"); 203 J.attribute("dur", DurUs); 204 } 205 J.attribute("name", E.Name); 206 if (!E.Metadata.isEmpty()) { 207 J.attributeObject("args", [&] { 208 if (!E.Metadata.Detail.empty()) 209 J.attribute("detail", E.Metadata.Detail); 210 if (!E.Metadata.File.empty()) 211 J.attribute("file", E.Metadata.File); 212 if (E.Metadata.Line > 0) 213 J.attribute("line", E.Metadata.Line); 214 }); 215 } 216 }); 217 218 if (E.AsyncEvent) { 219 J.object([&] { 220 J.attribute("pid", Pid); 221 J.attribute("tid", int64_t(Tid)); 222 J.attribute("ts", StartUs + DurUs); 223 J.attribute("cat", E.Name); 224 J.attribute("ph", "e"); 225 J.attribute("id", 0); 226 J.attribute("name", E.Name); 227 }); 228 } 229 }; 230 for (const TimeTraceProfilerEntry &E : Entries) 231 writeEvent(E, this->Tid); 232 for (const TimeTraceProfiler *TTP : Instances.List) 233 for (const TimeTraceProfilerEntry &E : TTP->Entries) 234 writeEvent(E, TTP->Tid); 235 236 // Emit totals by section name as additional "thread" events, sorted from 237 // longest one. 238 // Find highest used thread id. 239 uint64_t MaxTid = this->Tid; 240 for (const TimeTraceProfiler *TTP : Instances.List) 241 MaxTid = std::max(MaxTid, TTP->Tid); 242 243 // Combine all CountAndTotalPerName from threads into one. 244 StringMap<CountAndDurationType> AllCountAndTotalPerName; 245 auto combineStat = [&](const auto &Stat) { 246 StringRef Key = Stat.getKey(); 247 auto Value = Stat.getValue(); 248 auto &CountAndTotal = AllCountAndTotalPerName[Key]; 249 CountAndTotal.first += Value.first; 250 CountAndTotal.second += Value.second; 251 }; 252 for (const auto &Stat : CountAndTotalPerName) 253 combineStat(Stat); 254 for (const TimeTraceProfiler *TTP : Instances.List) 255 for (const auto &Stat : TTP->CountAndTotalPerName) 256 combineStat(Stat); 257 258 std::vector<NameAndCountAndDurationType> SortedTotals; 259 SortedTotals.reserve(AllCountAndTotalPerName.size()); 260 for (const auto &Total : AllCountAndTotalPerName) 261 SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue()); 262 263 llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A, 264 const NameAndCountAndDurationType &B) { 265 return A.second.second > B.second.second; 266 }); 267 268 // Report totals on separate threads of tracing file. 269 uint64_t TotalTid = MaxTid + 1; 270 for (const NameAndCountAndDurationType &Total : SortedTotals) { 271 auto DurUs = duration_cast<microseconds>(Total.second.second).count(); 272 auto Count = AllCountAndTotalPerName[Total.first].first; 273 274 J.object([&] { 275 J.attribute("pid", Pid); 276 J.attribute("tid", int64_t(TotalTid)); 277 J.attribute("ph", "X"); 278 J.attribute("ts", 0); 279 J.attribute("dur", DurUs); 280 J.attribute("name", "Total " + Total.first); 281 J.attributeObject("args", [&] { 282 J.attribute("count", int64_t(Count)); 283 J.attribute("avg ms", int64_t(DurUs / Count / 1000)); 284 }); 285 }); 286 287 ++TotalTid; 288 } 289 290 auto writeMetadataEvent = [&](const char *Name, uint64_t Tid, 291 StringRef arg) { 292 J.object([&] { 293 J.attribute("cat", ""); 294 J.attribute("pid", Pid); 295 J.attribute("tid", int64_t(Tid)); 296 J.attribute("ts", 0); 297 J.attribute("ph", "M"); 298 J.attribute("name", Name); 299 J.attributeObject("args", [&] { J.attribute("name", arg); }); 300 }); 301 }; 302 303 writeMetadataEvent("process_name", Tid, ProcName); 304 writeMetadataEvent("thread_name", Tid, ThreadName); 305 for (const TimeTraceProfiler *TTP : Instances.List) 306 writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName); 307 308 J.arrayEnd(); 309 J.attributeEnd(); 310 311 // Emit the absolute time when this TimeProfiler started. 312 // This can be used to combine the profiling data from 313 // multiple processes and preserve actual time intervals. 314 J.attribute("beginningOfTime", 315 time_point_cast<microseconds>(BeginningOfTime) 316 .time_since_epoch() 317 .count()); 318 319 J.objectEnd(); 320 } 321 322 SmallVector<std::unique_ptr<TimeTraceProfilerEntry>, 16> Stack; 323 SmallVector<TimeTraceProfilerEntry, 128> Entries; 324 StringMap<CountAndDurationType> CountAndTotalPerName; 325 // System clock time when the session was begun. 326 const time_point<system_clock> BeginningOfTime; 327 // Profiling clock time when the session was begun. 328 const TimePointType StartTime; 329 const std::string ProcName; 330 const sys::Process::Pid Pid; 331 SmallString<0> ThreadName; 332 const uint64_t Tid; 333 334 // Minimum time granularity (in microseconds) 335 const unsigned TimeTraceGranularity; 336 337 // Make time trace capture verbose event details (e.g. source filenames). This 338 // can increase the size of the output by 2-3 times. 339 const bool TimeTraceVerbose; 340 }; 341 342 bool llvm::isTimeTraceVerbose() { 343 return getTimeTraceProfilerInstance() && 344 getTimeTraceProfilerInstance()->TimeTraceVerbose; 345 } 346 347 void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity, 348 StringRef ProcName, 349 bool TimeTraceVerbose) { 350 assert(TimeTraceProfilerInstance == nullptr && 351 "Profiler should not be initialized"); 352 TimeTraceProfilerInstance = new TimeTraceProfiler( 353 TimeTraceGranularity, llvm::sys::path::filename(ProcName), 354 TimeTraceVerbose); 355 } 356 357 // Removes all TimeTraceProfilerInstances. 358 // Called from main thread. 359 void llvm::timeTraceProfilerCleanup() { 360 delete TimeTraceProfilerInstance; 361 TimeTraceProfilerInstance = nullptr; 362 363 auto &Instances = getTimeTraceProfilerInstances(); 364 std::lock_guard<std::mutex> Lock(Instances.Lock); 365 for (auto *TTP : Instances.List) 366 delete TTP; 367 Instances.List.clear(); 368 } 369 370 // Finish TimeTraceProfilerInstance on a worker thread. 371 // This doesn't remove the instance, just moves the pointer to global vector. 372 void llvm::timeTraceProfilerFinishThread() { 373 auto &Instances = getTimeTraceProfilerInstances(); 374 std::lock_guard<std::mutex> Lock(Instances.Lock); 375 Instances.List.push_back(TimeTraceProfilerInstance); 376 TimeTraceProfilerInstance = nullptr; 377 } 378 379 void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) { 380 assert(TimeTraceProfilerInstance != nullptr && 381 "Profiler object can't be null"); 382 TimeTraceProfilerInstance->write(OS); 383 } 384 385 Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName, 386 StringRef FallbackFileName) { 387 assert(TimeTraceProfilerInstance != nullptr && 388 "Profiler object can't be null"); 389 390 std::string Path = PreferredFileName.str(); 391 if (Path.empty()) { 392 Path = FallbackFileName == "-" ? "out" : FallbackFileName.str(); 393 Path += ".time-trace"; 394 } 395 396 std::error_code EC; 397 raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF); 398 if (EC) 399 return createStringError(EC, "Could not open " + Path); 400 401 timeTraceProfilerWrite(OS); 402 return Error::success(); 403 } 404 405 TimeTraceProfilerEntry *llvm::timeTraceProfilerBegin(StringRef Name, 406 StringRef Detail) { 407 if (TimeTraceProfilerInstance != nullptr) 408 return TimeTraceProfilerInstance->begin( 409 std::string(Name), [&]() { return std::string(Detail); }, false); 410 return nullptr; 411 } 412 413 TimeTraceProfilerEntry * 414 llvm::timeTraceProfilerBegin(StringRef Name, 415 llvm::function_ref<std::string()> Detail) { 416 if (TimeTraceProfilerInstance != nullptr) 417 return TimeTraceProfilerInstance->begin(std::string(Name), Detail, false); 418 return nullptr; 419 } 420 421 TimeTraceProfilerEntry * 422 llvm::timeTraceProfilerBegin(StringRef Name, 423 llvm::function_ref<TimeTraceMetadata()> Metadata) { 424 if (TimeTraceProfilerInstance != nullptr) 425 return TimeTraceProfilerInstance->begin(std::string(Name), Metadata, false); 426 return nullptr; 427 } 428 429 TimeTraceProfilerEntry *llvm::timeTraceAsyncProfilerBegin(StringRef Name, 430 StringRef Detail) { 431 if (TimeTraceProfilerInstance != nullptr) 432 return TimeTraceProfilerInstance->begin( 433 std::string(Name), [&]() { return std::string(Detail); }, true); 434 return nullptr; 435 } 436 437 void llvm::timeTraceProfilerEnd() { 438 if (TimeTraceProfilerInstance != nullptr) 439 TimeTraceProfilerInstance->end(); 440 } 441 442 void llvm::timeTraceProfilerEnd(TimeTraceProfilerEntry *E) { 443 if (TimeTraceProfilerInstance != nullptr) 444 TimeTraceProfilerInstance->end(*E); 445 } 446