1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements hierarchical time profiler. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/TimeProfiler.h" 14 #include "llvm/ADT/STLFunctionalExtras.h" 15 #include "llvm/ADT/StringMap.h" 16 #include "llvm/Support/JSON.h" 17 #include "llvm/Support/ManagedStatic.h" 18 #include "llvm/Support/Path.h" 19 #include "llvm/Support/Process.h" 20 #include "llvm/Support/Threading.h" 21 #include <algorithm> 22 #include <cassert> 23 #include <chrono> 24 #include <mutex> 25 #include <string> 26 #include <vector> 27 28 using namespace std::chrono; 29 using namespace llvm; 30 31 static std::mutex Mu; 32 // List of all instances 33 static ManagedStatic<std::vector<TimeTraceProfiler *>> 34 ThreadTimeTraceProfilerInstances; // GUARDED_BY(Mu) 35 // Per Thread instance 36 static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; 37 38 TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() { 39 return TimeTraceProfilerInstance; 40 } 41 42 typedef duration<steady_clock::rep, steady_clock::period> DurationType; 43 typedef time_point<steady_clock> TimePointType; 44 typedef std::pair<size_t, DurationType> CountAndDurationType; 45 typedef std::pair<std::string, CountAndDurationType> 46 NameAndCountAndDurationType; 47 48 namespace { 49 struct Entry { 50 const TimePointType Start; 51 TimePointType End; 52 const std::string Name; 53 const std::string Detail; 54 55 Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt) 56 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), 57 Detail(std::move(Dt)) {} 58 59 // Calculate timings for FlameGraph. Cast time points to microsecond precision 60 // rather than casting duration. This avoid truncation issues causing inner 61 // scopes overruning outer scopes. 62 steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const { 63 return (time_point_cast<microseconds>(Start) - 64 time_point_cast<microseconds>(StartTime)) 65 .count(); 66 } 67 68 steady_clock::rep getFlameGraphDurUs() const { 69 return (time_point_cast<microseconds>(End) - 70 time_point_cast<microseconds>(Start)) 71 .count(); 72 } 73 }; 74 } // namespace 75 76 struct llvm::TimeTraceProfiler { 77 TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "") 78 : BeginningOfTime(system_clock::now()), StartTime(steady_clock::now()), 79 ProcName(ProcName), Pid(sys::Process::getProcessId()), 80 Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) { 81 llvm::get_thread_name(ThreadName); 82 } 83 84 void begin(std::string Name, llvm::function_ref<std::string()> Detail) { 85 Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name), 86 Detail()); 87 } 88 89 void end() { 90 assert(!Stack.empty() && "Must call begin() first"); 91 Entry &E = Stack.back(); 92 E.End = steady_clock::now(); 93 94 // Check that end times monotonically increase. 95 assert((Entries.empty() || 96 (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >= 97 Entries.back().getFlameGraphStartUs(StartTime) + 98 Entries.back().getFlameGraphDurUs())) && 99 "TimeProfiler scope ended earlier than previous scope"); 100 101 // Calculate duration at full precision for overall counts. 102 DurationType Duration = E.End - E.Start; 103 104 // Only include sections longer or equal to TimeTraceGranularity msec. 105 if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity) 106 Entries.emplace_back(E); 107 108 // Track total time taken by each "name", but only the topmost levels of 109 // them; e.g. if there's a template instantiation that instantiates other 110 // templates from within, we only want to add the topmost one. "topmost" 111 // happens to be the ones that don't have any currently open entries above 112 // itself. 113 if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)), 114 [&](const Entry &Val) { return Val.Name == E.Name; })) { 115 auto &CountAndTotal = CountAndTotalPerName[E.Name]; 116 CountAndTotal.first++; 117 CountAndTotal.second += Duration; 118 } 119 120 Stack.pop_back(); 121 } 122 123 // Write events from this TimeTraceProfilerInstance and 124 // ThreadTimeTraceProfilerInstances. 125 void write(raw_pwrite_stream &OS) { 126 // Acquire Mutex as reading ThreadTimeTraceProfilerInstances. 127 std::lock_guard<std::mutex> Lock(Mu); 128 assert(Stack.empty() && 129 "All profiler sections should be ended when calling write"); 130 assert(llvm::all_of(*ThreadTimeTraceProfilerInstances, 131 [](const auto &TTP) { return TTP->Stack.empty(); }) && 132 "All profiler sections should be ended when calling write"); 133 134 json::OStream J(OS); 135 J.objectBegin(); 136 J.attributeBegin("traceEvents"); 137 J.arrayBegin(); 138 139 // Emit all events for the main flame graph. 140 auto writeEvent = [&](const auto &E, uint64_t Tid) { 141 auto StartUs = E.getFlameGraphStartUs(StartTime); 142 auto DurUs = E.getFlameGraphDurUs(); 143 144 J.object([&] { 145 J.attribute("pid", Pid); 146 J.attribute("tid", int64_t(Tid)); 147 J.attribute("ph", "X"); 148 J.attribute("ts", StartUs); 149 J.attribute("dur", DurUs); 150 J.attribute("name", E.Name); 151 if (!E.Detail.empty()) { 152 J.attributeObject("args", [&] { J.attribute("detail", E.Detail); }); 153 } 154 }); 155 }; 156 for (const Entry &E : Entries) 157 writeEvent(E, this->Tid); 158 for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances) 159 for (const Entry &E : TTP->Entries) 160 writeEvent(E, TTP->Tid); 161 162 // Emit totals by section name as additional "thread" events, sorted from 163 // longest one. 164 // Find highest used thread id. 165 uint64_t MaxTid = this->Tid; 166 for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances) 167 MaxTid = std::max(MaxTid, TTP->Tid); 168 169 // Combine all CountAndTotalPerName from threads into one. 170 StringMap<CountAndDurationType> AllCountAndTotalPerName; 171 auto combineStat = [&](const auto &Stat) { 172 StringRef Key = Stat.getKey(); 173 auto Value = Stat.getValue(); 174 auto &CountAndTotal = AllCountAndTotalPerName[Key]; 175 CountAndTotal.first += Value.first; 176 CountAndTotal.second += Value.second; 177 }; 178 for (const auto &Stat : CountAndTotalPerName) 179 combineStat(Stat); 180 for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances) 181 for (const auto &Stat : TTP->CountAndTotalPerName) 182 combineStat(Stat); 183 184 std::vector<NameAndCountAndDurationType> SortedTotals; 185 SortedTotals.reserve(AllCountAndTotalPerName.size()); 186 for (const auto &Total : AllCountAndTotalPerName) 187 SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue()); 188 189 llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A, 190 const NameAndCountAndDurationType &B) { 191 return A.second.second > B.second.second; 192 }); 193 194 // Report totals on separate threads of tracing file. 195 uint64_t TotalTid = MaxTid + 1; 196 for (const NameAndCountAndDurationType &Total : SortedTotals) { 197 auto DurUs = duration_cast<microseconds>(Total.second.second).count(); 198 auto Count = AllCountAndTotalPerName[Total.first].first; 199 200 J.object([&] { 201 J.attribute("pid", Pid); 202 J.attribute("tid", int64_t(TotalTid)); 203 J.attribute("ph", "X"); 204 J.attribute("ts", 0); 205 J.attribute("dur", DurUs); 206 J.attribute("name", "Total " + Total.first); 207 J.attributeObject("args", [&] { 208 J.attribute("count", int64_t(Count)); 209 J.attribute("avg ms", int64_t(DurUs / Count / 1000)); 210 }); 211 }); 212 213 ++TotalTid; 214 } 215 216 auto writeMetadataEvent = [&](const char *Name, uint64_t Tid, 217 StringRef arg) { 218 J.object([&] { 219 J.attribute("cat", ""); 220 J.attribute("pid", Pid); 221 J.attribute("tid", int64_t(Tid)); 222 J.attribute("ts", 0); 223 J.attribute("ph", "M"); 224 J.attribute("name", Name); 225 J.attributeObject("args", [&] { J.attribute("name", arg); }); 226 }); 227 }; 228 229 writeMetadataEvent("process_name", Tid, ProcName); 230 writeMetadataEvent("thread_name", Tid, ThreadName); 231 for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances) 232 writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName); 233 234 J.arrayEnd(); 235 J.attributeEnd(); 236 237 // Emit the absolute time when this TimeProfiler started. 238 // This can be used to combine the profiling data from 239 // multiple processes and preserve actual time intervals. 240 J.attribute("beginningOfTime", 241 time_point_cast<microseconds>(BeginningOfTime) 242 .time_since_epoch() 243 .count()); 244 245 J.objectEnd(); 246 } 247 248 SmallVector<Entry, 16> Stack; 249 SmallVector<Entry, 128> Entries; 250 StringMap<CountAndDurationType> CountAndTotalPerName; 251 const time_point<system_clock> BeginningOfTime; 252 const TimePointType StartTime; 253 const std::string ProcName; 254 const sys::Process::Pid Pid; 255 SmallString<0> ThreadName; 256 const uint64_t Tid; 257 258 // Minimum time granularity (in microseconds) 259 const unsigned TimeTraceGranularity; 260 }; 261 262 void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity, 263 StringRef ProcName) { 264 assert(TimeTraceProfilerInstance == nullptr && 265 "Profiler should not be initialized"); 266 TimeTraceProfilerInstance = new TimeTraceProfiler( 267 TimeTraceGranularity, llvm::sys::path::filename(ProcName)); 268 } 269 270 // Removes all TimeTraceProfilerInstances. 271 // Called from main thread. 272 void llvm::timeTraceProfilerCleanup() { 273 delete TimeTraceProfilerInstance; 274 TimeTraceProfilerInstance = nullptr; 275 std::lock_guard<std::mutex> Lock(Mu); 276 for (auto *TTP : *ThreadTimeTraceProfilerInstances) 277 delete TTP; 278 ThreadTimeTraceProfilerInstances->clear(); 279 } 280 281 // Finish TimeTraceProfilerInstance on a worker thread. 282 // This doesn't remove the instance, just moves the pointer to global vector. 283 void llvm::timeTraceProfilerFinishThread() { 284 std::lock_guard<std::mutex> Lock(Mu); 285 ThreadTimeTraceProfilerInstances->push_back(TimeTraceProfilerInstance); 286 TimeTraceProfilerInstance = nullptr; 287 } 288 289 void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) { 290 assert(TimeTraceProfilerInstance != nullptr && 291 "Profiler object can't be null"); 292 TimeTraceProfilerInstance->write(OS); 293 } 294 295 Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName, 296 StringRef FallbackFileName) { 297 assert(TimeTraceProfilerInstance != nullptr && 298 "Profiler object can't be null"); 299 300 std::string Path = PreferredFileName.str(); 301 if (Path.empty()) { 302 Path = FallbackFileName == "-" ? "out" : FallbackFileName.str(); 303 Path += ".time-trace"; 304 } 305 306 std::error_code EC; 307 raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF); 308 if (EC) 309 return createStringError(EC, "Could not open " + Path); 310 311 timeTraceProfilerWrite(OS); 312 return Error::success(); 313 } 314 315 void llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) { 316 if (TimeTraceProfilerInstance != nullptr) 317 TimeTraceProfilerInstance->begin(std::string(Name), 318 [&]() { return std::string(Detail); }); 319 } 320 321 void llvm::timeTraceProfilerBegin(StringRef Name, 322 llvm::function_ref<std::string()> Detail) { 323 if (TimeTraceProfilerInstance != nullptr) 324 TimeTraceProfilerInstance->begin(std::string(Name), Detail); 325 } 326 327 void llvm::timeTraceProfilerEnd() { 328 if (TimeTraceProfilerInstance != nullptr) 329 TimeTraceProfilerInstance->end(); 330 } 331