//===-- xray_profiling.cpp --------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This is the implementation of a profiling handler.
//
//===----------------------------------------------------------------------===//
#include <memory>
#include <time.h>

#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_flags.h"
#include "xray/xray_interface.h"
#include "xray/xray_log_interface.h"
#include "xray_buffer_queue.h"
#include "xray_flags.h"
#include "xray_profile_collector.h"
#include "xray_profiling_flags.h"
#include "xray_recursion_guard.h"
#include "xray_tsc.h"
#include "xray_utils.h"
#include <pthread.h>

namespace __xray {

namespace {

static atomic_sint32_t ProfilerLogFlushStatus = {
    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};

static atomic_sint32_t ProfilerLogStatus = {
    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};

static SpinMutex ProfilerOptionsMutex;

struct ProfilingData {
  atomic_uintptr_t Allocators;
  atomic_uintptr_t FCT;
};

static pthread_key_t ProfilingKey;

// We use a global buffer queue, which gets initialized once at initialization
// time, and gets reset when profiling is "done".
alignas(BufferQueue) static std::byte BufferQueueStorage[sizeof(BufferQueue)];
static BufferQueue *BQ = nullptr;

thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
alignas(FunctionCallTrie::Allocators) thread_local std::byte
    AllocatorsStorage[sizeof(FunctionCallTrie::Allocators)];
alignas(FunctionCallTrie) thread_local std::byte
    FunctionCallTrieStorage[sizeof(FunctionCallTrie)];
thread_local ProfilingData TLD{{0}, {0}};
thread_local atomic_uint8_t ReentranceGuard{0};

// We use a separate guard to ensure that, if this thread is already cleaning
// up, any signal handlers running on it do not attempt to clean up or
// initialize.
thread_local atomic_uint8_t TLDInitGuard{0};

// We also use a separate latch to signal that the thread is exiting, and that
// non-essential work should be ignored (things like recording events, etc.).
thread_local atomic_uint8_t ThreadExitingLatch{0};

static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
  thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
    pthread_setspecific(ProfilingKey, &TLD);
    return false;
  }();
  (void)ThreadOnce;

  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return nullptr;

  if (atomic_load_relaxed(&ThreadExitingLatch))
    return nullptr;

  uptr Allocators = 0;
  if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
                                     memory_order_acq_rel)) {
    bool Success = false;
    auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        atomic_store(&TLD.Allocators, 0, memory_order_release);
    });

    // Acquire a set of buffers for this thread.
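    // Each thread needs four buffers (nodes, roots, shadow stack, and
    // node-id pairs). Acquisition is all-or-nothing: if any getBuffer call
    // below fails we bail out, and the scope-exit handlers (armed while
    // `Success` is still false) release whatever was already acquired.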
    if (BQ == nullptr)
      return nullptr;

    if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;

    Success = true;
    new (&AllocatorsStorage) FunctionCallTrie::Allocators(
        FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
    Allocators = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
    atomic_store(&TLD.Allocators, Allocators, memory_order_release);
  }

  if (Allocators == 1)
    return nullptr;

  uptr FCT = 0;
  if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1,
                                     memory_order_acq_rel)) {
    new (&FunctionCallTrieStorage)
        FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
            atomic_load_relaxed(&TLD.Allocators)));
    FCT = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
    atomic_store(&TLD.FCT, FCT, memory_order_release);
  }

  if (FCT == 1)
    return nullptr;

  return &TLD;
}

static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
  auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
  if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
                 &FunctionCallTrieStorage)))
    reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();

  auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
  if (Allocators ==
      reinterpret_cast<uptr>(
          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
    reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
}

static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return;

  uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
  if (P != reinterpret_cast<uptr>(
               reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
    return;

  auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
  DCHECK_NE(FCT, nullptr);

  uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
  if (A !=
      reinterpret_cast<uptr>(
          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
    return;

  auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
  DCHECK_NE(Allocators, nullptr);

  // Always move the data into the profile collector.
  profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
                                std::move(ThreadBuffers), GetTid());

  // Re-initialize the ThreadBuffers object to a known "default" state.
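  // The moved-from handles must not be released again when this thread later
  // re-acquires buffers or runs its exit cleanup.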
  ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
}

} // namespace

const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
#ifdef XRAY_PROFILER_DEFAULT_OPTIONS
  return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);
#else
  return "";
#endif
}

XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
  if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
      XRayLogInitStatus::XRAY_LOG_FINALIZED) {
    if (Verbosity())
      Report("Not flushing profiles, profiling has not been finalized.\n");
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  RecursionGuard SignalGuard(ReentranceGuard);
  if (!SignalGuard) {
    if (Verbosity())
      Report("Cannot finalize properly inside a signal handler!\n");
    atomic_store(&ProfilerLogFlushStatus,
                 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
                 memory_order_release);
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
                                 XRayLogFlushStatus::XRAY_LOG_FLUSHING,
                                 memory_order_acq_rel);
  if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
    if (Verbosity())
      Report("Not flushing profiles, implementation still flushing.\n");
    return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
  }

  // At this point, we'll create the file that will contain the profile, but
  // only if the options say so.
  if (!profilingFlags()->no_flush) {
    // First check whether we have data in the profile collector service
    // before we try and write anything down.
    XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});
    if (B.Data == nullptr) {
      if (Verbosity())
        Report("profiling: No data to flush.\n");
    } else {
      LogWriter *LW = LogWriter::Open();
      if (LW == nullptr) {
        if (Verbosity())
          Report("profiling: Failed to flush to file, dropping data.\n");
      } else {
        // Now for each of the buffers, write out the profile data as we
        // would see it in memory, verbatim.
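        // `nextBuffer` behaves like an iterator: feeding back the previously
        // returned XRayBuffer yields the next one, and a null or zero-sized
        // buffer marks the end of the sequence.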
        while (B.Data != nullptr && B.Size != 0) {
          LW->WriteAll(reinterpret_cast<const char *>(B.Data),
                       reinterpret_cast<const char *>(B.Data) + B.Size);
          B = profileCollectorService::nextBuffer(B);
        }
        LogWriter::Close(LW);
      }
    }
  }

  profileCollectorService::reset();

  atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
               memory_order_release);
  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
               memory_order_release);

  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
}

void profilingHandleArg0(int32_t FuncId,
                         XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
  unsigned char CPU;
  auto TSC = readTSC(CPU);
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return;

  auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
    return;

  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
    postCurrentThreadFCT(TLD);
    return;
  }

  auto T = getThreadLocalData();
  if (T == nullptr)
    return;

  auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
  switch (Entry) {
  case XRayEntryType::ENTRY:
  case XRayEntryType::LOG_ARGS_ENTRY:
    FCT->enterFunction(FuncId, TSC, CPU);
    break;
  case XRayEntryType::EXIT:
  case XRayEntryType::TAIL:
    FCT->exitFunction(FuncId, TSC, CPU);
    break;
  default:
    // FIXME: Handle bugs.
    break;
  }
}

void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry,
                         uint64_t) XRAY_NEVER_INSTRUMENT {
  return profilingHandleArg0(FuncId, Entry);
}

XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_FINALIZING,
                                      memory_order_release)) {
    if (Verbosity())
      Report("Cannot finalize profile; profiling is not initialized.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  // Mark then finalize the current generation of buffers. This allows the
  // threads currently holding new buffers to keep using them, while the last
  // reference does the memory cleanup.
  DCHECK_NE(BQ, nullptr);
  BQ->finalize();

  // Wait a grace period to allow threads to see that we're finalizing.
  SleepForMillis(profilingFlags()->grace_period_ms);

  // If we for some reason are entering this function from an instrumented
  // handler, we bail out.
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return static_cast<XRayLogInitStatus>(CurrentStatus);

  // Post the current thread's data if we have any.
  postCurrentThreadFCT(TLD);

  // Then we force serialize the log data.
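  // Serialization folds the per-thread FunctionCallTries posted above into
  // the buffers that profilingFlush later walks via nextBuffer.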
  profileCollectorService::serialize();

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
               memory_order_release);
  return XRayLogInitStatus::XRAY_LOG_FINALIZED;
}

XRayLogInitStatus
profilingLoggingInit(size_t, size_t, void *Options,
                     size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;

  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_INITIALIZING,
                                      memory_order_acq_rel)) {
    if (Verbosity())
      Report("Cannot initialize an already initialized profiling "
             "implementation.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  {
    SpinMutexLock Lock(&ProfilerOptionsMutex);
    FlagParser ConfigParser;
    ProfilerFlags Flags;
    Flags.setDefaults();
    registerProfilerFlags(&ConfigParser, &Flags);
    ConfigParser.ParseString(profilingCompilerDefinedFlags());
    const char *Env = GetEnv("XRAY_PROFILING_OPTIONS");
    if (Env == nullptr)
      Env = "";
    ConfigParser.ParseString(Env);

    // Then parse the configuration string provided.
    ConfigParser.ParseString(static_cast<const char *>(Options));
    if (Verbosity())
      ReportUnrecognizedFlags();
    *profilingFlags() = Flags;
  }

  // We need to reset the profile data collection implementation now.
  profileCollectorService::reset();

  // Then also reset the buffer queue implementation.
  if (BQ == nullptr) {
    bool Success = false;
    new (&BufferQueueStorage)
        BufferQueue(profilingFlags()->per_thread_allocator_max,
                    profilingFlags()->buffers_max, Success);
    if (!Success) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers!\n");
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    // If we've succeeded, set the global pointer to the initialized storage.
    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
  } else {
    BQ->finalize();
    auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
                               profilingFlags()->buffers_max);

    if (InitStatus != BufferQueue::ErrorCode::Ok) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers; error: %s\n",
               BufferQueue::getErrorString(InitStatus));
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    DCHECK(!BQ->finalizing());
  }

  // We need to set up the exit handlers.
  static pthread_once_t Once = PTHREAD_ONCE_INIT;
  pthread_once(
      &Once, +[] {
        pthread_key_create(
            &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
              if (atomic_exchange(&ThreadExitingLatch, 1,
                                  memory_order_acq_rel))
                return;

              if (P == nullptr)
                return;

              auto T = reinterpret_cast<ProfilingData *>(P);
              if (atomic_load_relaxed(&T->Allocators) == 0)
                return;

              {
                // If we're somehow executing this while inside a
                // non-reentrant-friendly context, we skip attempting to post
                // the current thread's data.
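                // (A signal handler firing during teardown could otherwise
                // re-enter this path; the guard below makes that re-entry a
                // no-op.)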
                RecursionGuard G(ReentranceGuard);
                if (!G)
                  return;

                postCurrentThreadFCT(*T);
              }
            });

        // We also need to set up an exit handler, so that we can get the
        // profile information at exit time. We use the C API to do this, to
        // not rely on C++ ABI functions for registering exit handlers.
        Atexit(+[]() XRAY_NEVER_INSTRUMENT {
          if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
            return;

          auto Cleanup =
              at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });

          // Finalize and flush.
          if (profilingFinalize() != XRAY_LOG_FINALIZED ||
              profilingFlush() != XRAY_LOG_FLUSHED)
            return;

          if (Verbosity())
            Report("XRay Profile flushed at exit.\n");
        });
      });

  __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
  __xray_set_handler(profilingHandleArg0);
  __xray_set_handler_arg1(profilingHandleArg1);

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
               memory_order_release);
  if (Verbosity())
    Report("XRay Profiling init successful.\n");

  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}

bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
  // Set up the flag defaults from the static defaults and the
  // compiler-provided defaults.
  {
    SpinMutexLock Lock(&ProfilerOptionsMutex);
    auto *F = profilingFlags();
    F->setDefaults();
    FlagParser ProfilingParser;
    registerProfilerFlags(&ProfilingParser, F);
    ProfilingParser.ParseString(profilingCompilerDefinedFlags());
  }

  XRayLogImpl Impl{
      profilingLoggingInit,
      profilingFinalize,
      profilingHandleArg0,
      profilingFlush,
  };
  auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
    if (Verbosity())
      Report("Cannot register XRay Profiling mode to 'xray-profiling'; "
             "error = %d\n",
             RegistrationResult);
    return false;
  }

  if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
    __xray_log_select_mode("xray-profiling");
  return true;
}

} // namespace __xray

static auto UNUSED Unused = __xray::profilingDynamicInitializer();
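// Example (illustrative; the names here are standard XRay flags, not defined
// in this file): a binary built with -fxray-instrument can select this mode
// at startup via the environment:
//
//   XRAY_OPTIONS="patch_premain=true xray_mode=xray-profiling" ./a.out
//
// Mode-specific knobs can be supplied through XRAY_PROFILING_OPTIONS, which
// profilingLoggingInit parses above.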