//===-- xray_profiling.cpp --------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This is the implementation of a profiling handler.
//
//===----------------------------------------------------------------------===//
#include <memory>
#include <time.h>

#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_flags.h"
#include "xray/xray_interface.h"
#include "xray/xray_log_interface.h"
#include "xray_buffer_queue.h"
#include "xray_flags.h"
#include "xray_profile_collector.h"
#include "xray_profiling_flags.h"
#include "xray_recursion_guard.h"
#include "xray_tsc.h"
#include "xray_utils.h"
#include <pthread.h>

namespace __xray {

namespace {

static atomic_sint32_t ProfilerLogFlushStatus = {
    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};

static atomic_sint32_t ProfilerLogStatus = {
    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};

static SpinMutex ProfilerOptionsMutex;

struct ProfilingData {
  atomic_uintptr_t Allocators;
  atomic_uintptr_t FCT;
};

static pthread_key_t ProfilingKey;

// We use a global buffer queue, which gets initialized once at profiler
// initialization time, and gets reset when profiling is "done".
static std::aligned_storage<sizeof(BufferQueue), alignof(BufferQueue)>::type
    BufferQueueStorage;
static BufferQueue *BQ = nullptr;

thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
                                  alignof(FunctionCallTrie::Allocators)>::type
    AllocatorsStorage;
thread_local std::aligned_storage<sizeof(FunctionCallTrie),
                                  alignof(FunctionCallTrie)>::type
    FunctionCallTrieStorage;
thread_local ProfilingData TLD{{0}, {0}};
thread_local atomic_uint8_t ReentranceGuard{0};

// We use a separate guard to ensure that, if this thread is already cleaning
// up, any signal handlers running on it don't attempt to clean up or
// initialize the thread-local data.
thread_local atomic_uint8_t TLDInitGuard{0};

// We also use a separate latch to signal that the thread is exiting, so that
// non-essential work (things like recording events, etc.) is ignored.
thread_local atomic_uint8_t ThreadExitingLatch{0};

static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
  thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
    pthread_setspecific(ProfilingKey, &TLD);
    return false;
  }();
  (void)ThreadOnce;

  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return nullptr;

  if (atomic_load_relaxed(&ThreadExitingLatch))
    return nullptr;

  uptr Allocators = 0;
  if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
                                     memory_order_acq_rel)) {
    bool Success = false;
    auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        atomic_store(&TLD.Allocators, 0, memory_order_release);
    });

    // Acquire a set of buffers for this thread.
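    // Each acquisition below is paired with an at_scope_exit undo that
    // releases the buffer again unless Success is set by the end of the
    // block, so a failure partway through leaks no buffers and leaves
    // TLD.Allocators reset to 0.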
    if (BQ == nullptr)
      return nullptr;

    if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;

    Success = true;
    new (&AllocatorsStorage) FunctionCallTrie::Allocators(
        FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
    Allocators = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
    atomic_store(&TLD.Allocators, Allocators, memory_order_release);
  }

  if (Allocators == 1)
    return nullptr;

  uptr FCT = 0;
  if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1,
                                     memory_order_acq_rel)) {
    new (&FunctionCallTrieStorage)
        FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
            atomic_load_relaxed(&TLD.Allocators)));
    FCT = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
    atomic_store(&TLD.FCT, FCT, memory_order_release);
  }

  if (FCT == 1)
    return nullptr;

  return &TLD;
}

static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
  auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
  if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
                 &FunctionCallTrieStorage)))
    reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();

  auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
  if (Allocators ==
      reinterpret_cast<uptr>(
          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
    reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
}

static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return;

  uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
  if (P != reinterpret_cast<uptr>(
               reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
    return;

  auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
  DCHECK_NE(FCT, nullptr);

  uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
  if (A !=
      reinterpret_cast<uptr>(
          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
    return;

  auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
  DCHECK_NE(Allocators, nullptr);

  // Always move the data into the profile collector.
  profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
                                std::move(ThreadBuffers), GetTid());

  // Re-initialize the ThreadBuffers object to a known "default" state.
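  // Note that ThreadBuffers was moved from in the post(...) call above;
  // value-initializing it here returns it to a well-defined state so this
  // thread can acquire a fresh set of buffers later.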
  ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
}

} // namespace

const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
#ifdef XRAY_PROFILER_DEFAULT_OPTIONS
  return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);
#else
  return "";
#endif
}

XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
  if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
      XRayLogInitStatus::XRAY_LOG_FINALIZED) {
    if (Verbosity())
      Report("Not flushing profiles, profiling has not been finalized.\n");
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  RecursionGuard SignalGuard(ReentranceGuard);
  if (!SignalGuard) {
    if (Verbosity())
      Report("Cannot finalize properly inside a signal handler!\n");
    atomic_store(&ProfilerLogFlushStatus,
                 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
                 memory_order_release);
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
                                 XRayLogFlushStatus::XRAY_LOG_FLUSHING,
                                 memory_order_acq_rel);
  if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
    if (Verbosity())
      Report("Not flushing profiles, implementation still flushing.\n");
    return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
  }

  // At this point, we'll create the file that will contain the profile, but
  // only if the options say so.
  if (!profilingFlags()->no_flush) {
    // First check whether we have data in the profile collector service
    // before we try to write anything down.
    XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});
    if (B.Data == nullptr) {
      if (Verbosity())
        Report("profiling: No data to flush.\n");
    } else {
      LogWriter *LW = LogWriter::Open();
      if (LW == nullptr) {
        if (Verbosity())
          Report("profiling: Failed to flush to file, dropping data.\n");
      } else {
        // Now for each of the buffers, write out the profile data as we would
        // see it in memory, verbatim.
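        // The buffer iteration protocol: seeding nextBuffer with {nullptr, 0}
        // above yielded the first buffer, passing the previous buffer yields
        // the next one, and a null/empty buffer marks the end of the sequence.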
        while (B.Data != nullptr && B.Size != 0) {
          LW->WriteAll(reinterpret_cast<const char *>(B.Data),
                       reinterpret_cast<const char *>(B.Data) + B.Size);
          B = profileCollectorService::nextBuffer(B);
        }
      }
      LogWriter::Close(LW);
    }
  }

  profileCollectorService::reset();

  atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
               memory_order_release);
  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
               memory_order_release);

  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
}

void profilingHandleArg0(int32_t FuncId,
                         XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
  unsigned char CPU;
  auto TSC = readTSC(CPU);
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return;

  auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
    return;

  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
    postCurrentThreadFCT(TLD);
    return;
  }

  auto T = getThreadLocalData();
  if (T == nullptr)
    return;

  auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
  switch (Entry) {
  case XRayEntryType::ENTRY:
  case XRayEntryType::LOG_ARGS_ENTRY:
    FCT->enterFunction(FuncId, TSC, CPU);
    break;
  case XRayEntryType::EXIT:
  case XRayEntryType::TAIL:
    FCT->exitFunction(FuncId, TSC, CPU);
    break;
  default:
    // FIXME: Handle bugs.
    break;
  }
}

void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry,
                         uint64_t) XRAY_NEVER_INSTRUMENT {
  return profilingHandleArg0(FuncId, Entry);
}

XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_FINALIZING,
                                      memory_order_release)) {
    if (Verbosity())
      Report("Cannot finalize profile; profiling is not initialized.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  // Mark, then finalize, the current generation of buffers. Threads currently
  // holding buffers may keep using them, and the last reference performs the
  // memory cleanup.
  DCHECK_NE(BQ, nullptr);
  BQ->finalize();

  // Wait a grace period to allow threads to see that we're finalizing.
  SleepForMillis(profilingFlags()->grace_period_ms);

  // If for some reason we are entering this function from an instrumented
  // handler, we bail out.
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return static_cast<XRayLogInitStatus>(CurrentStatus);

  // Post the current thread's data if we have any.
  postCurrentThreadFCT(TLD);

  // Then we force serialize the log data.
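  // serialize() converts the per-thread tries posted so far into the profile
  // buffers that profileCollectorService::nextBuffer() hands out during a
  // flush; see xray_profile_collector.h for the details.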
  profileCollectorService::serialize();

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
               memory_order_release);
  return XRayLogInitStatus::XRAY_LOG_FINALIZED;
}

XRayLogInitStatus
profilingLoggingInit(size_t, size_t, void *Options,
                     size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;

  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_INITIALIZING,
                                      memory_order_acq_rel)) {
    if (Verbosity())
      Report("Cannot initialize an already-initialized profiling "
             "implementation.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  {
    SpinMutexLock Lock(&ProfilerOptionsMutex);
    FlagParser ConfigParser;
    ProfilerFlags Flags;
    Flags.setDefaults();
    registerProfilerFlags(&ConfigParser, &Flags);
    ConfigParser.ParseString(profilingCompilerDefinedFlags());
    const char *Env = GetEnv("XRAY_PROFILING_OPTIONS");
    if (Env == nullptr)
      Env = "";
    ConfigParser.ParseString(Env);

    // Then parse the configuration string provided.
    ConfigParser.ParseString(static_cast<const char *>(Options));
    if (Verbosity())
      ReportUnrecognizedFlags();
    *profilingFlags() = Flags;
  }

  // We need to reset the profile data collection implementation now.
  profileCollectorService::reset();

  // Then also reset the buffer queue implementation.
  if (BQ == nullptr) {
    bool Success = false;
    new (&BufferQueueStorage)
        BufferQueue(profilingFlags()->per_thread_allocator_max,
                    profilingFlags()->buffers_max, Success);
    if (!Success) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers!\n");
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    // If we've succeeded, set the global pointer to the initialized storage.
    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
  } else {
    BQ->finalize();
    auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
                               profilingFlags()->buffers_max);

    if (InitStatus != BufferQueue::ErrorCode::Ok) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers; error: %s\n",
               BufferQueue::getErrorString(InitStatus));
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    DCHECK(!BQ->finalizing());
  }

  // We need to set up the exit handlers.
  static pthread_once_t Once = PTHREAD_ONCE_INIT;
  pthread_once(
      &Once, +[] {
        pthread_key_create(
            &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
              if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
                return;

              if (P == nullptr)
                return;

              auto T = reinterpret_cast<ProfilingData *>(P);
              if (atomic_load_relaxed(&T->Allocators) == 0)
                return;

              {
                // If we're somehow executing this while inside a
                // non-reentrant-friendly context, we skip attempting to post
                // the current thread's data.
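                // RecursionGuard evaluates to false when the guard byte is
                // already held (e.g. this destructor interrupted an in-flight
                // XRay handler on the same thread); in that case we skip
                // posting rather than re-enter non-reentrant code.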
                RecursionGuard G(ReentranceGuard);
                if (!G)
                  return;

                postCurrentThreadFCT(*T);
              }
            });

        // We also need to set up an exit handler, so that we can get the
        // profile information at exit time. We use the C API to do this, to
        // avoid relying on C++ ABI functions for registering exit handlers.
        Atexit(+[]() XRAY_NEVER_INSTRUMENT {
          if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
            return;

          auto Cleanup =
              at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });

          // Finalize and flush.
          if (profilingFinalize() != XRAY_LOG_FINALIZED ||
              profilingFlush() != XRAY_LOG_FLUSHED)
            return;

          if (Verbosity())
            Report("XRay Profile flushed at exit.\n");
        });
      });

  __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
  __xray_set_handler(profilingHandleArg0);
  __xray_set_handler_arg1(profilingHandleArg1);

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
               memory_order_release);
  if (Verbosity())
    Report("XRay Profiling init successful.\n");

  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}

bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
  // Set up the flag defaults from the static defaults and the
  // compiler-provided defaults.
  {
    SpinMutexLock Lock(&ProfilerOptionsMutex);
    auto *F = profilingFlags();
    F->setDefaults();
    FlagParser ProfilingParser;
    registerProfilerFlags(&ProfilingParser, F);
    ProfilingParser.ParseString(profilingCompilerDefinedFlags());
  }

  XRayLogImpl Impl{
      profilingLoggingInit,
      profilingFinalize,
      profilingHandleArg0,
      profilingFlush,
  };
  auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
    if (Verbosity())
      Report("Cannot register XRay Profiling mode to 'xray-profiling'; "
             "error = %d\n",
             RegistrationResult);
    return false;
  }

  if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
    __xray_log_select_mode("xray-profiling");
  return true;
}

} // namespace __xray

static auto UNUSED Unused = __xray::profilingDynamicInitializer();
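// Usage sketch (not part of the runtime): with an XRay-instrumented binary,
// this mode is typically selected and tuned through the environment, e.g.
//
//   XRAY_OPTIONS="patch_premain=true xray_mode=xray-profiling" \
//   XRAY_PROFILING_OPTIONS="no_flush=false" ./instrumented_binary
//
// The profile flushed at exit can then be inspected with the llvm-xray tool.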