1 //===-- xray_fdr_logging.cpp -----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is a part of XRay, a dynamic runtime instrumentation system. 10 // 11 // Here we implement the Flight Data Recorder mode for XRay, where we use 12 // compact structures to store records in memory as well as when writing out the 13 // data to files. 14 // 15 //===----------------------------------------------------------------------===// 16 #include "xray_fdr_logging.h" 17 #include <cassert> 18 #include <errno.h> 19 #include <limits> 20 #include <memory> 21 #include <pthread.h> 22 #include <sys/time.h> 23 #include <time.h> 24 #include <unistd.h> 25 26 #include "sanitizer_common/sanitizer_allocator_internal.h" 27 #include "sanitizer_common/sanitizer_atomic.h" 28 #include "sanitizer_common/sanitizer_common.h" 29 #include "xray/xray_interface.h" 30 #include "xray/xray_records.h" 31 #include "xray_allocator.h" 32 #include "xray_buffer_queue.h" 33 #include "xray_defs.h" 34 #include "xray_fdr_controller.h" 35 #include "xray_fdr_flags.h" 36 #include "xray_fdr_log_writer.h" 37 #include "xray_flags.h" 38 #include "xray_recursion_guard.h" 39 #include "xray_tsc.h" 40 #include "xray_utils.h" 41 42 namespace __xray { 43 44 static atomic_sint32_t LoggingStatus = { 45 XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; 46 47 namespace { 48 49 // Group together thread-local-data in a struct, then hide it behind a function 50 // call so that it can be initialized on first use instead of as a global. We 51 // force the alignment to 64-bytes for x86 cache line alignment, as this 52 // structure is used in the hot path of implementation. 53 struct XRAY_TLS_ALIGNAS(64) ThreadLocalData { 54 BufferQueue::Buffer Buffer{}; 55 BufferQueue *BQ = nullptr; 56 57 using LogWriterStorage = 58 typename std::aligned_storage<sizeof(FDRLogWriter), 59 alignof(FDRLogWriter)>::type; 60 61 LogWriterStorage LWStorage; 62 FDRLogWriter *Writer = nullptr; 63 64 using ControllerStorage = 65 typename std::aligned_storage<sizeof(FDRController<>), 66 alignof(FDRController<>)>::type; 67 ControllerStorage CStorage; 68 FDRController<> *Controller = nullptr; 69 }; 70 71 } // namespace 72 73 static_assert(std::is_trivially_destructible<ThreadLocalData>::value, 74 "ThreadLocalData must be trivially destructible"); 75 76 // Use a global pthread key to identify thread-local data for logging. 77 static pthread_key_t Key; 78 79 // Global BufferQueue. 80 static std::aligned_storage<sizeof(BufferQueue)>::type BufferQueueStorage; 81 static BufferQueue *BQ = nullptr; 82 83 // Global thresholds for function durations. 84 static atomic_uint64_t ThresholdTicks{0}; 85 86 // Global for ticks per second. 87 static atomic_uint64_t TicksPerSec{0}; 88 89 static atomic_sint32_t LogFlushStatus = { 90 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; 91 92 // This function will initialize the thread-local data structure used by the FDR 93 // logging implementation and return a reference to it. The implementation 94 // details require a bit of care to maintain. 95 // 96 // First, some requirements on the implementation in general: 97 // 98 // - XRay handlers should not call any memory allocation routines that may 99 // delegate to an instrumented implementation. This means functions like 100 // malloc() and free() should not be called while instrumenting. 101 // 102 // - We would like to use some thread-local data initialized on first-use of 103 // the XRay instrumentation. These allow us to implement unsynchronized 104 // routines that access resources associated with the thread. 105 // 106 // The implementation here uses a few mechanisms that allow us to provide both 107 // the requirements listed above. We do this by: 108 // 109 // 1. Using a thread-local aligned storage buffer for representing the 110 // ThreadLocalData struct. This data will be uninitialized memory by 111 // design. 112 // 113 // 2. Not requiring a thread exit handler/implementation, keeping the 114 // thread-local as purely a collection of references/data that do not 115 // require cleanup. 116 // 117 // We're doing this to avoid using a `thread_local` object that has a 118 // non-trivial destructor, because the C++ runtime might call std::malloc(...) 119 // to register calls to destructors. Deadlocks may arise when, for example, an 120 // externally provided malloc implementation is XRay instrumented, and 121 // initializing the thread-locals involves calling into malloc. A malloc 122 // implementation that does global synchronization might be holding a lock for a 123 // critical section, calling a function that might be XRay instrumented (and 124 // thus in turn calling into malloc by virtue of registration of the 125 // thread_local's destructor). 126 #if XRAY_HAS_TLS_ALIGNAS 127 static_assert(alignof(ThreadLocalData) >= 64, 128 "ThreadLocalData must be cache line aligned."); 129 #endif 130 static ThreadLocalData &getThreadLocalData() { 131 thread_local typename std::aligned_storage< 132 sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{}; 133 134 if (pthread_getspecific(Key) == NULL) { 135 new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{}; 136 pthread_setspecific(Key, &TLDStorage); 137 } 138 139 return *reinterpret_cast<ThreadLocalData *>(&TLDStorage); 140 } 141 142 static XRayFileHeader &fdrCommonHeaderInfo() { 143 static std::aligned_storage<sizeof(XRayFileHeader)>::type HStorage; 144 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; 145 static bool TSCSupported = true; 146 static uint64_t CycleFrequency = NanosecondsPerSecond; 147 pthread_once( 148 &OnceInit, +[] { 149 XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage); 150 // Version 2 of the log writes the extents of the buffer, instead of 151 // relying on an end-of-buffer record. 152 // Version 3 includes PID metadata record. 153 // Version 4 includes CPU data in the custom event records. 154 // Version 5 uses relative deltas for custom and typed event records, 155 // and removes the CPU data in custom event records (similar to how 156 // function records use deltas instead of full TSCs and rely on other 157 // metadata records for TSC wraparound and CPU migration). 158 H.Version = 5; 159 H.Type = FileTypes::FDR_LOG; 160 161 // Test for required CPU features and cache the cycle frequency 162 TSCSupported = probeRequiredCPUFeatures(); 163 if (TSCSupported) 164 CycleFrequency = getTSCFrequency(); 165 H.CycleFrequency = CycleFrequency; 166 167 // FIXME: Actually check whether we have 'constant_tsc' and 168 // 'nonstop_tsc' before setting the values in the header. 169 H.ConstantTSC = 1; 170 H.NonstopTSC = 1; 171 }); 172 return reinterpret_cast<XRayFileHeader &>(HStorage); 173 } 174 175 // This is the iterator implementation, which knows how to handle FDR-mode 176 // specific buffers. This is used as an implementation of the iterator function 177 // needed by __xray_set_buffer_iterator(...). It maintains a global state of the 178 // buffer iteration for the currently installed FDR mode buffers. In particular: 179 // 180 // - If the argument represents the initial state of XRayBuffer ({nullptr, 0}) 181 // then the iterator returns the header information. 182 // - If the argument represents the header information ({address of header 183 // info, size of the header info}) then it returns the first FDR buffer's 184 // address and extents. 185 // - It will keep returning the next buffer and extents as there are more 186 // buffers to process. When the input represents the last buffer, it will 187 // return the initial state to signal completion ({nullptr, 0}). 188 // 189 // See xray/xray_log_interface.h for more details on the requirements for the 190 // implementations of __xray_set_buffer_iterator(...) and 191 // __xray_log_process_buffers(...). 192 XRayBuffer fdrIterator(const XRayBuffer B) { 193 DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0); 194 DCHECK(BQ->finalizing()); 195 196 if (BQ == nullptr || !BQ->finalizing()) { 197 if (Verbosity()) 198 Report( 199 "XRay FDR: Failed global buffer queue is null or not finalizing!\n"); 200 return {nullptr, 0}; 201 } 202 203 // We use a global scratch-pad for the header information, which only gets 204 // initialized the first time this function is called. We'll update one part 205 // of this information with some relevant data (in particular the number of 206 // buffers to expect). 207 static std::aligned_storage<sizeof(XRayFileHeader)>::type HeaderStorage; 208 static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT; 209 pthread_once( 210 &HeaderOnce, +[] { 211 reinterpret_cast<XRayFileHeader &>(HeaderStorage) = 212 fdrCommonHeaderInfo(); 213 }); 214 215 // We use a convenience alias for code referring to Header from here on out. 216 auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage); 217 if (B.Data == nullptr && B.Size == 0) { 218 Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; 219 return XRayBuffer{static_cast<void *>(&Header), sizeof(Header)}; 220 } 221 222 static BufferQueue::const_iterator It{}; 223 static BufferQueue::const_iterator End{}; 224 static uint8_t *CurrentBuffer{nullptr}; 225 static size_t SerializedBufferSize = 0; 226 if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) { 227 // From this point on, we provide raw access to the raw buffer we're getting 228 // from the BufferQueue. We're relying on the iterators from the current 229 // Buffer queue. 230 It = BQ->cbegin(); 231 End = BQ->cend(); 232 } 233 234 if (CurrentBuffer != nullptr) { 235 deallocateBuffer(CurrentBuffer, SerializedBufferSize); 236 CurrentBuffer = nullptr; 237 } 238 239 if (It == End) 240 return {nullptr, 0}; 241 242 // Set up the current buffer to contain the extents like we would when writing 243 // out to disk. The difference here would be that we still write "empty" 244 // buffers, or at least go through the iterators faithfully to let the 245 // handlers see the empty buffers in the queue. 246 // 247 // We need this atomic fence here to ensure that writes happening to the 248 // buffer have been committed before we load the extents atomically. Because 249 // the buffer is not explicitly synchronised across threads, we rely on the 250 // fence ordering to ensure that writes we expect to have been completed 251 // before the fence are fully committed before we read the extents. 252 atomic_thread_fence(memory_order_acquire); 253 auto BufferSize = atomic_load(It->Extents, memory_order_acquire); 254 SerializedBufferSize = BufferSize + sizeof(MetadataRecord); 255 CurrentBuffer = allocateBuffer(SerializedBufferSize); 256 if (CurrentBuffer == nullptr) 257 return {nullptr, 0}; 258 259 // Write out the extents as a Metadata Record into the CurrentBuffer. 260 MetadataRecord ExtentsRecord; 261 ExtentsRecord.Type = uint8_t(RecordType::Metadata); 262 ExtentsRecord.RecordKind = 263 uint8_t(MetadataRecord::RecordKinds::BufferExtents); 264 internal_memcpy(ExtentsRecord.Data, &BufferSize, sizeof(BufferSize)); 265 auto AfterExtents = 266 static_cast<char *>(internal_memcpy(CurrentBuffer, &ExtentsRecord, 267 sizeof(MetadataRecord))) + 268 sizeof(MetadataRecord); 269 internal_memcpy(AfterExtents, It->Data, BufferSize); 270 271 XRayBuffer Result; 272 Result.Data = CurrentBuffer; 273 Result.Size = SerializedBufferSize; 274 ++It; 275 return Result; 276 } 277 278 // Must finalize before flushing. 279 XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { 280 if (atomic_load(&LoggingStatus, memory_order_acquire) != 281 XRayLogInitStatus::XRAY_LOG_FINALIZED) { 282 if (Verbosity()) 283 Report("Not flushing log, implementation is not finalized.\n"); 284 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; 285 } 286 287 if (atomic_exchange(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHING, 288 memory_order_release) == 289 XRayLogFlushStatus::XRAY_LOG_FLUSHING) { 290 if (Verbosity()) 291 Report("Not flushing log, implementation is still flushing.\n"); 292 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; 293 } 294 295 if (BQ == nullptr) { 296 if (Verbosity()) 297 Report("Cannot flush when global buffer queue is null.\n"); 298 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; 299 } 300 301 // We wait a number of milliseconds to allow threads to see that we've 302 // finalised before attempting to flush the log. 303 SleepForMillis(fdrFlags()->grace_period_ms); 304 305 // At this point, we're going to uninstall the iterator implementation, before 306 // we decide to do anything further with the global buffer queue. 307 __xray_log_remove_buffer_iterator(); 308 309 // Once flushed, we should set the global status of the logging implementation 310 // to "uninitialized" to allow for FDR-logging multiple runs. 311 auto ResetToUnitialized = at_scope_exit([] { 312 atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, 313 memory_order_release); 314 }); 315 316 auto CleanupBuffers = at_scope_exit([] { 317 auto &TLD = getThreadLocalData(); 318 if (TLD.Controller != nullptr) 319 TLD.Controller->flush(); 320 }); 321 322 if (fdrFlags()->no_file_flush) { 323 if (Verbosity()) 324 Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n"); 325 326 atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, 327 memory_order_release); 328 return XRayLogFlushStatus::XRAY_LOG_FLUSHED; 329 } 330 331 // We write out the file in the following format: 332 // 333 // 1) We write down the XRay file header with version 1, type FDR_LOG. 334 // 2) Then we use the 'apply' member of the BufferQueue that's live, to 335 // ensure that at this point in time we write down the buffers that have 336 // been released (and marked "used") -- we dump the full buffer for now 337 // (fixed-sized) and let the tools reading the buffers deal with the data 338 // afterwards. 339 // 340 LogWriter *LW = LogWriter::Open(); 341 if (LW == nullptr) { 342 auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; 343 atomic_store(&LogFlushStatus, Result, memory_order_release); 344 return Result; 345 } 346 347 XRayFileHeader Header = fdrCommonHeaderInfo(); 348 Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; 349 LW->WriteAll(reinterpret_cast<char *>(&Header), 350 reinterpret_cast<char *>(&Header) + sizeof(Header)); 351 352 // Release the current thread's buffer before we attempt to write out all the 353 // buffers. This ensures that in case we had only a single thread going, that 354 // we are able to capture the data nonetheless. 355 auto &TLD = getThreadLocalData(); 356 if (TLD.Controller != nullptr) 357 TLD.Controller->flush(); 358 359 BQ->apply([&](const BufferQueue::Buffer &B) { 360 // Starting at version 2 of the FDR logging implementation, we only write 361 // the records identified by the extents of the buffer. We use the Extents 362 // from the Buffer and write that out as the first record in the buffer. We 363 // still use a Metadata record, but fill in the extents instead for the 364 // data. 365 MetadataRecord ExtentsRecord; 366 auto BufferExtents = atomic_load(B.Extents, memory_order_acquire); 367 DCHECK(BufferExtents <= B.Size); 368 ExtentsRecord.Type = uint8_t(RecordType::Metadata); 369 ExtentsRecord.RecordKind = 370 uint8_t(MetadataRecord::RecordKinds::BufferExtents); 371 internal_memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents)); 372 if (BufferExtents > 0) { 373 LW->WriteAll(reinterpret_cast<char *>(&ExtentsRecord), 374 reinterpret_cast<char *>(&ExtentsRecord) + 375 sizeof(MetadataRecord)); 376 LW->WriteAll(reinterpret_cast<char *>(B.Data), 377 reinterpret_cast<char *>(B.Data) + BufferExtents); 378 } 379 }); 380 381 atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, 382 memory_order_release); 383 return XRayLogFlushStatus::XRAY_LOG_FLUSHED; 384 } 385 386 XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT { 387 s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; 388 if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus, 389 XRayLogInitStatus::XRAY_LOG_FINALIZING, 390 memory_order_release)) { 391 if (Verbosity()) 392 Report("Cannot finalize log, implementation not initialized.\n"); 393 return static_cast<XRayLogInitStatus>(CurrentStatus); 394 } 395 396 // Do special things to make the log finalize itself, and not allow any more 397 // operations to be performed until re-initialized. 398 if (BQ == nullptr) { 399 if (Verbosity()) 400 Report("Attempting to finalize an uninitialized global buffer!\n"); 401 } else { 402 BQ->finalize(); 403 } 404 405 atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED, 406 memory_order_release); 407 return XRayLogInitStatus::XRAY_LOG_FINALIZED; 408 } 409 410 struct TSCAndCPU { 411 uint64_t TSC = 0; 412 unsigned char CPU = 0; 413 }; 414 415 static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT { 416 // We want to get the TSC as early as possible, so that we can check whether 417 // we've seen this CPU before. We also do it before we load anything else, 418 // to allow for forward progress with the scheduling. 419 TSCAndCPU Result; 420 421 // Test once for required CPU features 422 static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT; 423 static bool TSCSupported = true; 424 pthread_once( 425 &OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); }); 426 427 if (TSCSupported) { 428 Result.TSC = __xray::readTSC(Result.CPU); 429 } else { 430 // FIXME: This code needs refactoring as it appears in multiple locations 431 timespec TS; 432 int result = clock_gettime(CLOCK_REALTIME, &TS); 433 if (result != 0) { 434 Report("clock_gettime(2) return %d, errno=%d", result, int(errno)); 435 TS = {0, 0}; 436 } 437 Result.CPU = 0; 438 Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; 439 } 440 return Result; 441 } 442 443 thread_local atomic_uint8_t Running{0}; 444 445 static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT { 446 // Check if we're finalizing, before proceeding. 447 { 448 auto Status = atomic_load(&LoggingStatus, memory_order_acquire); 449 if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING || 450 Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) { 451 if (TLD.Controller != nullptr) { 452 TLD.Controller->flush(); 453 TLD.Controller = nullptr; 454 } 455 return false; 456 } 457 } 458 459 if (UNLIKELY(TLD.Controller == nullptr)) { 460 // Set up the TLD buffer queue. 461 if (UNLIKELY(BQ == nullptr)) 462 return false; 463 TLD.BQ = BQ; 464 465 // Check that we have a valid buffer. 466 if (TLD.Buffer.Generation != BQ->generation() && 467 TLD.BQ->releaseBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok) 468 return false; 469 470 // Set up a buffer, before setting up the log writer. Bail out on failure. 471 if (TLD.BQ->getBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok) 472 return false; 473 474 // Set up the Log Writer for this thread. 475 if (UNLIKELY(TLD.Writer == nullptr)) { 476 auto *LWStorage = reinterpret_cast<FDRLogWriter *>(&TLD.LWStorage); 477 new (LWStorage) FDRLogWriter(TLD.Buffer); 478 TLD.Writer = LWStorage; 479 } else { 480 TLD.Writer->resetRecord(); 481 } 482 483 auto *CStorage = reinterpret_cast<FDRController<> *>(&TLD.CStorage); 484 new (CStorage) 485 FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime, 486 atomic_load_relaxed(&ThresholdTicks)); 487 TLD.Controller = CStorage; 488 } 489 490 DCHECK_NE(TLD.Controller, nullptr); 491 return true; 492 } 493 494 void fdrLoggingHandleArg0(int32_t FuncId, 495 XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { 496 auto TC = getTimestamp(); 497 auto &TSC = TC.TSC; 498 auto &CPU = TC.CPU; 499 RecursionGuard Guard{Running}; 500 if (!Guard) 501 return; 502 503 auto &TLD = getThreadLocalData(); 504 if (!setupTLD(TLD)) 505 return; 506 507 switch (Entry) { 508 case XRayEntryType::ENTRY: 509 case XRayEntryType::LOG_ARGS_ENTRY: 510 TLD.Controller->functionEnter(FuncId, TSC, CPU); 511 return; 512 case XRayEntryType::EXIT: 513 TLD.Controller->functionExit(FuncId, TSC, CPU); 514 return; 515 case XRayEntryType::TAIL: 516 TLD.Controller->functionTailExit(FuncId, TSC, CPU); 517 return; 518 case XRayEntryType::CUSTOM_EVENT: 519 case XRayEntryType::TYPED_EVENT: 520 break; 521 } 522 } 523 524 void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, 525 uint64_t Arg) XRAY_NEVER_INSTRUMENT { 526 auto TC = getTimestamp(); 527 auto &TSC = TC.TSC; 528 auto &CPU = TC.CPU; 529 RecursionGuard Guard{Running}; 530 if (!Guard) 531 return; 532 533 auto &TLD = getThreadLocalData(); 534 if (!setupTLD(TLD)) 535 return; 536 537 switch (Entry) { 538 case XRayEntryType::ENTRY: 539 case XRayEntryType::LOG_ARGS_ENTRY: 540 TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg); 541 return; 542 case XRayEntryType::EXIT: 543 TLD.Controller->functionExit(FuncId, TSC, CPU); 544 return; 545 case XRayEntryType::TAIL: 546 TLD.Controller->functionTailExit(FuncId, TSC, CPU); 547 return; 548 case XRayEntryType::CUSTOM_EVENT: 549 case XRayEntryType::TYPED_EVENT: 550 break; 551 } 552 } 553 554 void fdrLoggingHandleCustomEvent(void *Event, 555 std::size_t EventSize) XRAY_NEVER_INSTRUMENT { 556 auto TC = getTimestamp(); 557 auto &TSC = TC.TSC; 558 auto &CPU = TC.CPU; 559 RecursionGuard Guard{Running}; 560 if (!Guard) 561 return; 562 563 // Complain when we ever get at least one custom event that's larger than what 564 // we can possibly support. 565 if (EventSize > 566 static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) { 567 static pthread_once_t Once = PTHREAD_ONCE_INIT; 568 pthread_once( 569 &Once, +[] { 570 Report("Custom event size too large; truncating to %d.\n", 571 std::numeric_limits<int32_t>::max()); 572 }); 573 } 574 575 auto &TLD = getThreadLocalData(); 576 if (!setupTLD(TLD)) 577 return; 578 579 int32_t ReducedEventSize = static_cast<int32_t>(EventSize); 580 TLD.Controller->customEvent(TSC, CPU, Event, ReducedEventSize); 581 } 582 583 void fdrLoggingHandleTypedEvent( 584 uint16_t EventType, const void *Event, 585 std::size_t EventSize) noexcept XRAY_NEVER_INSTRUMENT { 586 auto TC = getTimestamp(); 587 auto &TSC = TC.TSC; 588 auto &CPU = TC.CPU; 589 RecursionGuard Guard{Running}; 590 if (!Guard) 591 return; 592 593 // Complain when we ever get at least one typed event that's larger than what 594 // we can possibly support. 595 if (EventSize > 596 static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) { 597 static pthread_once_t Once = PTHREAD_ONCE_INIT; 598 pthread_once( 599 &Once, +[] { 600 Report("Typed event size too large; truncating to %d.\n", 601 std::numeric_limits<int32_t>::max()); 602 }); 603 } 604 605 auto &TLD = getThreadLocalData(); 606 if (!setupTLD(TLD)) 607 return; 608 609 int32_t ReducedEventSize = static_cast<int32_t>(EventSize); 610 TLD.Controller->typedEvent(TSC, CPU, EventType, Event, ReducedEventSize); 611 } 612 613 XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options, 614 size_t OptionsSize) XRAY_NEVER_INSTRUMENT { 615 if (Options == nullptr) 616 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; 617 618 s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; 619 if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus, 620 XRayLogInitStatus::XRAY_LOG_INITIALIZING, 621 memory_order_release)) { 622 if (Verbosity()) 623 Report("Cannot initialize already initialized implementation.\n"); 624 return static_cast<XRayLogInitStatus>(CurrentStatus); 625 } 626 627 if (Verbosity()) 628 Report("Initializing FDR mode with options: %s\n", 629 static_cast<const char *>(Options)); 630 631 // TODO: Factor out the flags specific to the FDR mode implementation. For 632 // now, use the global/single definition of the flags, since the FDR mode 633 // flags are already defined there. 634 FlagParser FDRParser; 635 FDRFlags FDRFlags; 636 registerXRayFDRFlags(&FDRParser, &FDRFlags); 637 FDRFlags.setDefaults(); 638 639 // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided 640 // options until we migrate everyone to use the XRAY_FDR_OPTIONS 641 // compiler-provided options. 642 FDRParser.ParseString(useCompilerDefinedFlags()); 643 FDRParser.ParseString(useCompilerDefinedFDRFlags()); 644 auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS"); 645 if (EnvOpts == nullptr) 646 EnvOpts = ""; 647 FDRParser.ParseString(EnvOpts); 648 649 // FIXME: Remove this when we fully remove the deprecated flags. 650 if (internal_strlen(EnvOpts) == 0) { 651 FDRFlags.func_duration_threshold_us = 652 flags()->xray_fdr_log_func_duration_threshold_us; 653 FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms; 654 } 655 656 // The provided options should always override the compiler-provided and 657 // environment-variable defined options. 658 FDRParser.ParseString(static_cast<const char *>(Options)); 659 *fdrFlags() = FDRFlags; 660 auto BufferSize = FDRFlags.buffer_size; 661 auto BufferMax = FDRFlags.buffer_max; 662 663 if (BQ == nullptr) { 664 bool Success = false; 665 BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage); 666 new (BQ) BufferQueue(BufferSize, BufferMax, Success); 667 if (!Success) { 668 Report("BufferQueue init failed.\n"); 669 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; 670 } 671 } else { 672 if (BQ->init(BufferSize, BufferMax) != BufferQueue::ErrorCode::Ok) { 673 if (Verbosity()) 674 Report("Failed to re-initialize global buffer queue. Init failed.\n"); 675 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; 676 } 677 } 678 679 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; 680 pthread_once( 681 &OnceInit, +[] { 682 atomic_store(&TicksPerSec, 683 probeRequiredCPUFeatures() ? getTSCFrequency() 684 : __xray::NanosecondsPerSecond, 685 memory_order_release); 686 pthread_key_create( 687 &Key, +[](void *TLDPtr) { 688 if (TLDPtr == nullptr) 689 return; 690 auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr); 691 if (TLD.BQ == nullptr) 692 return; 693 if (TLD.Buffer.Data == nullptr) 694 return; 695 auto EC = TLD.BQ->releaseBuffer(TLD.Buffer); 696 if (EC != BufferQueue::ErrorCode::Ok) 697 Report("At thread exit, failed to release buffer at %p; " 698 "error=%s\n", 699 TLD.Buffer.Data, BufferQueue::getErrorString(EC)); 700 }); 701 }); 702 703 atomic_store(&ThresholdTicks, 704 atomic_load_relaxed(&TicksPerSec) * 705 fdrFlags()->func_duration_threshold_us / 1000000, 706 memory_order_release); 707 // Arg1 handler should go in first to avoid concurrent code accidentally 708 // falling back to arg0 when it should have ran arg1. 709 __xray_set_handler_arg1(fdrLoggingHandleArg1); 710 // Install the actual handleArg0 handler after initialising the buffers. 711 __xray_set_handler(fdrLoggingHandleArg0); 712 __xray_set_customevent_handler(fdrLoggingHandleCustomEvent); 713 __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent); 714 715 // Install the buffer iterator implementation. 716 __xray_log_set_buffer_iterator(fdrIterator); 717 718 atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED, 719 memory_order_release); 720 721 if (Verbosity()) 722 Report("XRay FDR init successful.\n"); 723 return XRayLogInitStatus::XRAY_LOG_INITIALIZED; 724 } 725 726 bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { 727 XRayLogImpl Impl{ 728 fdrLoggingInit, 729 fdrLoggingFinalize, 730 fdrLoggingHandleArg0, 731 fdrLoggingFlush, 732 }; 733 auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl); 734 if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && 735 Verbosity()) { 736 Report("Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n", 737 RegistrationResult); 738 return false; 739 } 740 741 if (flags()->xray_fdr_log || 742 !internal_strcmp(flags()->xray_mode, "xray-fdr")) { 743 auto SelectResult = __xray_log_select_mode("xray-fdr"); 744 if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && 745 Verbosity()) { 746 Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n", 747 SelectResult); 748 return false; 749 } 750 } 751 return true; 752 } 753 754 } // namespace __xray 755 756 static auto UNUSED Unused = __xray::fdrLogDynamicInitializer(); 757