1 //===-- xray_fdr_logging.cpp -----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is a part of XRay, a dynamic runtime instrumentation system. 10 // 11 // Here we implement the Flight Data Recorder mode for XRay, where we use 12 // compact structures to store records in memory as well as when writing out the 13 // data to files. 14 // 15 //===----------------------------------------------------------------------===// 16 #include "xray_fdr_logging.h" 17 #include <cassert> 18 #include <errno.h> 19 #include <limits> 20 #include <memory> 21 #include <pthread.h> 22 #include <sys/time.h> 23 #include <time.h> 24 #include <unistd.h> 25 26 #include "sanitizer_common/sanitizer_allocator_internal.h" 27 #include "sanitizer_common/sanitizer_atomic.h" 28 #include "sanitizer_common/sanitizer_common.h" 29 #include "xray/xray_interface.h" 30 #include "xray/xray_records.h" 31 #include "xray_allocator.h" 32 #include "xray_buffer_queue.h" 33 #include "xray_defs.h" 34 #include "xray_fdr_controller.h" 35 #include "xray_fdr_flags.h" 36 #include "xray_fdr_log_writer.h" 37 #include "xray_flags.h" 38 #include "xray_recursion_guard.h" 39 #include "xray_tsc.h" 40 #include "xray_utils.h" 41 42 namespace __xray { 43 44 static atomic_sint32_t LoggingStatus = { 45 XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; 46 47 namespace { 48 49 // Group together thread-local-data in a struct, then hide it behind a function 50 // call so that it can be initialized on first use instead of as a global. We 51 // force the alignment to 64-bytes for x86 cache line alignment, as this 52 // structure is used in the hot path of implementation. 53 struct XRAY_TLS_ALIGNAS(64) ThreadLocalData { 54 BufferQueue::Buffer Buffer{}; 55 BufferQueue *BQ = nullptr; 56 57 using LogWriterStorage = 58 typename std::aligned_storage<sizeof(FDRLogWriter), 59 alignof(FDRLogWriter)>::type; 60 61 LogWriterStorage LWStorage; 62 FDRLogWriter *Writer = nullptr; 63 64 using ControllerStorage = 65 typename std::aligned_storage<sizeof(FDRController<>), 66 alignof(FDRController<>)>::type; 67 ControllerStorage CStorage; 68 FDRController<> *Controller = nullptr; 69 }; 70 71 } // namespace 72 73 static_assert(std::is_trivially_destructible<ThreadLocalData>::value, 74 "ThreadLocalData must be trivially destructible"); 75 76 // Use a global pthread key to identify thread-local data for logging. 77 static pthread_key_t Key; 78 79 // Global BufferQueue. 80 static std::aligned_storage<sizeof(BufferQueue)>::type BufferQueueStorage; 81 static BufferQueue *BQ = nullptr; 82 83 // Global thresholds for function durations. 84 static atomic_uint64_t ThresholdTicks{0}; 85 86 // Global for ticks per second. 87 static atomic_uint64_t TicksPerSec{0}; 88 89 static atomic_sint32_t LogFlushStatus = { 90 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; 91 92 // This function will initialize the thread-local data structure used by the FDR 93 // logging implementation and return a reference to it. The implementation 94 // details require a bit of care to maintain. 95 // 96 // First, some requirements on the implementation in general: 97 // 98 // - XRay handlers should not call any memory allocation routines that may 99 // delegate to an instrumented implementation. This means functions like 100 // malloc() and free() should not be called while instrumenting. 101 // 102 // - We would like to use some thread-local data initialized on first-use of 103 // the XRay instrumentation. These allow us to implement unsynchronized 104 // routines that access resources associated with the thread. 105 // 106 // The implementation here uses a few mechanisms that allow us to provide both 107 // the requirements listed above. We do this by: 108 // 109 // 1. Using a thread-local aligned storage buffer for representing the 110 // ThreadLocalData struct. This data will be uninitialized memory by 111 // design. 112 // 113 // 2. Not requiring a thread exit handler/implementation, keeping the 114 // thread-local as purely a collection of references/data that do not 115 // require cleanup. 116 // 117 // We're doing this to avoid using a `thread_local` object that has a 118 // non-trivial destructor, because the C++ runtime might call std::malloc(...) 119 // to register calls to destructors. Deadlocks may arise when, for example, an 120 // externally provided malloc implementation is XRay instrumented, and 121 // initializing the thread-locals involves calling into malloc. A malloc 122 // implementation that does global synchronization might be holding a lock for a 123 // critical section, calling a function that might be XRay instrumented (and 124 // thus in turn calling into malloc by virtue of registration of the 125 // thread_local's destructor). 126 #if XRAY_HAS_TLS_ALIGNAS 127 static_assert(alignof(ThreadLocalData) >= 64, 128 "ThreadLocalData must be cache line aligned."); 129 #endif 130 static ThreadLocalData &getThreadLocalData() { 131 thread_local typename std::aligned_storage< 132 sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{}; 133 134 if (pthread_getspecific(Key) == NULL) { 135 new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{}; 136 pthread_setspecific(Key, &TLDStorage); 137 } 138 139 return *reinterpret_cast<ThreadLocalData *>(&TLDStorage); 140 } 141 142 static XRayFileHeader &fdrCommonHeaderInfo() { 143 static std::aligned_storage<sizeof(XRayFileHeader)>::type HStorage; 144 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; 145 static bool TSCSupported = true; 146 static uint64_t CycleFrequency = NanosecondsPerSecond; 147 pthread_once( 148 &OnceInit, +[] { 149 XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage); 150 // Version 2 of the log writes the extents of the buffer, instead of 151 // relying on an end-of-buffer record. 152 // Version 3 includes PID metadata record. 153 // Version 4 includes CPU data in the custom event records. 154 // Version 5 uses relative deltas for custom and typed event records, 155 // and removes the CPU data in custom event records (similar to how 156 // function records use deltas instead of full TSCs and rely on other 157 // metadata records for TSC wraparound and CPU migration). 158 H.Version = 5; 159 H.Type = FileTypes::FDR_LOG; 160 161 // Test for required CPU features and cache the cycle frequency 162 TSCSupported = probeRequiredCPUFeatures(); 163 if (TSCSupported) 164 CycleFrequency = getTSCFrequency(); 165 H.CycleFrequency = CycleFrequency; 166 167 // FIXME: Actually check whether we have 'constant_tsc' and 168 // 'nonstop_tsc' before setting the values in the header. 169 H.ConstantTSC = 1; 170 H.NonstopTSC = 1; 171 }); 172 return reinterpret_cast<XRayFileHeader &>(HStorage); 173 } 174 175 // This is the iterator implementation, which knows how to handle FDR-mode 176 // specific buffers. This is used as an implementation of the iterator function 177 // needed by __xray_set_buffer_iterator(...). It maintains a global state of the 178 // buffer iteration for the currently installed FDR mode buffers. In particular: 179 // 180 // - If the argument represents the initial state of XRayBuffer ({nullptr, 0}) 181 // then the iterator returns the header information. 182 // - If the argument represents the header information ({address of header 183 // info, size of the header info}) then it returns the first FDR buffer's 184 // address and extents. 185 // - It will keep returning the next buffer and extents as there are more 186 // buffers to process. When the input represents the last buffer, it will 187 // return the initial state to signal completion ({nullptr, 0}). 188 // 189 // See xray/xray_log_interface.h for more details on the requirements for the 190 // implementations of __xray_set_buffer_iterator(...) and 191 // __xray_log_process_buffers(...). 192 XRayBuffer fdrIterator(const XRayBuffer B) { 193 DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0); 194 DCHECK(BQ->finalizing()); 195 196 if (BQ == nullptr || !BQ->finalizing()) { 197 if (Verbosity()) 198 Report( 199 "XRay FDR: Failed global buffer queue is null or not finalizing!\n"); 200 return {nullptr, 0}; 201 } 202 203 // We use a global scratch-pad for the header information, which only gets 204 // initialized the first time this function is called. We'll update one part 205 // of this information with some relevant data (in particular the number of 206 // buffers to expect). 207 static std::aligned_storage<sizeof(XRayFileHeader)>::type HeaderStorage; 208 static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT; 209 pthread_once( 210 &HeaderOnce, +[] { 211 reinterpret_cast<XRayFileHeader &>(HeaderStorage) = 212 fdrCommonHeaderInfo(); 213 }); 214 215 // We use a convenience alias for code referring to Header from here on out. 216 auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage); 217 if (B.Data == nullptr && B.Size == 0) { 218 Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; 219 return XRayBuffer{static_cast<void *>(&Header), sizeof(Header)}; 220 } 221 222 static BufferQueue::const_iterator It{}; 223 static BufferQueue::const_iterator End{}; 224 static uint8_t *CurrentBuffer{nullptr}; 225 static size_t SerializedBufferSize = 0; 226 if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) { 227 // From this point on, we provide raw access to the raw buffer we're getting 228 // from the BufferQueue. We're relying on the iterators from the current 229 // Buffer queue. 230 It = BQ->cbegin(); 231 End = BQ->cend(); 232 } 233 234 if (CurrentBuffer != nullptr) { 235 deallocateBuffer(CurrentBuffer, SerializedBufferSize); 236 CurrentBuffer = nullptr; 237 } 238 239 if (It == End) 240 return {nullptr, 0}; 241 242 // Set up the current buffer to contain the extents like we would when writing 243 // out to disk. The difference here would be that we still write "empty" 244 // buffers, or at least go through the iterators faithfully to let the 245 // handlers see the empty buffers in the queue. 246 // 247 // We need this atomic fence here to ensure that writes happening to the 248 // buffer have been committed before we load the extents atomically. Because 249 // the buffer is not explicitly synchronised across threads, we rely on the 250 // fence ordering to ensure that writes we expect to have been completed 251 // before the fence are fully committed before we read the extents. 252 atomic_thread_fence(memory_order_acquire); 253 auto BufferSize = atomic_load(It->Extents, memory_order_acquire); 254 SerializedBufferSize = BufferSize + sizeof(MetadataRecord); 255 CurrentBuffer = allocateBuffer(SerializedBufferSize); 256 if (CurrentBuffer == nullptr) 257 return {nullptr, 0}; 258 259 // Write out the extents as a Metadata Record into the CurrentBuffer. 260 MetadataRecord ExtentsRecord; 261 ExtentsRecord.Type = uint8_t(RecordType::Metadata); 262 ExtentsRecord.RecordKind = 263 uint8_t(MetadataRecord::RecordKinds::BufferExtents); 264 internal_memcpy(ExtentsRecord.Data, &BufferSize, sizeof(BufferSize)); 265 auto AfterExtents = 266 static_cast<char *>(internal_memcpy(CurrentBuffer, &ExtentsRecord, 267 sizeof(MetadataRecord))) + 268 sizeof(MetadataRecord); 269 internal_memcpy(AfterExtents, It->Data, BufferSize); 270 271 XRayBuffer Result; 272 Result.Data = CurrentBuffer; 273 Result.Size = SerializedBufferSize; 274 ++It; 275 return Result; 276 } 277 278 // Must finalize before flushing. 279 XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { 280 if (atomic_load(&LoggingStatus, memory_order_acquire) != 281 XRayLogInitStatus::XRAY_LOG_FINALIZED) { 282 if (Verbosity()) 283 Report("Not flushing log, implementation is not finalized.\n"); 284 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; 285 } 286 287 s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; 288 if (!atomic_compare_exchange_strong(&LogFlushStatus, &Result, 289 XRayLogFlushStatus::XRAY_LOG_FLUSHING, 290 memory_order_release)) { 291 if (Verbosity()) 292 Report("Not flushing log, implementation is still finalizing.\n"); 293 return static_cast<XRayLogFlushStatus>(Result); 294 } 295 296 if (BQ == nullptr) { 297 if (Verbosity()) 298 Report("Cannot flush when global buffer queue is null.\n"); 299 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; 300 } 301 302 // We wait a number of milliseconds to allow threads to see that we've 303 // finalised before attempting to flush the log. 304 SleepForMillis(fdrFlags()->grace_period_ms); 305 306 // At this point, we're going to uninstall the iterator implementation, before 307 // we decide to do anything further with the global buffer queue. 308 __xray_log_remove_buffer_iterator(); 309 310 // Once flushed, we should set the global status of the logging implementation 311 // to "uninitialized" to allow for FDR-logging multiple runs. 312 auto ResetToUnitialized = at_scope_exit([] { 313 atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, 314 memory_order_release); 315 }); 316 317 auto CleanupBuffers = at_scope_exit([] { 318 auto &TLD = getThreadLocalData(); 319 if (TLD.Controller != nullptr) 320 TLD.Controller->flush(); 321 }); 322 323 if (fdrFlags()->no_file_flush) { 324 if (Verbosity()) 325 Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n"); 326 327 atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, 328 memory_order_release); 329 return XRayLogFlushStatus::XRAY_LOG_FLUSHED; 330 } 331 332 // We write out the file in the following format: 333 // 334 // 1) We write down the XRay file header with version 1, type FDR_LOG. 335 // 2) Then we use the 'apply' member of the BufferQueue that's live, to 336 // ensure that at this point in time we write down the buffers that have 337 // been released (and marked "used") -- we dump the full buffer for now 338 // (fixed-sized) and let the tools reading the buffers deal with the data 339 // afterwards. 340 // 341 LogWriter *LW = LogWriter::Open(); 342 if (LW == nullptr) { 343 auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; 344 atomic_store(&LogFlushStatus, Result, memory_order_release); 345 return Result; 346 } 347 348 XRayFileHeader Header = fdrCommonHeaderInfo(); 349 Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; 350 LW->WriteAll(reinterpret_cast<char *>(&Header), 351 reinterpret_cast<char *>(&Header) + sizeof(Header)); 352 353 // Release the current thread's buffer before we attempt to write out all the 354 // buffers. This ensures that in case we had only a single thread going, that 355 // we are able to capture the data nonetheless. 356 auto &TLD = getThreadLocalData(); 357 if (TLD.Controller != nullptr) 358 TLD.Controller->flush(); 359 360 BQ->apply([&](const BufferQueue::Buffer &B) { 361 // Starting at version 2 of the FDR logging implementation, we only write 362 // the records identified by the extents of the buffer. We use the Extents 363 // from the Buffer and write that out as the first record in the buffer. We 364 // still use a Metadata record, but fill in the extents instead for the 365 // data. 366 MetadataRecord ExtentsRecord; 367 auto BufferExtents = atomic_load(B.Extents, memory_order_acquire); 368 DCHECK(BufferExtents <= B.Size); 369 ExtentsRecord.Type = uint8_t(RecordType::Metadata); 370 ExtentsRecord.RecordKind = 371 uint8_t(MetadataRecord::RecordKinds::BufferExtents); 372 internal_memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents)); 373 if (BufferExtents > 0) { 374 LW->WriteAll(reinterpret_cast<char *>(&ExtentsRecord), 375 reinterpret_cast<char *>(&ExtentsRecord) + 376 sizeof(MetadataRecord)); 377 LW->WriteAll(reinterpret_cast<char *>(B.Data), 378 reinterpret_cast<char *>(B.Data) + BufferExtents); 379 } 380 }); 381 382 atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, 383 memory_order_release); 384 return XRayLogFlushStatus::XRAY_LOG_FLUSHED; 385 } 386 387 XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT { 388 s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; 389 if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus, 390 XRayLogInitStatus::XRAY_LOG_FINALIZING, 391 memory_order_release)) { 392 if (Verbosity()) 393 Report("Cannot finalize log, implementation not initialized.\n"); 394 return static_cast<XRayLogInitStatus>(CurrentStatus); 395 } 396 397 // Do special things to make the log finalize itself, and not allow any more 398 // operations to be performed until re-initialized. 399 if (BQ == nullptr) { 400 if (Verbosity()) 401 Report("Attempting to finalize an uninitialized global buffer!\n"); 402 } else { 403 BQ->finalize(); 404 } 405 406 atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED, 407 memory_order_release); 408 return XRayLogInitStatus::XRAY_LOG_FINALIZED; 409 } 410 411 struct TSCAndCPU { 412 uint64_t TSC = 0; 413 unsigned char CPU = 0; 414 }; 415 416 static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT { 417 // We want to get the TSC as early as possible, so that we can check whether 418 // we've seen this CPU before. We also do it before we load anything else, 419 // to allow for forward progress with the scheduling. 420 TSCAndCPU Result; 421 422 // Test once for required CPU features 423 static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT; 424 static bool TSCSupported = true; 425 pthread_once( 426 &OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); }); 427 428 if (TSCSupported) { 429 Result.TSC = __xray::readTSC(Result.CPU); 430 } else { 431 // FIXME: This code needs refactoring as it appears in multiple locations 432 timespec TS; 433 int result = clock_gettime(CLOCK_REALTIME, &TS); 434 if (result != 0) { 435 Report("clock_gettime(2) return %d, errno=%d", result, int(errno)); 436 TS = {0, 0}; 437 } 438 Result.CPU = 0; 439 Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; 440 } 441 return Result; 442 } 443 444 thread_local atomic_uint8_t Running{0}; 445 446 static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT { 447 // Check if we're finalizing, before proceeding. 448 { 449 auto Status = atomic_load(&LoggingStatus, memory_order_acquire); 450 if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING || 451 Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) { 452 if (TLD.Controller != nullptr) { 453 TLD.Controller->flush(); 454 TLD.Controller = nullptr; 455 } 456 return false; 457 } 458 } 459 460 if (UNLIKELY(TLD.Controller == nullptr)) { 461 // Set up the TLD buffer queue. 462 if (UNLIKELY(BQ == nullptr)) 463 return false; 464 TLD.BQ = BQ; 465 466 // Check that we have a valid buffer. 467 if (TLD.Buffer.Generation != BQ->generation() && 468 TLD.BQ->releaseBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok) 469 return false; 470 471 // Set up a buffer, before setting up the log writer. Bail out on failure. 472 if (TLD.BQ->getBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok) 473 return false; 474 475 // Set up the Log Writer for this thread. 476 if (UNLIKELY(TLD.Writer == nullptr)) { 477 auto *LWStorage = reinterpret_cast<FDRLogWriter *>(&TLD.LWStorage); 478 new (LWStorage) FDRLogWriter(TLD.Buffer); 479 TLD.Writer = LWStorage; 480 } else { 481 TLD.Writer->resetRecord(); 482 } 483 484 auto *CStorage = reinterpret_cast<FDRController<> *>(&TLD.CStorage); 485 new (CStorage) 486 FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime, 487 atomic_load_relaxed(&ThresholdTicks)); 488 TLD.Controller = CStorage; 489 } 490 491 DCHECK_NE(TLD.Controller, nullptr); 492 return true; 493 } 494 495 void fdrLoggingHandleArg0(int32_t FuncId, 496 XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { 497 auto TC = getTimestamp(); 498 auto &TSC = TC.TSC; 499 auto &CPU = TC.CPU; 500 RecursionGuard Guard{Running}; 501 if (!Guard) 502 return; 503 504 auto &TLD = getThreadLocalData(); 505 if (!setupTLD(TLD)) 506 return; 507 508 switch (Entry) { 509 case XRayEntryType::ENTRY: 510 case XRayEntryType::LOG_ARGS_ENTRY: 511 TLD.Controller->functionEnter(FuncId, TSC, CPU); 512 return; 513 case XRayEntryType::EXIT: 514 TLD.Controller->functionExit(FuncId, TSC, CPU); 515 return; 516 case XRayEntryType::TAIL: 517 TLD.Controller->functionTailExit(FuncId, TSC, CPU); 518 return; 519 case XRayEntryType::CUSTOM_EVENT: 520 case XRayEntryType::TYPED_EVENT: 521 break; 522 } 523 } 524 525 void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, 526 uint64_t Arg) XRAY_NEVER_INSTRUMENT { 527 auto TC = getTimestamp(); 528 auto &TSC = TC.TSC; 529 auto &CPU = TC.CPU; 530 RecursionGuard Guard{Running}; 531 if (!Guard) 532 return; 533 534 auto &TLD = getThreadLocalData(); 535 if (!setupTLD(TLD)) 536 return; 537 538 switch (Entry) { 539 case XRayEntryType::ENTRY: 540 case XRayEntryType::LOG_ARGS_ENTRY: 541 TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg); 542 return; 543 case XRayEntryType::EXIT: 544 TLD.Controller->functionExit(FuncId, TSC, CPU); 545 return; 546 case XRayEntryType::TAIL: 547 TLD.Controller->functionTailExit(FuncId, TSC, CPU); 548 return; 549 case XRayEntryType::CUSTOM_EVENT: 550 case XRayEntryType::TYPED_EVENT: 551 break; 552 } 553 } 554 555 void fdrLoggingHandleCustomEvent(void *Event, 556 std::size_t EventSize) XRAY_NEVER_INSTRUMENT { 557 auto TC = getTimestamp(); 558 auto &TSC = TC.TSC; 559 auto &CPU = TC.CPU; 560 RecursionGuard Guard{Running}; 561 if (!Guard) 562 return; 563 564 // Complain when we ever get at least one custom event that's larger than what 565 // we can possibly support. 566 if (EventSize > 567 static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) { 568 static pthread_once_t Once = PTHREAD_ONCE_INIT; 569 pthread_once( 570 &Once, +[] { 571 Report("Custom event size too large; truncating to %d.\n", 572 std::numeric_limits<int32_t>::max()); 573 }); 574 } 575 576 auto &TLD = getThreadLocalData(); 577 if (!setupTLD(TLD)) 578 return; 579 580 int32_t ReducedEventSize = static_cast<int32_t>(EventSize); 581 TLD.Controller->customEvent(TSC, CPU, Event, ReducedEventSize); 582 } 583 584 void fdrLoggingHandleTypedEvent( 585 uint16_t EventType, const void *Event, 586 std::size_t EventSize) noexcept XRAY_NEVER_INSTRUMENT { 587 auto TC = getTimestamp(); 588 auto &TSC = TC.TSC; 589 auto &CPU = TC.CPU; 590 RecursionGuard Guard{Running}; 591 if (!Guard) 592 return; 593 594 // Complain when we ever get at least one typed event that's larger than what 595 // we can possibly support. 596 if (EventSize > 597 static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) { 598 static pthread_once_t Once = PTHREAD_ONCE_INIT; 599 pthread_once( 600 &Once, +[] { 601 Report("Typed event size too large; truncating to %d.\n", 602 std::numeric_limits<int32_t>::max()); 603 }); 604 } 605 606 auto &TLD = getThreadLocalData(); 607 if (!setupTLD(TLD)) 608 return; 609 610 int32_t ReducedEventSize = static_cast<int32_t>(EventSize); 611 TLD.Controller->typedEvent(TSC, CPU, EventType, Event, ReducedEventSize); 612 } 613 614 XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options, 615 size_t OptionsSize) XRAY_NEVER_INSTRUMENT { 616 if (Options == nullptr) 617 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; 618 619 s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; 620 if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus, 621 XRayLogInitStatus::XRAY_LOG_INITIALIZING, 622 memory_order_release)) { 623 if (Verbosity()) 624 Report("Cannot initialize already initialized implementation.\n"); 625 return static_cast<XRayLogInitStatus>(CurrentStatus); 626 } 627 628 if (Verbosity()) 629 Report("Initializing FDR mode with options: %s\n", 630 static_cast<const char *>(Options)); 631 632 // TODO: Factor out the flags specific to the FDR mode implementation. For 633 // now, use the global/single definition of the flags, since the FDR mode 634 // flags are already defined there. 635 FlagParser FDRParser; 636 FDRFlags FDRFlags; 637 registerXRayFDRFlags(&FDRParser, &FDRFlags); 638 FDRFlags.setDefaults(); 639 640 // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided 641 // options until we migrate everyone to use the XRAY_FDR_OPTIONS 642 // compiler-provided options. 643 FDRParser.ParseString(useCompilerDefinedFlags()); 644 FDRParser.ParseString(useCompilerDefinedFDRFlags()); 645 auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS"); 646 if (EnvOpts == nullptr) 647 EnvOpts = ""; 648 FDRParser.ParseString(EnvOpts); 649 650 // FIXME: Remove this when we fully remove the deprecated flags. 651 if (internal_strlen(EnvOpts) == 0) { 652 FDRFlags.func_duration_threshold_us = 653 flags()->xray_fdr_log_func_duration_threshold_us; 654 FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms; 655 } 656 657 // The provided options should always override the compiler-provided and 658 // environment-variable defined options. 659 FDRParser.ParseString(static_cast<const char *>(Options)); 660 *fdrFlags() = FDRFlags; 661 auto BufferSize = FDRFlags.buffer_size; 662 auto BufferMax = FDRFlags.buffer_max; 663 664 if (BQ == nullptr) { 665 bool Success = false; 666 BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage); 667 new (BQ) BufferQueue(BufferSize, BufferMax, Success); 668 if (!Success) { 669 Report("BufferQueue init failed.\n"); 670 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; 671 } 672 } else { 673 if (BQ->init(BufferSize, BufferMax) != BufferQueue::ErrorCode::Ok) { 674 if (Verbosity()) 675 Report("Failed to re-initialize global buffer queue. Init failed.\n"); 676 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; 677 } 678 } 679 680 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; 681 pthread_once( 682 &OnceInit, +[] { 683 atomic_store(&TicksPerSec, 684 probeRequiredCPUFeatures() ? getTSCFrequency() 685 : __xray::NanosecondsPerSecond, 686 memory_order_release); 687 pthread_key_create( 688 &Key, +[](void *TLDPtr) { 689 if (TLDPtr == nullptr) 690 return; 691 auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr); 692 if (TLD.BQ == nullptr) 693 return; 694 if (TLD.Buffer.Data == nullptr) 695 return; 696 auto EC = TLD.BQ->releaseBuffer(TLD.Buffer); 697 if (EC != BufferQueue::ErrorCode::Ok) 698 Report("At thread exit, failed to release buffer at %p; " 699 "error=%s\n", 700 TLD.Buffer.Data, BufferQueue::getErrorString(EC)); 701 }); 702 }); 703 704 atomic_store(&ThresholdTicks, 705 atomic_load_relaxed(&TicksPerSec) * 706 fdrFlags()->func_duration_threshold_us / 1000000, 707 memory_order_release); 708 // Arg1 handler should go in first to avoid concurrent code accidentally 709 // falling back to arg0 when it should have ran arg1. 710 __xray_set_handler_arg1(fdrLoggingHandleArg1); 711 // Install the actual handleArg0 handler after initialising the buffers. 712 __xray_set_handler(fdrLoggingHandleArg0); 713 __xray_set_customevent_handler(fdrLoggingHandleCustomEvent); 714 __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent); 715 716 // Install the buffer iterator implementation. 717 __xray_log_set_buffer_iterator(fdrIterator); 718 719 atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED, 720 memory_order_release); 721 722 if (Verbosity()) 723 Report("XRay FDR init successful.\n"); 724 return XRayLogInitStatus::XRAY_LOG_INITIALIZED; 725 } 726 727 bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { 728 XRayLogImpl Impl{ 729 fdrLoggingInit, 730 fdrLoggingFinalize, 731 fdrLoggingHandleArg0, 732 fdrLoggingFlush, 733 }; 734 auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl); 735 if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && 736 Verbosity()) { 737 Report("Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n", 738 RegistrationResult); 739 return false; 740 } 741 742 if (flags()->xray_fdr_log || 743 !internal_strcmp(flags()->xray_mode, "xray-fdr")) { 744 auto SelectResult = __xray_log_select_mode("xray-fdr"); 745 if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && 746 Verbosity()) { 747 Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n", 748 SelectResult); 749 return false; 750 } 751 } 752 return true; 753 } 754 755 } // namespace __xray 756 757 static auto UNUSED Unused = __xray::fdrLogDynamicInitializer(); 758