1 //===-- xray_fdr_logging.cpp -----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is a part of XRay, a dynamic runtime instrumentation system. 10 // 11 // Here we implement the Flight Data Recorder mode for XRay, where we use 12 // compact structures to store records in memory as well as when writing out the 13 // data to files. 14 // 15 //===----------------------------------------------------------------------===// 16 #include "xray_fdr_logging.h" 17 #include <cassert> 18 #include <cstddef> 19 #include <errno.h> 20 #include <limits> 21 #include <memory> 22 #include <pthread.h> 23 #include <sys/time.h> 24 #include <time.h> 25 #include <unistd.h> 26 27 #include "sanitizer_common/sanitizer_allocator_internal.h" 28 #include "sanitizer_common/sanitizer_atomic.h" 29 #include "sanitizer_common/sanitizer_common.h" 30 #include "xray/xray_interface.h" 31 #include "xray/xray_records.h" 32 #include "xray_allocator.h" 33 #include "xray_buffer_queue.h" 34 #include "xray_defs.h" 35 #include "xray_fdr_controller.h" 36 #include "xray_fdr_flags.h" 37 #include "xray_fdr_log_writer.h" 38 #include "xray_flags.h" 39 #include "xray_recursion_guard.h" 40 #include "xray_tsc.h" 41 #include "xray_utils.h" 42 43 namespace __xray { 44 45 static atomic_sint32_t LoggingStatus = { 46 XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; 47 48 namespace { 49 50 // Group together thread-local-data in a struct, then hide it behind a function 51 // call so that it can be initialized on first use instead of as a global. We 52 // force the alignment to 64-bytes for x86 cache line alignment, as this 53 // structure is used in the hot path of implementation. 54 struct XRAY_TLS_ALIGNAS(64) ThreadLocalData { 55 BufferQueue::Buffer Buffer{}; 56 BufferQueue *BQ = nullptr; 57 58 using LogWriterStorage = 59 typename std::aligned_storage<sizeof(FDRLogWriter), 60 alignof(FDRLogWriter)>::type; 61 62 LogWriterStorage LWStorage; 63 FDRLogWriter *Writer = nullptr; 64 65 using ControllerStorage = 66 typename std::aligned_storage<sizeof(FDRController<>), 67 alignof(FDRController<>)>::type; 68 ControllerStorage CStorage; 69 FDRController<> *Controller = nullptr; 70 }; 71 72 } // namespace 73 74 static_assert(std::is_trivially_destructible<ThreadLocalData>::value, 75 "ThreadLocalData must be trivially destructible"); 76 77 // Use a global pthread key to identify thread-local data for logging. 78 static pthread_key_t Key; 79 80 // Global BufferQueue. 81 static std::aligned_storage<sizeof(BufferQueue)>::type BufferQueueStorage; 82 static BufferQueue *BQ = nullptr; 83 84 // Global thresholds for function durations. 85 static atomic_uint64_t ThresholdTicks{0}; 86 87 // Global for ticks per second. 88 static atomic_uint64_t TicksPerSec{0}; 89 90 static atomic_sint32_t LogFlushStatus = { 91 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; 92 93 // This function will initialize the thread-local data structure used by the FDR 94 // logging implementation and return a reference to it. The implementation 95 // details require a bit of care to maintain. 96 // 97 // First, some requirements on the implementation in general: 98 // 99 // - XRay handlers should not call any memory allocation routines that may 100 // delegate to an instrumented implementation. This means functions like 101 // malloc() and free() should not be called while instrumenting. 102 // 103 // - We would like to use some thread-local data initialized on first-use of 104 // the XRay instrumentation. These allow us to implement unsynchronized 105 // routines that access resources associated with the thread. 106 // 107 // The implementation here uses a few mechanisms that allow us to provide both 108 // the requirements listed above. We do this by: 109 // 110 // 1. Using a thread-local aligned storage buffer for representing the 111 // ThreadLocalData struct. This data will be uninitialized memory by 112 // design. 113 // 114 // 2. Not requiring a thread exit handler/implementation, keeping the 115 // thread-local as purely a collection of references/data that do not 116 // require cleanup. 117 // 118 // We're doing this to avoid using a `thread_local` object that has a 119 // non-trivial destructor, because the C++ runtime might call std::malloc(...) 120 // to register calls to destructors. Deadlocks may arise when, for example, an 121 // externally provided malloc implementation is XRay instrumented, and 122 // initializing the thread-locals involves calling into malloc. A malloc 123 // implementation that does global synchronization might be holding a lock for a 124 // critical section, calling a function that might be XRay instrumented (and 125 // thus in turn calling into malloc by virtue of registration of the 126 // thread_local's destructor). 127 #if XRAY_HAS_TLS_ALIGNAS 128 static_assert(alignof(ThreadLocalData) >= 64, 129 "ThreadLocalData must be cache line aligned."); 130 #endif 131 static ThreadLocalData &getThreadLocalData() { 132 thread_local typename std::aligned_storage< 133 sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{}; 134 135 if (pthread_getspecific(Key) == NULL) { 136 new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{}; 137 pthread_setspecific(Key, &TLDStorage); 138 } 139 140 return *reinterpret_cast<ThreadLocalData *>(&TLDStorage); 141 } 142 143 static XRayFileHeader &fdrCommonHeaderInfo() { 144 alignas(XRayFileHeader) static std::byte HStorage[sizeof(XRayFileHeader)]; 145 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; 146 static bool TSCSupported = true; 147 static uint64_t CycleFrequency = NanosecondsPerSecond; 148 pthread_once( 149 &OnceInit, +[] { 150 XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage); 151 // Version 2 of the log writes the extents of the buffer, instead of 152 // relying on an end-of-buffer record. 153 // Version 3 includes PID metadata record. 154 // Version 4 includes CPU data in the custom event records. 155 // Version 5 uses relative deltas for custom and typed event records, 156 // and removes the CPU data in custom event records (similar to how 157 // function records use deltas instead of full TSCs and rely on other 158 // metadata records for TSC wraparound and CPU migration). 159 H.Version = 5; 160 H.Type = FileTypes::FDR_LOG; 161 162 // Test for required CPU features and cache the cycle frequency 163 TSCSupported = probeRequiredCPUFeatures(); 164 if (TSCSupported) 165 CycleFrequency = getTSCFrequency(); 166 H.CycleFrequency = CycleFrequency; 167 168 // FIXME: Actually check whether we have 'constant_tsc' and 169 // 'nonstop_tsc' before setting the values in the header. 170 H.ConstantTSC = 1; 171 H.NonstopTSC = 1; 172 }); 173 return reinterpret_cast<XRayFileHeader &>(HStorage); 174 } 175 176 // This is the iterator implementation, which knows how to handle FDR-mode 177 // specific buffers. This is used as an implementation of the iterator function 178 // needed by __xray_set_buffer_iterator(...). It maintains a global state of the 179 // buffer iteration for the currently installed FDR mode buffers. In particular: 180 // 181 // - If the argument represents the initial state of XRayBuffer ({nullptr, 0}) 182 // then the iterator returns the header information. 183 // - If the argument represents the header information ({address of header 184 // info, size of the header info}) then it returns the first FDR buffer's 185 // address and extents. 186 // - It will keep returning the next buffer and extents as there are more 187 // buffers to process. When the input represents the last buffer, it will 188 // return the initial state to signal completion ({nullptr, 0}). 189 // 190 // See xray/xray_log_interface.h for more details on the requirements for the 191 // implementations of __xray_set_buffer_iterator(...) and 192 // __xray_log_process_buffers(...). 193 XRayBuffer fdrIterator(const XRayBuffer B) { 194 DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0); 195 DCHECK(BQ->finalizing()); 196 197 if (BQ == nullptr || !BQ->finalizing()) { 198 if (Verbosity()) 199 Report( 200 "XRay FDR: Failed global buffer queue is null or not finalizing!\n"); 201 return {nullptr, 0}; 202 } 203 204 // We use a global scratch-pad for the header information, which only gets 205 // initialized the first time this function is called. We'll update one part 206 // of this information with some relevant data (in particular the number of 207 // buffers to expect). 208 alignas( 209 XRayFileHeader) static std::byte HeaderStorage[sizeof(XRayFileHeader)]; 210 static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT; 211 pthread_once( 212 &HeaderOnce, +[] { 213 reinterpret_cast<XRayFileHeader &>(HeaderStorage) = 214 fdrCommonHeaderInfo(); 215 }); 216 217 // We use a convenience alias for code referring to Header from here on out. 218 auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage); 219 if (B.Data == nullptr && B.Size == 0) { 220 Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; 221 return XRayBuffer{static_cast<void *>(&Header), sizeof(Header)}; 222 } 223 224 static BufferQueue::const_iterator It{}; 225 static BufferQueue::const_iterator End{}; 226 static uint8_t *CurrentBuffer{nullptr}; 227 static size_t SerializedBufferSize = 0; 228 if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) { 229 // From this point on, we provide raw access to the raw buffer we're getting 230 // from the BufferQueue. We're relying on the iterators from the current 231 // Buffer queue. 232 It = BQ->cbegin(); 233 End = BQ->cend(); 234 } 235 236 if (CurrentBuffer != nullptr) { 237 deallocateBuffer(CurrentBuffer, SerializedBufferSize); 238 CurrentBuffer = nullptr; 239 } 240 241 if (It == End) 242 return {nullptr, 0}; 243 244 // Set up the current buffer to contain the extents like we would when writing 245 // out to disk. The difference here would be that we still write "empty" 246 // buffers, or at least go through the iterators faithfully to let the 247 // handlers see the empty buffers in the queue. 248 // 249 // We need this atomic fence here to ensure that writes happening to the 250 // buffer have been committed before we load the extents atomically. Because 251 // the buffer is not explicitly synchronised across threads, we rely on the 252 // fence ordering to ensure that writes we expect to have been completed 253 // before the fence are fully committed before we read the extents. 254 atomic_thread_fence(memory_order_acquire); 255 auto BufferSize = atomic_load(It->Extents, memory_order_acquire); 256 SerializedBufferSize = BufferSize + sizeof(MetadataRecord); 257 CurrentBuffer = allocateBuffer(SerializedBufferSize); 258 if (CurrentBuffer == nullptr) 259 return {nullptr, 0}; 260 261 // Write out the extents as a Metadata Record into the CurrentBuffer. 262 MetadataRecord ExtentsRecord; 263 ExtentsRecord.Type = uint8_t(RecordType::Metadata); 264 ExtentsRecord.RecordKind = 265 uint8_t(MetadataRecord::RecordKinds::BufferExtents); 266 internal_memcpy(ExtentsRecord.Data, &BufferSize, sizeof(BufferSize)); 267 auto AfterExtents = 268 static_cast<char *>(internal_memcpy(CurrentBuffer, &ExtentsRecord, 269 sizeof(MetadataRecord))) + 270 sizeof(MetadataRecord); 271 internal_memcpy(AfterExtents, It->Data, BufferSize); 272 273 XRayBuffer Result; 274 Result.Data = CurrentBuffer; 275 Result.Size = SerializedBufferSize; 276 ++It; 277 return Result; 278 } 279 280 // Must finalize before flushing. 281 XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { 282 if (atomic_load(&LoggingStatus, memory_order_acquire) != 283 XRayLogInitStatus::XRAY_LOG_FINALIZED) { 284 if (Verbosity()) 285 Report("Not flushing log, implementation is not finalized.\n"); 286 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; 287 } 288 289 if (atomic_exchange(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHING, 290 memory_order_release) == 291 XRayLogFlushStatus::XRAY_LOG_FLUSHING) { 292 if (Verbosity()) 293 Report("Not flushing log, implementation is still flushing.\n"); 294 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; 295 } 296 297 if (BQ == nullptr) { 298 if (Verbosity()) 299 Report("Cannot flush when global buffer queue is null.\n"); 300 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; 301 } 302 303 // We wait a number of milliseconds to allow threads to see that we've 304 // finalised before attempting to flush the log. 305 SleepForMillis(fdrFlags()->grace_period_ms); 306 307 // At this point, we're going to uninstall the iterator implementation, before 308 // we decide to do anything further with the global buffer queue. 309 __xray_log_remove_buffer_iterator(); 310 311 // Once flushed, we should set the global status of the logging implementation 312 // to "uninitialized" to allow for FDR-logging multiple runs. 313 auto ResetToUnitialized = at_scope_exit([] { 314 atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, 315 memory_order_release); 316 }); 317 318 auto CleanupBuffers = at_scope_exit([] { 319 auto &TLD = getThreadLocalData(); 320 if (TLD.Controller != nullptr) 321 TLD.Controller->flush(); 322 }); 323 324 if (fdrFlags()->no_file_flush) { 325 if (Verbosity()) 326 Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n"); 327 328 atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, 329 memory_order_release); 330 return XRayLogFlushStatus::XRAY_LOG_FLUSHED; 331 } 332 333 // We write out the file in the following format: 334 // 335 // 1) We write down the XRay file header with version 1, type FDR_LOG. 336 // 2) Then we use the 'apply' member of the BufferQueue that's live, to 337 // ensure that at this point in time we write down the buffers that have 338 // been released (and marked "used") -- we dump the full buffer for now 339 // (fixed-sized) and let the tools reading the buffers deal with the data 340 // afterwards. 341 // 342 LogWriter *LW = LogWriter::Open(); 343 if (LW == nullptr) { 344 auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; 345 atomic_store(&LogFlushStatus, Result, memory_order_release); 346 return Result; 347 } 348 349 XRayFileHeader Header = fdrCommonHeaderInfo(); 350 Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; 351 LW->WriteAll(reinterpret_cast<char *>(&Header), 352 reinterpret_cast<char *>(&Header) + sizeof(Header)); 353 354 // Release the current thread's buffer before we attempt to write out all the 355 // buffers. This ensures that in case we had only a single thread going, that 356 // we are able to capture the data nonetheless. 357 auto &TLD = getThreadLocalData(); 358 if (TLD.Controller != nullptr) 359 TLD.Controller->flush(); 360 361 BQ->apply([&](const BufferQueue::Buffer &B) { 362 // Starting at version 2 of the FDR logging implementation, we only write 363 // the records identified by the extents of the buffer. We use the Extents 364 // from the Buffer and write that out as the first record in the buffer. We 365 // still use a Metadata record, but fill in the extents instead for the 366 // data. 367 MetadataRecord ExtentsRecord; 368 auto BufferExtents = atomic_load(B.Extents, memory_order_acquire); 369 DCHECK(BufferExtents <= B.Size); 370 ExtentsRecord.Type = uint8_t(RecordType::Metadata); 371 ExtentsRecord.RecordKind = 372 uint8_t(MetadataRecord::RecordKinds::BufferExtents); 373 internal_memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents)); 374 if (BufferExtents > 0) { 375 LW->WriteAll(reinterpret_cast<char *>(&ExtentsRecord), 376 reinterpret_cast<char *>(&ExtentsRecord) + 377 sizeof(MetadataRecord)); 378 LW->WriteAll(reinterpret_cast<char *>(B.Data), 379 reinterpret_cast<char *>(B.Data) + BufferExtents); 380 } 381 }); 382 383 atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, 384 memory_order_release); 385 return XRayLogFlushStatus::XRAY_LOG_FLUSHED; 386 } 387 388 XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT { 389 s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; 390 if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus, 391 XRayLogInitStatus::XRAY_LOG_FINALIZING, 392 memory_order_release)) { 393 if (Verbosity()) 394 Report("Cannot finalize log, implementation not initialized.\n"); 395 return static_cast<XRayLogInitStatus>(CurrentStatus); 396 } 397 398 // Do special things to make the log finalize itself, and not allow any more 399 // operations to be performed until re-initialized. 400 if (BQ == nullptr) { 401 if (Verbosity()) 402 Report("Attempting to finalize an uninitialized global buffer!\n"); 403 } else { 404 BQ->finalize(); 405 } 406 407 atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED, 408 memory_order_release); 409 return XRayLogInitStatus::XRAY_LOG_FINALIZED; 410 } 411 412 struct TSCAndCPU { 413 uint64_t TSC = 0; 414 unsigned char CPU = 0; 415 }; 416 417 static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT { 418 // We want to get the TSC as early as possible, so that we can check whether 419 // we've seen this CPU before. We also do it before we load anything else, 420 // to allow for forward progress with the scheduling. 421 TSCAndCPU Result; 422 423 // Test once for required CPU features 424 static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT; 425 static bool TSCSupported = true; 426 pthread_once( 427 &OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); }); 428 429 if (TSCSupported) { 430 Result.TSC = __xray::readTSC(Result.CPU); 431 } else { 432 // FIXME: This code needs refactoring as it appears in multiple locations 433 timespec TS; 434 int result = clock_gettime(CLOCK_REALTIME, &TS); 435 if (result != 0) { 436 Report("clock_gettime(2) return %d, errno=%d", result, int(errno)); 437 TS = {0, 0}; 438 } 439 Result.CPU = 0; 440 Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; 441 } 442 return Result; 443 } 444 445 thread_local atomic_uint8_t Running{0}; 446 447 static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT { 448 // Check if we're finalizing, before proceeding. 449 { 450 auto Status = atomic_load(&LoggingStatus, memory_order_acquire); 451 if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING || 452 Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) { 453 if (TLD.Controller != nullptr) { 454 TLD.Controller->flush(); 455 TLD.Controller = nullptr; 456 } 457 return false; 458 } 459 } 460 461 if (UNLIKELY(TLD.Controller == nullptr)) { 462 // Set up the TLD buffer queue. 463 if (UNLIKELY(BQ == nullptr)) 464 return false; 465 TLD.BQ = BQ; 466 467 // Check that we have a valid buffer. 468 if (TLD.Buffer.Generation != BQ->generation() && 469 TLD.BQ->releaseBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok) 470 return false; 471 472 // Set up a buffer, before setting up the log writer. Bail out on failure. 473 if (TLD.BQ->getBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok) 474 return false; 475 476 // Set up the Log Writer for this thread. 477 if (UNLIKELY(TLD.Writer == nullptr)) { 478 auto *LWStorage = reinterpret_cast<FDRLogWriter *>(&TLD.LWStorage); 479 new (LWStorage) FDRLogWriter(TLD.Buffer); 480 TLD.Writer = LWStorage; 481 } else { 482 TLD.Writer->resetRecord(); 483 } 484 485 auto *CStorage = reinterpret_cast<FDRController<> *>(&TLD.CStorage); 486 new (CStorage) 487 FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime, 488 atomic_load_relaxed(&ThresholdTicks)); 489 TLD.Controller = CStorage; 490 } 491 492 DCHECK_NE(TLD.Controller, nullptr); 493 return true; 494 } 495 496 void fdrLoggingHandleArg0(int32_t FuncId, 497 XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { 498 auto TC = getTimestamp(); 499 auto &TSC = TC.TSC; 500 auto &CPU = TC.CPU; 501 RecursionGuard Guard{Running}; 502 if (!Guard) 503 return; 504 505 auto &TLD = getThreadLocalData(); 506 if (!setupTLD(TLD)) 507 return; 508 509 switch (Entry) { 510 case XRayEntryType::ENTRY: 511 case XRayEntryType::LOG_ARGS_ENTRY: 512 TLD.Controller->functionEnter(FuncId, TSC, CPU); 513 return; 514 case XRayEntryType::EXIT: 515 TLD.Controller->functionExit(FuncId, TSC, CPU); 516 return; 517 case XRayEntryType::TAIL: 518 TLD.Controller->functionTailExit(FuncId, TSC, CPU); 519 return; 520 case XRayEntryType::CUSTOM_EVENT: 521 case XRayEntryType::TYPED_EVENT: 522 break; 523 } 524 } 525 526 void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, 527 uint64_t Arg) XRAY_NEVER_INSTRUMENT { 528 auto TC = getTimestamp(); 529 auto &TSC = TC.TSC; 530 auto &CPU = TC.CPU; 531 RecursionGuard Guard{Running}; 532 if (!Guard) 533 return; 534 535 auto &TLD = getThreadLocalData(); 536 if (!setupTLD(TLD)) 537 return; 538 539 switch (Entry) { 540 case XRayEntryType::ENTRY: 541 case XRayEntryType::LOG_ARGS_ENTRY: 542 TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg); 543 return; 544 case XRayEntryType::EXIT: 545 TLD.Controller->functionExit(FuncId, TSC, CPU); 546 return; 547 case XRayEntryType::TAIL: 548 TLD.Controller->functionTailExit(FuncId, TSC, CPU); 549 return; 550 case XRayEntryType::CUSTOM_EVENT: 551 case XRayEntryType::TYPED_EVENT: 552 break; 553 } 554 } 555 556 void fdrLoggingHandleCustomEvent(void *Event, 557 std::size_t EventSize) XRAY_NEVER_INSTRUMENT { 558 auto TC = getTimestamp(); 559 auto &TSC = TC.TSC; 560 auto &CPU = TC.CPU; 561 RecursionGuard Guard{Running}; 562 if (!Guard) 563 return; 564 565 // Complain when we ever get at least one custom event that's larger than what 566 // we can possibly support. 567 if (EventSize > 568 static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) { 569 static pthread_once_t Once = PTHREAD_ONCE_INIT; 570 pthread_once( 571 &Once, +[] { 572 Report("Custom event size too large; truncating to %d.\n", 573 std::numeric_limits<int32_t>::max()); 574 }); 575 } 576 577 auto &TLD = getThreadLocalData(); 578 if (!setupTLD(TLD)) 579 return; 580 581 int32_t ReducedEventSize = static_cast<int32_t>(EventSize); 582 TLD.Controller->customEvent(TSC, CPU, Event, ReducedEventSize); 583 } 584 585 void fdrLoggingHandleTypedEvent(size_t EventType, const void *Event, 586 size_t EventSize) noexcept 587 XRAY_NEVER_INSTRUMENT { 588 auto TC = getTimestamp(); 589 auto &TSC = TC.TSC; 590 auto &CPU = TC.CPU; 591 RecursionGuard Guard{Running}; 592 if (!Guard) 593 return; 594 595 // Complain when we ever get at least one typed event that's larger than what 596 // we can possibly support. 597 if (EventSize > 598 static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) { 599 static pthread_once_t Once = PTHREAD_ONCE_INIT; 600 pthread_once( 601 &Once, +[] { 602 Report("Typed event size too large; truncating to %d.\n", 603 std::numeric_limits<int32_t>::max()); 604 }); 605 } 606 607 auto &TLD = getThreadLocalData(); 608 if (!setupTLD(TLD)) 609 return; 610 611 int32_t ReducedEventSize = static_cast<int32_t>(EventSize); 612 TLD.Controller->typedEvent(TSC, CPU, static_cast<uint16_t>(EventType), Event, 613 ReducedEventSize); 614 } 615 616 XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options, 617 size_t OptionsSize) XRAY_NEVER_INSTRUMENT { 618 if (Options == nullptr) 619 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; 620 621 s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; 622 if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus, 623 XRayLogInitStatus::XRAY_LOG_INITIALIZING, 624 memory_order_release)) { 625 if (Verbosity()) 626 Report("Cannot initialize already initialized implementation.\n"); 627 return static_cast<XRayLogInitStatus>(CurrentStatus); 628 } 629 630 if (Verbosity()) 631 Report("Initializing FDR mode with options: %s\n", 632 static_cast<const char *>(Options)); 633 634 // TODO: Factor out the flags specific to the FDR mode implementation. For 635 // now, use the global/single definition of the flags, since the FDR mode 636 // flags are already defined there. 637 FlagParser FDRParser; 638 FDRFlags FDRFlags; 639 registerXRayFDRFlags(&FDRParser, &FDRFlags); 640 FDRFlags.setDefaults(); 641 642 // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided 643 // options until we migrate everyone to use the XRAY_FDR_OPTIONS 644 // compiler-provided options. 645 FDRParser.ParseString(useCompilerDefinedFlags()); 646 FDRParser.ParseString(useCompilerDefinedFDRFlags()); 647 auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS"); 648 if (EnvOpts == nullptr) 649 EnvOpts = ""; 650 FDRParser.ParseString(EnvOpts); 651 652 // FIXME: Remove this when we fully remove the deprecated flags. 653 if (internal_strlen(EnvOpts) == 0) { 654 FDRFlags.func_duration_threshold_us = 655 flags()->xray_fdr_log_func_duration_threshold_us; 656 FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms; 657 } 658 659 // The provided options should always override the compiler-provided and 660 // environment-variable defined options. 661 FDRParser.ParseString(static_cast<const char *>(Options)); 662 *fdrFlags() = FDRFlags; 663 auto BufferSize = FDRFlags.buffer_size; 664 auto BufferMax = FDRFlags.buffer_max; 665 666 if (BQ == nullptr) { 667 bool Success = false; 668 BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage); 669 new (BQ) BufferQueue(BufferSize, BufferMax, Success); 670 if (!Success) { 671 Report("BufferQueue init failed.\n"); 672 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; 673 } 674 } else { 675 if (BQ->init(BufferSize, BufferMax) != BufferQueue::ErrorCode::Ok) { 676 if (Verbosity()) 677 Report("Failed to re-initialize global buffer queue. Init failed.\n"); 678 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; 679 } 680 } 681 682 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; 683 pthread_once( 684 &OnceInit, +[] { 685 atomic_store(&TicksPerSec, 686 probeRequiredCPUFeatures() ? getTSCFrequency() 687 : __xray::NanosecondsPerSecond, 688 memory_order_release); 689 pthread_key_create( 690 &Key, +[](void *TLDPtr) { 691 if (TLDPtr == nullptr) 692 return; 693 auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr); 694 if (TLD.BQ == nullptr) 695 return; 696 if (TLD.Buffer.Data == nullptr) 697 return; 698 auto EC = TLD.BQ->releaseBuffer(TLD.Buffer); 699 if (EC != BufferQueue::ErrorCode::Ok) 700 Report("At thread exit, failed to release buffer at %p; " 701 "error=%s\n", 702 TLD.Buffer.Data, BufferQueue::getErrorString(EC)); 703 }); 704 }); 705 706 atomic_store(&ThresholdTicks, 707 atomic_load_relaxed(&TicksPerSec) * 708 fdrFlags()->func_duration_threshold_us / 1000000, 709 memory_order_release); 710 // Arg1 handler should go in first to avoid concurrent code accidentally 711 // falling back to arg0 when it should have ran arg1. 712 __xray_set_handler_arg1(fdrLoggingHandleArg1); 713 // Install the actual handleArg0 handler after initialising the buffers. 714 __xray_set_handler(fdrLoggingHandleArg0); 715 __xray_set_customevent_handler(fdrLoggingHandleCustomEvent); 716 __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent); 717 718 // Install the buffer iterator implementation. 719 __xray_log_set_buffer_iterator(fdrIterator); 720 721 atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED, 722 memory_order_release); 723 724 if (Verbosity()) 725 Report("XRay FDR init successful.\n"); 726 return XRayLogInitStatus::XRAY_LOG_INITIALIZED; 727 } 728 729 bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { 730 XRayLogImpl Impl{ 731 fdrLoggingInit, 732 fdrLoggingFinalize, 733 fdrLoggingHandleArg0, 734 fdrLoggingFlush, 735 }; 736 auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl); 737 if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && 738 Verbosity()) { 739 Report("Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n", 740 RegistrationResult); 741 return false; 742 } 743 744 if (flags()->xray_fdr_log || 745 !internal_strcmp(flags()->xray_mode, "xray-fdr")) { 746 auto SelectResult = __xray_log_select_mode("xray-fdr"); 747 if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && 748 Verbosity()) { 749 Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n", 750 SelectResult); 751 return false; 752 } 753 } 754 return true; 755 } 756 757 } // namespace __xray 758 759 static auto UNUSED Unused = __xray::fdrLogDynamicInitializer(); 760