//===-- xray_fdr_logging.cpp -----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// Here we implement the Flight Data Recorder mode for XRay, where we use
// compact structures to store records in memory as well as when writing out
// the data to files.
//
//===----------------------------------------------------------------------===//
#include "xray_fdr_logging.h"
#include <cassert>
#include <cstddef>
#include <errno.h>
#include <limits>
#include <memory>
#include <pthread.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>

#include "sanitizer_common/sanitizer_allocator_internal.h"
#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_common.h"
#include "xray/xray_interface.h"
#include "xray/xray_records.h"
#include "xray_allocator.h"
#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_fdr_controller.h"
#include "xray_fdr_flags.h"
#include "xray_fdr_log_writer.h"
#include "xray_flags.h"
#include "xray_recursion_guard.h"
#include "xray_tsc.h"
#include "xray_utils.h"

namespace __xray {

static atomic_sint32_t LoggingStatus = {
    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};

namespace {

// Group together thread-local data in a struct, then hide it behind a function
// call so that it can be initialized on first use instead of as a global. We
// force the alignment to 64 bytes for x86 cache line alignment, as this
// structure is used in the hot path of the implementation.
struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
  BufferQueue::Buffer Buffer{};
  BufferQueue *BQ = nullptr;

  using LogWriterStorage = std::byte[sizeof(FDRLogWriter)];
  alignas(FDRLogWriter) LogWriterStorage LWStorage;
  FDRLogWriter *Writer = nullptr;

  using ControllerStorage = std::byte[sizeof(FDRController<>)];
  alignas(FDRController<>) ControllerStorage CStorage;
  FDRController<> *Controller = nullptr;
};

} // namespace

static_assert(std::is_trivially_destructible<ThreadLocalData>::value,
              "ThreadLocalData must be trivially destructible");

// Use a global pthread key to identify thread-local data for logging.
static pthread_key_t Key;

// Global BufferQueue.
static std::byte BufferQueueStorage[sizeof(BufferQueue)];
static BufferQueue *BQ = nullptr;

// Global threshold for function durations.
static atomic_uint64_t ThresholdTicks{0};

// Global for ticks per second.
static atomic_uint64_t TicksPerSec{0};

static atomic_sint32_t LogFlushStatus = {
    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};

// getThreadLocalData() initializes the thread-local data structure used by the
// FDR logging implementation and returns a reference to it. The implementation
// details require a bit of care to maintain.
//
// First, some requirements on the implementation in general:
//
//   - XRay handlers should not call any memory allocation routines that may
//     delegate to an instrumented implementation. This means functions like
//     malloc() and free() should not be called while instrumenting.
//
//   - We would like to use some thread-local data initialized on first use of
//     the XRay instrumentation. This allows us to implement unsynchronized
//     routines that access resources associated with the thread.
//
// The implementation here uses a few mechanisms that allow us to satisfy both
// of the requirements listed above. We do this by:
//
//   1. Using thread-local aligned storage for representing the ThreadLocalData
//      struct. This data will be uninitialized memory by design.
//
//   2. Not requiring a thread exit handler/implementation, keeping the
//      thread-local as purely a collection of references/data that do not
//      require cleanup.
//
// We're doing this to avoid using a `thread_local` object that has a
// non-trivial destructor, because the C++ runtime might call std::malloc(...)
// to register calls to destructors. Deadlocks may arise when, for example, an
// externally provided malloc implementation is XRay instrumented, and
// initializing the thread-locals involves calling into malloc. A malloc
// implementation that does global synchronization might be holding a lock
// for a critical section, calling a function that might be XRay instrumented
// (and thus in turn calling into malloc by virtue of registration of the
// thread_local's destructor).
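//
// For illustration only, the kind of declaration this design deliberately
// avoids would be (a hypothetical sketch, not code in this file):
//
//   thread_local std::unique_ptr<ThreadLocalData> TLD =
//       std::make_unique<ThreadLocalData>();
//
// Both the make_unique call and the registration of the non-trivial
// destructor (e.g. via __cxa_thread_atexit) may allocate; if the allocator is
// itself XRay instrumented, the handler can re-enter the allocator while it
// holds its own lock. The raw-storage and placement-new pattern in
// getThreadLocalData() below sidesteps both allocations.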
#if XRAY_HAS_TLS_ALIGNAS
static_assert(alignof(ThreadLocalData) >= 64,
              "ThreadLocalData must be cache line aligned.");
#endif
static ThreadLocalData &getThreadLocalData() {
  alignas(ThreadLocalData) thread_local std::byte
      TLDStorage[sizeof(ThreadLocalData)];

  if (pthread_getspecific(Key) == nullptr) {
    new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{};
    pthread_setspecific(Key, &TLDStorage);
  }

  return *reinterpret_cast<ThreadLocalData *>(&TLDStorage);
}

static XRayFileHeader &fdrCommonHeaderInfo() {
  alignas(XRayFileHeader) static std::byte HStorage[sizeof(XRayFileHeader)];
  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
  static bool TSCSupported = true;
  static uint64_t CycleFrequency = NanosecondsPerSecond;
  pthread_once(
      &OnceInit, +[] {
        XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage);
        // Version 2 of the log writes the extents of the buffer, instead of
        // relying on an end-of-buffer record.
        // Version 3 includes the PID metadata record.
        // Version 4 includes CPU data in the custom event records.
        // Version 5 uses relative deltas for custom and typed event records,
        // and removes the CPU data in custom event records (similar to how
        // function records use deltas instead of full TSCs and rely on other
        // metadata records for TSC wraparound and CPU migration).
        H.Version = 5;
        H.Type = FileTypes::FDR_LOG;

        // Test for required CPU features and cache the cycle frequency.
        TSCSupported = probeRequiredCPUFeatures();
        if (TSCSupported)
          CycleFrequency = getTSCFrequency();
        H.CycleFrequency = CycleFrequency;

        // FIXME: Actually check whether we have 'constant_tsc' and
        // 'nonstop_tsc' before setting the values in the header.
        H.ConstantTSC = 1;
        H.NonstopTSC = 1;
      });
  return reinterpret_cast<XRayFileHeader &>(HStorage);
}

// This is the iterator implementation, which knows how to handle FDR-mode
// specific buffers. This is used as an implementation of the iterator function
// needed by __xray_log_set_buffer_iterator(...). It maintains global state for
// the buffer iteration over the currently installed FDR mode buffers. In
// particular:
//
//   - If the argument represents the initial state of XRayBuffer
//     ({nullptr, 0}), then the iterator returns the header information.
//   - If the argument represents the header information ({address of header
//     info, size of the header info}), then it returns the first FDR buffer's
//     address and extents.
//   - It will keep returning the next buffer and extents while there are more
//     buffers to process. When the input represents the last buffer, it will
//     return the initial state ({nullptr, 0}) to signal completion.
//
// See xray/xray_log_interface.h for more details on the requirements for the
// implementations of __xray_log_set_buffer_iterator(...) and
// __xray_log_process_buffers(...).
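//
// For example, an in-process consumer could drive this iterator through the
// log interface roughly as follows (an illustrative sketch; see
// xray/xray_log_interface.h for the authoritative API and error handling):
//
//   __xray_log_finalize();
//   __xray_log_process_buffers(+[](const char *Mode, XRayBuffer Buffer) {
//     // The first invocation carries the XRayFileHeader; each subsequent one
//     // carries a single serialized FDR buffer.
//     consume(Mode, Buffer.Data, Buffer.Size);
//   });
//   __xray_log_flushLog();
//
// Here consume(...) stands in for whatever the consumer does with the bytes.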
XRayBuffer fdrIterator(const XRayBuffer B) {
  DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0);
  DCHECK(BQ->finalizing());

  if (BQ == nullptr || !BQ->finalizing()) {
    if (Verbosity())
      Report("XRay FDR: Global buffer queue is null or not finalizing!\n");
    return {nullptr, 0};
  }

  // We use a global scratch-pad for the header information, which only gets
  // initialized the first time this function is called. We'll update one part
  // of this information with some relevant data (in particular the number of
  // buffers to expect).
  alignas(
      XRayFileHeader) static std::byte HeaderStorage[sizeof(XRayFileHeader)];
  static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT;
  pthread_once(
      &HeaderOnce, +[] {
        reinterpret_cast<XRayFileHeader &>(HeaderStorage) =
            fdrCommonHeaderInfo();
      });

  // We use a convenience alias for code referring to Header from here on out.
  auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage);
  if (B.Data == nullptr && B.Size == 0) {
    Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
    return XRayBuffer{static_cast<void *>(&Header), sizeof(Header)};
  }

  static BufferQueue::const_iterator It{};
  static BufferQueue::const_iterator End{};
  static uint8_t *CurrentBuffer{nullptr};
  static size_t SerializedBufferSize = 0;
  if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) {
    // From this point on, we provide access to the raw buffers we're getting
    // from the BufferQueue. We rely on the iterators from the current buffer
    // queue.
    It = BQ->cbegin();
    End = BQ->cend();
  }

  if (CurrentBuffer != nullptr) {
    deallocateBuffer(CurrentBuffer, SerializedBufferSize);
    CurrentBuffer = nullptr;
  }

  if (It == End)
    return {nullptr, 0};

  // Set up the current buffer to contain the extents, like we would when
  // writing out to disk. The difference here is that we still present "empty"
  // buffers, or at least go through the iterators faithfully, to let the
  // handlers see the empty buffers in the queue.
  //
  // We need this atomic fence here to ensure that writes happening to the
  // buffer have been committed before we load the extents atomically. Because
  // the buffer is not explicitly synchronized across threads, we rely on the
  // fence ordering to ensure that writes we expect to have been completed
  // before the fence are fully committed before we read the extents.
  atomic_thread_fence(memory_order_acquire);
  auto BufferSize = atomic_load(It->Extents, memory_order_acquire);
  SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
  CurrentBuffer = allocateBuffer(SerializedBufferSize);
  if (CurrentBuffer == nullptr)
    return {nullptr, 0};

  // Write out the extents as a Metadata Record into the CurrentBuffer.
  MetadataRecord ExtentsRecord;
  ExtentsRecord.Type = uint8_t(RecordType::Metadata);
  ExtentsRecord.RecordKind =
      uint8_t(MetadataRecord::RecordKinds::BufferExtents);
  internal_memcpy(ExtentsRecord.Data, &BufferSize, sizeof(BufferSize));
  auto AfterExtents =
      static_cast<char *>(internal_memcpy(CurrentBuffer, &ExtentsRecord,
                                          sizeof(MetadataRecord))) +
      sizeof(MetadataRecord);
  internal_memcpy(AfterExtents, It->Data, BufferSize);

  XRayBuffer Result;
  Result.Data = CurrentBuffer;
  Result.Size = SerializedBufferSize;
  ++It;
  return Result;
}

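// Each XRayBuffer returned by fdrIterator above (after the initial header
// buffer) is a single heap-allocated blob laid out as:
//
//   +--------------------------------+---------------------------------+
//   | MetadataRecord (BufferExtents) | BufferSize bytes of FDR records |
//   +--------------------------------+---------------------------------+
//
// As a worked example (illustrative sizes): with BufferSize == 4096 and
// sizeof(MetadataRecord) == 16, the XRayBuffer handed to the processor spans
// 4112 bytes, the first 16 of which encode the extents.
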
// Must finalize before flushing.
XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
  if (atomic_load(&LoggingStatus, memory_order_acquire) !=
      XRayLogInitStatus::XRAY_LOG_FINALIZED) {
    if (Verbosity())
      Report("Not flushing log, implementation is not finalized.\n");
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  if (atomic_exchange(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHING,
                      memory_order_release) ==
      XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
    if (Verbosity())
      Report("Not flushing log, implementation is still flushing.\n");
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  if (BQ == nullptr) {
    if (Verbosity())
      Report("Cannot flush when global buffer queue is null.\n");
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  // We wait a number of milliseconds to allow threads to see that we've
  // finalized before attempting to flush the log.
  SleepForMillis(fdrFlags()->grace_period_ms);

  // At this point, we're going to uninstall the iterator implementation,
  // before we decide to do anything further with the global buffer queue.
  __xray_log_remove_buffer_iterator();

  // Once flushed, we should set the global status of the logging
  // implementation to "uninitialized" to allow for FDR-logging multiple runs.
  auto ResetToUninitialized = at_scope_exit([] {
    atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                 memory_order_release);
  });

  auto CleanupBuffers = at_scope_exit([] {
    auto &TLD = getThreadLocalData();
    if (TLD.Controller != nullptr)
      TLD.Controller->flush();
  });

  if (fdrFlags()->no_file_flush) {
    if (Verbosity())
      Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n");

    atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
                 memory_order_release);
    return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
  }

  // We write out the file in the following format (sketched below):
  //
  //   1) We write down the XRay file header from fdrCommonHeaderInfo()
  //      (currently version 5), with type FDR_LOG.
  //   2) Then we use the 'apply' member of the BufferQueue that's live, to
  //      ensure that at this point in time we write down the buffers that
  //      have been released (and marked "used") -- we write each buffer's
  //      extents record followed by only the bytes covered by those extents,
  //      and let the tools reading the buffers deal with the data afterwards.
  //
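  // The resulting on-disk layout, then, looks roughly like this (a sketch;
  // buffers whose extents are zero are skipped entirely):
  //
  //   [XRayFileHeader (version 5, FDR_LOG)]
  //   [BufferExtents MetadataRecord][<extents> bytes of buffer 0]
  //   [BufferExtents MetadataRecord][<extents> bytes of buffer 1]
  //   ...
  //
  // Tools such as llvm-xray consume this format when converting or analyzing
  // FDR logs.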
  LogWriter *LW = LogWriter::Open();
  if (LW == nullptr) {
    auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
    atomic_store(&LogFlushStatus, Result, memory_order_release);
    return Result;
  }

  XRayFileHeader Header = fdrCommonHeaderInfo();
  Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
  LW->WriteAll(reinterpret_cast<char *>(&Header),
               reinterpret_cast<char *>(&Header) + sizeof(Header));

  // Release the current thread's buffer before we attempt to write out all the
  // buffers. This ensures that, even if only a single thread was running, we
  // are still able to capture its data.
  auto &TLD = getThreadLocalData();
  if (TLD.Controller != nullptr)
    TLD.Controller->flush();

  BQ->apply([&](const BufferQueue::Buffer &B) {
    // Starting at version 2 of the FDR logging implementation, we only write
    // the records identified by the extents of the buffer. We use the Extents
    // from the Buffer and write that out as the first record in the buffer. We
    // still use a Metadata record, but fill in the extents instead for the
    // data.
    MetadataRecord ExtentsRecord;
    auto BufferExtents = atomic_load(B.Extents, memory_order_acquire);
    DCHECK(BufferExtents <= B.Size);
    ExtentsRecord.Type = uint8_t(RecordType::Metadata);
    ExtentsRecord.RecordKind =
        uint8_t(MetadataRecord::RecordKinds::BufferExtents);
    internal_memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents));
    if (BufferExtents > 0) {
      LW->WriteAll(reinterpret_cast<char *>(&ExtentsRecord),
                   reinterpret_cast<char *>(&ExtentsRecord) +
                       sizeof(MetadataRecord));
      LW->WriteAll(reinterpret_cast<char *>(B.Data),
                   reinterpret_cast<char *>(B.Data) + BufferExtents);
    }
  });

  atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
               memory_order_release);
  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
}

XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
  if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_FINALIZING,
                                      memory_order_release)) {
    if (Verbosity())
      Report("Cannot finalize log, implementation not initialized.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  // Do special things to make the log finalize itself, and not allow any more
  // operations to be performed until re-initialized.
  if (BQ == nullptr) {
    if (Verbosity())
      Report("Attempting to finalize an uninitialized global buffer queue!\n");
  } else {
    BQ->finalize();
  }

  atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
               memory_order_release);
  return XRayLogInitStatus::XRAY_LOG_FINALIZED;
}

struct TSCAndCPU {
  uint64_t TSC = 0;
  unsigned char CPU = 0;
};

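// getTimestamp() returns a (TSC, CPU) pair. When the CPU lacks the features
// we need for a usable TSC, the fallback encodes CLOCK_REALTIME in
// nanoseconds in the TSC field, with CPU fixed at 0, so downstream consumers
// still see a nanosecond-granularity counter. As a worked example
// (illustrative values): for TS = {/*tv_sec=*/2, /*tv_nsec=*/500000000}, the
// reported "TSC" is 2 * NanosecondsPerSecond + 500000000 = 2500000000.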
static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
  // We want to get the TSC as early as possible, so that we can check whether
  // we've seen this CPU before. We also do it before we load anything else,
  // to allow for forward progress with the scheduling.
  TSCAndCPU Result;

  // Test once for required CPU features.
  static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT;
  static bool TSCSupported = true;
  pthread_once(
      &OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); });

  if (TSCSupported) {
    Result.TSC = __xray::readTSC(Result.CPU);
  } else {
    // FIXME: This code needs refactoring, as it appears in multiple locations.
    timespec TS;
    int result = clock_gettime(CLOCK_REALTIME, &TS);
    if (result != 0) {
      Report("clock_gettime(2) returned %d, errno=%d.\n", result, int(errno));
      TS = {0, 0};
    }
    Result.CPU = 0;
    Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
  }
  return Result;
}

thread_local atomic_uint8_t Running{0};

static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT {
  // Check if we're finalizing, before proceeding.
  {
    auto Status = atomic_load(&LoggingStatus, memory_order_acquire);
    if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
        Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) {
      if (TLD.Controller != nullptr) {
        TLD.Controller->flush();
        TLD.Controller = nullptr;
      }
      return false;
    }
  }

  if (UNLIKELY(TLD.Controller == nullptr)) {
    // Set up the TLD buffer queue.
    if (UNLIKELY(BQ == nullptr))
      return false;
    TLD.BQ = BQ;

    // Check that we have a valid buffer; release any buffer held over from a
    // previous generation before grabbing a new one.
    if (TLD.Buffer.Generation != BQ->generation() &&
        TLD.BQ->releaseBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok)
      return false;

    // Set up a buffer before setting up the log writer. Bail out on failure.
    if (TLD.BQ->getBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok)
      return false;

    // Set up the Log Writer for this thread.
    if (UNLIKELY(TLD.Writer == nullptr)) {
      auto *LWStorage = reinterpret_cast<FDRLogWriter *>(&TLD.LWStorage);
      new (LWStorage) FDRLogWriter(TLD.Buffer);
      TLD.Writer = LWStorage;
    } else {
      TLD.Writer->resetRecord();
    }

    auto *CStorage = reinterpret_cast<FDRController<> *>(&TLD.CStorage);
    new (CStorage)
        FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime,
                        atomic_load_relaxed(&ThresholdTicks));
    TLD.Controller = CStorage;
  }

  DCHECK_NE(TLD.Controller, nullptr);
  return true;
}

void fdrLoggingHandleArg0(int32_t FuncId,
                          XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
  auto TC = getTimestamp();
  auto &TSC = TC.TSC;
  auto &CPU = TC.CPU;
  RecursionGuard Guard{Running};
  if (!Guard)
    return;

  auto &TLD = getThreadLocalData();
  if (!setupTLD(TLD))
    return;

  switch (Entry) {
  case XRayEntryType::ENTRY:
  case XRayEntryType::LOG_ARGS_ENTRY:
    TLD.Controller->functionEnter(FuncId, TSC, CPU);
    return;
  case XRayEntryType::EXIT:
    TLD.Controller->functionExit(FuncId, TSC, CPU);
    return;
  case XRayEntryType::TAIL:
    TLD.Controller->functionTailExit(FuncId, TSC, CPU);
    return;
  case XRayEntryType::CUSTOM_EVENT:
  case XRayEntryType::TYPED_EVENT:
    break;
  }
}

void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
                          uint64_t Arg) XRAY_NEVER_INSTRUMENT {
  auto TC = getTimestamp();
  auto &TSC = TC.TSC;
  auto &CPU = TC.CPU;
  RecursionGuard Guard{Running};
  if (!Guard)
    return;

  auto &TLD = getThreadLocalData();
  if (!setupTLD(TLD))
    return;

  switch (Entry) {
  case XRayEntryType::ENTRY:
  case XRayEntryType::LOG_ARGS_ENTRY:
    TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg);
    return;
  case XRayEntryType::EXIT:
    TLD.Controller->functionExit(FuncId, TSC, CPU);
    return;
  case XRayEntryType::TAIL:
    TLD.Controller->functionTailExit(FuncId, TSC, CPU);
    return;
  case XRayEntryType::CUSTOM_EVENT:
  case XRayEntryType::TYPED_EVENT:
    break;
  }
}

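// The custom and typed event handlers below are reached through the
// instrumentation points that Clang emits for the __xray_customevent(...)
// and __xray_typedevent(...) builtins. For illustration (user code, not part
// of this file):
//
//   // Record an application-defined blob into the FDR log.
//   const char Msg[] = "checkpoint reached";
//   __xray_customevent(Msg, sizeof(Msg) - 1);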
void fdrLoggingHandleCustomEvent(void *Event,
                                 std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
  auto TC = getTimestamp();
  auto &TSC = TC.TSC;
  auto &CPU = TC.CPU;
  RecursionGuard Guard{Running};
  if (!Guard)
    return;

  // Complain (once) if we ever get a custom event that's larger than what we
  // can possibly support, and clamp the size to what we can record.
  if (EventSize >
      static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
    static pthread_once_t Once = PTHREAD_ONCE_INIT;
    pthread_once(
        &Once, +[] {
          Report("Custom event size too large; truncating to %d.\n",
                 std::numeric_limits<int32_t>::max());
        });
    EventSize = static_cast<std::size_t>(std::numeric_limits<int32_t>::max());
  }

  auto &TLD = getThreadLocalData();
  if (!setupTLD(TLD))
    return;

  int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
  TLD.Controller->customEvent(TSC, CPU, Event, ReducedEventSize);
}

void fdrLoggingHandleTypedEvent(size_t EventType, const void *Event,
                                size_t EventSize) noexcept
    XRAY_NEVER_INSTRUMENT {
  auto TC = getTimestamp();
  auto &TSC = TC.TSC;
  auto &CPU = TC.CPU;
  RecursionGuard Guard{Running};
  if (!Guard)
    return;

  // Complain (once) if we ever get a typed event that's larger than what we
  // can possibly support, and clamp the size to what we can record.
  if (EventSize >
      static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
    static pthread_once_t Once = PTHREAD_ONCE_INIT;
    pthread_once(
        &Once, +[] {
          Report("Typed event size too large; truncating to %d.\n",
                 std::numeric_limits<int32_t>::max());
        });
    EventSize = static_cast<std::size_t>(std::numeric_limits<int32_t>::max());
  }

  auto &TLD = getThreadLocalData();
  if (!setupTLD(TLD))
    return;

  int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
  TLD.Controller->typedEvent(TSC, CPU, static_cast<uint16_t>(EventType), Event,
                             ReducedEventSize);
}

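// FDR mode is configured through a string of flags. At initialization, the
// sources are parsed in increasing order of precedence: the compiler-provided
// XRAY_DEFAULT_OPTIONS and XRAY_FDR_OPTIONS defaults, then the
// XRAY_FDR_OPTIONS environment variable, then the Options string handed to
// fdrLoggingInit() below. A typical invocation (illustrative values only)
// might look like:
//
//   XRAY_FDR_OPTIONS="func_duration_threshold_us=5:buffer_size=16384" \
//       ./instrumented_binary
//
// or, programmatically, through the log interface:
//
//   __xray_log_init_mode("xray-fdr", "buffer_size=16384:buffer_max=10");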
XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options,
                                 size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
  if (Options == nullptr)
    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;

  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
  if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_INITIALIZING,
                                      memory_order_release)) {
    if (Verbosity())
      Report("Cannot initialize an already initialized implementation.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  if (Verbosity())
    Report("Initializing FDR mode with options: %s\n",
           static_cast<const char *>(Options));

  // TODO: Factor out the flags specific to the FDR mode implementation. For
  // now, use the global/single definition of the flags, since the FDR mode
  // flags are already defined there.
  FlagParser FDRParser;
  FDRFlags FDRFlags;
  registerXRayFDRFlags(&FDRParser, &FDRFlags);
  FDRFlags.setDefaults();

  // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided
  // options until we migrate everyone to use the XRAY_FDR_OPTIONS
  // compiler-provided options.
  FDRParser.ParseString(useCompilerDefinedFlags());
  FDRParser.ParseString(useCompilerDefinedFDRFlags());
  auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS");
  if (EnvOpts == nullptr)
    EnvOpts = "";
  FDRParser.ParseString(EnvOpts);

  // FIXME: Remove this when we fully remove the deprecated flags.
  if (internal_strlen(EnvOpts) == 0) {
    FDRFlags.func_duration_threshold_us =
        flags()->xray_fdr_log_func_duration_threshold_us;
    FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms;
  }

  // The provided options should always override the compiler-provided and
  // environment-variable defined options.
  FDRParser.ParseString(static_cast<const char *>(Options));
  *fdrFlags() = FDRFlags;
  auto BufferSize = FDRFlags.buffer_size;
  auto BufferMax = FDRFlags.buffer_max;

  if (BQ == nullptr) {
    bool Success = false;
    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
    new (BQ) BufferQueue(BufferSize, BufferMax, Success);
    if (!Success) {
      Report("BufferQueue init failed.\n");
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }
  } else {
    if (BQ->init(BufferSize, BufferMax) != BufferQueue::ErrorCode::Ok) {
      if (Verbosity())
        Report("Failed to re-initialize the global buffer queue.\n");
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }
  }

  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
  pthread_once(
      &OnceInit, +[] {
        atomic_store(&TicksPerSec,
                     probeRequiredCPUFeatures() ? getTSCFrequency()
                                                : __xray::NanosecondsPerSecond,
                     memory_order_release);
        pthread_key_create(
            &Key, +[](void *TLDPtr) {
              if (TLDPtr == nullptr)
                return;
              auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr);
              if (TLD.BQ == nullptr)
                return;
              if (TLD.Buffer.Data == nullptr)
                return;
              auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);
              if (EC != BufferQueue::ErrorCode::Ok)
                Report("At thread exit, failed to release buffer at %p; "
                       "error=%s\n",
                       TLD.Buffer.Data, BufferQueue::getErrorString(EC));
            });
      });

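  // Convert the function-duration threshold from microseconds into TSC ticks:
  // ThresholdTicks = TicksPerSec * func_duration_threshold_us / 10^6. As a
  // worked example (illustrative numbers): with a 3 GHz TSC and a 5 us
  // threshold, ThresholdTicks = 3e9 * 5 / 1e6 = 15000 ticks; the controller
  // drops records for function invocations that complete in fewer ticks.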
  atomic_store(&ThresholdTicks,
               atomic_load_relaxed(&TicksPerSec) *
                   fdrFlags()->func_duration_threshold_us / 1000000,
               memory_order_release);
  // The arg1 handler should go in first, to avoid concurrent code accidentally
  // falling back to arg0 when it should have run the arg1 handler.
  __xray_set_handler_arg1(fdrLoggingHandleArg1);
  // Install the actual handleArg0 handler after initializing the buffers.
  __xray_set_handler(fdrLoggingHandleArg0);
  __xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
  __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent);

  // Install the buffer iterator implementation.
  __xray_log_set_buffer_iterator(fdrIterator);

  atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
               memory_order_release);

  if (Verbosity())
    Report("XRay FDR init successful.\n");
  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}

bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
  XRayLogImpl Impl{
      fdrLoggingInit,
      fdrLoggingFinalize,
      fdrLoggingHandleArg0,
      fdrLoggingFlush,
  };
  auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl);
  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
    if (Verbosity())
      Report("Cannot register XRay FDR mode as 'xray-fdr'; error = %d\n",
             RegistrationResult);
    return false;
  }

  if (flags()->xray_fdr_log ||
      !internal_strcmp(flags()->xray_mode, "xray-fdr")) {
    auto SelectResult = __xray_log_select_mode("xray-fdr");
    if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
      if (Verbosity())
        Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n",
               SelectResult);
      return false;
    }
  }
  return true;
}

} // namespace __xray

static auto UNUSED Unused = __xray::fdrLogDynamicInitializer();