xref: /freebsd/contrib/llvm-project/compiler-rt/lib/xray/xray_fdr_logging.cpp (revision 7fdf597e96a02165cfe22ff357b857d5fa15ed8a)
1 //===-- xray_fdr_logging.cpp -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of XRay, a dynamic runtime instrumentation system.
10 //
11 // Here we implement the Flight Data Recorder mode for XRay, where we use
12 // compact structures to store records in memory as well as when writing out the
13 // data to files.
14 //
15 //===----------------------------------------------------------------------===//
16 #include "xray_fdr_logging.h"
17 #include <cassert>
18 #include <cstddef>
19 #include <errno.h>
20 #include <limits>
21 #include <memory>
22 #include <pthread.h>
23 #include <sys/time.h>
24 #include <time.h>
25 #include <unistd.h>
26 
27 #include "sanitizer_common/sanitizer_allocator_internal.h"
28 #include "sanitizer_common/sanitizer_atomic.h"
29 #include "sanitizer_common/sanitizer_common.h"
30 #include "xray/xray_interface.h"
31 #include "xray/xray_records.h"
32 #include "xray_allocator.h"
33 #include "xray_buffer_queue.h"
34 #include "xray_defs.h"
35 #include "xray_fdr_controller.h"
36 #include "xray_fdr_flags.h"
37 #include "xray_fdr_log_writer.h"
38 #include "xray_flags.h"
39 #include "xray_recursion_guard.h"
40 #include "xray_tsc.h"
41 #include "xray_utils.h"
42 
43 namespace __xray {
44 
45 static atomic_sint32_t LoggingStatus = {
46     XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
47 
48 namespace {
49 
50 // Group together thread-local-data in a struct, then hide it behind a function
51 // call so that it can be initialized on first use instead of as a global. We
52 // force the alignment to 64-bytes for x86 cache line alignment, as this
53 // structure is used in the hot path of implementation.
54 struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
55   BufferQueue::Buffer Buffer{};
56   BufferQueue *BQ = nullptr;
57 
58   using LogWriterStorage = std::byte[sizeof(FDRLogWriter)];
59   alignas(FDRLogWriter) LogWriterStorage LWStorage;
60   FDRLogWriter *Writer = nullptr;
61 
62   using ControllerStorage = std::byte[sizeof(FDRController<>)];
63   alignas(FDRController<>) ControllerStorage CStorage;
64   FDRController<> *Controller = nullptr;
65 };
66 
67 } // namespace
68 
69 static_assert(std::is_trivially_destructible<ThreadLocalData>::value,
70               "ThreadLocalData must be trivially destructible");
71 
72 // Use a global pthread key to identify thread-local data for logging.
73 static pthread_key_t Key;
74 
75 // Global BufferQueue.
76 static std::byte BufferQueueStorage[sizeof(BufferQueue)];
77 static BufferQueue *BQ = nullptr;
78 
79 // Global thresholds for function durations.
80 static atomic_uint64_t ThresholdTicks{0};
81 
82 // Global for ticks per second.
83 static atomic_uint64_t TicksPerSec{0};
84 
85 static atomic_sint32_t LogFlushStatus = {
86     XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
87 
88 // This function will initialize the thread-local data structure used by the FDR
89 // logging implementation and return a reference to it. The implementation
90 // details require a bit of care to maintain.
91 //
92 // First, some requirements on the implementation in general:
93 //
94 //   - XRay handlers should not call any memory allocation routines that may
95 //     delegate to an instrumented implementation. This means functions like
96 //     malloc() and free() should not be called while instrumenting.
97 //
98 //   - We would like to use some thread-local data initialized on first-use of
99 //     the XRay instrumentation. These allow us to implement unsynchronized
100 //     routines that access resources associated with the thread.
101 //
102 // The implementation here uses a few mechanisms that allow us to provide both
103 // the requirements listed above. We do this by:
104 //
105 //   1. Using a thread-local aligned storage buffer for representing the
106 //      ThreadLocalData struct. This data will be uninitialized memory by
107 //      design.
108 //
109 //   2. Not requiring a thread exit handler/implementation, keeping the
110 //      thread-local as purely a collection of references/data that do not
111 //      require cleanup.
112 //
113 // We're doing this to avoid using a `thread_local` object that has a
114 // non-trivial destructor, because the C++ runtime might call std::malloc(...)
115 // to register calls to destructors. Deadlocks may arise when, for example, an
116 // externally provided malloc implementation is XRay instrumented, and
117 // initializing the thread-locals involves calling into malloc. A malloc
118 // implementation that does global synchronization might be holding a lock for a
119 // critical section, calling a function that might be XRay instrumented (and
120 // thus in turn calling into malloc by virtue of registration of the
121 // thread_local's destructor).
122 #if XRAY_HAS_TLS_ALIGNAS
123 static_assert(alignof(ThreadLocalData) >= 64,
124               "ThreadLocalData must be cache line aligned.");
125 #endif
126 static ThreadLocalData &getThreadLocalData() {
127   alignas(ThreadLocalData) thread_local std::byte
128       TLDStorage[sizeof(ThreadLocalData)];
129 
130   if (pthread_getspecific(Key) == NULL) {
131     new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{};
132     pthread_setspecific(Key, &TLDStorage);
133   }
134 
135   return *reinterpret_cast<ThreadLocalData *>(&TLDStorage);
136 }
137 
138 static XRayFileHeader &fdrCommonHeaderInfo() {
139   alignas(XRayFileHeader) static std::byte HStorage[sizeof(XRayFileHeader)];
140   static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
141   static bool TSCSupported = true;
142   static uint64_t CycleFrequency = NanosecondsPerSecond;
143   pthread_once(
144       &OnceInit, +[] {
145         XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage);
146         // Version 2 of the log writes the extents of the buffer, instead of
147         // relying on an end-of-buffer record.
148         // Version 3 includes PID metadata record.
149         // Version 4 includes CPU data in the custom event records.
150         // Version 5 uses relative deltas for custom and typed event records,
151         // and removes the CPU data in custom event records (similar to how
152         // function records use deltas instead of full TSCs and rely on other
153         // metadata records for TSC wraparound and CPU migration).
154         H.Version = 5;
155         H.Type = FileTypes::FDR_LOG;
156 
157         // Test for required CPU features and cache the cycle frequency
158         TSCSupported = probeRequiredCPUFeatures();
159         if (TSCSupported)
160           CycleFrequency = getTSCFrequency();
161         H.CycleFrequency = CycleFrequency;
162 
163         // FIXME: Actually check whether we have 'constant_tsc' and
164         // 'nonstop_tsc' before setting the values in the header.
165         H.ConstantTSC = 1;
166         H.NonstopTSC = 1;
167       });
168   return reinterpret_cast<XRayFileHeader &>(HStorage);
169 }
170 
171 // This is the iterator implementation, which knows how to handle FDR-mode
172 // specific buffers. This is used as an implementation of the iterator function
173 // needed by __xray_set_buffer_iterator(...). It maintains a global state of the
174 // buffer iteration for the currently installed FDR mode buffers. In particular:
175 //
176 //   - If the argument represents the initial state of XRayBuffer ({nullptr, 0})
177 //     then the iterator returns the header information.
178 //   - If the argument represents the header information ({address of header
179 //     info, size of the header info}) then it returns the first FDR buffer's
180 //     address and extents.
181 //   - It will keep returning the next buffer and extents as there are more
182 //     buffers to process. When the input represents the last buffer, it will
183 //     return the initial state to signal completion ({nullptr, 0}).
184 //
185 // See xray/xray_log_interface.h for more details on the requirements for the
186 // implementations of __xray_set_buffer_iterator(...) and
187 // __xray_log_process_buffers(...).
188 XRayBuffer fdrIterator(const XRayBuffer B) {
189   DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0);
190   DCHECK(BQ->finalizing());
191 
192   if (BQ == nullptr || !BQ->finalizing()) {
193     if (Verbosity())
194       Report(
195           "XRay FDR: Failed global buffer queue is null or not finalizing!\n");
196     return {nullptr, 0};
197   }
198 
199   // We use a global scratch-pad for the header information, which only gets
200   // initialized the first time this function is called. We'll update one part
201   // of this information with some relevant data (in particular the number of
202   // buffers to expect).
203   alignas(
204       XRayFileHeader) static std::byte HeaderStorage[sizeof(XRayFileHeader)];
205   static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT;
206   pthread_once(
207       &HeaderOnce, +[] {
208         reinterpret_cast<XRayFileHeader &>(HeaderStorage) =
209             fdrCommonHeaderInfo();
210       });
211 
212   // We use a convenience alias for code referring to Header from here on out.
213   auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage);
214   if (B.Data == nullptr && B.Size == 0) {
215     Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
216     return XRayBuffer{static_cast<void *>(&Header), sizeof(Header)};
217   }
218 
219   static BufferQueue::const_iterator It{};
220   static BufferQueue::const_iterator End{};
221   static uint8_t *CurrentBuffer{nullptr};
222   static size_t SerializedBufferSize = 0;
223   if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) {
224     // From this point on, we provide raw access to the raw buffer we're getting
225     // from the BufferQueue. We're relying on the iterators from the current
226     // Buffer queue.
227     It = BQ->cbegin();
228     End = BQ->cend();
229   }
230 
231   if (CurrentBuffer != nullptr) {
232     deallocateBuffer(CurrentBuffer, SerializedBufferSize);
233     CurrentBuffer = nullptr;
234   }
235 
236   if (It == End)
237     return {nullptr, 0};
238 
239   // Set up the current buffer to contain the extents like we would when writing
240   // out to disk. The difference here would be that we still write "empty"
241   // buffers, or at least go through the iterators faithfully to let the
242   // handlers see the empty buffers in the queue.
243   //
244   // We need this atomic fence here to ensure that writes happening to the
245   // buffer have been committed before we load the extents atomically. Because
246   // the buffer is not explicitly synchronised across threads, we rely on the
247   // fence ordering to ensure that writes we expect to have been completed
248   // before the fence are fully committed before we read the extents.
249   atomic_thread_fence(memory_order_acquire);
250   auto BufferSize = atomic_load(It->Extents, memory_order_acquire);
251   SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
252   CurrentBuffer = allocateBuffer(SerializedBufferSize);
253   if (CurrentBuffer == nullptr)
254     return {nullptr, 0};
255 
256   // Write out the extents as a Metadata Record into the CurrentBuffer.
257   MetadataRecord ExtentsRecord;
258   ExtentsRecord.Type = uint8_t(RecordType::Metadata);
259   ExtentsRecord.RecordKind =
260       uint8_t(MetadataRecord::RecordKinds::BufferExtents);
261   internal_memcpy(ExtentsRecord.Data, &BufferSize, sizeof(BufferSize));
262   auto AfterExtents =
263       static_cast<char *>(internal_memcpy(CurrentBuffer, &ExtentsRecord,
264                                           sizeof(MetadataRecord))) +
265       sizeof(MetadataRecord);
266   internal_memcpy(AfterExtents, It->Data, BufferSize);
267 
268   XRayBuffer Result;
269   Result.Data = CurrentBuffer;
270   Result.Size = SerializedBufferSize;
271   ++It;
272   return Result;
273 }
274 
275 // Must finalize before flushing.
276 XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
277   if (atomic_load(&LoggingStatus, memory_order_acquire) !=
278       XRayLogInitStatus::XRAY_LOG_FINALIZED) {
279     if (Verbosity())
280       Report("Not flushing log, implementation is not finalized.\n");
281     return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
282   }
283 
284   if (atomic_exchange(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHING,
285                       memory_order_release) ==
286       XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
287     if (Verbosity())
288       Report("Not flushing log, implementation is still flushing.\n");
289     return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
290   }
291 
292   if (BQ == nullptr) {
293     if (Verbosity())
294       Report("Cannot flush when global buffer queue is null.\n");
295     return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
296   }
297 
298   // We wait a number of milliseconds to allow threads to see that we've
299   // finalised before attempting to flush the log.
300   SleepForMillis(fdrFlags()->grace_period_ms);
301 
302   // At this point, we're going to uninstall the iterator implementation, before
303   // we decide to do anything further with the global buffer queue.
304   __xray_log_remove_buffer_iterator();
305 
306   // Once flushed, we should set the global status of the logging implementation
307   // to "uninitialized" to allow for FDR-logging multiple runs.
308   auto ResetToUnitialized = at_scope_exit([] {
309     atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
310                  memory_order_release);
311   });
312 
313   auto CleanupBuffers = at_scope_exit([] {
314     auto &TLD = getThreadLocalData();
315     if (TLD.Controller != nullptr)
316       TLD.Controller->flush();
317   });
318 
319   if (fdrFlags()->no_file_flush) {
320     if (Verbosity())
321       Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n");
322 
323     atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
324                  memory_order_release);
325     return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
326   }
327 
328   // We write out the file in the following format:
329   //
330   //   1) We write down the XRay file header with version 1, type FDR_LOG.
331   //   2) Then we use the 'apply' member of the BufferQueue that's live, to
332   //      ensure that at this point in time we write down the buffers that have
333   //      been released (and marked "used") -- we dump the full buffer for now
334   //      (fixed-sized) and let the tools reading the buffers deal with the data
335   //      afterwards.
336   //
337   LogWriter *LW = LogWriter::Open();
338   if (LW == nullptr) {
339     auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
340     atomic_store(&LogFlushStatus, Result, memory_order_release);
341     return Result;
342   }
343 
344   XRayFileHeader Header = fdrCommonHeaderInfo();
345   Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
346   LW->WriteAll(reinterpret_cast<char *>(&Header),
347                reinterpret_cast<char *>(&Header) + sizeof(Header));
348 
349   // Release the current thread's buffer before we attempt to write out all the
350   // buffers. This ensures that in case we had only a single thread going, that
351   // we are able to capture the data nonetheless.
352   auto &TLD = getThreadLocalData();
353   if (TLD.Controller != nullptr)
354     TLD.Controller->flush();
355 
356   BQ->apply([&](const BufferQueue::Buffer &B) {
357     // Starting at version 2 of the FDR logging implementation, we only write
358     // the records identified by the extents of the buffer. We use the Extents
359     // from the Buffer and write that out as the first record in the buffer.  We
360     // still use a Metadata record, but fill in the extents instead for the
361     // data.
362     MetadataRecord ExtentsRecord;
363     auto BufferExtents = atomic_load(B.Extents, memory_order_acquire);
364     DCHECK(BufferExtents <= B.Size);
365     ExtentsRecord.Type = uint8_t(RecordType::Metadata);
366     ExtentsRecord.RecordKind =
367         uint8_t(MetadataRecord::RecordKinds::BufferExtents);
368     internal_memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents));
369     if (BufferExtents > 0) {
370       LW->WriteAll(reinterpret_cast<char *>(&ExtentsRecord),
371                    reinterpret_cast<char *>(&ExtentsRecord) +
372                        sizeof(MetadataRecord));
373       LW->WriteAll(reinterpret_cast<char *>(B.Data),
374                    reinterpret_cast<char *>(B.Data) + BufferExtents);
375     }
376   });
377 
378   atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
379                memory_order_release);
380   return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
381 }
382 
383 XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
384   s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
385   if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
386                                       XRayLogInitStatus::XRAY_LOG_FINALIZING,
387                                       memory_order_release)) {
388     if (Verbosity())
389       Report("Cannot finalize log, implementation not initialized.\n");
390     return static_cast<XRayLogInitStatus>(CurrentStatus);
391   }
392 
393   // Do special things to make the log finalize itself, and not allow any more
394   // operations to be performed until re-initialized.
395   if (BQ == nullptr) {
396     if (Verbosity())
397       Report("Attempting to finalize an uninitialized global buffer!\n");
398   } else {
399     BQ->finalize();
400   }
401 
402   atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
403                memory_order_release);
404   return XRayLogInitStatus::XRAY_LOG_FINALIZED;
405 }
406 
407 struct TSCAndCPU {
408   uint64_t TSC = 0;
409   unsigned char CPU = 0;
410 };
411 
412 static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
413   // We want to get the TSC as early as possible, so that we can check whether
414   // we've seen this CPU before. We also do it before we load anything else,
415   // to allow for forward progress with the scheduling.
416   TSCAndCPU Result;
417 
418   // Test once for required CPU features
419   static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT;
420   static bool TSCSupported = true;
421   pthread_once(
422       &OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); });
423 
424   if (TSCSupported) {
425     Result.TSC = __xray::readTSC(Result.CPU);
426   } else {
427     // FIXME: This code needs refactoring as it appears in multiple locations
428     timespec TS;
429     int result = clock_gettime(CLOCK_REALTIME, &TS);
430     if (result != 0) {
431       Report("clock_gettime(2) return %d, errno=%d", result, int(errno));
432       TS = {0, 0};
433     }
434     Result.CPU = 0;
435     Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
436   }
437   return Result;
438 }
439 
440 thread_local atomic_uint8_t Running{0};
441 
442 static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT {
443   // Check if we're finalizing, before proceeding.
444   {
445     auto Status = atomic_load(&LoggingStatus, memory_order_acquire);
446     if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
447         Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) {
448       if (TLD.Controller != nullptr) {
449         TLD.Controller->flush();
450         TLD.Controller = nullptr;
451       }
452       return false;
453     }
454   }
455 
456   if (UNLIKELY(TLD.Controller == nullptr)) {
457     // Set up the TLD buffer queue.
458     if (UNLIKELY(BQ == nullptr))
459       return false;
460     TLD.BQ = BQ;
461 
462     // Check that we have a valid buffer.
463     if (TLD.Buffer.Generation != BQ->generation() &&
464         TLD.BQ->releaseBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok)
465       return false;
466 
467     // Set up a buffer, before setting up the log writer. Bail out on failure.
468     if (TLD.BQ->getBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok)
469       return false;
470 
471     // Set up the Log Writer for this thread.
472     if (UNLIKELY(TLD.Writer == nullptr)) {
473       auto *LWStorage = reinterpret_cast<FDRLogWriter *>(&TLD.LWStorage);
474       new (LWStorage) FDRLogWriter(TLD.Buffer);
475       TLD.Writer = LWStorage;
476     } else {
477       TLD.Writer->resetRecord();
478     }
479 
480     auto *CStorage = reinterpret_cast<FDRController<> *>(&TLD.CStorage);
481     new (CStorage)
482         FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime,
483                         atomic_load_relaxed(&ThresholdTicks));
484     TLD.Controller = CStorage;
485   }
486 
487   DCHECK_NE(TLD.Controller, nullptr);
488   return true;
489 }
490 
491 void fdrLoggingHandleArg0(int32_t FuncId,
492                           XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
493   auto TC = getTimestamp();
494   auto &TSC = TC.TSC;
495   auto &CPU = TC.CPU;
496   RecursionGuard Guard{Running};
497   if (!Guard)
498     return;
499 
500   auto &TLD = getThreadLocalData();
501   if (!setupTLD(TLD))
502     return;
503 
504   switch (Entry) {
505   case XRayEntryType::ENTRY:
506   case XRayEntryType::LOG_ARGS_ENTRY:
507     TLD.Controller->functionEnter(FuncId, TSC, CPU);
508     return;
509   case XRayEntryType::EXIT:
510     TLD.Controller->functionExit(FuncId, TSC, CPU);
511     return;
512   case XRayEntryType::TAIL:
513     TLD.Controller->functionTailExit(FuncId, TSC, CPU);
514     return;
515   case XRayEntryType::CUSTOM_EVENT:
516   case XRayEntryType::TYPED_EVENT:
517     break;
518   }
519 }
520 
521 void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
522                           uint64_t Arg) XRAY_NEVER_INSTRUMENT {
523   auto TC = getTimestamp();
524   auto &TSC = TC.TSC;
525   auto &CPU = TC.CPU;
526   RecursionGuard Guard{Running};
527   if (!Guard)
528     return;
529 
530   auto &TLD = getThreadLocalData();
531   if (!setupTLD(TLD))
532     return;
533 
534   switch (Entry) {
535   case XRayEntryType::ENTRY:
536   case XRayEntryType::LOG_ARGS_ENTRY:
537     TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg);
538     return;
539   case XRayEntryType::EXIT:
540     TLD.Controller->functionExit(FuncId, TSC, CPU);
541     return;
542   case XRayEntryType::TAIL:
543     TLD.Controller->functionTailExit(FuncId, TSC, CPU);
544     return;
545   case XRayEntryType::CUSTOM_EVENT:
546   case XRayEntryType::TYPED_EVENT:
547     break;
548   }
549 }
550 
551 void fdrLoggingHandleCustomEvent(void *Event,
552                                  std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
553   auto TC = getTimestamp();
554   auto &TSC = TC.TSC;
555   auto &CPU = TC.CPU;
556   RecursionGuard Guard{Running};
557   if (!Guard)
558     return;
559 
560   // Complain when we ever get at least one custom event that's larger than what
561   // we can possibly support.
562   if (EventSize >
563       static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
564     static pthread_once_t Once = PTHREAD_ONCE_INIT;
565     pthread_once(
566         &Once, +[] {
567           Report("Custom event size too large; truncating to %d.\n",
568                  std::numeric_limits<int32_t>::max());
569         });
570   }
571 
572   auto &TLD = getThreadLocalData();
573   if (!setupTLD(TLD))
574     return;
575 
576   int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
577   TLD.Controller->customEvent(TSC, CPU, Event, ReducedEventSize);
578 }
579 
580 void fdrLoggingHandleTypedEvent(size_t EventType, const void *Event,
581                                 size_t EventSize) noexcept
582     XRAY_NEVER_INSTRUMENT {
583   auto TC = getTimestamp();
584   auto &TSC = TC.TSC;
585   auto &CPU = TC.CPU;
586   RecursionGuard Guard{Running};
587   if (!Guard)
588     return;
589 
590   // Complain when we ever get at least one typed event that's larger than what
591   // we can possibly support.
592   if (EventSize >
593       static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
594     static pthread_once_t Once = PTHREAD_ONCE_INIT;
595     pthread_once(
596         &Once, +[] {
597           Report("Typed event size too large; truncating to %d.\n",
598                  std::numeric_limits<int32_t>::max());
599         });
600   }
601 
602   auto &TLD = getThreadLocalData();
603   if (!setupTLD(TLD))
604     return;
605 
606   int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
607   TLD.Controller->typedEvent(TSC, CPU, static_cast<uint16_t>(EventType), Event,
608                              ReducedEventSize);
609 }
610 
611 XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options,
612                                  size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
613   if (Options == nullptr)
614     return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
615 
616   s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
617   if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
618                                       XRayLogInitStatus::XRAY_LOG_INITIALIZING,
619                                       memory_order_release)) {
620     if (Verbosity())
621       Report("Cannot initialize already initialized implementation.\n");
622     return static_cast<XRayLogInitStatus>(CurrentStatus);
623   }
624 
625   if (Verbosity())
626     Report("Initializing FDR mode with options: %s\n",
627            static_cast<const char *>(Options));
628 
629   // TODO: Factor out the flags specific to the FDR mode implementation. For
630   // now, use the global/single definition of the flags, since the FDR mode
631   // flags are already defined there.
632   FlagParser FDRParser;
633   FDRFlags FDRFlags;
634   registerXRayFDRFlags(&FDRParser, &FDRFlags);
635   FDRFlags.setDefaults();
636 
637   // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided
638   // options until we migrate everyone to use the XRAY_FDR_OPTIONS
639   // compiler-provided options.
640   FDRParser.ParseString(useCompilerDefinedFlags());
641   FDRParser.ParseString(useCompilerDefinedFDRFlags());
642   auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS");
643   if (EnvOpts == nullptr)
644     EnvOpts = "";
645   FDRParser.ParseString(EnvOpts);
646 
647   // FIXME: Remove this when we fully remove the deprecated flags.
648   if (internal_strlen(EnvOpts) == 0) {
649     FDRFlags.func_duration_threshold_us =
650         flags()->xray_fdr_log_func_duration_threshold_us;
651     FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms;
652   }
653 
654   // The provided options should always override the compiler-provided and
655   // environment-variable defined options.
656   FDRParser.ParseString(static_cast<const char *>(Options));
657   *fdrFlags() = FDRFlags;
658   auto BufferSize = FDRFlags.buffer_size;
659   auto BufferMax = FDRFlags.buffer_max;
660 
661   if (BQ == nullptr) {
662     bool Success = false;
663     BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
664     new (BQ) BufferQueue(BufferSize, BufferMax, Success);
665     if (!Success) {
666       Report("BufferQueue init failed.\n");
667       return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
668     }
669   } else {
670     if (BQ->init(BufferSize, BufferMax) != BufferQueue::ErrorCode::Ok) {
671       if (Verbosity())
672         Report("Failed to re-initialize global buffer queue. Init failed.\n");
673       return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
674     }
675   }
676 
677   static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
678   pthread_once(
679       &OnceInit, +[] {
680         atomic_store(&TicksPerSec,
681                      probeRequiredCPUFeatures() ? getTSCFrequency()
682                                                 : __xray::NanosecondsPerSecond,
683                      memory_order_release);
684         pthread_key_create(
685             &Key, +[](void *TLDPtr) {
686               if (TLDPtr == nullptr)
687                 return;
688               auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr);
689               if (TLD.BQ == nullptr)
690                 return;
691               if (TLD.Buffer.Data == nullptr)
692                 return;
693               auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);
694               if (EC != BufferQueue::ErrorCode::Ok)
695                 Report("At thread exit, failed to release buffer at %p; "
696                        "error=%s\n",
697                        TLD.Buffer.Data, BufferQueue::getErrorString(EC));
698             });
699       });
700 
701   atomic_store(&ThresholdTicks,
702                atomic_load_relaxed(&TicksPerSec) *
703                    fdrFlags()->func_duration_threshold_us / 1000000,
704                memory_order_release);
705   // Arg1 handler should go in first to avoid concurrent code accidentally
706   // falling back to arg0 when it should have ran arg1.
707   __xray_set_handler_arg1(fdrLoggingHandleArg1);
708   // Install the actual handleArg0 handler after initialising the buffers.
709   __xray_set_handler(fdrLoggingHandleArg0);
710   __xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
711   __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent);
712 
713   // Install the buffer iterator implementation.
714   __xray_log_set_buffer_iterator(fdrIterator);
715 
716   atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
717                memory_order_release);
718 
719   if (Verbosity())
720     Report("XRay FDR init successful.\n");
721   return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
722 }
723 
724 bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
725   XRayLogImpl Impl{
726       fdrLoggingInit,
727       fdrLoggingFinalize,
728       fdrLoggingHandleArg0,
729       fdrLoggingFlush,
730   };
731   auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl);
732   if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
733       Verbosity()) {
734     Report("Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n",
735            RegistrationResult);
736     return false;
737   }
738 
739   if (flags()->xray_fdr_log ||
740       !internal_strcmp(flags()->xray_mode, "xray-fdr")) {
741     auto SelectResult = __xray_log_select_mode("xray-fdr");
742     if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
743         Verbosity()) {
744       Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n",
745              SelectResult);
746       return false;
747     }
748   }
749   return true;
750 }
751 
752 } // namespace __xray
753 
754 static auto UNUSED Unused = __xray::fdrLogDynamicInitializer();
755