1 //===-- xray_fdr_logging.cpp -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of XRay, a dynamic runtime instrumentation system.
10 //
11 // Here we implement the Flight Data Recorder mode for XRay, where we use
12 // compact structures to store records in memory as well as when writing out the
13 // data to files.
14 //
15 //===----------------------------------------------------------------------===//
16 #include "xray_fdr_logging.h"
17 #include <cassert>
18 #include <cstddef>
19 #include <errno.h>
20 #include <limits>
21 #include <memory>
22 #include <pthread.h>
23 #include <sys/time.h>
24 #include <time.h>
25 #include <unistd.h>
26
27 #include "sanitizer_common/sanitizer_allocator_internal.h"
28 #include "sanitizer_common/sanitizer_atomic.h"
29 #include "sanitizer_common/sanitizer_common.h"
30 #include "xray/xray_interface.h"
31 #include "xray/xray_records.h"
32 #include "xray_allocator.h"
33 #include "xray_buffer_queue.h"
34 #include "xray_defs.h"
35 #include "xray_fdr_controller.h"
36 #include "xray_fdr_flags.h"
37 #include "xray_fdr_log_writer.h"
38 #include "xray_flags.h"
39 #include "xray_recursion_guard.h"
40 #include "xray_tsc.h"
41 #include "xray_utils.h"
42
43 namespace __xray {
44
45 static atomic_sint32_t LoggingStatus = {
46 XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
47
48 namespace {
49
50 // Group together thread-local-data in a struct, then hide it behind a function
51 // call so that it can be initialized on first use instead of as a global. We
52 // force the alignment to 64-bytes for x86 cache line alignment, as this
53 // structure is used in the hot path of implementation.
54 struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
55 BufferQueue::Buffer Buffer{};
56 BufferQueue *BQ = nullptr;
57
58 using LogWriterStorage = std::byte[sizeof(FDRLogWriter)];
59 alignas(FDRLogWriter) LogWriterStorage LWStorage;
60 FDRLogWriter *Writer = nullptr;
61
62 using ControllerStorage = std::byte[sizeof(FDRController<>)];
63 alignas(FDRController<>) ControllerStorage CStorage;
64 FDRController<> *Controller = nullptr;
65 };
66
67 } // namespace
68
69 static_assert(std::is_trivially_destructible<ThreadLocalData>::value,
70 "ThreadLocalData must be trivially destructible");
71
72 // Use a global pthread key to identify thread-local data for logging.
73 static pthread_key_t Key;
74
75 // Global BufferQueue.
76 static std::byte BufferQueueStorage[sizeof(BufferQueue)];
77 static BufferQueue *BQ = nullptr;
78
79 // Global thresholds for function durations.
80 static atomic_uint64_t ThresholdTicks{0};
81
82 // Global for ticks per second.
83 static atomic_uint64_t TicksPerSec{0};
84
85 static atomic_sint32_t LogFlushStatus = {
86 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
87
88 // This function will initialize the thread-local data structure used by the FDR
89 // logging implementation and return a reference to it. The implementation
90 // details require a bit of care to maintain.
91 //
92 // First, some requirements on the implementation in general:
93 //
94 // - XRay handlers should not call any memory allocation routines that may
95 // delegate to an instrumented implementation. This means functions like
96 // malloc() and free() should not be called while instrumenting.
97 //
98 // - We would like to use some thread-local data initialized on first-use of
99 // the XRay instrumentation. These allow us to implement unsynchronized
100 // routines that access resources associated with the thread.
101 //
102 // The implementation here uses a few mechanisms that allow us to provide both
103 // the requirements listed above. We do this by:
104 //
105 // 1. Using a thread-local aligned storage buffer for representing the
106 // ThreadLocalData struct. This data will be uninitialized memory by
107 // design.
108 //
109 // 2. Not requiring a thread exit handler/implementation, keeping the
110 // thread-local as purely a collection of references/data that do not
111 // require cleanup.
112 //
113 // We're doing this to avoid using a `thread_local` object that has a
114 // non-trivial destructor, because the C++ runtime might call std::malloc(...)
115 // to register calls to destructors. Deadlocks may arise when, for example, an
116 // externally provided malloc implementation is XRay instrumented, and
117 // initializing the thread-locals involves calling into malloc. A malloc
118 // implementation that does global synchronization might be holding a lock for a
119 // critical section, calling a function that might be XRay instrumented (and
120 // thus in turn calling into malloc by virtue of registration of the
121 // thread_local's destructor).
122 #if XRAY_HAS_TLS_ALIGNAS
123 static_assert(alignof(ThreadLocalData) >= 64,
124 "ThreadLocalData must be cache line aligned.");
125 #endif
getThreadLocalData()126 static ThreadLocalData &getThreadLocalData() {
127 alignas(ThreadLocalData) thread_local std::byte
128 TLDStorage[sizeof(ThreadLocalData)];
129
130 if (pthread_getspecific(Key) == NULL) {
131 new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{};
132 pthread_setspecific(Key, &TLDStorage);
133 }
134
135 return *reinterpret_cast<ThreadLocalData *>(&TLDStorage);
136 }
137
fdrCommonHeaderInfo()138 static XRayFileHeader &fdrCommonHeaderInfo() {
139 alignas(XRayFileHeader) static std::byte HStorage[sizeof(XRayFileHeader)];
140 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
141 static bool TSCSupported = true;
142 static uint64_t CycleFrequency = NanosecondsPerSecond;
143 pthread_once(
144 &OnceInit, +[] {
145 XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage);
146 // Version 2 of the log writes the extents of the buffer, instead of
147 // relying on an end-of-buffer record.
148 // Version 3 includes PID metadata record.
149 // Version 4 includes CPU data in the custom event records.
150 // Version 5 uses relative deltas for custom and typed event records,
151 // and removes the CPU data in custom event records (similar to how
152 // function records use deltas instead of full TSCs and rely on other
153 // metadata records for TSC wraparound and CPU migration).
154 H.Version = 5;
155 H.Type = FileTypes::FDR_LOG;
156
157 // Test for required CPU features and cache the cycle frequency
158 TSCSupported = probeRequiredCPUFeatures();
159 if (TSCSupported)
160 CycleFrequency = getTSCFrequency();
161 H.CycleFrequency = CycleFrequency;
162
163 // FIXME: Actually check whether we have 'constant_tsc' and
164 // 'nonstop_tsc' before setting the values in the header.
165 H.ConstantTSC = 1;
166 H.NonstopTSC = 1;
167 });
168 return reinterpret_cast<XRayFileHeader &>(HStorage);
169 }
170
171 // This is the iterator implementation, which knows how to handle FDR-mode
172 // specific buffers. This is used as an implementation of the iterator function
173 // needed by __xray_set_buffer_iterator(...). It maintains a global state of the
174 // buffer iteration for the currently installed FDR mode buffers. In particular:
175 //
176 // - If the argument represents the initial state of XRayBuffer ({nullptr, 0})
177 // then the iterator returns the header information.
178 // - If the argument represents the header information ({address of header
179 // info, size of the header info}) then it returns the first FDR buffer's
180 // address and extents.
181 // - It will keep returning the next buffer and extents as there are more
182 // buffers to process. When the input represents the last buffer, it will
183 // return the initial state to signal completion ({nullptr, 0}).
184 //
185 // See xray/xray_log_interface.h for more details on the requirements for the
186 // implementations of __xray_set_buffer_iterator(...) and
187 // __xray_log_process_buffers(...).
fdrIterator(const XRayBuffer B)188 XRayBuffer fdrIterator(const XRayBuffer B) {
189 DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0);
190 DCHECK(BQ->finalizing());
191
192 if (BQ == nullptr || !BQ->finalizing()) {
193 if (Verbosity())
194 Report(
195 "XRay FDR: Failed global buffer queue is null or not finalizing!\n");
196 return {nullptr, 0};
197 }
198
199 // We use a global scratch-pad for the header information, which only gets
200 // initialized the first time this function is called. We'll update one part
201 // of this information with some relevant data (in particular the number of
202 // buffers to expect).
203 alignas(
204 XRayFileHeader) static std::byte HeaderStorage[sizeof(XRayFileHeader)];
205 static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT;
206 pthread_once(
207 &HeaderOnce, +[] {
208 reinterpret_cast<XRayFileHeader &>(HeaderStorage) =
209 fdrCommonHeaderInfo();
210 });
211
212 // We use a convenience alias for code referring to Header from here on out.
213 auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage);
214 if (B.Data == nullptr && B.Size == 0) {
215 Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
216 return XRayBuffer{static_cast<void *>(&Header), sizeof(Header)};
217 }
218
219 static BufferQueue::const_iterator It{};
220 static BufferQueue::const_iterator End{};
221 static uint8_t *CurrentBuffer{nullptr};
222 static size_t SerializedBufferSize = 0;
223 if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) {
224 // From this point on, we provide raw access to the raw buffer we're getting
225 // from the BufferQueue. We're relying on the iterators from the current
226 // Buffer queue.
227 It = BQ->cbegin();
228 End = BQ->cend();
229 }
230
231 if (CurrentBuffer != nullptr) {
232 deallocateBuffer(CurrentBuffer, SerializedBufferSize);
233 CurrentBuffer = nullptr;
234 }
235
236 if (It == End)
237 return {nullptr, 0};
238
239 // Set up the current buffer to contain the extents like we would when writing
240 // out to disk. The difference here would be that we still write "empty"
241 // buffers, or at least go through the iterators faithfully to let the
242 // handlers see the empty buffers in the queue.
243 //
244 // We need this atomic fence here to ensure that writes happening to the
245 // buffer have been committed before we load the extents atomically. Because
246 // the buffer is not explicitly synchronised across threads, we rely on the
247 // fence ordering to ensure that writes we expect to have been completed
248 // before the fence are fully committed before we read the extents.
249 atomic_thread_fence(memory_order_acquire);
250 auto BufferSize = atomic_load(It->Extents, memory_order_acquire);
251 SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
252 CurrentBuffer = allocateBuffer(SerializedBufferSize);
253 if (CurrentBuffer == nullptr)
254 return {nullptr, 0};
255
256 // Write out the extents as a Metadata Record into the CurrentBuffer.
257 MetadataRecord ExtentsRecord;
258 ExtentsRecord.Type = uint8_t(RecordType::Metadata);
259 ExtentsRecord.RecordKind =
260 uint8_t(MetadataRecord::RecordKinds::BufferExtents);
261 internal_memcpy(ExtentsRecord.Data, &BufferSize, sizeof(BufferSize));
262 auto AfterExtents =
263 static_cast<char *>(internal_memcpy(CurrentBuffer, &ExtentsRecord,
264 sizeof(MetadataRecord))) +
265 sizeof(MetadataRecord);
266 internal_memcpy(AfterExtents, It->Data, BufferSize);
267
268 XRayBuffer Result;
269 Result.Data = CurrentBuffer;
270 Result.Size = SerializedBufferSize;
271 ++It;
272 return Result;
273 }
274
275 // Must finalize before flushing.
fdrLoggingFlush()276 XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
277 if (atomic_load(&LoggingStatus, memory_order_acquire) !=
278 XRayLogInitStatus::XRAY_LOG_FINALIZED) {
279 if (Verbosity())
280 Report("Not flushing log, implementation is not finalized.\n");
281 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
282 }
283
284 if (atomic_exchange(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHING,
285 memory_order_release) ==
286 XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
287 if (Verbosity())
288 Report("Not flushing log, implementation is still flushing.\n");
289 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
290 }
291
292 if (BQ == nullptr) {
293 if (Verbosity())
294 Report("Cannot flush when global buffer queue is null.\n");
295 return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
296 }
297
298 // We wait a number of milliseconds to allow threads to see that we've
299 // finalised before attempting to flush the log.
300 SleepForMillis(fdrFlags()->grace_period_ms);
301
302 // At this point, we're going to uninstall the iterator implementation, before
303 // we decide to do anything further with the global buffer queue.
304 __xray_log_remove_buffer_iterator();
305
306 // Once flushed, we should set the global status of the logging implementation
307 // to "uninitialized" to allow for FDR-logging multiple runs.
308 auto ResetToUnitialized = at_scope_exit([] {
309 atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
310 memory_order_release);
311 });
312
313 auto CleanupBuffers = at_scope_exit([] {
314 auto &TLD = getThreadLocalData();
315 if (TLD.Controller != nullptr)
316 TLD.Controller->flush();
317 });
318
319 if (fdrFlags()->no_file_flush) {
320 if (Verbosity())
321 Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n");
322
323 atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
324 memory_order_release);
325 return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
326 }
327
328 // We write out the file in the following format:
329 //
330 // 1) We write down the XRay file header with version 1, type FDR_LOG.
331 // 2) Then we use the 'apply' member of the BufferQueue that's live, to
332 // ensure that at this point in time we write down the buffers that have
333 // been released (and marked "used") -- we dump the full buffer for now
334 // (fixed-sized) and let the tools reading the buffers deal with the data
335 // afterwards.
336 //
337 LogWriter *LW = LogWriter::Open();
338 if (LW == nullptr) {
339 auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
340 atomic_store(&LogFlushStatus, Result, memory_order_release);
341 return Result;
342 }
343
344 XRayFileHeader Header = fdrCommonHeaderInfo();
345 Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
346 LW->WriteAll(reinterpret_cast<char *>(&Header),
347 reinterpret_cast<char *>(&Header) + sizeof(Header));
348
349 // Release the current thread's buffer before we attempt to write out all the
350 // buffers. This ensures that in case we had only a single thread going, that
351 // we are able to capture the data nonetheless.
352 auto &TLD = getThreadLocalData();
353 if (TLD.Controller != nullptr)
354 TLD.Controller->flush();
355
356 BQ->apply([&](const BufferQueue::Buffer &B) {
357 // Starting at version 2 of the FDR logging implementation, we only write
358 // the records identified by the extents of the buffer. We use the Extents
359 // from the Buffer and write that out as the first record in the buffer. We
360 // still use a Metadata record, but fill in the extents instead for the
361 // data.
362 MetadataRecord ExtentsRecord;
363 auto BufferExtents = atomic_load(B.Extents, memory_order_acquire);
364 DCHECK(BufferExtents <= B.Size);
365 ExtentsRecord.Type = uint8_t(RecordType::Metadata);
366 ExtentsRecord.RecordKind =
367 uint8_t(MetadataRecord::RecordKinds::BufferExtents);
368 internal_memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents));
369 if (BufferExtents > 0) {
370 LW->WriteAll(reinterpret_cast<char *>(&ExtentsRecord),
371 reinterpret_cast<char *>(&ExtentsRecord) +
372 sizeof(MetadataRecord));
373 LW->WriteAll(reinterpret_cast<char *>(B.Data),
374 reinterpret_cast<char *>(B.Data) + BufferExtents);
375 }
376 });
377
378 atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
379 memory_order_release);
380 return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
381 }
382
fdrLoggingFinalize()383 XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
384 s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
385 if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
386 XRayLogInitStatus::XRAY_LOG_FINALIZING,
387 memory_order_release)) {
388 if (Verbosity())
389 Report("Cannot finalize log, implementation not initialized.\n");
390 return static_cast<XRayLogInitStatus>(CurrentStatus);
391 }
392
393 // Do special things to make the log finalize itself, and not allow any more
394 // operations to be performed until re-initialized.
395 if (BQ == nullptr) {
396 if (Verbosity())
397 Report("Attempting to finalize an uninitialized global buffer!\n");
398 } else {
399 BQ->finalize();
400 }
401
402 atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
403 memory_order_release);
404 return XRayLogInitStatus::XRAY_LOG_FINALIZED;
405 }
406
407 struct TSCAndCPU {
408 uint64_t TSC = 0;
409 unsigned char CPU = 0;
410 };
411
getTimestamp()412 static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
413 // We want to get the TSC as early as possible, so that we can check whether
414 // we've seen this CPU before. We also do it before we load anything else,
415 // to allow for forward progress with the scheduling.
416 TSCAndCPU Result;
417
418 // Test once for required CPU features
419 static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT;
420 static bool TSCSupported = true;
421 pthread_once(
422 &OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); });
423
424 if (TSCSupported) {
425 Result.TSC = __xray::readTSC(Result.CPU);
426 } else {
427 // FIXME: This code needs refactoring as it appears in multiple locations
428 timespec TS;
429 int result = clock_gettime(CLOCK_REALTIME, &TS);
430 if (result != 0) {
431 Report("clock_gettime(2) return %d, errno=%d", result, int(errno));
432 TS = {0, 0};
433 }
434 Result.CPU = 0;
435 Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
436 }
437 return Result;
438 }
439
440 thread_local atomic_uint8_t Running{0};
441
setupTLD(ThreadLocalData & TLD)442 static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT {
443 // Check if we're finalizing, before proceeding.
444 {
445 auto Status = atomic_load(&LoggingStatus, memory_order_acquire);
446 if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
447 Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) {
448 if (TLD.Controller != nullptr) {
449 TLD.Controller->flush();
450 TLD.Controller = nullptr;
451 }
452 return false;
453 }
454 }
455
456 if (UNLIKELY(TLD.Controller == nullptr)) {
457 // Set up the TLD buffer queue.
458 if (UNLIKELY(BQ == nullptr))
459 return false;
460 TLD.BQ = BQ;
461
462 // Check that we have a valid buffer.
463 if (TLD.Buffer.Generation != BQ->generation() &&
464 TLD.BQ->releaseBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok)
465 return false;
466
467 // Set up a buffer, before setting up the log writer. Bail out on failure.
468 if (TLD.BQ->getBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok)
469 return false;
470
471 // Set up the Log Writer for this thread.
472 if (UNLIKELY(TLD.Writer == nullptr)) {
473 auto *LWStorage = reinterpret_cast<FDRLogWriter *>(&TLD.LWStorage);
474 new (LWStorage) FDRLogWriter(TLD.Buffer);
475 TLD.Writer = LWStorage;
476 } else {
477 TLD.Writer->resetRecord();
478 }
479
480 auto *CStorage = reinterpret_cast<FDRController<> *>(&TLD.CStorage);
481 new (CStorage)
482 FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime,
483 atomic_load_relaxed(&ThresholdTicks));
484 TLD.Controller = CStorage;
485 }
486
487 DCHECK_NE(TLD.Controller, nullptr);
488 return true;
489 }
490
fdrLoggingHandleArg0(int32_t FuncId,XRayEntryType Entry)491 void fdrLoggingHandleArg0(int32_t FuncId,
492 XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
493 auto TC = getTimestamp();
494 auto &TSC = TC.TSC;
495 auto &CPU = TC.CPU;
496 RecursionGuard Guard{Running};
497 if (!Guard)
498 return;
499
500 auto &TLD = getThreadLocalData();
501 if (!setupTLD(TLD))
502 return;
503
504 switch (Entry) {
505 case XRayEntryType::ENTRY:
506 case XRayEntryType::LOG_ARGS_ENTRY:
507 TLD.Controller->functionEnter(FuncId, TSC, CPU);
508 return;
509 case XRayEntryType::EXIT:
510 TLD.Controller->functionExit(FuncId, TSC, CPU);
511 return;
512 case XRayEntryType::TAIL:
513 TLD.Controller->functionTailExit(FuncId, TSC, CPU);
514 return;
515 case XRayEntryType::CUSTOM_EVENT:
516 case XRayEntryType::TYPED_EVENT:
517 break;
518 }
519 }
520
fdrLoggingHandleArg1(int32_t FuncId,XRayEntryType Entry,uint64_t Arg)521 void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
522 uint64_t Arg) XRAY_NEVER_INSTRUMENT {
523 auto TC = getTimestamp();
524 auto &TSC = TC.TSC;
525 auto &CPU = TC.CPU;
526 RecursionGuard Guard{Running};
527 if (!Guard)
528 return;
529
530 auto &TLD = getThreadLocalData();
531 if (!setupTLD(TLD))
532 return;
533
534 switch (Entry) {
535 case XRayEntryType::ENTRY:
536 case XRayEntryType::LOG_ARGS_ENTRY:
537 TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg);
538 return;
539 case XRayEntryType::EXIT:
540 TLD.Controller->functionExit(FuncId, TSC, CPU);
541 return;
542 case XRayEntryType::TAIL:
543 TLD.Controller->functionTailExit(FuncId, TSC, CPU);
544 return;
545 case XRayEntryType::CUSTOM_EVENT:
546 case XRayEntryType::TYPED_EVENT:
547 break;
548 }
549 }
550
fdrLoggingHandleCustomEvent(void * Event,std::size_t EventSize)551 void fdrLoggingHandleCustomEvent(void *Event,
552 std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
553 auto TC = getTimestamp();
554 auto &TSC = TC.TSC;
555 auto &CPU = TC.CPU;
556 RecursionGuard Guard{Running};
557 if (!Guard)
558 return;
559
560 // Complain when we ever get at least one custom event that's larger than what
561 // we can possibly support.
562 if (EventSize >
563 static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
564 static pthread_once_t Once = PTHREAD_ONCE_INIT;
565 pthread_once(
566 &Once, +[] {
567 Report("Custom event size too large; truncating to %d.\n",
568 std::numeric_limits<int32_t>::max());
569 });
570 }
571
572 auto &TLD = getThreadLocalData();
573 if (!setupTLD(TLD))
574 return;
575
576 int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
577 TLD.Controller->customEvent(TSC, CPU, Event, ReducedEventSize);
578 }
579
fdrLoggingHandleTypedEvent(size_t EventType,const void * Event,size_t EventSize)580 void fdrLoggingHandleTypedEvent(size_t EventType, const void *Event,
581 size_t EventSize) noexcept
582 XRAY_NEVER_INSTRUMENT {
583 auto TC = getTimestamp();
584 auto &TSC = TC.TSC;
585 auto &CPU = TC.CPU;
586 RecursionGuard Guard{Running};
587 if (!Guard)
588 return;
589
590 // Complain when we ever get at least one typed event that's larger than what
591 // we can possibly support.
592 if (EventSize >
593 static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
594 static pthread_once_t Once = PTHREAD_ONCE_INIT;
595 pthread_once(
596 &Once, +[] {
597 Report("Typed event size too large; truncating to %d.\n",
598 std::numeric_limits<int32_t>::max());
599 });
600 }
601
602 auto &TLD = getThreadLocalData();
603 if (!setupTLD(TLD))
604 return;
605
606 int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
607 TLD.Controller->typedEvent(TSC, CPU, static_cast<uint16_t>(EventType), Event,
608 ReducedEventSize);
609 }
610
fdrLoggingInit(size_t,size_t,void * Options,size_t OptionsSize)611 XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options,
612 size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
613 if (Options == nullptr)
614 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
615
616 s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
617 if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
618 XRayLogInitStatus::XRAY_LOG_INITIALIZING,
619 memory_order_release)) {
620 if (Verbosity())
621 Report("Cannot initialize already initialized implementation.\n");
622 return static_cast<XRayLogInitStatus>(CurrentStatus);
623 }
624
625 if (Verbosity())
626 Report("Initializing FDR mode with options: %s\n",
627 static_cast<const char *>(Options));
628
629 // TODO: Factor out the flags specific to the FDR mode implementation. For
630 // now, use the global/single definition of the flags, since the FDR mode
631 // flags are already defined there.
632 FlagParser FDRParser;
633 FDRFlags FDRFlags;
634 registerXRayFDRFlags(&FDRParser, &FDRFlags);
635 FDRFlags.setDefaults();
636
637 // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided
638 // options until we migrate everyone to use the XRAY_FDR_OPTIONS
639 // compiler-provided options.
640 FDRParser.ParseString(useCompilerDefinedFlags());
641 FDRParser.ParseString(useCompilerDefinedFDRFlags());
642 auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS");
643 if (EnvOpts == nullptr)
644 EnvOpts = "";
645 FDRParser.ParseString(EnvOpts);
646
647 // FIXME: Remove this when we fully remove the deprecated flags.
648 if (internal_strlen(EnvOpts) == 0) {
649 FDRFlags.func_duration_threshold_us =
650 flags()->xray_fdr_log_func_duration_threshold_us;
651 FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms;
652 }
653
654 // The provided options should always override the compiler-provided and
655 // environment-variable defined options.
656 FDRParser.ParseString(static_cast<const char *>(Options));
657 *fdrFlags() = FDRFlags;
658 auto BufferSize = FDRFlags.buffer_size;
659 auto BufferMax = FDRFlags.buffer_max;
660
661 if (BQ == nullptr) {
662 bool Success = false;
663 BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
664 new (BQ) BufferQueue(BufferSize, BufferMax, Success);
665 if (!Success) {
666 Report("BufferQueue init failed.\n");
667 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
668 }
669 } else {
670 if (BQ->init(BufferSize, BufferMax) != BufferQueue::ErrorCode::Ok) {
671 if (Verbosity())
672 Report("Failed to re-initialize global buffer queue. Init failed.\n");
673 return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
674 }
675 }
676
677 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
678 pthread_once(
679 &OnceInit, +[] {
680 atomic_store(&TicksPerSec,
681 probeRequiredCPUFeatures() ? getTSCFrequency()
682 : __xray::NanosecondsPerSecond,
683 memory_order_release);
684 pthread_key_create(
685 &Key, +[](void *TLDPtr) {
686 if (TLDPtr == nullptr)
687 return;
688 auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr);
689 if (TLD.BQ == nullptr)
690 return;
691 if (TLD.Buffer.Data == nullptr)
692 return;
693 auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);
694 if (EC != BufferQueue::ErrorCode::Ok)
695 Report("At thread exit, failed to release buffer at %p; "
696 "error=%s\n",
697 TLD.Buffer.Data, BufferQueue::getErrorString(EC));
698 });
699 });
700
701 atomic_store(&ThresholdTicks,
702 atomic_load_relaxed(&TicksPerSec) *
703 fdrFlags()->func_duration_threshold_us / 1000000,
704 memory_order_release);
705 // Arg1 handler should go in first to avoid concurrent code accidentally
706 // falling back to arg0 when it should have ran arg1.
707 __xray_set_handler_arg1(fdrLoggingHandleArg1);
708 // Install the actual handleArg0 handler after initialising the buffers.
709 __xray_set_handler(fdrLoggingHandleArg0);
710 __xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
711 __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent);
712
713 // Install the buffer iterator implementation.
714 __xray_log_set_buffer_iterator(fdrIterator);
715
716 atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
717 memory_order_release);
718
719 if (Verbosity())
720 Report("XRay FDR init successful.\n");
721 return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
722 }
723
fdrLogDynamicInitializer()724 bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
725 XRayLogImpl Impl{
726 fdrLoggingInit,
727 fdrLoggingFinalize,
728 fdrLoggingHandleArg0,
729 fdrLoggingFlush,
730 };
731 auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl);
732 if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
733 Verbosity()) {
734 Report("Cannot register XRay FDR mode to 'xray-fdr'; error = %d\n",
735 RegistrationResult);
736 return false;
737 }
738
739 if (flags()->xray_fdr_log ||
740 !internal_strcmp(flags()->xray_mode, "xray-fdr")) {
741 auto SelectResult = __xray_log_select_mode("xray-fdr");
742 if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
743 Verbosity()) {
744 Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n",
745 SelectResult);
746 return false;
747 }
748 }
749 return true;
750 }
751
752 } // namespace __xray
753
754 static auto UNUSED Unused = __xray::fdrLogDynamicInitializer();
755