xref: /freebsd/contrib/llvm-project/compiler-rt/lib/xray/xray_profiling.cpp (revision e32fecd0c2c3ee37c47ee100f169e7eb0282a873)
1 //===-- xray_profiling.cpp --------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of XRay, a dynamic runtime instrumentation system.
10 //
11 // This is the implementation of a profiling handler.
12 //
13 //===----------------------------------------------------------------------===//
14 #include <memory>
15 #include <time.h>
16 
17 #include "sanitizer_common/sanitizer_atomic.h"
18 #include "sanitizer_common/sanitizer_flags.h"
19 #include "xray/xray_interface.h"
20 #include "xray/xray_log_interface.h"
21 #include "xray_buffer_queue.h"
22 #include "xray_flags.h"
23 #include "xray_profile_collector.h"
24 #include "xray_profiling_flags.h"
25 #include "xray_recursion_guard.h"
26 #include "xray_tsc.h"
27 #include "xray_utils.h"
28 #include <pthread.h>
29 
30 namespace __xray {
31 
32 namespace {
33 
34 static atomic_sint32_t ProfilerLogFlushStatus = {
35     XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
36 
37 static atomic_sint32_t ProfilerLogStatus = {
38     XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
39 
40 static SpinMutex ProfilerOptionsMutex;
41 
42 struct ProfilingData {
43   atomic_uintptr_t Allocators;
44   atomic_uintptr_t FCT;
45 };
46 
47 static pthread_key_t ProfilingKey;
48 
49 // We use a global buffer queue, which gets initialized once at initialisation
50 // time, and gets reset when profiling is "done".
51 static std::aligned_storage<sizeof(BufferQueue), alignof(BufferQueue)>::type
52     BufferQueueStorage;
53 static BufferQueue *BQ = nullptr;
54 
55 thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
56 thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
57                                   alignof(FunctionCallTrie::Allocators)>::type
58     AllocatorsStorage;
59 thread_local std::aligned_storage<sizeof(FunctionCallTrie),
60                                   alignof(FunctionCallTrie)>::type
61     FunctionCallTrieStorage;
62 thread_local ProfilingData TLD{{0}, {0}};
63 thread_local atomic_uint8_t ReentranceGuard{0};
64 
65 // We use a separate guard for ensuring that for this thread, if we're already
66 // cleaning up, that any signal handlers don't attempt to cleanup nor
67 // initialise.
68 thread_local atomic_uint8_t TLDInitGuard{0};
69 
70 // We also use a separate latch to signal that the thread is exiting, and
71 // non-essential work should be ignored (things like recording events, etc.).
72 thread_local atomic_uint8_t ThreadExitingLatch{0};
73 
74 static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
75   thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
76     pthread_setspecific(ProfilingKey, &TLD);
77     return false;
78   }();
79   (void)ThreadOnce;
80 
81   RecursionGuard TLDInit(TLDInitGuard);
82   if (!TLDInit)
83     return nullptr;
84 
85   if (atomic_load_relaxed(&ThreadExitingLatch))
86     return nullptr;
87 
88   uptr Allocators = 0;
89   if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
90                                      memory_order_acq_rel)) {
91     bool Success = false;
92     auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
93       if (!Success)
94         atomic_store(&TLD.Allocators, 0, memory_order_release);
95     });
96 
97     // Acquire a set of buffers for this thread.
98     if (BQ == nullptr)
99       return nullptr;
100 
101     if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
102       return nullptr;
103     auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
104       if (!Success)
105         BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
106     });
107 
108     if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
109       return nullptr;
110     auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
111       if (!Success)
112         BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
113     });
114 
115     if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
116         BufferQueue::ErrorCode::Ok)
117       return nullptr;
118     auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
119       if (!Success)
120         BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
121     });
122 
123     if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
124         BufferQueue::ErrorCode::Ok)
125       return nullptr;
126 
127     Success = true;
128     new (&AllocatorsStorage) FunctionCallTrie::Allocators(
129         FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
130     Allocators = reinterpret_cast<uptr>(
131         reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
132     atomic_store(&TLD.Allocators, Allocators, memory_order_release);
133   }
134 
135   if (Allocators == 1)
136     return nullptr;
137 
138   uptr FCT = 0;
139   if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) {
140     new (&FunctionCallTrieStorage)
141         FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
142             atomic_load_relaxed(&TLD.Allocators)));
143     FCT = reinterpret_cast<uptr>(
144         reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
145     atomic_store(&TLD.FCT, FCT, memory_order_release);
146   }
147 
148   if (FCT == 1)
149     return nullptr;
150 
151   return &TLD;
152 }
153 
154 static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
155   auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
156   if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
157                  &FunctionCallTrieStorage)))
158     reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();
159 
160   auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
161   if (Allocators ==
162       reinterpret_cast<uptr>(
163           reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
164     reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
165 }
166 
167 static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
168   RecursionGuard TLDInit(TLDInitGuard);
169   if (!TLDInit)
170     return;
171 
172   uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
173   if (P != reinterpret_cast<uptr>(
174                reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
175     return;
176 
177   auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
178   DCHECK_NE(FCT, nullptr);
179 
180   uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
181   if (A !=
182       reinterpret_cast<uptr>(
183           reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
184     return;
185 
186   auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
187   DCHECK_NE(Allocators, nullptr);
188 
189   // Always move the data into the profile collector.
190   profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
191                                 std::move(ThreadBuffers), GetTid());
192 
193   // Re-initialize the ThreadBuffers object to a known "default" state.
194   ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
195 }
196 
197 } // namespace
198 
199 const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
200 #ifdef XRAY_PROFILER_DEFAULT_OPTIONS
201   return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);
202 #else
203   return "";
204 #endif
205 }
206 
207 XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
208   if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
209       XRayLogInitStatus::XRAY_LOG_FINALIZED) {
210     if (Verbosity())
211       Report("Not flushing profiles, profiling not been finalized.\n");
212     return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
213   }
214 
215   RecursionGuard SignalGuard(ReentranceGuard);
216   if (!SignalGuard) {
217     if (Verbosity())
218       Report("Cannot finalize properly inside a signal handler!\n");
219     atomic_store(&ProfilerLogFlushStatus,
220                  XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
221                  memory_order_release);
222     return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
223   }
224 
225   s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
226                                  XRayLogFlushStatus::XRAY_LOG_FLUSHING,
227                                  memory_order_acq_rel);
228   if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
229     if (Verbosity())
230       Report("Not flushing profiles, implementation still flushing.\n");
231     return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
232   }
233 
234   // At this point, we'll create the file that will contain the profile, but
235   // only if the options say so.
236   if (!profilingFlags()->no_flush) {
237     // First check whether we have data in the profile collector service
238     // before we try and write anything down.
239     XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});
240     if (B.Data == nullptr) {
241       if (Verbosity())
242         Report("profiling: No data to flush.\n");
243     } else {
244       LogWriter *LW = LogWriter::Open();
245       if (LW == nullptr) {
246         if (Verbosity())
247           Report("profiling: Failed to flush to file, dropping data.\n");
248       } else {
249         // Now for each of the buffers, write out the profile data as we would
250         // see it in memory, verbatim.
251         while (B.Data != nullptr && B.Size != 0) {
252           LW->WriteAll(reinterpret_cast<const char *>(B.Data),
253                        reinterpret_cast<const char *>(B.Data) + B.Size);
254           B = profileCollectorService::nextBuffer(B);
255         }
256       }
257       LogWriter::Close(LW);
258     }
259   }
260 
261   profileCollectorService::reset();
262 
263   atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
264                memory_order_release);
265   atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
266                memory_order_release);
267 
268   return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
269 }
270 
271 void profilingHandleArg0(int32_t FuncId,
272                          XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
273   unsigned char CPU;
274   auto TSC = readTSC(CPU);
275   RecursionGuard G(ReentranceGuard);
276   if (!G)
277     return;
278 
279   auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
280   if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
281                Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
282     return;
283 
284   if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
285                Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
286     postCurrentThreadFCT(TLD);
287     return;
288   }
289 
290   auto T = getThreadLocalData();
291   if (T == nullptr)
292     return;
293 
294   auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
295   switch (Entry) {
296   case XRayEntryType::ENTRY:
297   case XRayEntryType::LOG_ARGS_ENTRY:
298     FCT->enterFunction(FuncId, TSC, CPU);
299     break;
300   case XRayEntryType::EXIT:
301   case XRayEntryType::TAIL:
302     FCT->exitFunction(FuncId, TSC, CPU);
303     break;
304   default:
305     // FIXME: Handle bugs.
306     break;
307   }
308 }
309 
310 void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry,
311                          uint64_t) XRAY_NEVER_INSTRUMENT {
312   return profilingHandleArg0(FuncId, Entry);
313 }
314 
315 XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
316   s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
317   if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
318                                       XRayLogInitStatus::XRAY_LOG_FINALIZING,
319                                       memory_order_release)) {
320     if (Verbosity())
321       Report("Cannot finalize profile, the profiling is not initialized.\n");
322     return static_cast<XRayLogInitStatus>(CurrentStatus);
323   }
324 
325   // Mark then finalize the current generation of buffers. This allows us to let
326   // the threads currently holding onto new buffers still use them, but let the
327   // last reference do the memory cleanup.
328   DCHECK_NE(BQ, nullptr);
329   BQ->finalize();
330 
331   // Wait a grace period to allow threads to see that we're finalizing.
332   SleepForMillis(profilingFlags()->grace_period_ms);
333 
334   // If we for some reason are entering this function from an instrumented
335   // handler, we bail out.
336   RecursionGuard G(ReentranceGuard);
337   if (!G)
338     return static_cast<XRayLogInitStatus>(CurrentStatus);
339 
340   // Post the current thread's data if we have any.
341   postCurrentThreadFCT(TLD);
342 
343   // Then we force serialize the log data.
344   profileCollectorService::serialize();
345 
346   atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
347                memory_order_release);
348   return XRayLogInitStatus::XRAY_LOG_FINALIZED;
349 }
350 
351 XRayLogInitStatus
352 profilingLoggingInit(size_t, size_t, void *Options,
353                      size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
354   RecursionGuard G(ReentranceGuard);
355   if (!G)
356     return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
357 
358   s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
359   if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
360                                       XRayLogInitStatus::XRAY_LOG_INITIALIZING,
361                                       memory_order_acq_rel)) {
362     if (Verbosity())
363       Report("Cannot initialize already initialised profiling "
364              "implementation.\n");
365     return static_cast<XRayLogInitStatus>(CurrentStatus);
366   }
367 
368   {
369     SpinMutexLock Lock(&ProfilerOptionsMutex);
370     FlagParser ConfigParser;
371     ProfilerFlags Flags;
372     Flags.setDefaults();
373     registerProfilerFlags(&ConfigParser, &Flags);
374     ConfigParser.ParseString(profilingCompilerDefinedFlags());
375     const char *Env = GetEnv("XRAY_PROFILING_OPTIONS");
376     if (Env == nullptr)
377       Env = "";
378     ConfigParser.ParseString(Env);
379 
380     // Then parse the configuration string provided.
381     ConfigParser.ParseString(static_cast<const char *>(Options));
382     if (Verbosity())
383       ReportUnrecognizedFlags();
384     *profilingFlags() = Flags;
385   }
386 
387   // We need to reset the profile data collection implementation now.
388   profileCollectorService::reset();
389 
390   // Then also reset the buffer queue implementation.
391   if (BQ == nullptr) {
392     bool Success = false;
393     new (&BufferQueueStorage)
394         BufferQueue(profilingFlags()->per_thread_allocator_max,
395                     profilingFlags()->buffers_max, Success);
396     if (!Success) {
397       if (Verbosity())
398         Report("Failed to initialize preallocated memory buffers!");
399       atomic_store(&ProfilerLogStatus,
400                    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
401                    memory_order_release);
402       return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
403     }
404 
405     // If we've succeeded, set the global pointer to the initialised storage.
406     BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
407   } else {
408     BQ->finalize();
409     auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
410                                profilingFlags()->buffers_max);
411 
412     if (InitStatus != BufferQueue::ErrorCode::Ok) {
413       if (Verbosity())
414         Report("Failed to initialize preallocated memory buffers; error: %s",
415                BufferQueue::getErrorString(InitStatus));
416       atomic_store(&ProfilerLogStatus,
417                    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
418                    memory_order_release);
419       return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
420     }
421 
422     DCHECK(!BQ->finalizing());
423   }
424 
425   // We need to set up the exit handlers.
426   static pthread_once_t Once = PTHREAD_ONCE_INIT;
427   pthread_once(
428       &Once, +[] {
429         pthread_key_create(
430             &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
431               if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
432                 return;
433 
434               if (P == nullptr)
435                 return;
436 
437               auto T = reinterpret_cast<ProfilingData *>(P);
438               if (atomic_load_relaxed(&T->Allocators) == 0)
439                 return;
440 
441               {
442                 // If we're somehow executing this while inside a
443                 // non-reentrant-friendly context, we skip attempting to post
444                 // the current thread's data.
445                 RecursionGuard G(ReentranceGuard);
446                 if (!G)
447                   return;
448 
449                 postCurrentThreadFCT(*T);
450               }
451             });
452 
453         // We also need to set up an exit handler, so that we can get the
454         // profile information at exit time. We use the C API to do this, to not
455         // rely on C++ ABI functions for registering exit handlers.
456         Atexit(+[]() XRAY_NEVER_INSTRUMENT {
457           if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
458             return;
459 
460           auto Cleanup =
461               at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });
462 
463           // Finalize and flush.
464           if (profilingFinalize() != XRAY_LOG_FINALIZED ||
465               profilingFlush() != XRAY_LOG_FLUSHED)
466             return;
467 
468           if (Verbosity())
469             Report("XRay Profile flushed at exit.");
470         });
471       });
472 
473   __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
474   __xray_set_handler(profilingHandleArg0);
475   __xray_set_handler_arg1(profilingHandleArg1);
476 
477   atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
478                memory_order_release);
479   if (Verbosity())
480     Report("XRay Profiling init successful.\n");
481 
482   return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
483 }
484 
485 bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
486   // Set up the flag defaults from the static defaults and the
487   // compiler-provided defaults.
488   {
489     SpinMutexLock Lock(&ProfilerOptionsMutex);
490     auto *F = profilingFlags();
491     F->setDefaults();
492     FlagParser ProfilingParser;
493     registerProfilerFlags(&ProfilingParser, F);
494     ProfilingParser.ParseString(profilingCompilerDefinedFlags());
495   }
496 
497   XRayLogImpl Impl{
498       profilingLoggingInit,
499       profilingFinalize,
500       profilingHandleArg0,
501       profilingFlush,
502   };
503   auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
504   if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
505     if (Verbosity())
506       Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
507              "%d\n",
508              RegistrationResult);
509     return false;
510   }
511 
512   if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
513     __xray_log_select_mode("xray_profiling");
514   return true;
515 }
516 
517 } // namespace __xray
518 
519 static auto UNUSED Unused = __xray::profilingDynamicInitializer();
520