//===-- xray_profiling.cpp --------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This is the implementation of a profiling handler.
//
//===----------------------------------------------------------------------===//
#include <cstddef>
#include <memory>
#include <time.h>

#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_flags.h"
#include "xray/xray_interface.h"
#include "xray/xray_log_interface.h"
#include "xray_buffer_queue.h"
#include "xray_flags.h"
#include "xray_profile_collector.h"
#include "xray_profiling_flags.h"
#include "xray_recursion_guard.h"
#include "xray_tsc.h"
#include "xray_utils.h"
#include <pthread.h>

namespace __xray {

namespace {

static atomic_sint32_t ProfilerLogFlushStatus = {
    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};

static atomic_sint32_t ProfilerLogStatus = {
    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};

static SpinMutex ProfilerOptionsMutex;

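// Per-thread profiling state. Each member doubles as a small state machine
// encoded in a tagged pointer: 0 means "not initialized", 1 means
// "initialization in progress" (so reentrant callers must back off), and any
// other value is a pointer to the initialized object in thread-local storage.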
struct ProfilingData {
  atomic_uintptr_t Allocators;
  atomic_uintptr_t FCT;
};

static pthread_key_t ProfilingKey;

// We use a global buffer queue, which is initialized once at startup and
// reset when profiling is "done".
alignas(BufferQueue) static std::byte BufferQueueStorage[sizeof(BufferQueue)];
static BufferQueue *BQ = nullptr;

thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
alignas(FunctionCallTrie::Allocators) thread_local std::byte
    AllocatorsStorage[sizeof(FunctionCallTrie::Allocators)];
alignas(FunctionCallTrie) thread_local std::byte
    FunctionCallTrieStorage[sizeof(FunctionCallTrie)];
thread_local ProfilingData TLD{{0}, {0}};
thread_local atomic_uint8_t ReentranceGuard{0};

// We use a separate guard to ensure that, if this thread is already cleaning
// up, signal handlers do not attempt to clean up or initialize the
// thread-local data.
thread_local atomic_uint8_t TLDInitGuard{0};

// We also use a separate latch to signal that the thread is exiting, so that
// non-essential work (such as recording events) can be ignored.
thread_local atomic_uint8_t ThreadExitingLatch{0};

static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
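  // A once-per-thread registration: the thread_local lambda result is
  // evaluated exactly once on this thread, associating our TLD with the
  // pthread key so the key's destructor can post any pending data when the
  // thread exits.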
  thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
    pthread_setspecific(ProfilingKey, &TLD);
    return false;
  }();
  (void)ThreadOnce;

  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return nullptr;

  if (atomic_load_relaxed(&ThreadExitingLatch))
    return nullptr;

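  // Claim initialization of the allocators with a 0 -> 1 compare-exchange.
  // The winner sets up the allocator storage and publishes the real pointer;
  // a reentrant caller that observes the sentinel value 1 (initialization in
  // flight) bails out below.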
  uptr Allocators = 0;
  if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
                                     memory_order_acq_rel)) {
    bool Success = false;
    auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        atomic_store(&TLD.Allocators, 0, memory_order_release);
    });

    // Acquire a set of buffers for this thread.
    if (BQ == nullptr)
      return nullptr;

    if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;

    Success = true;
    new (&AllocatorsStorage) FunctionCallTrie::Allocators(
        FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
    Allocators = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
    atomic_store(&TLD.Allocators, Allocators, memory_order_release);
  }

  if (Allocators == 1)
    return nullptr;

  uptr FCT = 0;
  if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) {
    new (&FunctionCallTrieStorage)
        FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
            atomic_load_relaxed(&TLD.Allocators)));
    FCT = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
    atomic_store(&TLD.FCT, FCT, memory_order_release);
  }

  if (FCT == 1)
    return nullptr;

  return &TLD;
}

static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
  auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
  if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
                 &FunctionCallTrieStorage)))
    reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();

  auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
  if (Allocators ==
      reinterpret_cast<uptr>(
          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
    reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
}

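// Hand this thread's FunctionCallTrie, allocators, and buffers off to the
// profile collector service, then reset the thread-local bookkeeping. Only
// posts when the tagged pointers match this thread's own storage, i.e. the
// data was fully initialized.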
static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return;

  uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
  if (P != reinterpret_cast<uptr>(
               reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
    return;

  auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
  DCHECK_NE(FCT, nullptr);

  uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
  if (A !=
      reinterpret_cast<uptr>(
          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
    return;

  auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
  DCHECK_NE(Allocators, nullptr);

  // Always move the data into the profile collector.
  profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
                                std::move(ThreadBuffers), GetTid());

  // Re-initialize the ThreadBuffers object to a known "default" state.
  ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
}

} // namespace

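// Returns the profiling flag defaults baked in at build time through the
// XRAY_PROFILER_DEFAULT_OPTIONS preprocessor definition, or an empty string
// when none were provided.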
const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
#ifdef XRAY_PROFILER_DEFAULT_OPTIONS
  return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);
#else
  return "";
#endif
}

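// Writes the collected profiles out through a LogWriter, but only after
// profiling has been finalized and unless the no_flush option is set. Not
// safe to run from a signal handler; only one flush may be in flight at a
// time.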
XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
  if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
      XRayLogInitStatus::XRAY_LOG_FINALIZED) {
    if (Verbosity())
      Report("Not flushing profiles, profiling has not been finalized.\n");
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  RecursionGuard SignalGuard(ReentranceGuard);
  if (!SignalGuard) {
    if (Verbosity())
      Report("Cannot finalize properly inside a signal handler!\n");
    atomic_store(&ProfilerLogFlushStatus,
                 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
                 memory_order_release);
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
                                 XRayLogFlushStatus::XRAY_LOG_FLUSHING,
                                 memory_order_acq_rel);
  if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
    if (Verbosity())
      Report("Not flushing profiles, implementation still flushing.\n");
    return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
  }

  // At this point, we'll create the file that will contain the profile, but
  // only if the options say so.
  if (!profilingFlags()->no_flush) {
    // First check whether we have data in the profile collector service
    // before we try and write anything down.
    XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});
    if (B.Data == nullptr) {
      if (Verbosity())
        Report("profiling: No data to flush.\n");
    } else {
      LogWriter *LW = LogWriter::Open();
      if (LW == nullptr) {
        if (Verbosity())
          Report("profiling: Failed to flush to file, dropping data.\n");
      } else {
        // Now for each of the buffers, write out the profile data as we would
        // see it in memory, verbatim.
        while (B.Data != nullptr && B.Size != 0) {
          LW->WriteAll(reinterpret_cast<const char *>(B.Data),
                       reinterpret_cast<const char *>(B.Data) + B.Size);
          B = profileCollectorService::nextBuffer(B);
        }
        LogWriter::Close(LW);
      }
    }
  }

  profileCollectorService::reset();

  atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
               memory_order_release);
  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
               memory_order_release);

  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
}

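// The entry/exit handler installed via __xray_set_handler. The TSC is read
// first to keep the timestamp as close to the instrumented event as possible;
// the event is then recorded into this thread's FunctionCallTrie.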
void profilingHandleArg0(int32_t FuncId,
                         XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
  unsigned char CPU;
  auto TSC = readTSC(CPU);
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return;

  auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
    return;

  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
    postCurrentThreadFCT(TLD);
    return;
  }

  auto T = getThreadLocalData();
  if (T == nullptr)
    return;

  auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
  switch (Entry) {
  case XRayEntryType::ENTRY:
  case XRayEntryType::LOG_ARGS_ENTRY:
    FCT->enterFunction(FuncId, TSC, CPU);
    break;
  case XRayEntryType::EXIT:
  case XRayEntryType::TAIL:
    FCT->exitFunction(FuncId, TSC, CPU);
    break;
  default:
    // FIXME: Handle bugs.
    break;
  }
}

void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry,
                         uint64_t) XRAY_NEVER_INSTRUMENT {
  return profilingHandleArg0(FuncId, Entry);
}

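// Moves the profiler from INITIALIZED to FINALIZED: finalizes the buffer
// queue, sleeps for a configurable grace period so other threads can observe
// the state change, then posts this thread's data and serializes everything
// collected so far.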
XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_FINALIZING,
                                      memory_order_release)) {
    if (Verbosity())
      Report("Cannot finalize profile, profiling is not initialized.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  // Mark, then finalize, the current generation of buffers. Threads currently
  // holding onto buffers may keep using them, with the last reference doing
  // the memory cleanup.
  DCHECK_NE(BQ, nullptr);
  BQ->finalize();

  // Wait a grace period to allow threads to see that we're finalizing.
  SleepForMillis(profilingFlags()->grace_period_ms);

  // If we for some reason are entering this function from an instrumented
  // handler, we bail out.
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return static_cast<XRayLogInitStatus>(CurrentStatus);

  // Post the current thread's data if we have any.
  postCurrentThreadFCT(TLD);

  // Then we force serialize the log data.
  profileCollectorService::serialize();

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
               memory_order_release);
  return XRayLogInitStatus::XRAY_LOG_FINALIZED;
}

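// Initializes the profiling implementation. Options are parsed in order of
// increasing precedence: compiler-provided defaults, then the
// XRAY_PROFILING_OPTIONS environment variable, then the Options string passed
// in, so later sources override earlier ones. As an illustrative example,
// setting XRAY_PROFILING_OPTIONS="no_flush=true" in the environment would
// suppress writing the profile file at flush time.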
XRayLogInitStatus
profilingLoggingInit(size_t, size_t, void *Options,
                     size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;

  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_INITIALIZING,
                                      memory_order_acq_rel)) {
    if (Verbosity())
      Report("Cannot initialize an already initialized profiling "
             "implementation.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  {
    SpinMutexLock Lock(&ProfilerOptionsMutex);
    FlagParser ConfigParser;
    ProfilerFlags Flags;
    Flags.setDefaults();
    registerProfilerFlags(&ConfigParser, &Flags);
    ConfigParser.ParseString(profilingCompilerDefinedFlags());
    const char *Env = GetEnv("XRAY_PROFILING_OPTIONS");
    if (Env == nullptr)
      Env = "";
    ConfigParser.ParseString(Env);

    // Then parse the configuration string provided.
    ConfigParser.ParseString(static_cast<const char *>(Options));
    if (Verbosity())
      ReportUnrecognizedFlags();
    *profilingFlags() = Flags;
  }

  // We need to reset the profile data collection implementation now.
  profileCollectorService::reset();

  // Then also reset the buffer queue implementation.
  if (BQ == nullptr) {
    bool Success = false;
    new (&BufferQueueStorage)
        BufferQueue(profilingFlags()->per_thread_allocator_max,
                    profilingFlags()->buffers_max, Success);
    if (!Success) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers!\n");
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    // If we've succeeded, set the global pointer to the initialized storage.
    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
  } else {
    BQ->finalize();
    auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
                               profilingFlags()->buffers_max);

    if (InitStatus != BufferQueue::ErrorCode::Ok) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers; error: %s\n",
               BufferQueue::getErrorString(InitStatus));
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    DCHECK(!BQ->finalizing());
  }

  // We need to set up the exit handlers.
  static pthread_once_t Once = PTHREAD_ONCE_INIT;
  pthread_once(
      &Once, +[] {
        pthread_key_create(
            &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
              if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
                return;

              if (P == nullptr)
                return;

              auto T = reinterpret_cast<ProfilingData *>(P);
              if (atomic_load_relaxed(&T->Allocators) == 0)
                return;

              {
                // If we're somehow executing this while inside a
                // non-reentrant-friendly context, we skip attempting to post
                // the current thread's data.
                RecursionGuard G(ReentranceGuard);
                if (!G)
                  return;

                postCurrentThreadFCT(*T);
              }
            });

        // We also need to set up an exit handler, so that we can get the
        // profile information at exit time. We use the C API to do this, so as
        // not to rely on C++ ABI functions for registering exit handlers.
        Atexit(+[]() XRAY_NEVER_INSTRUMENT {
          if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
            return;

          auto Cleanup =
              at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });

          // Finalize and flush.
          if (profilingFinalize() != XRAY_LOG_FINALIZED ||
              profilingFlush() != XRAY_LOG_FLUSHED)
            return;

          if (Verbosity())
            Report("XRay Profile flushed at exit.\n");
        });
      });

  __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
  __xray_set_handler(profilingHandleArg0);
  __xray_set_handler_arg1(profilingHandleArg1);

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
               memory_order_release);
  if (Verbosity())
    Report("XRay Profiling init successful.\n");

  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}

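// Registers the "xray-profiling" mode with the XRay log registry during
// dynamic initialization, and selects it when the xray_mode flag matches. As
// an illustrative example, a binary built with -fxray-instrument would
// typically be run as:
//   XRAY_OPTIONS="patch_premain=true xray_mode=xray-profiling" ./a.out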
bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
  // Set up the flag defaults from the static defaults and the
  // compiler-provided defaults.
  {
    SpinMutexLock Lock(&ProfilerOptionsMutex);
    auto *F = profilingFlags();
    F->setDefaults();
    FlagParser ProfilingParser;
    registerProfilerFlags(&ProfilingParser, F);
    ProfilingParser.ParseString(profilingCompilerDefinedFlags());
  }

  XRayLogImpl Impl{
      profilingLoggingInit,
      profilingFinalize,
      profilingHandleArg0,
      profilingFlush,
  };
  auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
    if (Verbosity())
      Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
             "%d\n",
             RegistrationResult);
    return false;
  }

  if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
    __xray_log_select_mode("xray-profiling");
  return true;
}

} // namespace __xray

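// Force the dynamic initializer to run at load time; the variable itself is
// unused, but evaluating it registers the profiling mode before main() runs.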
static auto UNUSED Unused = __xray::profilingDynamicInitializer();