xref: /freebsd/contrib/llvm-project/compiler-rt/lib/ctx_profile/CtxInstrProfiling.h (revision 0e8011faf58b743cc652e3b2ad0f7671227610df)
1 /*===- CtxInstrProfiling.h- Contextual instrumentation-based PGO  ---------===*\
2 |*
3 |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 |* See https://llvm.org/LICENSE.txt for license information.
5 |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 |*
7 \*===----------------------------------------------------------------------===*/
8 
9 #ifndef CTX_PROFILE_CTXINSTRPROFILING_H_
10 #define CTX_PROFILE_CTXINSTRPROFILING_H_
11 
12 #include "CtxInstrContextNode.h"
13 #include "sanitizer_common/sanitizer_mutex.h"
14 #include <sanitizer/common_interface_defs.h>
15 
16 using namespace llvm::ctx_profile;
17 
18 // Forward-declare for the one unittest checking Arena construction zeroes out
19 // its allocatable space.
20 class ArenaTest_ZeroInit_Test;
21 namespace __ctx_profile {
22 
// Alignment, in bytes, that the static_asserts further down in this header
// require of both Arena and ContextNode.
static constexpr size_t ExpectedAlignment{8};
// The runtime relies on pointers being exactly this wide; x86_64 is the
// currently supported target. Supporting another architecture means auditing
// every place this alignment is assumed and adjusting accordingly.
static_assert(sizeof(void *) == ExpectedAlignment,
              "ctx_profile assumes pointers are ExpectedAlignment bytes wide");
28 
29 /// Arena (bump allocator) forming a linked list. Intentionally not thread safe.
30 /// Allocation and de-allocation happen using sanitizer APIs. We make that
31 /// explicit.
32 class Arena final {
33 public:
34   // When allocating a new Arena, optionally specify an existing one to append
35   // to, assumed to be the last in the Arena list. We only need to support
36   // appending to the arena list.
37   static Arena *allocateNewArena(size_t Size, Arena *Prev = nullptr);
38   static void freeArenaList(Arena *&A);
39 
40   uint64_t size() const { return Size; }
41 
42   // Allocate S bytes or return nullptr if we don't have that many available.
43   char *tryBumpAllocate(size_t S) {
44     if (Pos + S > Size)
45       return nullptr;
46     Pos += S;
47     return start() + (Pos - S);
48   }
49 
50   Arena *next() const { return Next; }
51 
52   // the beginning of allocatable memory.
53   const char *start() const { return const_cast<Arena *>(this)->start(); }
54   const char *pos() const { return start() + Pos; }
55 
56 private:
57   friend class ::ArenaTest_ZeroInit_Test;
58   explicit Arena(uint32_t Size);
59   ~Arena() = delete;
60 
61   char *start() { return reinterpret_cast<char *>(&this[1]); }
62 
63   Arena *Next = nullptr;
64   uint64_t Pos = 0;
65   const uint64_t Size;
66 };
67 
68 // The memory available for allocation follows the Arena header, and we expect
69 // it to be thus aligned.
70 static_assert(alignof(Arena) == ExpectedAlignment);
71 
72 // Verify maintenance to ContextNode doesn't change this invariant, which makes
73 // sure the inlined vectors are appropriately aligned.
74 static_assert(alignof(ContextNode) == ExpectedAlignment);
75 
76 /// ContextRoots are allocated by LLVM for entrypoints. LLVM is only concerned
77 /// with allocating and zero-initializing the global value (as in, GlobalValue)
78 /// for it.
79 struct ContextRoot {
80   ContextNode *FirstNode = nullptr;
81   Arena *FirstMemBlock = nullptr;
82   Arena *CurrentMem = nullptr;
83   // This is init-ed by the static zero initializer in LLVM.
84   // Taken is used to ensure only one thread traverses the contextual graph -
85   // either to read it or to write it. On server side, the same entrypoint will
86   // be entered by numerous threads, but over time, the profile aggregated by
87   // collecting sequentially on one thread at a time is expected to converge to
88   // the aggregate profile that may have been observable on all the threads.
89   // Note that this is node-by-node aggregation, i.e. summing counters of nodes
90   // at the same position in the graph, not flattening.
91   // Threads that cannot lock Taken (fail TryLock) are given a "scratch context"
92   // - a buffer they can clobber, safely from a memory access perspective.
93   //
94   // Note about "scratch"-ness: we currently ignore the data written in them
95   // (which is anyway clobbered). The design allows for that not be the case -
96   // because "scratch"-ness is first and foremost about not trying to build
97   // subcontexts, and is captured by tainting the pointer value (pointer to the
98   // memory treated as context), but right now, we drop that info.
99   //
100   // We could consider relaxing the requirement of more than one thread
101   // entering by holding a few context trees per entrypoint and then aggregating
102   // them (as explained above) at the end of the profile collection - it's a
103   // tradeoff between collection time and memory use: higher precision can be
104   // obtained with either less concurrent collections but more collection time,
105   // or with more concurrent collections (==more memory) and less collection
106   // time. Note that concurrent collection does happen for different
107   // entrypoints, regardless.
108   ::__sanitizer::StaticSpinMutex Taken;
109 
110   // If (unlikely) StaticSpinMutex internals change, we need to modify the LLVM
111   // instrumentation lowering side because it is responsible for allocating and
112   // zero-initializing ContextRoots.
113   static_assert(sizeof(Taken) == 1);
114 };
115 
/// Exposed for testing; the APIs below describe the contract with LLVM.
/// Returns true when the least significant bit of the address \p Ctx is set.
inline bool isScratch(const void *Ctx) {
  constexpr uint64_t ScratchTagMask = 1;
  const auto Address = reinterpret_cast<uint64_t>(Ctx);
  return (Address & ScratchTagMask) != 0;
}
121 
122 } // namespace __ctx_profile
123 
124 extern "C" {
125 
126 // LLVM fills these in when lowering a llvm.instrprof.callsite intrinsic.
127 // position 0 is used when the current context isn't scratch, 1 when it is. They
128 // are volatile because of signal handlers - we mean to specifically control
129 // when the data is loaded.
130 //
131 /// TLS where LLVM stores the pointer of the called value, as part of lowering a
132 /// llvm.instrprof.callsite
133 extern __thread void *volatile __llvm_ctx_profile_expected_callee[2];
134 /// TLS where LLVM stores the pointer inside a caller's subcontexts vector that
135 /// corresponds to the callsite being lowered.
136 extern __thread ContextNode **volatile __llvm_ctx_profile_callsite[2];
137 
138 // __llvm_ctx_profile_current_context_root is exposed for unit testing,
139 // othwerise it's only used internally by compiler-rt/ctx_profile.
140 extern __thread __ctx_profile::ContextRoot
141     *volatile __llvm_ctx_profile_current_context_root;
142 
143 /// called by LLVM in the entry BB of a "entry point" function. The returned
144 /// pointer may be "tainted" - its LSB set to 1 - to indicate it's scratch.
145 ContextNode *__llvm_ctx_profile_start_context(__ctx_profile::ContextRoot *Root,
146                                               GUID Guid, uint32_t Counters,
147                                               uint32_t Callsites);
148 
149 /// paired with __llvm_ctx_profile_start_context, and called at the exit of the
150 /// entry point function.
151 void __llvm_ctx_profile_release_context(__ctx_profile::ContextRoot *Root);
152 
153 /// called for any other function than entry points, in the entry BB of such
154 /// function. Same consideration about LSB of returned value as .._start_context
155 ContextNode *__llvm_ctx_profile_get_context(void *Callee, GUID Guid,
156                                             uint32_t NrCounters,
157                                             uint32_t NrCallsites);
158 
159 /// Prepares for collection. Currently this resets counter values but preserves
160 /// internal context tree structure.
161 void __llvm_ctx_profile_start_collection();
162 
163 /// Completely free allocated memory.
164 void __llvm_ctx_profile_free();
165 
166 /// Used to obtain the profile. The Writer is called for each root ContextNode,
167 /// with the ContextRoot::Taken taken. The Writer is responsible for traversing
168 /// the structure underneath.
169 /// The Writer's first parameter plays the role of closure for Writer, and is
170 /// what the caller of __llvm_ctx_profile_fetch passes as the Data parameter.
171 /// The second parameter is the root of a context tree.
172 bool __llvm_ctx_profile_fetch(void *Data,
173                               bool (*Writer)(void *, const ContextNode &));
174 }
175 #endif // CTX_PROFILE_CTXINSTRPROFILING_H_
176