xref: /freebsd/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingValue.c (revision 2f513db72b034fd5ef7f080b11be5c711c15186a)
1 /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
2 |*
3 |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 |* See https://llvm.org/LICENSE.txt for license information.
5 |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 |*
7 \*===----------------------------------------------------------------------===*/
8 
9 #include <limits.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include "InstrProfiling.h"
15 #include "InstrProfilingInternal.h"
16 #include "InstrProfilingUtil.h"
17 
18 #define INSTR_PROF_VALUE_PROF_DATA
19 #define INSTR_PROF_COMMON_API_IMPL
20 #include "InstrProfData.inc"
21 
22 static int hasStaticCounters = 1;
23 static int OutOfNodesWarnings = 0;
24 static int hasNonDefaultValsPerSite = 0;
25 #define INSTR_PROF_MAX_VP_WARNS 10
26 #define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 16
27 #define INSTR_PROF_VNODE_POOL_SIZE 1024
28 
29 #ifndef _MSC_VER
30 /* A shared static pool in addition to the vnodes statically
31  * allocated by the compiler.  */
32 COMPILER_RT_VISIBILITY ValueProfNode
33     lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
34        COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME);
35 #endif
36 
37 COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
38     INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
39 
40 COMPILER_RT_VISIBILITY void lprofSetupValueProfiler() {
41   const char *Str = 0;
42   Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
43   if (Str && Str[0]) {
44     VPMaxNumValsPerSite = atoi(Str);
45     hasNonDefaultValsPerSite = 1;
46   }
47   if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
48     VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
49 }
50 
51 COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
52   VPMaxNumValsPerSite = MaxVals;
53   hasNonDefaultValsPerSite = 1;
54 }
55 
56 /* This method is only used in value profiler mock testing.  */
57 COMPILER_RT_VISIBILITY void
58 __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
59                                    uint32_t ValueKind, uint16_t NumValueSites) {
60   *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
61 }
62 
63 /* This method is only used in value profiler mock testing.  */
64 COMPILER_RT_VISIBILITY const __llvm_profile_data *
65 __llvm_profile_iterate_data(const __llvm_profile_data *Data) {
66   return Data + 1;
67 }
68 
69 /* This method is only used in value profiler mock testing.  */
70 COMPILER_RT_VISIBILITY void *
71 __llvm_get_function_addr(const __llvm_profile_data *Data) {
72   return Data->FunctionPointer;
73 }
74 
75 /* Allocate an array that holds the pointers to the linked lists of
76  * value profile counter nodes. The number of element of the array
77  * is the total number of value profile sites instrumented. Returns
78  * 0 if allocation fails.
79  */
80 
81 static int allocateValueProfileCounters(__llvm_profile_data *Data) {
82   uint64_t NumVSites = 0;
83   uint32_t VKI;
84 
85   /* This function will never be called when value site array is allocated
86      statically at compile time.  */
87   hasStaticCounters = 0;
88   /* When dynamic allocation is enabled, allow tracking the max number of
89    * values allowd.  */
90   if (!hasNonDefaultValsPerSite)
91     VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
92 
93   for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
94     NumVSites += Data->NumValueSites[VKI];
95 
96   ValueProfNode **Mem =
97       (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
98   if (!Mem)
99     return 0;
100   if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
101     free(Mem);
102     return 0;
103   }
104   return 1;
105 }
106 
107 static ValueProfNode *allocateOneNode(void) {
108   ValueProfNode *Node;
109 
110   if (!hasStaticCounters)
111     return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
112 
113   /* Early check to avoid value wrapping around.  */
114   if (CurrentVNode + 1 > EndVNode) {
115     if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
116       PROF_WARN("Unable to track new values: %s. "
117                 " Consider using option -mllvm -vp-counters-per-site=<n> to "
118                 "allocate more"
119                 " value profile counters at compile time. \n",
120                 "Running out of static counters");
121     }
122     return 0;
123   }
124   Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
125   /* Due to section padding, EndVNode point to a byte which is one pass
126    * an incomplete VNode, so we need to skip the last incomplete node. */
127   if (Node + 1 > EndVNode)
128     return 0;
129 
130   return Node;
131 }
132 
133 static COMPILER_RT_ALWAYS_INLINE void
134 instrumentTargetValueImpl(uint64_t TargetValue, void *Data,
135                           uint32_t CounterIndex, uint64_t CountValue) {
136   __llvm_profile_data *PData = (__llvm_profile_data *)Data;
137   if (!PData)
138     return;
139   if (!CountValue)
140     return;
141   if (!PData->Values) {
142     if (!allocateValueProfileCounters(PData))
143       return;
144   }
145 
146   ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
147   ValueProfNode *PrevVNode = NULL;
148   ValueProfNode *MinCountVNode = NULL;
149   ValueProfNode *CurVNode = ValueCounters[CounterIndex];
150   uint64_t MinCount = UINT64_MAX;
151 
152   uint8_t VDataCount = 0;
153   while (CurVNode) {
154     if (TargetValue == CurVNode->Value) {
155       CurVNode->Count += CountValue;
156       return;
157     }
158     if (CurVNode->Count < MinCount) {
159       MinCount = CurVNode->Count;
160       MinCountVNode = CurVNode;
161     }
162     PrevVNode = CurVNode;
163     CurVNode = CurVNode->Next;
164     ++VDataCount;
165   }
166 
167   if (VDataCount >= VPMaxNumValsPerSite) {
168     /* Bump down the min count node's count. If it reaches 0,
169      * evict it. This eviction/replacement policy makes hot
170      * targets more sticky while cold targets less so. In other
171      * words, it makes it less likely for the hot targets to be
172      * prematurally evicted during warmup/establishment period,
173      * when their counts are still low. In a special case when
174      * the number of values tracked is reduced to only one, this
175      * policy will guarantee that the dominating target with >50%
176      * total count will survive in the end. Note that this scheme
177      * allows the runtime to track the min count node in an adaptive
178      * manner. It can correct previous mistakes and eventually
179      * lock on a cold target that is alread in stable state.
180      *
181      * In very rare cases,  this replacement scheme may still lead
182      * to target loss. For instance, out of \c N value slots, \c N-1
183      * slots are occupied by luke warm targets during the warmup
184      * period and the remaining one slot is competed by two or more
185      * very hot targets. If those hot targets occur in an interleaved
186      * way, none of them will survive (gain enough weight to throw out
187      * other established entries) due to the ping-pong effect.
188      * To handle this situation, user can choose to increase the max
189      * number of tracked values per value site. Alternatively, a more
190      * expensive eviction mechanism can be implemented. It requires
191      * the runtime to track the total number of evictions per-site.
192      * When the total number of evictions reaches certain threshold,
193      * the runtime can wipe out more than one lowest count entries
194      * to give space for hot targets.
195      */
196     if (MinCountVNode->Count <= CountValue) {
197       CurVNode = MinCountVNode;
198       CurVNode->Value = TargetValue;
199       CurVNode->Count = CountValue;
200     } else
201       MinCountVNode->Count -= CountValue;
202 
203     return;
204   }
205 
206   CurVNode = allocateOneNode();
207   if (!CurVNode)
208     return;
209   CurVNode->Value = TargetValue;
210   CurVNode->Count += CountValue;
211 
212   uint32_t Success = 0;
213   if (!ValueCounters[CounterIndex])
214     Success =
215         COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
216   else if (PrevVNode && !PrevVNode->Next)
217     Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);
218 
219   if (!Success && !hasStaticCounters) {
220     free(CurVNode);
221     return;
222   }
223 }
224 
225 COMPILER_RT_VISIBILITY void
226 __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
227                                  uint32_t CounterIndex) {
228   instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);
229 }
230 COMPILER_RT_VISIBILITY void
231 __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
232                                        uint32_t CounterIndex,
233                                        uint64_t CountValue) {
234   instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);
235 }
236 
237 /*
238  * The target values are partitioned into multiple regions/ranges. There is one
239  * contiguous region which is precise -- every value in the range is tracked
240  * individually. A value outside the precise region will be collapsed into one
241  * value depending on the region it falls in.
242  *
243  * There are three regions:
244  * 1. (-inf, PreciseRangeStart) and (PreciseRangeLast, LargeRangeValue) belong
245  * to one region -- all values here should be mapped to one value of
246  * "PreciseRangeLast + 1".
247  * 2. [PreciseRangeStart, PreciseRangeLast]
248  * 3. Large values: [LargeValue, +inf) maps to one value of LargeValue.
249  *
250  * The range for large values is optional. The default value of INT64_MIN
251  * indicates it is not specified.
252  */
253 COMPILER_RT_VISIBILITY void __llvm_profile_instrument_range(
254     uint64_t TargetValue, void *Data, uint32_t CounterIndex,
255     int64_t PreciseRangeStart, int64_t PreciseRangeLast, int64_t LargeValue) {
256 
257   if (LargeValue != INT64_MIN && (int64_t)TargetValue >= LargeValue)
258     TargetValue = LargeValue;
259   else if ((int64_t)TargetValue < PreciseRangeStart ||
260            (int64_t)TargetValue > PreciseRangeLast)
261     TargetValue = PreciseRangeLast + 1;
262 
263   __llvm_profile_instrument_target(TargetValue, Data, CounterIndex);
264 }
265 
266 /*
267  * A wrapper struct that represents value profile runtime data.
268  * Like InstrProfRecord class which is used by profiling host tools,
269  * ValueProfRuntimeRecord also implements the abstract intefaces defined in
270  * ValueProfRecordClosure so that the runtime data can be serialized using
271  * shared C implementation.
272  */
273 typedef struct ValueProfRuntimeRecord {
274   const __llvm_profile_data *Data;
275   ValueProfNode **NodesKind[IPVK_Last + 1];
276   uint8_t **SiteCountArray;
277 } ValueProfRuntimeRecord;
278 
279 /* ValueProfRecordClosure Interface implementation. */
280 
281 static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
282   return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
283 }
284 
285 static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
286   uint32_t S = 0, I;
287   const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
288   if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
289     return 0;
290   for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
291     S += Record->SiteCountArray[VK][I];
292   return S;
293 }
294 
295 static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
296                                          uint32_t S) {
297   const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
298   return Record->SiteCountArray[VK][S];
299 }
300 
301 static ValueProfRuntimeRecord RTRecord;
302 static ValueProfRecordClosure RTRecordClosure = {
303     &RTRecord,          INSTR_PROF_NULLPTR, /* GetNumValueKinds */
304     getNumValueSitesRT, getNumValueDataRT,  getNumValueDataForSiteRT,
305     INSTR_PROF_NULLPTR, /* RemapValueData */
306     INSTR_PROF_NULLPTR, /* GetValueForSite, */
307     INSTR_PROF_NULLPTR  /* AllocValueProfData */
308 };
309 
310 static uint32_t
311 initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
312                                  uint8_t *SiteCountArray[]) {
313   unsigned I, J, S = 0, NumValueKinds = 0;
314   ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
315   RTRecord.Data = Data;
316   RTRecord.SiteCountArray = SiteCountArray;
317   for (I = 0; I <= IPVK_Last; I++) {
318     uint16_t N = Data->NumValueSites[I];
319     if (!N)
320       continue;
321 
322     NumValueKinds++;
323 
324     RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
325     for (J = 0; J < N; J++) {
326       /* Compute value count for each site. */
327       uint32_t C = 0;
328       ValueProfNode *Site =
329           Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
330       while (Site) {
331         C++;
332         Site = Site->Next;
333       }
334       if (C > UCHAR_MAX)
335         C = UCHAR_MAX;
336       RTRecord.SiteCountArray[I][J] = C;
337     }
338     S += N;
339   }
340   return NumValueKinds;
341 }
342 
343 static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
344                                         InstrProfValueData *Dst,
345                                         ValueProfNode *StartNode, uint32_t N) {
346   unsigned I;
347   ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];
348   for (I = 0; I < N; I++) {
349     Dst[I].Value = VNode->Value;
350     Dst[I].Count = VNode->Count;
351     VNode = VNode->Next;
352   }
353   return VNode;
354 }
355 
356 static uint32_t getValueProfDataSizeWrapper(void) {
357   return getValueProfDataSize(&RTRecordClosure);
358 }
359 
360 static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
361   return getNumValueDataForSiteRT(&RTRecord, VK, S);
362 }
363 
364 static VPDataReaderType TheVPDataReader = {
365     initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
366     getFirstValueProfRecord,          getNumValueDataForSiteWrapper,
367     getValueProfDataSizeWrapper,      getNextNValueData};
368 
369 COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader() {
370   return &TheVPDataReader;
371 }
372