xref: /freebsd/contrib/llvm-project/compiler-rt/lib/profile/InstrProfilingValue.c (revision 7a6dacaca14b62ca4b74406814becb87a3fefac0)
1  /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
2  |*
3  |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  |* See https://llvm.org/LICENSE.txt for license information.
5  |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  |*
7  \*===----------------------------------------------------------------------===*/
8  
9  #include <assert.h>
10  #include <limits.h>
11  #include <stdio.h>
12  #include <stdlib.h>
13  #include <string.h>
14  
15  #include "InstrProfiling.h"
16  #include "InstrProfilingInternal.h"
17  #include "InstrProfilingUtil.h"
18  
19  #define INSTR_PROF_VALUE_PROF_DATA
20  #define INSTR_PROF_COMMON_API_IMPL
21  #define INSTR_PROF_VALUE_PROF_MEMOP_API
22  #include "profile/InstrProfData.inc"
23  
24  static int hasStaticCounters = 1;
25  static int OutOfNodesWarnings = 0;
26  static int hasNonDefaultValsPerSite = 0;
27  #define INSTR_PROF_MAX_VP_WARNS 10
28  #define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24
29  #define INSTR_PROF_VNODE_POOL_SIZE 1024
30  
31  #ifndef _MSC_VER
32  /* A shared static pool in addition to the vnodes statically
33   * allocated by the compiler.  */
34  COMPILER_RT_VISIBILITY ValueProfNode
35      lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
36         COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME);
37  #endif
38  
39  COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
40      INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;
41  
42  COMPILER_RT_VISIBILITY void lprofSetupValueProfiler(void) {
43    const char *Str = 0;
44    Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
45    if (Str && Str[0]) {
46      VPMaxNumValsPerSite = atoi(Str);
47      hasNonDefaultValsPerSite = 1;
48    }
49    if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
50      VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
51  }
52  
53  COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
54    VPMaxNumValsPerSite = MaxVals;
55    hasNonDefaultValsPerSite = 1;
56  }
57  
58  /* This method is only used in value profiler mock testing.  */
59  COMPILER_RT_VISIBILITY void
60  __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
61                                     uint32_t ValueKind, uint16_t NumValueSites) {
62  #ifdef __GNUC__
63  #pragma GCC diagnostic push
64  #pragma GCC diagnostic ignored "-Wcast-qual"
65  #elif defined(__clang__)
66  #pragma clang diagnostic push
67  #pragma clang diagnostic ignored "-Wcast-qual"
68  #endif
69    *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
70  #ifdef __GNUC__
71  #pragma GCC diagnostic pop
72  #elif defined(__clang__)
73  #pragma clang diagnostic pop
74  #endif
75  }
76  
77  /* This method is only used in value profiler mock testing.  */
78  COMPILER_RT_VISIBILITY const __llvm_profile_data *
79  __llvm_profile_iterate_data(const __llvm_profile_data *Data) {
80    return Data + 1;
81  }
82  
83  /* This method is only used in value profiler mock testing.  */
84  COMPILER_RT_VISIBILITY void *
85  __llvm_get_function_addr(const __llvm_profile_data *Data) {
86    return Data->FunctionPointer;
87  }
88  
89  /* Allocate an array that holds the pointers to the linked lists of
90   * value profile counter nodes. The number of element of the array
91   * is the total number of value profile sites instrumented. Returns
92   * 0 if allocation fails.
93   */
94  
95  static int allocateValueProfileCounters(__llvm_profile_data *Data) {
96    uint64_t NumVSites = 0;
97    uint32_t VKI;
98  
99    /* This function will never be called when value site array is allocated
100       statically at compile time.  */
101    hasStaticCounters = 0;
102    /* When dynamic allocation is enabled, allow tracking the max number of
103     * values allowd.  */
104    if (!hasNonDefaultValsPerSite)
105      VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
106  
107    for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
108      NumVSites += Data->NumValueSites[VKI];
109  
110    // If NumVSites = 0, calloc is allowed to return a non-null pointer.
111    assert(NumVSites > 0 && "NumVSites can't be zero");
112    ValueProfNode **Mem =
113        (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
114    if (!Mem)
115      return 0;
116    if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
117      free(Mem);
118      return 0;
119    }
120    return 1;
121  }
122  
123  static ValueProfNode *allocateOneNode(void) {
124    ValueProfNode *Node;
125  
126    if (!hasStaticCounters)
127      return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
128  
129    /* Early check to avoid value wrapping around.  */
130    if (CurrentVNode + 1 > EndVNode) {
131      if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
132        PROF_WARN("Unable to track new values: %s. "
133                  " Consider using option -mllvm -vp-counters-per-site=<n> to "
134                  "allocate more"
135                  " value profile counters at compile time. \n",
136                  "Running out of static counters");
137      }
138      return 0;
139    }
140    Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
141    /* Due to section padding, EndVNode point to a byte which is one pass
142     * an incomplete VNode, so we need to skip the last incomplete node. */
143    if (Node + 1 > EndVNode)
144      return 0;
145  
146    return Node;
147  }
148  
149  static COMPILER_RT_ALWAYS_INLINE void
150  instrumentTargetValueImpl(uint64_t TargetValue, void *Data,
151                            uint32_t CounterIndex, uint64_t CountValue) {
152    __llvm_profile_data *PData = (__llvm_profile_data *)Data;
153    if (!PData)
154      return;
155    if (!CountValue)
156      return;
157    if (!PData->Values) {
158      if (!allocateValueProfileCounters(PData))
159        return;
160    }
161  
162    ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
163    ValueProfNode *PrevVNode = NULL;
164    ValueProfNode *MinCountVNode = NULL;
165    ValueProfNode *CurVNode = ValueCounters[CounterIndex];
166    uint64_t MinCount = UINT64_MAX;
167  
168    uint8_t VDataCount = 0;
169    while (CurVNode) {
170      if (TargetValue == CurVNode->Value) {
171        CurVNode->Count += CountValue;
172        return;
173      }
174      if (CurVNode->Count < MinCount) {
175        MinCount = CurVNode->Count;
176        MinCountVNode = CurVNode;
177      }
178      PrevVNode = CurVNode;
179      CurVNode = CurVNode->Next;
180      ++VDataCount;
181    }
182  
183    if (VDataCount >= VPMaxNumValsPerSite) {
184      /* Bump down the min count node's count. If it reaches 0,
185       * evict it. This eviction/replacement policy makes hot
186       * targets more sticky while cold targets less so. In other
187       * words, it makes it less likely for the hot targets to be
188       * prematurally evicted during warmup/establishment period,
189       * when their counts are still low. In a special case when
190       * the number of values tracked is reduced to only one, this
191       * policy will guarantee that the dominating target with >50%
192       * total count will survive in the end. Note that this scheme
193       * allows the runtime to track the min count node in an adaptive
194       * manner. It can correct previous mistakes and eventually
195       * lock on a cold target that is alread in stable state.
196       *
197       * In very rare cases,  this replacement scheme may still lead
198       * to target loss. For instance, out of \c N value slots, \c N-1
199       * slots are occupied by luke warm targets during the warmup
200       * period and the remaining one slot is competed by two or more
201       * very hot targets. If those hot targets occur in an interleaved
202       * way, none of them will survive (gain enough weight to throw out
203       * other established entries) due to the ping-pong effect.
204       * To handle this situation, user can choose to increase the max
205       * number of tracked values per value site. Alternatively, a more
206       * expensive eviction mechanism can be implemented. It requires
207       * the runtime to track the total number of evictions per-site.
208       * When the total number of evictions reaches certain threshold,
209       * the runtime can wipe out more than one lowest count entries
210       * to give space for hot targets.
211       */
212      if (MinCountVNode->Count <= CountValue) {
213        CurVNode = MinCountVNode;
214        CurVNode->Value = TargetValue;
215        CurVNode->Count = CountValue;
216      } else
217        MinCountVNode->Count -= CountValue;
218  
219      return;
220    }
221  
222    CurVNode = allocateOneNode();
223    if (!CurVNode)
224      return;
225    CurVNode->Value = TargetValue;
226    CurVNode->Count += CountValue;
227  
228    uint32_t Success = 0;
229    if (!ValueCounters[CounterIndex])
230      Success =
231          COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
232    else if (PrevVNode && !PrevVNode->Next)
233      Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);
234  
235    if (!Success && !hasStaticCounters) {
236      free(CurVNode);
237      return;
238    }
239  }
240  
241  COMPILER_RT_VISIBILITY void
242  __llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
243                                   uint32_t CounterIndex) {
244    instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);
245  }
246  COMPILER_RT_VISIBILITY void
247  __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
248                                         uint32_t CounterIndex,
249                                         uint64_t CountValue) {
250    instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);
251  }
252  
253  /*
254   * The target values are partitioned into multiple ranges. The range spec is
255   * defined in InstrProfData.inc.
256   */
257  COMPILER_RT_VISIBILITY void
258  __llvm_profile_instrument_memop(uint64_t TargetValue, void *Data,
259                                  uint32_t CounterIndex) {
260    // Map the target value to the representative value of its range.
261    uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue);
262    __llvm_profile_instrument_target(RepValue, Data, CounterIndex);
263  }
264  
265  /*
266   * A wrapper struct that represents value profile runtime data.
267   * Like InstrProfRecord class which is used by profiling host tools,
268   * ValueProfRuntimeRecord also implements the abstract interfaces defined in
269   * ValueProfRecordClosure so that the runtime data can be serialized using
270   * shared C implementation.
271   */
272  typedef struct ValueProfRuntimeRecord {
273    const __llvm_profile_data *Data;
274    ValueProfNode **NodesKind[IPVK_Last + 1];
275    uint8_t **SiteCountArray;
276  } ValueProfRuntimeRecord;
277  
278  /* ValueProfRecordClosure Interface implementation. */
279  
280  static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
281    return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
282  }
283  
284  static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
285    uint32_t S = 0, I;
286    const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
287    if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
288      return 0;
289    for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
290      S += Record->SiteCountArray[VK][I];
291    return S;
292  }
293  
294  static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
295                                           uint32_t S) {
296    const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
297    return Record->SiteCountArray[VK][S];
298  }
299  
300  static ValueProfRuntimeRecord RTRecord;
301  static ValueProfRecordClosure RTRecordClosure = {
302      &RTRecord,          INSTR_PROF_NULLPTR, /* GetNumValueKinds */
303      getNumValueSitesRT, getNumValueDataRT,  getNumValueDataForSiteRT,
304      INSTR_PROF_NULLPTR, /* RemapValueData */
305      INSTR_PROF_NULLPTR, /* GetValueForSite, */
306      INSTR_PROF_NULLPTR  /* AllocValueProfData */
307  };
308  
309  static uint32_t
310  initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
311                                   uint8_t *SiteCountArray[]) {
312    unsigned I, J, S = 0, NumValueKinds = 0;
313    ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
314    RTRecord.Data = Data;
315    RTRecord.SiteCountArray = SiteCountArray;
316    for (I = 0; I <= IPVK_Last; I++) {
317      uint16_t N = Data->NumValueSites[I];
318      if (!N)
319        continue;
320  
321      NumValueKinds++;
322  
323      RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
324      for (J = 0; J < N; J++) {
325        /* Compute value count for each site. */
326        uint32_t C = 0;
327        ValueProfNode *Site =
328            Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
329        while (Site) {
330          C++;
331          Site = Site->Next;
332        }
333        if (C > UCHAR_MAX)
334          C = UCHAR_MAX;
335        RTRecord.SiteCountArray[I][J] = C;
336      }
337      S += N;
338    }
339    return NumValueKinds;
340  }
341  
342  static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
343                                          InstrProfValueData *Dst,
344                                          ValueProfNode *StartNode, uint32_t N) {
345    unsigned I;
346    ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];
347    for (I = 0; I < N; I++) {
348      Dst[I].Value = VNode->Value;
349      Dst[I].Count = VNode->Count;
350      VNode = VNode->Next;
351    }
352    return VNode;
353  }
354  
355  static uint32_t getValueProfDataSizeWrapper(void) {
356    return getValueProfDataSize(&RTRecordClosure);
357  }
358  
359  static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
360    return getNumValueDataForSiteRT(&RTRecord, VK, S);
361  }
362  
363  static VPDataReaderType TheVPDataReader = {
364      initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
365      getFirstValueProfRecord,          getNumValueDataForSiteWrapper,
366      getValueProfDataSizeWrapper,      getNextNValueData};
367  
368  COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader(void) {
369    return &TheVPDataReader;
370  }
371