/*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|* See https://llvm.org/LICENSE.txt for license information.
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "InstrProfiling.h"
#include "InstrProfilingInternal.h"
#include "InstrProfilingUtil.h"

#define INSTR_PROF_VALUE_PROF_DATA
#define INSTR_PROF_COMMON_API_IMPL
#include "profile/InstrProfData.inc"

static int hasStaticCounters = 1;
static int OutOfNodesWarnings = 0;
static int hasNonDefaultValsPerSite = 0;
#define INSTR_PROF_MAX_VP_WARNS 10
#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 16
#define INSTR_PROF_VNODE_POOL_SIZE 1024

#ifndef _MSC_VER
/* A shared static pool in addition to the vnodes statically
 * allocated by the compiler. */
COMPILER_RT_VISIBILITY ValueProfNode
    lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
        COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME);
#endif

COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
    INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;

COMPILER_RT_VISIBILITY void lprofSetupValueProfiler() {
  const char *Str = 0;
  Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
  if (Str && Str[0]) {
    VPMaxNumValsPerSite = atoi(Str);
    hasNonDefaultValsPerSite = 1;
  }
  if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
    VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
}

COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
  VPMaxNumValsPerSite = MaxVals;
  hasNonDefaultValsPerSite = 1;
}

/* This method is only used in value profiler mock testing. */
COMPILER_RT_VISIBILITY void
__llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
                                   uint32_t ValueKind, uint16_t NumValueSites) {
  *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
}

/* This method is only used in value profiler mock testing. */
COMPILER_RT_VISIBILITY const __llvm_profile_data *
__llvm_profile_iterate_data(const __llvm_profile_data *Data) {
  return Data + 1;
}

/* This method is only used in value profiler mock testing. */
COMPILER_RT_VISIBILITY void *
__llvm_get_function_addr(const __llvm_profile_data *Data) {
  return Data->FunctionPointer;
}

/* Allocate an array that holds the pointers to the linked lists of
 * value profile counter nodes. The number of elements in the array
 * is the total number of value profile sites instrumented. Returns
 * 0 if allocation fails.
 */

static int allocateValueProfileCounters(__llvm_profile_data *Data) {
  uint64_t NumVSites = 0;
  uint32_t VKI;

  /* This function will never be called when the value site array is allocated
     statically at compile time. */
  hasStaticCounters = 0;
  /* When dynamic allocation is enabled, allow tracking the maximum number of
   * values allowed. */
  if (!hasNonDefaultValsPerSite)
    VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;

  for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
    NumVSites += Data->NumValueSites[VKI];

  ValueProfNode **Mem =
      (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
  if (!Mem)
    return 0;
  if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
    free(Mem);
    return 0;
  }
  return 1;
}
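/* Grab a single counter node. When counters were laid out statically at
 * compile time, nodes are carved out of the shared pool delimited by the
 * CurrentVNode/EndVNode cursors; otherwise each node is heap-allocated.
 * Returns NULL when no node can be obtained. */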
static ValueProfNode *allocateOneNode(void) {
  ValueProfNode *Node;

  if (!hasStaticCounters)
    return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));

  /* Early check to avoid value wrapping around. */
  if (CurrentVNode + 1 > EndVNode) {
    if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
      PROF_WARN("Unable to track new values: %s. "
                " Consider using option -mllvm -vp-counters-per-site=<n> to "
                "allocate more"
                " value profile counters at compile time. \n",
                "Running out of static counters");
    }
    return 0;
  }
  Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
  /* Due to section padding, EndVNode may point one byte past an incomplete
   * VNode, so we need to skip the last incomplete node. */
  if (Node + 1 > EndVNode)
    return 0;

  return Node;
}

static COMPILER_RT_ALWAYS_INLINE void
instrumentTargetValueImpl(uint64_t TargetValue, void *Data,
                          uint32_t CounterIndex, uint64_t CountValue) {
  __llvm_profile_data *PData = (__llvm_profile_data *)Data;
  if (!PData)
    return;
  if (!CountValue)
    return;
  if (!PData->Values) {
    if (!allocateValueProfileCounters(PData))
      return;
  }

  ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
  ValueProfNode *PrevVNode = NULL;
  ValueProfNode *MinCountVNode = NULL;
  ValueProfNode *CurVNode = ValueCounters[CounterIndex];
  uint64_t MinCount = UINT64_MAX;

  uint8_t VDataCount = 0;
  while (CurVNode) {
    if (TargetValue == CurVNode->Value) {
      CurVNode->Count += CountValue;
      return;
    }
    if (CurVNode->Count < MinCount) {
      MinCount = CurVNode->Count;
      MinCountVNode = CurVNode;
    }
    PrevVNode = CurVNode;
    CurVNode = CurVNode->Next;
    ++VDataCount;
  }

  if (VDataCount >= VPMaxNumValsPerSite) {
    /* Bump down the min count node's count. If it reaches 0,
     * evict it. This eviction/replacement policy makes hot
     * targets more sticky while cold targets less so. In other
     * words, it makes it less likely for hot targets to be
     * prematurely evicted during the warmup/establishment period,
     * when their counts are still low. In the special case when
     * the number of values tracked is reduced to only one, this
     * policy guarantees that the dominating target with >50% of
     * the total count will survive in the end. Note that this scheme
     * allows the runtime to track the min count node in an adaptive
     * manner. It can correct previous mistakes and eventually
     * lock on to a cold target that is already in a stable state.
     *
     * In very rare cases, this replacement scheme may still lead
     * to target loss. For instance, out of \c N value slots, \c N-1
     * slots are occupied by lukewarm targets during the warmup
     * period and the remaining slot is competed for by two or more
     * very hot targets. If those hot targets occur in an interleaved
     * way, none of them will survive (gain enough weight to throw out
     * the other established entries) due to the ping-pong effect.
     * To handle this situation, the user can choose to increase the max
     * number of tracked values per value site. Alternatively, a more
     * expensive eviction mechanism can be implemented. It requires
     * the runtime to track the total number of evictions per site.
     * When the total number of evictions reaches a certain threshold,
     * the runtime can wipe out more than one lowest-count entry
     * to give space to hot targets.
     */
    if (MinCountVNode->Count <= CountValue) {
      CurVNode = MinCountVNode;
      CurVNode->Value = TargetValue;
      CurVNode->Count = CountValue;
    } else
      MinCountVNode->Count -= CountValue;

    return;
  }

  CurVNode = allocateOneNode();
  if (!CurVNode)
    return;
  CurVNode->Value = TargetValue;
  CurVNode->Count += CountValue;

  uint32_t Success = 0;
  if (!ValueCounters[CounterIndex])
    Success =
        COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
  else if (PrevVNode && !PrevVNode->Next)
    Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);

  if (!Success && !hasStaticCounters) {
    free(CurVNode);
    return;
  }
}
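/* Illustrative note on the replacement policy above (no additional logic):
 * once VPMaxNumValsPerSite values are tracked, a miss for a new target first
 * decrements the current minimum-count node. For example, a minimum node with
 * Count == 3 absorbs two misses with CountValue == 1 (dropping to 2, then 1)
 * and, assuming it remains the minimum, is only replaced by the new target,
 * with Count == 1, on the third miss. */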
COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
                                 uint32_t CounterIndex) {
  instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);
}

COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
                                       uint32_t CounterIndex,
                                       uint64_t CountValue) {
  instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);
}

/*
 * The target values are partitioned into multiple regions/ranges. There is one
 * contiguous region which is precise -- every value in the range is tracked
 * individually. A value outside the precise region is collapsed into one
 * representative value depending on the region it falls in.
 *
 * There are three regions:
 * 1. (-inf, PreciseRangeStart) and (PreciseRangeLast, LargeValue) belong
 * to one region -- all values here are mapped to the single value
 * "PreciseRangeLast + 1".
 * 2. [PreciseRangeStart, PreciseRangeLast] is the precise region.
 * 3. Large values: [LargeValue, +inf) map to the single value LargeValue.
 *
 * The range for large values is optional. The default value of INT64_MIN
 * indicates it is not specified.
 */
COMPILER_RT_VISIBILITY void __llvm_profile_instrument_range(
    uint64_t TargetValue, void *Data, uint32_t CounterIndex,
    int64_t PreciseRangeStart, int64_t PreciseRangeLast, int64_t LargeValue) {

  if (LargeValue != INT64_MIN && (int64_t)TargetValue >= LargeValue)
    TargetValue = LargeValue;
  else if ((int64_t)TargetValue < PreciseRangeStart ||
           (int64_t)TargetValue > PreciseRangeLast)
    TargetValue = PreciseRangeLast + 1;

  __llvm_profile_instrument_target(TargetValue, Data, CounterIndex);
}
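/* Illustrative example of the mapping above: with PreciseRangeStart == 0,
 * PreciseRangeLast == 8 and LargeValue == 64, target values 0..8 are tracked
 * individually, values 9..63 (and negative values) all collapse to 9
 * (PreciseRangeLast + 1), and values >= 64 collapse to 64. With
 * LargeValue == INT64_MIN the large-value region is disabled and every value
 * outside [0, 8] collapses to 9. */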
/*
 * A wrapper struct that represents value profile runtime data.
 * Like the InstrProfRecord class used by the profiling host tools,
 * ValueProfRuntimeRecord also implements the abstract interfaces defined in
 * ValueProfRecordClosure so that the runtime data can be serialized using
 * the shared C implementation.
 */
typedef struct ValueProfRuntimeRecord {
  const __llvm_profile_data *Data;
  ValueProfNode **NodesKind[IPVK_Last + 1];
  uint8_t **SiteCountArray;
} ValueProfRuntimeRecord;

/* ValueProfRecordClosure Interface implementation. */

static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
  return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
}

static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
  uint32_t S = 0, I;
  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
  if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
    return 0;
  for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
    S += Record->SiteCountArray[VK][I];
  return S;
}

static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
                                         uint32_t S) {
  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
  return Record->SiteCountArray[VK][S];
}

static ValueProfRuntimeRecord RTRecord;
static ValueProfRecordClosure RTRecordClosure = {
    &RTRecord,
    INSTR_PROF_NULLPTR, /* GetNumValueKinds */
    getNumValueSitesRT,
    getNumValueDataRT,
    getNumValueDataForSiteRT,
    INSTR_PROF_NULLPTR, /* RemapValueData */
    INSTR_PROF_NULLPTR, /* GetValueForSite, */
    INSTR_PROF_NULLPTR  /* AllocValueProfData */
};

static uint32_t
initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
                                 uint8_t *SiteCountArray[]) {
  unsigned I, J, S = 0, NumValueKinds = 0;
  ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
  RTRecord.Data = Data;
  RTRecord.SiteCountArray = SiteCountArray;
  for (I = 0; I <= IPVK_Last; I++) {
    uint16_t N = Data->NumValueSites[I];
    if (!N)
      continue;

    NumValueKinds++;

    RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
    for (J = 0; J < N; J++) {
      /* Compute the value count for each site. */
      uint32_t C = 0;
      ValueProfNode *Site =
          Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
      while (Site) {
        C++;
        Site = Site->Next;
      }
      if (C > UCHAR_MAX)
        C = UCHAR_MAX;
      RTRecord.SiteCountArray[I][J] = C;
    }
    S += N;
  }
  return NumValueKinds;
}

static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
                                        InstrProfValueData *Dst,
                                        ValueProfNode *StartNode, uint32_t N) {
  unsigned I;
  ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];
  for (I = 0; I < N; I++) {
    Dst[I].Value = VNode->Value;
    Dst[I].Count = VNode->Count;
    VNode = VNode->Next;
  }
  return VNode;
}

static uint32_t getValueProfDataSizeWrapper(void) {
  return getValueProfDataSize(&RTRecordClosure);
}

static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
  return getNumValueDataForSiteRT(&RTRecord, VK, S);
}

static VPDataReaderType TheVPDataReader = {
    initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
    getFirstValueProfRecord,          getNumValueDataForSiteWrapper,
    getValueProfDataSizeWrapper,      getNextNValueData};

COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader() {
  return &TheVPDataReader;
}
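/* TheVPDataReader bundles the record initialization, sizing, and traversal
 * callbacks above into the single VPDataReaderType handed out by
 * lprofGetVPDataReader(), so that value profile data can be serialized
 * without exposing the linked-list node representation. */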