1 /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\ 2 |* 3 |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 |* See https://llvm.org/LICENSE.txt for license information. 5 |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 |* 7 \*===----------------------------------------------------------------------===*/ 8 9 #include <assert.h> 10 #include <limits.h> 11 #include <stdio.h> 12 #include <stdlib.h> 13 #include <string.h> 14 15 #include "InstrProfiling.h" 16 #include "InstrProfilingInternal.h" 17 #include "InstrProfilingUtil.h" 18 19 #define INSTR_PROF_VALUE_PROF_DATA 20 #define INSTR_PROF_COMMON_API_IMPL 21 #define INSTR_PROF_VALUE_PROF_MEMOP_API 22 #include "profile/InstrProfData.inc" 23 24 static int hasStaticCounters = 1; 25 static int OutOfNodesWarnings = 0; 26 static int hasNonDefaultValsPerSite = 0; 27 #define INSTR_PROF_MAX_VP_WARNS 10 28 #define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24 29 #define INSTR_PROF_VNODE_POOL_SIZE 1024 30 31 #ifndef _MSC_VER 32 /* A shared static pool in addition to the vnodes statically 33 * allocated by the compiler. */ 34 COMPILER_RT_VISIBILITY ValueProfNode 35 lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION( 36 COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME); 37 #endif 38 39 COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite = 40 INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE; 41 42 COMPILER_RT_VISIBILITY void lprofSetupValueProfiler() { 43 const char *Str = 0; 44 Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE"); 45 if (Str && Str[0]) { 46 VPMaxNumValsPerSite = atoi(Str); 47 hasNonDefaultValsPerSite = 1; 48 } 49 if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE) 50 VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE; 51 } 52 53 COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) { 54 VPMaxNumValsPerSite = MaxVals; 55 hasNonDefaultValsPerSite = 1; 56 } 57 58 /* This method is only used in value profiler mock testing. */ 59 COMPILER_RT_VISIBILITY void 60 __llvm_profile_set_num_value_sites(__llvm_profile_data *Data, 61 uint32_t ValueKind, uint16_t NumValueSites) { 62 *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites; 63 } 64 65 /* This method is only used in value profiler mock testing. */ 66 COMPILER_RT_VISIBILITY const __llvm_profile_data * 67 __llvm_profile_iterate_data(const __llvm_profile_data *Data) { 68 return Data + 1; 69 } 70 71 /* This method is only used in value profiler mock testing. */ 72 COMPILER_RT_VISIBILITY void * 73 __llvm_get_function_addr(const __llvm_profile_data *Data) { 74 return Data->FunctionPointer; 75 } 76 77 /* Allocate an array that holds the pointers to the linked lists of 78 * value profile counter nodes. The number of element of the array 79 * is the total number of value profile sites instrumented. Returns 80 * 0 if allocation fails. 81 */ 82 83 static int allocateValueProfileCounters(__llvm_profile_data *Data) { 84 uint64_t NumVSites = 0; 85 uint32_t VKI; 86 87 /* This function will never be called when value site array is allocated 88 statically at compile time. */ 89 hasStaticCounters = 0; 90 /* When dynamic allocation is enabled, allow tracking the max number of 91 * values allowd. */ 92 if (!hasNonDefaultValsPerSite) 93 VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE; 94 95 for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI) 96 NumVSites += Data->NumValueSites[VKI]; 97 98 // If NumVSites = 0, calloc is allowed to return a non-null pointer. 99 assert(NumVSites > 0 && "NumVSites can't be zero"); 100 ValueProfNode **Mem = 101 (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *)); 102 if (!Mem) 103 return 0; 104 if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) { 105 free(Mem); 106 return 0; 107 } 108 return 1; 109 } 110 111 static ValueProfNode *allocateOneNode(void) { 112 ValueProfNode *Node; 113 114 if (!hasStaticCounters) 115 return (ValueProfNode *)calloc(1, sizeof(ValueProfNode)); 116 117 /* Early check to avoid value wrapping around. */ 118 if (CurrentVNode + 1 > EndVNode) { 119 if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) { 120 PROF_WARN("Unable to track new values: %s. " 121 " Consider using option -mllvm -vp-counters-per-site=<n> to " 122 "allocate more" 123 " value profile counters at compile time. \n", 124 "Running out of static counters"); 125 } 126 return 0; 127 } 128 Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1); 129 /* Due to section padding, EndVNode point to a byte which is one pass 130 * an incomplete VNode, so we need to skip the last incomplete node. */ 131 if (Node + 1 > EndVNode) 132 return 0; 133 134 return Node; 135 } 136 137 static COMPILER_RT_ALWAYS_INLINE void 138 instrumentTargetValueImpl(uint64_t TargetValue, void *Data, 139 uint32_t CounterIndex, uint64_t CountValue) { 140 __llvm_profile_data *PData = (__llvm_profile_data *)Data; 141 if (!PData) 142 return; 143 if (!CountValue) 144 return; 145 if (!PData->Values) { 146 if (!allocateValueProfileCounters(PData)) 147 return; 148 } 149 150 ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values; 151 ValueProfNode *PrevVNode = NULL; 152 ValueProfNode *MinCountVNode = NULL; 153 ValueProfNode *CurVNode = ValueCounters[CounterIndex]; 154 uint64_t MinCount = UINT64_MAX; 155 156 uint8_t VDataCount = 0; 157 while (CurVNode) { 158 if (TargetValue == CurVNode->Value) { 159 CurVNode->Count += CountValue; 160 return; 161 } 162 if (CurVNode->Count < MinCount) { 163 MinCount = CurVNode->Count; 164 MinCountVNode = CurVNode; 165 } 166 PrevVNode = CurVNode; 167 CurVNode = CurVNode->Next; 168 ++VDataCount; 169 } 170 171 if (VDataCount >= VPMaxNumValsPerSite) { 172 /* Bump down the min count node's count. If it reaches 0, 173 * evict it. This eviction/replacement policy makes hot 174 * targets more sticky while cold targets less so. In other 175 * words, it makes it less likely for the hot targets to be 176 * prematurally evicted during warmup/establishment period, 177 * when their counts are still low. In a special case when 178 * the number of values tracked is reduced to only one, this 179 * policy will guarantee that the dominating target with >50% 180 * total count will survive in the end. Note that this scheme 181 * allows the runtime to track the min count node in an adaptive 182 * manner. It can correct previous mistakes and eventually 183 * lock on a cold target that is alread in stable state. 184 * 185 * In very rare cases, this replacement scheme may still lead 186 * to target loss. For instance, out of \c N value slots, \c N-1 187 * slots are occupied by luke warm targets during the warmup 188 * period and the remaining one slot is competed by two or more 189 * very hot targets. If those hot targets occur in an interleaved 190 * way, none of them will survive (gain enough weight to throw out 191 * other established entries) due to the ping-pong effect. 192 * To handle this situation, user can choose to increase the max 193 * number of tracked values per value site. Alternatively, a more 194 * expensive eviction mechanism can be implemented. It requires 195 * the runtime to track the total number of evictions per-site. 196 * When the total number of evictions reaches certain threshold, 197 * the runtime can wipe out more than one lowest count entries 198 * to give space for hot targets. 199 */ 200 if (MinCountVNode->Count <= CountValue) { 201 CurVNode = MinCountVNode; 202 CurVNode->Value = TargetValue; 203 CurVNode->Count = CountValue; 204 } else 205 MinCountVNode->Count -= CountValue; 206 207 return; 208 } 209 210 CurVNode = allocateOneNode(); 211 if (!CurVNode) 212 return; 213 CurVNode->Value = TargetValue; 214 CurVNode->Count += CountValue; 215 216 uint32_t Success = 0; 217 if (!ValueCounters[CounterIndex]) 218 Success = 219 COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode); 220 else if (PrevVNode && !PrevVNode->Next) 221 Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode); 222 223 if (!Success && !hasStaticCounters) { 224 free(CurVNode); 225 return; 226 } 227 } 228 229 COMPILER_RT_VISIBILITY void 230 __llvm_profile_instrument_target(uint64_t TargetValue, void *Data, 231 uint32_t CounterIndex) { 232 instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1); 233 } 234 COMPILER_RT_VISIBILITY void 235 __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data, 236 uint32_t CounterIndex, 237 uint64_t CountValue) { 238 instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue); 239 } 240 241 /* 242 * The target values are partitioned into multiple ranges. The range spec is 243 * defined in InstrProfData.inc. 244 */ 245 COMPILER_RT_VISIBILITY void 246 __llvm_profile_instrument_memop(uint64_t TargetValue, void *Data, 247 uint32_t CounterIndex) { 248 // Map the target value to the representative value of its range. 249 uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue); 250 __llvm_profile_instrument_target(RepValue, Data, CounterIndex); 251 } 252 253 /* 254 * A wrapper struct that represents value profile runtime data. 255 * Like InstrProfRecord class which is used by profiling host tools, 256 * ValueProfRuntimeRecord also implements the abstract intefaces defined in 257 * ValueProfRecordClosure so that the runtime data can be serialized using 258 * shared C implementation. 259 */ 260 typedef struct ValueProfRuntimeRecord { 261 const __llvm_profile_data *Data; 262 ValueProfNode **NodesKind[IPVK_Last + 1]; 263 uint8_t **SiteCountArray; 264 } ValueProfRuntimeRecord; 265 266 /* ValueProfRecordClosure Interface implementation. */ 267 268 static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) { 269 return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK]; 270 } 271 272 static uint32_t getNumValueDataRT(const void *R, uint32_t VK) { 273 uint32_t S = 0, I; 274 const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R; 275 if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR) 276 return 0; 277 for (I = 0; I < Record->Data->NumValueSites[VK]; I++) 278 S += Record->SiteCountArray[VK][I]; 279 return S; 280 } 281 282 static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK, 283 uint32_t S) { 284 const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R; 285 return Record->SiteCountArray[VK][S]; 286 } 287 288 static ValueProfRuntimeRecord RTRecord; 289 static ValueProfRecordClosure RTRecordClosure = { 290 &RTRecord, INSTR_PROF_NULLPTR, /* GetNumValueKinds */ 291 getNumValueSitesRT, getNumValueDataRT, getNumValueDataForSiteRT, 292 INSTR_PROF_NULLPTR, /* RemapValueData */ 293 INSTR_PROF_NULLPTR, /* GetValueForSite, */ 294 INSTR_PROF_NULLPTR /* AllocValueProfData */ 295 }; 296 297 static uint32_t 298 initializeValueProfRuntimeRecord(const __llvm_profile_data *Data, 299 uint8_t *SiteCountArray[]) { 300 unsigned I, J, S = 0, NumValueKinds = 0; 301 ValueProfNode **Nodes = (ValueProfNode **)Data->Values; 302 RTRecord.Data = Data; 303 RTRecord.SiteCountArray = SiteCountArray; 304 for (I = 0; I <= IPVK_Last; I++) { 305 uint16_t N = Data->NumValueSites[I]; 306 if (!N) 307 continue; 308 309 NumValueKinds++; 310 311 RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR; 312 for (J = 0; J < N; J++) { 313 /* Compute value count for each site. */ 314 uint32_t C = 0; 315 ValueProfNode *Site = 316 Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR; 317 while (Site) { 318 C++; 319 Site = Site->Next; 320 } 321 if (C > UCHAR_MAX) 322 C = UCHAR_MAX; 323 RTRecord.SiteCountArray[I][J] = C; 324 } 325 S += N; 326 } 327 return NumValueKinds; 328 } 329 330 static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site, 331 InstrProfValueData *Dst, 332 ValueProfNode *StartNode, uint32_t N) { 333 unsigned I; 334 ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site]; 335 for (I = 0; I < N; I++) { 336 Dst[I].Value = VNode->Value; 337 Dst[I].Count = VNode->Count; 338 VNode = VNode->Next; 339 } 340 return VNode; 341 } 342 343 static uint32_t getValueProfDataSizeWrapper(void) { 344 return getValueProfDataSize(&RTRecordClosure); 345 } 346 347 static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) { 348 return getNumValueDataForSiteRT(&RTRecord, VK, S); 349 } 350 351 static VPDataReaderType TheVPDataReader = { 352 initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize, 353 getFirstValueProfRecord, getNumValueDataForSiteWrapper, 354 getValueProfDataSizeWrapper, getNextNValueData}; 355 356 COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader() { 357 return &TheVPDataReader; 358 } 359