1 /*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\ 2 |* 3 |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 |* See https://llvm.org/LICENSE.txt for license information. 5 |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 |* 7 \*===----------------------------------------------------------------------===*/ 8 9 #include <assert.h> 10 #include <limits.h> 11 #include <stdio.h> 12 #include <stdlib.h> 13 #include <string.h> 14 15 #include "InstrProfiling.h" 16 #include "InstrProfilingInternal.h" 17 #include "InstrProfilingUtil.h" 18 19 #define INSTR_PROF_VALUE_PROF_DATA 20 #define INSTR_PROF_COMMON_API_IMPL 21 #define INSTR_PROF_VALUE_PROF_MEMOP_API 22 #include "profile/InstrProfData.inc" 23 24 static int hasStaticCounters = 1; 25 static int OutOfNodesWarnings = 0; 26 static int hasNonDefaultValsPerSite = 0; 27 #define INSTR_PROF_MAX_VP_WARNS 10 28 #define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24 29 #define INSTR_PROF_VNODE_POOL_SIZE 1024 30 31 #ifndef _MSC_VER 32 /* A shared static pool in addition to the vnodes statically 33 * allocated by the compiler. */ 34 COMPILER_RT_VISIBILITY ValueProfNode 35 lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION( 36 COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME); 37 #endif 38 39 COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite = 40 INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE; 41 42 COMPILER_RT_VISIBILITY void lprofSetupValueProfiler(void) { 43 const char *Str = 0; 44 Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE"); 45 if (Str && Str[0]) { 46 VPMaxNumValsPerSite = atoi(Str); 47 hasNonDefaultValsPerSite = 1; 48 } 49 if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE) 50 VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE; 51 } 52 53 COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) { 54 VPMaxNumValsPerSite = MaxVals; 55 hasNonDefaultValsPerSite = 1; 56 } 57 58 /* This method is only used in value profiler mock testing. */ 59 COMPILER_RT_VISIBILITY void 60 __llvm_profile_set_num_value_sites(__llvm_profile_data *Data, 61 uint32_t ValueKind, uint16_t NumValueSites) { 62 #ifdef __GNUC__ 63 #pragma GCC diagnostic push 64 #pragma GCC diagnostic ignored "-Wcast-qual" 65 #endif 66 *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites; 67 #ifdef __GNUC__ 68 #pragma GCC diagnostic pop 69 #endif 70 } 71 72 /* This method is only used in value profiler mock testing. */ 73 COMPILER_RT_VISIBILITY const __llvm_profile_data * 74 __llvm_profile_iterate_data(const __llvm_profile_data *Data) { 75 return Data + 1; 76 } 77 78 /* This method is only used in value profiler mock testing. */ 79 COMPILER_RT_VISIBILITY void * 80 __llvm_get_function_addr(const __llvm_profile_data *Data) { 81 return Data->FunctionPointer; 82 } 83 84 /* Allocate an array that holds the pointers to the linked lists of 85 * value profile counter nodes. The number of element of the array 86 * is the total number of value profile sites instrumented. Returns 87 * 0 if allocation fails. 88 */ 89 90 static int allocateValueProfileCounters(__llvm_profile_data *Data) { 91 uint64_t NumVSites = 0; 92 uint32_t VKI; 93 94 /* This function will never be called when value site array is allocated 95 statically at compile time. */ 96 hasStaticCounters = 0; 97 /* When dynamic allocation is enabled, allow tracking the max number of 98 * values allowd. */ 99 if (!hasNonDefaultValsPerSite) 100 VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE; 101 102 for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI) 103 NumVSites += Data->NumValueSites[VKI]; 104 105 // If NumVSites = 0, calloc is allowed to return a non-null pointer. 106 assert(NumVSites > 0 && "NumVSites can't be zero"); 107 ValueProfNode **Mem = 108 (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *)); 109 if (!Mem) 110 return 0; 111 if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) { 112 free(Mem); 113 return 0; 114 } 115 return 1; 116 } 117 118 static ValueProfNode *allocateOneNode(void) { 119 ValueProfNode *Node; 120 121 if (!hasStaticCounters) 122 return (ValueProfNode *)calloc(1, sizeof(ValueProfNode)); 123 124 /* Early check to avoid value wrapping around. */ 125 if (CurrentVNode + 1 > EndVNode) { 126 if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) { 127 PROF_WARN("Unable to track new values: %s. " 128 " Consider using option -mllvm -vp-counters-per-site=<n> to " 129 "allocate more" 130 " value profile counters at compile time. \n", 131 "Running out of static counters"); 132 } 133 return 0; 134 } 135 Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1); 136 /* Due to section padding, EndVNode point to a byte which is one pass 137 * an incomplete VNode, so we need to skip the last incomplete node. */ 138 if (Node + 1 > EndVNode) 139 return 0; 140 141 return Node; 142 } 143 144 static COMPILER_RT_ALWAYS_INLINE void 145 instrumentTargetValueImpl(uint64_t TargetValue, void *Data, 146 uint32_t CounterIndex, uint64_t CountValue) { 147 __llvm_profile_data *PData = (__llvm_profile_data *)Data; 148 if (!PData) 149 return; 150 if (!CountValue) 151 return; 152 if (!PData->Values) { 153 if (!allocateValueProfileCounters(PData)) 154 return; 155 } 156 157 ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values; 158 ValueProfNode *PrevVNode = NULL; 159 ValueProfNode *MinCountVNode = NULL; 160 ValueProfNode *CurVNode = ValueCounters[CounterIndex]; 161 uint64_t MinCount = UINT64_MAX; 162 163 uint8_t VDataCount = 0; 164 while (CurVNode) { 165 if (TargetValue == CurVNode->Value) { 166 CurVNode->Count += CountValue; 167 return; 168 } 169 if (CurVNode->Count < MinCount) { 170 MinCount = CurVNode->Count; 171 MinCountVNode = CurVNode; 172 } 173 PrevVNode = CurVNode; 174 CurVNode = CurVNode->Next; 175 ++VDataCount; 176 } 177 178 if (VDataCount >= VPMaxNumValsPerSite) { 179 /* Bump down the min count node's count. If it reaches 0, 180 * evict it. This eviction/replacement policy makes hot 181 * targets more sticky while cold targets less so. In other 182 * words, it makes it less likely for the hot targets to be 183 * prematurally evicted during warmup/establishment period, 184 * when their counts are still low. In a special case when 185 * the number of values tracked is reduced to only one, this 186 * policy will guarantee that the dominating target with >50% 187 * total count will survive in the end. Note that this scheme 188 * allows the runtime to track the min count node in an adaptive 189 * manner. It can correct previous mistakes and eventually 190 * lock on a cold target that is alread in stable state. 191 * 192 * In very rare cases, this replacement scheme may still lead 193 * to target loss. For instance, out of \c N value slots, \c N-1 194 * slots are occupied by luke warm targets during the warmup 195 * period and the remaining one slot is competed by two or more 196 * very hot targets. If those hot targets occur in an interleaved 197 * way, none of them will survive (gain enough weight to throw out 198 * other established entries) due to the ping-pong effect. 199 * To handle this situation, user can choose to increase the max 200 * number of tracked values per value site. Alternatively, a more 201 * expensive eviction mechanism can be implemented. It requires 202 * the runtime to track the total number of evictions per-site. 203 * When the total number of evictions reaches certain threshold, 204 * the runtime can wipe out more than one lowest count entries 205 * to give space for hot targets. 206 */ 207 if (MinCountVNode->Count <= CountValue) { 208 CurVNode = MinCountVNode; 209 CurVNode->Value = TargetValue; 210 CurVNode->Count = CountValue; 211 } else 212 MinCountVNode->Count -= CountValue; 213 214 return; 215 } 216 217 CurVNode = allocateOneNode(); 218 if (!CurVNode) 219 return; 220 CurVNode->Value = TargetValue; 221 CurVNode->Count += CountValue; 222 223 uint32_t Success = 0; 224 if (!ValueCounters[CounterIndex]) 225 Success = 226 COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode); 227 else if (PrevVNode && !PrevVNode->Next) 228 Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode); 229 230 if (!Success && !hasStaticCounters) { 231 free(CurVNode); 232 return; 233 } 234 } 235 236 COMPILER_RT_VISIBILITY void 237 __llvm_profile_instrument_target(uint64_t TargetValue, void *Data, 238 uint32_t CounterIndex) { 239 instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1); 240 } 241 COMPILER_RT_VISIBILITY void 242 __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data, 243 uint32_t CounterIndex, 244 uint64_t CountValue) { 245 instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue); 246 } 247 248 /* 249 * The target values are partitioned into multiple ranges. The range spec is 250 * defined in InstrProfData.inc. 251 */ 252 COMPILER_RT_VISIBILITY void 253 __llvm_profile_instrument_memop(uint64_t TargetValue, void *Data, 254 uint32_t CounterIndex) { 255 // Map the target value to the representative value of its range. 256 uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue); 257 __llvm_profile_instrument_target(RepValue, Data, CounterIndex); 258 } 259 260 /* 261 * A wrapper struct that represents value profile runtime data. 262 * Like InstrProfRecord class which is used by profiling host tools, 263 * ValueProfRuntimeRecord also implements the abstract interfaces defined in 264 * ValueProfRecordClosure so that the runtime data can be serialized using 265 * shared C implementation. 266 */ 267 typedef struct ValueProfRuntimeRecord { 268 const __llvm_profile_data *Data; 269 ValueProfNode **NodesKind[IPVK_Last + 1]; 270 uint8_t **SiteCountArray; 271 } ValueProfRuntimeRecord; 272 273 /* ValueProfRecordClosure Interface implementation. */ 274 275 static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) { 276 return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK]; 277 } 278 279 static uint32_t getNumValueDataRT(const void *R, uint32_t VK) { 280 uint32_t S = 0, I; 281 const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R; 282 if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR) 283 return 0; 284 for (I = 0; I < Record->Data->NumValueSites[VK]; I++) 285 S += Record->SiteCountArray[VK][I]; 286 return S; 287 } 288 289 static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK, 290 uint32_t S) { 291 const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R; 292 return Record->SiteCountArray[VK][S]; 293 } 294 295 static ValueProfRuntimeRecord RTRecord; 296 static ValueProfRecordClosure RTRecordClosure = { 297 &RTRecord, INSTR_PROF_NULLPTR, /* GetNumValueKinds */ 298 getNumValueSitesRT, getNumValueDataRT, getNumValueDataForSiteRT, 299 INSTR_PROF_NULLPTR, /* RemapValueData */ 300 INSTR_PROF_NULLPTR, /* GetValueForSite, */ 301 INSTR_PROF_NULLPTR /* AllocValueProfData */ 302 }; 303 304 static uint32_t 305 initializeValueProfRuntimeRecord(const __llvm_profile_data *Data, 306 uint8_t *SiteCountArray[]) { 307 unsigned I, J, S = 0, NumValueKinds = 0; 308 ValueProfNode **Nodes = (ValueProfNode **)Data->Values; 309 RTRecord.Data = Data; 310 RTRecord.SiteCountArray = SiteCountArray; 311 for (I = 0; I <= IPVK_Last; I++) { 312 uint16_t N = Data->NumValueSites[I]; 313 if (!N) 314 continue; 315 316 NumValueKinds++; 317 318 RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR; 319 for (J = 0; J < N; J++) { 320 /* Compute value count for each site. */ 321 uint32_t C = 0; 322 ValueProfNode *Site = 323 Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR; 324 while (Site) { 325 C++; 326 Site = Site->Next; 327 } 328 if (C > UCHAR_MAX) 329 C = UCHAR_MAX; 330 RTRecord.SiteCountArray[I][J] = C; 331 } 332 S += N; 333 } 334 return NumValueKinds; 335 } 336 337 static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site, 338 InstrProfValueData *Dst, 339 ValueProfNode *StartNode, uint32_t N) { 340 unsigned I; 341 ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site]; 342 for (I = 0; I < N; I++) { 343 Dst[I].Value = VNode->Value; 344 Dst[I].Count = VNode->Count; 345 VNode = VNode->Next; 346 } 347 return VNode; 348 } 349 350 static uint32_t getValueProfDataSizeWrapper(void) { 351 return getValueProfDataSize(&RTRecordClosure); 352 } 353 354 static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) { 355 return getNumValueDataForSiteRT(&RTRecord, VK, S); 356 } 357 358 static VPDataReaderType TheVPDataReader = { 359 initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize, 360 getFirstValueProfRecord, getNumValueDataForSiteWrapper, 361 getValueProfDataSizeWrapper, getNextNValueData}; 362 363 COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader(void) { 364 return &TheVPDataReader; 365 } 366