/*===- InstrProfilingValue.c - Support library for PGO instrumentation ----===*\
|*
|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|* See https://llvm.org/LICENSE.txt for license information.
|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|*
\*===----------------------------------------------------------------------===*/

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "InstrProfiling.h"
#include "InstrProfilingInternal.h"
#include "InstrProfilingUtil.h"

#define INSTR_PROF_VALUE_PROF_DATA
#define INSTR_PROF_COMMON_API_IMPL
#define INSTR_PROF_VALUE_PROF_MEMOP_API
#include "profile/InstrProfData.inc"

static int hasStaticCounters = 1;
static int OutOfNodesWarnings = 0;
static int hasNonDefaultValsPerSite = 0;
#define INSTR_PROF_MAX_VP_WARNS 10
#define INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE 24
#define INSTR_PROF_VNODE_POOL_SIZE 1024

#ifndef _MSC_VER
/* A shared static pool in addition to the vnodes statically
 * allocated by the compiler. */
COMPILER_RT_VISIBILITY ValueProfNode
    lprofValueProfNodes[INSTR_PROF_VNODE_POOL_SIZE] COMPILER_RT_SECTION(
        COMPILER_RT_SEG INSTR_PROF_VNODES_SECT_NAME);
#endif

COMPILER_RT_VISIBILITY uint32_t VPMaxNumValsPerSite =
    INSTR_PROF_DEFAULT_NUM_VAL_PER_SITE;

COMPILER_RT_VISIBILITY void lprofSetupValueProfiler(void) {
  const char *Str = 0;
  Str = getenv("LLVM_VP_MAX_NUM_VALS_PER_SITE");
  if (Str && Str[0]) {
    VPMaxNumValsPerSite = atoi(Str);
    hasNonDefaultValsPerSite = 1;
  }
  if (VPMaxNumValsPerSite > INSTR_PROF_MAX_NUM_VAL_PER_SITE)
    VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
}

COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
  VPMaxNumValsPerSite = MaxVals;
  hasNonDefaultValsPerSite = 1;
}

/* This function is only used in value profiler mock testing. */
COMPILER_RT_VISIBILITY void
__llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
                                   uint32_t ValueKind,
                                   uint16_t NumValueSites) {
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-qual"
#elif defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-qual"
#endif
  *((uint16_t *)&Data->NumValueSites[ValueKind]) = NumValueSites;
#ifdef __GNUC__
#pragma GCC diagnostic pop
#elif defined(__clang__)
#pragma clang diagnostic pop
#endif
}

/* This function is only used in value profiler mock testing. */
COMPILER_RT_VISIBILITY const __llvm_profile_data *
__llvm_profile_iterate_data(const __llvm_profile_data *Data) {
  return Data + 1;
}

/* This function is only used in value profiler mock testing. */
COMPILER_RT_VISIBILITY void *
__llvm_get_function_addr(const __llvm_profile_data *Data) {
  return Data->FunctionPointer;
}

/* Allocate an array that holds the pointers to the linked lists of
 * value profile counter nodes. The number of elements in the array
 * equals the total number of value profile sites instrumented. Returns
 * 0 if allocation fails.
 */
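/* For example, a function with three indirect-call sites and two memop sites
 * gets a five-element array of list heads; the sites of each value kind are
 * stored contiguously after those of the preceding kinds, which is how the
 * array is indexed in initializeValueProfRuntimeRecord below. */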

static int allocateValueProfileCounters(__llvm_profile_data *Data) {
  uint64_t NumVSites = 0;
  uint32_t VKI;

  /* This function will never be called when the value site array is allocated
   * statically at compile time. */
  hasStaticCounters = 0;
  /* When dynamic allocation is enabled, allow tracking up to the maximum
   * number of values allowed. */
  if (!hasNonDefaultValsPerSite)
    VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;

  for (VKI = IPVK_First; VKI <= IPVK_Last; ++VKI)
    NumVSites += Data->NumValueSites[VKI];

  // If NumVSites = 0, calloc is allowed to return a non-null pointer.
  assert(NumVSites > 0 && "NumVSites can't be zero");
  ValueProfNode **Mem =
      (ValueProfNode **)calloc(NumVSites, sizeof(ValueProfNode *));
  if (!Mem)
    return 0;
  if (!COMPILER_RT_BOOL_CMPXCHG(&Data->Values, 0, Mem)) {
    free(Mem);
    return 0;
  }
  return 1;
}

static ValueProfNode *allocateOneNode(void) {
  ValueProfNode *Node;

  if (!hasStaticCounters)
    return (ValueProfNode *)calloc(1, sizeof(ValueProfNode));

  /* Early check to avoid value wrapping around. */
  if (CurrentVNode + 1 > EndVNode) {
    if (OutOfNodesWarnings++ < INSTR_PROF_MAX_VP_WARNS) {
      PROF_WARN("Unable to track new values: %s. "
                " Consider using option -mllvm -vp-counters-per-site=<n> to "
                "allocate more"
                " value profile counters at compile time. \n",
                "Running out of static counters");
    }
    return 0;
  }
  Node = COMPILER_RT_PTR_FETCH_ADD(ValueProfNode, CurrentVNode, 1);
  /* Due to section padding, EndVNode may point to a byte that is one past an
   * incomplete VNode, so we need to skip the last, incomplete node. */
  if (Node + 1 > EndVNode)
    return 0;

  return Node;
}

static COMPILER_RT_ALWAYS_INLINE void
instrumentTargetValueImpl(uint64_t TargetValue, void *Data,
                          uint32_t CounterIndex, uint64_t CountValue) {
  __llvm_profile_data *PData = (__llvm_profile_data *)Data;
  if (!PData)
    return;
  if (!CountValue)
    return;
  if (!PData->Values) {
    if (!allocateValueProfileCounters(PData))
      return;
  }

  ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
  ValueProfNode *PrevVNode = NULL;
  ValueProfNode *MinCountVNode = NULL;
  ValueProfNode *CurVNode = ValueCounters[CounterIndex];
  uint64_t MinCount = UINT64_MAX;

  uint8_t VDataCount = 0;
  while (CurVNode) {
    if (TargetValue == CurVNode->Value) {
      CurVNode->Count += CountValue;
      return;
    }
    if (CurVNode->Count < MinCount) {
      MinCount = CurVNode->Count;
      MinCountVNode = CurVNode;
    }
    PrevVNode = CurVNode;
    CurVNode = CurVNode->Next;
    ++VDataCount;
  }

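  /* Worked illustration of the replacement policy below (not normative;
   * assume VPMaxNumValsPerSite == 1 and CountValue == 1): if the single
   * tracked node holds {Value: V1, Count: 3} and a different value V2 is
   * observed, the node's count is decremented to 2 instead of the node
   * being replaced. V1 is only evicted once its count drops to CountValue
   * or below, so a target seen in more than half of all observations at
   * the site survives. */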
  if (VDataCount >= VPMaxNumValsPerSite) {
    /* Bump down the min count node's count. If it reaches 0,
     * evict it. This eviction/replacement policy makes hot
     * targets more sticky while cold targets less so. In other
     * words, it makes it less likely for the hot targets to be
     * prematurely evicted during the warmup/establishment period,
     * when their counts are still low. In a special case when
     * the number of values tracked is reduced to only one, this
     * policy will guarantee that the dominating target with >50%
     * total count will survive in the end. Note that this scheme
     * allows the runtime to track the min count node in an adaptive
     * manner. It can correct previous mistakes and eventually
     * lock onto a cold target that is already in a stable state.
     *
     * In very rare cases, this replacement scheme may still lead
     * to target loss. For instance, out of \c N value slots, \c N-1
     * slots are occupied by lukewarm targets during the warmup
     * period and the remaining one slot is contested by two or more
     * very hot targets. If those hot targets occur in an interleaved
     * way, none of them will survive (gain enough weight to throw out
     * other established entries) due to the ping-pong effect.
     * To handle this situation, the user can choose to increase the max
     * number of tracked values per value site. Alternatively, a more
     * expensive eviction mechanism can be implemented. It requires
     * the runtime to track the total number of evictions per site.
     * When the total number of evictions reaches a certain threshold,
     * the runtime can wipe out more than one lowest-count entry
     * to make room for hot targets.
     */
    if (MinCountVNode->Count <= CountValue) {
      CurVNode = MinCountVNode;
      CurVNode->Value = TargetValue;
      CurVNode->Count = CountValue;
    } else
      MinCountVNode->Count -= CountValue;

    return;
  }

  CurVNode = allocateOneNode();
  if (!CurVNode)
    return;
  CurVNode->Value = TargetValue;
  CurVNode->Count += CountValue;

  uint32_t Success = 0;
  if (!ValueCounters[CounterIndex])
    Success =
        COMPILER_RT_BOOL_CMPXCHG(&ValueCounters[CounterIndex], 0, CurVNode);
  else if (PrevVNode && !PrevVNode->Next)
    Success = COMPILER_RT_BOOL_CMPXCHG(&(PrevVNode->Next), 0, CurVNode);

  if (!Success && !hasStaticCounters) {
    free(CurVNode);
    return;
  }
}

COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
                                 uint32_t CounterIndex) {
  instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);
}

COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
                                       uint32_t CounterIndex,
                                       uint64_t CountValue) {
  instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);
}

/*
 * The target values are partitioned into multiple ranges. The range spec is
 * defined in InstrProfData.inc.
 */
COMPILER_RT_VISIBILITY void
__llvm_profile_instrument_memop(uint64_t TargetValue, void *Data,
                                uint32_t CounterIndex) {
  // Map the target value to the representative value of its range.
  uint64_t RepValue = InstrProfGetRangeRepValue(TargetValue);
  __llvm_profile_instrument_target(RepValue, Data, CounterIndex);
}

/*
 * A wrapper struct that represents value profile runtime data.
 * Like the InstrProfRecord class used by the profiling host tools,
 * ValueProfRuntimeRecord also implements the abstract interfaces defined in
 * ValueProfRecordClosure so that the runtime data can be serialized using
 * the shared C implementation.
 */
typedef struct ValueProfRuntimeRecord {
  const __llvm_profile_data *Data;
  ValueProfNode **NodesKind[IPVK_Last + 1];
  uint8_t **SiteCountArray;
} ValueProfRuntimeRecord;
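
/* Note on how these fields are populated (descriptive only; mirrors
 * initializeValueProfRuntimeRecord below): NodesKind[VK] points at the slice
 * of Data->Values holding the list heads for value kind VK, and
 * SiteCountArray[VK][S] holds the number of values tracked at site S of that
 * kind, capped at UCHAR_MAX. */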

/* ValueProfRecordClosure Interface implementation. */

static uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
  return ((const ValueProfRuntimeRecord *)R)->Data->NumValueSites[VK];
}

static uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
  uint32_t S = 0, I;
  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
  if (Record->SiteCountArray[VK] == INSTR_PROF_NULLPTR)
    return 0;
  for (I = 0; I < Record->Data->NumValueSites[VK]; I++)
    S += Record->SiteCountArray[VK][I];
  return S;
}

static uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK,
                                         uint32_t S) {
  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
  return Record->SiteCountArray[VK][S];
}

static ValueProfRuntimeRecord RTRecord;
static ValueProfRecordClosure RTRecordClosure = {
    &RTRecord,          INSTR_PROF_NULLPTR, /* GetNumValueKinds */
    getNumValueSitesRT, getNumValueDataRT,  getNumValueDataForSiteRT,
    INSTR_PROF_NULLPTR, /* RemapValueData */
    INSTR_PROF_NULLPTR, /* GetValueForSite */
    INSTR_PROF_NULLPTR  /* AllocValueProfData */
};

static uint32_t
initializeValueProfRuntimeRecord(const __llvm_profile_data *Data,
                                 uint8_t *SiteCountArray[]) {
  unsigned I, J, S = 0, NumValueKinds = 0;
  ValueProfNode **Nodes = (ValueProfNode **)Data->Values;
  RTRecord.Data = Data;
  RTRecord.SiteCountArray = SiteCountArray;
  for (I = 0; I <= IPVK_Last; I++) {
    uint16_t N = Data->NumValueSites[I];
    if (!N)
      continue;

    NumValueKinds++;

    RTRecord.NodesKind[I] = Nodes ? &Nodes[S] : INSTR_PROF_NULLPTR;
    for (J = 0; J < N; J++) {
      /* Compute value count for each site. */
      uint32_t C = 0;
      ValueProfNode *Site =
          Nodes ? RTRecord.NodesKind[I][J] : INSTR_PROF_NULLPTR;
      while (Site) {
        C++;
        Site = Site->Next;
      }
      if (C > UCHAR_MAX)
        C = UCHAR_MAX;
      RTRecord.SiteCountArray[I][J] = C;
    }
    S += N;
  }
  return NumValueKinds;
}

static ValueProfNode *getNextNValueData(uint32_t VK, uint32_t Site,
                                        InstrProfValueData *Dst,
                                        ValueProfNode *StartNode, uint32_t N) {
  unsigned I;
  ValueProfNode *VNode = StartNode ? StartNode : RTRecord.NodesKind[VK][Site];
  for (I = 0; I < N; I++) {
    Dst[I].Value = VNode->Value;
    Dst[I].Count = VNode->Count;
    VNode = VNode->Next;
  }
  return VNode;
}

static uint32_t getValueProfDataSizeWrapper(void) {
  return getValueProfDataSize(&RTRecordClosure);
}

static uint32_t getNumValueDataForSiteWrapper(uint32_t VK, uint32_t S) {
  return getNumValueDataForSiteRT(&RTRecord, VK, S);
}

static VPDataReaderType TheVPDataReader = {
    initializeValueProfRuntimeRecord, getValueProfRecordHeaderSize,
    getFirstValueProfRecord,          getNumValueDataForSiteWrapper,
    getValueProfDataSizeWrapper,      getNextNValueData};

COMPILER_RT_VISIBILITY VPDataReaderType *lprofGetVPDataReader(void) {
  return &TheVPDataReader;
}
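
/* Usage sketch (illustrative only, not part of this file's interface): the
 * compiler-emitted instrumentation for an indirect-call site with site index
 * 0 in a function foo conceptually lowers to
 *
 *   __llvm_profile_instrument_target((uint64_t)(uintptr_t)Target,
 *                                    (void *)&__profd_foo, 0);
 *
 * where __profd_foo is foo's per-function __llvm_profile_data record, and the
 * profile writer obtains the serialization callbacks defined above through
 * lprofGetVPDataReader(). */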