1 /* 2 * Copyright (c) Yann Collet, Facebook, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 12 13 /* ************************************* 14 * Includes 15 ***************************************/ 16 #include <stdlib.h> /* malloc, free */ 17 #include <string.h> /* memset */ 18 #include <assert.h> /* assert */ 19 20 #include "timefn.h" /* UTIL_time_t, UTIL_getTime */ 21 #include "benchfn.h" 22 23 24 /* ************************************* 25 * Constants 26 ***************************************/ 27 #define TIMELOOP_MICROSEC SEC_TO_MICRO /* 1 second */ 28 #define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */ 29 30 #define KB *(1 <<10) 31 #define MB *(1 <<20) 32 #define GB *(1U<<30) 33 34 35 /* ************************************* 36 * Debug errors 37 ***************************************/ 38 #if defined(DEBUG) && (DEBUG >= 1) 39 # include <stdio.h> /* fprintf */ 40 # define DISPLAY(...) fprintf(stderr, __VA_ARGS__) 41 # define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } 42 #else 43 # define DEBUGOUTPUT(...) 44 #endif 45 46 47 /* error without displaying */ 48 #define RETURN_QUIET_ERROR(retValue, ...) { \ 49 DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ 50 DEBUGOUTPUT("Error : "); \ 51 DEBUGOUTPUT(__VA_ARGS__); \ 52 DEBUGOUTPUT(" \n"); \ 53 return retValue; \ 54 } 55 56 /* Abort execution if a condition is not met */ 57 #define CONTROL(c) { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } } 58 59 60 /* ************************************* 61 * Benchmarking an arbitrary function 62 ***************************************/ 63 64 int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome) 65 { 66 return outcome.error_tag_never_ever_use_directly == 0; 67 } 68 69 /* warning : this function will stop program execution if outcome is invalid ! 70 * check outcome validity first, using BMK_isValid_runResult() */ 71 BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome) 72 { 73 CONTROL(outcome.error_tag_never_ever_use_directly == 0); 74 return outcome.internal_never_ever_use_directly; 75 } 76 77 size_t BMK_extract_errorResult(BMK_runOutcome_t outcome) 78 { 79 CONTROL(outcome.error_tag_never_ever_use_directly != 0); 80 return outcome.error_result_never_ever_use_directly; 81 } 82 83 static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult) 84 { 85 BMK_runOutcome_t b; 86 memset(&b, 0, sizeof(b)); 87 b.error_tag_never_ever_use_directly = 1; 88 b.error_result_never_ever_use_directly = errorResult; 89 return b; 90 } 91 92 static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime) 93 { 94 BMK_runOutcome_t outcome; 95 outcome.error_tag_never_ever_use_directly = 0; 96 outcome.internal_never_ever_use_directly = runTime; 97 return outcome; 98 } 99 100 101 /* initFn will be measured once, benchFn will be measured `nbLoops` times */ 102 /* initFn is optional, provide NULL if none */ 103 /* benchFn must return a size_t value that errorFn can interpret */ 104 /* takes # of blocks and list of size & stuff for each. */ 105 /* can report result of benchFn for each block into blockResult. */ 106 /* blockResult is optional, provide NULL if this information is not required */ 107 /* note : time per loop can be reported as zero if run time < timer resolution */ 108 BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p, 109 unsigned nbLoops) 110 { 111 size_t dstSize = 0; 112 nbLoops += !nbLoops; /* minimum nbLoops is 1 */ 113 114 /* init */ 115 { size_t i; 116 for(i = 0; i < p.blockCount; i++) { 117 memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */ 118 } } 119 120 /* benchmark */ 121 { UTIL_time_t const clockStart = UTIL_getTime(); 122 unsigned loopNb, blockNb; 123 if (p.initFn != NULL) p.initFn(p.initPayload); 124 for (loopNb = 0; loopNb < nbLoops; loopNb++) { 125 for (blockNb = 0; blockNb < p.blockCount; blockNb++) { 126 size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb], 127 p.dstBuffers[blockNb], p.dstCapacities[blockNb], 128 p.benchPayload); 129 if (loopNb == 0) { 130 if (p.blockResults != NULL) p.blockResults[blockNb] = res; 131 if ((p.errorFn != NULL) && (p.errorFn(res))) { 132 RETURN_QUIET_ERROR(BMK_runOutcome_error(res), 133 "Function benchmark failed on block %u (of size %u) with error %i", 134 blockNb, (unsigned)p.srcSizes[blockNb], (int)res); 135 } 136 dstSize += res; 137 } } 138 } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */ 139 140 { PTime const totalTime = UTIL_clockSpanNano(clockStart); 141 BMK_runTime_t rt; 142 rt.nanoSecPerRun = (double)totalTime / nbLoops; 143 rt.sumOfReturn = dstSize; 144 return BMK_setValid_runTime(rt); 145 } } 146 } 147 148 149 /* ==== Benchmarking any function, providing intermediate results ==== */ 150 151 struct BMK_timedFnState_s { 152 PTime timeSpent_ns; 153 PTime timeBudget_ns; 154 PTime runBudget_ns; 155 BMK_runTime_t fastestRun; 156 unsigned nbLoops; 157 UTIL_time_t coolTime; 158 }; /* typedef'd to BMK_timedFnState_t within bench.h */ 159 160 BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms) 161 { 162 BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r)); 163 if (r == NULL) return NULL; /* malloc() error */ 164 BMK_resetTimedFnState(r, total_ms, run_ms); 165 return r; 166 } 167 168 void BMK_freeTimedFnState(BMK_timedFnState_t* state) { free(state); } 169 170 BMK_timedFnState_t* 171 BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms) 172 { 173 typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1]; /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */ 174 typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align; /* force tfs to be aligned at its next best position */ 175 size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */ 176 BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer; 177 if (buffer == NULL) return NULL; 178 if (size < sizeof(struct BMK_timedFnState_s)) return NULL; 179 if ((size_t)buffer % tfs_alignment) return NULL; /* buffer must be properly aligned */ 180 BMK_resetTimedFnState(r, total_ms, run_ms); 181 return r; 182 } 183 184 void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms) 185 { 186 if (!total_ms) total_ms = 1 ; 187 if (!run_ms) run_ms = 1; 188 if (run_ms > total_ms) run_ms = total_ms; 189 timedFnState->timeSpent_ns = 0; 190 timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000; 191 timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000; 192 timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000; /* hopefully large enough : must be larger than any potential measurement */ 193 timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL); 194 timedFnState->nbLoops = 1; 195 timedFnState->coolTime = UTIL_getTime(); 196 } 197 198 /* Tells if nb of seconds set in timedFnState for all runs is spent. 199 * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */ 200 int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState) 201 { 202 return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns); 203 } 204 205 206 #undef MIN 207 #define MIN(a,b) ( (a) < (b) ? (a) : (b) ) 208 209 #define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */ 210 211 BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont, 212 BMK_benchParams_t p) 213 { 214 PTime const runBudget_ns = cont->runBudget_ns; 215 PTime const runTimeMin_ns = runBudget_ns / 2; 216 int completed = 0; 217 BMK_runTime_t bestRunTime = cont->fastestRun; 218 219 while (!completed) { 220 BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops); 221 222 if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */ 223 return runResult; 224 } 225 226 { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult); 227 double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops; 228 229 cont->timeSpent_ns += (unsigned long long)loopDuration_ns; 230 231 /* estimate nbLoops for next run to last approximately 1 second */ 232 if (loopDuration_ns > (runBudget_ns / 50)) { 233 double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); 234 cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1; 235 } else { 236 /* previous run was too short : blindly increase workload by x multiplier */ 237 const unsigned multiplier = 10; 238 assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ 239 cont->nbLoops *= multiplier; 240 } 241 242 if(loopDuration_ns < runTimeMin_ns) { 243 /* don't report results for which benchmark run time was too small : increased risks of rounding errors */ 244 assert(completed == 0); 245 continue; 246 } else { 247 if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) { 248 bestRunTime = newRunTime; 249 } 250 completed = 1; 251 } 252 } 253 } /* while (!completed) */ 254 255 return BMK_setValid_runTime(bestRunTime); 256 } 257