1 /* 2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. 3 * All rights reserved. 4 * 5 * This source code is licensed under both the BSD-style license (found in the 6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found 7 * in the COPYING file in the root directory of this source tree). 8 * You may select, at your option, one of the above-listed licenses. 9 */ 10 11 12 13 /* ************************************* 14 * Includes 15 ***************************************/ 16 #include "platform.h" /* Large Files support */ 17 #include "util.h" /* UTIL_getFileSize, UTIL_sleep */ 18 #include <stdlib.h> /* malloc, free */ 19 #include <string.h> /* memset */ 20 #include <stdio.h> /* fprintf, fopen */ 21 #undef NDEBUG /* assert must not be disabled */ 22 #include <assert.h> /* assert */ 23 24 #include "mem.h" 25 #include "benchfn.h" 26 27 28 /* ************************************* 29 * Constants 30 ***************************************/ 31 #define TIMELOOP_MICROSEC (1*1000000ULL) /* 1 second */ 32 #define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */ 33 #define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */ 34 #define COOLPERIOD_SEC 10 35 36 #define KB *(1 <<10) 37 #define MB *(1 <<20) 38 #define GB *(1U<<30) 39 40 41 /* ************************************* 42 * Errors 43 ***************************************/ 44 #ifndef DEBUG 45 # define DEBUG 0 46 #endif 47 48 #define DISPLAY(...) fprintf(stderr, __VA_ARGS__) 49 #define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } 50 51 /* error without displaying */ 52 #define RETURN_QUIET_ERROR(retValue, ...) { \ 53 DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ 54 DEBUGOUTPUT("Error : "); \ 55 DEBUGOUTPUT(__VA_ARGS__); \ 56 DEBUGOUTPUT(" \n"); \ 57 return retValue; \ 58 } 59 60 61 /* ************************************* 62 * Benchmarking an arbitrary function 63 ***************************************/ 64 65 int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome) 66 { 67 return outcome.error_tag_never_ever_use_directly == 0; 68 } 69 70 /* warning : this function will stop program execution if outcome is invalid ! 71 * check outcome validity first, using BMK_isValid_runResult() */ 72 BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome) 73 { 74 assert(outcome.error_tag_never_ever_use_directly == 0); 75 return outcome.internal_never_ever_use_directly; 76 } 77 78 size_t BMK_extract_errorResult(BMK_runOutcome_t outcome) 79 { 80 assert(outcome.error_tag_never_ever_use_directly != 0); 81 return outcome.error_result_never_ever_use_directly; 82 } 83 84 static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult) 85 { 86 BMK_runOutcome_t b; 87 memset(&b, 0, sizeof(b)); 88 b.error_tag_never_ever_use_directly = 1; 89 b.error_result_never_ever_use_directly = errorResult; 90 return b; 91 } 92 93 static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime) 94 { 95 BMK_runOutcome_t outcome; 96 outcome.error_tag_never_ever_use_directly = 0; 97 outcome.internal_never_ever_use_directly = runTime; 98 return outcome; 99 } 100 101 102 /* initFn will be measured once, benchFn will be measured `nbLoops` times */ 103 /* initFn is optional, provide NULL if none */ 104 /* benchFn must return a size_t value that errorFn can interpret */ 105 /* takes # of blocks and list of size & stuff for each. */ 106 /* can report result of benchFn for each block into blockResult. */ 107 /* blockResult is optional, provide NULL if this information is not required */ 108 /* note : time per loop can be reported as zero if run time < timer resolution */ 109 BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p, 110 unsigned nbLoops) 111 { 112 size_t dstSize = 0; 113 nbLoops += !nbLoops; /* minimum nbLoops is 1 */ 114 115 /* init */ 116 { size_t i; 117 for(i = 0; i < p.blockCount; i++) { 118 memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]); /* warm up and erase result buffer */ 119 } 120 #if 0 121 /* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops 122 * (Makes former slower) 123 */ 124 UTIL_sleepMilli(5); /* give processor time to other processes */ 125 UTIL_waitForNextTick(); 126 #endif 127 } 128 129 /* benchmark */ 130 { UTIL_time_t const clockStart = UTIL_getTime(); 131 unsigned loopNb, blockNb; 132 if (p.initFn != NULL) p.initFn(p.initPayload); 133 for (loopNb = 0; loopNb < nbLoops; loopNb++) { 134 for (blockNb = 0; blockNb < p.blockCount; blockNb++) { 135 size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb], 136 p.dstBuffers[blockNb], p.dstCapacities[blockNb], 137 p.benchPayload); 138 if (loopNb == 0) { 139 if (p.blockResults != NULL) p.blockResults[blockNb] = res; 140 if ((p.errorFn != NULL) && (p.errorFn(res))) { 141 RETURN_QUIET_ERROR(BMK_runOutcome_error(res), 142 "Function benchmark failed on block %u (of size %u) with error %i", 143 blockNb, (unsigned)p.srcSizes[blockNb], (int)res); 144 } 145 dstSize += res; 146 } } 147 } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */ 148 149 { U64 const totalTime = UTIL_clockSpanNano(clockStart); 150 BMK_runTime_t rt; 151 rt.nanoSecPerRun = totalTime / nbLoops; 152 rt.sumOfReturn = dstSize; 153 return BMK_setValid_runTime(rt); 154 } } 155 } 156 157 158 /* ==== Benchmarking any function, providing intermediate results ==== */ 159 160 struct BMK_timedFnState_s { 161 U64 timeSpent_ns; 162 U64 timeBudget_ns; 163 U64 runBudget_ns; 164 BMK_runTime_t fastestRun; 165 unsigned nbLoops; 166 UTIL_time_t coolTime; 167 }; /* typedef'd to BMK_timedFnState_t within bench.h */ 168 169 BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms) 170 { 171 BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r)); 172 if (r == NULL) return NULL; /* malloc() error */ 173 BMK_resetTimedFnState(r, total_ms, run_ms); 174 return r; 175 } 176 177 void BMK_freeTimedFnState(BMK_timedFnState_t* state) { 178 free(state); 179 } 180 181 void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms) 182 { 183 if (!total_ms) total_ms = 1 ; 184 if (!run_ms) run_ms = 1; 185 if (run_ms > total_ms) run_ms = total_ms; 186 timedFnState->timeSpent_ns = 0; 187 timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000; 188 timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000; 189 timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL); 190 timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL); 191 timedFnState->nbLoops = 1; 192 timedFnState->coolTime = UTIL_getTime(); 193 } 194 195 /* Tells if nb of seconds set in timedFnState for all runs is spent. 196 * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */ 197 int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState) 198 { 199 return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns); 200 } 201 202 203 #undef MIN 204 #define MIN(a,b) ( (a) < (b) ? (a) : (b) ) 205 206 #define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */ 207 208 BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont, 209 BMK_benchParams_t p) 210 { 211 U64 const runBudget_ns = cont->runBudget_ns; 212 U64 const runTimeMin_ns = runBudget_ns / 2; 213 int completed = 0; 214 BMK_runTime_t bestRunTime = cont->fastestRun; 215 216 while (!completed) { 217 BMK_runOutcome_t runResult; 218 219 /* Overheat protection */ 220 if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) { 221 DEBUGOUTPUT("\rcooling down ... \r"); 222 UTIL_sleep(COOLPERIOD_SEC); 223 cont->coolTime = UTIL_getTime(); 224 } 225 226 /* reinitialize capacity */ 227 runResult = BMK_benchFunction(p, cont->nbLoops); 228 229 if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */ 230 return runResult; 231 } 232 233 { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult); 234 U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops; 235 236 cont->timeSpent_ns += loopDuration_ns; 237 238 /* estimate nbLoops for next run to last approximately 1 second */ 239 if (loopDuration_ns > (runBudget_ns / 50)) { 240 U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); 241 cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1; 242 } else { 243 /* previous run was too short : blindly increase workload by x multiplier */ 244 const unsigned multiplier = 10; 245 assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ 246 cont->nbLoops *= multiplier; 247 } 248 249 if(loopDuration_ns < runTimeMin_ns) { 250 /* don't report results for which benchmark run time was too small : increased risks of rounding errors */ 251 assert(completed == 0); 252 continue; 253 } else { 254 if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) { 255 bestRunTime = newRunTime; 256 } 257 completed = 1; 258 } 259 } 260 } /* while (!completed) */ 261 262 return BMK_setValid_runTime(bestRunTime); 263 } 264