xref: /freebsd/sys/contrib/zstd/programs/benchfn.c (revision 28f4385e45a2681c14bd04b83fe1796eaefe8265)
1 /*
2  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 
12 
13 /* *************************************
14 *  Includes
15 ***************************************/
16 #include "platform.h"    /* Large Files support */
17 #include "util.h"        /* UTIL_getFileSize, UTIL_sleep */
18 #include <stdlib.h>      /* malloc, free */
19 #include <string.h>      /* memset */
20 #include <stdio.h>       /* fprintf, fopen */
21 #undef NDEBUG            /* assert must not be disabled */
22 #include <assert.h>      /* assert */
23 
24 #include "mem.h"
25 #include "benchfn.h"
26 
27 
28 /* *************************************
29 *  Constants
30 ***************************************/
/* Time constants. The unit is carried by each macro's name suffix. */
#define TIMELOOP_MICROSEC     (1 * 1000000ULL)           /* 1 second, expressed in microseconds */
#define TIMELOOP_NANOSEC      (1 * 1000000000ULL)        /* 1 second, expressed in nanoseconds */
#define ACTIVEPERIOD_MICROSEC (70 * TIMELOOP_MICROSEC)   /* 70 seconds of activity before a cool-down pause */
#define COOLPERIOD_SEC        10                         /* duration of the cool-down pause, in seconds */

/* Postfix size multipliers : written after a number, as in `4 KB` or `1 GB`.
 * Each one expands to a multiplication, so parenthesize the whole expression when combining. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
39 
40 
41 /* *************************************
42 *  Errors
43 ***************************************/
#ifndef DEBUG
#  define DEBUG 0
#endif

/* DISPLAY() : printf-style message, always written to stderr */
#define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)

/* DEBUGOUTPUT() : same as DISPLAY(), but only emits when DEBUG is non-zero.
 * Wrapped in do { } while (0) so that the macro followed by `;` forms exactly one statement,
 * which keeps it usable as the unbraced body of an if / else. */
#define DEBUGOUTPUT(...) do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)

/* RETURN_QUIET_ERROR() : error without displaying.
 * Leaves the enclosing function immediately, returning `retValue`.
 * The location and the printf-style message are only emitted when DEBUG is non-zero.
 * Must be followed by `;` at the call site. */
#define RETURN_QUIET_ERROR(retValue, ...) do {        \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
    DEBUGOUTPUT("Error : ");                          \
    DEBUGOUTPUT(__VA_ARGS__);                         \
    DEBUGOUTPUT(" \n");                               \
    return retValue;                                  \
} while (0)
59 
60 
61 /* *************************************
62 *  Benchmarking an arbitrary function
63 ***************************************/
64 
65 int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
66 {
67     return outcome.error_tag_never_ever_use_directly == 0;
68 }
69 
70 /* warning : this function will stop program execution if outcome is invalid !
71  *           check outcome validity first, using BMK_isValid_runResult() */
72 BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
73 {
74     assert(outcome.error_tag_never_ever_use_directly == 0);
75     return outcome.internal_never_ever_use_directly;
76 }
77 
78 size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
79 {
80     assert(outcome.error_tag_never_ever_use_directly != 0);
81     return outcome.error_result_never_ever_use_directly;
82 }
83 
84 static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
85 {
86     BMK_runOutcome_t b;
87     memset(&b, 0, sizeof(b));
88     b.error_tag_never_ever_use_directly = 1;
89     b.error_result_never_ever_use_directly = errorResult;
90     return b;
91 }
92 
93 static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
94 {
95     BMK_runOutcome_t outcome;
96     outcome.error_tag_never_ever_use_directly = 0;
97     outcome.internal_never_ever_use_directly = runTime;
98     return outcome;
99 }
100 
101 
102 /* initFn will be measured once, benchFn will be measured `nbLoops` times */
103 /* initFn is optional, provide NULL if none */
104 /* benchFn must return a size_t value that errorFn can interpret */
105 /* takes # of blocks and list of size & stuff for each. */
106 /* can report result of benchFn for each block into blockResult. */
107 /* blockResult is optional, provide NULL if this information is not required */
108 /* note : time per loop can be reported as zero if run time < timer resolution */
109 BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
110                                    unsigned nbLoops)
111 {
112     size_t dstSize = 0;
113     nbLoops += !nbLoops;   /* minimum nbLoops is 1 */
114 
115     /* init */
116     {   size_t i;
117         for(i = 0; i < p.blockCount; i++) {
118             memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]);  /* warm up and erase result buffer */
119         }
120 #if 0
121         /* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops
122          * (Makes former slower)
123          */
124         UTIL_sleepMilli(5);  /* give processor time to other processes */
125         UTIL_waitForNextTick();
126 #endif
127     }
128 
129     /* benchmark */
130     {   UTIL_time_t const clockStart = UTIL_getTime();
131         unsigned loopNb, blockNb;
132         if (p.initFn != NULL) p.initFn(p.initPayload);
133         for (loopNb = 0; loopNb < nbLoops; loopNb++) {
134             for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
135                 size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
136                                    p.dstBuffers[blockNb], p.dstCapacities[blockNb],
137                                    p.benchPayload);
138                 if (loopNb == 0) {
139                     if (p.blockResults != NULL) p.blockResults[blockNb] = res;
140                     if ((p.errorFn != NULL) && (p.errorFn(res))) {
141                         RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
142                             "Function benchmark failed on block %u (of size %u) with error %i",
143                             blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
144                     }
145                     dstSize += res;
146             }   }
147         }  /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
148 
149         {   U64 const totalTime = UTIL_clockSpanNano(clockStart);
150             BMK_runTime_t rt;
151             rt.nanoSecPerRun = totalTime / nbLoops;
152             rt.sumOfReturn = dstSize;
153             return BMK_setValid_runTime(rt);
154     }   }
155 }
156 
157 
158 /* ====  Benchmarking any function, providing intermediate results  ==== */
159 
160 struct BMK_timedFnState_s {
161     U64 timeSpent_ns;
162     U64 timeBudget_ns;
163     U64 runBudget_ns;
164     BMK_runTime_t fastestRun;
165     unsigned nbLoops;
166     UTIL_time_t coolTime;
167 };  /* typedef'd to BMK_timedFnState_t within bench.h */
168 
169 BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
170 {
171     BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
172     if (r == NULL) return NULL;   /* malloc() error */
173     BMK_resetTimedFnState(r, total_ms, run_ms);
174     return r;
175 }
176 
177 void BMK_freeTimedFnState(BMK_timedFnState_t* state) {
178     free(state);
179 }
180 
181 void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
182 {
183     if (!total_ms) total_ms = 1 ;
184     if (!run_ms) run_ms = 1;
185     if (run_ms > total_ms) run_ms = total_ms;
186     timedFnState->timeSpent_ns = 0;
187     timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000;
188     timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000;
189     timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL);
190     timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
191     timedFnState->nbLoops = 1;
192     timedFnState->coolTime = UTIL_getTime();
193 }
194 
195 /* Tells if nb of seconds set in timedFnState for all runs is spent.
196  * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
197 int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
198 {
199     return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
200 }
201 
202 
/* MIN() : smaller of two values.
 * Function-like macro : each argument may be evaluated twice, so avoid side effects. */
#undef MIN
#define MIN(a,b)   ( ((a) < (b)) ? (a) : (b) )

/* 0.5 seconds, expressed in nanoseconds */
#define MINUSABLETIME  (TIMELOOP_NANOSEC / 2)
207 
208 BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
209                                   BMK_benchParams_t p)
210 {
211     U64 const runBudget_ns = cont->runBudget_ns;
212     U64 const runTimeMin_ns = runBudget_ns / 2;
213     int completed = 0;
214     BMK_runTime_t bestRunTime = cont->fastestRun;
215 
216     while (!completed) {
217         BMK_runOutcome_t runResult;
218 
219         /* Overheat protection */
220         if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) {
221             DEBUGOUTPUT("\rcooling down ...    \r");
222             UTIL_sleep(COOLPERIOD_SEC);
223             cont->coolTime = UTIL_getTime();
224         }
225 
226         /* reinitialize capacity */
227         runResult = BMK_benchFunction(p, cont->nbLoops);
228 
229         if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
230             return runResult;
231         }
232 
233         {   BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
234             U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
235 
236             cont->timeSpent_ns += loopDuration_ns;
237 
238             /* estimate nbLoops for next run to last approximately 1 second */
239             if (loopDuration_ns > (runBudget_ns / 50)) {
240                 U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
241                 cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1;
242             } else {
243                 /* previous run was too short : blindly increase workload by x multiplier */
244                 const unsigned multiplier = 10;
245                 assert(cont->nbLoops < ((unsigned)-1) / multiplier);  /* avoid overflow */
246                 cont->nbLoops *= multiplier;
247             }
248 
249             if(loopDuration_ns < runTimeMin_ns) {
250                 /* don't report results for which benchmark run time was too small : increased risks of rounding errors */
251                 assert(completed == 0);
252                 continue;
253             } else {
254                 if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
255                     bestRunTime = newRunTime;
256                 }
257                 completed = 1;
258             }
259         }
260     }   /* while (!completed) */
261 
262     return BMK_setValid_runTime(bestRunTime);
263 }
264