xref: /freebsd/sys/contrib/zstd/programs/benchfn.c (revision 5ff13fbc199bdf5f0572845351c68ee5ca828e71)
1a0483764SConrad Meyer /*
2*5ff13fbcSAllan Jude  * Copyright (c) Yann Collet, Facebook, Inc.
3a0483764SConrad Meyer  * All rights reserved.
4a0483764SConrad Meyer  *
5a0483764SConrad Meyer  * This source code is licensed under both the BSD-style license (found in the
6a0483764SConrad Meyer  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7a0483764SConrad Meyer  * in the COPYING file in the root directory of this source tree).
8a0483764SConrad Meyer  * You may select, at your option, one of the above-listed licenses.
9a0483764SConrad Meyer  */
10a0483764SConrad Meyer 
11a0483764SConrad Meyer 
12a0483764SConrad Meyer 
13a0483764SConrad Meyer /* *************************************
14a0483764SConrad Meyer *  Includes
15a0483764SConrad Meyer ***************************************/
16a0483764SConrad Meyer #include <stdlib.h>      /* malloc, free */
17a0483764SConrad Meyer #include <string.h>      /* memset */
18a0483764SConrad Meyer #include <assert.h>      /* assert */
19a0483764SConrad Meyer 
202b9c00cbSConrad Meyer #include "timefn.h"        /* UTIL_time_t, UTIL_getTime */
21a0483764SConrad Meyer #include "benchfn.h"
22a0483764SConrad Meyer 
23a0483764SConrad Meyer 
24a0483764SConrad Meyer /* *************************************
25a0483764SConrad Meyer *  Constants
26a0483764SConrad Meyer ***************************************/
/* Reference duration of one timing loop, in microseconds and in nanoseconds. */
#define TIMELOOP_MICROSEC     SEC_TO_MICRO        /* 1 second */
#define TIMELOOP_NANOSEC      (1000000000ULL)     /* 1 second */

/* Postfix size multipliers : `4 KB` expands to `4 *(1 << 10)`. */
#define KB  *(1 << 10)
#define MB  *(1 << 20)
#define GB  *(1U << 30)
33a0483764SConrad Meyer 
34a0483764SConrad Meyer 
35a0483764SConrad Meyer /* *************************************
362b9c00cbSConrad Meyer *  Debug errors
37a0483764SConrad Meyer ***************************************/
/* DEBUGOUTPUT() :
 * prints to stderr when DEBUG is defined and >= 1, does nothing otherwise.
 * Every multi-statement macro below is wrapped in `do { } while (0)`,
 * so that `MACRO(...);` is exactly one statement,
 * and can safely be used as the body of an un-braced if / else. */
#if defined(DEBUG) && (DEBUG >= 1)
#  include <stdio.h>       /* fprintf */
#  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
#  define DEBUGOUTPUT(...)   do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)
#else
#  define DEBUGOUTPUT(...)   do { } while (0)
#endif


/* error without displaying :
 * returns `retValue` from the enclosing function.
 * The message is only emitted through DEBUGOUTPUT(), hence only in debug builds. */
#define RETURN_QUIET_ERROR(retValue, ...) do {        \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
    DEBUGOUTPUT("Error : ");                          \
    DEBUGOUTPUT(__VA_ARGS__);                         \
    DEBUGOUTPUT(" \n");                               \
    return retValue;                                  \
} while (0)

/* Abort execution if a condition is not met */
#define CONTROL(c) do { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } } while (0)
584d3f1eafSConrad Meyer 
59a0483764SConrad Meyer 
60a0483764SConrad Meyer /* *************************************
61a0483764SConrad Meyer *  Benchmarking an arbitrary function
62a0483764SConrad Meyer ***************************************/
63a0483764SConrad Meyer 
/* BMK_isSuccessful_runOutcome() :
 * @return : 1 when the outcome's error tag is clear (the run succeeded),
 *           0 when the error tag is set. */
int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
{
    int const hasError = (outcome.error_tag_never_ever_use_directly != 0);
    return !hasError;
}
68a0483764SConrad Meyer 
69a0483764SConrad Meyer /* warning : this function will stop program execution if outcome is invalid !
70a0483764SConrad Meyer  *           check outcome validity first, using BMK_isValid_runResult() */
BMK_extract_runTime(BMK_runOutcome_t outcome)71a0483764SConrad Meyer BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
72a0483764SConrad Meyer {
734d3f1eafSConrad Meyer     CONTROL(outcome.error_tag_never_ever_use_directly == 0);
74a0483764SConrad Meyer     return outcome.internal_never_ever_use_directly;
75a0483764SConrad Meyer }
76a0483764SConrad Meyer 
/* BMK_extract_errorResult() :
 * @return : the error value stored inside a failed outcome.
 * warning : execution is aborted (see CONTROL()) if the outcome does not carry an error tag. */
size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
{
    int const errorTag = outcome.error_tag_never_ever_use_directly;
    CONTROL(errorTag != 0);
    return outcome.error_result_never_ever_use_directly;
}
82a0483764SConrad Meyer 
BMK_runOutcome_error(size_t errorResult)83a0483764SConrad Meyer static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
84a0483764SConrad Meyer {
85a0483764SConrad Meyer     BMK_runOutcome_t b;
86a0483764SConrad Meyer     memset(&b, 0, sizeof(b));
87a0483764SConrad Meyer     b.error_tag_never_ever_use_directly = 1;
88a0483764SConrad Meyer     b.error_result_never_ever_use_directly = errorResult;
89a0483764SConrad Meyer     return b;
90a0483764SConrad Meyer }
91a0483764SConrad Meyer 
BMK_setValid_runTime(BMK_runTime_t runTime)92a0483764SConrad Meyer static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
93a0483764SConrad Meyer {
94a0483764SConrad Meyer     BMK_runOutcome_t outcome;
95a0483764SConrad Meyer     outcome.error_tag_never_ever_use_directly = 0;
96a0483764SConrad Meyer     outcome.internal_never_ever_use_directly = runTime;
97a0483764SConrad Meyer     return outcome;
98a0483764SConrad Meyer }
99a0483764SConrad Meyer 
100a0483764SConrad Meyer 
101a0483764SConrad Meyer /* initFn will be measured once, benchFn will be measured `nbLoops` times */
102a0483764SConrad Meyer /* initFn is optional, provide NULL if none */
103a0483764SConrad Meyer /* benchFn must return a size_t value that errorFn can interpret */
104a0483764SConrad Meyer /* takes # of blocks and list of size & stuff for each. */
105a0483764SConrad Meyer /* can report result of benchFn for each block into blockResult. */
106a0483764SConrad Meyer /* blockResult is optional, provide NULL if this information is not required */
107a0483764SConrad Meyer /* note : time per loop can be reported as zero if run time < timer resolution */
BMK_benchFunction(BMK_benchParams_t p,unsigned nbLoops)108a0483764SConrad Meyer BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
109a0483764SConrad Meyer                                    unsigned nbLoops)
110a0483764SConrad Meyer {
111a0483764SConrad Meyer     size_t dstSize = 0;
112a0483764SConrad Meyer     nbLoops += !nbLoops;   /* minimum nbLoops is 1 */
113a0483764SConrad Meyer 
114a0483764SConrad Meyer     /* init */
115a0483764SConrad Meyer     {   size_t i;
116a0483764SConrad Meyer         for(i = 0; i < p.blockCount; i++) {
117a0483764SConrad Meyer             memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]);  /* warm up and erase result buffer */
1182b9c00cbSConrad Meyer     }   }
119a0483764SConrad Meyer 
120a0483764SConrad Meyer     /* benchmark */
121a0483764SConrad Meyer     {   UTIL_time_t const clockStart = UTIL_getTime();
122a0483764SConrad Meyer         unsigned loopNb, blockNb;
123a0483764SConrad Meyer         if (p.initFn != NULL) p.initFn(p.initPayload);
124a0483764SConrad Meyer         for (loopNb = 0; loopNb < nbLoops; loopNb++) {
125a0483764SConrad Meyer             for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
126a0483764SConrad Meyer                 size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
127a0483764SConrad Meyer                                    p.dstBuffers[blockNb], p.dstCapacities[blockNb],
128a0483764SConrad Meyer                                    p.benchPayload);
129a0483764SConrad Meyer                 if (loopNb == 0) {
130a0483764SConrad Meyer                     if (p.blockResults != NULL) p.blockResults[blockNb] = res;
131a0483764SConrad Meyer                     if ((p.errorFn != NULL) && (p.errorFn(res))) {
132a0483764SConrad Meyer                         RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
133a0483764SConrad Meyer                             "Function benchmark failed on block %u (of size %u) with error %i",
134a0483764SConrad Meyer                             blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
135a0483764SConrad Meyer                     }
136a0483764SConrad Meyer                     dstSize += res;
137a0483764SConrad Meyer             }   }
138a0483764SConrad Meyer         }  /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
139a0483764SConrad Meyer 
1402b9c00cbSConrad Meyer         {   PTime const totalTime = UTIL_clockSpanNano(clockStart);
141a0483764SConrad Meyer             BMK_runTime_t rt;
1422b9c00cbSConrad Meyer             rt.nanoSecPerRun = (double)totalTime / nbLoops;
143a0483764SConrad Meyer             rt.sumOfReturn = dstSize;
144a0483764SConrad Meyer             return BMK_setValid_runTime(rt);
145a0483764SConrad Meyer     }   }
146a0483764SConrad Meyer }
147a0483764SConrad Meyer 
148a0483764SConrad Meyer 
149a0483764SConrad Meyer /* ====  Benchmarking any function, providing intermediate results  ==== */
150a0483764SConrad Meyer 
151a0483764SConrad Meyer struct BMK_timedFnState_s {
1522b9c00cbSConrad Meyer     PTime timeSpent_ns;
1532b9c00cbSConrad Meyer     PTime timeBudget_ns;
1542b9c00cbSConrad Meyer     PTime runBudget_ns;
155a0483764SConrad Meyer     BMK_runTime_t fastestRun;
156a0483764SConrad Meyer     unsigned nbLoops;
157a0483764SConrad Meyer     UTIL_time_t coolTime;
158a0483764SConrad Meyer };  /* typedef'd to BMK_timedFnState_t within bench.h */
159a0483764SConrad Meyer 
BMK_createTimedFnState(unsigned total_ms,unsigned run_ms)160a0483764SConrad Meyer BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
161a0483764SConrad Meyer {
162a0483764SConrad Meyer     BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
163a0483764SConrad Meyer     if (r == NULL) return NULL;   /* malloc() error */
164a0483764SConrad Meyer     BMK_resetTimedFnState(r, total_ms, run_ms);
165a0483764SConrad Meyer     return r;
166a0483764SConrad Meyer }
167a0483764SConrad Meyer 
/* BMK_freeTimedFnState() :
 * releases `state` with free(). */
void BMK_freeTimedFnState(BMK_timedFnState_t* state)
{
    free(state);
}
1692b9c00cbSConrad Meyer 
1702b9c00cbSConrad Meyer BMK_timedFnState_t*
BMK_initStatic_timedFnState(void * buffer,size_t size,unsigned total_ms,unsigned run_ms)1712b9c00cbSConrad Meyer BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
1722b9c00cbSConrad Meyer {
1732b9c00cbSConrad Meyer     typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1];  /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
1742b9c00cbSConrad Meyer     typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align;  /* force tfs to be aligned at its next best position */
1752b9c00cbSConrad Meyer     size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
1762b9c00cbSConrad Meyer     BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
1772b9c00cbSConrad Meyer     if (buffer == NULL) return NULL;
1782b9c00cbSConrad Meyer     if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
1792b9c00cbSConrad Meyer     if ((size_t)buffer % tfs_alignment) return NULL;  /* buffer must be properly aligned */
1802b9c00cbSConrad Meyer     BMK_resetTimedFnState(r, total_ms, run_ms);
1812b9c00cbSConrad Meyer     return r;
182a0483764SConrad Meyer }
183a0483764SConrad Meyer 
/* BMK_resetTimedFnState() :
 * re-initializes `timedFnState` for a new measurement session.
 * total_ms : total time budget; 0 is raised to 1.
 * run_ms   : budget of one measurement; 0 is raised to 1, and it is capped at total_ms.
 * After the call : no time is spent, nbLoops is 1,
 * and fastestRun holds sentinel values meant to exceed any real measurement. */
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
{
    /* enforce 1 <= run_ms <= total_ms */
    if (total_ms == 0) total_ms = 1;
    if (run_ms == 0) run_ms = 1;
    if (run_ms > total_ms) run_ms = total_ms;

    /* budgets : milliseconds converted to nanoseconds */
    timedFnState->timeBudget_ns = (PTime)total_ms * TIMELOOP_NANOSEC / 1000;
    timedFnState->runBudget_ns = (PTime)run_ms * TIMELOOP_NANOSEC / 1000;

    /* counters */
    timedFnState->timeSpent_ns = 0;
    timedFnState->nbLoops = 1;

    /* sentinel : hopefully large enough, must be larger than any potential measurement */
    timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000;
    timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);

    timedFnState->coolTime = UTIL_getTime();
}
197a0483764SConrad Meyer 
198a0483764SConrad Meyer /* Tells if nb of seconds set in timedFnState for all runs is spent.
199a0483764SConrad Meyer  * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
BMK_isCompleted_TimedFn(const BMK_timedFnState_t * timedFnState)200a0483764SConrad Meyer int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
201a0483764SConrad Meyer {
202a0483764SConrad Meyer     return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
203a0483764SConrad Meyer }
204a0483764SConrad Meyer 
205a0483764SConrad Meyer 
/* MIN() : smallest of 2 values. Beware : each argument may be evaluated twice. */
#undef MIN
#define MIN(a,b)   ( ((a) < (b)) ? (a) : (b) )

/* half of the reference timing loop */
#define MINUSABLETIME  (TIMELOOP_NANOSEC / 2)  /* 0.5 seconds */
210a0483764SConrad Meyer 
BMK_benchTimedFn(BMK_timedFnState_t * cont,BMK_benchParams_t p)211a0483764SConrad Meyer BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
212a0483764SConrad Meyer                                   BMK_benchParams_t p)
213a0483764SConrad Meyer {
2142b9c00cbSConrad Meyer     PTime const runBudget_ns = cont->runBudget_ns;
2152b9c00cbSConrad Meyer     PTime const runTimeMin_ns = runBudget_ns / 2;
216a0483764SConrad Meyer     int completed = 0;
217a0483764SConrad Meyer     BMK_runTime_t bestRunTime = cont->fastestRun;
218a0483764SConrad Meyer 
219a0483764SConrad Meyer     while (!completed) {
2202b9c00cbSConrad Meyer         BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
221a0483764SConrad Meyer 
222a0483764SConrad Meyer         if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
223a0483764SConrad Meyer             return runResult;
224a0483764SConrad Meyer         }
225a0483764SConrad Meyer 
226a0483764SConrad Meyer         {   BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
2272b9c00cbSConrad Meyer             double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
228a0483764SConrad Meyer 
2292b9c00cbSConrad Meyer             cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
230a0483764SConrad Meyer 
231a0483764SConrad Meyer             /* estimate nbLoops for next run to last approximately 1 second */
232a0483764SConrad Meyer             if (loopDuration_ns > (runBudget_ns / 50)) {
2332b9c00cbSConrad Meyer                 double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
2342b9c00cbSConrad Meyer                 cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;
235a0483764SConrad Meyer             } else {
236a0483764SConrad Meyer                 /* previous run was too short : blindly increase workload by x multiplier */
237a0483764SConrad Meyer                 const unsigned multiplier = 10;
238a0483764SConrad Meyer                 assert(cont->nbLoops < ((unsigned)-1) / multiplier);  /* avoid overflow */
239a0483764SConrad Meyer                 cont->nbLoops *= multiplier;
240a0483764SConrad Meyer             }
241a0483764SConrad Meyer 
242a0483764SConrad Meyer             if(loopDuration_ns < runTimeMin_ns) {
243a0483764SConrad Meyer                 /* don't report results for which benchmark run time was too small : increased risks of rounding errors */
244a0483764SConrad Meyer                 assert(completed == 0);
245a0483764SConrad Meyer                 continue;
246a0483764SConrad Meyer             } else {
247a0483764SConrad Meyer                 if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
248a0483764SConrad Meyer                     bestRunTime = newRunTime;
249a0483764SConrad Meyer                 }
250a0483764SConrad Meyer                 completed = 1;
251a0483764SConrad Meyer             }
252a0483764SConrad Meyer         }
253a0483764SConrad Meyer     }   /* while (!completed) */
254a0483764SConrad Meyer 
255a0483764SConrad Meyer     return BMK_setValid_runTime(bestRunTime);
256a0483764SConrad Meyer }
257