131914882SAlex Richardson /*
231914882SAlex Richardson * Microbenchmark for math functions.
331914882SAlex Richardson *
4*f3087befSAndrew Turner * Copyright (c) 2018-2024, Arm Limited.
5072a4ba8SAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
631914882SAlex Richardson */
731914882SAlex Richardson
8*f3087befSAndrew Turner #if WANT_SVE_TESTS
9*f3087befSAndrew Turner # if __aarch64__ && __linux__
10*f3087befSAndrew Turner # ifdef __clang__
11*f3087befSAndrew Turner # pragma clang attribute push(__attribute__((target("sve"))), \
12*f3087befSAndrew Turner apply_to = any(function))
13*f3087befSAndrew Turner # else
14*f3087befSAndrew Turner # pragma GCC target("+sve")
15*f3087befSAndrew Turner # endif
16*f3087befSAndrew Turner # else
17*f3087befSAndrew Turner # error "SVE not supported - please disable WANT_SVE_TESTS"
18*f3087befSAndrew Turner # endif
19*f3087befSAndrew Turner #endif
20*f3087befSAndrew Turner
2131914882SAlex Richardson #undef _GNU_SOURCE
2231914882SAlex Richardson #define _GNU_SOURCE 1
#include <errno.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "mathlib.h"
3031914882SAlex Richardson
/* Default number of timed measurements per benchmark; only the best
   (smallest) time is reported.  Overridable with -m.  */
#define MEASURE 60
/* Number of elements in the input arrays A and Af.  */
#define N 8000
/* Default number of passes over the array inside one measurement.
   Overridable with -c.  */
#define ITER 125

/* Run-time copies of the defaults above, updated by option parsing.  */
static long measurecount = MEASURE;
static long itercount = ITER;

/* Inputs fed to the benchmarked functions (double and float flavours).  */
static double A[N];
static float Af[N];

/* Values loaded from a trace file and the number of slots in use.  */
static double *Trace;
static size_t trace_size;
4431914882SAlex Richardson
/* Identity function on double; registered in funtab as "dummy".
   Presumably serves as the baseline that measures harness overhead.  */
static double
dummy (double x)
{
  return x;
}
5031914882SAlex Richardson
/* Identity function on float; registered in funtab as "dummyf".
   Presumably serves as the single-precision harness-overhead baseline.  */
static float
dummyf (float x)
{
  return x;
}
56*f3087befSAndrew Turner #if __aarch64__ && __linux__
57*f3087befSAndrew Turner __vpcs static float64x2_t
__vn_dummy(float64x2_t x)58*f3087befSAndrew Turner __vn_dummy (float64x2_t x)
5931914882SAlex Richardson {
6031914882SAlex Richardson return x;
6131914882SAlex Richardson }
6231914882SAlex Richardson
63*f3087befSAndrew Turner __vpcs static float32x4_t
__vn_dummyf(float32x4_t x)64*f3087befSAndrew Turner __vn_dummyf (float32x4_t x)
6531914882SAlex Richardson {
6631914882SAlex Richardson return x;
6731914882SAlex Richardson }
68072a4ba8SAndrew Turner #endif
69*f3087befSAndrew Turner #if WANT_SVE_TESTS
70*f3087befSAndrew Turner static svfloat64_t
__sv_dummy(svfloat64_t x,svbool_t pg)71*f3087befSAndrew Turner __sv_dummy (svfloat64_t x, svbool_t pg)
7231914882SAlex Richardson {
73072a4ba8SAndrew Turner return x;
7431914882SAlex Richardson }
7531914882SAlex Richardson
76*f3087befSAndrew Turner static svfloat32_t
__sv_dummyf(svfloat32_t x,svbool_t pg)77*f3087befSAndrew Turner __sv_dummyf (svfloat32_t x, svbool_t pg)
7831914882SAlex Richardson {
79072a4ba8SAndrew Turner return x;
8031914882SAlex Richardson }
8131914882SAlex Richardson
82072a4ba8SAndrew Turner #endif
8331914882SAlex Richardson
84072a4ba8SAndrew Turner #include "test/mathbench_wrappers.h"
8531914882SAlex Richardson
8631914882SAlex Richardson static const struct fun
8731914882SAlex Richardson {
8831914882SAlex Richardson const char *name;
8931914882SAlex Richardson int prec;
9031914882SAlex Richardson int vec;
9131914882SAlex Richardson double lo;
9231914882SAlex Richardson double hi;
9331914882SAlex Richardson union
9431914882SAlex Richardson {
9531914882SAlex Richardson double (*d) (double);
9631914882SAlex Richardson float (*f) (float);
97*f3087befSAndrew Turner #if __aarch64__ && __linux__
98*f3087befSAndrew Turner __vpcs float64x2_t (*vnd) (float64x2_t);
99*f3087befSAndrew Turner __vpcs float32x4_t (*vnf) (float32x4_t);
10031914882SAlex Richardson #endif
101*f3087befSAndrew Turner #if WANT_SVE_TESTS
102*f3087befSAndrew Turner svfloat64_t (*svd) (svfloat64_t, svbool_t);
103*f3087befSAndrew Turner svfloat32_t (*svf) (svfloat32_t, svbool_t);
104072a4ba8SAndrew Turner #endif
10531914882SAlex Richardson } fun;
10631914882SAlex Richardson } funtab[] = {
107*f3087befSAndrew Turner // clang-format off
10831914882SAlex Richardson #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}},
10931914882SAlex Richardson #define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}},
11031914882SAlex Richardson #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}},
11131914882SAlex Richardson #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}},
112072a4ba8SAndrew Turner #define SVD(func, lo, hi) {#func, 'd', 's', lo, hi, {.svd = func}},
113072a4ba8SAndrew Turner #define SVF(func, lo, hi) {#func, 'f', 's', lo, hi, {.svf = func}},
11431914882SAlex Richardson D (dummy, 1.0, 2.0)
11531914882SAlex Richardson F (dummyf, 1.0, 2.0)
116*f3087befSAndrew Turner #if __aarch64__ && __linux__
11731914882SAlex Richardson VND (__vn_dummy, 1.0, 2.0)
11831914882SAlex Richardson VNF (__vn_dummyf, 1.0, 2.0)
119072a4ba8SAndrew Turner #endif
120*f3087befSAndrew Turner #if WANT_SVE_TESTS
121072a4ba8SAndrew Turner SVD (__sv_dummy, 1.0, 2.0)
122072a4ba8SAndrew Turner SVF (__sv_dummyf, 1.0, 2.0)
12331914882SAlex Richardson #endif
124072a4ba8SAndrew Turner #include "test/mathbench_funcs.h"
12531914882SAlex Richardson {0},
12631914882SAlex Richardson #undef F
12731914882SAlex Richardson #undef D
12831914882SAlex Richardson #undef VNF
12931914882SAlex Richardson #undef VND
130072a4ba8SAndrew Turner #undef SVF
131072a4ba8SAndrew Turner #undef SVD
132*f3087befSAndrew Turner // clang-format on
13331914882SAlex Richardson };
13431914882SAlex Richardson
13531914882SAlex Richardson static void
gen_linear(double lo,double hi)13631914882SAlex Richardson gen_linear (double lo, double hi)
13731914882SAlex Richardson {
13831914882SAlex Richardson for (int i = 0; i < N; i++)
13931914882SAlex Richardson A[i] = (lo * (N - i) + hi * i) / N;
14031914882SAlex Richardson }
14131914882SAlex Richardson
14231914882SAlex Richardson static void
genf_linear(double lo,double hi)14331914882SAlex Richardson genf_linear (double lo, double hi)
14431914882SAlex Richardson {
14531914882SAlex Richardson for (int i = 0; i < N; i++)
14631914882SAlex Richardson Af[i] = (float)(lo * (N - i) + hi * i) / N;
14731914882SAlex Richardson }
14831914882SAlex Richardson
/* Reinterpret the 64-bit pattern I as a double (no numeric conversion).  */
static inline double
asdouble (uint64_t i)
{
  union
  {
    uint64_t bits;
    double value;
  } pun;

  pun.bits = i;
  return pun.value;
}
15931914882SAlex Richardson
/* State of the pseudo-random generator used by frand.  */
static uint64_t seed = 0x0123456789abcdef;

/* Return the next pseudo-random double, scaled into the interval starting
   at LO and extending towards HI.  Advances SEED with a 64-bit
   multiply-and-add step.  */
static double
frand (double lo, double hi)
{
  union
  {
    uint64_t bits;
    double value;
  } pun;

  seed = seed * 6364136223846793005ULL + 1;

  /* The top 52 bits of the state become the mantissa and the exponent field
     is set to 0x3ff, giving a value in [1, 2); subtracting 1 maps it to
     [0, 1).  */
  pun.bits = (seed >> 12) | (0x3ffULL << 52);
  const double unit = pun.value - 1.0;

  return lo + (hi - lo) * unit;
}
16831914882SAlex Richardson
16931914882SAlex Richardson static void
gen_rand(double lo,double hi)17031914882SAlex Richardson gen_rand (double lo, double hi)
17131914882SAlex Richardson {
17231914882SAlex Richardson for (int i = 0; i < N; i++)
17331914882SAlex Richardson A[i] = frand (lo, hi);
17431914882SAlex Richardson }
17531914882SAlex Richardson
17631914882SAlex Richardson static void
genf_rand(double lo,double hi)17731914882SAlex Richardson genf_rand (double lo, double hi)
17831914882SAlex Richardson {
17931914882SAlex Richardson for (int i = 0; i < N; i++)
18031914882SAlex Richardson Af[i] = (float)frand (lo, hi);
18131914882SAlex Richardson }
18231914882SAlex Richardson
18331914882SAlex Richardson static void
gen_trace(int index)18431914882SAlex Richardson gen_trace (int index)
18531914882SAlex Richardson {
18631914882SAlex Richardson for (int i = 0; i < N; i++)
18731914882SAlex Richardson A[i] = Trace[index + i];
18831914882SAlex Richardson }
18931914882SAlex Richardson
19031914882SAlex Richardson static void
genf_trace(int index)19131914882SAlex Richardson genf_trace (int index)
19231914882SAlex Richardson {
19331914882SAlex Richardson for (int i = 0; i < N; i++)
19431914882SAlex Richardson Af[i] = (float)Trace[index + i];
19531914882SAlex Richardson }
19631914882SAlex Richardson
19731914882SAlex Richardson static void
run_thruput(double f (double))19831914882SAlex Richardson run_thruput (double f (double))
19931914882SAlex Richardson {
20031914882SAlex Richardson for (int i = 0; i < N; i++)
20131914882SAlex Richardson f (A[i]);
20231914882SAlex Richardson }
20331914882SAlex Richardson
20431914882SAlex Richardson static void
runf_thruput(float f (float))20531914882SAlex Richardson runf_thruput (float f (float))
20631914882SAlex Richardson {
20731914882SAlex Richardson for (int i = 0; i < N; i++)
20831914882SAlex Richardson f (Af[i]);
20931914882SAlex Richardson }
21031914882SAlex Richardson
21131914882SAlex Richardson volatile double zero = 0;
21231914882SAlex Richardson
21331914882SAlex Richardson static void
run_latency(double f (double))21431914882SAlex Richardson run_latency (double f (double))
21531914882SAlex Richardson {
21631914882SAlex Richardson double z = zero;
21731914882SAlex Richardson double prev = z;
21831914882SAlex Richardson for (int i = 0; i < N; i++)
21931914882SAlex Richardson prev = f (A[i] + prev * z);
22031914882SAlex Richardson }
22131914882SAlex Richardson
22231914882SAlex Richardson static void
runf_latency(float f (float))22331914882SAlex Richardson runf_latency (float f (float))
22431914882SAlex Richardson {
22531914882SAlex Richardson float z = (float)zero;
22631914882SAlex Richardson float prev = z;
22731914882SAlex Richardson for (int i = 0; i < N; i++)
22831914882SAlex Richardson prev = f (Af[i] + prev * z);
22931914882SAlex Richardson }
23031914882SAlex Richardson
231*f3087befSAndrew Turner #if __aarch64__ && __linux__
23231914882SAlex Richardson static void
run_vn_thruput(__vpcs float64x2_t f (float64x2_t))233*f3087befSAndrew Turner run_vn_thruput (__vpcs float64x2_t f (float64x2_t))
23431914882SAlex Richardson {
235*f3087befSAndrew Turner for (int i = 0; i < N; i += 2)
236*f3087befSAndrew Turner f (vld1q_f64 (A + i));
23731914882SAlex Richardson }
23831914882SAlex Richardson
23931914882SAlex Richardson static void
runf_vn_thruput(__vpcs float32x4_t f (float32x4_t))240*f3087befSAndrew Turner runf_vn_thruput (__vpcs float32x4_t f (float32x4_t))
24131914882SAlex Richardson {
242*f3087befSAndrew Turner for (int i = 0; i < N; i += 4)
243*f3087befSAndrew Turner f (vld1q_f32 (Af + i));
24431914882SAlex Richardson }
24531914882SAlex Richardson
24631914882SAlex Richardson static void
run_vn_latency(__vpcs float64x2_t f (float64x2_t))247*f3087befSAndrew Turner run_vn_latency (__vpcs float64x2_t f (float64x2_t))
24831914882SAlex Richardson {
2495a02ffc3SAndrew Turner volatile uint64x2_t vsel = (uint64x2_t) { 0, 0 };
2505a02ffc3SAndrew Turner uint64x2_t sel = vsel;
251*f3087befSAndrew Turner float64x2_t prev = vdupq_n_f64 (0);
252*f3087befSAndrew Turner for (int i = 0; i < N; i += 2)
253*f3087befSAndrew Turner prev = f (vbslq_f64 (sel, prev, vld1q_f64 (A + i)));
25431914882SAlex Richardson }
25531914882SAlex Richardson
25631914882SAlex Richardson static void
runf_vn_latency(__vpcs float32x4_t f (float32x4_t))257*f3087befSAndrew Turner runf_vn_latency (__vpcs float32x4_t f (float32x4_t))
25831914882SAlex Richardson {
2595a02ffc3SAndrew Turner volatile uint32x4_t vsel = (uint32x4_t) { 0, 0, 0, 0 };
2605a02ffc3SAndrew Turner uint32x4_t sel = vsel;
261*f3087befSAndrew Turner float32x4_t prev = vdupq_n_f32 (0);
262*f3087befSAndrew Turner for (int i = 0; i < N; i += 4)
263*f3087befSAndrew Turner prev = f (vbslq_f32 (sel, prev, vld1q_f32 (Af + i)));
26431914882SAlex Richardson }
26531914882SAlex Richardson #endif
26631914882SAlex Richardson
267*f3087befSAndrew Turner #if WANT_SVE_TESTS
268072a4ba8SAndrew Turner static void
run_sv_thruput(svfloat64_t f (svfloat64_t,svbool_t))269*f3087befSAndrew Turner run_sv_thruput (svfloat64_t f (svfloat64_t, svbool_t))
270072a4ba8SAndrew Turner {
271*f3087befSAndrew Turner for (int i = 0; i < N; i += svcntd ())
272*f3087befSAndrew Turner f (svld1_f64 (svptrue_b64 (), A + i), svptrue_b64 ());
273072a4ba8SAndrew Turner }
274072a4ba8SAndrew Turner
275072a4ba8SAndrew Turner static void
runf_sv_thruput(svfloat32_t f (svfloat32_t,svbool_t))276*f3087befSAndrew Turner runf_sv_thruput (svfloat32_t f (svfloat32_t, svbool_t))
277072a4ba8SAndrew Turner {
278*f3087befSAndrew Turner for (int i = 0; i < N; i += svcntw ())
279*f3087befSAndrew Turner f (svld1_f32 (svptrue_b32 (), Af + i), svptrue_b32 ());
280072a4ba8SAndrew Turner }
281072a4ba8SAndrew Turner
282072a4ba8SAndrew Turner static void
run_sv_latency(svfloat64_t f (svfloat64_t,svbool_t))283*f3087befSAndrew Turner run_sv_latency (svfloat64_t f (svfloat64_t, svbool_t))
284072a4ba8SAndrew Turner {
285*f3087befSAndrew Turner volatile svbool_t vsel = svptrue_b64 ();
286*f3087befSAndrew Turner svbool_t sel = vsel;
287*f3087befSAndrew Turner svfloat64_t prev = svdup_f64 (0);
288*f3087befSAndrew Turner for (int i = 0; i < N; i += svcntd ())
289*f3087befSAndrew Turner prev = f (svsel_f64 (sel, svld1_f64 (svptrue_b64 (), A + i), prev),
290*f3087befSAndrew Turner svptrue_b64 ());
291072a4ba8SAndrew Turner }
292072a4ba8SAndrew Turner
293072a4ba8SAndrew Turner static void
runf_sv_latency(svfloat32_t f (svfloat32_t,svbool_t))294*f3087befSAndrew Turner runf_sv_latency (svfloat32_t f (svfloat32_t, svbool_t))
295072a4ba8SAndrew Turner {
296*f3087befSAndrew Turner volatile svbool_t vsel = svptrue_b32 ();
297*f3087befSAndrew Turner svbool_t sel = vsel;
298*f3087befSAndrew Turner svfloat32_t prev = svdup_f32 (0);
299*f3087befSAndrew Turner for (int i = 0; i < N; i += svcntw ())
300*f3087befSAndrew Turner prev = f (svsel_f32 (sel, svld1_f32 (svptrue_b32 (), Af + i), prev),
301*f3087befSAndrew Turner svptrue_b32 ());
302072a4ba8SAndrew Turner }
303072a4ba8SAndrew Turner #endif
304072a4ba8SAndrew Turner
30531914882SAlex Richardson static uint64_t
tic(void)30631914882SAlex Richardson tic (void)
30731914882SAlex Richardson {
30831914882SAlex Richardson struct timespec ts;
309*f3087befSAndrew Turner #if defined(_MSC_VER)
310*f3087befSAndrew Turner if (!timespec_get (&ts, TIME_UTC))
311*f3087befSAndrew Turner #else
31231914882SAlex Richardson if (clock_gettime (CLOCK_REALTIME, &ts))
313*f3087befSAndrew Turner #endif
31431914882SAlex Richardson abort ();
31531914882SAlex Richardson return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
31631914882SAlex Richardson }
31731914882SAlex Richardson
/* Time RUN (F) and store the best result in the caller's variable `dt'.
   After one untimed warm-up call, measurecount measurements are taken; each
   one times itercount back-to-back calls of RUN (F) with tic (), and the
   smallest elapsed time (in tic units) is kept.  */
#define TIMEIT(run, f) \
  do \
    { \
      dt = -1; /* Largest value when dt is unsigned.  */ \
      run (f); /* Warm up.  */ \
      for (int meas = 0; meas < measurecount; meas++) \
        { \
          const uint64_t start_ns = tic (); \
          for (int rep = 0; rep < itercount; rep++) \
            run (f); \
          const uint64_t elapsed_ns = tic () - start_ns; \
          if (elapsed_ns < dt) \
            dt = elapsed_ns; \
        } \
    } \
  while (0)
33131914882SAlex Richardson
33231914882SAlex Richardson static void
bench1(const struct fun * f,int type,double lo,double hi)33331914882SAlex Richardson bench1 (const struct fun *f, int type, double lo, double hi)
33431914882SAlex Richardson {
33531914882SAlex Richardson uint64_t dt = 0;
33631914882SAlex Richardson uint64_t ns100;
33731914882SAlex Richardson const char *s = type == 't' ? "rthruput" : "latency";
33831914882SAlex Richardson int vlen = 1;
33931914882SAlex Richardson
3405a02ffc3SAndrew Turner if (f->vec == 'n')
341*f3087befSAndrew Turner vlen = f->prec == 'd' ? 2 : 4;
342*f3087befSAndrew Turner #if WANT_SVE_TESTS
3435a02ffc3SAndrew Turner else if (f->vec == 's')
344*f3087befSAndrew Turner vlen = f->prec == 'd' ? svcntd () : svcntw ();
345*f3087befSAndrew Turner #endif
34631914882SAlex Richardson
34731914882SAlex Richardson if (f->prec == 'd' && type == 't' && f->vec == 0)
34831914882SAlex Richardson TIMEIT (run_thruput, f->fun.d);
34931914882SAlex Richardson else if (f->prec == 'd' && type == 'l' && f->vec == 0)
35031914882SAlex Richardson TIMEIT (run_latency, f->fun.d);
35131914882SAlex Richardson else if (f->prec == 'f' && type == 't' && f->vec == 0)
35231914882SAlex Richardson TIMEIT (runf_thruput, f->fun.f);
35331914882SAlex Richardson else if (f->prec == 'f' && type == 'l' && f->vec == 0)
35431914882SAlex Richardson TIMEIT (runf_latency, f->fun.f);
355*f3087befSAndrew Turner #if __aarch64__ && __linux__
35631914882SAlex Richardson else if (f->prec == 'd' && type == 't' && f->vec == 'n')
35731914882SAlex Richardson TIMEIT (run_vn_thruput, f->fun.vnd);
35831914882SAlex Richardson else if (f->prec == 'd' && type == 'l' && f->vec == 'n')
35931914882SAlex Richardson TIMEIT (run_vn_latency, f->fun.vnd);
36031914882SAlex Richardson else if (f->prec == 'f' && type == 't' && f->vec == 'n')
36131914882SAlex Richardson TIMEIT (runf_vn_thruput, f->fun.vnf);
36231914882SAlex Richardson else if (f->prec == 'f' && type == 'l' && f->vec == 'n')
36331914882SAlex Richardson TIMEIT (runf_vn_latency, f->fun.vnf);
36431914882SAlex Richardson #endif
365*f3087befSAndrew Turner #if WANT_SVE_TESTS
366072a4ba8SAndrew Turner else if (f->prec == 'd' && type == 't' && f->vec == 's')
367072a4ba8SAndrew Turner TIMEIT (run_sv_thruput, f->fun.svd);
368072a4ba8SAndrew Turner else if (f->prec == 'd' && type == 'l' && f->vec == 's')
369072a4ba8SAndrew Turner TIMEIT (run_sv_latency, f->fun.svd);
370072a4ba8SAndrew Turner else if (f->prec == 'f' && type == 't' && f->vec == 's')
371072a4ba8SAndrew Turner TIMEIT (runf_sv_thruput, f->fun.svf);
372072a4ba8SAndrew Turner else if (f->prec == 'f' && type == 'l' && f->vec == 's')
373072a4ba8SAndrew Turner TIMEIT (runf_sv_latency, f->fun.svf);
374072a4ba8SAndrew Turner #endif
37531914882SAlex Richardson
37631914882SAlex Richardson if (type == 't')
37731914882SAlex Richardson {
37831914882SAlex Richardson ns100 = (100 * dt + itercount * N / 2) / (itercount * N);
3795a02ffc3SAndrew Turner printf ("%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g] vlen %d\n",
3805a02ffc3SAndrew Turner f->name, s,
38131914882SAlex Richardson (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
3825a02ffc3SAndrew Turner (unsigned long long) dt, lo, hi, vlen);
38331914882SAlex Richardson }
38431914882SAlex Richardson else if (type == 'l')
38531914882SAlex Richardson {
38631914882SAlex Richardson ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen);
3875a02ffc3SAndrew Turner printf ("%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g] vlen %d\n",
3885a02ffc3SAndrew Turner f->name, s,
38931914882SAlex Richardson (unsigned) (ns100 / 100), (unsigned) (ns100 % 100),
3905a02ffc3SAndrew Turner (unsigned long long) dt, lo, hi, vlen);
39131914882SAlex Richardson }
39231914882SAlex Richardson fflush (stdout);
39331914882SAlex Richardson }
39431914882SAlex Richardson
39531914882SAlex Richardson static void
bench(const struct fun * f,double lo,double hi,int type,int gen)39631914882SAlex Richardson bench (const struct fun *f, double lo, double hi, int type, int gen)
39731914882SAlex Richardson {
39831914882SAlex Richardson if (f->prec == 'd' && gen == 'r')
39931914882SAlex Richardson gen_rand (lo, hi);
40031914882SAlex Richardson else if (f->prec == 'd' && gen == 'l')
40131914882SAlex Richardson gen_linear (lo, hi);
40231914882SAlex Richardson else if (f->prec == 'd' && gen == 't')
40331914882SAlex Richardson gen_trace (0);
40431914882SAlex Richardson else if (f->prec == 'f' && gen == 'r')
40531914882SAlex Richardson genf_rand (lo, hi);
40631914882SAlex Richardson else if (f->prec == 'f' && gen == 'l')
40731914882SAlex Richardson genf_linear (lo, hi);
40831914882SAlex Richardson else if (f->prec == 'f' && gen == 't')
40931914882SAlex Richardson genf_trace (0);
41031914882SAlex Richardson
41131914882SAlex Richardson if (gen == 't')
41231914882SAlex Richardson hi = trace_size / N;
41331914882SAlex Richardson
41431914882SAlex Richardson if (type == 'b' || type == 't')
41531914882SAlex Richardson bench1 (f, 't', lo, hi);
41631914882SAlex Richardson
41731914882SAlex Richardson if (type == 'b' || type == 'l')
41831914882SAlex Richardson bench1 (f, 'l', lo, hi);
41931914882SAlex Richardson
42031914882SAlex Richardson for (int i = N; i < trace_size; i += N)
42131914882SAlex Richardson {
42231914882SAlex Richardson if (f->prec == 'd')
42331914882SAlex Richardson gen_trace (i);
42431914882SAlex Richardson else
42531914882SAlex Richardson genf_trace (i);
42631914882SAlex Richardson
42731914882SAlex Richardson lo = i / N;
42831914882SAlex Richardson if (type == 'b' || type == 't')
42931914882SAlex Richardson bench1 (f, 't', lo, hi);
43031914882SAlex Richardson
43131914882SAlex Richardson if (type == 'b' || type == 'l')
43231914882SAlex Richardson bench1 (f, 'l', lo, hi);
43331914882SAlex Richardson }
43431914882SAlex Richardson }
43531914882SAlex Richardson
43631914882SAlex Richardson static void
readtrace(const char * name)43731914882SAlex Richardson readtrace (const char *name)
43831914882SAlex Richardson {
43931914882SAlex Richardson int n = 0;
44031914882SAlex Richardson FILE *f = strcmp (name, "-") == 0 ? stdin : fopen (name, "r");
44131914882SAlex Richardson if (!f)
44231914882SAlex Richardson {
44331914882SAlex Richardson printf ("openning \"%s\" failed: %m\n", name);
44431914882SAlex Richardson exit (1);
44531914882SAlex Richardson }
44631914882SAlex Richardson for (;;)
44731914882SAlex Richardson {
44831914882SAlex Richardson if (n >= trace_size)
44931914882SAlex Richardson {
45031914882SAlex Richardson trace_size += N;
45131914882SAlex Richardson Trace = realloc (Trace, trace_size * sizeof (Trace[0]));
45231914882SAlex Richardson if (Trace == NULL)
45331914882SAlex Richardson {
45431914882SAlex Richardson printf ("out of memory\n");
45531914882SAlex Richardson exit (1);
45631914882SAlex Richardson }
45731914882SAlex Richardson }
45831914882SAlex Richardson if (fscanf (f, "%lf", Trace + n) != 1)
45931914882SAlex Richardson break;
46031914882SAlex Richardson n++;
46131914882SAlex Richardson }
46231914882SAlex Richardson if (ferror (f) || n == 0)
46331914882SAlex Richardson {
46431914882SAlex Richardson printf ("reading \"%s\" failed: %m\n", name);
46531914882SAlex Richardson exit (1);
46631914882SAlex Richardson }
46731914882SAlex Richardson fclose (f);
46831914882SAlex Richardson if (n % N == 0)
46931914882SAlex Richardson trace_size = n;
47031914882SAlex Richardson for (int i = 0; n < trace_size; n++, i++)
47131914882SAlex Richardson Trace[n] = Trace[i];
47231914882SAlex Richardson }
47331914882SAlex Richardson
47431914882SAlex Richardson static void
usage(void)47531914882SAlex Richardson usage (void)
47631914882SAlex Richardson {
47731914882SAlex Richardson printf ("usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] "
47831914882SAlex Richardson "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func "
47931914882SAlex Richardson "[func2 ..]\n");
48031914882SAlex Richardson printf ("func:\n");
48131914882SAlex Richardson printf ("%7s [run all benchmarks]\n", "all");
48231914882SAlex Richardson for (const struct fun *f = funtab; f->name; f++)
48331914882SAlex Richardson printf ("%7s [low: %g high: %g]\n", f->name, f->lo, f->hi);
48431914882SAlex Richardson exit (1);
48531914882SAlex Richardson }
48631914882SAlex Richardson
48731914882SAlex Richardson int
main(int argc,char * argv[])48831914882SAlex Richardson main (int argc, char *argv[])
48931914882SAlex Richardson {
49031914882SAlex Richardson int usergen = 0, gen = 'r', type = 'b', all = 0;
49131914882SAlex Richardson double lo = 0, hi = 0;
49231914882SAlex Richardson const char *tracefile = "-";
49331914882SAlex Richardson
49431914882SAlex Richardson argv++;
49531914882SAlex Richardson argc--;
49631914882SAlex Richardson for (;;)
49731914882SAlex Richardson {
49831914882SAlex Richardson if (argc <= 0)
49931914882SAlex Richardson usage ();
50031914882SAlex Richardson if (argv[0][0] != '-')
50131914882SAlex Richardson break;
50231914882SAlex Richardson else if (argc >= 3 && strcmp (argv[0], "-i") == 0)
50331914882SAlex Richardson {
50431914882SAlex Richardson usergen = 1;
50531914882SAlex Richardson lo = strtod (argv[1], 0);
50631914882SAlex Richardson hi = strtod (argv[2], 0);
50731914882SAlex Richardson argv += 3;
50831914882SAlex Richardson argc -= 3;
50931914882SAlex Richardson }
51031914882SAlex Richardson else if (argc >= 2 && strcmp (argv[0], "-m") == 0)
51131914882SAlex Richardson {
51231914882SAlex Richardson measurecount = strtol (argv[1], 0, 0);
51331914882SAlex Richardson argv += 2;
51431914882SAlex Richardson argc -= 2;
51531914882SAlex Richardson }
51631914882SAlex Richardson else if (argc >= 2 && strcmp (argv[0], "-c") == 0)
51731914882SAlex Richardson {
51831914882SAlex Richardson itercount = strtol (argv[1], 0, 0);
51931914882SAlex Richardson argv += 2;
52031914882SAlex Richardson argc -= 2;
52131914882SAlex Richardson }
52231914882SAlex Richardson else if (argc >= 2 && strcmp (argv[0], "-g") == 0)
52331914882SAlex Richardson {
52431914882SAlex Richardson gen = argv[1][0];
52531914882SAlex Richardson if (strchr ("rlt", gen) == 0)
52631914882SAlex Richardson usage ();
52731914882SAlex Richardson argv += 2;
52831914882SAlex Richardson argc -= 2;
52931914882SAlex Richardson }
53031914882SAlex Richardson else if (argc >= 2 && strcmp (argv[0], "-f") == 0)
53131914882SAlex Richardson {
53231914882SAlex Richardson gen = 't'; /* -f implies -g trace. */
53331914882SAlex Richardson tracefile = argv[1];
53431914882SAlex Richardson argv += 2;
53531914882SAlex Richardson argc -= 2;
53631914882SAlex Richardson }
53731914882SAlex Richardson else if (argc >= 2 && strcmp (argv[0], "-t") == 0)
53831914882SAlex Richardson {
53931914882SAlex Richardson type = argv[1][0];
54031914882SAlex Richardson if (strchr ("ltb", type) == 0)
54131914882SAlex Richardson usage ();
54231914882SAlex Richardson argv += 2;
54331914882SAlex Richardson argc -= 2;
54431914882SAlex Richardson }
54531914882SAlex Richardson else
54631914882SAlex Richardson usage ();
54731914882SAlex Richardson }
54831914882SAlex Richardson if (gen == 't')
54931914882SAlex Richardson {
55031914882SAlex Richardson readtrace (tracefile);
55131914882SAlex Richardson lo = hi = 0;
55231914882SAlex Richardson usergen = 1;
55331914882SAlex Richardson }
55431914882SAlex Richardson while (argc > 0)
55531914882SAlex Richardson {
55631914882SAlex Richardson int found = 0;
55731914882SAlex Richardson all = strcmp (argv[0], "all") == 0;
55831914882SAlex Richardson for (const struct fun *f = funtab; f->name; f++)
55931914882SAlex Richardson if (all || strcmp (argv[0], f->name) == 0)
56031914882SAlex Richardson {
56131914882SAlex Richardson found = 1;
56231914882SAlex Richardson if (!usergen)
56331914882SAlex Richardson {
56431914882SAlex Richardson lo = f->lo;
56531914882SAlex Richardson hi = f->hi;
56631914882SAlex Richardson }
56731914882SAlex Richardson bench (f, lo, hi, type, gen);
56831914882SAlex Richardson if (usergen && !all)
56931914882SAlex Richardson break;
57031914882SAlex Richardson }
57131914882SAlex Richardson if (!found)
57231914882SAlex Richardson printf ("unknown function: %s\n", argv[0]);
57331914882SAlex Richardson argv++;
57431914882SAlex Richardson argc--;
57531914882SAlex Richardson }
57631914882SAlex Richardson return 0;
57731914882SAlex Richardson }
578*f3087befSAndrew Turner
579*f3087befSAndrew Turner #if __aarch64__ && __linux__ && WANT_SVE_TESTS && defined(__clang__)
580*f3087befSAndrew Turner # pragma clang attribute pop
581*f3087befSAndrew Turner #endif
582