xref: /linux/tools/testing/selftests/bpf/benchs/bench_bpf_timing.c (revision abac8acb633a9448369d658889ac2bcfbd96f54b)
108158c11SPuranjay Mohan // SPDX-License-Identifier: GPL-2.0
208158c11SPuranjay Mohan /* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
308158c11SPuranjay Mohan 
408158c11SPuranjay Mohan #include <stdio.h>
508158c11SPuranjay Mohan #include <stdlib.h>
608158c11SPuranjay Mohan #include <string.h>
708158c11SPuranjay Mohan #include <math.h>
808158c11SPuranjay Mohan #include "bench_bpf_timing.h"
908158c11SPuranjay Mohan #include "bpf_util.h"
1008158c11SPuranjay Mohan 
1108158c11SPuranjay Mohan struct timing_stats {
1208158c11SPuranjay Mohan 	double min, max;
1308158c11SPuranjay Mohan 	double median, p99;
1408158c11SPuranjay Mohan 	double mean, stddev;
1508158c11SPuranjay Mohan 	int count;
1608158c11SPuranjay Mohan };
1708158c11SPuranjay Mohan 
1808158c11SPuranjay Mohan static int cmp_double(const void *a, const void *b)
1908158c11SPuranjay Mohan {
2008158c11SPuranjay Mohan 	double da = *(const double *)a;
2108158c11SPuranjay Mohan 	double db = *(const double *)b;
2208158c11SPuranjay Mohan 
2308158c11SPuranjay Mohan 	if (da < db)
2408158c11SPuranjay Mohan 		return -1;
2508158c11SPuranjay Mohan 	if (da > db)
2608158c11SPuranjay Mohan 		return 1;
2708158c11SPuranjay Mohan 	return 0;
2808158c11SPuranjay Mohan }
2908158c11SPuranjay Mohan 
3008158c11SPuranjay Mohan static double percentile(const double *sorted, int n, double pct)
3108158c11SPuranjay Mohan {
3208158c11SPuranjay Mohan 	int idx = (int)(n * pct / 100.0);
3308158c11SPuranjay Mohan 
3408158c11SPuranjay Mohan 	if (idx >= n)
3508158c11SPuranjay Mohan 		idx = n - 1;
3608158c11SPuranjay Mohan 	return sorted[idx];
3708158c11SPuranjay Mohan }
3808158c11SPuranjay Mohan 
3908158c11SPuranjay Mohan static int collect_samples(struct bpf_bench_timing *t,
4008158c11SPuranjay Mohan 			   double *out, int max_out)
4108158c11SPuranjay Mohan {
4208158c11SPuranjay Mohan 	unsigned int nr_cpus = bpf_num_possible_cpus();
4308158c11SPuranjay Mohan 	__u32 timed_iters = t->batch_iters;
4408158c11SPuranjay Mohan 	int total = 0;
4508158c11SPuranjay Mohan 
4608158c11SPuranjay Mohan 	if (nr_cpus > BENCH_NR_CPUS)
4708158c11SPuranjay Mohan 		nr_cpus = BENCH_NR_CPUS;
4808158c11SPuranjay Mohan 
4908158c11SPuranjay Mohan 	for (unsigned int cpu = 0; cpu < nr_cpus; cpu++) {
5008158c11SPuranjay Mohan 		__u32 count = t->idx[cpu];
5108158c11SPuranjay Mohan 
5208158c11SPuranjay Mohan 		if (count > BENCH_NR_SAMPLES)
5308158c11SPuranjay Mohan 			count = BENCH_NR_SAMPLES;
5408158c11SPuranjay Mohan 
5508158c11SPuranjay Mohan 		for (__u32 i = 0; i < count && total < max_out; i++) {
5608158c11SPuranjay Mohan 			__u64 sample = t->samples[cpu][i];
5708158c11SPuranjay Mohan 
5808158c11SPuranjay Mohan 			if (sample == 0)
5908158c11SPuranjay Mohan 				continue;
6008158c11SPuranjay Mohan 			out[total++] = (double)sample / timed_iters;
6108158c11SPuranjay Mohan 		}
6208158c11SPuranjay Mohan 	}
6308158c11SPuranjay Mohan 
6408158c11SPuranjay Mohan 	qsort(out, total, sizeof(double), cmp_double);
6508158c11SPuranjay Mohan 	return total;
6608158c11SPuranjay Mohan }
6708158c11SPuranjay Mohan 
68*abac8acbSPuranjay Mohan static int filter_outliers_iqr(double *sorted, int n)
69*abac8acbSPuranjay Mohan {
70*abac8acbSPuranjay Mohan 	double q1, q3, iqr, lo, hi;
71*abac8acbSPuranjay Mohan 	int start = 0, end = n;
72*abac8acbSPuranjay Mohan 
73*abac8acbSPuranjay Mohan 	if (n < 8)
74*abac8acbSPuranjay Mohan 		return n;
75*abac8acbSPuranjay Mohan 
76*abac8acbSPuranjay Mohan 	q1 = sorted[n / 4];
77*abac8acbSPuranjay Mohan 	q3 = sorted[3 * n / 4];
78*abac8acbSPuranjay Mohan 	iqr = q3 - q1;
79*abac8acbSPuranjay Mohan 	lo = q1 - 1.5 * iqr;
80*abac8acbSPuranjay Mohan 	hi = q3 + 1.5 * iqr;
81*abac8acbSPuranjay Mohan 
82*abac8acbSPuranjay Mohan 	while (start < end && sorted[start] < lo)
83*abac8acbSPuranjay Mohan 		start++;
84*abac8acbSPuranjay Mohan 	while (end > start && sorted[end - 1] > hi)
85*abac8acbSPuranjay Mohan 		end--;
86*abac8acbSPuranjay Mohan 
87*abac8acbSPuranjay Mohan 	if (start > 0)
88*abac8acbSPuranjay Mohan 		memmove(sorted, sorted + start, (end - start) * sizeof(double));
89*abac8acbSPuranjay Mohan 
90*abac8acbSPuranjay Mohan 	return end - start;
91*abac8acbSPuranjay Mohan }
92*abac8acbSPuranjay Mohan 
9308158c11SPuranjay Mohan static void compute_stats(const double *sorted, int n,
9408158c11SPuranjay Mohan 			  struct timing_stats *s)
9508158c11SPuranjay Mohan {
9608158c11SPuranjay Mohan 	double sum = 0, var_sum = 0;
9708158c11SPuranjay Mohan 
9808158c11SPuranjay Mohan 	memset(s, 0, sizeof(*s));
9908158c11SPuranjay Mohan 	s->count = n;
10008158c11SPuranjay Mohan 
10108158c11SPuranjay Mohan 	if (n == 0)
10208158c11SPuranjay Mohan 		return;
10308158c11SPuranjay Mohan 
10408158c11SPuranjay Mohan 	s->min    = sorted[0];
10508158c11SPuranjay Mohan 	s->max    = sorted[n - 1];
10608158c11SPuranjay Mohan 	s->median = sorted[n / 2];
10708158c11SPuranjay Mohan 	s->p99    = percentile(sorted, n, 99);
10808158c11SPuranjay Mohan 
10908158c11SPuranjay Mohan 	for (int i = 0; i < n; i++)
11008158c11SPuranjay Mohan 		sum += sorted[i];
11108158c11SPuranjay Mohan 	s->mean = sum / n;
11208158c11SPuranjay Mohan 
11308158c11SPuranjay Mohan 	for (int i = 0; i < n; i++) {
11408158c11SPuranjay Mohan 		double d = sorted[i] - s->mean;
11508158c11SPuranjay Mohan 
11608158c11SPuranjay Mohan 		var_sum += d * d;
11708158c11SPuranjay Mohan 	}
11808158c11SPuranjay Mohan 	s->stddev = n > 1 ? sqrt(var_sum / (n - 1)) : 0;
11908158c11SPuranjay Mohan }
12008158c11SPuranjay Mohan 
12108158c11SPuranjay Mohan void bpf_bench_timing_measure(struct bpf_bench_timing *t, struct bench_res *res)
12208158c11SPuranjay Mohan {
12308158c11SPuranjay Mohan 	unsigned int nr_cpus;
12408158c11SPuranjay Mohan 	__u32 total_samples;
12508158c11SPuranjay Mohan 	int i;
12608158c11SPuranjay Mohan 
12708158c11SPuranjay Mohan 	t->warmup_ticks++;
12808158c11SPuranjay Mohan 
12908158c11SPuranjay Mohan 	if (t->warmup_ticks < env.warmup_sec)
13008158c11SPuranjay Mohan 		return;
13108158c11SPuranjay Mohan 
13208158c11SPuranjay Mohan 	if (t->warmup_ticks == env.warmup_sec) {
13308158c11SPuranjay Mohan 		*t->timing_enabled = 1;
13408158c11SPuranjay Mohan 		return;
13508158c11SPuranjay Mohan 	}
13608158c11SPuranjay Mohan 
13708158c11SPuranjay Mohan 	nr_cpus = bpf_num_possible_cpus();
13808158c11SPuranjay Mohan 	if (nr_cpus > BENCH_NR_CPUS)
13908158c11SPuranjay Mohan 		nr_cpus = BENCH_NR_CPUS;
14008158c11SPuranjay Mohan 
14108158c11SPuranjay Mohan 	total_samples = 0;
14208158c11SPuranjay Mohan 	for (i = 0; i < (int)nr_cpus; i++) {
14308158c11SPuranjay Mohan 		__u32 cnt = t->idx[i];
14408158c11SPuranjay Mohan 
14508158c11SPuranjay Mohan 		if (cnt > BENCH_NR_SAMPLES)
14608158c11SPuranjay Mohan 			cnt = BENCH_NR_SAMPLES;
14708158c11SPuranjay Mohan 		total_samples += cnt;
14808158c11SPuranjay Mohan 	}
14908158c11SPuranjay Mohan 
15008158c11SPuranjay Mohan 	if (total_samples >= (__u32)env.producer_cnt * t->target_samples && !t->done) {
15108158c11SPuranjay Mohan 		t->done = true;
15208158c11SPuranjay Mohan 		*t->timing_enabled = 0;
15308158c11SPuranjay Mohan 		bench_force_done();
15408158c11SPuranjay Mohan 	}
15508158c11SPuranjay Mohan }
15608158c11SPuranjay Mohan 
15708158c11SPuranjay Mohan void bpf_bench_timing_report(struct bpf_bench_timing *t, const char *name, const char *description)
15808158c11SPuranjay Mohan {
15908158c11SPuranjay Mohan 	int max_out = BENCH_NR_CPUS * BENCH_NR_SAMPLES;
16008158c11SPuranjay Mohan 	struct timing_stats s;
16108158c11SPuranjay Mohan 	double *all;
16208158c11SPuranjay Mohan 	int total;
16308158c11SPuranjay Mohan 
16408158c11SPuranjay Mohan 	all = calloc(max_out, sizeof(*all));
16508158c11SPuranjay Mohan 	if (!all) {
16608158c11SPuranjay Mohan 		fprintf(stderr, "failed to allocate timing buffer\n");
16708158c11SPuranjay Mohan 		return;
16808158c11SPuranjay Mohan 	}
16908158c11SPuranjay Mohan 
17008158c11SPuranjay Mohan 	total = collect_samples(t, all, max_out);
17108158c11SPuranjay Mohan 
17208158c11SPuranjay Mohan 	if (total == 0) {
17308158c11SPuranjay Mohan 		printf("No timing samples collected.\n");
17408158c11SPuranjay Mohan 		free(all);
17508158c11SPuranjay Mohan 		return;
17608158c11SPuranjay Mohan 	}
17708158c11SPuranjay Mohan 
178*abac8acbSPuranjay Mohan 	total = filter_outliers_iqr(all, total);
17908158c11SPuranjay Mohan 	compute_stats(all, total, &s);
18008158c11SPuranjay Mohan 
18108158c11SPuranjay Mohan 	if (t->machine_readable) {
18208158c11SPuranjay Mohan 		printf("RESULT scenario=%s samples=%d median=%.2f stddev=%.2f cv=%.2f min=%.2f "
18308158c11SPuranjay Mohan 		       "p99=%.2f max=%.2f\n", name, total, s.median, s.stddev,
18408158c11SPuranjay Mohan 		       s.mean > 0 ? s.stddev / s.mean * 100.0 : 0.0, s.min, s.p99, s.max);
18508158c11SPuranjay Mohan 	} else {
18608158c11SPuranjay Mohan 		printf("%s: median %.2f ns/op, stddev %.2f, p99 %.2f (%d samples)\n", name,
18708158c11SPuranjay Mohan 		       s.median, s.stddev, s.p99, total);
18808158c11SPuranjay Mohan 	}
18908158c11SPuranjay Mohan 
19008158c11SPuranjay Mohan 	free(all);
19108158c11SPuranjay Mohan }
19208158c11SPuranjay Mohan 
19308158c11SPuranjay Mohan #define CALIBRATE_SEED_BATCH	100
19408158c11SPuranjay Mohan #define CALIBRATE_MIN_BATCH	100
19508158c11SPuranjay Mohan #define CALIBRATE_MAX_BATCH	10000000
19608158c11SPuranjay Mohan #define CALIBRATE_TARGET_MS	10
19708158c11SPuranjay Mohan #define CALIBRATE_RUNS		5
19808158c11SPuranjay Mohan #define PROPORTIONALITY_TOL	0.05	/* 5% */
19908158c11SPuranjay Mohan 
20008158c11SPuranjay Mohan static void reset_timing(struct bpf_bench_timing *t)
20108158c11SPuranjay Mohan {
20208158c11SPuranjay Mohan 	*t->timing_enabled = 0;
20308158c11SPuranjay Mohan 	memset(t->samples, 0, sizeof(__u64) * BENCH_NR_CPUS * BENCH_NR_SAMPLES);
20408158c11SPuranjay Mohan 	memset(t->idx, 0, sizeof(__u32) * BENCH_NR_CPUS);
20508158c11SPuranjay Mohan }
20608158c11SPuranjay Mohan 
20708158c11SPuranjay Mohan static __u64 measure_elapsed(struct bpf_bench_timing *t, bpf_bench_run_fn run_fn, void *run_ctx,
20808158c11SPuranjay Mohan 			     __u32 iters, int runs)
20908158c11SPuranjay Mohan {
21008158c11SPuranjay Mohan 	__u64 buf[CALIBRATE_RUNS];
21108158c11SPuranjay Mohan 	int n = 0, i, j;
21208158c11SPuranjay Mohan 
21308158c11SPuranjay Mohan 	reset_timing(t);
21408158c11SPuranjay Mohan 	*t->batch_iters_bss = iters;
21508158c11SPuranjay Mohan 	*t->timing_enabled = 1;
21608158c11SPuranjay Mohan 
21708158c11SPuranjay Mohan 	for (i = 0; i < runs; i++)
21808158c11SPuranjay Mohan 		run_fn(run_ctx);
21908158c11SPuranjay Mohan 
22008158c11SPuranjay Mohan 	*t->timing_enabled = 0;
22108158c11SPuranjay Mohan 
22208158c11SPuranjay Mohan 	for (i = 0; i < BENCH_NR_CPUS && n < runs; i++) {
22308158c11SPuranjay Mohan 		__u32 cnt = t->idx[i];
22408158c11SPuranjay Mohan 
22508158c11SPuranjay Mohan 		for (j = 0; j < (int)cnt && n < runs; j++)
22608158c11SPuranjay Mohan 			buf[n++] = t->samples[i][j];
22708158c11SPuranjay Mohan 	}
22808158c11SPuranjay Mohan 
22908158c11SPuranjay Mohan 	if (n == 0)
23008158c11SPuranjay Mohan 		return 0;
23108158c11SPuranjay Mohan 
23208158c11SPuranjay Mohan 	for (i = 1; i < n; i++) {
23308158c11SPuranjay Mohan 		__u64 key = buf[i];
23408158c11SPuranjay Mohan 
23508158c11SPuranjay Mohan 		j = i - 1;
23608158c11SPuranjay Mohan 		while (j >= 0 && buf[j] > key) {
23708158c11SPuranjay Mohan 			buf[j + 1] = buf[j];
23808158c11SPuranjay Mohan 			j--;
23908158c11SPuranjay Mohan 		}
24008158c11SPuranjay Mohan 		buf[j + 1] = key;
24108158c11SPuranjay Mohan 	}
24208158c11SPuranjay Mohan 
24308158c11SPuranjay Mohan 	return buf[n / 2];
24408158c11SPuranjay Mohan }
24508158c11SPuranjay Mohan 
24608158c11SPuranjay Mohan static __u32 compute_batch_iters(__u64 per_op_ns)
24708158c11SPuranjay Mohan {
24808158c11SPuranjay Mohan 	__u64 target_ns = (__u64)CALIBRATE_TARGET_MS * 1000000ULL;
24908158c11SPuranjay Mohan 	__u32 iters;
25008158c11SPuranjay Mohan 
25108158c11SPuranjay Mohan 	if (per_op_ns == 0)
25208158c11SPuranjay Mohan 		return CALIBRATE_MIN_BATCH;
25308158c11SPuranjay Mohan 
25408158c11SPuranjay Mohan 	iters = target_ns / per_op_ns;
25508158c11SPuranjay Mohan 
25608158c11SPuranjay Mohan 	if (iters < CALIBRATE_MIN_BATCH)
25708158c11SPuranjay Mohan 		iters = CALIBRATE_MIN_BATCH;
25808158c11SPuranjay Mohan 	if (iters > CALIBRATE_MAX_BATCH)
25908158c11SPuranjay Mohan 		iters = CALIBRATE_MAX_BATCH;
26008158c11SPuranjay Mohan 
26108158c11SPuranjay Mohan 	return iters;
26208158c11SPuranjay Mohan }
26308158c11SPuranjay Mohan 
26408158c11SPuranjay Mohan void bpf_bench_calibrate(struct bpf_bench_timing *t, bpf_bench_run_fn run_fn, void *run_ctx)
26508158c11SPuranjay Mohan {
26608158c11SPuranjay Mohan 	__u64 elapsed, per_op_ns;
26708158c11SPuranjay Mohan 	__u64 time_n, time_2n;
26808158c11SPuranjay Mohan 	double ratio;
26908158c11SPuranjay Mohan 
27008158c11SPuranjay Mohan 	elapsed = measure_elapsed(t, run_fn, run_ctx, CALIBRATE_SEED_BATCH, CALIBRATE_RUNS);
27108158c11SPuranjay Mohan 	if (elapsed == 0) {
27208158c11SPuranjay Mohan 		fprintf(stderr, "calibration: no timing samples, using default\n");
27308158c11SPuranjay Mohan 		t->batch_iters = 10000;
27408158c11SPuranjay Mohan 		*t->batch_iters_bss = t->batch_iters;
27508158c11SPuranjay Mohan 		reset_timing(t);
27608158c11SPuranjay Mohan 		return;
27708158c11SPuranjay Mohan 	}
27808158c11SPuranjay Mohan 
27908158c11SPuranjay Mohan 	per_op_ns = elapsed / CALIBRATE_SEED_BATCH;
28008158c11SPuranjay Mohan 	t->batch_iters = compute_batch_iters(per_op_ns);
28108158c11SPuranjay Mohan 
28208158c11SPuranjay Mohan 	time_n = measure_elapsed(t, run_fn, run_ctx, t->batch_iters, CALIBRATE_RUNS);
28308158c11SPuranjay Mohan 	time_2n = measure_elapsed(t, run_fn, run_ctx, t->batch_iters * 2, CALIBRATE_RUNS);
28408158c11SPuranjay Mohan 
28508158c11SPuranjay Mohan 	if (time_n > 0 && time_2n > 0) {
28608158c11SPuranjay Mohan 		ratio = (double)time_2n / (double)time_n;
28708158c11SPuranjay Mohan 
28808158c11SPuranjay Mohan 		if (fabs(ratio - 2.0) / 2.0 > PROPORTIONALITY_TOL)
28908158c11SPuranjay Mohan 			fprintf(stderr,
29008158c11SPuranjay Mohan 				"WARNING: proportionality check failed (2N/N ratio=%.3f, "
29108158c11SPuranjay Mohan 				"expected=2.000, error=%.1f%%)\n  System noise may be affecting "
29208158c11SPuranjay Mohan 				"results.\n",
29308158c11SPuranjay Mohan 				ratio, fabs(ratio - 2.0) / 2.0 * 100.0);
29408158c11SPuranjay Mohan 	}
29508158c11SPuranjay Mohan 
29608158c11SPuranjay Mohan 	*t->batch_iters_bss = t->batch_iters;
29708158c11SPuranjay Mohan 	reset_timing(t);
29808158c11SPuranjay Mohan }
299