xref: /linux/tools/testing/selftests/bpf/benchs/bench_trigger.c (revision 3124591f686115aca25d772c2ccb7b1e202c3197)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020 Facebook */
3 #define _GNU_SOURCE
4 #include <argp.h>
5 #include <unistd.h>
6 #include <stdint.h>
7 #include "bench.h"
8 #include "trigger_bench.skel.h"
9 #include "trace_helpers.h"
10 
11 #define MAX_TRIG_BATCH_ITERS 1000
12 
13 static struct {
14 	__u32 batch_iters;
15 } args = {
16 	.batch_iters = 100,
17 };
18 
/* argp key for the --trig-batch-iters long option (no short-option form) */
enum { ARG_TRIG_BATCH_ITERS = 7000 };
22 
23 static const struct argp_option opts[] = {
24 	{ "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0,
25 		"Number of in-kernel iterations per one driver test run"},
26 	{},
27 };
28 
29 static error_t parse_arg(int key, char *arg, struct argp_state *state)
30 {
31 	long ret;
32 
33 	switch (key) {
34 	case ARG_TRIG_BATCH_ITERS:
35 		ret = strtol(arg, NULL, 10);
36 		if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) {
37 			fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n",
38 				1, MAX_TRIG_BATCH_ITERS);
39 			argp_usage(state);
40 		}
41 		args.batch_iters = ret;
42 		break;
43 	default:
44 		return ARGP_ERR_UNKNOWN;
45 	}
46 
47 	return 0;
48 }
49 
50 const struct argp bench_trigger_batch_argp = {
51 	.options = opts,
52 	.parser = parse_arg,
53 };
54 
/* adjust slot shift in inc_counter() if changing */
56 #define MAX_BUCKETS 256
57 
58 #pragma GCC diagnostic ignored "-Wattributes"
59 
60 /* BPF triggering benchmarks */
/* State shared between the setup, producer and measure callbacks. */
struct trigger_ctx {
	/* opened skeleton; stays NULL for the pure user-space benchmarks */
	struct trigger_bench *skel;
	/* when true, hits are read from base_hits instead of the BPF side */
	bool usermode_counters;
	/* when non-zero, replaces trigger_driver in trigger_producer_batch() */
	int driver_prog_fd;
};

static struct trigger_ctx ctx;
66 
67 static struct counter base_hits[MAX_BUCKETS];
68 
69 static __always_inline void inc_counter(struct counter *counters)
70 {
71 	static __thread int tid = 0;
72 	unsigned slot;
73 
74 	if (unlikely(tid == 0))
75 		tid = syscall(SYS_gettid);
76 
77 	/* multiplicative hashing, it's fast */
78 	slot = 2654435769U * tid;
79 	slot >>= 24;
80 
81 	atomic_inc(&base_hits[slot].value); /* use highest byte as an index */
82 }
83 
84 static long sum_and_reset_counters(struct counter *counters)
85 {
86 	int i;
87 	long sum = 0;
88 
89 	for (i = 0; i < MAX_BUCKETS; i++)
90 		sum += atomic_swap(&counters[i].value, 0);
91 	return sum;
92 }
93 
94 static void trigger_validate(void)
95 {
96 	if (env.consumer_cnt != 0) {
97 		fprintf(stderr, "benchmark doesn't support consumer!\n");
98 		exit(1);
99 	}
100 }
101 
102 static void *trigger_producer(void *input)
103 {
104 	if (ctx.usermode_counters) {
105 		while (true) {
106 			(void)syscall(__NR_getpgid);
107 			inc_counter(base_hits);
108 		}
109 	} else {
110 		while (true)
111 			(void)syscall(__NR_getpgid);
112 	}
113 	return NULL;
114 }
115 
116 static void *trigger_producer_batch(void *input)
117 {
118 	int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver);
119 
120 	while (true)
121 		bpf_prog_test_run_opts(fd, NULL);
122 
123 	return NULL;
124 }
125 
126 static void trigger_measure(struct bench_res *res)
127 {
128 	if (ctx.usermode_counters)
129 		res->hits = sum_and_reset_counters(base_hits);
130 	else
131 		res->hits = sum_and_reset_counters(ctx.skel->bss->hits);
132 }
133 
134 static void setup_ctx(void)
135 {
136 	setup_libbpf();
137 
138 	ctx.skel = trigger_bench__open();
139 	if (!ctx.skel) {
140 		fprintf(stderr, "failed to open skeleton\n");
141 		exit(1);
142 	}
143 
144 	/* default "driver" BPF program */
145 	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);
146 
147 	ctx.skel->rodata->batch_iters = args.batch_iters;
148 }
149 
150 static void load_ctx(void)
151 {
152 	int err;
153 
154 	err = trigger_bench__load(ctx.skel);
155 	if (err) {
156 		fprintf(stderr, "failed to open skeleton\n");
157 		exit(1);
158 	}
159 }
160 
/*
 * Attach @prog, exiting the process if that fails. The returned link is not
 * stored anywhere by this function.
 */
static void attach_bpf(struct bpf_program *prog)
{
	if (bpf_program__attach(prog))
		return;

	fprintf(stderr, "failed to attach program!\n");
	exit(1);
}
171 
172 static void trigger_syscall_count_setup(void)
173 {
174 	ctx.usermode_counters = true;
175 }
176 
177 /* Batched, staying mostly in-kernel triggering setups */
178 static void trigger_kernel_count_setup(void)
179 {
180 	setup_ctx();
181 	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
182 	bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);
183 	load_ctx();
184 	/* override driver program */
185 	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
186 }
187 
188 static void trigger_kprobe_setup(void)
189 {
190 	setup_ctx();
191 	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true);
192 	load_ctx();
193 	attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
194 }
195 
196 static void trigger_kretprobe_setup(void)
197 {
198 	setup_ctx();
199 	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true);
200 	load_ctx();
201 	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);
202 }
203 
204 static void trigger_kprobe_multi_setup(void)
205 {
206 	setup_ctx();
207 	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true);
208 	load_ctx();
209 	attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);
210 }
211 
212 static void trigger_kretprobe_multi_setup(void)
213 {
214 	setup_ctx();
215 	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true);
216 	load_ctx();
217 	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);
218 }
219 
220 static void trigger_fentry_setup(void)
221 {
222 	setup_ctx();
223 	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true);
224 	load_ctx();
225 	attach_bpf(ctx.skel->progs.bench_trigger_fentry);
226 }
227 
228 static void trigger_fexit_setup(void)
229 {
230 	setup_ctx();
231 	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true);
232 	load_ctx();
233 	attach_bpf(ctx.skel->progs.bench_trigger_fexit);
234 }
235 
236 /* make sure call is not inlined and not avoided by compiler, so __weak and
237  * inline asm volatile in the body of the function
238  *
239  * There is a performance difference between uprobing at nop location vs other
240  * instructions. So use two different targets, one of which starts with nop
241  * and another doesn't.
242  *
 * GCC doesn't generate stack setup preamble for these functions due to them
244  * having no input arguments and doing nothing in the body.
245  */
/* uprobe target whose body is a single explicit nop emitted via inline asm.
 * NOTE(review): __nocf_check presumably keeps control-flow instrumentation
 * from adding an instruction ahead of the nop -- confirm against the macro.
 */
__nocf_check __weak void uprobe_target_nop(void)
{
	asm volatile ("nop");
}
250 
/* Empty function called from uprobe_target_push(); marked __weak so the
 * call is kept as a real call rather than inlined away.
 */
__weak void opaque_noop_func(void)
{
}
254 
/* uprobe target that does not start with a nop: it calls an opaque function
 * and returns 1.
 * NOTE(review): per the name, the first instruction is presumably a push on
 * the benchmarked architecture -- confirm in the disassembly.
 */
__nocf_check __weak int uprobe_target_push(void)
{
	/* overhead of function call is negligible compared to uprobe
	 * triggering, so this shouldn't affect benchmark results much
	 */
	opaque_noop_func();
	return 1;
}
263 
/* uprobe target with an empty body; the empty volatile asm keeps the compiler
 * from treating the function as removable.
 * NOTE(review): presumably compiles down to a bare return instruction, per
 * the name -- confirm in the disassembly.
 */
__nocf_check __weak void uprobe_target_ret(void)
{
	asm volatile ("");
}
268 
269 static void *uprobe_producer_count(void *input)
270 {
271 	while (true) {
272 		uprobe_target_nop();
273 		inc_counter(base_hits);
274 	}
275 	return NULL;
276 }
277 
/* Producer thread: call uprobe_target_nop() in an endless loop. */
static void *uprobe_producer_nop(void *input)
{
	for (;;)
		uprobe_target_nop();

	return NULL;
}
284 
/* Producer thread: call uprobe_target_push() in an endless loop. */
static void *uprobe_producer_push(void *input)
{
	for (;;)
		uprobe_target_push();

	return NULL;
}
291 
/* Producer thread: call uprobe_target_ret() in an endless loop. */
static void *uprobe_producer_ret(void *input)
{
	for (;;)
		uprobe_target_ret();

	return NULL;
}
298 
299 static void usetup(bool use_retprobe, void *target_addr)
300 {
301 	size_t uprobe_offset;
302 	struct bpf_link *link;
303 	int err;
304 
305 	setup_libbpf();
306 
307 	ctx.skel = trigger_bench__open();
308 	if (!ctx.skel) {
309 		fprintf(stderr, "failed to open skeleton\n");
310 		exit(1);
311 	}
312 
313 	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);
314 
315 	err = trigger_bench__load(ctx.skel);
316 	if (err) {
317 		fprintf(stderr, "failed to load skeleton\n");
318 		exit(1);
319 	}
320 
321 	uprobe_offset = get_uprobe_offset(target_addr);
322 	link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
323 					  use_retprobe,
324 					  -1 /* all PIDs */,
325 					  "/proc/self/exe",
326 					  uprobe_offset);
327 	if (!link) {
328 		fprintf(stderr, "failed to attach uprobe!\n");
329 		exit(1);
330 	}
331 	ctx.skel->links.bench_trigger_uprobe = link;
332 }
333 
334 static void usermode_count_setup(void)
335 {
336 	ctx.usermode_counters = true;
337 }
338 
339 static void uprobe_nop_setup(void)
340 {
341 	usetup(false, &uprobe_target_nop);
342 }
343 
344 static void uretprobe_nop_setup(void)
345 {
346 	usetup(true, &uprobe_target_nop);
347 }
348 
349 static void uprobe_push_setup(void)
350 {
351 	usetup(false, &uprobe_target_push);
352 }
353 
354 static void uretprobe_push_setup(void)
355 {
356 	usetup(true, &uprobe_target_push);
357 }
358 
359 static void uprobe_ret_setup(void)
360 {
361 	usetup(false, &uprobe_target_ret);
362 }
363 
364 static void uretprobe_ret_setup(void)
365 {
366 	usetup(true, &uprobe_target_ret);
367 }
368 
369 const struct bench bench_trig_syscall_count = {
370 	.name = "trig-syscall-count",
371 	.validate = trigger_validate,
372 	.setup = trigger_syscall_count_setup,
373 	.producer_thread = trigger_producer,
374 	.measure = trigger_measure,
375 	.report_progress = hits_drops_report_progress,
376 	.report_final = hits_drops_report_final,
377 };
378 
379 /* batched (staying mostly in kernel) kprobe/fentry benchmarks */
380 #define BENCH_TRIG_KERNEL(KIND, NAME)					\
381 const struct bench bench_trig_##KIND = {				\
382 	.name = "trig-" NAME,						\
383 	.setup = trigger_##KIND##_setup,				\
384 	.producer_thread = trigger_producer_batch,			\
385 	.measure = trigger_measure,					\
386 	.report_progress = hits_drops_report_progress,			\
387 	.report_final = hits_drops_report_final,			\
388 	.argp = &bench_trigger_batch_argp,				\
389 }
390 
391 BENCH_TRIG_KERNEL(kernel_count, "kernel-count");
392 BENCH_TRIG_KERNEL(kprobe, "kprobe");
393 BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
394 BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
395 BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
396 BENCH_TRIG_KERNEL(fentry, "fentry");
397 BENCH_TRIG_KERNEL(fexit, "fexit");
398 
399 /* uprobe benchmarks */
400 #define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME)			\
401 const struct bench bench_trig_##KIND = {				\
402 	.name = "trig-" NAME,						\
403 	.validate = trigger_validate,					\
404 	.setup = KIND##_setup,						\
405 	.producer_thread = uprobe_producer_##PRODUCER,			\
406 	.measure = trigger_measure,					\
407 	.report_progress = hits_drops_report_progress,			\
408 	.report_final = hits_drops_report_final,			\
409 }
410 
411 BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count");
412 BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop");
413 BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push");
414 BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret");
415 BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");
416 BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");
417 BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");
418