// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#define _GNU_SOURCE
#include <argp.h>
#include <unistd.h>
#include <stdint.h>
#include "bpf_util.h"
#include "bench.h"
#include "trigger_bench.skel.h"
#include "trace_helpers.h"

#define MAX_TRIG_BATCH_ITERS 1000

static struct {
	__u32 batch_iters;
} args = {
	.batch_iters = 100,
};

enum {
	ARG_TRIG_BATCH_ITERS = 7000,
};

static const struct argp_option opts[] = {
	{ "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0,
		"Number of in-kernel iterations per one driver test run"},
	{},
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	long ret;

	switch (key) {
	case ARG_TRIG_BATCH_ITERS:
		ret = strtol(arg, NULL, 10);
		if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) {
			fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n",
				1, MAX_TRIG_BATCH_ITERS);
			argp_usage(state);
		}
		args.batch_iters = ret;
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}

	return 0;
}

const struct argp bench_trigger_batch_argp = {
	.options = opts,
	.parser = parse_arg,
};
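/* Note: this argp is deliberately non-static so the bench harness (bench.c)
 * can merge it in as an argp child and recognize --trig-batch-iters on the
 * shared command line.
 */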

/* adjust slot shift in inc_counter() if changing */
#define MAX_BUCKETS 256

#pragma GCC diagnostic ignored "-Wattributes"

/* BPF triggering benchmarks */
static struct trigger_ctx {
	struct trigger_bench *skel;
	bool usermode_counters;
	int driver_prog_fd;
} ctx;

static struct counter base_hits[MAX_BUCKETS];

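/* To avoid cross-thread contention on a single atomic counter, each producer
 * thread bumps its own slot: the thread ID is hashed into one of MAX_BUCKETS
 * counters, and trigger_measure() later sums (and resets) all of them.
 */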
static __always_inline void inc_counter(struct counter *counters)
{
	static __thread int tid = 0;
	unsigned slot;

	if (unlikely(tid == 0))
		tid = sys_gettid();

	/* multiplicative hashing with Knuth's constant (~2^32 / golden
	 * ratio), it's fast; the highest byte of the product picks the slot
	 */
	slot = 2654435769U * tid;
	slot >>= 24;

	atomic_inc(&counters[slot].value);
}

static long sum_and_reset_counters(struct counter *counters)
{
	int i;
	long sum = 0;

	for (i = 0; i < MAX_BUCKETS; i++)
		sum += atomic_swap(&counters[i].value, 0);
	return sum;
}

static void trigger_validate(void)
{
	if (env.consumer_cnt != 0) {
		fprintf(stderr, "benchmark doesn't support consumer!\n");
		exit(1);
	}
}

static void *trigger_producer(void *input)
{
	if (ctx.usermode_counters) {
		while (true) {
			(void)syscall(__NR_getpgid);
			inc_counter(base_hits);
		}
	} else {
		while (true)
			(void)syscall(__NR_getpgid);
	}
	return NULL;
}

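/* The batched producer amortizes syscall overhead: each
 * bpf_prog_test_run_opts() call runs the "driver" BPF program once, and the
 * driver itself invokes the benchmarked attach point batch_iters times
 * before returning to user space.
 */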
static void *trigger_producer_batch(void *input)
{
	int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver);

	while (true)
		bpf_prog_test_run_opts(fd, NULL);

	return NULL;
}

static void trigger_measure(struct bench_res *res)
{
	if (ctx.usermode_counters)
		res->hits = sum_and_reset_counters(base_hits);
	else
		res->hits = sum_and_reset_counters(ctx.skel->bss->hits);
}

static void setup_ctx(void)
{
	setup_libbpf();

	ctx.skel = trigger_bench__open();
	if (!ctx.skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	/* default "driver" BPF program */
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);

	ctx.skel->rodata->batch_iters = args.batch_iters;
	ctx.skel->rodata->stacktrace = env.stacktrace;
}
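/* The rodata values above are baked in between open and load; the BPF side
 * (progs/trigger_bench.c, from which the skeleton is generated) reads
 * batch_iters as a constant to bound its in-kernel trigger loop.
 */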

static void load_ctx(void)
{
	int err;

	err = trigger_bench__load(ctx.skel);
	if (err) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}
}

static void attach_bpf(struct bpf_program *prog)
{
	struct bpf_link *link;

	link = bpf_program__attach(prog);
	if (!link) {
		fprintf(stderr, "failed to attach program!\n");
		exit(1);
	}
}

static void trigger_syscall_count_setup(void)
{
	ctx.usermode_counters = true;
}

/* Batched, staying mostly in-kernel triggering setups */
static void trigger_kernel_count_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_kernel_count, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_kernel_count);
}

static void trigger_kprobe_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
}

static void trigger_kretprobe_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);
}

static void trigger_kprobe_multi_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);
}

static void trigger_kretprobe_multi_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);
}

static void trigger_fentry_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}

static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe)
{
	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
	char **syms = NULL;
	size_t cnt = 0;

	/* Some recursive functions will be skipped in
	 * bpf_get_ksyms -> skip_entry, as they can introduce significant
	 * overhead. However, it's difficult to skip all the recursive
	 * functions for a debug kernel.
	 *
	 * So, don't run the kprobe-multi-all and kretprobe-multi-all on
	 * a debug kernel.
	 */
	if (bpf_get_ksyms(&syms, &cnt, true)) {
		fprintf(stderr, "failed to get ksyms\n");
		exit(1);
	}

	opts.syms = (const char **) syms;
	opts.cnt = cnt;
	opts.retprobe = kretprobe;
	/* attach empty to all the kernel functions except bpf_get_numa_node_id. */
	if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) {
		fprintf(stderr, "failed to attach kprobe-multi program to all ksyms\n");
		exit(1);
	}
}

static void trigger_kprobe_multi_all_setup(void)
{
	struct bpf_program *prog, *empty;

	setup_ctx();
	empty = ctx.skel->progs.bench_kprobe_multi_empty;
	prog = ctx.skel->progs.bench_trigger_kprobe_multi;
	bpf_program__set_autoload(empty, true);
	bpf_program__set_autoload(prog, true);
	load_ctx();

	attach_ksyms_all(empty, false);
	attach_bpf(prog);
}

static void trigger_kretprobe_multi_all_setup(void)
{
	struct bpf_program *prog, *empty;

	setup_ctx();
	empty = ctx.skel->progs.bench_kretprobe_multi_empty;
	prog = ctx.skel->progs.bench_trigger_kretprobe_multi;
	bpf_program__set_autoload(empty, true);
	bpf_program__set_autoload(prog, true);
	load_ctx();

	attach_ksyms_all(empty, true);
	attach_bpf(prog);
}

static void trigger_fexit_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_fexit);
}

static void trigger_fmodret_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
	attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
}

static void trigger_tp_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
	attach_bpf(ctx.skel->progs.bench_trigger_tp);
}

static void trigger_rawtp_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
	attach_bpf(ctx.skel->progs.bench_trigger_rawtp);
}

/* make sure the calls are not inlined and not avoided by the compiler, so
 * use __weak and inline asm volatile in the body of the function
 *
 * There is a performance difference between uprobing at nop location vs other
 * instructions. So use two different targets, one of which starts with nop
 * and another doesn't.
 *
 * GCC doesn't generate stack setup preamble for these functions due to them
 * having no input arguments and doing nothing in the body.
 */
__nocf_check __weak void uprobe_target_nop(void)
{
	asm volatile ("nop");
}

__weak void opaque_noop_func(void)
{
}

__nocf_check __weak int uprobe_target_push(void)
{
	/* overhead of function call is negligible compared to uprobe
	 * triggering, so this shouldn't affect benchmark results much
	 */
	opaque_noop_func();
	return 1;
}

__nocf_check __weak void uprobe_target_ret(void)
{
	asm volatile ("");
}

static void *uprobe_producer_count(void *input)
{
	while (true) {
		uprobe_target_nop();
		inc_counter(base_hits);
	}
	return NULL;
}

static void *uprobe_producer_nop(void *input)
{
	while (true)
		uprobe_target_nop();
	return NULL;
}

static void *uprobe_producer_push(void *input)
{
	while (true)
		uprobe_target_push();
	return NULL;
}

static void *uprobe_producer_ret(void *input)
{
	while (true)
		uprobe_target_ret();
	return NULL;
}

#ifdef __x86_64__
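/* 0x0f 0x1f 0x44 0x00 0x00 is the canonical x86-64 5-byte NOP
 * (nopl 0x0(%rax,%rax,1)), the same width as a near call/jmp; this gives
 * the benchmark a target whose probed instruction spans 5 patchable bytes,
 * in contrast to the 1-byte nop above.
 */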
__nocf_check __weak void uprobe_target_nop5(void)
{
	asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00");
}

static void *uprobe_producer_nop5(void *input)
{
	while (true)
		uprobe_target_nop5();
	return NULL;
}
#endif

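/* Common uprobe attach path: resolve the target function's file offset
 * within /proc/self/exe via get_uprobe_offset(), then attach either a
 * single (ret)uprobe or a (ret)uprobe-multi link at that offset.
 */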
static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
{
	size_t uprobe_offset;
	struct bpf_link *link;
	int err;

	setup_libbpf();

	ctx.skel = trigger_bench__open();
	if (!ctx.skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	if (use_multi)
		bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true);
	else
		bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);

	err = trigger_bench__load(ctx.skel);
	if (err) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}

	uprobe_offset = get_uprobe_offset(target_addr);
	if (use_multi) {
		LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
			.retprobe = use_retprobe,
			.cnt = 1,
			.offsets = &uprobe_offset,
		);
		link = bpf_program__attach_uprobe_multi(
			ctx.skel->progs.bench_trigger_uprobe_multi,
			-1 /* all PIDs */, "/proc/self/exe", NULL, &opts);
		ctx.skel->links.bench_trigger_uprobe_multi = link;
	} else {
		link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
						  use_retprobe,
						  -1 /* all PIDs */,
						  "/proc/self/exe",
						  uprobe_offset);
		ctx.skel->links.bench_trigger_uprobe = link;
	}
	if (!link) {
		fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe");
		exit(1);
	}
}

static void usermode_count_setup(void)
{
	ctx.usermode_counters = true;
}

static void uprobe_nop_setup(void)
{
	usetup(false, false /* !use_multi */, &uprobe_target_nop);
}

static void uretprobe_nop_setup(void)
{
	usetup(true, false /* !use_multi */, &uprobe_target_nop);
}

static void uprobe_push_setup(void)
{
	usetup(false, false /* !use_multi */, &uprobe_target_push);
}

static void uretprobe_push_setup(void)
{
	usetup(true, false /* !use_multi */, &uprobe_target_push);
}

static void uprobe_ret_setup(void)
{
	usetup(false, false /* !use_multi */, &uprobe_target_ret);
}

static void uretprobe_ret_setup(void)
{
	usetup(true, false /* !use_multi */, &uprobe_target_ret);
}

static void uprobe_multi_nop_setup(void)
{
	usetup(false, true /* use_multi */, &uprobe_target_nop);
}

static void uretprobe_multi_nop_setup(void)
{
	usetup(true, true /* use_multi */, &uprobe_target_nop);
}

static void uprobe_multi_push_setup(void)
{
	usetup(false, true /* use_multi */, &uprobe_target_push);
}

static void uretprobe_multi_push_setup(void)
{
	usetup(true, true /* use_multi */, &uprobe_target_push);
}

static void uprobe_multi_ret_setup(void)
{
	usetup(false, true /* use_multi */, &uprobe_target_ret);
}

static void uretprobe_multi_ret_setup(void)
{
	usetup(true, true /* use_multi */, &uprobe_target_ret);
}

#ifdef __x86_64__
static void uprobe_nop5_setup(void)
{
	usetup(false, false /* !use_multi */, &uprobe_target_nop5);
}

static void uretprobe_nop5_setup(void)
{
	usetup(true, false /* !use_multi */, &uprobe_target_nop5);
}

static void uprobe_multi_nop5_setup(void)
{
	usetup(false, true /* use_multi */, &uprobe_target_nop5);
}

static void uretprobe_multi_nop5_setup(void)
{
	usetup(true, true /* use_multi */, &uprobe_target_nop5);
}
#endif

const struct bench bench_trig_syscall_count = {
	.name = "trig-syscall-count",
	.validate = trigger_validate,
	.setup = trigger_syscall_count_setup,
	.producer_thread = trigger_producer,
	.measure = trigger_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

/* batched (staying mostly in kernel) kprobe/fentry benchmarks */
#define BENCH_TRIG_KERNEL(KIND, NAME)					\
const struct bench bench_trig_##KIND = {				\
	.name = "trig-" NAME,						\
	.setup = trigger_##KIND##_setup,				\
	.producer_thread = trigger_producer_batch,			\
	.measure = trigger_measure,					\
	.report_progress = hits_drops_report_progress,			\
	.report_final = hits_drops_report_final,			\
	.argp = &bench_trigger_batch_argp,				\
}

BENCH_TRIG_KERNEL(kernel_count, "kernel-count");
BENCH_TRIG_KERNEL(kprobe, "kprobe");
BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
BENCH_TRIG_KERNEL(fentry, "fentry");
BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all");
BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all");
BENCH_TRIG_KERNEL(fexit, "fexit");
BENCH_TRIG_KERNEL(fmodret, "fmodret");
BENCH_TRIG_KERNEL(tp, "tp");
BENCH_TRIG_KERNEL(rawtp, "rawtp");
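
/* Each definition above is picked up by the bench harness under its
 * .name; a typical invocation from the selftests/bpf output directory
 * would look like:
 *
 *   ./bench trig-kprobe --trig-batch-iters 500
 */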

/* uprobe benchmarks */
#define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME)			\
const struct bench bench_trig_##KIND = {				\
	.name = "trig-" NAME,						\
	.validate = trigger_validate,					\
	.setup = KIND##_setup,						\
	.producer_thread = uprobe_producer_##PRODUCER,			\
	.measure = trigger_measure,					\
	.report_progress = hits_drops_report_progress,			\
	.report_final = hits_drops_report_final,			\
}

BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count");
BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop");
BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push");
BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret");
BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");
BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");
BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");
BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop");
BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push");
BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");
#ifdef __x86_64__
BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5");
BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5");
BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5");
BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5");
#endif