xref: /linux/tools/testing/selftests/bpf/benchs/bench_trigger.c (revision 3fd6c59042dbba50391e30862beac979491145fe)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2020 Facebook */
3 #define _GNU_SOURCE
4 #include <argp.h>
5 #include <unistd.h>
6 #include <stdint.h>
7 #include "bpf_util.h"
8 #include "bench.h"
9 #include "trigger_bench.skel.h"
10 #include "trace_helpers.h"
11 
/* Upper bound accepted for --trig-batch-iters */
#define MAX_TRIG_BATCH_ITERS 1000

/* Command-line-tunable knobs for the batched benchmarks */
static struct {
	__u32 batch_iters;	/* in-kernel iterations per one driver prog run */
} args = {
	.batch_iters = 100,	/* default */
};
19 
enum {
	/* custom argp key; a large value keeps it clear of printable
	 * short-option characters (usual argp convention)
	 */
	ARG_TRIG_BATCH_ITERS = 7000,
};

/* argp option table for the batched trigger benchmarks */
static const struct argp_option opts[] = {
	{ "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0,
		"Number of in-kernel iterations per one driver test run"},
	{},
};
29 
parse_arg(int key,char * arg,struct argp_state * state)30 static error_t parse_arg(int key, char *arg, struct argp_state *state)
31 {
32 	long ret;
33 
34 	switch (key) {
35 	case ARG_TRIG_BATCH_ITERS:
36 		ret = strtol(arg, NULL, 10);
37 		if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) {
38 			fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n",
39 				1, MAX_TRIG_BATCH_ITERS);
40 			argp_usage(state);
41 		}
42 		args.batch_iters = ret;
43 		break;
44 	default:
45 		return ARGP_ERR_UNKNOWN;
46 	}
47 
48 	return 0;
49 }
50 
/* argp sub-parser exported to the bench framework (see .argp users below) */
const struct argp bench_trigger_batch_argp = {
	.options = opts,
	.parser = parse_arg,
};
55 
/* adjust slot shift in inc_hits() if changing */
#define MAX_BUCKETS 256

/* suppress attribute warnings; presumably for __nocf_check/__weak on
 * compilers that don't know them -- TODO confirm
 */
#pragma GCC diagnostic ignored "-Wattributes"

/* BPF triggering benchmarks */
static struct trigger_ctx {
	struct trigger_bench *skel;	/* opened/loaded skeleton */
	bool usermode_counters;		/* count hits in user space, not in BPF */
	int driver_prog_fd;		/* overrides default driver prog if non-zero */
} ctx;

/* per-slot hit counters used when counting in user space */
static struct counter base_hits[MAX_BUCKETS];
69 
inc_counter(struct counter * counters)70 static __always_inline void inc_counter(struct counter *counters)
71 {
72 	static __thread int tid = 0;
73 	unsigned slot;
74 
75 	if (unlikely(tid == 0))
76 		tid = sys_gettid();
77 
78 	/* multiplicative hashing, it's fast */
79 	slot = 2654435769U * tid;
80 	slot >>= 24;
81 
82 	atomic_inc(&base_hits[slot].value); /* use highest byte as an index */
83 }
84 
sum_and_reset_counters(struct counter * counters)85 static long sum_and_reset_counters(struct counter *counters)
86 {
87 	int i;
88 	long sum = 0;
89 
90 	for (i = 0; i < MAX_BUCKETS; i++)
91 		sum += atomic_swap(&counters[i].value, 0);
92 	return sum;
93 }
94 
trigger_validate(void)95 static void trigger_validate(void)
96 {
97 	if (env.consumer_cnt != 0) {
98 		fprintf(stderr, "benchmark doesn't support consumer!\n");
99 		exit(1);
100 	}
101 }
102 
trigger_producer(void * input)103 static void *trigger_producer(void *input)
104 {
105 	if (ctx.usermode_counters) {
106 		while (true) {
107 			(void)syscall(__NR_getpgid);
108 			inc_counter(base_hits);
109 		}
110 	} else {
111 		while (true)
112 			(void)syscall(__NR_getpgid);
113 	}
114 	return NULL;
115 }
116 
trigger_producer_batch(void * input)117 static void *trigger_producer_batch(void *input)
118 {
119 	int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver);
120 
121 	while (true)
122 		bpf_prog_test_run_opts(fd, NULL);
123 
124 	return NULL;
125 }
126 
trigger_measure(struct bench_res * res)127 static void trigger_measure(struct bench_res *res)
128 {
129 	if (ctx.usermode_counters)
130 		res->hits = sum_and_reset_counters(base_hits);
131 	else
132 		res->hits = sum_and_reset_counters(ctx.skel->bss->hits);
133 }
134 
/*
 * Open the trigger_bench skeleton and apply common pre-load
 * configuration.  Loading happens later in load_ctx(), after the
 * individual benchmark setup has toggled autoload on its programs.
 */
static void setup_ctx(void)
{
	setup_libbpf();

	ctx.skel = trigger_bench__open();
	if (!ctx.skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	/* default "driver" BPF program */
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);

	/* must be set before load; batch_iters lives in rodata */
	ctx.skel->rodata->batch_iters = args.batch_iters;
}
150 
load_ctx(void)151 static void load_ctx(void)
152 {
153 	int err;
154 
155 	err = trigger_bench__load(ctx.skel);
156 	if (err) {
157 		fprintf(stderr, "failed to open skeleton\n");
158 		exit(1);
159 	}
160 }
161 
/*
 * Attach @prog using its default attach type; fatal on failure.  The
 * returned link is deliberately not tracked -- it stays attached for
 * the lifetime of the benchmark process.
 */
static void attach_bpf(struct bpf_program *prog)
{
	if (!bpf_program__attach(prog)) {
		fprintf(stderr, "failed to attach program!\n");
		exit(1);
	}
}
172 
/* trig-syscall-count: no BPF program at all, count syscalls in user space */
static void trigger_syscall_count_setup(void)
{
	ctx.usermode_counters = true;
}
177 
/* Batched, staying mostly in-kernel triggering setups */
static void trigger_kernel_count_setup(void)
{
	setup_ctx();
	/* trigger_count replaces the default driver entirely, so the whole
	 * counting loop runs in the kernel with no extra program attached
	 */
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
}
188 
/* trig-kprobe: attach kprobe program; default driver stays in place */
static void trigger_kprobe_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
}
196 
/* trig-kretprobe: attach kretprobe program; default driver stays in place */
static void trigger_kretprobe_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);
}
204 
/* trig-kprobe-multi: attach multi-kprobe program */
static void trigger_kprobe_multi_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);
}
212 
/* trig-kretprobe-multi: attach multi-kretprobe program */
static void trigger_kretprobe_multi_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);
}
220 
/* trig-fentry: attach fentry program */
static void trigger_fentry_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
228 
/* trig-fexit: attach fexit program */
static void trigger_fexit_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_fexit);
}
236 
/* trig-fmodret: uses the kfunc-based driver instead of the default one */
static void trigger_fmodret_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
	attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
}
248 
/* trig-tp: tracepoint program, driven by the kfunc-based driver */
static void trigger_tp_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
	attach_bpf(ctx.skel->progs.bench_trigger_tp);
}
260 
/* trig-rawtp: raw tracepoint program, driven by the kfunc-based driver */
static void trigger_rawtp_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
	attach_bpf(ctx.skel->progs.bench_trigger_rawtp);
}
272 
273 /* make sure call is not inlined and not avoided by compiler, so __weak and
274  * inline asm volatile in the body of the function
275  *
276  * There is a performance difference between uprobing at nop location vs other
277  * instructions. So use two different targets, one of which starts with nop
278  * and another doesn't.
279  *
280  * GCC doesn't generate stack setup preamble for these functions due to them
281  * having no input arguments and doing nothing in the body.
282  */
/* attach target whose first instruction is a nop (see comment above) */
__nocf_check __weak void uprobe_target_nop(void)
{
	asm volatile ("nop");
}
287 
/* empty __weak callee; being opaque, the call to it in
 * uprobe_target_push() can't be optimized away
 */
__weak void opaque_noop_func(void)
{
}
291 
/* attach target that does NOT start with a nop (it performs a call) */
__nocf_check __weak int uprobe_target_push(void)
{
	/* overhead of function call is negligible compared to uprobe
	 * triggering, so this shouldn't affect benchmark results much
	 */
	opaque_noop_func();
	return 1;
}
300 
/* attach target with an empty body -- given the missing stack preamble
 * (see comment above), it is effectively just the return instruction
 */
__nocf_check __weak void uprobe_target_ret(void)
{
	asm volatile ("");
}
305 
uprobe_producer_count(void * input)306 static void *uprobe_producer_count(void *input)
307 {
308 	while (true) {
309 		uprobe_target_nop();
310 		inc_counter(base_hits);
311 	}
312 	return NULL;
313 }
314 
uprobe_producer_nop(void * input)315 static void *uprobe_producer_nop(void *input)
316 {
317 	while (true)
318 		uprobe_target_nop();
319 	return NULL;
320 }
321 
uprobe_producer_push(void * input)322 static void *uprobe_producer_push(void *input)
323 {
324 	while (true)
325 		uprobe_target_push();
326 	return NULL;
327 }
328 
uprobe_producer_ret(void * input)329 static void *uprobe_producer_ret(void *input)
330 {
331 	while (true)
332 		uprobe_target_ret();
333 	return NULL;
334 }
335 
usetup(bool use_retprobe,bool use_multi,void * target_addr)336 static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
337 {
338 	size_t uprobe_offset;
339 	struct bpf_link *link;
340 	int err;
341 
342 	setup_libbpf();
343 
344 	ctx.skel = trigger_bench__open();
345 	if (!ctx.skel) {
346 		fprintf(stderr, "failed to open skeleton\n");
347 		exit(1);
348 	}
349 
350 	if (use_multi)
351 		bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true);
352 	else
353 		bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);
354 
355 	err = trigger_bench__load(ctx.skel);
356 	if (err) {
357 		fprintf(stderr, "failed to load skeleton\n");
358 		exit(1);
359 	}
360 
361 	uprobe_offset = get_uprobe_offset(target_addr);
362 	if (use_multi) {
363 		LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
364 			.retprobe = use_retprobe,
365 			.cnt = 1,
366 			.offsets = &uprobe_offset,
367 		);
368 		link = bpf_program__attach_uprobe_multi(
369 			ctx.skel->progs.bench_trigger_uprobe_multi,
370 			-1 /* all PIDs */, "/proc/self/exe", NULL, &opts);
371 		ctx.skel->links.bench_trigger_uprobe_multi = link;
372 	} else {
373 		link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
374 						  use_retprobe,
375 						  -1 /* all PIDs */,
376 						  "/proc/self/exe",
377 						  uprobe_offset);
378 		ctx.skel->links.bench_trigger_uprobe = link;
379 	}
380 	if (!link) {
381 		fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe");
382 		exit(1);
383 	}
384 }
385 
/* trig-usermode-count: pure user-space counting, no probe attached */
static void usermode_count_setup(void)
{
	ctx.usermode_counters = true;
}
390 
/* uprobe at the nop target */
static void uprobe_nop_setup(void)
{
	usetup(false, false /* !use_multi */, &uprobe_target_nop);
}
395 
/* uretprobe at the nop target */
static void uretprobe_nop_setup(void)
{
	usetup(true, false /* !use_multi */, &uprobe_target_nop);
}
400 
/* uprobe at the non-nop (call-performing) target */
static void uprobe_push_setup(void)
{
	usetup(false, false /* !use_multi */, &uprobe_target_push);
}
405 
/* uretprobe at the non-nop (call-performing) target */
static void uretprobe_push_setup(void)
{
	usetup(true, false /* !use_multi */, &uprobe_target_push);
}
410 
/* uprobe at the empty (ret-only) target */
static void uprobe_ret_setup(void)
{
	usetup(false, false /* !use_multi */, &uprobe_target_ret);
}
415 
/* uretprobe at the empty (ret-only) target */
static void uretprobe_ret_setup(void)
{
	usetup(true, false /* !use_multi */, &uprobe_target_ret);
}
420 
/* multi-uprobe at the nop target */
static void uprobe_multi_nop_setup(void)
{
	usetup(false, true /* use_multi */, &uprobe_target_nop);
}
425 
/* multi-uretprobe at the nop target */
static void uretprobe_multi_nop_setup(void)
{
	usetup(true, true /* use_multi */, &uprobe_target_nop);
}
430 
/* multi-uprobe at the non-nop (call-performing) target */
static void uprobe_multi_push_setup(void)
{
	usetup(false, true /* use_multi */, &uprobe_target_push);
}
435 
/* multi-uretprobe at the non-nop (call-performing) target */
static void uretprobe_multi_push_setup(void)
{
	usetup(true, true /* use_multi */, &uprobe_target_push);
}
440 
/* multi-uprobe at the empty (ret-only) target */
static void uprobe_multi_ret_setup(void)
{
	usetup(false, true /* use_multi */, &uprobe_target_ret);
}
445 
/* multi-uretprobe at the empty (ret-only) target */
static void uretprobe_multi_ret_setup(void)
{
	usetup(true, true /* use_multi */, &uprobe_target_ret);
}
450 
/* baseline: raw getpgid() syscall cost, counted entirely in user space */
const struct bench bench_trig_syscall_count = {
	.name = "trig-syscall-count",
	.validate = trigger_validate,
	.setup = trigger_syscall_count_setup,
	.producer_thread = trigger_producer,
	.measure = trigger_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};
460 
/* batched (staying mostly in kernel) kprobe/fentry benchmarks */
/*
 * Declare a struct bench whose producer drives the in-kernel "driver"
 * program via bpf_prog_test_run_opts(); accepts --trig-batch-iters.
 */
#define BENCH_TRIG_KERNEL(KIND, NAME)					\
const struct bench bench_trig_##KIND = {				\
	.name = "trig-" NAME,						\
	.setup = trigger_##KIND##_setup,				\
	.producer_thread = trigger_producer_batch,			\
	.measure = trigger_measure,					\
	.report_progress = hits_drops_report_progress,			\
	.report_final = hits_drops_report_final,			\
	.argp = &bench_trigger_batch_argp,				\
}

BENCH_TRIG_KERNEL(kernel_count, "kernel-count");
BENCH_TRIG_KERNEL(kprobe, "kprobe");
BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
BENCH_TRIG_KERNEL(fentry, "fentry");
BENCH_TRIG_KERNEL(fexit, "fexit");
BENCH_TRIG_KERNEL(fmodret, "fmodret");
BENCH_TRIG_KERNEL(tp, "tp");
BENCH_TRIG_KERNEL(rawtp, "rawtp");
483 
/* uprobe benchmarks */
/*
 * Declare a struct bench whose producer loops in user space calling
 * one of the uprobe_target_* functions (PRODUCER selects which).
 */
#define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME)			\
const struct bench bench_trig_##KIND = {				\
	.name = "trig-" NAME,						\
	.validate = trigger_validate,					\
	.setup = KIND##_setup,						\
	.producer_thread = uprobe_producer_##PRODUCER,			\
	.measure = trigger_measure,					\
	.report_progress = hits_drops_report_progress,			\
	.report_final = hits_drops_report_final,			\
}

BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count");
BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop");
BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push");
BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret");
BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");
BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");
BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");
BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop");
BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push");
BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");
509