// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#define _GNU_SOURCE
#include <argp.h>
#include <unistd.h>
#include <stdint.h>
#include "bench.h"
#include "trigger_bench.skel.h"
#include "trace_helpers.h"

#define MAX_TRIG_BATCH_ITERS 1000

static struct {
	__u32 batch_iters;
} args = {
	.batch_iters = 100,
};

enum {
	ARG_TRIG_BATCH_ITERS = 7000,
};

static const struct argp_option opts[] = {
	{ "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0,
	  "Number of in-kernel iterations per one driver test run"},
	{},
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	long ret;

	switch (key) {
	case ARG_TRIG_BATCH_ITERS:
		ret = strtol(arg, NULL, 10);
		if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) {
			fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n",
				1, MAX_TRIG_BATCH_ITERS);
			argp_usage(state);
		}
		args.batch_iters = ret;
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}

	return 0;
}

const struct argp bench_trigger_batch_argp = {
	.options = opts,
	.parser = parse_arg,
};
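
/* Example invocation (illustrative sketch; the bench binary and scenario
 * names come from this selftests suite, but consult the tool's --help for
 * the full set of flags):
 *
 *   ./bench --trig-batch-iters 500 trig-kprobe
 */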

/* adjust slot shift in inc_counter() if changing */
#define MAX_BUCKETS 256

#pragma GCC diagnostic ignored "-Wattributes"

/* BPF triggering benchmarks */
static struct trigger_ctx {
	struct trigger_bench *skel;
	bool usermode_counters;
	int driver_prog_fd;
} ctx;

static struct counter base_hits[MAX_BUCKETS];

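/* Hits are spread across MAX_BUCKETS counters to reduce cache-line
 * contention between producer threads; each thread picks its bucket by
 * hashing its TID. The multiplier below is 2^32 divided by the golden
 * ratio (Knuth's multiplicative/Fibonacci hashing), which scatters
 * consecutive TIDs evenly across the buckets.
 */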
static __always_inline void inc_counter(struct counter *counters)
{
	static __thread int tid = 0;
	unsigned slot;

	if (unlikely(tid == 0))
		tid = syscall(SYS_gettid);

	/* multiplicative hashing, it's fast */
	slot = 2654435769U * tid;
	slot >>= 24;

	atomic_inc(&counters[slot].value); /* use highest byte as an index */
}

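/* Drain all buckets; atomically swapping each value with zero ensures hits
 * are neither lost nor double-counted across measurement intervals.
 */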
static long sum_and_reset_counters(struct counter *counters)
{
	int i;
	long sum = 0;

	for (i = 0; i < MAX_BUCKETS; i++)
		sum += atomic_swap(&counters[i].value, 0);
	return sum;
}

static void trigger_validate(void)
{
	if (env.consumer_cnt != 0) {
		fprintf(stderr, "benchmark doesn't support consumer!\n");
		exit(1);
	}
}

static void *trigger_producer(void *input)
{
	if (ctx.usermode_counters) {
		while (true) {
			(void)syscall(__NR_getpgid);
			inc_counter(base_hits);
		}
	} else {
		while (true)
			(void)syscall(__NR_getpgid);
	}
	return NULL;
}

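/* Batched producer: each bpf_prog_test_run_opts() call runs the "driver" BPF
 * program once, and the driver loops batch_iters times in the kernel,
 * triggering the benchmarked program on every iteration. This amortizes one
 * syscall's overhead over many trigger events.
 */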
static void *trigger_producer_batch(void *input)
{
	int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver);

	while (true)
		bpf_prog_test_run_opts(fd, NULL);

	return NULL;
}

static void trigger_measure(struct bench_res *res)
{
	if (ctx.usermode_counters)
		res->hits = sum_and_reset_counters(base_hits);
	else
		res->hits = sum_and_reset_counters(ctx.skel->bss->hits);
}

static void setup_ctx(void)
{
	setup_libbpf();

	ctx.skel = trigger_bench__open();
	if (!ctx.skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	/* default "driver" BPF program */
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);

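	/* const rodata can only be written before the skeleton is loaded */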
	ctx.skel->rodata->batch_iters = args.batch_iters;
}

static void load_ctx(void)
{
	int err;

	err = trigger_bench__load(ctx.skel);
	if (err) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}
}

static void attach_bpf(struct bpf_program *prog)
{
	struct bpf_link *link;

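	/* the link is intentionally never destroyed: the program must stay
	 * attached for the lifetime of the benchmark
	 */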
	link = bpf_program__attach(prog);
	if (!link) {
		fprintf(stderr, "failed to attach program!\n");
		exit(1);
	}
}

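/* trig-syscall-count baseline: no BPF program is attached at all; we measure
 * the raw rate of getpgid() syscalls from the producer threads.
 */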
static void trigger_syscall_count_setup(void)
{
	ctx.usermode_counters = true;
}

/* Batched, staying mostly in-kernel triggering setups */
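/* trig-kernel-count baseline: the counting program itself serves as the
 * driver, so nothing is attached and the run stays entirely in the kernel.
 */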
static void trigger_kernel_count_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
}

static void trigger_kprobe_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
}

static void trigger_kretprobe_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);
}

static void trigger_kprobe_multi_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);
}

static void trigger_kretprobe_multi_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);
}

static void trigger_fentry_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}

static void trigger_fexit_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_fexit);
}

static void trigger_fmodret_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
	attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
}

static void trigger_tp_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
	attach_bpf(ctx.skel->progs.bench_trigger_tp);
}

static void trigger_rawtp_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
	attach_bpf(ctx.skel->progs.bench_trigger_rawtp);
}

/* Make sure the call is not inlined or optimized away by the compiler, hence
 * __weak and inline asm volatile in the function body.
 *
 * There is a performance difference between uprobing at a nop location vs
 * other instructions, so use two different targets, one of which starts with
 * a nop and another that doesn't.
 *
 * GCC doesn't generate a stack setup preamble for these functions, since they
 * have no input arguments and do nothing in the body.
 */
__nocf_check __weak void uprobe_target_nop(void)
{
	asm volatile ("nop");
}

__weak void opaque_noop_func(void)
{
}

__nocf_check __weak int uprobe_target_push(void)
{
	/* overhead of function call is negligible compared to uprobe
	 * triggering, so this shouldn't affect benchmark results much
	 */
	opaque_noop_func();
	return 1;
}

__nocf_check __weak void uprobe_target_ret(void)
{
	asm volatile ("");
}

static void *uprobe_producer_count(void *input)
{
	while (true) {
		uprobe_target_nop();
		inc_counter(base_hits);
	}
	return NULL;
}

static void *uprobe_producer_nop(void *input)
{
	while (true)
		uprobe_target_nop();
	return NULL;
}

static void *uprobe_producer_push(void *input)
{
	while (true)
		uprobe_target_push();
	return NULL;
}

static void *uprobe_producer_ret(void *input)
{
	while (true)
		uprobe_target_ret();
	return NULL;
}

static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
{
	size_t uprobe_offset;
	struct bpf_link *link;
	int err;

	setup_libbpf();

	ctx.skel = trigger_bench__open();
	if (!ctx.skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	if (use_multi)
		bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true);
	else
		bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);

	err = trigger_bench__load(ctx.skel);
	if (err) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}

	uprobe_offset = get_uprobe_offset(target_addr);
	if (use_multi) {
		LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
			.retprobe = use_retprobe,
			.cnt = 1,
			.offsets = &uprobe_offset,
		);
		link = bpf_program__attach_uprobe_multi(
				ctx.skel->progs.bench_trigger_uprobe_multi,
				-1 /* all PIDs */, "/proc/self/exe", NULL, &opts);
		ctx.skel->links.bench_trigger_uprobe_multi = link;
	} else {
		link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
						  use_retprobe,
						  -1 /* all PIDs */,
						  "/proc/self/exe",
						  uprobe_offset);
		ctx.skel->links.bench_trigger_uprobe = link;
	}
	if (!link) {
		fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe");
		exit(1);
	}
}

static void usermode_count_setup(void)
{
	ctx.usermode_counters = true;
}

static void uprobe_nop_setup(void)
{
	usetup(false, false /* !use_multi */, &uprobe_target_nop);
}

static void uretprobe_nop_setup(void)
{
	usetup(true, false /* !use_multi */, &uprobe_target_nop);
}

static void uprobe_push_setup(void)
{
	usetup(false, false /* !use_multi */, &uprobe_target_push);
}

static void uretprobe_push_setup(void)
{
	usetup(true, false /* !use_multi */, &uprobe_target_push);
}

static void uprobe_ret_setup(void)
{
	usetup(false, false /* !use_multi */, &uprobe_target_ret);
}

static void uretprobe_ret_setup(void)
{
	usetup(true, false /* !use_multi */, &uprobe_target_ret);
}

static void uprobe_multi_nop_setup(void)
{
	usetup(false, true /* use_multi */, &uprobe_target_nop);
}

static void uretprobe_multi_nop_setup(void)
{
	usetup(true, true /* use_multi */, &uprobe_target_nop);
}

static void uprobe_multi_push_setup(void)
{
	usetup(false, true /* use_multi */, &uprobe_target_push);
}

static void uretprobe_multi_push_setup(void)
{
	usetup(true, true /* use_multi */, &uprobe_target_push);
}

static void uprobe_multi_ret_setup(void)
{
	usetup(false, true /* use_multi */, &uprobe_target_ret);
}

static void uretprobe_multi_ret_setup(void)
{
	usetup(true, true /* use_multi */, &uprobe_target_ret);
}

const struct bench bench_trig_syscall_count = {
	.name = "trig-syscall-count",
	.validate = trigger_validate,
	.setup = trigger_syscall_count_setup,
	.producer_thread = trigger_producer,
	.measure = trigger_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

/* batched (staying mostly in kernel) kprobe/fentry benchmarks */
#define BENCH_TRIG_KERNEL(KIND, NAME) \
const struct bench bench_trig_##KIND = { \
	.name = "trig-" NAME, \
	.setup = trigger_##KIND##_setup, \
	.producer_thread = trigger_producer_batch, \
	.measure = trigger_measure, \
	.report_progress = hits_drops_report_progress, \
	.report_final = hits_drops_report_final, \
	.argp = &bench_trigger_batch_argp, \
}

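/* e.g. BENCH_TRIG_KERNEL(kprobe, "kprobe") defines bench_trig_kprobe, wired
 * to trigger_kprobe_setup() and the batched producer above
 */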
BENCH_TRIG_KERNEL(kernel_count, "kernel-count");
BENCH_TRIG_KERNEL(kprobe, "kprobe");
BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
BENCH_TRIG_KERNEL(fentry, "fentry");
BENCH_TRIG_KERNEL(fexit, "fexit");
BENCH_TRIG_KERNEL(fmodret, "fmodret");
BENCH_TRIG_KERNEL(tp, "tp");
BENCH_TRIG_KERNEL(rawtp, "rawtp");

/* uprobe benchmarks */
#define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \
const struct bench bench_trig_##KIND = { \
	.name = "trig-" NAME, \
	.validate = trigger_validate, \
	.setup = KIND##_setup, \
	.producer_thread = uprobe_producer_##PRODUCER, \
	.measure = trigger_measure, \
	.report_progress = hits_drops_report_progress, \
	.report_final = hits_drops_report_final, \
}

BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count");
BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop");
BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push");
BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret");
BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");
BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");
BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");
BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop");
BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push");
BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");