// SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ #define _GNU_SOURCE #include #include #include #include "bench.h" #include "trigger_bench.skel.h" #include "trace_helpers.h" #define MAX_TRIG_BATCH_ITERS 1000 static struct { __u32 batch_iters; } args = { .batch_iters = 100, }; enum { ARG_TRIG_BATCH_ITERS = 7000, }; static const struct argp_option opts[] = { { "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0, "Number of in-kernel iterations per one driver test run"}, {}, }; static error_t parse_arg(int key, char *arg, struct argp_state *state) { long ret; switch (key) { case ARG_TRIG_BATCH_ITERS: ret = strtol(arg, NULL, 10); if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) { fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n", 1, MAX_TRIG_BATCH_ITERS); argp_usage(state); } args.batch_iters = ret; break; default: return ARGP_ERR_UNKNOWN; } return 0; } const struct argp bench_trigger_batch_argp = { .options = opts, .parser = parse_arg, }; /* adjust slot shift in inc_hits() if changing */ #define MAX_BUCKETS 256 #pragma GCC diagnostic ignored "-Wattributes" /* BPF triggering benchmarks */ static struct trigger_ctx { struct trigger_bench *skel; bool usermode_counters; int driver_prog_fd; } ctx; static struct counter base_hits[MAX_BUCKETS]; static __always_inline void inc_counter(struct counter *counters) { static __thread int tid = 0; unsigned slot; if (unlikely(tid == 0)) tid = syscall(SYS_gettid); /* multiplicative hashing, it's fast */ slot = 2654435769U * tid; slot >>= 24; atomic_inc(&base_hits[slot].value); /* use highest byte as an index */ } static long sum_and_reset_counters(struct counter *counters) { int i; long sum = 0; for (i = 0; i < MAX_BUCKETS; i++) sum += atomic_swap(&counters[i].value, 0); return sum; } static void trigger_validate(void) { if (env.consumer_cnt != 0) { fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } } static void *trigger_producer(void *input) { if 
(ctx.usermode_counters) { while (true) { (void)syscall(__NR_getpgid); inc_counter(base_hits); } } else { while (true) (void)syscall(__NR_getpgid); } return NULL; } static void *trigger_producer_batch(void *input) { int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver); while (true) bpf_prog_test_run_opts(fd, NULL); return NULL; } static void trigger_measure(struct bench_res *res) { if (ctx.usermode_counters) res->hits = sum_and_reset_counters(base_hits); else res->hits = sum_and_reset_counters(ctx.skel->bss->hits); } static void setup_ctx(void) { setup_libbpf(); ctx.skel = trigger_bench__open(); if (!ctx.skel) { fprintf(stderr, "failed to open skeleton\n"); exit(1); } /* default "driver" BPF program */ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true); ctx.skel->rodata->batch_iters = args.batch_iters; } static void load_ctx(void) { int err; err = trigger_bench__load(ctx.skel); if (err) { fprintf(stderr, "failed to open skeleton\n"); exit(1); } } static void attach_bpf(struct bpf_program *prog) { struct bpf_link *link; link = bpf_program__attach(prog); if (!link) { fprintf(stderr, "failed to attach program!\n"); exit(1); } } static void trigger_syscall_count_setup(void) { ctx.usermode_counters = true; } /* Batched, staying mostly in-kernel triggering setups */ static void trigger_kernel_count_setup(void) { setup_ctx(); bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); bpf_program__set_autoload(ctx.skel->progs.trigger_count, true); load_ctx(); /* override driver program */ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count); } static void trigger_kprobe_setup(void) { setup_ctx(); bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true); load_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_kprobe); } static void trigger_kretprobe_setup(void) { setup_ctx(); bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true); load_ctx(); 
attach_bpf(ctx.skel->progs.bench_trigger_kretprobe); } static void trigger_kprobe_multi_setup(void) { setup_ctx(); bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true); load_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi); } static void trigger_kretprobe_multi_setup(void) { setup_ctx(); bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true); load_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi); } static void trigger_fentry_setup(void) { setup_ctx(); bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true); load_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_fentry); } static void trigger_fexit_setup(void) { setup_ctx(); bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true); load_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_fexit); } static void trigger_fmodret_setup(void) { setup_ctx(); bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true); load_ctx(); /* override driver program */ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); attach_bpf(ctx.skel->progs.bench_trigger_fmodret); } static void trigger_tp_setup(void) { setup_ctx(); bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true); load_ctx(); /* override driver program */ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); attach_bpf(ctx.skel->progs.bench_trigger_tp); } static void trigger_rawtp_setup(void) { setup_ctx(); bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true); load_ctx(); /* override driver 
program */ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); attach_bpf(ctx.skel->progs.bench_trigger_rawtp); } /* make sure call is not inlined and not avoided by compiler, so __weak and * inline asm volatile in the body of the function * * There is a performance difference between uprobing at nop location vs other * instructions. So use two different targets, one of which starts with nop * and another doesn't. * * GCC doesn't generate stack setup preamble for these functions due to them * having no input arguments and doing nothing in the body. */ __nocf_check __weak void uprobe_target_nop(void) { asm volatile ("nop"); } __weak void opaque_noop_func(void) { } __nocf_check __weak int uprobe_target_push(void) { /* overhead of function call is negligible compared to uprobe * triggering, so this shouldn't affect benchmark results much */ opaque_noop_func(); return 1; } __nocf_check __weak void uprobe_target_ret(void) { asm volatile (""); } static void *uprobe_producer_count(void *input) { while (true) { uprobe_target_nop(); inc_counter(base_hits); } return NULL; } static void *uprobe_producer_nop(void *input) { while (true) uprobe_target_nop(); return NULL; } static void *uprobe_producer_push(void *input) { while (true) uprobe_target_push(); return NULL; } static void *uprobe_producer_ret(void *input) { while (true) uprobe_target_ret(); return NULL; } static void usetup(bool use_retprobe, bool use_multi, void *target_addr) { size_t uprobe_offset; struct bpf_link *link; int err; setup_libbpf(); ctx.skel = trigger_bench__open(); if (!ctx.skel) { fprintf(stderr, "failed to open skeleton\n"); exit(1); } if (use_multi) bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true); else bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true); err = trigger_bench__load(ctx.skel); if (err) { fprintf(stderr, "failed to load skeleton\n"); exit(1); } uprobe_offset = get_uprobe_offset(target_addr); if (use_multi) { 
LIBBPF_OPTS(bpf_uprobe_multi_opts, opts, .retprobe = use_retprobe, .cnt = 1, .offsets = &uprobe_offset, ); link = bpf_program__attach_uprobe_multi( ctx.skel->progs.bench_trigger_uprobe_multi, -1 /* all PIDs */, "/proc/self/exe", NULL, &opts); ctx.skel->links.bench_trigger_uprobe_multi = link; } else { link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe, use_retprobe, -1 /* all PIDs */, "/proc/self/exe", uprobe_offset); ctx.skel->links.bench_trigger_uprobe = link; } if (!link) { fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe"); exit(1); } } static void usermode_count_setup(void) { ctx.usermode_counters = true; } static void uprobe_nop_setup(void) { usetup(false, false /* !use_multi */, &uprobe_target_nop); } static void uretprobe_nop_setup(void) { usetup(true, false /* !use_multi */, &uprobe_target_nop); } static void uprobe_push_setup(void) { usetup(false, false /* !use_multi */, &uprobe_target_push); } static void uretprobe_push_setup(void) { usetup(true, false /* !use_multi */, &uprobe_target_push); } static void uprobe_ret_setup(void) { usetup(false, false /* !use_multi */, &uprobe_target_ret); } static void uretprobe_ret_setup(void) { usetup(true, false /* !use_multi */, &uprobe_target_ret); } static void uprobe_multi_nop_setup(void) { usetup(false, true /* use_multi */, &uprobe_target_nop); } static void uretprobe_multi_nop_setup(void) { usetup(true, true /* use_multi */, &uprobe_target_nop); } static void uprobe_multi_push_setup(void) { usetup(false, true /* use_multi */, &uprobe_target_push); } static void uretprobe_multi_push_setup(void) { usetup(true, true /* use_multi */, &uprobe_target_push); } static void uprobe_multi_ret_setup(void) { usetup(false, true /* use_multi */, &uprobe_target_ret); } static void uretprobe_multi_ret_setup(void) { usetup(true, true /* use_multi */, &uprobe_target_ret); } const struct bench bench_trig_syscall_count = { .name = "trig-syscall-count", .validate = trigger_validate, 
.setup = trigger_syscall_count_setup, .producer_thread = trigger_producer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, }; /* batched (staying mostly in kernel) kprobe/fentry benchmarks */ #define BENCH_TRIG_KERNEL(KIND, NAME) \ const struct bench bench_trig_##KIND = { \ .name = "trig-" NAME, \ .setup = trigger_##KIND##_setup, \ .producer_thread = trigger_producer_batch, \ .measure = trigger_measure, \ .report_progress = hits_drops_report_progress, \ .report_final = hits_drops_report_final, \ .argp = &bench_trigger_batch_argp, \ } BENCH_TRIG_KERNEL(kernel_count, "kernel-count"); BENCH_TRIG_KERNEL(kprobe, "kprobe"); BENCH_TRIG_KERNEL(kretprobe, "kretprobe"); BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi"); BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi"); BENCH_TRIG_KERNEL(fentry, "fentry"); BENCH_TRIG_KERNEL(fexit, "fexit"); BENCH_TRIG_KERNEL(fmodret, "fmodret"); BENCH_TRIG_KERNEL(tp, "tp"); BENCH_TRIG_KERNEL(rawtp, "rawtp"); /* uprobe benchmarks */ #define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \ const struct bench bench_trig_##KIND = { \ .name = "trig-" NAME, \ .validate = trigger_validate, \ .setup = KIND##_setup, \ .producer_thread = uprobe_producer_##PRODUCER, \ .measure = trigger_measure, \ .report_progress = hits_drops_report_progress, \ .report_final = hits_drops_report_final, \ } BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count"); BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop"); BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push"); BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret"); BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop"); BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push"); BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret"); BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop"); BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push"); BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret"); 
/* remaining multi-uretprobe benchmark definitions, expanded via
 * BENCH_TRIG_USERMODE: KIND##_setup attaches the multi-uretprobe,
 * uprobe_producer_##PRODUCER drives the matching uprobe_target_* function
 */
BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");