1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2020 Facebook */ 3 #define _GNU_SOURCE 4 #include <argp.h> 5 #include <unistd.h> 6 #include <stdint.h> 7 #include "bench.h" 8 #include "trigger_bench.skel.h" 9 #include "trace_helpers.h" 10 11 #define MAX_TRIG_BATCH_ITERS 1000 12 13 static struct { 14 __u32 batch_iters; 15 } args = { 16 .batch_iters = 100, 17 }; 18 19 enum { 20 ARG_TRIG_BATCH_ITERS = 7000, 21 }; 22 23 static const struct argp_option opts[] = { 24 { "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0, 25 "Number of in-kernel iterations per one driver test run"}, 26 {}, 27 }; 28 29 static error_t parse_arg(int key, char *arg, struct argp_state *state) 30 { 31 long ret; 32 33 switch (key) { 34 case ARG_TRIG_BATCH_ITERS: 35 ret = strtol(arg, NULL, 10); 36 if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) { 37 fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n", 38 1, MAX_TRIG_BATCH_ITERS); 39 argp_usage(state); 40 } 41 args.batch_iters = ret; 42 break; 43 default: 44 return ARGP_ERR_UNKNOWN; 45 } 46 47 return 0; 48 } 49 50 const struct argp bench_trigger_batch_argp = { 51 .options = opts, 52 .parser = parse_arg, 53 }; 54 55 /* adjust slot shift in inc_hits() if changing */ 56 #define MAX_BUCKETS 256 57 58 #pragma GCC diagnostic ignored "-Wattributes" 59 60 /* BPF triggering benchmarks */ 61 static struct trigger_ctx { 62 struct trigger_bench *skel; 63 bool usermode_counters; 64 int driver_prog_fd; 65 } ctx; 66 67 static struct counter base_hits[MAX_BUCKETS]; 68 69 static __always_inline void inc_counter(struct counter *counters) 70 { 71 static __thread int tid = 0; 72 unsigned slot; 73 74 if (unlikely(tid == 0)) 75 tid = syscall(SYS_gettid); 76 77 /* multiplicative hashing, it's fast */ 78 slot = 2654435769U * tid; 79 slot >>= 24; 80 81 atomic_inc(&base_hits[slot].value); /* use highest byte as an index */ 82 } 83 84 static long sum_and_reset_counters(struct counter *counters) 85 { 86 int i; 87 long sum = 0; 88 89 for (i = 0; i < MAX_BUCKETS; i++) 90 sum += atomic_swap(&counters[i].value, 0); 91 return sum; 92 } 93 94 static void trigger_validate(void) 95 { 96 if (env.consumer_cnt != 0) { 97 fprintf(stderr, "benchmark doesn't support consumer!\n"); 98 exit(1); 99 } 100 } 101 102 static void *trigger_producer(void *input) 103 { 104 if (ctx.usermode_counters) { 105 while (true) { 106 (void)syscall(__NR_getpgid); 107 inc_counter(base_hits); 108 } 109 } else { 110 while (true) 111 (void)syscall(__NR_getpgid); 112 } 113 return NULL; 114 } 115 116 static void *trigger_producer_batch(void *input) 117 { 118 int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver); 119 120 while (true) 121 bpf_prog_test_run_opts(fd, NULL); 122 123 return NULL; 124 } 125 126 static void trigger_measure(struct bench_res *res) 127 { 128 if (ctx.usermode_counters) 129 res->hits = sum_and_reset_counters(base_hits); 130 else 131 res->hits = sum_and_reset_counters(ctx.skel->bss->hits); 132 } 133 134 static void setup_ctx(void) 135 { 136 setup_libbpf(); 137 138 ctx.skel = trigger_bench__open(); 139 if (!ctx.skel) { 140 fprintf(stderr, "failed to open skeleton\n"); 141 exit(1); 142 } 143 144 /* default "driver" BPF program */ 145 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true); 146 147 ctx.skel->rodata->batch_iters = args.batch_iters; 148 } 149 150 static void load_ctx(void) 151 { 152 int err; 153 154 err = trigger_bench__load(ctx.skel); 155 if (err) { 156 fprintf(stderr, "failed to open skeleton\n"); 157 exit(1); 158 } 159 } 160 161 static void attach_bpf(struct bpf_program *prog) 162 { 163 struct bpf_link *link; 164 165 link = bpf_program__attach(prog); 166 if (!link) { 167 fprintf(stderr, "failed to attach program!\n"); 168 exit(1); 169 } 170 } 171 172 static void trigger_syscall_count_setup(void) 173 { 174 ctx.usermode_counters = true; 175 } 176 177 /* Batched, staying mostly in-kernel triggering setups */ 178 static void trigger_kernel_count_setup(void) 179 { 180 setup_ctx(); 181 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); 182 bpf_program__set_autoload(ctx.skel->progs.trigger_count, true); 183 load_ctx(); 184 /* override driver program */ 185 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count); 186 } 187 188 static void trigger_kprobe_setup(void) 189 { 190 setup_ctx(); 191 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true); 192 load_ctx(); 193 attach_bpf(ctx.skel->progs.bench_trigger_kprobe); 194 } 195 196 static void trigger_kretprobe_setup(void) 197 { 198 setup_ctx(); 199 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true); 200 load_ctx(); 201 attach_bpf(ctx.skel->progs.bench_trigger_kretprobe); 202 } 203 204 static void trigger_kprobe_multi_setup(void) 205 { 206 setup_ctx(); 207 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true); 208 load_ctx(); 209 attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi); 210 } 211 212 static void trigger_kretprobe_multi_setup(void) 213 { 214 setup_ctx(); 215 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true); 216 load_ctx(); 217 attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi); 218 } 219 220 static void trigger_fentry_setup(void) 221 { 222 setup_ctx(); 223 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true); 224 load_ctx(); 225 attach_bpf(ctx.skel->progs.bench_trigger_fentry); 226 } 227 228 static void trigger_fexit_setup(void) 229 { 230 setup_ctx(); 231 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true); 232 load_ctx(); 233 attach_bpf(ctx.skel->progs.bench_trigger_fexit); 234 } 235 236 static void trigger_fmodret_setup(void) 237 { 238 setup_ctx(); 239 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); 240 bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); 241 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true); 242 load_ctx(); 243 /* override driver program */ 244 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); 245 attach_bpf(ctx.skel->progs.bench_trigger_fmodret); 246 } 247 248 static void trigger_tp_setup(void) 249 { 250 setup_ctx(); 251 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); 252 bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); 253 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true); 254 load_ctx(); 255 /* override driver program */ 256 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); 257 attach_bpf(ctx.skel->progs.bench_trigger_tp); 258 } 259 260 static void trigger_rawtp_setup(void) 261 { 262 setup_ctx(); 263 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); 264 bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); 265 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true); 266 load_ctx(); 267 /* override driver program */ 268 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); 269 attach_bpf(ctx.skel->progs.bench_trigger_rawtp); 270 } 271 272 /* make sure call is not inlined and not avoided by compiler, so __weak and 273 * inline asm volatile in the body of the function 274 * 275 * There is a performance difference between uprobing at nop location vs other 276 * instructions. So use two different targets, one of which starts with nop 277 * and another doesn't. 278 * 279 * GCC doesn't generate stack setup preample for these functions due to them 280 * having no input arguments and doing nothing in the body. 281 */ 282 __nocf_check __weak void uprobe_target_nop(void) 283 { 284 asm volatile ("nop"); 285 } 286 287 __weak void opaque_noop_func(void) 288 { 289 } 290 291 __nocf_check __weak int uprobe_target_push(void) 292 { 293 /* overhead of function call is negligible compared to uprobe 294 * triggering, so this shouldn't affect benchmark results much 295 */ 296 opaque_noop_func(); 297 return 1; 298 } 299 300 __nocf_check __weak void uprobe_target_ret(void) 301 { 302 asm volatile (""); 303 } 304 305 static void *uprobe_producer_count(void *input) 306 { 307 while (true) { 308 uprobe_target_nop(); 309 inc_counter(base_hits); 310 } 311 return NULL; 312 } 313 314 static void *uprobe_producer_nop(void *input) 315 { 316 while (true) 317 uprobe_target_nop(); 318 return NULL; 319 } 320 321 static void *uprobe_producer_push(void *input) 322 { 323 while (true) 324 uprobe_target_push(); 325 return NULL; 326 } 327 328 static void *uprobe_producer_ret(void *input) 329 { 330 while (true) 331 uprobe_target_ret(); 332 return NULL; 333 } 334 335 static void usetup(bool use_retprobe, void *target_addr) 336 { 337 size_t uprobe_offset; 338 struct bpf_link *link; 339 int err; 340 341 setup_libbpf(); 342 343 ctx.skel = trigger_bench__open(); 344 if (!ctx.skel) { 345 fprintf(stderr, "failed to open skeleton\n"); 346 exit(1); 347 } 348 349 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true); 350 351 err = trigger_bench__load(ctx.skel); 352 if (err) { 353 fprintf(stderr, "failed to load skeleton\n"); 354 exit(1); 355 } 356 357 uprobe_offset = get_uprobe_offset(target_addr); 358 link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe, 359 use_retprobe, 360 -1 /* all PIDs */, 361 "/proc/self/exe", 362 uprobe_offset); 363 if (!link) { 364 fprintf(stderr, "failed to attach uprobe!\n"); 365 exit(1); 366 } 367 ctx.skel->links.bench_trigger_uprobe = link; 368 } 369 370 static void usermode_count_setup(void) 371 { 372 ctx.usermode_counters = true; 373 } 374 375 static void uprobe_nop_setup(void) 376 { 377 usetup(false, &uprobe_target_nop); 378 } 379 380 static void uretprobe_nop_setup(void) 381 { 382 usetup(true, &uprobe_target_nop); 383 } 384 385 static void uprobe_push_setup(void) 386 { 387 usetup(false, &uprobe_target_push); 388 } 389 390 static void uretprobe_push_setup(void) 391 { 392 usetup(true, &uprobe_target_push); 393 } 394 395 static void uprobe_ret_setup(void) 396 { 397 usetup(false, &uprobe_target_ret); 398 } 399 400 static void uretprobe_ret_setup(void) 401 { 402 usetup(true, &uprobe_target_ret); 403 } 404 405 const struct bench bench_trig_syscall_count = { 406 .name = "trig-syscall-count", 407 .validate = trigger_validate, 408 .setup = trigger_syscall_count_setup, 409 .producer_thread = trigger_producer, 410 .measure = trigger_measure, 411 .report_progress = hits_drops_report_progress, 412 .report_final = hits_drops_report_final, 413 }; 414 415 /* batched (staying mostly in kernel) kprobe/fentry benchmarks */ 416 #define BENCH_TRIG_KERNEL(KIND, NAME) \ 417 const struct bench bench_trig_##KIND = { \ 418 .name = "trig-" NAME, \ 419 .setup = trigger_##KIND##_setup, \ 420 .producer_thread = trigger_producer_batch, \ 421 .measure = trigger_measure, \ 422 .report_progress = hits_drops_report_progress, \ 423 .report_final = hits_drops_report_final, \ 424 .argp = &bench_trigger_batch_argp, \ 425 } 426 427 BENCH_TRIG_KERNEL(kernel_count, "kernel-count"); 428 BENCH_TRIG_KERNEL(kprobe, "kprobe"); 429 BENCH_TRIG_KERNEL(kretprobe, "kretprobe"); 430 BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi"); 431 BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi"); 432 BENCH_TRIG_KERNEL(fentry, "fentry"); 433 BENCH_TRIG_KERNEL(fexit, "fexit"); 434 BENCH_TRIG_KERNEL(fmodret, "fmodret"); 435 BENCH_TRIG_KERNEL(tp, "tp"); 436 BENCH_TRIG_KERNEL(rawtp, "rawtp"); 437 438 /* uprobe benchmarks */ 439 #define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \ 440 const struct bench bench_trig_##KIND = { \ 441 .name = "trig-" NAME, \ 442 .validate = trigger_validate, \ 443 .setup = KIND##_setup, \ 444 .producer_thread = uprobe_producer_##PRODUCER, \ 445 .measure = trigger_measure, \ 446 .report_progress = hits_drops_report_progress, \ 447 .report_final = hits_drops_report_final, \ 448 } 449 450 BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count"); 451 BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop"); 452 BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push"); 453 BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret"); 454 BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop"); 455 BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push"); 456 BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret"); 457