1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2020 Facebook */ 3 #define _GNU_SOURCE 4 #include <argp.h> 5 #include <unistd.h> 6 #include <stdint.h> 7 #include "bpf_util.h" 8 #include "bench.h" 9 #include "trigger_bench.skel.h" 10 #include "trace_helpers.h" 11 12 #define MAX_TRIG_BATCH_ITERS 1000 13 14 static struct { 15 __u32 batch_iters; 16 } args = { 17 .batch_iters = 100, 18 }; 19 20 enum { 21 ARG_TRIG_BATCH_ITERS = 7000, 22 }; 23 24 static const struct argp_option opts[] = { 25 { "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0, 26 "Number of in-kernel iterations per one driver test run"}, 27 {}, 28 }; 29 30 static error_t parse_arg(int key, char *arg, struct argp_state *state) 31 { 32 long ret; 33 34 switch (key) { 35 case ARG_TRIG_BATCH_ITERS: 36 ret = strtol(arg, NULL, 10); 37 if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) { 38 fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n", 39 1, MAX_TRIG_BATCH_ITERS); 40 argp_usage(state); 41 } 42 args.batch_iters = ret; 43 break; 44 default: 45 return ARGP_ERR_UNKNOWN; 46 } 47 48 return 0; 49 } 50 51 const struct argp bench_trigger_batch_argp = { 52 .options = opts, 53 .parser = parse_arg, 54 }; 55 56 /* adjust slot shift in inc_hits() if changing */ 57 #define MAX_BUCKETS 256 58 59 #pragma GCC diagnostic ignored "-Wattributes" 60 61 /* BPF triggering benchmarks */ 62 static struct trigger_ctx { 63 struct trigger_bench *skel; 64 bool usermode_counters; 65 int driver_prog_fd; 66 } ctx; 67 68 static struct counter base_hits[MAX_BUCKETS]; 69 70 static __always_inline void inc_counter(struct counter *counters) 71 { 72 static __thread int tid = 0; 73 unsigned slot; 74 75 if (unlikely(tid == 0)) 76 tid = sys_gettid(); 77 78 /* multiplicative hashing, it's fast */ 79 slot = 2654435769U * tid; 80 slot >>= 24; 81 82 atomic_inc(&base_hits[slot].value); /* use highest byte as an index */ 83 } 84 85 static long sum_and_reset_counters(struct counter *counters) 86 { 87 int i; 88 long sum = 0; 89 90 for (i = 0; i < MAX_BUCKETS; i++) 91 sum += atomic_swap(&counters[i].value, 0); 92 return sum; 93 } 94 95 static void trigger_validate(void) 96 { 97 if (env.consumer_cnt != 0) { 98 fprintf(stderr, "benchmark doesn't support consumer!\n"); 99 exit(1); 100 } 101 } 102 103 static void *trigger_producer(void *input) 104 { 105 if (ctx.usermode_counters) { 106 while (true) { 107 (void)syscall(__NR_getpgid); 108 inc_counter(base_hits); 109 } 110 } else { 111 while (true) 112 (void)syscall(__NR_getpgid); 113 } 114 return NULL; 115 } 116 117 static void *trigger_producer_batch(void *input) 118 { 119 int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver); 120 121 while (true) 122 bpf_prog_test_run_opts(fd, NULL); 123 124 return NULL; 125 } 126 127 static void trigger_measure(struct bench_res *res) 128 { 129 if (ctx.usermode_counters) 130 res->hits = sum_and_reset_counters(base_hits); 131 else 132 res->hits = sum_and_reset_counters(ctx.skel->bss->hits); 133 } 134 135 static void setup_ctx(void) 136 { 137 setup_libbpf(); 138 139 ctx.skel = trigger_bench__open(); 140 if (!ctx.skel) { 141 fprintf(stderr, "failed to open skeleton\n"); 142 exit(1); 143 } 144 145 /* default "driver" BPF program */ 146 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true); 147 148 ctx.skel->rodata->batch_iters = args.batch_iters; 149 ctx.skel->rodata->stacktrace = env.stacktrace; 150 } 151 152 static void load_ctx(void) 153 { 154 int err; 155 156 err = trigger_bench__load(ctx.skel); 157 if (err) { 158 fprintf(stderr, "failed to open skeleton\n"); 159 exit(1); 160 } 161 } 162 163 static void attach_bpf(struct bpf_program *prog) 164 { 165 struct bpf_link *link; 166 167 link = bpf_program__attach(prog); 168 if (!link) { 169 fprintf(stderr, "failed to attach program!\n"); 170 exit(1); 171 } 172 } 173 174 static void trigger_syscall_count_setup(void) 175 { 176 ctx.usermode_counters = true; 177 } 178 179 /* Batched, staying mostly in-kernel triggering setups */ 180 static void trigger_kernel_count_setup(void) 181 { 182 setup_ctx(); 183 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); 184 bpf_program__set_autoload(ctx.skel->progs.trigger_kernel_count, true); 185 load_ctx(); 186 /* override driver program */ 187 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_kernel_count); 188 } 189 190 static void trigger_kprobe_setup(void) 191 { 192 setup_ctx(); 193 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true); 194 load_ctx(); 195 attach_bpf(ctx.skel->progs.bench_trigger_kprobe); 196 } 197 198 static void trigger_kretprobe_setup(void) 199 { 200 setup_ctx(); 201 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true); 202 load_ctx(); 203 attach_bpf(ctx.skel->progs.bench_trigger_kretprobe); 204 } 205 206 static void trigger_kprobe_multi_setup(void) 207 { 208 setup_ctx(); 209 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true); 210 load_ctx(); 211 attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi); 212 } 213 214 static void trigger_kretprobe_multi_setup(void) 215 { 216 setup_ctx(); 217 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true); 218 load_ctx(); 219 attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi); 220 } 221 222 static void trigger_fentry_setup(void) 223 { 224 setup_ctx(); 225 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true); 226 load_ctx(); 227 attach_bpf(ctx.skel->progs.bench_trigger_fentry); 228 } 229 230 static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe) 231 { 232 LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); 233 struct bpf_link *link = NULL; 234 struct ksyms *ksyms = NULL; 235 236 /* Some recursive functions will be skipped in 237 * bpf_get_ksyms -> skip_entry, as they can introduce sufficient 238 * overhead. However, it's difficut to skip all the recursive 239 * functions for a debug kernel. 240 * 241 * So, don't run the kprobe-multi-all and kretprobe-multi-all on 242 * a debug kernel. 243 */ 244 if (bpf_get_ksyms(&ksyms, true)) { 245 fprintf(stderr, "failed to get ksyms\n"); 246 exit(1); 247 } 248 249 opts.syms = (const char **)ksyms->filtered_syms; 250 opts.cnt = ksyms->filtered_cnt; 251 opts.retprobe = kretprobe; 252 /* attach empty to all the kernel functions except bpf_get_numa_node_id. */ 253 link = bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts); 254 free_kallsyms_local(ksyms); 255 if (!link) { 256 fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n"); 257 exit(1); 258 } 259 } 260 261 static void trigger_kprobe_multi_all_setup(void) 262 { 263 struct bpf_program *prog, *empty; 264 265 setup_ctx(); 266 empty = ctx.skel->progs.bench_kprobe_multi_empty; 267 prog = ctx.skel->progs.bench_trigger_kprobe_multi; 268 bpf_program__set_autoload(empty, true); 269 bpf_program__set_autoload(prog, true); 270 load_ctx(); 271 272 attach_ksyms_all(empty, false); 273 attach_bpf(prog); 274 } 275 276 static void trigger_kretprobe_multi_all_setup(void) 277 { 278 struct bpf_program *prog, *empty; 279 280 setup_ctx(); 281 empty = ctx.skel->progs.bench_kretprobe_multi_empty; 282 prog = ctx.skel->progs.bench_trigger_kretprobe_multi; 283 bpf_program__set_autoload(empty, true); 284 bpf_program__set_autoload(prog, true); 285 load_ctx(); 286 287 attach_ksyms_all(empty, true); 288 attach_bpf(prog); 289 } 290 291 static void trigger_fexit_setup(void) 292 { 293 setup_ctx(); 294 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true); 295 load_ctx(); 296 attach_bpf(ctx.skel->progs.bench_trigger_fexit); 297 } 298 299 static void trigger_fmodret_setup(void) 300 { 301 setup_ctx(); 302 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); 303 bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); 304 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true); 305 load_ctx(); 306 /* override driver program */ 307 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); 308 attach_bpf(ctx.skel->progs.bench_trigger_fmodret); 309 } 310 311 static void trigger_tp_setup(void) 312 { 313 setup_ctx(); 314 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); 315 bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); 316 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true); 317 load_ctx(); 318 /* override driver program */ 319 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); 320 attach_bpf(ctx.skel->progs.bench_trigger_tp); 321 } 322 323 static void trigger_rawtp_setup(void) 324 { 325 setup_ctx(); 326 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); 327 bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); 328 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true); 329 load_ctx(); 330 /* override driver program */ 331 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); 332 attach_bpf(ctx.skel->progs.bench_trigger_rawtp); 333 } 334 335 /* make sure call is not inlined and not avoided by compiler, so __weak and 336 * inline asm volatile in the body of the function 337 * 338 * There is a performance difference between uprobing at nop location vs other 339 * instructions. So use two different targets, one of which starts with nop 340 * and another doesn't. 341 * 342 * GCC doesn't generate stack setup preamble for these functions due to them 343 * having no input arguments and doing nothing in the body. 344 */ 345 __nocf_check __weak void uprobe_target_nop(void) 346 { 347 asm volatile ("nop"); 348 } 349 350 __weak void opaque_noop_func(void) 351 { 352 } 353 354 __nocf_check __weak int uprobe_target_push(void) 355 { 356 /* overhead of function call is negligible compared to uprobe 357 * triggering, so this shouldn't affect benchmark results much 358 */ 359 opaque_noop_func(); 360 return 1; 361 } 362 363 __nocf_check __weak void uprobe_target_ret(void) 364 { 365 asm volatile (""); 366 } 367 368 static void *uprobe_producer_count(void *input) 369 { 370 while (true) { 371 uprobe_target_nop(); 372 inc_counter(base_hits); 373 } 374 return NULL; 375 } 376 377 static void *uprobe_producer_nop(void *input) 378 { 379 while (true) 380 uprobe_target_nop(); 381 return NULL; 382 } 383 384 static void *uprobe_producer_push(void *input) 385 { 386 while (true) 387 uprobe_target_push(); 388 return NULL; 389 } 390 391 static void *uprobe_producer_ret(void *input) 392 { 393 while (true) 394 uprobe_target_ret(); 395 return NULL; 396 } 397 398 #ifdef __x86_64__ 399 __nocf_check __weak void uprobe_target_nop5(void) 400 { 401 asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00"); 402 } 403 404 static void *uprobe_producer_nop5(void *input) 405 { 406 while (true) 407 uprobe_target_nop5(); 408 return NULL; 409 } 410 411 void usdt_1(void); 412 void usdt_2(void); 413 414 static void *uprobe_producer_usdt_nop(void *input) 415 { 416 while (true) 417 usdt_1(); 418 return NULL; 419 } 420 421 static void *uprobe_producer_usdt_nop5(void *input) 422 { 423 while (true) 424 usdt_2(); 425 return NULL; 426 } 427 #endif 428 429 static void usetup(bool use_retprobe, bool use_multi, void *target_addr) 430 { 431 size_t uprobe_offset; 432 struct bpf_link *link; 433 int err; 434 435 setup_libbpf(); 436 437 ctx.skel = trigger_bench__open(); 438 if (!ctx.skel) { 439 fprintf(stderr, "failed to open skeleton\n"); 440 exit(1); 441 } 442 443 if (use_multi) 444 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true); 445 else 446 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true); 447 448 err = trigger_bench__load(ctx.skel); 449 if (err) { 450 fprintf(stderr, "failed to load skeleton\n"); 451 exit(1); 452 } 453 454 uprobe_offset = get_uprobe_offset(target_addr); 455 if (use_multi) { 456 LIBBPF_OPTS(bpf_uprobe_multi_opts, opts, 457 .retprobe = use_retprobe, 458 .cnt = 1, 459 .offsets = &uprobe_offset, 460 ); 461 link = bpf_program__attach_uprobe_multi( 462 ctx.skel->progs.bench_trigger_uprobe_multi, 463 -1 /* all PIDs */, "/proc/self/exe", NULL, &opts); 464 ctx.skel->links.bench_trigger_uprobe_multi = link; 465 } else { 466 link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe, 467 use_retprobe, 468 -1 /* all PIDs */, 469 "/proc/self/exe", 470 uprobe_offset); 471 ctx.skel->links.bench_trigger_uprobe = link; 472 } 473 if (!link) { 474 fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe"); 475 exit(1); 476 } 477 } 478 479 static void usermode_count_setup(void) 480 { 481 ctx.usermode_counters = true; 482 } 483 484 static void uprobe_nop_setup(void) 485 { 486 usetup(false, false /* !use_multi */, &uprobe_target_nop); 487 } 488 489 static void uretprobe_nop_setup(void) 490 { 491 usetup(true, false /* !use_multi */, &uprobe_target_nop); 492 } 493 494 static void uprobe_push_setup(void) 495 { 496 usetup(false, false /* !use_multi */, &uprobe_target_push); 497 } 498 499 static void uretprobe_push_setup(void) 500 { 501 usetup(true, false /* !use_multi */, &uprobe_target_push); 502 } 503 504 static void uprobe_ret_setup(void) 505 { 506 usetup(false, false /* !use_multi */, &uprobe_target_ret); 507 } 508 509 static void uretprobe_ret_setup(void) 510 { 511 usetup(true, false /* !use_multi */, &uprobe_target_ret); 512 } 513 514 static void uprobe_multi_nop_setup(void) 515 { 516 usetup(false, true /* use_multi */, &uprobe_target_nop); 517 } 518 519 static void uretprobe_multi_nop_setup(void) 520 { 521 usetup(true, true /* use_multi */, &uprobe_target_nop); 522 } 523 524 static void uprobe_multi_push_setup(void) 525 { 526 usetup(false, true /* use_multi */, &uprobe_target_push); 527 } 528 529 static void uretprobe_multi_push_setup(void) 530 { 531 usetup(true, true /* use_multi */, &uprobe_target_push); 532 } 533 534 static void uprobe_multi_ret_setup(void) 535 { 536 usetup(false, true /* use_multi */, &uprobe_target_ret); 537 } 538 539 static void uretprobe_multi_ret_setup(void) 540 { 541 usetup(true, true /* use_multi */, &uprobe_target_ret); 542 } 543 544 #ifdef __x86_64__ 545 static void uprobe_nop5_setup(void) 546 { 547 usetup(false, false /* !use_multi */, &uprobe_target_nop5); 548 } 549 550 static void uretprobe_nop5_setup(void) 551 { 552 usetup(true, false /* !use_multi */, &uprobe_target_nop5); 553 } 554 555 static void uprobe_multi_nop5_setup(void) 556 { 557 usetup(false, true /* use_multi */, &uprobe_target_nop5); 558 } 559 560 static void uretprobe_multi_nop5_setup(void) 561 { 562 usetup(true, true /* use_multi */, &uprobe_target_nop5); 563 } 564 565 static void usdt_setup(const char *name) 566 { 567 struct bpf_link *link; 568 int err; 569 570 setup_libbpf(); 571 572 ctx.skel = trigger_bench__open(); 573 if (!ctx.skel) { 574 fprintf(stderr, "failed to open skeleton\n"); 575 exit(1); 576 } 577 578 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_usdt, true); 579 580 err = trigger_bench__load(ctx.skel); 581 if (err) { 582 fprintf(stderr, "failed to load skeleton\n"); 583 exit(1); 584 } 585 586 link = bpf_program__attach_usdt(ctx.skel->progs.bench_trigger_usdt, 587 0 /*self*/, "/proc/self/exe", 588 "optimized_attach", name, NULL); 589 if (libbpf_get_error(link)) { 590 fprintf(stderr, "failed to attach optimized_attach:%s usdt probe\n", name); 591 exit(1); 592 } 593 ctx.skel->links.bench_trigger_usdt = link; 594 } 595 596 static void usdt_nop_setup(void) 597 { 598 usdt_setup("usdt_1"); 599 } 600 601 static void usdt_nop5_setup(void) 602 { 603 usdt_setup("usdt_2"); 604 } 605 #endif 606 607 const struct bench bench_trig_syscall_count = { 608 .name = "trig-syscall-count", 609 .validate = trigger_validate, 610 .setup = trigger_syscall_count_setup, 611 .producer_thread = trigger_producer, 612 .measure = trigger_measure, 613 .report_progress = hits_drops_report_progress, 614 .report_final = hits_drops_report_final, 615 }; 616 617 /* batched (staying mostly in kernel) kprobe/fentry benchmarks */ 618 #define BENCH_TRIG_KERNEL(KIND, NAME) \ 619 const struct bench bench_trig_##KIND = { \ 620 .name = "trig-" NAME, \ 621 .setup = trigger_##KIND##_setup, \ 622 .producer_thread = trigger_producer_batch, \ 623 .measure = trigger_measure, \ 624 .report_progress = hits_drops_report_progress, \ 625 .report_final = hits_drops_report_final, \ 626 .argp = &bench_trigger_batch_argp, \ 627 } 628 629 BENCH_TRIG_KERNEL(kernel_count, "kernel-count"); 630 BENCH_TRIG_KERNEL(kprobe, "kprobe"); 631 BENCH_TRIG_KERNEL(kretprobe, "kretprobe"); 632 BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi"); 633 BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi"); 634 BENCH_TRIG_KERNEL(fentry, "fentry"); 635 BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all"); 636 BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all"); 637 BENCH_TRIG_KERNEL(fexit, "fexit"); 638 BENCH_TRIG_KERNEL(fmodret, "fmodret"); 639 BENCH_TRIG_KERNEL(tp, "tp"); 640 BENCH_TRIG_KERNEL(rawtp, "rawtp"); 641 642 /* uprobe benchmarks */ 643 #define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \ 644 const struct bench bench_trig_##KIND = { \ 645 .name = "trig-" NAME, \ 646 .validate = trigger_validate, \ 647 .setup = KIND##_setup, \ 648 .producer_thread = uprobe_producer_##PRODUCER, \ 649 .measure = trigger_measure, \ 650 .report_progress = hits_drops_report_progress, \ 651 .report_final = hits_drops_report_final, \ 652 } 653 654 BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count"); 655 BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop"); 656 BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push"); 657 BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret"); 658 BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop"); 659 BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push"); 660 BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret"); 661 BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop"); 662 BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push"); 663 BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret"); 664 BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop"); 665 BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push"); 666 BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret"); 667 #ifdef __x86_64__ 668 BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5"); 669 BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5"); 670 BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5"); 671 BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5"); 672 BENCH_TRIG_USERMODE(usdt_nop, usdt_nop, "usdt-nop"); 673 BENCH_TRIG_USERMODE(usdt_nop5, usdt_nop5, "usdt-nop5"); 674 #endif 675