1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2020 Facebook */ 3 #define _GNU_SOURCE 4 #include <argp.h> 5 #include <unistd.h> 6 #include <stdint.h> 7 #include "bpf_util.h" 8 #include "bench.h" 9 #include "trigger_bench.skel.h" 10 #include "trace_helpers.h" 11 12 #define MAX_TRIG_BATCH_ITERS 1000 13 14 static struct { 15 __u32 batch_iters; 16 } args = { 17 .batch_iters = 100, 18 }; 19 20 enum { 21 ARG_TRIG_BATCH_ITERS = 7000, 22 }; 23 24 static const struct argp_option opts[] = { 25 { "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0, 26 "Number of in-kernel iterations per one driver test run"}, 27 {}, 28 }; 29 30 static error_t parse_arg(int key, char *arg, struct argp_state *state) 31 { 32 long ret; 33 34 switch (key) { 35 case ARG_TRIG_BATCH_ITERS: 36 ret = strtol(arg, NULL, 10); 37 if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) { 38 fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n", 39 1, MAX_TRIG_BATCH_ITERS); 40 argp_usage(state); 41 } 42 args.batch_iters = ret; 43 break; 44 default: 45 return ARGP_ERR_UNKNOWN; 46 } 47 48 return 0; 49 } 50 51 const struct argp bench_trigger_batch_argp = { 52 .options = opts, 53 .parser = parse_arg, 54 }; 55 56 /* adjust slot shift in inc_hits() if changing */ 57 #define MAX_BUCKETS 256 58 59 #pragma GCC diagnostic ignored "-Wattributes" 60 61 /* BPF triggering benchmarks */ 62 static struct trigger_ctx { 63 struct trigger_bench *skel; 64 bool usermode_counters; 65 int driver_prog_fd; 66 } ctx; 67 68 static struct counter base_hits[MAX_BUCKETS]; 69 70 static __always_inline void inc_counter(struct counter *counters) 71 { 72 static __thread int tid = 0; 73 unsigned slot; 74 75 if (unlikely(tid == 0)) 76 tid = sys_gettid(); 77 78 /* multiplicative hashing, it's fast */ 79 slot = 2654435769U * tid; 80 slot >>= 24; 81 82 atomic_inc(&base_hits[slot].value); /* use highest byte as an index */ 83 } 84 85 static long sum_and_reset_counters(struct counter *counters) 86 { 87 int i; 88 long sum = 0; 89 90 for (i = 0; i < MAX_BUCKETS; i++) 91 sum += atomic_swap(&counters[i].value, 0); 92 return sum; 93 } 94 95 static void trigger_validate(void) 96 { 97 if (env.consumer_cnt != 0) { 98 fprintf(stderr, "benchmark doesn't support consumer!\n"); 99 exit(1); 100 } 101 } 102 103 static void *trigger_producer(void *input) 104 { 105 if (ctx.usermode_counters) { 106 while (true) { 107 (void)syscall(__NR_getpgid); 108 inc_counter(base_hits); 109 } 110 } else { 111 while (true) 112 (void)syscall(__NR_getpgid); 113 } 114 return NULL; 115 } 116 117 static void *trigger_producer_batch(void *input) 118 { 119 int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver); 120 121 while (true) 122 bpf_prog_test_run_opts(fd, NULL); 123 124 return NULL; 125 } 126 127 static void trigger_measure(struct bench_res *res) 128 { 129 if (ctx.usermode_counters) 130 res->hits = sum_and_reset_counters(base_hits); 131 else 132 res->hits = sum_and_reset_counters(ctx.skel->bss->hits); 133 } 134 135 static void setup_ctx(void) 136 { 137 setup_libbpf(); 138 139 ctx.skel = trigger_bench__open(); 140 if (!ctx.skel) { 141 fprintf(stderr, "failed to open skeleton\n"); 142 exit(1); 143 } 144 145 /* default "driver" BPF program */ 146 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true); 147 148 ctx.skel->rodata->batch_iters = args.batch_iters; 149 } 150 151 static void load_ctx(void) 152 { 153 int err; 154 155 err = trigger_bench__load(ctx.skel); 156 if (err) { 157 fprintf(stderr, "failed to open skeleton\n"); 158 exit(1); 159 } 160 } 161 162 static void attach_bpf(struct bpf_program *prog) 163 { 164 struct bpf_link *link; 165 166 link = bpf_program__attach(prog); 167 if (!link) { 168 fprintf(stderr, "failed to attach program!\n"); 169 exit(1); 170 } 171 } 172 173 static void trigger_syscall_count_setup(void) 174 { 175 ctx.usermode_counters = true; 176 } 177 178 /* Batched, staying mostly in-kernel triggering setups */ 179 static void trigger_kernel_count_setup(void) 180 { 181 setup_ctx(); 182 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); 183 bpf_program__set_autoload(ctx.skel->progs.trigger_count, true); 184 load_ctx(); 185 /* override driver program */ 186 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count); 187 } 188 189 static void trigger_kprobe_setup(void) 190 { 191 setup_ctx(); 192 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true); 193 load_ctx(); 194 attach_bpf(ctx.skel->progs.bench_trigger_kprobe); 195 } 196 197 static void trigger_kretprobe_setup(void) 198 { 199 setup_ctx(); 200 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true); 201 load_ctx(); 202 attach_bpf(ctx.skel->progs.bench_trigger_kretprobe); 203 } 204 205 static void trigger_kprobe_multi_setup(void) 206 { 207 setup_ctx(); 208 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true); 209 load_ctx(); 210 attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi); 211 } 212 213 static void trigger_kretprobe_multi_setup(void) 214 { 215 setup_ctx(); 216 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true); 217 load_ctx(); 218 attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi); 219 } 220 221 static void trigger_fentry_setup(void) 222 { 223 setup_ctx(); 224 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true); 225 load_ctx(); 226 attach_bpf(ctx.skel->progs.bench_trigger_fentry); 227 } 228 229 static void trigger_fexit_setup(void) 230 { 231 setup_ctx(); 232 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true); 233 load_ctx(); 234 attach_bpf(ctx.skel->progs.bench_trigger_fexit); 235 } 236 237 static void trigger_fmodret_setup(void) 238 { 239 setup_ctx(); 240 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); 241 bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); 242 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true); 243 load_ctx(); 244 /* override driver program */ 245 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); 246 attach_bpf(ctx.skel->progs.bench_trigger_fmodret); 247 } 248 249 static void trigger_tp_setup(void) 250 { 251 setup_ctx(); 252 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); 253 bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); 254 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true); 255 load_ctx(); 256 /* override driver program */ 257 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); 258 attach_bpf(ctx.skel->progs.bench_trigger_tp); 259 } 260 261 static void trigger_rawtp_setup(void) 262 { 263 setup_ctx(); 264 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); 265 bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); 266 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true); 267 load_ctx(); 268 /* override driver program */ 269 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); 270 attach_bpf(ctx.skel->progs.bench_trigger_rawtp); 271 } 272 273 /* make sure call is not inlined and not avoided by compiler, so __weak and 274 * inline asm volatile in the body of the function 275 * 276 * There is a performance difference between uprobing at nop location vs other 277 * instructions. So use two different targets, one of which starts with nop 278 * and another doesn't. 279 * 280 * GCC doesn't generate stack setup preamble for these functions due to them 281 * having no input arguments and doing nothing in the body. 282 */ 283 __nocf_check __weak void uprobe_target_nop(void) 284 { 285 asm volatile ("nop"); 286 } 287 288 __weak void opaque_noop_func(void) 289 { 290 } 291 292 __nocf_check __weak int uprobe_target_push(void) 293 { 294 /* overhead of function call is negligible compared to uprobe 295 * triggering, so this shouldn't affect benchmark results much 296 */ 297 opaque_noop_func(); 298 return 1; 299 } 300 301 __nocf_check __weak void uprobe_target_ret(void) 302 { 303 asm volatile (""); 304 } 305 306 static void *uprobe_producer_count(void *input) 307 { 308 while (true) { 309 uprobe_target_nop(); 310 inc_counter(base_hits); 311 } 312 return NULL; 313 } 314 315 static void *uprobe_producer_nop(void *input) 316 { 317 while (true) 318 uprobe_target_nop(); 319 return NULL; 320 } 321 322 static void *uprobe_producer_push(void *input) 323 { 324 while (true) 325 uprobe_target_push(); 326 return NULL; 327 } 328 329 static void *uprobe_producer_ret(void *input) 330 { 331 while (true) 332 uprobe_target_ret(); 333 return NULL; 334 } 335 336 #ifdef __x86_64__ 337 __nocf_check __weak void uprobe_target_nop5(void) 338 { 339 asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00"); 340 } 341 342 static void *uprobe_producer_nop5(void *input) 343 { 344 while (true) 345 uprobe_target_nop5(); 346 return NULL; 347 } 348 #endif 349 350 static void usetup(bool use_retprobe, bool use_multi, void *target_addr) 351 { 352 size_t uprobe_offset; 353 struct bpf_link *link; 354 int err; 355 356 setup_libbpf(); 357 358 ctx.skel = trigger_bench__open(); 359 if (!ctx.skel) { 360 fprintf(stderr, "failed to open skeleton\n"); 361 exit(1); 362 } 363 364 if (use_multi) 365 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true); 366 else 367 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true); 368 369 err = trigger_bench__load(ctx.skel); 370 if (err) { 371 fprintf(stderr, "failed to load skeleton\n"); 372 exit(1); 373 } 374 375 uprobe_offset = get_uprobe_offset(target_addr); 376 if (use_multi) { 377 LIBBPF_OPTS(bpf_uprobe_multi_opts, opts, 378 .retprobe = use_retprobe, 379 .cnt = 1, 380 .offsets = &uprobe_offset, 381 ); 382 link = bpf_program__attach_uprobe_multi( 383 ctx.skel->progs.bench_trigger_uprobe_multi, 384 -1 /* all PIDs */, "/proc/self/exe", NULL, &opts); 385 ctx.skel->links.bench_trigger_uprobe_multi = link; 386 } else { 387 link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe, 388 use_retprobe, 389 -1 /* all PIDs */, 390 "/proc/self/exe", 391 uprobe_offset); 392 ctx.skel->links.bench_trigger_uprobe = link; 393 } 394 if (!link) { 395 fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe"); 396 exit(1); 397 } 398 } 399 400 static void usermode_count_setup(void) 401 { 402 ctx.usermode_counters = true; 403 } 404 405 static void uprobe_nop_setup(void) 406 { 407 usetup(false, false /* !use_multi */, &uprobe_target_nop); 408 } 409 410 static void uretprobe_nop_setup(void) 411 { 412 usetup(true, false /* !use_multi */, &uprobe_target_nop); 413 } 414 415 static void uprobe_push_setup(void) 416 { 417 usetup(false, false /* !use_multi */, &uprobe_target_push); 418 } 419 420 static void uretprobe_push_setup(void) 421 { 422 usetup(true, false /* !use_multi */, &uprobe_target_push); 423 } 424 425 static void uprobe_ret_setup(void) 426 { 427 usetup(false, false /* !use_multi */, &uprobe_target_ret); 428 } 429 430 static void uretprobe_ret_setup(void) 431 { 432 usetup(true, false /* !use_multi */, &uprobe_target_ret); 433 } 434 435 static void uprobe_multi_nop_setup(void) 436 { 437 usetup(false, true /* use_multi */, &uprobe_target_nop); 438 } 439 440 static void uretprobe_multi_nop_setup(void) 441 { 442 usetup(true, true /* use_multi */, &uprobe_target_nop); 443 } 444 445 static void uprobe_multi_push_setup(void) 446 { 447 usetup(false, true /* use_multi */, &uprobe_target_push); 448 } 449 450 static void uretprobe_multi_push_setup(void) 451 { 452 usetup(true, true /* use_multi */, &uprobe_target_push); 453 } 454 455 static void uprobe_multi_ret_setup(void) 456 { 457 usetup(false, true /* use_multi */, &uprobe_target_ret); 458 } 459 460 static void uretprobe_multi_ret_setup(void) 461 { 462 usetup(true, true /* use_multi */, &uprobe_target_ret); 463 } 464 465 #ifdef __x86_64__ 466 static void uprobe_nop5_setup(void) 467 { 468 usetup(false, false /* !use_multi */, &uprobe_target_nop5); 469 } 470 471 static void uretprobe_nop5_setup(void) 472 { 473 usetup(true, false /* !use_multi */, &uprobe_target_nop5); 474 } 475 476 static void uprobe_multi_nop5_setup(void) 477 { 478 usetup(false, true /* use_multi */, &uprobe_target_nop5); 479 } 480 481 static void uretprobe_multi_nop5_setup(void) 482 { 483 usetup(true, true /* use_multi */, &uprobe_target_nop5); 484 } 485 #endif 486 487 const struct bench bench_trig_syscall_count = { 488 .name = "trig-syscall-count", 489 .validate = trigger_validate, 490 .setup = trigger_syscall_count_setup, 491 .producer_thread = trigger_producer, 492 .measure = trigger_measure, 493 .report_progress = hits_drops_report_progress, 494 .report_final = hits_drops_report_final, 495 }; 496 497 /* batched (staying mostly in kernel) kprobe/fentry benchmarks */ 498 #define BENCH_TRIG_KERNEL(KIND, NAME) \ 499 const struct bench bench_trig_##KIND = { \ 500 .name = "trig-" NAME, \ 501 .setup = trigger_##KIND##_setup, \ 502 .producer_thread = trigger_producer_batch, \ 503 .measure = trigger_measure, \ 504 .report_progress = hits_drops_report_progress, \ 505 .report_final = hits_drops_report_final, \ 506 .argp = &bench_trigger_batch_argp, \ 507 } 508 509 BENCH_TRIG_KERNEL(kernel_count, "kernel-count"); 510 BENCH_TRIG_KERNEL(kprobe, "kprobe"); 511 BENCH_TRIG_KERNEL(kretprobe, "kretprobe"); 512 BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi"); 513 BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi"); 514 BENCH_TRIG_KERNEL(fentry, "fentry"); 515 BENCH_TRIG_KERNEL(fexit, "fexit"); 516 BENCH_TRIG_KERNEL(fmodret, "fmodret"); 517 BENCH_TRIG_KERNEL(tp, "tp"); 518 BENCH_TRIG_KERNEL(rawtp, "rawtp"); 519 520 /* uprobe benchmarks */ 521 #define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \ 522 const struct bench bench_trig_##KIND = { \ 523 .name = "trig-" NAME, \ 524 .validate = trigger_validate, \ 525 .setup = KIND##_setup, \ 526 .producer_thread = uprobe_producer_##PRODUCER, \ 527 .measure = trigger_measure, \ 528 .report_progress = hits_drops_report_progress, \ 529 .report_final = hits_drops_report_final, \ 530 } 531 532 BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count"); 533 BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop"); 534 BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push"); 535 BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret"); 536 BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop"); 537 BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push"); 538 BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret"); 539 BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop"); 540 BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push"); 541 BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret"); 542 BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop"); 543 BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push"); 544 BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret"); 545 #ifdef __x86_64__ 546 BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5"); 547 BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5"); 548 BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5"); 549 BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5"); 550 #endif 551