// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#define _GNU_SOURCE
#include <argp.h>
#include <unistd.h>
#include <stdint.h>
#include "bench.h"
#include "trigger_bench.skel.h"
#include "trace_helpers.h"

#define MAX_TRIG_BATCH_ITERS 1000

static struct {
	__u32 batch_iters;
} args = {
	.batch_iters = 100,
};

enum {
	ARG_TRIG_BATCH_ITERS = 7000,
};

static const struct argp_option opts[] = {
	{ "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0,
		"Number of in-kernel iterations per one driver test run"},
	{},
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	long ret;

	switch (key) {
	case ARG_TRIG_BATCH_ITERS:
		ret = strtol(arg, NULL, 10);
		if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) {
			fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n",
				1, MAX_TRIG_BATCH_ITERS);
			argp_usage(state);
		}
		args.batch_iters = ret;
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}

	return 0;
}

const struct argp bench_trigger_batch_argp = {
	.options = opts,
	.parser = parse_arg,
};

/* adjust slot shift in inc_counter() if changing */
#define MAX_BUCKETS 256

#pragma GCC diagnostic ignored "-Wattributes"

/* BPF triggering benchmarks */
static struct trigger_ctx {
	struct trigger_bench *skel;
	bool usermode_counters;
	int driver_prog_fd;
} ctx;

static struct counter base_hits[MAX_BUCKETS];

static __always_inline void inc_counter(struct counter *counters)
{
	static __thread int tid = 0;
	unsigned slot;

	if (unlikely(tid == 0))
		tid = syscall(SYS_gettid);

	/* multiplicative hashing, it's fast */
	slot = 2654435769U * tid;
	slot >>= 24; /* use highest byte as an index */

	atomic_inc(&counters[slot].value);
}

static long sum_and_reset_counters(struct counter *counters)
{
	int i;
	long sum = 0;

	for (i = 0; i < MAX_BUCKETS; i++)
		sum += atomic_swap(&counters[i].value, 0);
	return sum;
}

static void trigger_validate(void)
{
	if (env.consumer_cnt != 0) {
		fprintf(stderr, "benchmark doesn't support consumer!\n");
		exit(1);
	}
}

static void *trigger_producer(void *input)
{
	if (ctx.usermode_counters) {
		while (true) {
			(void)syscall(__NR_getpgid);
			inc_counter(base_hits);
		}
	} else {
		while (true)
			(void)syscall(__NR_getpgid);
	}
	return NULL;
}

static void *trigger_producer_batch(void *input)
{
	int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver);

	while (true)
		bpf_prog_test_run_opts(fd, NULL);

	return NULL;
}

static void trigger_measure(struct bench_res *res)
{
	if (ctx.usermode_counters)
		res->hits = sum_and_reset_counters(base_hits);
	else
		res->hits = sum_and_reset_counters(ctx.skel->bss->hits);
}

static void setup_ctx(void)
{
	setup_libbpf();

	ctx.skel = trigger_bench__open();
	if (!ctx.skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	/* default "driver" BPF program */
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);

	ctx.skel->rodata->batch_iters = args.batch_iters;
}

static void load_ctx(void)
{
	int err;

	err = trigger_bench__load(ctx.skel);
	if (err) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}
}
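/*
 * How the batched ("driver") benchmarks below work: instead of paying a
 * user-to-kernel transition per trigger, trigger_producer_batch() invokes a
 * driver BPF program through bpf_prog_test_run_opts(), and the driver loops
 * batch_iters times entirely in the kernel, hitting the benchmarked attach
 * point on each iteration (see --trig-batch-iters above).
 */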
static void attach_bpf(struct bpf_program *prog)
{
	struct bpf_link *link;

	link = bpf_program__attach(prog);
	if (!link) {
		fprintf(stderr, "failed to attach program!\n");
		exit(1);
	}
}

static void trigger_syscall_count_setup(void)
{
	ctx.usermode_counters = true;
}

/* Batched, staying mostly in-kernel triggering setups */
static void trigger_kernel_count_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
}

static void trigger_kprobe_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
}

static void trigger_kretprobe_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);
}

static void trigger_kprobe_multi_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);
}

static void trigger_kretprobe_multi_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);
}

static void trigger_fentry_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}

static void trigger_fexit_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true);
	load_ctx();
	attach_bpf(ctx.skel->progs.bench_trigger_fexit);
}

static void trigger_fmodret_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
	attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
}

static void trigger_tp_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
	attach_bpf(ctx.skel->progs.bench_trigger_tp);
}

static void trigger_rawtp_setup(void)
{
	setup_ctx();
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
	bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
	bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true);
	load_ctx();
	/* override driver program */
	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
	attach_bpf(ctx.skel->progs.bench_trigger_rawtp);
}
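/*
 * For orientation, a minimal sketch of what a kernel-side driver program in
 * trigger_bench.bpf.c conceptually looks like (illustrative only; the exact
 * attach-point helper used upstream is an assumption here):
 *
 *	SEC("?raw_tp")
 *	int trigger_driver(void *ctx)
 *	{
 *		int i;
 *
 *		for (i = 0; i < batch_iters; i++)
 *			(void)bpf_get_numa_node_id(); // assumed attach point
 *		return 0;
 *	}
 */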
/* Make sure the call is not inlined and not optimized out by the compiler:
 * hence the __weak attribute and the inline asm volatile in the body of the
 * function.
 *
 * There is a performance difference between uprobing at a nop location vs
 * other instructions, so use two different targets, one of which starts with
 * a nop and another that doesn't.
 *
 * GCC doesn't generate a stack setup preamble for these functions due to them
 * having no input arguments and doing nothing in the body.
 */
__nocf_check __weak void uprobe_target_nop(void)
{
	asm volatile ("nop");
}

__weak void opaque_noop_func(void)
{
}

__nocf_check __weak int uprobe_target_push(void)
{
	/* overhead of function call is negligible compared to uprobe
	 * triggering, so this shouldn't affect benchmark results much
	 */
	opaque_noop_func();
	return 1;
}

__nocf_check __weak void uprobe_target_ret(void)
{
	asm volatile ("");
}

static void *uprobe_producer_count(void *input)
{
	while (true) {
		uprobe_target_nop();
		inc_counter(base_hits);
	}
	return NULL;
}

static void *uprobe_producer_nop(void *input)
{
	while (true)
		uprobe_target_nop();
	return NULL;
}

static void *uprobe_producer_push(void *input)
{
	while (true)
		uprobe_target_push();
	return NULL;
}

static void *uprobe_producer_ret(void *input)
{
	while (true)
		uprobe_target_ret();
	return NULL;
}

static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
{
	size_t uprobe_offset;
	struct bpf_link *link;
	int err;

	setup_libbpf();

	ctx.skel = trigger_bench__open();
	if (!ctx.skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	if (use_multi)
		bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true);
	else
		bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);

	err = trigger_bench__load(ctx.skel);
	if (err) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}

	uprobe_offset = get_uprobe_offset(target_addr);
	if (use_multi) {
		LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
			.retprobe = use_retprobe,
			.cnt = 1,
			.offsets = &uprobe_offset,
		);
		link = bpf_program__attach_uprobe_multi(
				ctx.skel->progs.bench_trigger_uprobe_multi,
				-1 /* all PIDs */, "/proc/self/exe", NULL, &opts);
		ctx.skel->links.bench_trigger_uprobe_multi = link;
	} else {
		link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
						  use_retprobe,
						  -1 /* all PIDs */,
						  "/proc/self/exe",
						  uprobe_offset);
		ctx.skel->links.bench_trigger_uprobe = link;
	}
	if (!link) {
		fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe");
		exit(1);
	}
}
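/*
 * Note that uprobes attach by (binary path, file offset) rather than by
 * virtual address: usetup() above passes "/proc/self/exe" together with the
 * file offset that get_uprobe_offset() computes from the target function's
 * runtime address.
 */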
"multi-uprobe" : "uprobe"); 381 exit(1); 382 } 383 } 384 385 static void usermode_count_setup(void) 386 { 387 ctx.usermode_counters = true; 388 } 389 390 static void uprobe_nop_setup(void) 391 { 392 usetup(false, false /* !use_multi */, &uprobe_target_nop); 393 } 394 395 static void uretprobe_nop_setup(void) 396 { 397 usetup(true, false /* !use_multi */, &uprobe_target_nop); 398 } 399 400 static void uprobe_push_setup(void) 401 { 402 usetup(false, false /* !use_multi */, &uprobe_target_push); 403 } 404 405 static void uretprobe_push_setup(void) 406 { 407 usetup(true, false /* !use_multi */, &uprobe_target_push); 408 } 409 410 static void uprobe_ret_setup(void) 411 { 412 usetup(false, false /* !use_multi */, &uprobe_target_ret); 413 } 414 415 static void uretprobe_ret_setup(void) 416 { 417 usetup(true, false /* !use_multi */, &uprobe_target_ret); 418 } 419 420 static void uprobe_multi_nop_setup(void) 421 { 422 usetup(false, true /* use_multi */, &uprobe_target_nop); 423 } 424 425 static void uretprobe_multi_nop_setup(void) 426 { 427 usetup(true, true /* use_multi */, &uprobe_target_nop); 428 } 429 430 static void uprobe_multi_push_setup(void) 431 { 432 usetup(false, true /* use_multi */, &uprobe_target_push); 433 } 434 435 static void uretprobe_multi_push_setup(void) 436 { 437 usetup(true, true /* use_multi */, &uprobe_target_push); 438 } 439 440 static void uprobe_multi_ret_setup(void) 441 { 442 usetup(false, true /* use_multi */, &uprobe_target_ret); 443 } 444 445 static void uretprobe_multi_ret_setup(void) 446 { 447 usetup(true, true /* use_multi */, &uprobe_target_ret); 448 } 449 450 const struct bench bench_trig_syscall_count = { 451 .name = "trig-syscall-count", 452 .validate = trigger_validate, 453 .setup = trigger_syscall_count_setup, 454 .producer_thread = trigger_producer, 455 .measure = trigger_measure, 456 .report_progress = hits_drops_report_progress, 457 .report_final = hits_drops_report_final, 458 }; 459 460 /* batched (staying mostly in kernel) kprobe/fentry benchmarks */ 461 #define BENCH_TRIG_KERNEL(KIND, NAME) \ 462 const struct bench bench_trig_##KIND = { \ 463 .name = "trig-" NAME, \ 464 .setup = trigger_##KIND##_setup, \ 465 .producer_thread = trigger_producer_batch, \ 466 .measure = trigger_measure, \ 467 .report_progress = hits_drops_report_progress, \ 468 .report_final = hits_drops_report_final, \ 469 .argp = &bench_trigger_batch_argp, \ 470 } 471 472 BENCH_TRIG_KERNEL(kernel_count, "kernel-count"); 473 BENCH_TRIG_KERNEL(kprobe, "kprobe"); 474 BENCH_TRIG_KERNEL(kretprobe, "kretprobe"); 475 BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi"); 476 BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi"); 477 BENCH_TRIG_KERNEL(fentry, "fentry"); 478 BENCH_TRIG_KERNEL(fexit, "fexit"); 479 BENCH_TRIG_KERNEL(fmodret, "fmodret"); 480 BENCH_TRIG_KERNEL(tp, "tp"); 481 BENCH_TRIG_KERNEL(rawtp, "rawtp"); 482 483 /* uprobe benchmarks */ 484 #define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \ 485 const struct bench bench_trig_##KIND = { \ 486 .name = "trig-" NAME, \ 487 .validate = trigger_validate, \ 488 .setup = KIND##_setup, \ 489 .producer_thread = uprobe_producer_##PRODUCER, \ 490 .measure = trigger_measure, \ 491 .report_progress = hits_drops_report_progress, \ 492 .report_final = hits_drops_report_final, \ 493 } 494 495 BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count"); 496 BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop"); 497 BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push"); 498 BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret"); 499 
BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");
BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");
BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");
BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop");
BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push");
BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");
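/*
 * Example invocations (illustrative; the bench binary and the shared
 * -p/--producers option come from bench.c in this directory):
 *
 *   ./bench trig-kprobe -p 8 --trig-batch-iters 500
 *   ./bench trig-uprobe-nop -p 2
 */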