1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright (c) 2020 Facebook */ 3 #define _GNU_SOURCE 4 #include <argp.h> 5 #include <unistd.h> 6 #include <stdint.h> 7 #include "bench.h" 8 #include "trigger_bench.skel.h" 9 #include "trace_helpers.h" 10 11 #define MAX_TRIG_BATCH_ITERS 1000 12 13 static struct { 14 __u32 batch_iters; 15 } args = { 16 .batch_iters = 100, 17 }; 18 19 enum { 20 ARG_TRIG_BATCH_ITERS = 7000, 21 }; 22 23 static const struct argp_option opts[] = { 24 { "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0, 25 "Number of in-kernel iterations per one driver test run"}, 26 {}, 27 }; 28 29 static error_t parse_arg(int key, char *arg, struct argp_state *state) 30 { 31 long ret; 32 33 switch (key) { 34 case ARG_TRIG_BATCH_ITERS: 35 ret = strtol(arg, NULL, 10); 36 if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) { 37 fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n", 38 1, MAX_TRIG_BATCH_ITERS); 39 argp_usage(state); 40 } 41 args.batch_iters = ret; 42 break; 43 default: 44 return ARGP_ERR_UNKNOWN; 45 } 46 47 return 0; 48 } 49 50 const struct argp bench_trigger_batch_argp = { 51 .options = opts, 52 .parser = parse_arg, 53 }; 54 55 /* adjust slot shift in inc_hits() if changing */ 56 #define MAX_BUCKETS 256 57 58 #pragma GCC diagnostic ignored "-Wattributes" 59 60 /* BPF triggering benchmarks */ 61 static struct trigger_ctx { 62 struct trigger_bench *skel; 63 bool usermode_counters; 64 int driver_prog_fd; 65 } ctx; 66 67 static struct counter base_hits[MAX_BUCKETS]; 68 69 static __always_inline void inc_counter(struct counter *counters) 70 { 71 static __thread int tid = 0; 72 unsigned slot; 73 74 if (unlikely(tid == 0)) 75 tid = syscall(SYS_gettid); 76 77 /* multiplicative hashing, it's fast */ 78 slot = 2654435769U * tid; 79 slot >>= 24; 80 81 atomic_inc(&base_hits[slot].value); /* use highest byte as an index */ 82 } 83 84 static long sum_and_reset_counters(struct counter *counters) 85 { 86 int i; 87 long sum = 0; 88 89 for (i = 0; i < MAX_BUCKETS; i++) 90 sum += atomic_swap(&counters[i].value, 0); 91 return sum; 92 } 93 94 static void trigger_validate(void) 95 { 96 if (env.consumer_cnt != 0) { 97 fprintf(stderr, "benchmark doesn't support consumer!\n"); 98 exit(1); 99 } 100 } 101 102 static void *trigger_producer(void *input) 103 { 104 if (ctx.usermode_counters) { 105 while (true) { 106 (void)syscall(__NR_getpgid); 107 inc_counter(base_hits); 108 } 109 } else { 110 while (true) 111 (void)syscall(__NR_getpgid); 112 } 113 return NULL; 114 } 115 116 static void *trigger_producer_batch(void *input) 117 { 118 int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver); 119 120 while (true) 121 bpf_prog_test_run_opts(fd, NULL); 122 123 return NULL; 124 } 125 126 static void trigger_measure(struct bench_res *res) 127 { 128 if (ctx.usermode_counters) 129 res->hits = sum_and_reset_counters(base_hits); 130 else 131 res->hits = sum_and_reset_counters(ctx.skel->bss->hits); 132 } 133 134 static void setup_ctx(void) 135 { 136 setup_libbpf(); 137 138 ctx.skel = trigger_bench__open(); 139 if (!ctx.skel) { 140 fprintf(stderr, "failed to open skeleton\n"); 141 exit(1); 142 } 143 144 /* default "driver" BPF program */ 145 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true); 146 147 ctx.skel->rodata->batch_iters = args.batch_iters; 148 } 149 150 static void load_ctx(void) 151 { 152 int err; 153 154 err = trigger_bench__load(ctx.skel); 155 if (err) { 156 fprintf(stderr, "failed to open skeleton\n"); 157 exit(1); 158 } 159 } 160 161 static void attach_bpf(struct bpf_program *prog) 162 { 163 struct bpf_link *link; 164 165 link = bpf_program__attach(prog); 166 if (!link) { 167 fprintf(stderr, "failed to attach program!\n"); 168 exit(1); 169 } 170 } 171 172 static void trigger_syscall_count_setup(void) 173 { 174 ctx.usermode_counters = true; 175 } 176 177 /* Batched, staying mostly in-kernel triggering setups */ 178 static void trigger_kernel_count_setup(void) 179 { 180 setup_ctx(); 181 bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); 182 bpf_program__set_autoload(ctx.skel->progs.trigger_count, true); 183 load_ctx(); 184 /* override driver program */ 185 ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count); 186 } 187 188 static void trigger_kprobe_setup(void) 189 { 190 setup_ctx(); 191 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true); 192 load_ctx(); 193 attach_bpf(ctx.skel->progs.bench_trigger_kprobe); 194 } 195 196 static void trigger_kretprobe_setup(void) 197 { 198 setup_ctx(); 199 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true); 200 load_ctx(); 201 attach_bpf(ctx.skel->progs.bench_trigger_kretprobe); 202 } 203 204 static void trigger_kprobe_multi_setup(void) 205 { 206 setup_ctx(); 207 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true); 208 load_ctx(); 209 attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi); 210 } 211 212 static void trigger_kretprobe_multi_setup(void) 213 { 214 setup_ctx(); 215 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true); 216 load_ctx(); 217 attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi); 218 } 219 220 static void trigger_fentry_setup(void) 221 { 222 setup_ctx(); 223 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true); 224 load_ctx(); 225 attach_bpf(ctx.skel->progs.bench_trigger_fentry); 226 } 227 228 static void trigger_fexit_setup(void) 229 { 230 setup_ctx(); 231 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true); 232 load_ctx(); 233 attach_bpf(ctx.skel->progs.bench_trigger_fexit); 234 } 235 236 /* make sure call is not inlined and not avoided by compiler, so __weak and 237 * inline asm volatile in the body of the function 238 * 239 * There is a performance difference between uprobing at nop location vs other 240 * instructions. So use two different targets, one of which starts with nop 241 * and another doesn't. 242 * 243 * GCC doesn't generate stack setup preample for these functions due to them 244 * having no input arguments and doing nothing in the body. 245 */ 246 __nocf_check __weak void uprobe_target_nop(void) 247 { 248 asm volatile ("nop"); 249 } 250 251 __weak void opaque_noop_func(void) 252 { 253 } 254 255 __nocf_check __weak int uprobe_target_push(void) 256 { 257 /* overhead of function call is negligible compared to uprobe 258 * triggering, so this shouldn't affect benchmark results much 259 */ 260 opaque_noop_func(); 261 return 1; 262 } 263 264 __nocf_check __weak void uprobe_target_ret(void) 265 { 266 asm volatile (""); 267 } 268 269 static void *uprobe_producer_count(void *input) 270 { 271 while (true) { 272 uprobe_target_nop(); 273 inc_counter(base_hits); 274 } 275 return NULL; 276 } 277 278 static void *uprobe_producer_nop(void *input) 279 { 280 while (true) 281 uprobe_target_nop(); 282 return NULL; 283 } 284 285 static void *uprobe_producer_push(void *input) 286 { 287 while (true) 288 uprobe_target_push(); 289 return NULL; 290 } 291 292 static void *uprobe_producer_ret(void *input) 293 { 294 while (true) 295 uprobe_target_ret(); 296 return NULL; 297 } 298 299 static void usetup(bool use_retprobe, void *target_addr) 300 { 301 size_t uprobe_offset; 302 struct bpf_link *link; 303 int err; 304 305 setup_libbpf(); 306 307 ctx.skel = trigger_bench__open(); 308 if (!ctx.skel) { 309 fprintf(stderr, "failed to open skeleton\n"); 310 exit(1); 311 } 312 313 bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true); 314 315 err = trigger_bench__load(ctx.skel); 316 if (err) { 317 fprintf(stderr, "failed to load skeleton\n"); 318 exit(1); 319 } 320 321 uprobe_offset = get_uprobe_offset(target_addr); 322 link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe, 323 use_retprobe, 324 -1 /* all PIDs */, 325 "/proc/self/exe", 326 uprobe_offset); 327 if (!link) { 328 fprintf(stderr, "failed to attach uprobe!\n"); 329 exit(1); 330 } 331 ctx.skel->links.bench_trigger_uprobe = link; 332 } 333 334 static void usermode_count_setup(void) 335 { 336 ctx.usermode_counters = true; 337 } 338 339 static void uprobe_nop_setup(void) 340 { 341 usetup(false, &uprobe_target_nop); 342 } 343 344 static void uretprobe_nop_setup(void) 345 { 346 usetup(true, &uprobe_target_nop); 347 } 348 349 static void uprobe_push_setup(void) 350 { 351 usetup(false, &uprobe_target_push); 352 } 353 354 static void uretprobe_push_setup(void) 355 { 356 usetup(true, &uprobe_target_push); 357 } 358 359 static void uprobe_ret_setup(void) 360 { 361 usetup(false, &uprobe_target_ret); 362 } 363 364 static void uretprobe_ret_setup(void) 365 { 366 usetup(true, &uprobe_target_ret); 367 } 368 369 const struct bench bench_trig_syscall_count = { 370 .name = "trig-syscall-count", 371 .validate = trigger_validate, 372 .setup = trigger_syscall_count_setup, 373 .producer_thread = trigger_producer, 374 .measure = trigger_measure, 375 .report_progress = hits_drops_report_progress, 376 .report_final = hits_drops_report_final, 377 }; 378 379 /* batched (staying mostly in kernel) kprobe/fentry benchmarks */ 380 #define BENCH_TRIG_KERNEL(KIND, NAME) \ 381 const struct bench bench_trig_##KIND = { \ 382 .name = "trig-" NAME, \ 383 .setup = trigger_##KIND##_setup, \ 384 .producer_thread = trigger_producer_batch, \ 385 .measure = trigger_measure, \ 386 .report_progress = hits_drops_report_progress, \ 387 .report_final = hits_drops_report_final, \ 388 .argp = &bench_trigger_batch_argp, \ 389 } 390 391 BENCH_TRIG_KERNEL(kernel_count, "kernel-count"); 392 BENCH_TRIG_KERNEL(kprobe, "kprobe"); 393 BENCH_TRIG_KERNEL(kretprobe, "kretprobe"); 394 BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi"); 395 BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi"); 396 BENCH_TRIG_KERNEL(fentry, "fentry"); 397 BENCH_TRIG_KERNEL(fexit, "fexit"); 398 399 /* uprobe benchmarks */ 400 #define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \ 401 const struct bench bench_trig_##KIND = { \ 402 .name = "trig-" NAME, \ 403 .validate = trigger_validate, \ 404 .setup = KIND##_setup, \ 405 .producer_thread = uprobe_producer_##PRODUCER, \ 406 .measure = trigger_measure, \ 407 .report_progress = hits_drops_report_progress, \ 408 .report_final = hits_drops_report_final, \ 409 } 410 411 BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count"); 412 BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop"); 413 BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push"); 414 BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret"); 415 BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop"); 416 BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push"); 417 BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret"); 418