// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <asm/barrier.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/mman.h>
#include <argp.h>
#include <stdlib.h>
#include "bench.h"
#include "ringbuf_bench.skel.h"
#include "perfbuf_bench.skel.h"

static struct {
	bool back2back;
	int batch_cnt;
	bool sampled;
	int sample_rate;
	int ringbuf_sz; /* per-ringbuf, in bytes */
	bool ringbuf_use_output; /* use slower output API */
	int perfbuf_sz; /* per-CPU size, in pages */
	bool overwrite;
	bool bench_producer;
} args = {
	.back2back = false,
	.batch_cnt = 500,
	.sampled = false,
	.sample_rate = 500,
	.ringbuf_sz = 512 * 1024,
	.ringbuf_use_output = false,
	.perfbuf_sz = 128,
	.overwrite = false,
	.bench_producer = false,
};

enum {
	ARG_RB_BACK2BACK = 2000,
	ARG_RB_USE_OUTPUT = 2001,
	ARG_RB_BATCH_CNT = 2002,
	ARG_RB_SAMPLED = 2003,
	ARG_RB_SAMPLE_RATE = 2004,
	ARG_RB_OVERWRITE = 2005,
	ARG_RB_BENCH_PRODUCER = 2006,
};

static const struct argp_option opts[] = {
	{ "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"},
	{ "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"},
	{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
	{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
	{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
	{ "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"},
	{ "rb-bench-producer", ARG_RB_BENCH_PRODUCER, NULL, 0, "Benchmark producer"},
	{},
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	switch (key) {
	case ARG_RB_BACK2BACK:
		args.back2back = true;
		break;
	case ARG_RB_USE_OUTPUT:
		args.ringbuf_use_output = true;
		break;
	case ARG_RB_BATCH_CNT:
		args.batch_cnt = strtol(arg, NULL, 10);
		if (args.batch_cnt < 0) {
			fprintf(stderr, "Invalid batch count.");
			argp_usage(state);
		}
		break;
	case ARG_RB_SAMPLED:
		args.sampled = true;
		break;
	case ARG_RB_SAMPLE_RATE:
		args.sample_rate = strtol(arg, NULL, 10);
		if (args.sample_rate < 0) {
			fprintf(stderr, "Invalid perfbuf sample rate.");
			argp_usage(state);
		}
		break;
	case ARG_RB_OVERWRITE:
		args.overwrite = true;
		break;
	case ARG_RB_BENCH_PRODUCER:
		args.bench_producer = true;
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}
	return 0;
}

/* exported into benchmark runner */
const struct argp bench_ringbufs_argp = {
	.options = opts,
	.parser = parse_arg,
};

/* RINGBUF-LIBBPF benchmark */

static struct counter buf_hits;
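/*
 * Each call below issues a single getpgid() syscall. The BPF-side benchmark
 * programs (ringbuf_bench.bpf.c / perfbuf_bench.bpf.c, not shown here) are
 * expected to hook that syscall path and emit a batch of batch_cnt records
 * per invocation, so one trigger corresponds to one BPF-side batch.
 */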
static inline void bufs_trigger_batch(void)
{
	(void)syscall(__NR_getpgid);
}

static void bufs_validate(void)
{
	if (args.bench_producer && strcmp(env.bench_name, "rb-libbpf")) {
		fprintf(stderr, "--rb-bench-producer only works with rb-libbpf!\n");
		exit(1);
	}

	if (args.overwrite && !args.bench_producer) {
		fprintf(stderr, "overwrite mode only works with --rb-bench-producer for now!\n");
		exit(1);
	}

	if (args.bench_producer && env.consumer_cnt != 0) {
		fprintf(stderr, "no consumer is needed for --rb-bench-producer!\n");
		exit(1);
	}

	if (args.bench_producer && args.back2back) {
		fprintf(stderr, "back-to-back mode makes no sense for --rb-bench-producer!\n");
		exit(1);
	}

	if (args.bench_producer && args.sampled) {
		fprintf(stderr, "sampling mode makes no sense for --rb-bench-producer!\n");
		exit(1);
	}

	if (!args.bench_producer && env.consumer_cnt != 1) {
		fprintf(stderr, "benchmarks without --rb-bench-producer require exactly one consumer!\n");
		exit(1);
	}

	if (args.back2back && env.producer_cnt > 1) {
		fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
		exit(1);
	}
}

static void *bufs_sample_producer(void *input)
{
	if (args.back2back) {
		/* initial batch to get everything started */
		bufs_trigger_batch();
		return NULL;
	}

	while (true)
		bufs_trigger_batch();
	return NULL;
}

static struct ringbuf_libbpf_ctx {
	struct ringbuf_bench *skel;
	struct ring_buffer *ringbuf;
} ringbuf_libbpf_ctx;

static void ringbuf_libbpf_measure(struct bench_res *res)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;

	if (args.bench_producer)
		res->hits = atomic_swap(&ctx->skel->bss->hits, 0);
	else
		res->hits = atomic_swap(&buf_hits.value, 0);
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static struct ringbuf_bench *ringbuf_setup_skeleton(void)
{
	__u32 flags;
	struct bpf_map *ringbuf;
	struct ringbuf_bench *skel;

	setup_libbpf();

	skel = ringbuf_bench__open();
	if (!skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	skel->rodata->batch_cnt = args.batch_cnt;
	skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
	skel->rodata->bench_producer = args.bench_producer;

	if (args.sampled)
		/* record data + header take 16 bytes */
		skel->rodata->wakeup_data_size = args.sample_rate * 16;

	ringbuf = skel->maps.ringbuf;
	if (args.overwrite) {
		flags = bpf_map__map_flags(ringbuf) | BPF_F_RB_OVERWRITE;
		bpf_map__set_map_flags(ringbuf, flags);
	}

	bpf_map__set_max_entries(ringbuf, args.ringbuf_sz);

	if (ringbuf_bench__load(skel)) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}

	return skel;
}

static int buf_process_sample(void *ctx, void *data, size_t len)
{
	atomic_inc(&buf_hits.value);
	return 0;
}

static void ringbuf_libbpf_setup(void)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
	struct bpf_link *link;
	int map_fd;

	ctx->skel = ringbuf_setup_skeleton();

	map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
	ctx->ringbuf = ring_buffer__new(map_fd, buf_process_sample, NULL, NULL);
	if (!ctx->ringbuf) {
		fprintf(stderr, "failed to create ringbuf\n");
		exit(1);
	}

	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
	if (!link) {
		fprintf(stderr, "failed to attach program!\n");
		exit(1);
	}
}

static void *ringbuf_libbpf_consumer(void *input)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;

	while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) {
		if (args.back2back)
			bufs_trigger_batch();
	}
	fprintf(stderr, "ringbuf polling failed!\n");
	return NULL;
}
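/*
 * The "custom" ringbuf benchmark below bypasses libbpf's ring_buffer API and
 * consumes records straight from the mmap()'ed producer/data pages, using
 * acquire/release ordering on the ring buffer header fields. It approximates
 * the minimal possible consumer-side overhead: it does not invoke per-sample
 * callbacks and makes no distinction between committed and discarded
 * records, which is fine here because the benchmark only counts records.
 */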
/* RINGBUF-CUSTOM benchmark */
struct ringbuf_custom {
	__u64 *consumer_pos;
	__u64 *producer_pos;
	__u64 mask;
	void *data;
	int map_fd;
};

static struct ringbuf_custom_ctx {
	struct ringbuf_bench *skel;
	struct ringbuf_custom ringbuf;
	int epoll_fd;
	struct epoll_event event;
} ringbuf_custom_ctx;

static void ringbuf_custom_measure(struct bench_res *res)
{
	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;

	res->hits = atomic_swap(&buf_hits.value, 0);
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static void ringbuf_custom_setup(void)
{
	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
	const size_t page_size = getpagesize();
	struct bpf_link *link;
	struct ringbuf_custom *r;
	void *tmp;
	int err;

	ctx->skel = ringbuf_setup_skeleton();

	ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
	if (ctx->epoll_fd < 0) {
		fprintf(stderr, "failed to create epoll fd: %d\n", -errno);
		exit(1);
	}

	r = &ctx->ringbuf;
	r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
	r->mask = args.ringbuf_sz - 1;

	/* Map writable consumer page */
	tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   r->map_fd, 0);
	if (tmp == MAP_FAILED) {
		fprintf(stderr, "failed to mmap consumer page: %d\n", -errno);
		exit(1);
	}
	r->consumer_pos = tmp;

	/* Map read-only producer page and data pages. The data area is mapped
	 * twice back-to-back, so records that wrap around the end of the ring
	 * buffer can still be read as one contiguous chunk.
	 */
	tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED,
		   r->map_fd, page_size);
	if (tmp == MAP_FAILED) {
		fprintf(stderr, "failed to mmap data pages: %d\n", -errno);
		exit(1);
	}
	r->producer_pos = tmp;
	r->data = tmp + page_size;

	ctx->event.events = EPOLLIN;
	err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event);
	if (err < 0) {
		fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno);
		exit(1);
	}

	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
	if (!link) {
		fprintf(stderr, "failed to attach program\n");
		exit(1);
	}
}

#define RINGBUF_BUSY_BIT (1 << 31)
#define RINGBUF_DISCARD_BIT (1 << 30)
#define RINGBUF_META_LEN 8

static inline int roundup_len(__u32 len)
{
	/* clear out top 2 bits (busy and discard) */
	len <<= 2;
	len >>= 2;
	/* add length prefix */
	len += RINGBUF_META_LEN;
	/* round up to 8 byte alignment */
	return (len + 7) / 8 * 8;
}

static void ringbuf_custom_process_ring(struct ringbuf_custom *r)
{
	unsigned long cons_pos, prod_pos;
	int *len_ptr, len;
	bool got_new_data;

	cons_pos = smp_load_acquire(r->consumer_pos);
	while (true) {
		got_new_data = false;
		prod_pos = smp_load_acquire(r->producer_pos);
		while (cons_pos < prod_pos) {
			len_ptr = r->data + (cons_pos & r->mask);
			len = smp_load_acquire(len_ptr);

			/* sample not committed yet, bail out for now */
			if (len & RINGBUF_BUSY_BIT)
				return;

			got_new_data = true;
			cons_pos += roundup_len(len);

			atomic_inc(&buf_hits.value);
		}
		if (got_new_data)
			smp_store_release(r->consumer_pos, cons_pos);
		else
			break;
	}
}

static void *ringbuf_custom_consumer(void *input)
{
	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
	int cnt;

	do {
		if (args.back2back)
			bufs_trigger_batch();
		cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1);
		if (cnt > 0)
			ringbuf_custom_process_ring(&ctx->ringbuf);
	} while (cnt >= 0);
	fprintf(stderr, "ringbuf polling failed!\n");
	return NULL;
}
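/*
 * The perfbuf benchmarks measure BPF_MAP_TYPE_PERF_EVENT_ARRAY throughput
 * for comparison: each CPU gets its own perf buffer of perfbuf_sz pages, and
 * in sampled mode wakeup_events is used so the consumer is only woken up
 * once every sample_rate records instead of on every record.
 */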
/* PERFBUF-LIBBPF benchmark */
static struct perfbuf_libbpf_ctx {
	struct perfbuf_bench *skel;
	struct perf_buffer *perfbuf;
} perfbuf_libbpf_ctx;

static void perfbuf_measure(struct bench_res *res)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;

	res->hits = atomic_swap(&buf_hits.value, 0);
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static struct perfbuf_bench *perfbuf_setup_skeleton(void)
{
	struct perfbuf_bench *skel;

	setup_libbpf();

	skel = perfbuf_bench__open();
	if (!skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	skel->rodata->batch_cnt = args.batch_cnt;

	if (perfbuf_bench__load(skel)) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}

	return skel;
}

static enum bpf_perf_event_ret
perfbuf_process_sample_raw(void *input_ctx, int cpu,
			   struct perf_event_header *e)
{
	switch (e->type) {
	case PERF_RECORD_SAMPLE:
		atomic_inc(&buf_hits.value);
		break;
	case PERF_RECORD_LOST:
		break;
	default:
		return LIBBPF_PERF_EVENT_ERROR;
	}
	return LIBBPF_PERF_EVENT_CONT;
}

static void perfbuf_libbpf_setup(void)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
	struct perf_event_attr attr;
	struct bpf_link *link;

	ctx->skel = perfbuf_setup_skeleton();

	memset(&attr, 0, sizeof(attr));
	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
	attr.type = PERF_TYPE_SOFTWARE;
	attr.sample_type = PERF_SAMPLE_RAW;
	/* notify only every Nth sample */
	if (args.sampled) {
		attr.sample_period = args.sample_rate;
		attr.wakeup_events = args.sample_rate;
	} else {
		attr.sample_period = 1;
		attr.wakeup_events = 1;
	}

	if (args.sample_rate > args.batch_cnt) {
		fprintf(stderr, "sample rate %d is too high for given batch count %d\n",
			args.sample_rate, args.batch_cnt);
		exit(1);
	}

	ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
					    args.perfbuf_sz, &attr,
					    perfbuf_process_sample_raw, NULL, NULL);
	if (!ctx->perfbuf) {
		fprintf(stderr, "failed to create perfbuf\n");
		exit(1);
	}

	link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
	if (!link) {
		fprintf(stderr, "failed to attach program\n");
		exit(1);
	}
}

static void *perfbuf_libbpf_consumer(void *input)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;

	while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) {
		if (args.back2back)
			bufs_trigger_batch();
	}
	fprintf(stderr, "perfbuf polling failed!\n");
	return NULL;
}

/* PERFBUF-CUSTOM benchmark */

/* copies of internal libbpf definitions */
struct perf_cpu_buf {
	struct perf_buffer *pb;
	void *base; /* mmap()'ed memory */
	void *buf; /* for reconstructing segmented data */
	size_t buf_size;
	int fd;
	int cpu;
	int map_key;
};

struct perf_buffer {
	perf_buffer_event_fn event_cb;
	perf_buffer_sample_fn sample_cb;
	perf_buffer_lost_fn lost_cb;
	void *ctx; /* passed into callbacks */

	size_t page_size;
	size_t mmap_size;
	struct perf_cpu_buf **cpu_bufs;
	struct epoll_event *events;
	int cpu_cnt; /* number of allocated CPU buffers */
	int epoll_fd; /* perf event FD */
	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
};
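/*
 * Bare-bones consumer: reads the mmap()'ed perf ring pages directly using
 * the data_head/data_tail protocol and the ring_buffer_read_head()/
 * ring_buffer_write_tail() barriers from <linux/ring_buffer.h>. It relies on
 * the struct perf_buffer layout copied above matching libbpf's internal one,
 * and unlike libbpf it does not reassemble samples that wrap around the end
 * of a per-CPU buffer; since it only counts records, that is acceptable for
 * this benchmark.
 */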
static void *perfbuf_custom_consumer(void *input)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
	struct perf_buffer *pb = ctx->perfbuf;
	struct perf_cpu_buf *cpu_buf;
	struct perf_event_mmap_page *header;
	size_t mmap_mask = pb->mmap_size - 1;
	struct perf_event_header *ehdr;
	__u64 data_head, data_tail;
	size_t ehdr_size;
	void *base;
	int i, cnt;

	while (true) {
		if (args.back2back)
			bufs_trigger_batch();
		cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1);
		if (cnt <= 0) {
			fprintf(stderr, "perf epoll failed: %d\n", -errno);
			exit(1);
		}

		for (i = 0; i < cnt; ++i) {
			cpu_buf = pb->events[i].data.ptr;
			header = cpu_buf->base;
			base = ((void *)header) + pb->page_size;

			data_head = ring_buffer_read_head(header);
			data_tail = header->data_tail;
			while (data_head != data_tail) {
				ehdr = base + (data_tail & mmap_mask);
				ehdr_size = ehdr->size;

				if (ehdr->type == PERF_RECORD_SAMPLE)
					atomic_inc(&buf_hits.value);

				data_tail += ehdr_size;
			}
			ring_buffer_write_tail(header, data_tail);
		}
	}
	return NULL;
}

/* Benchmark definitions exported to the benchmark runner (bench.c), which
 * selects one of them by name, e.g. ./bench rb-libbpf.
 */
const struct bench bench_rb_libbpf = {
	.name = "rb-libbpf",
	.argp = &bench_ringbufs_argp,
	.validate = bufs_validate,
	.setup = ringbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = ringbuf_libbpf_consumer,
	.measure = ringbuf_libbpf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

const struct bench bench_rb_custom = {
	.name = "rb-custom",
	.argp = &bench_ringbufs_argp,
	.validate = bufs_validate,
	.setup = ringbuf_custom_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = ringbuf_custom_consumer,
	.measure = ringbuf_custom_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

const struct bench bench_pb_libbpf = {
	.name = "pb-libbpf",
	.argp = &bench_ringbufs_argp,
	.validate = bufs_validate,
	.setup = perfbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = perfbuf_libbpf_consumer,
	.measure = perfbuf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

const struct bench bench_pb_custom = {
	.name = "pb-custom",
	.argp = &bench_ringbufs_argp,
	.validate = bufs_validate,
	.setup = perfbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = perfbuf_custom_consumer,
	.measure = perfbuf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};