xref: /linux/tools/testing/selftests/bpf/benchs/bench_ringbufs.c (revision 015e7b0b0e8e51f7321ec2aafc1d7fc0a8a5536f)
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <asm/barrier.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/mman.h>
#include <argp.h>
#include <stdlib.h>
#include "bench.h"
#include "ringbuf_bench.skel.h"
#include "perfbuf_bench.skel.h"

static struct {
	bool back2back;
	int batch_cnt;
	bool sampled;
	int sample_rate;
	int ringbuf_sz; /* per-ringbuf, in bytes */
	bool ringbuf_use_output; /* use slower output API */
	int perfbuf_sz; /* per-CPU size, in pages */
	bool overwrite;
	bool bench_producer;
} args = {
	.back2back = false,
	.batch_cnt = 500,
	.sampled = false,
	.sample_rate = 500,
	.ringbuf_sz = 512 * 1024,
	.ringbuf_use_output = false,
	.perfbuf_sz = 128,
	.overwrite = false,
	.bench_producer = false,
};

enum {
	ARG_RB_BACK2BACK = 2000,
	ARG_RB_USE_OUTPUT = 2001,
	ARG_RB_BATCH_CNT = 2002,
	ARG_RB_SAMPLED = 2003,
	ARG_RB_SAMPLE_RATE = 2004,
	ARG_RB_OVERWRITE = 2005,
	ARG_RB_BENCH_PRODUCER = 2006,
};

static const struct argp_option opts[] = {
	{ "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"},
	{ "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"},
	{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
	{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
	{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
	{ "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"},
	{ "rb-bench-producer", ARG_RB_BENCH_PRODUCER, NULL, 0, "Benchmark producer"},
	{},
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	switch (key) {
	case ARG_RB_BACK2BACK:
		args.back2back = true;
		break;
	case ARG_RB_USE_OUTPUT:
		args.ringbuf_use_output = true;
		break;
	case ARG_RB_BATCH_CNT:
		args.batch_cnt = strtol(arg, NULL, 10);
		if (args.batch_cnt < 0) {
			fprintf(stderr, "Invalid batch count.\n");
			argp_usage(state);
		}
		break;
	case ARG_RB_SAMPLED:
		args.sampled = true;
		break;
	case ARG_RB_SAMPLE_RATE:
		args.sample_rate = strtol(arg, NULL, 10);
		if (args.sample_rate < 0) {
			fprintf(stderr, "Invalid perfbuf sample rate.\n");
			argp_usage(state);
		}
		break;
	case ARG_RB_OVERWRITE:
		args.overwrite = true;
		break;
	case ARG_RB_BENCH_PRODUCER:
		args.bench_producer = true;
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}
	return 0;
}

/* exported into benchmark runner */
const struct argp bench_ringbufs_argp = {
	.options = opts,
	.parser = parse_arg,
};
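/*
 * Example invocation (a sketch, assuming the standard selftests bench
 * runner built from tools/testing/selftests/bpf/bench.c; global knobs
 * such as producer/consumer thread counts live there, not in this file):
 *
 *   ./bench rb-libbpf --rb-batch-cnt 1000 --rb-sampled --rb-sample-rate 100
 *   ./bench rb-custom --rb-b2b
 */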

/* RINGBUF-LIBBPF benchmark */

static struct counter buf_hits;

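/*
 * Each getpgid() syscall below triggers one invocation of the attached
 * BPF benchmark program, which in turn emits args.batch_cnt records into
 * the ring/perf buffer (batch_cnt is passed via skeleton rodata before
 * load). The actual attach point is defined in the BPF-side sources
 * behind the ringbuf_bench/perfbuf_bench skeletons, not in this file.
 */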
static inline void bufs_trigger_batch(void)
{
	(void)syscall(__NR_getpgid);
}

static void bufs_validate(void)
{
	if (args.bench_producer && strcmp(env.bench_name, "rb-libbpf")) {
		fprintf(stderr, "--rb-bench-producer only works with rb-libbpf!\n");
		exit(1);
	}

	if (args.overwrite && !args.bench_producer) {
		fprintf(stderr, "overwrite mode only works with --rb-bench-producer for now!\n");
		exit(1);
	}

	if (args.bench_producer && env.consumer_cnt != 0) {
		fprintf(stderr, "no consumer is needed for --rb-bench-producer!\n");
		exit(1);
	}

	if (args.bench_producer && args.back2back) {
		fprintf(stderr, "back-to-back mode makes no sense for --rb-bench-producer!\n");
		exit(1);
	}

	if (args.bench_producer && args.sampled) {
		fprintf(stderr, "sampling mode makes no sense for --rb-bench-producer!\n");
		exit(1);
	}

	if (!args.bench_producer && env.consumer_cnt != 1) {
		fprintf(stderr, "benchmarks without --rb-bench-producer require exactly one consumer!\n");
		exit(1);
	}

	if (args.back2back && env.producer_cnt > 1) {
		fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
		exit(1);
	}
}

static void *bufs_sample_producer(void *input)
{
	if (args.back2back) {
		/* initial batch to get everything started */
		bufs_trigger_batch();
		return NULL;
	}

	while (true)
		bufs_trigger_batch();
	return NULL;
}

static struct ringbuf_libbpf_ctx {
	struct ringbuf_bench *skel;
	struct ring_buffer *ringbuf;
} ringbuf_libbpf_ctx;

static void ringbuf_libbpf_measure(struct bench_res *res)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;

	if (args.bench_producer)
		res->hits = atomic_swap(&ctx->skel->bss->hits, 0);
	else
		res->hits = atomic_swap(&buf_hits.value, 0);
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

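/*
 * Note: the rodata knobs (batch_cnt, use_output, wakeup_data_size, ...)
 * must be set between __open() and __load(). The ringbuf map's
 * max_entries is its size in bytes and, for BPF ring buffers, has to be
 * a power-of-two multiple of the page size; the 512KB default and the
 * mask arithmetic in the rb-custom consumer both rely on that.
 */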
static struct ringbuf_bench *ringbuf_setup_skeleton(void)
{
	__u32 flags;
	struct bpf_map *ringbuf;
	struct ringbuf_bench *skel;

	setup_libbpf();

	skel = ringbuf_bench__open();
	if (!skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	skel->rodata->batch_cnt = args.batch_cnt;
	skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
	skel->rodata->bench_producer = args.bench_producer;

	if (args.sampled)
		/* record data + header take 16 bytes */
		skel->rodata->wakeup_data_size = args.sample_rate * 16;

	ringbuf = skel->maps.ringbuf;
	if (args.overwrite) {
		flags = bpf_map__map_flags(ringbuf) | BPF_F_RB_OVERWRITE;
		bpf_map__set_map_flags(ringbuf, flags);
	}

	bpf_map__set_max_entries(ringbuf, args.ringbuf_sz);

	if (ringbuf_bench__load(skel)) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}

	return skel;
}

static int buf_process_sample(void *ctx, void *data, size_t len)
{
	atomic_inc(&buf_hits.value);
	return 0;
}

static void ringbuf_libbpf_setup(void)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
	struct bpf_link *link;
	int map_fd;

	ctx->skel = ringbuf_setup_skeleton();

	map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
	ctx->ringbuf = ring_buffer__new(map_fd, buf_process_sample, NULL, NULL);
	if (!ctx->ringbuf) {
		fprintf(stderr, "failed to create ringbuf\n");
		exit(1);
	}

	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
	if (!link) {
		fprintf(stderr, "failed to attach program!\n");
		exit(1);
	}
}

static void *ringbuf_libbpf_consumer(void *input)
{
	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;

	while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) {
		if (args.back2back)
			bufs_trigger_batch();
	}
	fprintf(stderr, "ringbuf polling failed!\n");
	return NULL;
}

/* RINGBUF-CUSTOM benchmark */
struct ringbuf_custom {
	__u64 *consumer_pos;
	__u64 *producer_pos;
	__u64 mask;
	void *data;
	int map_fd;
};

static struct ringbuf_custom_ctx {
	struct ringbuf_bench *skel;
	struct ringbuf_custom ringbuf;
	int epoll_fd;
	struct epoll_event event;
} ringbuf_custom_ctx;

static void ringbuf_custom_measure(struct bench_res *res)
{
	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;

	res->hits = atomic_swap(&buf_hits.value, 0);
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

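/*
 * BPF ringbuf mmap layout as consumed below: offset 0 is the writable
 * consumer-position page; at offset page_size the kernel exposes a
 * read-only view containing the producer-position page followed by the
 * data area mapped twice back-to-back (hence page_size + 2 * ringbuf_sz),
 * so a record that wraps the end of the ring can still be read
 * contiguously.
 */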
static void ringbuf_custom_setup(void)
{
	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
	const size_t page_size = getpagesize();
	struct bpf_link *link;
	struct ringbuf_custom *r;
	void *tmp;
	int err;

	ctx->skel = ringbuf_setup_skeleton();

	ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
	if (ctx->epoll_fd < 0) {
		fprintf(stderr, "failed to create epoll fd: %d\n", -errno);
		exit(1);
	}

	r = &ctx->ringbuf;
	r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
	r->mask = args.ringbuf_sz - 1;

	/* Map writable consumer page */
	tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   r->map_fd, 0);
	if (tmp == MAP_FAILED) {
		fprintf(stderr, "failed to mmap consumer page: %d\n", -errno);
		exit(1);
	}
	r->consumer_pos = tmp;

	/* Map read-only producer page and data pages. */
	tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED,
		   r->map_fd, page_size);
	if (tmp == MAP_FAILED) {
		fprintf(stderr, "failed to mmap data pages: %d\n", -errno);
		exit(1);
	}
	r->producer_pos = tmp;
	r->data = tmp + page_size;

	ctx->event.events = EPOLLIN;
	err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event);
	if (err < 0) {
		fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno);
		exit(1);
	}

	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
	if (!link) {
		fprintf(stderr, "failed to attach program\n");
		exit(1);
	}
}

#define RINGBUF_BUSY_BIT (1 << 31)
#define RINGBUF_DISCARD_BIT (1 << 30)
#define RINGBUF_META_LEN 8

static inline int roundup_len(__u32 len)
{
	/* clear out top 2 bits (busy and discard flags) */
	len <<= 2;
	len >>= 2;
	/* add length prefix */
	len += RINGBUF_META_LEN;
	/* round up to 8 byte alignment */
	return (len + 7) / 8 * 8;
}

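/*
 * Worked example for roundup_len() (illustrative values): a discarded
 * 20-byte record carries len = 20 | RINGBUF_DISCARD_BIT; clearing the
 * top two bits leaves 20, adding the 8-byte header gives 28, and rounding
 * up to 8-byte alignment yields 32, which is how far consumer_pos
 * advances for that record.
 *
 * The loop below mirrors libbpf's internal ring processing: consumer and
 * producer positions and the per-record length are read with
 * load-acquire to pair with the kernel's store-release on commit; a
 * length with RINGBUF_BUSY_BIT set means the record is reserved but not
 * yet committed, so we bail out and retry on the next wakeup; the final
 * store-release of consumer_pos returns the consumed space to the
 * producer. Discarded records are skipped over but still counted as hits
 * here.
 */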
static void ringbuf_custom_process_ring(struct ringbuf_custom *r)
{
	unsigned long cons_pos, prod_pos;
	int *len_ptr, len;
	bool got_new_data;

	cons_pos = smp_load_acquire(r->consumer_pos);
	while (true) {
		got_new_data = false;
		prod_pos = smp_load_acquire(r->producer_pos);
		while (cons_pos < prod_pos) {
			len_ptr = r->data + (cons_pos & r->mask);
			len = smp_load_acquire(len_ptr);

			/* sample not committed yet, bail out for now */
			if (len & RINGBUF_BUSY_BIT)
				return;

			got_new_data = true;
			cons_pos += roundup_len(len);

			atomic_inc(&buf_hits.value);
		}
		if (got_new_data)
			smp_store_release(r->consumer_pos, cons_pos);
		else
			break;
	}
}

static void *ringbuf_custom_consumer(void *input)
{
	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
	int cnt;

	do {
		if (args.back2back)
			bufs_trigger_batch();
		cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1);
		if (cnt > 0)
			ringbuf_custom_process_ring(&ctx->ringbuf);
	} while (cnt >= 0);
	fprintf(stderr, "ringbuf polling failed!\n");
	return NULL;
}

/* PERFBUF-LIBBPF benchmark */
static struct perfbuf_libbpf_ctx {
	struct perfbuf_bench *skel;
	struct perf_buffer *perfbuf;
} perfbuf_libbpf_ctx;

static void perfbuf_measure(struct bench_res *res)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;

	res->hits = atomic_swap(&buf_hits.value, 0);
	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}

static struct perfbuf_bench *perfbuf_setup_skeleton(void)
{
	struct perfbuf_bench *skel;

	setup_libbpf();

	skel = perfbuf_bench__open();
	if (!skel) {
		fprintf(stderr, "failed to open skeleton\n");
		exit(1);
	}

	skel->rodata->batch_cnt = args.batch_cnt;

	if (perfbuf_bench__load(skel)) {
		fprintf(stderr, "failed to load skeleton\n");
		exit(1);
	}

	return skel;
}

static enum bpf_perf_event_ret
perfbuf_process_sample_raw(void *input_ctx, int cpu,
			   struct perf_event_header *e)
{
	switch (e->type) {
	case PERF_RECORD_SAMPLE:
		atomic_inc(&buf_hits.value);
		break;
	case PERF_RECORD_LOST:
		break;
	default:
		return LIBBPF_PERF_EVENT_ERROR;
	}
	return LIBBPF_PERF_EVENT_CONT;
}

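/*
 * With --rb-sampled, wakeup_events below makes the kernel notify the
 * poller only once per sample_rate records, so the consumer is woken up
 * far less often; without it, every single record generates an epoll
 * wakeup. This is the perfbuf counterpart of the ringbuf
 * wakeup_data_size knob set in ringbuf_setup_skeleton().
 */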
static void perfbuf_libbpf_setup(void)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
	struct perf_event_attr attr;
	struct bpf_link *link;

	ctx->skel = perfbuf_setup_skeleton();

	memset(&attr, 0, sizeof(attr));
	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
	attr.type = PERF_TYPE_SOFTWARE;
	attr.sample_type = PERF_SAMPLE_RAW;
	/* notify only every Nth sample */
	if (args.sampled) {
		attr.sample_period = args.sample_rate;
		attr.wakeup_events = args.sample_rate;
	} else {
		attr.sample_period = 1;
		attr.wakeup_events = 1;
	}

	if (args.sample_rate > args.batch_cnt) {
		fprintf(stderr, "sample rate %d is too high for given batch count %d\n",
			args.sample_rate, args.batch_cnt);
		exit(1);
	}

	ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
					    args.perfbuf_sz, &attr,
					    perfbuf_process_sample_raw, NULL, NULL);
	if (!ctx->perfbuf) {
		fprintf(stderr, "failed to create perfbuf\n");
		exit(1);
	}

	link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
	if (!link) {
		fprintf(stderr, "failed to attach program\n");
		exit(1);
	}
}

static void *perfbuf_libbpf_consumer(void *input)
{
	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;

	while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) {
		if (args.back2back)
			bufs_trigger_batch();
	}
	fprintf(stderr, "perfbuf polling failed!\n");
	return NULL;
}

/* PERFBUF-CUSTOM benchmark */

/*
 * Copies of internal libbpf definitions; they have to stay in sync with
 * libbpf's private struct layout, since the custom consumer below pokes
 * at these fields directly.
 */
struct perf_cpu_buf {
	struct perf_buffer *pb;
	void *base; /* mmap()'ed memory */
	void *buf; /* for reconstructing segmented data */
	size_t buf_size;
	int fd;
	int cpu;
	int map_key;
};

struct perf_buffer {
	perf_buffer_event_fn event_cb;
	perf_buffer_sample_fn sample_cb;
	perf_buffer_lost_fn lost_cb;
	void *ctx; /* passed into callbacks */

	size_t page_size;
	size_t mmap_size;
	struct perf_cpu_buf **cpu_bufs;
	struct epoll_event *events;
	int cpu_cnt; /* number of allocated CPU buffers */
	int epoll_fd; /* perf event FD */
	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
};

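/*
 * Hand-rolled per-CPU perf ring reader: ring_buffer_read_head() performs
 * the acquire-load of data_head, records are walked as variable-size
 * perf_event_header entries, and ring_buffer_write_tail() releases
 * data_tail back to the kernel so the space can be reused. Unlike
 * libbpf's perf_buffer__poll(), this simplified loop does not attempt to
 * reassemble records that wrap around the end of the mmap'ed data area.
 */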
529 {
530 	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
531 	struct perf_buffer *pb = ctx->perfbuf;
532 	struct perf_cpu_buf *cpu_buf;
533 	struct perf_event_mmap_page *header;
534 	size_t mmap_mask = pb->mmap_size - 1;
535 	struct perf_event_header *ehdr;
536 	__u64 data_head, data_tail;
537 	size_t ehdr_size;
538 	void *base;
539 	int i, cnt;
540 
541 	while (true) {
542 		if (args.back2back)
543 			bufs_trigger_batch();
544 		cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1);
545 		if (cnt <= 0) {
546 			fprintf(stderr, "perf epoll failed: %d\n", -errno);
547 			exit(1);
548 		}
549 
550 		for (i = 0; i < cnt; ++i) {
551 			cpu_buf = pb->events[i].data.ptr;
552 			header = cpu_buf->base;
553 			base = ((void *)header) + pb->page_size;
554 
555 			data_head = ring_buffer_read_head(header);
556 			data_tail = header->data_tail;
557 			while (data_head != data_tail) {
558 				ehdr = base + (data_tail & mmap_mask);
559 				ehdr_size = ehdr->size;
560 
561 				if (ehdr->type == PERF_RECORD_SAMPLE)
562 					atomic_inc(&buf_hits.value);
563 
564 				data_tail += ehdr_size;
565 			}
566 			ring_buffer_write_tail(header, data_tail);
567 		}
568 	}
569 	return NULL;
570 }
571 
const struct bench bench_rb_libbpf = {
	.name = "rb-libbpf",
	.argp = &bench_ringbufs_argp,
	.validate = bufs_validate,
	.setup = ringbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = ringbuf_libbpf_consumer,
	.measure = ringbuf_libbpf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

const struct bench bench_rb_custom = {
	.name = "rb-custom",
	.argp = &bench_ringbufs_argp,
	.validate = bufs_validate,
	.setup = ringbuf_custom_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = ringbuf_custom_consumer,
	.measure = ringbuf_custom_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

const struct bench bench_pb_libbpf = {
	.name = "pb-libbpf",
	.argp = &bench_ringbufs_argp,
	.validate = bufs_validate,
	.setup = perfbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = perfbuf_libbpf_consumer,
	.measure = perfbuf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};

const struct bench bench_pb_custom = {
	.name = "pb-custom",
	.argp = &bench_ringbufs_argp,
	.validate = bufs_validate,
	.setup = perfbuf_libbpf_setup,
	.producer_thread = bufs_sample_producer,
	.consumer_thread = perfbuf_custom_consumer,
	.measure = perfbuf_measure,
	.report_progress = hits_drops_report_progress,
	.report_final = hits_drops_report_final,
};