xref: /linux/tools/testing/selftests/bpf/benchs/bench_sockmap.c (revision 90b83efa6701656e02c86e7df2cb1765ea602d07)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include <error.h>
4 #include <sys/types.h>
5 #include <sys/socket.h>
6 #include <netinet/in.h>
7 #include <sys/sendfile.h>
8 #include <arpa/inet.h>
9 #include <fcntl.h>
10 #include <argp.h>
11 #include "bench.h"
12 #include "bench_sockmap_prog.skel.h"
13 
14 #define FILE_SIZE (128 * 1024)
15 #define DATA_REPEAT_SIZE 10
16 
17 static const char snd_data[DATA_REPEAT_SIZE] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
18 
19 /* c1 <-> [p1, p2] <-> c2
20  * RX bench(BPF_SK_SKB_STREAM_VERDICT):
21  *	ARG_FW_RX_PASS:
22  *		send(p2) -> recv(c2) -> bpf skb passthrough -> recv(c2)
23  *	ARG_FW_RX_VERDICT_EGRESS:
24  *		send(c1) -> verdict skb to tx queuec of p2 -> recv(c2)
25  *	ARG_FW_RX_VERDICT_INGRESS:
26  *		send(c1) -> verdict skb to rx queuec of c2 -> recv(c2)
27  *
28  * TX bench(BPF_SK_MSG_VERDIC):
29  *	ARG_FW_TX_PASS:
30  *		send(p2) -> bpf msg passthrough -> send(p2) -> recv(c2)
31  *	ARG_FW_TX_VERDICT_INGRESS:
32  *		send(p2) -> verdict msg to rx queue of c2 -> recv(c2)
33  *	ARG_FW_TX_VERDICT_EGRESS:
34  *		send(p1) -> verdict msg to tx queue of p2 -> recv(c2)
35  */
36 enum SOCKMAP_ARG_FLAG {
37 	ARG_FW_RX_NORMAL = 11000,
38 	ARG_FW_RX_PASS,
39 	ARG_FW_RX_VERDICT_EGRESS,
40 	ARG_FW_RX_VERDICT_INGRESS,
41 	ARG_FW_TX_NORMAL,
42 	ARG_FW_TX_PASS,
43 	ARG_FW_TX_VERDICT_INGRESS,
44 	ARG_FW_TX_VERDICT_EGRESS,
45 	ARG_CTL_RX_STRP,
46 	ARG_CONSUMER_DELAY_TIME,
47 	ARG_PRODUCER_DURATION,
48 };
49 
50 #define TXMODE_NORMAL()				\
51 	((ctx.mode) == ARG_FW_TX_NORMAL)
52 
53 #define TXMODE_BPF_INGRESS()			\
54 	((ctx.mode) == ARG_FW_TX_VERDICT_INGRESS)
55 
56 #define TXMODE_BPF_EGRESS()			\
57 	((ctx.mode) == ARG_FW_TX_VERDICT_EGRESS)
58 
59 #define TXMODE_BPF_PASS()			\
60 	((ctx.mode) == ARG_FW_TX_PASS)
61 
62 #define TXMODE_BPF() (				\
63 	TXMODE_BPF_PASS() ||			\
64 	TXMODE_BPF_INGRESS() ||			\
65 	TXMODE_BPF_EGRESS())
66 
67 #define TXMODE() (				\
68 	TXMODE_NORMAL() ||			\
69 	TXMODE_BPF())
70 
71 #define RXMODE_NORMAL()				\
72 	((ctx.mode) == ARG_FW_RX_NORMAL)
73 
74 #define RXMODE_BPF_PASS()			\
75 	((ctx.mode) == ARG_FW_RX_PASS)
76 
77 #define RXMODE_BPF_VERDICT_EGRESS()		\
78 	((ctx.mode) == ARG_FW_RX_VERDICT_EGRESS)
79 
80 #define RXMODE_BPF_VERDICT_INGRESS()		\
81 	((ctx.mode) == ARG_FW_RX_VERDICT_INGRESS)
82 
83 #define RXMODE_BPF_VERDICT() (			\
84 	RXMODE_BPF_VERDICT_INGRESS() ||		\
85 	RXMODE_BPF_VERDICT_EGRESS())
86 
87 #define RXMODE_BPF() (				\
88 	RXMODE_BPF_PASS() ||			\
89 	RXMODE_BPF_VERDICT())
90 
91 #define RXMODE() (				\
92 	RXMODE_NORMAL() ||			\
93 	RXMODE_BPF())
94 
95 static struct socmap_ctx {
96 	struct bench_sockmap_prog *skel;
97 	enum SOCKMAP_ARG_FLAG mode;
98 	#define c1	fds[0]
99 	#define p1	fds[1]
100 	#define c2	fds[2]
101 	#define p2	fds[3]
102 	#define sfd	fds[4]
103 	int		fds[5];
104 	long		send_calls;
105 	long		read_calls;
106 	long		prod_send;
107 	long		user_read;
108 	int		file_size;
109 	int		delay_consumer;
110 	int		prod_run_time;
111 	int		strp_size;
112 } ctx = {
113 	.prod_send	= 0,
114 	.user_read	= 0,
115 	.file_size	= FILE_SIZE,
116 	.mode		= ARG_FW_RX_VERDICT_EGRESS,
117 	.fds		= {0},
118 	.delay_consumer = 0,
119 	.prod_run_time	= 0,
120 	.strp_size	= 0,
121 };
122 
123 static void bench_sockmap_prog_destroy(void)
124 {
125 	int i;
126 
127 	for (i = 0; i < sizeof(ctx.fds); i++) {
128 		if (ctx.fds[0] > 0)
129 			close(ctx.fds[i]);
130 	}
131 
132 	bench_sockmap_prog__destroy(ctx.skel);
133 }
134 
135 static void init_addr(struct sockaddr_storage *ss,
136 		      socklen_t *len)
137 {
138 	struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
139 
140 	addr4->sin_family = AF_INET;
141 	addr4->sin_port = 0;
142 	addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
143 	*len = sizeof(*addr4);
144 }
145 
146 static bool set_non_block(int fd, bool blocking)
147 {
148 	int flags = fcntl(fd, F_GETFL, 0);
149 
150 	if (flags == -1)
151 		return false;
152 	flags = blocking ? (flags | O_NONBLOCK) : (flags & ~O_NONBLOCK);
153 	return (fcntl(fd, F_SETFL, flags) == 0);
154 }
155 
156 static int create_pair(int *c, int *p, int type)
157 {
158 	struct sockaddr_storage addr;
159 	int err, cfd, pfd;
160 	socklen_t addr_len = sizeof(struct sockaddr_storage);
161 
162 	err = getsockname(ctx.sfd, (struct sockaddr *)&addr, &addr_len);
163 	if (err) {
164 		fprintf(stderr, "getsockname error %d\n", errno);
165 		return err;
166 	}
167 	cfd = socket(AF_INET, type, 0);
168 	if (cfd < 0) {
169 		fprintf(stderr, "socket error %d\n", errno);
170 		return err;
171 	}
172 
173 	err = connect(cfd, (struct sockaddr *)&addr, addr_len);
174 	if (err && errno != EINPROGRESS) {
175 		fprintf(stderr, "connect error %d\n", errno);
176 		return err;
177 	}
178 
179 	pfd = accept(ctx.sfd, NULL, NULL);
180 	if (pfd < 0) {
181 		fprintf(stderr, "accept error %d\n", errno);
182 		return err;
183 	}
184 	*c = cfd;
185 	*p = pfd;
186 	return 0;
187 }
188 
189 static int create_sockets(void)
190 {
191 	struct sockaddr_storage addr;
192 	int err, one = 1;
193 	socklen_t addr_len;
194 
195 	init_addr(&addr, &addr_len);
196 	ctx.sfd = socket(AF_INET, SOCK_STREAM, 0);
197 	if (ctx.sfd < 0) {
198 		fprintf(stderr, "socket error:%d\n", errno);
199 		return ctx.sfd;
200 	}
201 	err = setsockopt(ctx.sfd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
202 	if (err) {
203 		fprintf(stderr, "setsockopt error:%d\n", errno);
204 		return err;
205 	}
206 
207 	err = bind(ctx.sfd, (struct sockaddr *)&addr, addr_len);
208 	if (err) {
209 		fprintf(stderr, "bind error:%d\n", errno);
210 		return err;
211 	}
212 
213 	err = listen(ctx.sfd, SOMAXCONN);
214 	if (err) {
215 		fprintf(stderr, "listen error:%d\n", errno);
216 		return err;
217 	}
218 
219 	err = create_pair(&ctx.c1, &ctx.p1, SOCK_STREAM);
220 	if (err) {
221 		fprintf(stderr, "create_pair 1 error\n");
222 		return err;
223 	}
224 
225 	err = create_pair(&ctx.c2, &ctx.p2, SOCK_STREAM);
226 	if (err) {
227 		fprintf(stderr, "create_pair 2 error\n");
228 		return err;
229 	}
230 	printf("create socket fd c1:%d p1:%d c2:%d p2:%d\n",
231 	       ctx.c1, ctx.p1, ctx.c2, ctx.p2);
232 	return 0;
233 }
234 
235 static void validate(void)
236 {
237 	if (env.consumer_cnt != 2 || env.producer_cnt != 1 ||
238 	    !env.affinity)
239 		goto err;
240 	return;
241 err:
242 	fprintf(stderr, "argument '-c 2 -p 1 -a' is necessary");
243 	exit(1);
244 }
245 
246 static int setup_rx_sockmap(void)
247 {
248 	int verdict, pass, parser, map;
249 	int zero = 0, one = 1;
250 	int err;
251 
252 	parser = bpf_program__fd(ctx.skel->progs.prog_skb_parser);
253 	verdict = bpf_program__fd(ctx.skel->progs.prog_skb_verdict);
254 	pass = bpf_program__fd(ctx.skel->progs.prog_skb_pass);
255 	map = bpf_map__fd(ctx.skel->maps.sock_map_rx);
256 
257 	if (ctx.strp_size != 0) {
258 		ctx.skel->bss->pkt_size = ctx.strp_size;
259 		err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0);
260 		if (err)
261 			return err;
262 	}
263 
264 	if (RXMODE_BPF_VERDICT())
265 		err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
266 	else if (RXMODE_BPF_PASS())
267 		err = bpf_prog_attach(pass, map, BPF_SK_SKB_STREAM_VERDICT, 0);
268 	if (err)
269 		return err;
270 
271 	if (RXMODE_BPF_PASS())
272 		return bpf_map_update_elem(map, &zero, &ctx.c2, BPF_NOEXIST);
273 
274 	err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST);
275 	if (err < 0)
276 		return err;
277 
278 	if (RXMODE_BPF_VERDICT_INGRESS()) {
279 		ctx.skel->bss->verdict_dir = BPF_F_INGRESS;
280 		err = bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST);
281 	} else {
282 		err = bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST);
283 	}
284 	if (err < 0)
285 		return err;
286 
287 	return 0;
288 }
289 
290 static int setup_tx_sockmap(void)
291 {
292 	int zero = 0, one = 1;
293 	int prog, map;
294 	int err;
295 
296 	map = bpf_map__fd(ctx.skel->maps.sock_map_tx);
297 	prog = TXMODE_BPF_PASS() ?
298 		bpf_program__fd(ctx.skel->progs.prog_skmsg_pass) :
299 		bpf_program__fd(ctx.skel->progs.prog_skmsg_verdict);
300 
301 	err = bpf_prog_attach(prog, map, BPF_SK_MSG_VERDICT, 0);
302 	if (err)
303 		return err;
304 
305 	if (TXMODE_BPF_EGRESS()) {
306 		err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST);
307 		err |= bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST);
308 	} else {
309 		ctx.skel->bss->verdict_dir = BPF_F_INGRESS;
310 		err = bpf_map_update_elem(map, &zero, &ctx.p2, BPF_NOEXIST);
311 		err |= bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST);
312 	}
313 
314 	if (err < 0)
315 		return err;
316 
317 	return 0;
318 }
319 
320 static void setup(void)
321 {
322 	int err;
323 
324 	ctx.skel = bench_sockmap_prog__open_and_load();
325 	if (!ctx.skel) {
326 		fprintf(stderr, "error loading skel\n");
327 		exit(1);
328 	}
329 
330 	if (create_sockets()) {
331 		fprintf(stderr, "create_net_mode error\n");
332 		goto err;
333 	}
334 
335 	if (RXMODE_BPF()) {
336 		err = setup_rx_sockmap();
337 		if (err) {
338 			fprintf(stderr, "setup_rx_sockmap error:%d\n", err);
339 			goto err;
340 		}
341 	} else if (TXMODE_BPF()) {
342 		err = setup_tx_sockmap();
343 		if (err) {
344 			fprintf(stderr, "setup_tx_sockmap error:%d\n", err);
345 			goto err;
346 		}
347 	} else {
348 		fprintf(stderr, "unknown sockmap bench mode: %d\n", ctx.mode);
349 		goto err;
350 	}
351 
352 	return;
353 
354 err:
355 	bench_sockmap_prog_destroy();
356 	exit(1);
357 }
358 
359 static void measure(struct bench_res *res)
360 {
361 	res->drops = atomic_swap(&ctx.prod_send, 0);
362 	res->hits = atomic_swap(&ctx.skel->bss->process_byte, 0);
363 	res->false_hits = atomic_swap(&ctx.user_read, 0);
364 	res->important_hits = atomic_swap(&ctx.send_calls, 0);
365 	res->important_hits |= atomic_swap(&ctx.read_calls, 0) << 32;
366 }
367 
368 static void verify_data(int *check_pos, char *buf, int rcv)
369 {
370 	for (int i = 0 ; i < rcv; i++) {
371 		if (buf[i] != snd_data[(*check_pos) % DATA_REPEAT_SIZE]) {
372 			fprintf(stderr, "verify data fail");
373 			exit(1);
374 		}
375 		(*check_pos)++;
376 		if (*check_pos >= FILE_SIZE)
377 			*check_pos = 0;
378 	}
379 }
380 
381 static void *consumer(void *input)
382 {
383 	int rcv, sent;
384 	int check_pos = 0;
385 	int tid = (long)input;
386 	int recv_buf_size = FILE_SIZE;
387 	char *buf = malloc(recv_buf_size);
388 	int delay_read = ctx.delay_consumer;
389 
390 	if (!buf) {
391 		fprintf(stderr, "fail to init read buffer");
392 		return NULL;
393 	}
394 
395 	while (true) {
396 		if (tid == 1) {
397 			/* consumer 1 is unused for tx test and stream verdict test */
398 			if (RXMODE_BPF() || TXMODE())
399 				return NULL;
400 			/* it's only for RX_NORMAL which service as reserve-proxy mode */
401 			rcv = read(ctx.p1, buf, recv_buf_size);
402 			if (rcv < 0) {
403 				fprintf(stderr, "fail to read p1");
404 				return NULL;
405 			}
406 
407 			sent = send(ctx.p2, buf, recv_buf_size, 0);
408 			if (sent < 0) {
409 				fprintf(stderr, "fail to send p2");
410 				return NULL;
411 			}
412 		} else {
413 			if (delay_read != 0) {
414 				if (delay_read < 0)
415 					return NULL;
416 				sleep(delay_read);
417 				delay_read = 0;
418 			}
419 			/* read real endpoint by consumer 0 */
420 			atomic_inc(&ctx.read_calls);
421 			rcv = read(ctx.c2, buf, recv_buf_size);
422 			if (rcv < 0 && errno != EAGAIN) {
423 				fprintf(stderr, "%s fail to read c2 %d\n", __func__, errno);
424 				return NULL;
425 			}
426 			verify_data(&check_pos, buf, rcv);
427 			atomic_add(&ctx.user_read, rcv);
428 		}
429 	}
430 
431 	return NULL;
432 }
433 
434 static void *producer(void *input)
435 {
436 	int off = 0, fp, need_sent, sent;
437 	int file_size = ctx.file_size;
438 	struct timespec ts1, ts2;
439 	int target;
440 	FILE *file;
441 
442 	file = tmpfile();
443 	if (!file) {
444 		fprintf(stderr, "create file for sendfile");
445 		return NULL;
446 	}
447 
448 	/* we need simple verify */
449 	for (int i = 0; i < file_size; i++) {
450 		if (fwrite(&snd_data[off], sizeof(char), 1, file) != 1) {
451 			fprintf(stderr, "init tmpfile error");
452 			return NULL;
453 		}
454 		if (++off >= sizeof(snd_data))
455 			off = 0;
456 	}
457 	fflush(file);
458 	fseek(file, 0, SEEK_SET);
459 
460 	fp = fileno(file);
461 	need_sent = file_size;
462 	clock_gettime(CLOCK_MONOTONIC, &ts1);
463 
464 	if (RXMODE_BPF_VERDICT())
465 		target = ctx.c1;
466 	else if (TXMODE_BPF_EGRESS())
467 		target = ctx.p1;
468 	else
469 		target = ctx.p2;
470 	set_non_block(target, true);
471 	while (true) {
472 		if (ctx.prod_run_time) {
473 			clock_gettime(CLOCK_MONOTONIC, &ts2);
474 			if (ts2.tv_sec - ts1.tv_sec > ctx.prod_run_time)
475 				return NULL;
476 		}
477 
478 		errno = 0;
479 		atomic_inc(&ctx.send_calls);
480 		sent = sendfile(target, fp, NULL, need_sent);
481 		if (sent < 0) {
482 			if (errno != EAGAIN && errno != ENOMEM && errno != ENOBUFS) {
483 				fprintf(stderr, "sendfile return %d, errorno %d:%s\n",
484 					sent, errno, strerror(errno));
485 				return NULL;
486 			}
487 			continue;
488 		} else if (sent < need_sent) {
489 			need_sent -= sent;
490 			atomic_add(&ctx.prod_send, sent);
491 			continue;
492 		}
493 		atomic_add(&ctx.prod_send, need_sent);
494 		need_sent = file_size;
495 		lseek(fp, 0, SEEK_SET);
496 	}
497 
498 	return NULL;
499 }
500 
501 static void report_progress(int iter, struct bench_res *res, long delta_ns)
502 {
503 	double speed_mbs, prod_mbs, bpf_mbs, send_hz, read_hz;
504 
505 	prod_mbs = res->drops / 1000000.0 / (delta_ns / 1000000000.0);
506 	speed_mbs = res->false_hits / 1000000.0 / (delta_ns / 1000000000.0);
507 	bpf_mbs = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
508 	send_hz = (res->important_hits & 0xFFFFFFFF) / (delta_ns / 1000000000.0);
509 	read_hz = (res->important_hits >> 32) / (delta_ns / 1000000000.0);
510 
511 	printf("Iter %3d (%7.3lfus): ",
512 	       iter, (delta_ns - 1000000000) / 1000.0);
513 	printf("Send Speed %8.3lf MB/s (%8.3lf calls/s), BPF Speed %8.3lf MB/s, "
514 	       "Rcv Speed %8.3lf MB/s (%8.3lf calls/s)\n",
515 	       prod_mbs, send_hz, bpf_mbs, speed_mbs, read_hz);
516 }
517 
518 static void report_final(struct bench_res res[], int res_cnt)
519 {
520 	double verdict_mbs_mean = 0.0;
521 	long verdict_total = 0;
522 	int i;
523 
524 	for (i = 0; i < res_cnt; i++) {
525 		verdict_mbs_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
526 		verdict_total += res[i].hits / 1000000.0;
527 	}
528 
529 	printf("Summary: total trans %8.3lu MB \u00B1 %5.3lf MB/s\n",
530 	       verdict_total, verdict_mbs_mean);
531 }
532 
533 static const struct argp_option opts[] = {
534 	{ "rx-normal", ARG_FW_RX_NORMAL, NULL, 0,
535 		"simple reserve-proxy mode, no bfp enabled"},
536 	{ "rx-pass", ARG_FW_RX_PASS, NULL, 0,
537 		"run bpf prog but no redir applied"},
538 	{ "rx-strp", ARG_CTL_RX_STRP, "Byte", 0,
539 		"enable strparser and set the encapsulation size"},
540 	{ "rx-verdict-egress", ARG_FW_RX_VERDICT_EGRESS, NULL, 0,
541 		"forward data with bpf(stream verdict)"},
542 	{ "rx-verdict-ingress", ARG_FW_RX_VERDICT_INGRESS, NULL, 0,
543 		"forward data with bpf(stream verdict)"},
544 	{ "tx-normal", ARG_FW_TX_NORMAL, NULL, 0,
545 		"simple c-s mode, no bfp enabled"},
546 	{ "tx-pass", ARG_FW_TX_PASS, NULL, 0,
547 		"run bpf prog but no redir applied"},
548 	{ "tx-verdict-ingress", ARG_FW_TX_VERDICT_INGRESS, NULL, 0,
549 		"forward msg to ingress queue of another socket"},
550 	{ "tx-verdict-egress", ARG_FW_TX_VERDICT_EGRESS, NULL, 0,
551 		"forward msg to egress queue of another socket"},
552 	{ "delay-consumer", ARG_CONSUMER_DELAY_TIME, "SEC", 0,
553 		"delay consumer start"},
554 	{ "producer-duration", ARG_PRODUCER_DURATION, "SEC", 0,
555 		"producer duration"},
556 	{},
557 };
558 
559 static error_t parse_arg(int key, char *arg, struct argp_state *state)
560 {
561 	switch (key) {
562 	case ARG_FW_RX_NORMAL...ARG_FW_TX_VERDICT_EGRESS:
563 		ctx.mode = key;
564 		break;
565 	case ARG_CONSUMER_DELAY_TIME:
566 		ctx.delay_consumer = strtol(arg, NULL, 10);
567 		break;
568 	case ARG_PRODUCER_DURATION:
569 		ctx.prod_run_time = strtol(arg, NULL, 10);
570 		break;
571 	case ARG_CTL_RX_STRP:
572 		ctx.strp_size = strtol(arg, NULL, 10);
573 		break;
574 	default:
575 		return ARGP_ERR_UNKNOWN;
576 	}
577 
578 	return 0;
579 }
580 
581 /* exported into benchmark runner */
582 const struct argp bench_sockmap_argp = {
583 	.options	= opts,
584 	.parser		= parse_arg,
585 };
586 
587 /* Benchmark performance of creating bpf local storage  */
588 const struct bench bench_sockmap = {
589 	.name			= "sockmap",
590 	.argp			= &bench_sockmap_argp,
591 	.validate		= validate,
592 	.setup			= setup,
593 	.producer_thread	= producer,
594 	.consumer_thread	= consumer,
595 	.measure		= measure,
596 	.report_progress	= report_progress,
597 	.report_final		= report_final,
598 };
599