xref: /linux/tools/testing/selftests/bpf/test_sockmap.c (revision 6af8971d910ec80d7ed33e41a68b86c08142df08)
1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <sys/socket.h>
6 #include <sys/ioctl.h>
7 #include <sys/select.h>
8 #include <netinet/in.h>
9 #include <arpa/inet.h>
10 #include <unistd.h>
11 #include <string.h>
12 #include <errno.h>
13 #include <stdbool.h>
14 #include <signal.h>
15 #include <fcntl.h>
16 #include <sys/wait.h>
17 #include <time.h>
18 #include <sched.h>
19 
20 #include <sys/time.h>
21 #include <sys/types.h>
22 #include <sys/sendfile.h>
23 
24 #include <linux/netlink.h>
25 #include <linux/socket.h>
26 #include <linux/sock_diag.h>
27 #include <linux/bpf.h>
28 #include <linux/if_link.h>
29 #include <assert.h>
30 #include <libgen.h>
31 
32 #include <getopt.h>
33 
34 #include <bpf/bpf.h>
35 #include <bpf/libbpf.h>
36 
37 #include "bpf_util.h"
38 #include "cgroup_helpers.h"
39 
/* Loop-control flag: forever_ping_pong() keeps iterating while this is non-zero. */
int running;
/*
 * Defined outside this part of the file.
 * NOTE(review): presumably a signal handler that updates `running` - confirm.
 */
static void running_handler(int a);

/* randomly selected ports for testing on lo */
#define S1_PORT 10000
#define S2_PORT 10001

/*
 * BPF object file names. test_end_subtest() compares env.type against
 * BPF_SOCKMAP_FILENAME to decide whether to print "sockmap" or "sockhash".
 */
#define BPF_SOCKMAP_FILENAME  "test_sockmap_kern.bpf.o"
#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.bpf.o"
/* NOTE(review): presumably the cgroup path used by the test - usage is not visible here. */
#define CG_PATH "/sockmap"

/* Private error code; msg_verify_data() returns -EDATAINTEGRITY on a payload mismatch. */
#define EDATAINTEGRITY 2001
52 
/*
 * global sockets
 *
 * As set up by sockmap_init_sockets(): s1/s2 are the listening sockets,
 * c1/c2 the clients connected to them, and p1/p2 the accepted peers
 * (c1 <-> p1, c2 <-> p2).
 */
int s1, s2, c1, c2, p1, p2;
int test_cnt;
int passed;
int failed;
/*
 * BPF map file descriptors, as used by run_options():
 *   [0] socket map the skb programs attach to (redirect target at key 3)
 *   [1] socket map the txmsg program attaches to (c1 at key 0)
 *   [2] redirect target sockets
 *   [3] apply_bytes value, [4] cork_bytes value
 *   [5] start/end/start_push/end_push/start_pop/pop values at keys 0..5
 *   [6] txmsg ingress flag, [7] skb ingress flag (key 0) and
 *       skb_use_parser value (key 2)
 */
int map_fd[8];
struct bpf_map *maps[8];
/*
 * BPF programs, as used by run_options(): [0] and [1] attach to map_fd[0],
 * [2] attaches to the cgroup as BPF_CGROUP_SOCK_OPS, [3]..[7] are the txmsg
 * programs chosen for pass/redir/apply/cork/drop respectively.
 */
struct bpf_program *progs[8];
/* Attachment handles; run_options() fills slots 0, 1 and 4 and detaches all non-NULL slots. */
struct bpf_link *links[8];

/*
 * Per-test knobs. They are set from the command line (see long_options),
 * cleared by test_reset() (except peek_flag) and consumed by run_options()
 * and msg_loop().
 */
int txmsg_pass;
int txmsg_redir;
int txmsg_drop;
int txmsg_apply;
int txmsg_cork;
int txmsg_start;
int txmsg_end;
int txmsg_start_push;
int txmsg_end_push;
int txmsg_start_pop;
int txmsg_pop;
int txmsg_ingress;
int txmsg_redir_skb;
int peek_flag;
int skb_use_parser;
int txmsg_omit_skb_parser;
/* Effective push/pop ranges computed by msg_verify_date_prep() for msg_verify_data(). */
int verify_push_start;
int verify_push_len;
int verify_pop_start;
int verify_pop_len;
83 
/*
 * Command-line option table (struct option from <getopt.h>).
 *
 * Entries with a non-NULL flag pointer name the global that receives the
 * value in the last column (always 1 here); the remaining entries carry a
 * short option character instead. usage() prints the table using exactly
 * this distinction. The all-zero entry terminates the table.
 */
static const struct option long_options[] = {
	{"help",	no_argument,		NULL, 'h' },
	{"cgroup",	required_argument,	NULL, 'c' },
	{"rate",	required_argument,	NULL, 'r' },
	{"verbose",	optional_argument,	NULL, 'v' },
	{"iov_count",	required_argument,	NULL, 'i' },
	{"length",	required_argument,	NULL, 'l' },
	{"test",	required_argument,	NULL, 't' },
	{"data_test",   no_argument,		NULL, 'd' },
	{"txmsg",		no_argument,	&txmsg_pass,  1  },
	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
	{"txmsg_drop",		no_argument,	&txmsg_drop, 1 },
	{"txmsg_apply",	required_argument,	NULL, 'a'},
	{"txmsg_cork",	required_argument,	NULL, 'k'},
	{"txmsg_start", required_argument,	NULL, 's'},
	{"txmsg_end",	required_argument,	NULL, 'e'},
	{"txmsg_start_push", required_argument,	NULL, 'p'},
	{"txmsg_end_push",   required_argument,	NULL, 'q'},
	{"txmsg_start_pop",  required_argument,	NULL, 'w'},
	{"txmsg_pop",	     required_argument,	NULL, 'x'},
	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
	{"txmsg_redir_skb", no_argument,	&txmsg_redir_skb, 1 },
	{"peek", no_argument,			&peek_flag, 1 },
	{"txmsg_omit_skb_parser", no_argument,      &txmsg_omit_skb_parser, 1},
	{"whitelist", required_argument,	NULL, 'n' },
	{"blacklist", required_argument,	NULL, 'b' },
	{0, 0, NULL, 0 }
};
112 
/* Bookkeeping for the test currently running and the overall pass/fail tally. */
struct test_env {
	const char *type;	/* map object name (options->map); compared with BPF_SOCKMAP_FILENAME */
	const char *subtest;	/* title of the running test (struct _test::title) */
	const char *prepend;	/* optional label printed in the result line; may be NULL */

	int test_num;		/* incremented by test_start_subtest() */
	int subtest_num;	/* reset by test_start_subtest(), incremented by test_start() */

	int succ_cnt;		/* incremented by test_pass() */
	int fail_cnt;		/* incremented by test_fail() */
	int fail_last;		/* fail_cnt snapshot taken when the current test started */
};

/* Single global instance used by the test_*() helpers. */
struct test_env env;
127 
/* Parameters for one run, passed to run_options(), sendmsg_test() and msg_loop(). */
struct sockmap_options {
	int verbose;		/* non-zero enables diagnostics; > 1 prints per-run statistics */
	bool base;		/* sendmsg_test(): receive on p1 when set, on p2 otherwise */
	bool sendpage;		/* sendmsg_test(): transmit with msg_loop_sendpage() instead of msg_loop() */
	bool data_test;		/* fill tx buffers with a byte pattern and verify it on rx */
	bool drop_expected;	/* sends are expected to fail; the rx child exits immediately */
	bool check_recved_len;	/* rx allocates double-size buffers and fails on surplus bytes */
	bool tx_wait_mem;	/* sendmsg_test() shrinks socket buffers and sets a send timeout */
	int iov_count;		/* iovec entries per message */
	int iov_length;		/* bytes per iovec entry */
	int rate;		/* message count in sendmsg_test(); sleep seconds in forever_ping_pong() */
	char *map;		/* copied to env.type by test_start_subtest() */
	char *whitelist;	/* NOTE(review): not used in the visible code - presumably a test name filter */
	char *blacklist;	/* NOTE(review): not used in the visible code - presumably a test name filter */
	char *prepend;		/* copied to env.prepend by test_start_subtest() */
};
144 
/* One named test: a title (printed by test_end_subtest()) and the function that runs it. */
struct _test {
	char *title;
	/* Test body; receives a cgroup fd and the options to run with. */
	void (*tester)(int cg_fd, struct sockmap_options *opt);
};
149 
150 static void test_start(void)
151 {
152 	env.subtest_num++;
153 }
154 
155 static void test_fail(void)
156 {
157 	env.fail_cnt++;
158 }
159 
160 static void test_pass(void)
161 {
162 	env.succ_cnt++;
163 }
164 
165 static void test_reset(void)
166 {
167 	txmsg_start = txmsg_end = 0;
168 	txmsg_start_pop = txmsg_pop = 0;
169 	txmsg_start_push = txmsg_end_push = 0;
170 	txmsg_pass = txmsg_drop = txmsg_redir = 0;
171 	txmsg_apply = txmsg_cork = 0;
172 	txmsg_ingress = txmsg_redir_skb = 0;
173 	txmsg_omit_skb_parser = 0;
174 	skb_use_parser = 0;
175 }
176 
177 static int test_start_subtest(const struct _test *t, struct sockmap_options *o)
178 {
179 	env.type = o->map;
180 	env.subtest = t->title;
181 	env.prepend = o->prepend;
182 	env.test_num++;
183 	env.subtest_num = 0;
184 	env.fail_last = env.fail_cnt;
185 	test_reset();
186 	return 0;
187 }
188 
189 static void test_end_subtest(void)
190 {
191 	int error = env.fail_cnt - env.fail_last;
192 	int type = strcmp(env.type, BPF_SOCKMAP_FILENAME);
193 
194 	if (!error)
195 		test_pass();
196 
197 	fprintf(stdout, "#%2d/%2d %8s:%s:%s:%s\n",
198 		env.test_num, env.subtest_num,
199 		!type ? "sockmap" : "sockhash",
200 		env.prepend ? : "",
201 		env.subtest, error ? "FAIL" : "OK");
202 }
203 
204 static void test_print_results(void)
205 {
206 	fprintf(stdout, "Pass: %d Fail: %d\n",
207 		env.succ_cnt, env.fail_cnt);
208 }
209 
210 static void usage(char *argv[])
211 {
212 	int i;
213 
214 	printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
215 	printf(" options:\n");
216 	for (i = 0; long_options[i].name != 0; i++) {
217 		printf(" --%-12s", long_options[i].name);
218 		if (long_options[i].flag != NULL)
219 			printf(" flag (internal value:%d)\n",
220 				*long_options[i].flag);
221 		else
222 			printf(" -%c\n", long_options[i].val);
223 	}
224 	printf("\n");
225 }
226 
227 static int sockmap_init_sockets(int verbose)
228 {
229 	int i, err, one = 1;
230 	struct sockaddr_in addr;
231 	int *fds[4] = {&s1, &s2, &c1, &c2};
232 
233 	s1 = s2 = p1 = p2 = c1 = c2 = 0;
234 
235 	/* Init sockets */
236 	for (i = 0; i < 4; i++) {
237 		*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
238 		if (*fds[i] < 0) {
239 			perror("socket s1 failed()");
240 			return errno;
241 		}
242 	}
243 
244 	/* Allow reuse */
245 	for (i = 0; i < 2; i++) {
246 		err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
247 				 (char *)&one, sizeof(one));
248 		if (err) {
249 			perror("setsockopt failed()");
250 			return errno;
251 		}
252 	}
253 
254 	/* Non-blocking sockets */
255 	for (i = 0; i < 2; i++) {
256 		err = ioctl(*fds[i], FIONBIO, (char *)&one);
257 		if (err < 0) {
258 			perror("ioctl s1 failed()");
259 			return errno;
260 		}
261 	}
262 
263 	/* Bind server sockets */
264 	memset(&addr, 0, sizeof(struct sockaddr_in));
265 	addr.sin_family = AF_INET;
266 	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
267 
268 	addr.sin_port = htons(S1_PORT);
269 	err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
270 	if (err < 0) {
271 		perror("bind s1 failed()");
272 		return errno;
273 	}
274 
275 	addr.sin_port = htons(S2_PORT);
276 	err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
277 	if (err < 0) {
278 		perror("bind s2 failed()");
279 		return errno;
280 	}
281 
282 	/* Listen server sockets */
283 	addr.sin_port = htons(S1_PORT);
284 	err = listen(s1, 32);
285 	if (err < 0) {
286 		perror("listen s1 failed()");
287 		return errno;
288 	}
289 
290 	addr.sin_port = htons(S2_PORT);
291 	err = listen(s2, 32);
292 	if (err < 0) {
293 		perror("listen s1 failed()");
294 		return errno;
295 	}
296 
297 	/* Initiate Connect */
298 	addr.sin_port = htons(S1_PORT);
299 	err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
300 	if (err < 0 && errno != EINPROGRESS) {
301 		perror("connect c1 failed()");
302 		return errno;
303 	}
304 
305 	addr.sin_port = htons(S2_PORT);
306 	err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
307 	if (err < 0 && errno != EINPROGRESS) {
308 		perror("connect c2 failed()");
309 		return errno;
310 	} else if (err < 0) {
311 		err = 0;
312 	}
313 
314 	/* Accept Connecrtions */
315 	p1 = accept(s1, NULL, NULL);
316 	if (p1 < 0) {
317 		perror("accept s1 failed()");
318 		return errno;
319 	}
320 
321 	p2 = accept(s2, NULL, NULL);
322 	if (p2 < 0) {
323 		perror("accept s1 failed()");
324 		return errno;
325 	}
326 
327 	if (verbose > 1) {
328 		printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
329 		printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
330 			c1, s1, c2, s2);
331 	}
332 	return 0;
333 }
334 
/* Byte counters and CLOCK_MONOTONIC timestamps filled in by msg_loop()/msg_loop_sendpage(). */
struct msg_stats {
	size_t bytes_sent;	/* sum of positive sendmsg()/sendfile() results */
	size_t bytes_recvd;	/* sum of positive recvmsg() results */
	struct timespec start;	/* taken just before the send/receive loop */
	struct timespec end;	/* taken when the loop finishes (and on some error paths) */
};
341 
342 static int msg_loop_sendpage(int fd, int iov_length, int cnt,
343 			     struct msg_stats *s,
344 			     struct sockmap_options *opt)
345 {
346 	bool drop = opt->drop_expected;
347 	unsigned char k = 0;
348 	int i, j, fp;
349 	FILE *file;
350 
351 	file = tmpfile();
352 	if (!file) {
353 		perror("create file for sendpage");
354 		return 1;
355 	}
356 	for (i = 0; i < cnt; i++, k = 0) {
357 		for (j = 0; j < iov_length; j++, k++)
358 			fwrite(&k, sizeof(char), 1, file);
359 	}
360 	fflush(file);
361 	fseek(file, 0, SEEK_SET);
362 
363 	fp = fileno(file);
364 
365 	clock_gettime(CLOCK_MONOTONIC, &s->start);
366 	for (i = 0; i < cnt; i++) {
367 		int sent;
368 
369 		errno = 0;
370 		sent = sendfile(fd, fp, NULL, iov_length);
371 
372 		if (!drop && sent < 0) {
373 			perror("sendpage loop error");
374 			fclose(file);
375 			return sent;
376 		} else if (drop && sent >= 0) {
377 			printf("sendpage loop error expected: %i errno %i\n",
378 			       sent, errno);
379 			fclose(file);
380 			return -EIO;
381 		}
382 
383 		if (sent > 0)
384 			s->bytes_sent += sent;
385 	}
386 	clock_gettime(CLOCK_MONOTONIC, &s->end);
387 	fclose(file);
388 	return 0;
389 }
390 
/*
 * Release every buffer referenced by @msg's iovec array, then the array
 * itself, and leave @msg with no iovecs. Safe on a zero-initialised msghdr.
 */
static void msg_free_iov(struct msghdr *msg)
{
	struct iovec *vec = msg->msg_iov;
	int idx = 0;

	while (idx < msg->msg_iovlen) {
		free(vec[idx].iov_base);
		idx++;
	}

	free(vec);
	msg->msg_iovlen = 0;
	msg->msg_iov = NULL;
}
401 
/*
 * Allocate @iov_count zeroed buffers of @iov_length bytes each and attach
 * them to @msg as its iovec array.
 *
 * When both @data and @xmit are set the buffers are filled with the byte
 * pattern 0, 1, 2, ... which runs on across buffers and wraps as an
 * unsigned char; otherwise the buffers stay zeroed.
 *
 * Returns 0 on success. On allocation failure returns -ENOMEM, releases
 * everything allocated so far and leaves @msg untouched.
 */
static int msg_alloc_iov(struct msghdr *msg,
			 int iov_count, int iov_length,
			 bool data, bool xmit)
{
	unsigned char k = 0;
	struct iovec *iov;
	int i;

	iov = calloc(iov_count, sizeof(struct iovec));
	if (!iov)
		return -ENOMEM;

	for (i = 0; i < iov_count; i++) {
		unsigned char *d = calloc(iov_length, sizeof(char));

		if (!d) {
			fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
			goto unwind_iov;
		}
		iov[i].iov_base = d;
		iov[i].iov_len = iov_length;

		if (data && xmit) {
			int j;

			for (j = 0; j < iov_length; j++)
				d[j] = k++;
		}
	}

	msg->msg_iov = iov;
	msg->msg_iovlen = iov_count;

	return 0;
unwind_iov:
	/*
	 * Unwind through the local array: msg->msg_iov has not been assigned
	 * yet, so it must not be dereferenced here.
	 */
	while (i-- > 0)
		free(iov[i].iov_base);
	free(iov);
	return -ENOMEM;
}
441 
442 /* In push or pop test, we need to do some calculations for msg_verify_data */
443 static void msg_verify_date_prep(void)
444 {
445 	int push_range_end = txmsg_start_push + txmsg_end_push - 1;
446 	int pop_range_end = txmsg_start_pop + txmsg_pop - 1;
447 
448 	if (txmsg_end_push && txmsg_pop &&
449 	    txmsg_start_push <= pop_range_end && txmsg_start_pop <= push_range_end) {
450 		/* The push range and the pop range overlap */
451 		int overlap_len;
452 
453 		verify_push_start = txmsg_start_push;
454 		verify_pop_start = txmsg_start_pop;
455 		if (txmsg_start_push < txmsg_start_pop)
456 			overlap_len = min(push_range_end - txmsg_start_pop + 1, txmsg_pop);
457 		else
458 			overlap_len = min(pop_range_end - txmsg_start_push + 1, txmsg_end_push);
459 		verify_push_len = max(txmsg_end_push - overlap_len, 0);
460 		verify_pop_len = max(txmsg_pop - overlap_len, 0);
461 	} else {
462 		/* Otherwise */
463 		verify_push_start = txmsg_start_push;
464 		verify_pop_start = txmsg_start_pop;
465 		verify_push_len = txmsg_end_push;
466 		verify_pop_len = txmsg_pop;
467 	}
468 }
469 
/*
 * Check that the first @size bytes held in @msg's iovecs continue the
 * expected byte pattern, taking the configured push/pop ranges
 * (the verify_push_ and verify_pop_ globals) into account.
 *
 * The verification state is carried across calls through the pointer
 * arguments:
 *   @k_p         next expected byte value
 *   @bytes_cnt_p position inside the current chunk, counting verified and
 *                popped bytes; the state resets when it reaches @chunk_sz
 *   @check_cnt_p position inside the current chunk counting verified,
 *                popped and pushed bytes
 *   @push_p      pushed bytes of the current chunk still to be skipped
 *
 * Returns 0 and writes the state back on success. Returns -EDATAINTEGRITY
 * on the first mismatch, in which case the state is NOT written back.
 */
static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz,
			   unsigned char *k_p, int *bytes_cnt_p,
			   int *check_cnt_p, int *push_p)
{
	int bytes_cnt = *bytes_cnt_p, check_cnt = *check_cnt_p, push = *push_p;
	unsigned char k = *k_p;
	int i, j;

	for (i = 0, j = 0; i < msg->msg_iovlen && size; i++, j = 0) {
		unsigned char *d = msg->msg_iov[i].iov_base;

		for (; j < msg->msg_iov[i].iov_len && size; j++) {
			/* At the push position: skip the pushed bytes without checking them */
			if (push > 0 &&
			    check_cnt == verify_push_start + verify_push_len - push) {
				int skipped;
revisit_push:
				/* Skip at most to the end of this iovec; the rest is
				 * skipped when the next iovec is entered.
				 */
				skipped = push;
				if (j + push >= msg->msg_iov[i].iov_len)
					skipped = msg->msg_iov[i].iov_len - j;
				push -= skipped;
				size -= skipped;
				/* -1 compensates for the j++ done by the loop header */
				j += skipped - 1;
				check_cnt += skipped;
				continue;
			}

			/* At the pop position: the popped bytes never arrive, so
			 * advance the expected value and counters past them.
			 */
			if (verify_pop_len > 0 && check_cnt == verify_pop_start) {
				bytes_cnt += verify_pop_len;
				check_cnt += verify_pop_len;
				k += verify_pop_len;

				if (bytes_cnt == chunk_sz) {
					k = 0;
					bytes_cnt = 0;
					check_cnt = 0;
					push = verify_push_len;
				}

				/* The pop may have landed exactly on the push position */
				if (push > 0 &&
				    check_cnt == verify_push_start + verify_push_len - push)
					goto revisit_push;
			}

			/* NOTE(review): the diagnostic below reads d[j+1], which lies
			 * past the buffer when j is the last index - confirm this is
			 * acceptable.
			 */
			if (d[j] != k++) {
				fprintf(stderr,
					"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
					i, j, d[j], k - 1, d[j+1], k);
				return -EDATAINTEGRITY;
			}
			bytes_cnt++;
			check_cnt++;
			/* End of chunk: the pattern and the push budget start over */
			if (bytes_cnt == chunk_sz) {
				k = 0;
				bytes_cnt = 0;
				check_cnt = 0;
				push = verify_push_len;
			}
			size--;
		}
	}
	*k_p = k;
	*bytes_cnt_p = bytes_cnt;
	*check_cnt_p = check_cnt;
	*push_p = push;
	return 0;
}
536 
/*
 * Send (@tx true) or receive (@tx false) test traffic on @fd.
 *
 * Transmit: sends the same @iov_count x @iov_length message @cnt times with
 * sendmsg(). When opt->drop_expected is set every send is expected to fail.
 *
 * Receive: computes the number of bytes that should arrive (adjusted for
 * the configured push/pop/apply settings), then loops on select()/recvmsg()
 * until that many bytes were received, optionally peeking first
 * (peek_flag) and verifying the payload (opt->data_test).
 *
 * @s receives timestamps and byte counters.
 *
 * Returns 0 on the normal tx path and the result of the initial
 * clock_gettime() on the normal rx path. Every failure path returns the
 * current errno; note that some paths deliberately store 0 or a negative
 * value (-EIO, msg_verify_data() result) in errno first.
 */
static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
		    struct msg_stats *s, bool tx,
		    struct sockmap_options *opt)
{
	struct msghdr msg = {0}, msg_peek = {0};
	int err, i, flags = MSG_NOSIGNAL;
	bool drop = opt->drop_expected;
	bool data = opt->data_test;
	int iov_alloc_length = iov_length;

	/* Receive into oversized buffers so surplus bytes can be detected */
	if (!tx && opt->check_recved_len)
		iov_alloc_length *= 2;

	err = msg_alloc_iov(&msg, iov_count, iov_alloc_length, data, tx);
	if (err)
		goto out_errno;
	if (peek_flag) {
		err = msg_alloc_iov(&msg_peek, iov_count, iov_length, data, tx);
		if (err)
			goto out_errno;
	}

	if (tx) {
		clock_gettime(CLOCK_MONOTONIC, &s->start);
		for (i = 0; i < cnt; i++) {
			int sent;

			errno = 0;
			sent = sendmsg(fd, &msg, flags);

			if (!drop && sent < 0) {
				/* In the tx_wait_mem test EACCES is the expected
				 * outcome: report success (errno 0) to the caller.
				 */
				if (opt->tx_wait_mem && errno == EACCES) {
					errno = 0;
					goto out_errno;
				}
				perror("sendmsg loop error");
				goto out_errno;
			} else if (drop && sent >= 0) {
				fprintf(stderr,
					"sendmsg loop error expected: %i errno %i\n",
					sent, errno);
				errno = -EIO;
				goto out_errno;
			}
			if (sent > 0)
				s->bytes_sent += sent;
		}
		clock_gettime(CLOCK_MONOTONIC, &s->end);
	} else {
		float total_bytes, txmsg_pop_total, txmsg_push_total;
		int slct, recvp = 0, recv, max_fd = fd;
		int fd_flags = O_NONBLOCK;
		struct timeval timeout;
		unsigned char k = 0;
		int bytes_cnt = 0;
		int check_cnt = 0;
		int push = 0;
		fd_set w;

		/* NOTE(review): O_NONBLOCK is passed in fcntl()'s command
		 * position and no F_SETFL is used, so this call does not look
		 * like it makes fd non-blocking - confirm the intent.
		 */
		fcntl(fd, fd_flags);
		/* Account for pop bytes noting each iteration of apply will
		 * call msg_pop_data helper so we need to account for this
		 * by calculating the number of apply iterations. Note user
		 * of the tool can create cases where no data is sent by
		 * manipulating pop/push/pull/etc. For example txmsg_apply 1
		 * with txmsg_pop 1 will try to apply 1B at a time but each
		 * iteration will then pop 1B so no data will ever be sent.
		 * This is really only useful for testing edge cases in code
		 * paths.
		 */
		total_bytes = (float)iov_length * (float)cnt;
		if (!opt->sendpage)
			total_bytes *= (float)iov_count;
		if (txmsg_apply) {
			txmsg_push_total = txmsg_end_push * (total_bytes / txmsg_apply);
			txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply);
		} else {
			txmsg_push_total = txmsg_end_push * cnt;
			txmsg_pop_total = txmsg_pop * cnt;
		}
		total_bytes += txmsg_push_total;
		total_bytes -= txmsg_pop_total;
		if (data) {
			msg_verify_date_prep();
			push = verify_push_len;
		}
		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
		if (err < 0)
			perror("recv start time");
		while (s->bytes_recvd < total_bytes) {
			/* Corked data may legitimately never arrive: wait less */
			if (txmsg_cork) {
				timeout.tv_sec = 0;
				timeout.tv_usec = 300000;
			} else {
				timeout.tv_sec = 3;
				timeout.tv_usec = 0;
			}

			/* FD sets */
			FD_ZERO(&w);
			FD_SET(fd, &w);

			slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
			if (slct == -1) {
				perror("select()");
				clock_gettime(CLOCK_MONOTONIC, &s->end);
				goto out_errno;
			} else if (!slct) {
				if (opt->verbose)
					fprintf(stderr, "unexpected timeout: recved %zu/%f pop_total %f\n", s->bytes_recvd, total_bytes, txmsg_pop_total);
				errno = -EIO;
				clock_gettime(CLOCK_MONOTONIC, &s->end);
				goto out_errno;
			}

			/* tx_wait_mem test: once data is readable, wait on the
			 * exception set, then close the receiving socket and
			 * report success without reading anything.
			 */
			if (opt->tx_wait_mem) {
				FD_ZERO(&w);
				FD_SET(fd, &w);
				slct = select(max_fd + 1, NULL, NULL, &w, &timeout);
				errno = 0;
				close(fd);
				goto out_errno;
			}

			errno = 0;
			if (peek_flag) {
				/* Peek first; the real read below uses flags 0 */
				flags |= MSG_PEEK;
				recvp = recvmsg(fd, &msg_peek, flags);
				if (recvp < 0) {
					if (errno != EWOULDBLOCK) {
						clock_gettime(CLOCK_MONOTONIC, &s->end);
						goto out_errno;
					}
				}
				flags = 0;
			}

			recv = recvmsg(fd, &msg, flags);
			if (recv < 0) {
				if (errno != EWOULDBLOCK) {
					clock_gettime(CLOCK_MONOTONIC, &s->end);
					perror("recv failed()");
					goto out_errno;
				}
			}

			if (recv > 0)
				s->bytes_recvd += recv;

			if (opt->check_recved_len && s->bytes_recvd > total_bytes) {
				errno = EMSGSIZE;
				fprintf(stderr, "recv failed(), bytes_recvd:%zd, total_bytes:%f\n",
						s->bytes_recvd, total_bytes);
				goto out_errno;
			}

			if (data) {
				/* One chunk is what a single send call transmitted */
				int chunk_sz = opt->sendpage ?
						iov_length :
						iov_length * iov_count;

				/* errno carries the (negative) verification result so
				 * the out_errno path returns it.
				 */
				errno = msg_verify_data(&msg, recv, chunk_sz, &k, &bytes_cnt,
							&check_cnt, &push);
				if (errno) {
					perror("data verify msg failed");
					goto out_errno;
				}
				if (recvp) {
					errno = msg_verify_data(&msg_peek,
								recvp,
								chunk_sz,
								&k,
								&bytes_cnt,
								&check_cnt,
								&push);
					if (errno) {
						perror("data verify msg_peek failed");
						goto out_errno;
					}
				}
			}
		}
		clock_gettime(CLOCK_MONOTONIC, &s->end);
	}

	msg_free_iov(&msg);
	msg_free_iov(&msg_peek);
	return err;
out_errno:
	/* msg_free_iov() is safe on a msghdr that was never populated */
	msg_free_iov(&msg);
	msg_free_iov(&msg_peek);
	return errno;
}
730 
731 static float giga = 1000000000;
732 
733 static inline float sentBps(struct msg_stats s)
734 {
735 	return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
736 }
737 
738 static inline float recvdBps(struct msg_stats s)
739 {
740 	return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
741 }
742 
743 static int sendmsg_test(struct sockmap_options *opt)
744 {
745 	float sent_Bps = 0, recvd_Bps = 0;
746 	int rx_fd, txpid, rxpid, err = 0;
747 	struct msg_stats s = {0};
748 	int iov_count = opt->iov_count;
749 	int iov_buf = opt->iov_length;
750 	int rx_status, tx_status;
751 	int cnt = opt->rate;
752 
753 	errno = 0;
754 
755 	if (opt->base)
756 		rx_fd = p1;
757 	else
758 		rx_fd = p2;
759 
760 	if (opt->tx_wait_mem) {
761 		struct timeval timeout;
762 		int rxtx_buf_len = 1024;
763 
764 		timeout.tv_sec = 3;
765 		timeout.tv_usec = 0;
766 
767 		err = setsockopt(c2, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(struct timeval));
768 		err |= setsockopt(c2, SOL_SOCKET, SO_SNDBUFFORCE, &rxtx_buf_len, sizeof(int));
769 		err |= setsockopt(p2, SOL_SOCKET, SO_RCVBUFFORCE, &rxtx_buf_len, sizeof(int));
770 		if (err) {
771 			perror("setsockopt failed()");
772 			return errno;
773 		}
774 	}
775 
776 	rxpid = fork();
777 	if (rxpid == 0) {
778 		if (opt->drop_expected)
779 			_exit(0);
780 
781 		if (!iov_buf) /* zero bytes sent case */
782 			_exit(0);
783 
784 		if (opt->sendpage)
785 			iov_count = 1;
786 		err = msg_loop(rx_fd, iov_count, iov_buf,
787 			       cnt, &s, false, opt);
788 		if (opt->verbose > 1)
789 			fprintf(stderr,
790 				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
791 				iov_count, iov_buf, cnt, err);
792 		if (s.end.tv_sec - s.start.tv_sec) {
793 			sent_Bps = sentBps(s);
794 			recvd_Bps = recvdBps(s);
795 		}
796 		if (opt->verbose > 1)
797 			fprintf(stdout,
798 				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n",
799 				s.bytes_sent, sent_Bps, sent_Bps/giga,
800 				s.bytes_recvd, recvd_Bps, recvd_Bps/giga,
801 				peek_flag ? "(peek_msg)" : "");
802 		if (err && err != -EDATAINTEGRITY && txmsg_cork)
803 			err = 0;
804 		exit(err ? 1 : 0);
805 	} else if (rxpid == -1) {
806 		perror("msg_loop_rx");
807 		return errno;
808 	}
809 
810 	if (opt->tx_wait_mem)
811 		close(c2);
812 
813 	txpid = fork();
814 	if (txpid == 0) {
815 		if (opt->sendpage)
816 			err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
817 		else
818 			err = msg_loop(c1, iov_count, iov_buf,
819 				       cnt, &s, true, opt);
820 
821 		if (err)
822 			fprintf(stderr,
823 				"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
824 				iov_count, iov_buf, cnt, err);
825 		if (s.end.tv_sec - s.start.tv_sec) {
826 			sent_Bps = sentBps(s);
827 			recvd_Bps = recvdBps(s);
828 		}
829 		if (opt->verbose > 1)
830 			fprintf(stdout,
831 				"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
832 				s.bytes_sent, sent_Bps, sent_Bps/giga,
833 				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
834 		exit(err ? 1 : 0);
835 	} else if (txpid == -1) {
836 		perror("msg_loop_tx");
837 		return errno;
838 	}
839 
840 	assert(waitpid(rxpid, &rx_status, 0) == rxpid);
841 	assert(waitpid(txpid, &tx_status, 0) == txpid);
842 	if (WIFEXITED(rx_status)) {
843 		err = WEXITSTATUS(rx_status);
844 		if (err) {
845 			fprintf(stderr, "rx thread exited with err %d.\n", err);
846 			goto out;
847 		}
848 	}
849 	if (WIFEXITED(tx_status)) {
850 		err = WEXITSTATUS(tx_status);
851 		if (err)
852 			fprintf(stderr, "tx thread exited with err %d.\n", err);
853 	}
854 out:
855 	return err;
856 }
857 
858 static int forever_ping_pong(int rate, struct sockmap_options *opt)
859 {
860 	struct timeval timeout;
861 	char buf[1024] = {0};
862 	int sc;
863 
864 	timeout.tv_sec = 10;
865 	timeout.tv_usec = 0;
866 
867 	/* Ping/Pong data from client to server */
868 	sc = send(c1, buf, sizeof(buf), 0);
869 	if (sc < 0) {
870 		perror("send failed()");
871 		return sc;
872 	}
873 
874 	do {
875 		int s, rc, i, max_fd = p2;
876 		fd_set w;
877 
878 		/* FD sets */
879 		FD_ZERO(&w);
880 		FD_SET(c1, &w);
881 		FD_SET(c2, &w);
882 		FD_SET(p1, &w);
883 		FD_SET(p2, &w);
884 
885 		s = select(max_fd + 1, &w, NULL, NULL, &timeout);
886 		if (s == -1) {
887 			perror("select()");
888 			break;
889 		} else if (!s) {
890 			fprintf(stderr, "unexpected timeout\n");
891 			break;
892 		}
893 
894 		for (i = 0; i <= max_fd && s > 0; ++i) {
895 			if (!FD_ISSET(i, &w))
896 				continue;
897 
898 			s--;
899 
900 			rc = recv(i, buf, sizeof(buf), 0);
901 			if (rc < 0) {
902 				if (errno != EWOULDBLOCK) {
903 					perror("recv failed()");
904 					return rc;
905 				}
906 			}
907 
908 			if (rc == 0) {
909 				close(i);
910 				break;
911 			}
912 
913 			sc = send(i, buf, rc, 0);
914 			if (sc < 0) {
915 				perror("send failed()");
916 				return sc;
917 			}
918 		}
919 
920 		if (rate)
921 			sleep(rate);
922 
923 		if (opt->verbose) {
924 			printf(".");
925 			fflush(stdout);
926 
927 		}
928 	} while (running);
929 
930 	return 0;
931 }
932 
/* Test modes understood by run_options(). */
enum {
	SELFTESTS,	/* NOTE(review): not handled by run_options() - presumably dispatched elsewhere */
	PING_PONG,	/* forever_ping_pong() */
	SENDMSG,	/* sendmsg_test() with sendmsg(), BPF programs attached */
	BASE,		/* sendmsg_test() with sendmsg(), BPF setup skipped */
	BASE_SENDPAGE,	/* sendmsg_test() with sendfile(), BPF setup skipped */
	SENDPAGE,	/* sendmsg_test() with sendfile(), BPF programs attached */
};
941 
942 static int run_options(struct sockmap_options *options, int cg_fd,  int test)
943 {
944 	int i, key, next_key, err, zero = 0;
945 	struct bpf_program *tx_prog;
946 
947 	/* If base test skip BPF setup */
948 	if (test == BASE || test == BASE_SENDPAGE)
949 		goto run;
950 
951 	/* Attach programs to sockmap */
952 	if (!txmsg_omit_skb_parser) {
953 		links[0] = bpf_program__attach_sockmap(progs[0], map_fd[0]);
954 		if (!links[0]) {
955 			fprintf(stderr,
956 				"ERROR: bpf_program__attach_sockmap (sockmap %i->%i): (%s)\n",
957 				bpf_program__fd(progs[0]), map_fd[0], strerror(errno));
958 			return -1;
959 		}
960 	}
961 
962 	links[1] = bpf_program__attach_sockmap(progs[1], map_fd[0]);
963 	if (!links[1]) {
964 		fprintf(stderr, "ERROR: bpf_program__attach_sockmap (sockmap): (%s)\n",
965 			strerror(errno));
966 		return -1;
967 	}
968 
969 	/* Attach to cgroups */
970 	err = bpf_prog_attach(bpf_program__fd(progs[2]), cg_fd, BPF_CGROUP_SOCK_OPS, 0);
971 	if (err) {
972 		fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
973 			err, strerror(errno));
974 		return err;
975 	}
976 
977 run:
978 	err = sockmap_init_sockets(options->verbose);
979 	if (err) {
980 		fprintf(stderr, "ERROR: test socket failed: %d\n", err);
981 		goto out;
982 	}
983 
984 	/* Attach txmsg program to sockmap */
985 	if (txmsg_pass)
986 		tx_prog = progs[3];
987 	else if (txmsg_redir)
988 		tx_prog = progs[4];
989 	else if (txmsg_apply)
990 		tx_prog = progs[5];
991 	else if (txmsg_cork)
992 		tx_prog = progs[6];
993 	else if (txmsg_drop)
994 		tx_prog = progs[7];
995 	else
996 		tx_prog = NULL;
997 
998 	if (tx_prog) {
999 		int redir_fd;
1000 
1001 		links[4] = bpf_program__attach_sockmap(tx_prog, map_fd[1]);
1002 		if (!links[4]) {
1003 			fprintf(stderr,
1004 				"ERROR: bpf_program__attach_sockmap (txmsg): (%s)\n",
1005 				strerror(errno));
1006 			err = -1;
1007 			goto out;
1008 		}
1009 
1010 		i = 0;
1011 		err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
1012 		if (err) {
1013 			fprintf(stderr,
1014 				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
1015 				err, strerror(errno));
1016 			goto out;
1017 		}
1018 
1019 		if (txmsg_redir)
1020 			redir_fd = c2;
1021 		else
1022 			redir_fd = c1;
1023 
1024 		err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
1025 		if (err) {
1026 			fprintf(stderr,
1027 				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
1028 				err, strerror(errno));
1029 			goto out;
1030 		}
1031 
1032 		if (txmsg_apply) {
1033 			err = bpf_map_update_elem(map_fd[3],
1034 						  &i, &txmsg_apply, BPF_ANY);
1035 			if (err) {
1036 				fprintf(stderr,
1037 					"ERROR: bpf_map_update_elem (apply_bytes):  %d (%s\n",
1038 					err, strerror(errno));
1039 				goto out;
1040 			}
1041 		}
1042 
1043 		if (txmsg_cork) {
1044 			err = bpf_map_update_elem(map_fd[4],
1045 						  &i, &txmsg_cork, BPF_ANY);
1046 			if (err) {
1047 				fprintf(stderr,
1048 					"ERROR: bpf_map_update_elem (cork_bytes):  %d (%s\n",
1049 					err, strerror(errno));
1050 				goto out;
1051 			}
1052 		}
1053 
1054 		if (txmsg_start) {
1055 			err = bpf_map_update_elem(map_fd[5],
1056 						  &i, &txmsg_start, BPF_ANY);
1057 			if (err) {
1058 				fprintf(stderr,
1059 					"ERROR: bpf_map_update_elem (txmsg_start):  %d (%s)\n",
1060 					err, strerror(errno));
1061 				goto out;
1062 			}
1063 		}
1064 
1065 		if (txmsg_end) {
1066 			i = 1;
1067 			err = bpf_map_update_elem(map_fd[5],
1068 						  &i, &txmsg_end, BPF_ANY);
1069 			if (err) {
1070 				fprintf(stderr,
1071 					"ERROR: bpf_map_update_elem (txmsg_end):  %d (%s)\n",
1072 					err, strerror(errno));
1073 				goto out;
1074 			}
1075 		}
1076 
1077 		if (txmsg_start_push) {
1078 			i = 2;
1079 			err = bpf_map_update_elem(map_fd[5],
1080 						  &i, &txmsg_start_push, BPF_ANY);
1081 			if (err) {
1082 				fprintf(stderr,
1083 					"ERROR: bpf_map_update_elem (txmsg_start_push):  %d (%s)\n",
1084 					err, strerror(errno));
1085 				goto out;
1086 			}
1087 		}
1088 
1089 		if (txmsg_end_push) {
1090 			i = 3;
1091 			err = bpf_map_update_elem(map_fd[5],
1092 						  &i, &txmsg_end_push, BPF_ANY);
1093 			if (err) {
1094 				fprintf(stderr,
1095 					"ERROR: bpf_map_update_elem %i@%i (txmsg_end_push):  %d (%s)\n",
1096 					txmsg_end_push, i, err, strerror(errno));
1097 				goto out;
1098 			}
1099 		}
1100 
1101 		if (txmsg_start_pop) {
1102 			i = 4;
1103 			err = bpf_map_update_elem(map_fd[5],
1104 						  &i, &txmsg_start_pop, BPF_ANY);
1105 			if (err) {
1106 				fprintf(stderr,
1107 					"ERROR: bpf_map_update_elem %i@%i (txmsg_start_pop):  %d (%s)\n",
1108 					txmsg_start_pop, i, err, strerror(errno));
1109 				goto out;
1110 			}
1111 		} else {
1112 			i = 4;
1113 			bpf_map_update_elem(map_fd[5],
1114 						  &i, &txmsg_start_pop, BPF_ANY);
1115 		}
1116 
1117 		if (txmsg_pop) {
1118 			i = 5;
1119 			err = bpf_map_update_elem(map_fd[5],
1120 						  &i, &txmsg_pop, BPF_ANY);
1121 			if (err) {
1122 				fprintf(stderr,
1123 					"ERROR: bpf_map_update_elem %i@%i (txmsg_pop):  %d (%s)\n",
1124 					txmsg_pop, i, err, strerror(errno));
1125 				goto out;
1126 			}
1127 		} else {
1128 			i = 5;
1129 			bpf_map_update_elem(map_fd[5],
1130 					    &i, &txmsg_pop, BPF_ANY);
1131 
1132 		}
1133 
1134 		if (txmsg_ingress) {
1135 			int in = BPF_F_INGRESS;
1136 
1137 			i = 0;
1138 			err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
1139 			if (err) {
1140 				fprintf(stderr,
1141 					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1142 					err, strerror(errno));
1143 			}
1144 			i = 1;
1145 			err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
1146 			if (err) {
1147 				fprintf(stderr,
1148 					"ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
1149 					err, strerror(errno));
1150 			}
1151 			err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
1152 			if (err) {
1153 				fprintf(stderr,
1154 					"ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
1155 					err, strerror(errno));
1156 			}
1157 
1158 			i = 2;
1159 			err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
1160 			if (err) {
1161 				fprintf(stderr,
1162 					"ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
1163 					err, strerror(errno));
1164 			}
1165 		}
1166 
1167 		if (txmsg_redir_skb) {
1168 			int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
1169 					p2 : p1;
1170 			int ingress = BPF_F_INGRESS;
1171 
1172 			i = 0;
1173 			err = bpf_map_update_elem(map_fd[7],
1174 						  &i, &ingress, BPF_ANY);
1175 			if (err) {
1176 				fprintf(stderr,
1177 					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
1178 					err, strerror(errno));
1179 			}
1180 
1181 			i = 3;
1182 			err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
1183 			if (err) {
1184 				fprintf(stderr,
1185 					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
1186 					err, strerror(errno));
1187 			}
1188 		}
1189 	}
1190 
1191 	if (skb_use_parser) {
1192 		i = 2;
1193 		err = bpf_map_update_elem(map_fd[7], &i, &skb_use_parser, BPF_ANY);
1194 	}
1195 
1196 	if (txmsg_drop)
1197 		options->drop_expected = true;
1198 
1199 	if (test == PING_PONG)
1200 		err = forever_ping_pong(options->rate, options);
1201 	else if (test == SENDMSG) {
1202 		options->base = false;
1203 		options->sendpage = false;
1204 		err = sendmsg_test(options);
1205 	} else if (test == SENDPAGE) {
1206 		options->base = false;
1207 		options->sendpage = true;
1208 		err = sendmsg_test(options);
1209 	} else if (test == BASE) {
1210 		options->base = true;
1211 		options->sendpage = false;
1212 		err = sendmsg_test(options);
1213 	} else if (test == BASE_SENDPAGE) {
1214 		options->base = true;
1215 		options->sendpage = true;
1216 		err = sendmsg_test(options);
1217 	} else
1218 		fprintf(stderr, "unknown test\n");
1219 out:
1220 	/* Detach and zero all the maps */
1221 	bpf_prog_detach2(bpf_program__fd(progs[2]), cg_fd, BPF_CGROUP_SOCK_OPS);
1222 
1223 	for (i = 0; i < ARRAY_SIZE(links); i++) {
1224 		if (links[i])
1225 			bpf_link__detach(links[i]);
1226 	}
1227 
1228 	for (i = 0; i < ARRAY_SIZE(map_fd); i++) {
1229 		key = next_key = 0;
1230 		bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
1231 		while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
1232 			bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
1233 			key = next_key;
1234 		}
1235 	}
1236 
1237 	close(s1);
1238 	close(s2);
1239 	close(p1);
1240 	close(p2);
1241 	close(c1);
1242 	close(c2);
1243 	return err;
1244 }
1245 
1246 static char *test_to_str(int test)
1247 {
1248 	switch (test) {
1249 	case SENDMSG:
1250 		return "sendmsg";
1251 	case SENDPAGE:
1252 		return "sendpage";
1253 	}
1254 	return "unknown";
1255 }
1256 
/* Append @src to the NUL-terminated string in @dst without writing past
 * @dst_cap bytes in total. The result is truncated when it does not fit;
 * nothing is appended when only the terminator (or less) would fit.
 */
static void append_str(char *dst, const char *src, size_t dst_cap)
{
	size_t used = strlen(dst);
	size_t room = dst_cap - used;

	/* strncat() writes the terminator on top of the count it is given,
	 * so one byte of the remaining room is reserved for it.
	 */
	if (room > 1)
		strncat(dst, src, room - 1);
}
1266 
1267 #define OPTSTRING 60
1268 static void test_options(char *options)
1269 {
1270 	char tstr[OPTSTRING];
1271 
1272 	memset(options, 0, OPTSTRING);
1273 
1274 	if (txmsg_pass)
1275 		append_str(options, "pass,", OPTSTRING);
1276 	if (txmsg_redir)
1277 		append_str(options, "redir,", OPTSTRING);
1278 	if (txmsg_drop)
1279 		append_str(options, "drop,", OPTSTRING);
1280 	if (txmsg_apply) {
1281 		snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply);
1282 		append_str(options, tstr, OPTSTRING);
1283 	}
1284 	if (txmsg_cork) {
1285 		snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork);
1286 		append_str(options, tstr, OPTSTRING);
1287 	}
1288 	if (txmsg_start) {
1289 		snprintf(tstr, OPTSTRING, "start %d,", txmsg_start);
1290 		append_str(options, tstr, OPTSTRING);
1291 	}
1292 	if (txmsg_end) {
1293 		snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
1294 		append_str(options, tstr, OPTSTRING);
1295 	}
1296 	if (txmsg_start_pop) {
1297 		snprintf(tstr, OPTSTRING, "pop (%d,%d),",
1298 			 txmsg_start_pop, txmsg_start_pop + txmsg_pop);
1299 		append_str(options, tstr, OPTSTRING);
1300 	}
1301 	if (txmsg_ingress)
1302 		append_str(options, "ingress,", OPTSTRING);
1303 	if (txmsg_redir_skb)
1304 		append_str(options, "redir_skb,", OPTSTRING);
1305 	if (peek_flag)
1306 		append_str(options, "peek,", OPTSTRING);
1307 }
1308 
1309 static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
1310 {
1311 	char *options = calloc(OPTSTRING, sizeof(char));
1312 	int err;
1313 
1314 	if (test == SENDPAGE)
1315 		opt->sendpage = true;
1316 	else
1317 		opt->sendpage = false;
1318 
1319 	if (txmsg_drop)
1320 		opt->drop_expected = true;
1321 	else
1322 		opt->drop_expected = false;
1323 
1324 	test_options(options);
1325 
1326 	if (opt->verbose) {
1327 		fprintf(stdout,
1328 			" [TEST %i]: (%i, %i, %i, %s, %s): ",
1329 			test_cnt, opt->rate, opt->iov_count, opt->iov_length,
1330 			test_to_str(test), options);
1331 		fflush(stdout);
1332 	}
1333 	err = run_options(opt, cgrp, test);
1334 	if (opt->verbose)
1335 		fprintf(stdout, " %s\n", !err ? "PASS" : "FAILED");
1336 	test_cnt++;
1337 	!err ? passed++ : failed++;
1338 	free(options);
1339 	return err;
1340 }
1341 
1342 static void test_exec(int cgrp, struct sockmap_options *opt)
1343 {
1344 	int type = strcmp(opt->map, BPF_SOCKMAP_FILENAME);
1345 	int err;
1346 
1347 	if (type == 0) {
1348 		test_start();
1349 		err = __test_exec(cgrp, SENDMSG, opt);
1350 		if (err)
1351 			test_fail();
1352 	} else {
1353 		test_start();
1354 		err = __test_exec(cgrp, SENDPAGE, opt);
1355 		if (err)
1356 			test_fail();
1357 	}
1358 }
1359 
1360 static void test_send_one(struct sockmap_options *opt, int cgrp)
1361 {
1362 	opt->iov_length = 1;
1363 	opt->iov_count = 1;
1364 	opt->rate = 1;
1365 	test_exec(cgrp, opt);
1366 
1367 	opt->iov_length = 1;
1368 	opt->iov_count = 1024;
1369 	opt->rate = 1;
1370 	test_exec(cgrp, opt);
1371 
1372 	opt->iov_length = 1024;
1373 	opt->iov_count = 1;
1374 	opt->rate = 1;
1375 	test_exec(cgrp, opt);
1376 
1377 }
1378 
1379 static void test_send_many(struct sockmap_options *opt, int cgrp)
1380 {
1381 	opt->iov_length = 3;
1382 	opt->iov_count = 1;
1383 	opt->rate = 512;
1384 	test_exec(cgrp, opt);
1385 
1386 	opt->rate = 100;
1387 	opt->iov_count = 1;
1388 	opt->iov_length = 5;
1389 	test_exec(cgrp, opt);
1390 }
1391 
1392 static void test_send_large(struct sockmap_options *opt, int cgrp)
1393 {
1394 	opt->iov_length = 8192;
1395 	opt->iov_count = 32;
1396 	opt->rate = 2;
1397 	test_exec(cgrp, opt);
1398 }
1399 
/* Run the complete send matrix (one, many, large) with the current
 * settings, then yield the CPU once.
 */
static void test_send(struct sockmap_options *opt, int cgrp)
{
	test_send_one(opt, cgrp);

	test_send_many(opt, cgrp);

	test_send_large(opt, cgrp);

	sched_yield();
}
1407 
1408 static void test_txmsg_pass(int cgrp, struct sockmap_options *opt)
1409 {
1410 	/* Test small and large iov_count values with pass/redir/apply/cork */
1411 	txmsg_pass = 1;
1412 	test_send(opt, cgrp);
1413 }
1414 
1415 static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)
1416 {
1417 	txmsg_redir = 1;
1418 	test_send(opt, cgrp);
1419 }
1420 
1421 static void test_txmsg_redir_wait_sndmem(int cgrp, struct sockmap_options *opt)
1422 {
1423 	opt->tx_wait_mem = true;
1424 	txmsg_redir = 1;
1425 	test_send_large(opt, cgrp);
1426 
1427 	txmsg_redir = 1;
1428 	txmsg_apply = 4097;
1429 	test_send_large(opt, cgrp);
1430 	opt->tx_wait_mem = false;
1431 }
1432 
1433 static void test_txmsg_drop(int cgrp, struct sockmap_options *opt)
1434 {
1435 	txmsg_drop = 1;
1436 	test_send(opt, cgrp);
1437 }
1438 
1439 static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt)
1440 {
1441 	txmsg_pass = txmsg_drop = 0;
1442 	txmsg_ingress = txmsg_redir = 1;
1443 	test_send(opt, cgrp);
1444 }
1445 
1446 /* Test cork with hung data. This tests poor usage patterns where
1447  * cork can leave data on the ring if user program is buggy and
1448  * doesn't flush them somehow. They do take some time however
1449  * because they wait for a timeout. Test pass, redir and cork with
1450  * apply logic. Use cork size of 4097 with send_large to avoid
1451  * aligning cork size with send size.
1452  */
1453 static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt)
1454 {
1455 	txmsg_pass = 1;
1456 	txmsg_redir = 0;
1457 	txmsg_cork = 4097;
1458 	txmsg_apply = 4097;
1459 	test_send_large(opt, cgrp);
1460 
1461 	txmsg_pass = 0;
1462 	txmsg_redir = 1;
1463 	txmsg_apply = 0;
1464 	txmsg_cork = 4097;
1465 	test_send_large(opt, cgrp);
1466 
1467 	txmsg_pass = 0;
1468 	txmsg_redir = 1;
1469 	txmsg_apply = 4097;
1470 	txmsg_cork = 4097;
1471 	test_send_large(opt, cgrp);
1472 }
1473 
1474 static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)
1475 {
1476 	/* Test basic start/end */
1477 	txmsg_pass = 1;
1478 	txmsg_start = 1;
1479 	txmsg_end = 2;
1480 	test_send(opt, cgrp);
1481 
1482 	/* Test >4k pull */
1483 	txmsg_pass = 1;
1484 	txmsg_start = 4096;
1485 	txmsg_end = 9182;
1486 	test_send_large(opt, cgrp);
1487 
1488 	/* Test pull + redirect */
1489 	txmsg_redir = 1;
1490 	txmsg_start = 1;
1491 	txmsg_end = 2;
1492 	test_send(opt, cgrp);
1493 
1494 	/* Test pull + cork */
1495 	txmsg_redir = 0;
1496 	txmsg_cork = 512;
1497 	txmsg_start = 1;
1498 	txmsg_end = 2;
1499 	test_send_many(opt, cgrp);
1500 
1501 	/* Test pull + cork + redirect */
1502 	txmsg_redir = 1;
1503 	txmsg_cork = 512;
1504 	txmsg_start = 1;
1505 	txmsg_end = 2;
1506 	test_send_many(opt, cgrp);
1507 }
1508 
1509 static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
1510 {
1511 	bool data = opt->data_test;
1512 
1513 	/* Test basic pop */
1514 	txmsg_pass = 1;
1515 	txmsg_start_pop = 1;
1516 	txmsg_pop = 2;
1517 	test_send_many(opt, cgrp);
1518 
1519 	/* Test pop with >4k */
1520 	txmsg_pass = 1;
1521 	txmsg_start_pop = 4096;
1522 	txmsg_pop = 4096;
1523 	test_send_large(opt, cgrp);
1524 
1525 	/* Test pop + redirect */
1526 	txmsg_redir = 1;
1527 	txmsg_start_pop = 1;
1528 	txmsg_pop = 2;
1529 	test_send_many(opt, cgrp);
1530 
1531 	/* TODO: Test for pop + cork should be different,
1532 	 * - It makes the layout of the received data difficult
1533 	 * - It makes it hard to calculate the total_bytes in the recvmsg
1534 	 * Temporarily skip the data integrity test for this case now.
1535 	 */
1536 	opt->data_test = false;
1537 	/* Test pop + cork */
1538 	txmsg_redir = 0;
1539 	txmsg_cork = 512;
1540 	txmsg_start_pop = 1;
1541 	txmsg_pop = 2;
1542 	test_send_many(opt, cgrp);
1543 
1544 	/* Test pop + redirect + cork */
1545 	txmsg_redir = 1;
1546 	txmsg_cork = 4;
1547 	txmsg_start_pop = 1;
1548 	txmsg_pop = 2;
1549 	test_send_many(opt, cgrp);
1550 	opt->data_test = data;
1551 }
1552 
1553 static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
1554 {
1555 	bool data = opt->data_test;
1556 
1557 	/* Test basic push */
1558 	txmsg_pass = 1;
1559 	txmsg_start_push = 1;
1560 	txmsg_end_push = 1;
1561 	test_send(opt, cgrp);
1562 
1563 	/* Test push 4kB >4k */
1564 	txmsg_pass = 1;
1565 	txmsg_start_push = 4096;
1566 	txmsg_end_push = 4096;
1567 	test_send_large(opt, cgrp);
1568 
1569 	/* Test push + redirect */
1570 	txmsg_redir = 1;
1571 	txmsg_start_push = 1;
1572 	txmsg_end_push = 2;
1573 	test_send_many(opt, cgrp);
1574 
1575 	/* TODO: Test for push + cork should be different,
1576 	 * - It makes the layout of the received data difficult
1577 	 * - It makes it hard to calculate the total_bytes in the recvmsg
1578 	 * Temporarily skip the data integrity test for this case now.
1579 	 */
1580 	opt->data_test = false;
1581 	/* Test push + cork */
1582 	txmsg_redir = 0;
1583 	txmsg_cork = 512;
1584 	txmsg_start_push = 1;
1585 	txmsg_end_push = 2;
1586 	test_send_many(opt, cgrp);
1587 	opt->data_test = data;
1588 }
1589 
1590 static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt)
1591 {
1592 	/* Test push/pop range overlapping */
1593 	txmsg_pass = 1;
1594 	txmsg_start_push = 1;
1595 	txmsg_end_push = 10;
1596 	txmsg_start_pop = 5;
1597 	txmsg_pop = 4;
1598 	test_send_large(opt, cgrp);
1599 
1600 	txmsg_pass = 1;
1601 	txmsg_start_push = 1;
1602 	txmsg_end_push = 10;
1603 	txmsg_start_pop = 5;
1604 	txmsg_pop = 16;
1605 	test_send_large(opt, cgrp);
1606 
1607 	txmsg_pass = 1;
1608 	txmsg_start_push = 5;
1609 	txmsg_end_push = 4;
1610 	txmsg_start_pop = 1;
1611 	txmsg_pop = 10;
1612 	test_send_large(opt, cgrp);
1613 
1614 	txmsg_pass = 1;
1615 	txmsg_start_push = 5;
1616 	txmsg_end_push = 16;
1617 	txmsg_start_pop = 1;
1618 	txmsg_pop = 10;
1619 	test_send_large(opt, cgrp);
1620 
1621 	/* Test push/pop range non-overlapping */
1622 	txmsg_pass = 1;
1623 	txmsg_start_push = 1;
1624 	txmsg_end_push = 10;
1625 	txmsg_start_pop = 16;
1626 	txmsg_pop = 4;
1627 	test_send_large(opt, cgrp);
1628 
1629 	txmsg_pass = 1;
1630 	txmsg_start_push = 16;
1631 	txmsg_end_push = 10;
1632 	txmsg_start_pop = 5;
1633 	txmsg_pop = 4;
1634 	test_send_large(opt, cgrp);
1635 }
1636 
1637 static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)
1638 {
1639 	txmsg_pass = 1;
1640 	txmsg_redir = 0;
1641 	txmsg_ingress = 0;
1642 	txmsg_apply = 1;
1643 	txmsg_cork = 0;
1644 	test_send_one(opt, cgrp);
1645 
1646 	txmsg_pass = 0;
1647 	txmsg_redir = 1;
1648 	txmsg_ingress = 0;
1649 	txmsg_apply = 1;
1650 	txmsg_cork = 0;
1651 	test_send_one(opt, cgrp);
1652 
1653 	txmsg_pass = 0;
1654 	txmsg_redir = 1;
1655 	txmsg_ingress = 1;
1656 	txmsg_apply = 1;
1657 	txmsg_cork = 0;
1658 	test_send_one(opt, cgrp);
1659 
1660 	txmsg_pass = 1;
1661 	txmsg_redir = 0;
1662 	txmsg_ingress = 0;
1663 	txmsg_apply = 1024;
1664 	txmsg_cork = 0;
1665 	test_send_large(opt, cgrp);
1666 
1667 	txmsg_pass = 0;
1668 	txmsg_redir = 1;
1669 	txmsg_ingress = 0;
1670 	txmsg_apply = 1024;
1671 	txmsg_cork = 0;
1672 	test_send_large(opt, cgrp);
1673 
1674 	txmsg_pass = 0;
1675 	txmsg_redir = 1;
1676 	txmsg_ingress = 1;
1677 	txmsg_apply = 1024;
1678 	txmsg_cork = 0;
1679 	test_send_large(opt, cgrp);
1680 }
1681 
1682 static void test_txmsg_cork(int cgrp, struct sockmap_options *opt)
1683 {
1684 	txmsg_pass = 1;
1685 	txmsg_redir = 0;
1686 	txmsg_apply = 0;
1687 	txmsg_cork = 1;
1688 	test_send(opt, cgrp);
1689 
1690 	txmsg_pass = 1;
1691 	txmsg_redir = 0;
1692 	txmsg_apply = 1;
1693 	txmsg_cork = 1;
1694 	test_send(opt, cgrp);
1695 }
1696 
1697 static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt)
1698 {
1699 	txmsg_pass = 1;
1700 	skb_use_parser = 512;
1701 	opt->iov_length = 256;
1702 	opt->iov_count = 1;
1703 	opt->rate = 2;
1704 	test_exec(cgrp, opt);
1705 }
1706 
1707 static void test_txmsg_ingress_parser2(int cgrp, struct sockmap_options *opt)
1708 {
1709 	skb_use_parser = 10;
1710 	opt->iov_length = 20;
1711 	opt->iov_count = 1;
1712 	opt->rate = 1;
1713 	opt->check_recved_len = true;
1714 	test_exec(cgrp, opt);
1715 	opt->check_recved_len = false;
1716 }
1717 
/* Names of the BPF maps looked up by populate_progs(); the entry at
 * index i is the map whose descriptor is stored in map_fd[i].
 */
char *map_names[] = {
	[0] = "sock_map",
	[1] = "sock_map_txmsg",
	[2] = "sock_map_redir",
	[3] = "sock_apply_bytes",
	[4] = "sock_cork_bytes",
	[5] = "sock_bytes",
	[6] = "sock_redir_flags",
	[7] = "sock_skb_opts",
};
1728 
1729 static int populate_progs(char *bpf_file)
1730 {
1731 	struct bpf_program *prog;
1732 	struct bpf_object *obj;
1733 	int i = 0;
1734 	long err;
1735 
1736 	obj = bpf_object__open(bpf_file);
1737 	err = libbpf_get_error(obj);
1738 	if (err) {
1739 		char err_buf[256];
1740 
1741 		libbpf_strerror(err, err_buf, sizeof(err_buf));
1742 		printf("Unable to load eBPF objects in file '%s' : %s\n",
1743 		       bpf_file, err_buf);
1744 		return -1;
1745 	}
1746 
1747 	i = bpf_object__load(obj);
1748 	i = 0;
1749 	bpf_object__for_each_program(prog, obj) {
1750 		progs[i] = prog;
1751 		i++;
1752 	}
1753 
1754 	for (i = 0; i < ARRAY_SIZE(map_fd); i++) {
1755 		maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
1756 		map_fd[i] = bpf_map__fd(maps[i]);
1757 		if (map_fd[i] < 0) {
1758 			fprintf(stderr, "load_bpf_file: (%i) %s\n",
1759 				map_fd[i], strerror(errno));
1760 			return -1;
1761 		}
1762 	}
1763 
1764 	for (i = 0; i < ARRAY_SIZE(links); i++)
1765 		links[i] = NULL;
1766 
1767 	return 0;
1768 }
1769 
1770 struct _test test[] = {
1771 	{"txmsg test passthrough", test_txmsg_pass},
1772 	{"txmsg test redirect", test_txmsg_redir},
1773 	{"txmsg test redirect wait send mem", test_txmsg_redir_wait_sndmem},
1774 	{"txmsg test drop", test_txmsg_drop},
1775 	{"txmsg test ingress redirect", test_txmsg_ingress_redir},
1776 	{"txmsg test apply", test_txmsg_apply},
1777 	{"txmsg test cork", test_txmsg_cork},
1778 	{"txmsg test hanging corks", test_txmsg_cork_hangs},
1779 	{"txmsg test push_data", test_txmsg_push},
1780 	{"txmsg test pull-data", test_txmsg_pull},
1781 	{"txmsg test pop-data", test_txmsg_pop},
1782 	{"txmsg test push/pop data", test_txmsg_push_pop},
1783 	{"txmsg test ingress parser", test_txmsg_ingress_parser},
1784 	{"txmsg test ingress parser2", test_txmsg_ingress_parser2},
1785 };
1786 
1787 static int check_whitelist(struct _test *t, struct sockmap_options *opt)
1788 {
1789 	char *entry, *ptr;
1790 
1791 	if (!opt->whitelist)
1792 		return 0;
1793 	ptr = strdup(opt->whitelist);
1794 	if (!ptr)
1795 		return -ENOMEM;
1796 	entry = strtok(ptr, ",");
1797 	while (entry) {
1798 		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
1799 		    strstr(opt->map, entry) != 0 ||
1800 		    strstr(t->title, entry) != 0) {
1801 			free(ptr);
1802 			return 0;
1803 		}
1804 		entry = strtok(NULL, ",");
1805 	}
1806 	free(ptr);
1807 	return -EINVAL;
1808 }
1809 
1810 static int check_blacklist(struct _test *t, struct sockmap_options *opt)
1811 {
1812 	char *entry, *ptr;
1813 
1814 	if (!opt->blacklist)
1815 		return -EINVAL;
1816 	ptr = strdup(opt->blacklist);
1817 	if (!ptr)
1818 		return -ENOMEM;
1819 	entry = strtok(ptr, ",");
1820 	while (entry) {
1821 		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
1822 		    strstr(opt->map, entry) != 0 ||
1823 		    strstr(t->title, entry) != 0) {
1824 			free(ptr);
1825 			return 0;
1826 		}
1827 		entry = strtok(NULL, ",");
1828 	}
1829 	free(ptr);
1830 	return -EINVAL;
1831 }
1832 
1833 static int __test_selftests(int cg_fd, struct sockmap_options *opt)
1834 {
1835 	int i, err;
1836 
1837 	err = populate_progs(opt->map);
1838 	if (err < 0) {
1839 		fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
1840 		return err;
1841 	}
1842 
1843 	/* Tests basic commands and APIs */
1844 	for (i = 0; i < ARRAY_SIZE(test); i++) {
1845 		struct _test t = test[i];
1846 
1847 		if (check_whitelist(&t, opt) != 0)
1848 			continue;
1849 		if (check_blacklist(&t, opt) == 0)
1850 			continue;
1851 
1852 		test_start_subtest(&t, opt);
1853 		t.tester(cg_fd, opt);
1854 		test_end_subtest();
1855 	}
1856 
1857 	return err;
1858 }
1859 
1860 static void test_selftests_sockmap(int cg_fd, struct sockmap_options *opt)
1861 {
1862 	opt->map = BPF_SOCKMAP_FILENAME;
1863 	__test_selftests(cg_fd, opt);
1864 }
1865 
1866 static void test_selftests_sockhash(int cg_fd, struct sockmap_options *opt)
1867 {
1868 	opt->map = BPF_SOCKHASH_FILENAME;
1869 	__test_selftests(cg_fd, opt);
1870 }
1871 
/* Run all selftests, first with the sockmap object and then with the
 * sockhash object, and print the collected results. Always returns 0.
 */
static int test_selftest(int cg_fd, struct sockmap_options *opt)
{
	test_selftests_sockmap(cg_fd, opt);
	test_selftests_sockhash(cg_fd, opt);

	test_print_results();

	return 0;
}
1879 
1880 int main(int argc, char **argv)
1881 {
1882 	int iov_count = 1, length = 1024, rate = 1;
1883 	struct sockmap_options options = {0};
1884 	int opt, longindex, err, cg_fd = 0;
1885 	char *bpf_file = BPF_SOCKMAP_FILENAME;
1886 	int test = SELFTESTS;
1887 	bool cg_created = 0;
1888 
1889 	while ((opt = getopt_long(argc, argv, ":dhv:c:r:i:l:t:p:q:n:b:",
1890 				  long_options, &longindex)) != -1) {
1891 		switch (opt) {
1892 		case 's':
1893 			txmsg_start = atoi(optarg);
1894 			break;
1895 		case 'e':
1896 			txmsg_end = atoi(optarg);
1897 			break;
1898 		case 'p':
1899 			txmsg_start_push = atoi(optarg);
1900 			break;
1901 		case 'q':
1902 			txmsg_end_push = atoi(optarg);
1903 			break;
1904 		case 'w':
1905 			txmsg_start_pop = atoi(optarg);
1906 			break;
1907 		case 'x':
1908 			txmsg_pop = atoi(optarg);
1909 			break;
1910 		case 'a':
1911 			txmsg_apply = atoi(optarg);
1912 			break;
1913 		case 'k':
1914 			txmsg_cork = atoi(optarg);
1915 			break;
1916 		case 'c':
1917 			cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
1918 			if (cg_fd < 0) {
1919 				fprintf(stderr,
1920 					"ERROR: (%i) open cg path failed: %s\n",
1921 					cg_fd, optarg);
1922 				return cg_fd;
1923 			}
1924 			break;
1925 		case 'r':
1926 			rate = atoi(optarg);
1927 			break;
1928 		case 'v':
1929 			options.verbose = 1;
1930 			if (optarg)
1931 				options.verbose = atoi(optarg);
1932 			break;
1933 		case 'i':
1934 			iov_count = atoi(optarg);
1935 			break;
1936 		case 'l':
1937 			length = atoi(optarg);
1938 			break;
1939 		case 'd':
1940 			options.data_test = true;
1941 			break;
1942 		case 't':
1943 			if (strcmp(optarg, "ping") == 0) {
1944 				test = PING_PONG;
1945 			} else if (strcmp(optarg, "sendmsg") == 0) {
1946 				test = SENDMSG;
1947 			} else if (strcmp(optarg, "base") == 0) {
1948 				test = BASE;
1949 			} else if (strcmp(optarg, "base_sendpage") == 0) {
1950 				test = BASE_SENDPAGE;
1951 			} else if (strcmp(optarg, "sendpage") == 0) {
1952 				test = SENDPAGE;
1953 			} else {
1954 				usage(argv);
1955 				return -1;
1956 			}
1957 			break;
1958 		case 'n':
1959 			options.whitelist = strdup(optarg);
1960 			if (!options.whitelist)
1961 				return -ENOMEM;
1962 			break;
1963 		case 'b':
1964 			options.blacklist = strdup(optarg);
1965 			if (!options.blacklist)
1966 				return -ENOMEM;
1967 		case 0:
1968 			break;
1969 		case 'h':
1970 		default:
1971 			usage(argv);
1972 			return -1;
1973 		}
1974 	}
1975 
1976 	if (!cg_fd) {
1977 		cg_fd = cgroup_setup_and_join(CG_PATH);
1978 		if (cg_fd < 0)
1979 			return cg_fd;
1980 		cg_created = 1;
1981 	}
1982 
1983 	/* Use libbpf 1.0 API mode */
1984 	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
1985 
1986 	if (test == SELFTESTS) {
1987 		err = test_selftest(cg_fd, &options);
1988 		goto out;
1989 	}
1990 
1991 	err = populate_progs(bpf_file);
1992 	if (err) {
1993 		fprintf(stderr, "populate program: (%s) %s\n",
1994 			bpf_file, strerror(errno));
1995 		return 1;
1996 	}
1997 	running = 1;
1998 
1999 	/* catch SIGINT */
2000 	signal(SIGINT, running_handler);
2001 
2002 	options.iov_count = iov_count;
2003 	options.iov_length = length;
2004 	options.rate = rate;
2005 
2006 	err = run_options(&options, cg_fd, test);
2007 out:
2008 	if (options.whitelist)
2009 		free(options.whitelist);
2010 	if (options.blacklist)
2011 		free(options.blacklist);
2012 	close(cg_fd);
2013 	if (cg_created)
2014 		cleanup_cgroup_environment();
2015 	return err;
2016 }
2017 
2018 void running_handler(int a)
2019 {
2020 	running = 0;
2021 }
2022