xref: /linux/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c (revision 70b139d0483cd42808326c36c4b63d5be4a3cccb)
1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2020 Cloudflare
3 #include <error.h>
4 #include <linux/tcp.h>
5 #include <linux/socket.h>
6 #include <sys/epoll.h>
7 
8 #include "test_progs.h"
9 #include "test_skmsg_load_helpers.skel.h"
10 #include "test_sockmap_update.skel.h"
11 #include "test_sockmap_invalid_update.skel.h"
12 #include "test_sockmap_skb_verdict_attach.skel.h"
13 #include "test_sockmap_progs_query.skel.h"
14 #include "test_sockmap_pass_prog.skel.h"
15 #include "test_sockmap_drop_prog.skel.h"
16 #include "test_sockmap_change_tail.skel.h"
17 #include "test_sockmap_msg_pop_data.skel.h"
18 #include "bpf_iter_sockmap.skel.h"
19 
20 #include "sockmap_helpers.h"
21 
/*
 * Local definitions of the TCP_REPAIR socket option number and of the two
 * option values that connected_socket_v4() passes to setsockopt().
 */
#define TCP_REPAIR		19	/* TCP sock is under repair right now */

#define TCP_REPAIR_ON		1
#define TCP_REPAIR_OFF_NO_WP	-1	/* Turn off without window probes */

/*
 * SOL_TCP is defined in <netinet/tcp.h> (glibc), but the copybuf_address
 * field of tcp_zerocopy_receive is not yet included in older versions.
 * This workaround remains necessary until the glibc update propagates.
 *
 * The fallback below only takes effect when none of the headers included
 * above already provides SOL_TCP.
 */
#ifndef SOL_TCP
#define SOL_TCP 6
#endif
35 
36 static int connected_socket_v4(void)
37 {
38 	struct sockaddr_in addr = {
39 		.sin_family = AF_INET,
40 		.sin_port = htons(80),
41 		.sin_addr = { inet_addr("127.0.0.1") },
42 	};
43 	socklen_t len = sizeof(addr);
44 	int s, repair, err;
45 
46 	s = socket(AF_INET, SOCK_STREAM, 0);
47 	if (!ASSERT_GE(s, 0, "socket"))
48 		goto error;
49 
50 	repair = TCP_REPAIR_ON;
51 	err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair));
52 	if (!ASSERT_OK(err, "setsockopt(TCP_REPAIR)"))
53 		goto error;
54 
55 	err = connect(s, (struct sockaddr *)&addr, len);
56 	if (!ASSERT_OK(err, "connect"))
57 		goto error;
58 
59 	repair = TCP_REPAIR_OFF_NO_WP;
60 	err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair));
61 	if (!ASSERT_OK(err, "setsockopt(TCP_REPAIR)"))
62 		goto error;
63 
64 	return s;
65 error:
66 	perror(__func__);
67 	close(s);
68 	return -1;
69 }
70 
71 static void compare_cookies(struct bpf_map *src, struct bpf_map *dst)
72 {
73 	__u32 i, max_entries = bpf_map__max_entries(src);
74 	int err, src_fd, dst_fd;
75 
76 	src_fd = bpf_map__fd(src);
77 	dst_fd = bpf_map__fd(dst);
78 
79 	for (i = 0; i < max_entries; i++) {
80 		__u64 src_cookie, dst_cookie;
81 
82 		err = bpf_map_lookup_elem(src_fd, &i, &src_cookie);
83 		if (err && errno == ENOENT) {
84 			err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
85 			ASSERT_ERR(err, "map_lookup_elem(dst)");
86 			ASSERT_EQ(errno, ENOENT, "map_lookup_elem(dst)");
87 			continue;
88 		}
89 		if (!ASSERT_OK(err, "lookup_elem(src)"))
90 			continue;
91 
92 		err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
93 		if (!ASSERT_OK(err, "lookup_elem(dst)"))
94 			continue;
95 
96 		ASSERT_EQ(dst_cookie, src_cookie, "cookie mismatch");
97 	}
98 }
99 
100 /* Create a map, populate it with one socket, and free the map. */
101 static void test_sockmap_create_update_free(enum bpf_map_type map_type)
102 {
103 	const int zero = 0;
104 	int s, map, err;
105 
106 	s = connected_socket_v4();
107 	if (!ASSERT_GE(s, 0, "connected_socket_v4"))
108 		return;
109 
110 	map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL);
111 	if (!ASSERT_GE(map, 0, "bpf_map_create"))
112 		goto out;
113 
114 	err = bpf_map_update_elem(map, &zero, &s, BPF_NOEXIST);
115 	if (!ASSERT_OK(err, "bpf_map_update"))
116 		goto out;
117 
118 out:
119 	close(map);
120 	close(s);
121 }
122 
123 static void test_sockmap_vsock_delete_on_close(void)
124 {
125 	int map, c, p, err, zero = 0;
126 
127 	map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
128 			     sizeof(int), 1, NULL);
129 	if (!ASSERT_OK_FD(map, "bpf_map_create"))
130 		return;
131 
132 	err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
133 	if (!ASSERT_OK(err, "create_pair"))
134 		goto close_map;
135 
136 	if (xbpf_map_update_elem(map, &zero, &c, BPF_NOEXIST))
137 		goto close_socks;
138 
139 	xclose(c);
140 	xclose(p);
141 
142 	err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
143 	if (!ASSERT_OK(err, "create_pair"))
144 		goto close_map;
145 
146 	err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST);
147 	ASSERT_OK(err, "after close(), bpf_map_update");
148 
149 close_socks:
150 	xclose(c);
151 	xclose(p);
152 close_map:
153 	xclose(map);
154 }
155 
156 static void test_skmsg_helpers(enum bpf_map_type map_type)
157 {
158 	struct test_skmsg_load_helpers *skel;
159 	int err, map, verdict;
160 
161 	skel = test_skmsg_load_helpers__open_and_load();
162 	if (!ASSERT_OK_PTR(skel, "test_skmsg_load_helpers__open_and_load"))
163 		return;
164 
165 	verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
166 	map = bpf_map__fd(skel->maps.sock_map);
167 
168 	err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
169 	if (!ASSERT_OK(err, "bpf_prog_attach"))
170 		goto out;
171 
172 	err = bpf_prog_detach2(verdict, map, BPF_SK_MSG_VERDICT);
173 	if (!ASSERT_OK(err, "bpf_prog_detach2"))
174 		goto out;
175 out:
176 	test_skmsg_load_helpers__destroy(skel);
177 }
178 
179 static void test_skmsg_helpers_with_link(enum bpf_map_type map_type)
180 {
181 	struct bpf_program *prog, *prog_clone, *prog_clone2;
182 	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, opts);
183 	struct test_skmsg_load_helpers *skel;
184 	struct bpf_link *link, *link2;
185 	int err, map;
186 
187 	skel = test_skmsg_load_helpers__open_and_load();
188 	if (!ASSERT_OK_PTR(skel, "test_skmsg_load_helpers__open_and_load"))
189 		return;
190 
191 	prog = skel->progs.prog_msg_verdict;
192 	prog_clone = skel->progs.prog_msg_verdict_clone;
193 	prog_clone2 = skel->progs.prog_msg_verdict_clone2;
194 	map = bpf_map__fd(skel->maps.sock_map);
195 
196 	link = bpf_program__attach_sockmap(prog, map);
197 	if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
198 		goto out;
199 
200 	/* Fail since bpf_link for the same prog has been created. */
201 	err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_MSG_VERDICT, 0);
202 	if (!ASSERT_ERR(err, "bpf_prog_attach"))
203 		goto out;
204 
205 	/* Fail since bpf_link for the same prog type has been created. */
206 	link2 = bpf_program__attach_sockmap(prog_clone, map);
207 	if (!ASSERT_ERR_PTR(link2, "bpf_program__attach_sockmap")) {
208 		bpf_link__destroy(link2);
209 		goto out;
210 	}
211 
212 	err = bpf_link__update_program(link, prog_clone);
213 	if (!ASSERT_OK(err, "bpf_link__update_program"))
214 		goto out;
215 
216 	/* Fail since a prog with different type attempts to do update. */
217 	err = bpf_link__update_program(link, skel->progs.prog_skb_verdict);
218 	if (!ASSERT_ERR(err, "bpf_link__update_program"))
219 		goto out;
220 
221 	/* Fail since the old prog does not match the one in the kernel. */
222 	opts.old_prog_fd = bpf_program__fd(prog_clone2);
223 	opts.flags = BPF_F_REPLACE;
224 	err = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), &opts);
225 	if (!ASSERT_ERR(err, "bpf_link_update"))
226 		goto out;
227 
228 	opts.old_prog_fd = bpf_program__fd(prog_clone);
229 	opts.flags = BPF_F_REPLACE;
230 	err = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), &opts);
231 	if (!ASSERT_OK(err, "bpf_link_update"))
232 		goto out;
233 out:
234 	bpf_link__destroy(link);
235 	test_skmsg_load_helpers__destroy(skel);
236 }
237 
238 static void test_sockmap_update(enum bpf_map_type map_type)
239 {
240 	int err, prog, src;
241 	struct test_sockmap_update *skel;
242 	struct bpf_map *dst_map;
243 	const __u32 zero = 0;
244 	char dummy[14] = {0};
245 	LIBBPF_OPTS(bpf_test_run_opts, topts,
246 		.data_in = dummy,
247 		.data_size_in = sizeof(dummy),
248 		.repeat = 1,
249 	);
250 	__s64 sk;
251 
252 	sk = connected_socket_v4();
253 	if (!ASSERT_NEQ(sk, -1, "connected_socket_v4"))
254 		return;
255 
256 	skel = test_sockmap_update__open_and_load();
257 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
258 		goto close_sk;
259 
260 	prog = bpf_program__fd(skel->progs.copy_sock_map);
261 	src = bpf_map__fd(skel->maps.src);
262 	if (map_type == BPF_MAP_TYPE_SOCKMAP)
263 		dst_map = skel->maps.dst_sock_map;
264 	else
265 		dst_map = skel->maps.dst_sock_hash;
266 
267 	err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST);
268 	if (!ASSERT_OK(err, "update_elem(src)"))
269 		goto out;
270 
271 	err = bpf_prog_test_run_opts(prog, &topts);
272 	if (!ASSERT_OK(err, "test_run"))
273 		goto out;
274 	if (!ASSERT_NEQ(topts.retval, 0, "test_run retval"))
275 		goto out;
276 
277 	compare_cookies(skel->maps.src, dst_map);
278 
279 out:
280 	test_sockmap_update__destroy(skel);
281 close_sk:
282 	close(sk);
283 }
284 
/*
 * Loading the invalid-update skeleton is expected to fail. If it loads
 * anyway the assertion fails and the skeleton is released.
 */
static void test_sockmap_invalid_update(void)
{
	struct test_sockmap_invalid_update *skel =
		test_sockmap_invalid_update__open_and_load();

	if (ASSERT_NULL(skel, "open_and_load"))
		return;

	test_sockmap_invalid_update__destroy(skel);
}
293 
294 static void test_sockmap_copy(enum bpf_map_type map_type)
295 {
296 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
297 	int err, len, src_fd, iter_fd;
298 	union bpf_iter_link_info linfo = {};
299 	__u32 i, num_sockets, num_elems;
300 	struct bpf_iter_sockmap *skel;
301 	__s64 *sock_fd = NULL;
302 	struct bpf_link *link;
303 	struct bpf_map *src;
304 	char buf[64];
305 
306 	skel = bpf_iter_sockmap__open_and_load();
307 	if (!ASSERT_OK_PTR(skel, "bpf_iter_sockmap__open_and_load"))
308 		return;
309 
310 	if (map_type == BPF_MAP_TYPE_SOCKMAP) {
311 		src = skel->maps.sockmap;
312 		num_elems = bpf_map__max_entries(src);
313 		num_sockets = num_elems - 1;
314 	} else {
315 		src = skel->maps.sockhash;
316 		num_elems = bpf_map__max_entries(src) - 1;
317 		num_sockets = num_elems;
318 	}
319 
320 	sock_fd = calloc(num_sockets, sizeof(*sock_fd));
321 	if (!ASSERT_OK_PTR(sock_fd, "calloc(sock_fd)"))
322 		goto out;
323 
324 	for (i = 0; i < num_sockets; i++)
325 		sock_fd[i] = -1;
326 
327 	src_fd = bpf_map__fd(src);
328 
329 	for (i = 0; i < num_sockets; i++) {
330 		sock_fd[i] = connected_socket_v4();
331 		if (!ASSERT_NEQ(sock_fd[i], -1, "connected_socket_v4"))
332 			goto out;
333 
334 		err = bpf_map_update_elem(src_fd, &i, &sock_fd[i], BPF_NOEXIST);
335 		if (!ASSERT_OK(err, "map_update"))
336 			goto out;
337 	}
338 
339 	linfo.map.map_fd = src_fd;
340 	opts.link_info = &linfo;
341 	opts.link_info_len = sizeof(linfo);
342 	link = bpf_program__attach_iter(skel->progs.copy, &opts);
343 	if (!ASSERT_OK_PTR(link, "attach_iter"))
344 		goto out;
345 
346 	iter_fd = bpf_iter_create(bpf_link__fd(link));
347 	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
348 		goto free_link;
349 
350 	/* do some tests */
351 	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
352 		;
353 	if (!ASSERT_GE(len, 0, "read"))
354 		goto close_iter;
355 
356 	/* test results */
357 	if (!ASSERT_EQ(skel->bss->elems, num_elems, "elems"))
358 		goto close_iter;
359 
360 	if (!ASSERT_EQ(skel->bss->socks, num_sockets, "socks"))
361 		goto close_iter;
362 
363 	compare_cookies(src, skel->maps.dst);
364 
365 close_iter:
366 	close(iter_fd);
367 free_link:
368 	bpf_link__destroy(link);
369 out:
370 	for (i = 0; sock_fd && i < num_sockets; i++)
371 		if (sock_fd[i] >= 0)
372 			close(sock_fd[i]);
373 	if (sock_fd)
374 		free(sock_fd);
375 	bpf_iter_sockmap__destroy(skel);
376 }
377 
378 static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
379 					    enum bpf_attach_type second)
380 {
381 	struct test_sockmap_skb_verdict_attach *skel;
382 	int err, map, verdict;
383 
384 	skel = test_sockmap_skb_verdict_attach__open_and_load();
385 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
386 		return;
387 
388 	verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
389 	map = bpf_map__fd(skel->maps.sock_map);
390 
391 	err = bpf_prog_attach(verdict, map, first, 0);
392 	if (!ASSERT_OK(err, "bpf_prog_attach"))
393 		goto out;
394 
395 	err = bpf_prog_attach(verdict, map, second, 0);
396 	ASSERT_EQ(err, -EBUSY, "prog_attach_fail");
397 
398 	err = bpf_prog_detach2(verdict, map, first);
399 	if (!ASSERT_OK(err, "bpf_prog_detach2"))
400 		goto out;
401 out:
402 	test_sockmap_skb_verdict_attach__destroy(skel);
403 }
404 
405 static void test_sockmap_skb_verdict_attach_with_link(void)
406 {
407 	struct test_sockmap_skb_verdict_attach *skel;
408 	struct bpf_program *prog;
409 	struct bpf_link *link;
410 	int err, map;
411 
412 	skel = test_sockmap_skb_verdict_attach__open_and_load();
413 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
414 		return;
415 	prog = skel->progs.prog_skb_verdict;
416 	map = bpf_map__fd(skel->maps.sock_map);
417 	link = bpf_program__attach_sockmap(prog, map);
418 	if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
419 		goto out;
420 
421 	bpf_link__destroy(link);
422 
423 	err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0);
424 	if (!ASSERT_OK(err, "bpf_prog_attach"))
425 		goto out;
426 
427 	/* Fail since attaching with the same prog/map has been done. */
428 	link = bpf_program__attach_sockmap(prog, map);
429 	if (!ASSERT_ERR_PTR(link, "bpf_program__attach_sockmap"))
430 		bpf_link__destroy(link);
431 
432 	err = bpf_prog_detach2(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT);
433 	if (!ASSERT_OK(err, "bpf_prog_detach2"))
434 		goto out;
435 out:
436 	test_sockmap_skb_verdict_attach__destroy(skel);
437 }
438 
439 static __u32 query_prog_id(int prog_fd)
440 {
441 	struct bpf_prog_info info = {};
442 	__u32 info_len = sizeof(info);
443 	int err;
444 
445 	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
446 	if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") ||
447 	    !ASSERT_EQ(info_len, sizeof(info), "bpf_prog_get_info_by_fd"))
448 		return 0;
449 
450 	return info.id;
451 }
452 
453 static void test_sockmap_progs_query(enum bpf_attach_type attach_type)
454 {
455 	struct test_sockmap_progs_query *skel;
456 	int err, map_fd, verdict_fd;
457 	__u32 attach_flags = 0;
458 	__u32 prog_ids[3] = {};
459 	__u32 prog_cnt = 3;
460 
461 	skel = test_sockmap_progs_query__open_and_load();
462 	if (!ASSERT_OK_PTR(skel, "test_sockmap_progs_query__open_and_load"))
463 		return;
464 
465 	map_fd = bpf_map__fd(skel->maps.sock_map);
466 
467 	if (attach_type == BPF_SK_MSG_VERDICT)
468 		verdict_fd = bpf_program__fd(skel->progs.prog_skmsg_verdict);
469 	else
470 		verdict_fd = bpf_program__fd(skel->progs.prog_skb_verdict);
471 
472 	err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
473 			     &attach_flags, prog_ids, &prog_cnt);
474 	ASSERT_OK(err, "bpf_prog_query failed");
475 	ASSERT_EQ(attach_flags,  0, "wrong attach_flags on query");
476 	ASSERT_EQ(prog_cnt, 0, "wrong program count on query");
477 
478 	err = bpf_prog_attach(verdict_fd, map_fd, attach_type, 0);
479 	if (!ASSERT_OK(err, "bpf_prog_attach failed"))
480 		goto out;
481 
482 	prog_cnt = 1;
483 	err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
484 			     &attach_flags, prog_ids, &prog_cnt);
485 	ASSERT_OK(err, "bpf_prog_query failed");
486 	ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
487 	ASSERT_EQ(prog_cnt, 1, "wrong program count on query");
488 	ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd),
489 		  "wrong prog_ids on query");
490 
491 	bpf_prog_detach2(verdict_fd, map_fd, attach_type);
492 out:
493 	test_sockmap_progs_query__destroy(skel);
494 }
495 
496 #define MAX_EVENTS 10
497 static void test_sockmap_skb_verdict_shutdown(void)
498 {
499 	int n, err, map, verdict, c1 = -1, p1 = -1;
500 	struct epoll_event ev, events[MAX_EVENTS];
501 	struct test_sockmap_pass_prog *skel;
502 	int zero = 0;
503 	int epollfd;
504 	char b;
505 
506 	skel = test_sockmap_pass_prog__open_and_load();
507 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
508 		return;
509 
510 	verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
511 	map = bpf_map__fd(skel->maps.sock_map_rx);
512 
513 	err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
514 	if (!ASSERT_OK(err, "bpf_prog_attach"))
515 		goto out;
516 
517 	err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
518 	if (err < 0)
519 		goto out;
520 
521 	err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
522 	if (err < 0)
523 		goto out_close;
524 
525 	shutdown(p1, SHUT_WR);
526 
527 	ev.events = EPOLLIN;
528 	ev.data.fd = c1;
529 
530 	epollfd = epoll_create1(0);
531 	if (!ASSERT_GT(epollfd, -1, "epoll_create(0)"))
532 		goto out_close;
533 	err = epoll_ctl(epollfd, EPOLL_CTL_ADD, c1, &ev);
534 	if (!ASSERT_OK(err, "epoll_ctl(EPOLL_CTL_ADD)"))
535 		goto out_close;
536 	err = epoll_wait(epollfd, events, MAX_EVENTS, -1);
537 	if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
538 		goto out_close;
539 
540 	n = recv(c1, &b, 1, MSG_DONTWAIT);
541 	ASSERT_EQ(n, 0, "recv(fin)");
542 out_close:
543 	close(c1);
544 	close(p1);
545 out:
546 	test_sockmap_pass_prog__destroy(skel);
547 }
548 
549 
550 static void do_test_sockmap_skb_verdict_fionread(int sotype, bool pass_prog)
551 {
552 	int err, map, verdict, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
553 	int expected, zero = 0, sent, recvd, avail;
554 	struct test_sockmap_pass_prog *pass = NULL;
555 	struct test_sockmap_drop_prog *drop = NULL;
556 	char buf[256] = "0123456789";
557 	int split_len = sizeof(buf) / 2;
558 
559 	if (pass_prog) {
560 		pass = test_sockmap_pass_prog__open_and_load();
561 		if (!ASSERT_OK_PTR(pass, "open_and_load"))
562 			return;
563 		verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
564 		map = bpf_map__fd(pass->maps.sock_map_rx);
565 		if (sotype == SOCK_DGRAM)
566 			expected = split_len; /* FIONREAD for UDP is different from TCP */
567 		else
568 			expected = sizeof(buf);
569 	} else {
570 		drop = test_sockmap_drop_prog__open_and_load();
571 		if (!ASSERT_OK_PTR(drop, "open_and_load"))
572 			return;
573 		verdict = bpf_program__fd(drop->progs.prog_skb_verdict);
574 		map = bpf_map__fd(drop->maps.sock_map_rx);
575 		/* On drop data is consumed immediately and copied_seq inc'd */
576 		expected = 0;
577 	}
578 
579 
580 	err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
581 	if (!ASSERT_OK(err, "bpf_prog_attach"))
582 		goto out;
583 
584 	err = create_socket_pairs(AF_INET, sotype, &c0, &c1, &p0, &p1);
585 	if (!ASSERT_OK(err, "create_socket_pairs()"))
586 		goto out;
587 
588 	err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
589 	if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
590 		goto out_close;
591 
592 	sent = xsend(p1, &buf, split_len, 0);
593 	sent += xsend(p1, &buf, sizeof(buf) - split_len, 0);
594 	ASSERT_EQ(sent, sizeof(buf), "xsend(p1)");
595 	err = ioctl(c1, FIONREAD, &avail);
596 	ASSERT_OK(err, "ioctl(FIONREAD) error");
597 	ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
598 	/* On DROP test there will be no data to read */
599 	if (pass_prog) {
600 		recvd = recv_timeout(c1, &buf, sizeof(buf), MSG_DONTWAIT, IO_TIMEOUT_SEC);
601 		ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
602 	}
603 
604 out_close:
605 	close(c0);
606 	close(p0);
607 	close(c1);
608 	close(p1);
609 out:
610 	if (pass_prog)
611 		test_sockmap_pass_prog__destroy(pass);
612 	else
613 		test_sockmap_drop_prog__destroy(drop);
614 }
615 
/*
 * Run the FIONREAD check for SOCK_STREAM first and SOCK_DGRAM second.
 *
 * @pass_prog: forwarded unchanged to do_test_sockmap_skb_verdict_fionread().
 */
static void test_sockmap_skb_verdict_fionread(bool pass_prog)
{
	static const int sotypes[] = { SOCK_STREAM, SOCK_DGRAM };
	unsigned int idx;

	for (idx = 0; idx < sizeof(sotypes) / sizeof(sotypes[0]); idx++)
		do_test_sockmap_skb_verdict_fionread(sotypes[idx], pass_prog);
}
621 
622 static void test_sockmap_skb_verdict_change_tail(void)
623 {
624 	struct test_sockmap_change_tail *skel;
625 	int err, map, verdict;
626 	int c1, p1, sent, recvd;
627 	int zero = 0;
628 	char buf[2];
629 
630 	skel = test_sockmap_change_tail__open_and_load();
631 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
632 		return;
633 	verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
634 	map = bpf_map__fd(skel->maps.sock_map_rx);
635 
636 	err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
637 	if (!ASSERT_OK(err, "bpf_prog_attach"))
638 		goto out;
639 	err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
640 	if (!ASSERT_OK(err, "create_pair()"))
641 		goto out;
642 	err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
643 	if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
644 		goto out_close;
645 	sent = xsend(p1, "Tr", 2, 0);
646 	ASSERT_EQ(sent, 2, "xsend(p1)");
647 	recvd = recv(c1, buf, 2, 0);
648 	ASSERT_EQ(recvd, 1, "recv(c1)");
649 	ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
650 
651 	sent = xsend(p1, "G", 1, 0);
652 	ASSERT_EQ(sent, 1, "xsend(p1)");
653 	recvd = recv(c1, buf, 2, 0);
654 	ASSERT_EQ(recvd, 2, "recv(c1)");
655 	ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
656 
657 	sent = xsend(p1, "E", 1, 0);
658 	ASSERT_EQ(sent, 1, "xsend(p1)");
659 	recvd = recv(c1, buf, 1, 0);
660 	ASSERT_EQ(recvd, 1, "recv(c1)");
661 	ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
662 
663 out_close:
664 	close(c1);
665 	close(p1);
666 out:
667 	test_sockmap_change_tail__destroy(skel);
668 }
669 
670 static void test_sockmap_msg_verdict_pop_data(void)
671 {
672 	struct test_sockmap_msg_pop_data *skel;
673 	int err, map, verdict;
674 	int c1 = -1, p1 = -1, sent;
675 	int zero = 0;
676 	char *buf;
677 	const size_t len = 32 * 1024;
678 
679 	skel = test_sockmap_msg_pop_data__open_and_load();
680 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
681 		return;
682 
683 	verdict = bpf_program__fd(skel->progs.prog_msg_pop_data);
684 	map = bpf_map__fd(skel->maps.sock_map);
685 
686 	err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
687 	if (!ASSERT_OK(err, "bpf_prog_attach"))
688 		goto out;
689 
690 	err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
691 	if (!ASSERT_OK(err, "create_pair"))
692 		goto out;
693 
694 	err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
695 	if (!ASSERT_OK(err, "bpf_map_update_elem"))
696 		goto out_close;
697 
698 	buf = calloc(len, 1);
699 	if (!ASSERT_OK_PTR(buf, "calloc"))
700 		goto out_close;
701 
702 	sent = xsend(c1, buf, len, 0);
703 	ASSERT_EQ(sent, (ssize_t)len, "xsend");
704 	ASSERT_EQ(skel->data->pop_data_ret, -EINVAL, "pop_data_rejects overflow");
705 
706 	free(buf);
707 
708 out_close:
709 	close(c1);
710 	close(p1);
711 out:
712 	test_sockmap_msg_pop_data__destroy(skel);
713 }
714 
715 static void test_sockmap_skb_verdict_peek_helper(int map)
716 {
717 	int err, c1, p1, zero = 0, sent, recvd, avail;
718 	char snd[256] = "0123456789";
719 	char rcv[256] = "0";
720 
721 	err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
722 	if (!ASSERT_OK(err, "create_pair()"))
723 		return;
724 
725 	err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
726 	if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
727 		goto out_close;
728 
729 	sent = xsend(p1, snd, sizeof(snd), 0);
730 	ASSERT_EQ(sent, sizeof(snd), "xsend(p1)");
731 	recvd = recv(c1, rcv, sizeof(rcv), MSG_PEEK);
732 	ASSERT_EQ(recvd, sizeof(rcv), "recv(c1)");
733 	err = ioctl(c1, FIONREAD, &avail);
734 	ASSERT_OK(err, "ioctl(FIONREAD) error");
735 	ASSERT_EQ(avail, sizeof(snd), "after peek ioctl(FIONREAD)");
736 	recvd = recv(c1, rcv, sizeof(rcv), 0);
737 	ASSERT_EQ(recvd, sizeof(rcv), "recv(p0)");
738 	err = ioctl(c1, FIONREAD, &avail);
739 	ASSERT_OK(err, "ioctl(FIONREAD) error");
740 	ASSERT_EQ(avail, 0, "after read ioctl(FIONREAD)");
741 
742 out_close:
743 	close(c1);
744 	close(p1);
745 }
746 
747 static void test_sockmap_skb_verdict_peek(void)
748 {
749 	struct test_sockmap_pass_prog *pass;
750 	int err, map, verdict;
751 
752 	pass = test_sockmap_pass_prog__open_and_load();
753 	if (!ASSERT_OK_PTR(pass, "open_and_load"))
754 		return;
755 	verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
756 	map = bpf_map__fd(pass->maps.sock_map_rx);
757 
758 	err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
759 	if (!ASSERT_OK(err, "bpf_prog_attach"))
760 		goto out;
761 
762 	test_sockmap_skb_verdict_peek_helper(map);
763 
764 out:
765 	test_sockmap_pass_prog__destroy(pass);
766 }
767 
768 static void test_sockmap_skb_verdict_peek_with_link(void)
769 {
770 	struct test_sockmap_pass_prog *pass;
771 	struct bpf_program *prog;
772 	struct bpf_link *link;
773 	int err, map;
774 
775 	pass = test_sockmap_pass_prog__open_and_load();
776 	if (!ASSERT_OK_PTR(pass, "open_and_load"))
777 		return;
778 	prog = pass->progs.prog_skb_verdict;
779 	map = bpf_map__fd(pass->maps.sock_map_rx);
780 	link = bpf_program__attach_sockmap(prog, map);
781 	if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
782 		goto out;
783 
784 	err = bpf_link__update_program(link, pass->progs.prog_skb_verdict_clone);
785 	if (!ASSERT_OK(err, "bpf_link__update_program"))
786 		goto out;
787 
788 	/* Fail since a prog with different attach type attempts to do update. */
789 	err = bpf_link__update_program(link, pass->progs.prog_skb_parser);
790 	if (!ASSERT_ERR(err, "bpf_link__update_program"))
791 		goto out;
792 
793 	test_sockmap_skb_verdict_peek_helper(map);
794 	ASSERT_EQ(pass->bss->clone_called, 1, "clone_called");
795 out:
796 	bpf_link__destroy(link);
797 	test_sockmap_pass_prog__destroy(pass);
798 }
799 
800 static void test_sockmap_unconnected_unix(void)
801 {
802 	int err, map, stream = -1, dgram = -1, zero = 0;
803 	struct test_sockmap_pass_prog *skel;
804 
805 	skel = test_sockmap_pass_prog__open_and_load();
806 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
807 		return;
808 
809 	map = bpf_map__fd(skel->maps.sock_map_rx);
810 
811 	stream = xsocket(AF_UNIX, SOCK_STREAM, 0);
812 	if (stream < 0)
813 		goto out;
814 
815 	dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
816 	if (dgram < 0)
817 		goto out;
818 
819 	err = bpf_map_update_elem(map, &zero, &stream, BPF_ANY);
820 	if (!ASSERT_ERR(err, "bpf_map_update_elem(stream)"))
821 		goto out;
822 
823 	err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY);
824 	ASSERT_OK(err, "bpf_map_update_elem(dgram)");
825 out:
826 	close(stream);
827 	close(dgram);
828 	test_sockmap_pass_prog__destroy(skel);
829 }
830 
831 static void test_sockmap_many_socket(void)
832 {
833 	struct test_sockmap_pass_prog *skel;
834 	int stream[2], dgram, udp, tcp;
835 	int i, err, map, entry = 0;
836 
837 	skel = test_sockmap_pass_prog__open_and_load();
838 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
839 		return;
840 
841 	map = bpf_map__fd(skel->maps.sock_map_rx);
842 
843 	dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
844 	if (dgram < 0) {
845 		test_sockmap_pass_prog__destroy(skel);
846 		return;
847 	}
848 
849 	tcp = connected_socket_v4();
850 	if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) {
851 		close(dgram);
852 		test_sockmap_pass_prog__destroy(skel);
853 		return;
854 	}
855 
856 	udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0);
857 	if (udp < 0) {
858 		close(dgram);
859 		close(tcp);
860 		test_sockmap_pass_prog__destroy(skel);
861 		return;
862 	}
863 
864 	err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream);
865 	ASSERT_OK(err, "socketpair(af_unix, sock_stream)");
866 	if (err)
867 		goto out;
868 
869 	for (i = 0; i < 2; i++, entry++) {
870 		err = bpf_map_update_elem(map, &entry, &stream[0], BPF_ANY);
871 		ASSERT_OK(err, "bpf_map_update_elem(stream)");
872 	}
873 	for (i = 0; i < 2; i++, entry++) {
874 		err = bpf_map_update_elem(map, &entry, &dgram, BPF_ANY);
875 		ASSERT_OK(err, "bpf_map_update_elem(dgram)");
876 	}
877 	for (i = 0; i < 2; i++, entry++) {
878 		err = bpf_map_update_elem(map, &entry, &udp, BPF_ANY);
879 		ASSERT_OK(err, "bpf_map_update_elem(udp)");
880 	}
881 	for (i = 0; i < 2; i++, entry++) {
882 		err = bpf_map_update_elem(map, &entry, &tcp, BPF_ANY);
883 		ASSERT_OK(err, "bpf_map_update_elem(tcp)");
884 	}
885 	for (entry--; entry >= 0; entry--) {
886 		err = bpf_map_delete_elem(map, &entry);
887 		ASSERT_OK(err, "bpf_map_delete_elem(entry)");
888 	}
889 
890 	close(stream[0]);
891 	close(stream[1]);
892 out:
893 	close(dgram);
894 	close(tcp);
895 	close(udp);
896 	test_sockmap_pass_prog__destroy(skel);
897 }
898 
899 static void test_sockmap_many_maps(void)
900 {
901 	struct test_sockmap_pass_prog *skel;
902 	int stream[2], dgram, udp, tcp;
903 	int i, err, map[2], entry = 0;
904 
905 	skel = test_sockmap_pass_prog__open_and_load();
906 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
907 		return;
908 
909 	map[0] = bpf_map__fd(skel->maps.sock_map_rx);
910 	map[1] = bpf_map__fd(skel->maps.sock_map_tx);
911 
912 	dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
913 	if (dgram < 0) {
914 		test_sockmap_pass_prog__destroy(skel);
915 		return;
916 	}
917 
918 	tcp = connected_socket_v4();
919 	if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) {
920 		close(dgram);
921 		test_sockmap_pass_prog__destroy(skel);
922 		return;
923 	}
924 
925 	udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0);
926 	if (udp < 0) {
927 		close(dgram);
928 		close(tcp);
929 		test_sockmap_pass_prog__destroy(skel);
930 		return;
931 	}
932 
933 	err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream);
934 	ASSERT_OK(err, "socketpair(af_unix, sock_stream)");
935 	if (err)
936 		goto out;
937 
938 	for (i = 0; i < 2; i++, entry++) {
939 		err = bpf_map_update_elem(map[i], &entry, &stream[0], BPF_ANY);
940 		ASSERT_OK(err, "bpf_map_update_elem(stream)");
941 	}
942 	for (i = 0; i < 2; i++, entry++) {
943 		err = bpf_map_update_elem(map[i], &entry, &dgram, BPF_ANY);
944 		ASSERT_OK(err, "bpf_map_update_elem(dgram)");
945 	}
946 	for (i = 0; i < 2; i++, entry++) {
947 		err = bpf_map_update_elem(map[i], &entry, &udp, BPF_ANY);
948 		ASSERT_OK(err, "bpf_map_update_elem(udp)");
949 	}
950 	for (i = 0; i < 2; i++, entry++) {
951 		err = bpf_map_update_elem(map[i], &entry, &tcp, BPF_ANY);
952 		ASSERT_OK(err, "bpf_map_update_elem(tcp)");
953 	}
954 	for (entry--; entry >= 0; entry--) {
955 		err = bpf_map_delete_elem(map[1], &entry);
956 		entry--;
957 		ASSERT_OK(err, "bpf_map_delete_elem(entry)");
958 		err = bpf_map_delete_elem(map[0], &entry);
959 		ASSERT_OK(err, "bpf_map_delete_elem(entry)");
960 	}
961 
962 	close(stream[0]);
963 	close(stream[1]);
964 out:
965 	close(dgram);
966 	close(tcp);
967 	close(udp);
968 	test_sockmap_pass_prog__destroy(skel);
969 }
970 
971 static void test_sockmap_same_sock(void)
972 {
973 	struct test_sockmap_pass_prog *skel;
974 	int stream[2], dgram, udp, tcp;
975 	int i, err, map, zero = 0;
976 
977 	skel = test_sockmap_pass_prog__open_and_load();
978 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
979 		return;
980 
981 	map = bpf_map__fd(skel->maps.sock_map_rx);
982 
983 	dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
984 	if (dgram < 0) {
985 		test_sockmap_pass_prog__destroy(skel);
986 		return;
987 	}
988 
989 	tcp = connected_socket_v4();
990 	if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) {
991 		close(dgram);
992 		test_sockmap_pass_prog__destroy(skel);
993 		return;
994 	}
995 
996 	udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0);
997 	if (udp < 0) {
998 		close(dgram);
999 		close(tcp);
1000 		test_sockmap_pass_prog__destroy(skel);
1001 		return;
1002 	}
1003 
1004 	err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream);
1005 	ASSERT_OK(err, "socketpair(af_unix, sock_stream)");
1006 	if (err) {
1007 		close(tcp);
1008 		goto out;
1009 	}
1010 
1011 	for (i = 0; i < 2; i++) {
1012 		err = bpf_map_update_elem(map, &zero, &stream[0], BPF_ANY);
1013 		ASSERT_OK(err, "bpf_map_update_elem(stream)");
1014 	}
1015 	for (i = 0; i < 2; i++) {
1016 		err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY);
1017 		ASSERT_OK(err, "bpf_map_update_elem(dgram)");
1018 	}
1019 	for (i = 0; i < 2; i++) {
1020 		err = bpf_map_update_elem(map, &zero, &udp, BPF_ANY);
1021 		ASSERT_OK(err, "bpf_map_update_elem(udp)");
1022 	}
1023 	for (i = 0; i < 2; i++) {
1024 		err = bpf_map_update_elem(map, &zero, &tcp, BPF_ANY);
1025 		ASSERT_OK(err, "bpf_map_update_elem(tcp)");
1026 	}
1027 
1028 	close(tcp);
1029 	err = bpf_map_delete_elem(map, &zero);
1030 	ASSERT_ERR(err, "bpf_map_delete_elem(entry)");
1031 
1032 	close(stream[0]);
1033 	close(stream[1]);
1034 out:
1035 	close(dgram);
1036 	close(udp);
1037 	test_sockmap_pass_prog__destroy(skel);
1038 }
1039 
1040 static void test_sockmap_skb_verdict_vsock_poll(void)
1041 {
1042 	struct test_sockmap_pass_prog *skel;
1043 	int err, map, conn, peer;
1044 	struct bpf_program *prog;
1045 	struct bpf_link *link;
1046 	char buf = 'x';
1047 	int zero = 0;
1048 
1049 	skel = test_sockmap_pass_prog__open_and_load();
1050 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
1051 		return;
1052 
1053 	if (create_pair(AF_VSOCK, SOCK_STREAM, &conn, &peer))
1054 		goto destroy;
1055 
1056 	prog = skel->progs.prog_skb_verdict;
1057 	map = bpf_map__fd(skel->maps.sock_map_rx);
1058 	link = bpf_program__attach_sockmap(prog, map);
1059 	if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
1060 		goto close;
1061 
1062 	err = bpf_map_update_elem(map, &zero, &conn, BPF_ANY);
1063 	if (!ASSERT_OK(err, "bpf_map_update_elem"))
1064 		goto detach;
1065 
1066 	if (xsend(peer, &buf, 1, 0) != 1)
1067 		goto detach;
1068 
1069 	err = poll_read(conn, IO_TIMEOUT_SEC);
1070 	if (!ASSERT_OK(err, "poll"))
1071 		goto detach;
1072 
1073 	if (xrecv_nonblock(conn, &buf, 1, 0) != 1)
1074 		FAIL("xrecv_nonblock");
1075 detach:
1076 	bpf_link__destroy(link);
1077 close:
1078 	xclose(conn);
1079 	xclose(peer);
1080 destroy:
1081 	test_sockmap_pass_prog__destroy(skel);
1082 }
1083 
1084 static void test_sockmap_vsock_unconnected(void)
1085 {
1086 	struct sockaddr_storage addr;
1087 	int map, s, zero = 0;
1088 	socklen_t alen;
1089 
1090 	map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
1091 			     sizeof(int), 1, NULL);
1092 	if (!ASSERT_OK_FD(map, "bpf_map_create"))
1093 		return;
1094 
1095 	s = xsocket(AF_VSOCK, SOCK_STREAM, 0);
1096 	if (s < 0)
1097 		goto close_map;
1098 
1099 	/* Fail connect(), but trigger transport assignment. */
1100 	init_addr_loopback(AF_VSOCK, &addr, &alen);
1101 	if (!ASSERT_ERR(connect(s, sockaddr(&addr), alen), "connect"))
1102 		goto close_sock;
1103 
1104 	ASSERT_ERR(bpf_map_update_elem(map, &zero, &s, BPF_ANY), "map_update");
1105 
1106 close_sock:
1107 	xclose(s);
1108 close_map:
1109 	xclose(map);
1110 }
1111 
1112 /* it is used to reproduce WARNING */
1113 static void test_sockmap_zc(void)
1114 {
1115 	int map, err, sent, recvd, zero = 0, one = 1, on = 1;
1116 	char buf[10] = "0123456789", rcv[11], addr[100];
1117 	struct test_sockmap_pass_prog *skel = NULL;
1118 	int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
1119 	struct tcp_zerocopy_receive zc;
1120 	socklen_t zc_len = sizeof(zc);
1121 	struct bpf_program *prog;
1122 
1123 	skel = test_sockmap_pass_prog__open_and_load();
1124 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
1125 		return;
1126 
1127 	if (create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1))
1128 		goto end;
1129 
1130 	prog = skel->progs.prog_skb_verdict_ingress;
1131 	map = bpf_map__fd(skel->maps.sock_map_rx);
1132 
1133 	err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0);
1134 	if (!ASSERT_OK(err, "bpf_prog_attach"))
1135 		goto end;
1136 
1137 	err = bpf_map_update_elem(map, &zero, &p0, BPF_ANY);
1138 	if (!ASSERT_OK(err, "bpf_map_update_elem"))
1139 		goto end;
1140 
1141 	err = bpf_map_update_elem(map, &one, &p1, BPF_ANY);
1142 	if (!ASSERT_OK(err, "bpf_map_update_elem"))
1143 		goto end;
1144 
1145 	sent = xsend(c0, buf, sizeof(buf), 0);
1146 	if (!ASSERT_EQ(sent, sizeof(buf), "xsend"))
1147 		goto end;
1148 
1149 	/* trigger tcp_bpf_recvmsg_parser and inc copied_seq of p1 */
1150 	recvd = recv_timeout(p1, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
1151 	if (!ASSERT_EQ(recvd, sent, "recv_timeout(p1)"))
1152 		goto end;
1153 
1154 	/* uninstall sockmap of p1 */
1155 	bpf_map_delete_elem(map, &one);
1156 
1157 	/* trigger tcp stack and the rcv_nxt of p1 is less than copied_seq */
1158 	sent = xsend(c1, buf, sizeof(buf) - 1, 0);
1159 	if (!ASSERT_EQ(sent, sizeof(buf) - 1, "xsend"))
1160 		goto end;
1161 
1162 	err = setsockopt(p1, SOL_SOCKET, SO_ZEROCOPY, &on, sizeof(on));
1163 	if (!ASSERT_OK(err, "setsockopt"))
1164 		goto end;
1165 
1166 	memset(&zc, 0, sizeof(zc));
1167 	zc.copybuf_address = (__u64)((unsigned long)addr);
1168 	zc.copybuf_len = sizeof(addr);
1169 
1170 	err = getsockopt(p1, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len);
1171 	if (!ASSERT_OK(err, "getsockopt"))
1172 		goto end;
1173 
1174 end:
1175 	if (c0 >= 0)
1176 		close(c0);
1177 	if (p0 >= 0)
1178 		close(p0);
1179 	if (c1 >= 0)
1180 		close(c1);
1181 	if (p1 >= 0)
1182 		close(p1);
1183 	test_sockmap_pass_prog__destroy(skel);
1184 }
1185 
1186 /* it is used to check whether copied_seq of sk is correct */
1187 static void test_sockmap_copied_seq(bool strp)
1188 {
1189 	int i, map, err, sent, recvd, zero = 0, one = 1;
1190 	struct test_sockmap_pass_prog *skel = NULL;
1191 	int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
1192 	char buf[10] = "0123456789", rcv[11];
1193 	struct bpf_program *prog;
1194 
1195 	skel = test_sockmap_pass_prog__open_and_load();
1196 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
1197 		return;
1198 
1199 	if (create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1))
1200 		goto end;
1201 
1202 	prog = skel->progs.prog_skb_verdict_ingress;
1203 	map = bpf_map__fd(skel->maps.sock_map_rx);
1204 
1205 	err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0);
1206 	if (!ASSERT_OK(err, "bpf_prog_attach verdict"))
1207 		goto end;
1208 
1209 	if (strp) {
1210 		prog = skel->progs.prog_skb_verdict_ingress_strp;
1211 		err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_PARSER, 0);
1212 		if (!ASSERT_OK(err, "bpf_prog_attach parser"))
1213 			goto end;
1214 	}
1215 
1216 	err = bpf_map_update_elem(map, &zero, &p0, BPF_ANY);
1217 	if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
1218 		goto end;
1219 
1220 	err = bpf_map_update_elem(map, &one, &p1, BPF_ANY);
1221 	if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
1222 		goto end;
1223 
1224 	/* just trigger sockamp: data sent by c0 will be received by p1 */
1225 	sent = xsend(c0, buf, sizeof(buf), 0);
1226 	if (!ASSERT_EQ(sent, sizeof(buf), "xsend(c0), bpf"))
1227 		goto end;
1228 
1229 	/* do partial read */
1230 	recvd = recv_timeout(p1, rcv, 1, MSG_DONTWAIT, 1);
1231 	recvd += recv_timeout(p1, rcv + 1, sizeof(rcv) - 1, MSG_DONTWAIT, 1);
1232 	if (!ASSERT_EQ(recvd, sent, "recv_timeout(p1), bpf") ||
1233 	    !ASSERT_OK(memcmp(buf, rcv, recvd), "data mismatch"))
1234 		goto end;
1235 
1236 	/* uninstall sockmap of p1 and p0 */
1237 	err = bpf_map_delete_elem(map, &one);
1238 	if (!ASSERT_OK(err, "bpf_map_delete_elem(1)"))
1239 		goto end;
1240 
1241 	err = bpf_map_delete_elem(map, &zero);
1242 	if (!ASSERT_OK(err, "bpf_map_delete_elem(0)"))
1243 		goto end;
1244 
1245 	/* now all sockets become plain socket, they should still work */
1246 	for (i = 0; i < 5; i++) {
1247 		/* test copied_seq of p1 by running tcp native stack */
1248 		sent = xsend(c1, buf, sizeof(buf), 0);
1249 		if (!ASSERT_EQ(sent, sizeof(buf), "xsend(c1), native"))
1250 			goto end;
1251 
1252 		recvd = recv(p1, rcv, sizeof(rcv), MSG_DONTWAIT);
1253 		if (!ASSERT_EQ(recvd, sent, "recv_timeout(p1), native"))
1254 			goto end;
1255 
1256 		/* p0 previously redirected skb to p1, we also check copied_seq of p0 */
1257 		sent = xsend(c0, buf, sizeof(buf), 0);
1258 		if (!ASSERT_EQ(sent, sizeof(buf), "xsend(c0), native"))
1259 			goto end;
1260 
1261 		recvd = recv(p0, rcv, sizeof(rcv), MSG_DONTWAIT);
1262 		if (!ASSERT_EQ(recvd, sent, "recv_timeout(p0), native"))
1263 			goto end;
1264 	}
1265 
1266 end:
1267 	if (c0 >= 0)
1268 		close(c0);
1269 	if (p0 >= 0)
1270 		close(p0);
1271 	if (c1 >= 0)
1272 		close(c1);
1273 	if (p1 >= 0)
1274 		close(p1);
1275 	test_sockmap_pass_prog__destroy(skel);
1276 }
1277 
/*
 * Wait until FIONREAD on @fd reports at least @expected bytes, or time out.
 *
 * The fd is sampled roughly once per millisecond, for at most @timeout_ms
 * sleep intervals. It is always sampled at least once (even when @timeout_ms
 * is 0) and once more after the final sleep, so the value returned on timeout
 * is never stale.
 *
 * Returns the last byte count reported by FIONREAD (less than @expected when
 * the wait timed out), or -errno if the ioctl itself fails.
 */
static int wait_for_fionread(int fd, int expected, unsigned int timeout_ms)
{
	unsigned int elapsed = 0;
	int avail = 0;

	for (;;) {
		if (ioctl(fd, FIONREAD, &avail) < 0)
			return -errno;
		/* Done on success, or once the whole budget has been slept. */
		if (avail >= expected || elapsed >= timeout_ms)
			return avail;
		usleep(1000);
		elapsed++;
	}
}
1294 
1295 /* it is used to send data to via native stack and BPF redirecting */
1296 static void test_sockmap_multi_channels(int sotype)
1297 {
1298 	int map, err, sent, recvd, zero = 0, one = 1, avail = 0, expected;
1299 	struct test_sockmap_pass_prog *skel = NULL;
1300 	int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
1301 	char buf[10] = "0123456789", rcv[11];
1302 	struct bpf_program *prog;
1303 
1304 	skel = test_sockmap_pass_prog__open_and_load();
1305 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
1306 		return;
1307 
1308 	err = create_socket_pairs(AF_INET, sotype, &c0, &c1, &p0, &p1);
1309 	if (err)
1310 		goto end;
1311 
1312 	prog = skel->progs.prog_skb_verdict_ingress;
1313 	map = bpf_map__fd(skel->maps.sock_map_rx);
1314 
1315 	err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0);
1316 	if (!ASSERT_OK(err, "bpf_prog_attach verdict"))
1317 		goto end;
1318 
1319 	err = bpf_map_update_elem(map, &zero, &p0, BPF_ANY);
1320 	if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
1321 		goto end;
1322 
1323 	err = bpf_map_update_elem(map, &one, &p1, BPF_ANY);
1324 	if (!ASSERT_OK(err, "bpf_map_update_elem"))
1325 		goto end;
1326 
1327 	/* send data to p1 via native stack */
1328 	sent = xsend(c1, buf, 2, 0);
1329 	if (!ASSERT_EQ(sent, 2, "xsend(2)"))
1330 		goto end;
1331 
1332 	avail = wait_for_fionread(p1, 2, IO_TIMEOUT_SEC);
1333 	ASSERT_EQ(avail, 2, "ioctl(FIONREAD) partial return");
1334 
1335 	/* send data to p1 via bpf redirecting */
1336 	sent = xsend(c0, buf + 2, sizeof(buf) - 2, 0);
1337 	if (!ASSERT_EQ(sent, sizeof(buf) - 2, "xsend(remain-data)"))
1338 		goto end;
1339 
1340 	/* Poll FIONREAD until expected bytes arrive, poll_read() is unreliable
1341 	 * here since it may return immediately if prior data is already queued.
1342 	 */
1343 	expected = sotype == SOCK_DGRAM ? 2 : sizeof(buf);
1344 	avail = wait_for_fionread(p1, expected, IO_TIMEOUT_SEC);
1345 	ASSERT_EQ(avail, expected, "ioctl(FIONREAD) full return");
1346 
1347 	recvd = recv_timeout(p1, rcv, expected, MSG_DONTWAIT, 1);
1348 	if (!ASSERT_EQ(recvd, expected, "recv_timeout(p1)") ||
1349 	    !ASSERT_OK(memcmp(buf, rcv, recvd), "data mismatch"))
1350 		goto end;
1351 
1352 	/* process remaining data for udp if secondary data is available */
1353 	expected = sizeof(buf) - expected;
1354 	if (expected) {
1355 		avail = wait_for_fionread(p1, expected, IO_TIMEOUT_SEC);
1356 		ASSERT_EQ(avail, expected, "second ioctl(FIONREAD) full return");
1357 
1358 		recvd = recv_timeout(p1, rcv, expected, MSG_DONTWAIT, 1);
1359 		if (!ASSERT_EQ(recvd, expected, "second recv_timeout(p1)") ||
1360 		    !ASSERT_OK(memcmp(buf + sizeof(buf) - expected, rcv, recvd),
1361 			       "second data mismatch"))
1362 			goto end;
1363 	}
1364 end:
1365 	if (c0 >= 0)
1366 		close(c0);
1367 	if (p0 >= 0)
1368 		close(p0);
1369 	if (c1 >= 0)
1370 		close(c1);
1371 	if (p1 >= 0)
1372 		close(p1);
1373 	test_sockmap_pass_prog__destroy(skel);
1374 }
1375 
1376 void test_sockmap_basic(void)
1377 {
1378 	if (test__start_subtest("sockmap create_update_free"))
1379 		test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP);
1380 	if (test__start_subtest("sockhash create_update_free"))
1381 		test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH);
1382 	if (test__start_subtest("sockmap vsock delete on close"))
1383 		test_sockmap_vsock_delete_on_close();
1384 	if (test__start_subtest("sockmap sk_msg load helpers"))
1385 		test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP);
1386 	if (test__start_subtest("sockhash sk_msg load helpers"))
1387 		test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH);
1388 	if (test__start_subtest("sockmap update"))
1389 		test_sockmap_update(BPF_MAP_TYPE_SOCKMAP);
1390 	if (test__start_subtest("sockhash update"))
1391 		test_sockmap_update(BPF_MAP_TYPE_SOCKHASH);
1392 	if (test__start_subtest("sockmap update in unsafe context"))
1393 		test_sockmap_invalid_update();
1394 	if (test__start_subtest("sockmap copy"))
1395 		test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP);
1396 	if (test__start_subtest("sockhash copy"))
1397 		test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH);
1398 	if (test__start_subtest("sockmap skb_verdict attach")) {
1399 		test_sockmap_skb_verdict_attach(BPF_SK_SKB_VERDICT,
1400 						BPF_SK_SKB_STREAM_VERDICT);
1401 		test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
1402 						BPF_SK_SKB_VERDICT);
1403 	}
1404 	if (test__start_subtest("sockmap skb_verdict attach_with_link"))
1405 		test_sockmap_skb_verdict_attach_with_link();
1406 	if (test__start_subtest("sockmap msg_verdict progs query"))
1407 		test_sockmap_progs_query(BPF_SK_MSG_VERDICT);
1408 	if (test__start_subtest("sockmap stream_parser progs query"))
1409 		test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER);
1410 	if (test__start_subtest("sockmap stream_verdict progs query"))
1411 		test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
1412 	if (test__start_subtest("sockmap skb_verdict progs query"))
1413 		test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
1414 	if (test__start_subtest("sockmap skb_verdict shutdown"))
1415 		test_sockmap_skb_verdict_shutdown();
1416 	if (test__start_subtest("sockmap skb_verdict fionread"))
1417 		test_sockmap_skb_verdict_fionread(true);
1418 	if (test__start_subtest("sockmap skb_verdict fionread on drop"))
1419 		test_sockmap_skb_verdict_fionread(false);
1420 	if (test__start_subtest("sockmap skb_verdict change tail"))
1421 		test_sockmap_skb_verdict_change_tail();
1422 	if (test__start_subtest("sockmap msg_verdict pop_data overflow"))
1423 		test_sockmap_msg_verdict_pop_data();
1424 	if (test__start_subtest("sockmap skb_verdict msg_f_peek"))
1425 		test_sockmap_skb_verdict_peek();
1426 	if (test__start_subtest("sockmap skb_verdict msg_f_peek with link"))
1427 		test_sockmap_skb_verdict_peek_with_link();
1428 	if (test__start_subtest("sockmap unconnected af_unix"))
1429 		test_sockmap_unconnected_unix();
1430 	if (test__start_subtest("sockmap one socket to many map entries"))
1431 		test_sockmap_many_socket();
1432 	if (test__start_subtest("sockmap one socket to many maps"))
1433 		test_sockmap_many_maps();
1434 	if (test__start_subtest("sockmap same socket replace"))
1435 		test_sockmap_same_sock();
1436 	if (test__start_subtest("sockmap sk_msg attach sockmap helpers with link"))
1437 		test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKMAP);
1438 	if (test__start_subtest("sockhash sk_msg attach sockhash helpers with link"))
1439 		test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH);
1440 	if (test__start_subtest("sockmap skb_verdict vsock poll"))
1441 		test_sockmap_skb_verdict_vsock_poll();
1442 	if (test__start_subtest("sockmap vsock unconnected"))
1443 		test_sockmap_vsock_unconnected();
1444 	if (test__start_subtest("sockmap with zc"))
1445 		test_sockmap_zc();
1446 	if (test__start_subtest("sockmap recover"))
1447 		test_sockmap_copied_seq(false);
1448 	if (test__start_subtest("sockmap recover with strp"))
1449 		test_sockmap_copied_seq(true);
1450 	if (test__start_subtest("sockmap tcp multi channels"))
1451 		test_sockmap_multi_channels(SOCK_STREAM);
1452 	if (test__start_subtest("sockmap udp multi channels"))
1453 		test_sockmap_multi_channels(SOCK_DGRAM);
1454 }
1455