1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2020 Cloudflare
3 /*
4 * Test suite for SOCKMAP/SOCKHASH holding listening sockets.
5 * Covers:
6 * 1. BPF map operations - bpf_map_{update,lookup,delete}_elem
7 * 2. BPF redirect helpers - bpf_{sk,msg}_redirect_map
8 * 3. BPF reuseport helper - bpf_sk_select_reuseport
9 */
10
11 #include <linux/compiler.h>
12 #include <errno.h>
13 #include <error.h>
14 #include <limits.h>
15 #include <netinet/in.h>
16 #include <pthread.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/select.h>
20 #include <unistd.h>
21 #include <linux/vm_sockets.h>
22
23 #include <bpf/bpf.h>
24 #include <bpf/libbpf.h>
25
26 #include "bpf_util.h"
27 #include "test_progs.h"
28 #include "test_sockmap_listen.skel.h"
29
30 #include "sockmap_helpers.h"
31
/* Readable spelling of "no flags set" for calls that take a flags argument. */
#define NO_FLAGS 0
33
/* Values that are not usable socket FDs must be rejected on insert:
 * (u64)-1 is expected to fail with EINVAL, INT_MAX (presumably never an
 * open FD in the test process) with EBADF.
 */
static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
				int family, int sotype, int mapfd)
{
	const u32 slot = 0;
	u64 fd_val = -1;
	int rc;

	rc = bpf_map_update_elem(mapfd, &slot, &fd_val, BPF_NOEXIST);
	if (!(rc && errno == EINVAL))
		FAIL_ERRNO("map_update: expected EINVAL");

	fd_val = INT_MAX;
	rc = bpf_map_update_elem(mapfd, &slot, &fd_val, BPF_NOEXIST);
	if (!(rc && errno == EBADF))
		FAIL_ERRNO("map_update: expected EBADF");
}
51
/* Insert a socket that has only been opened (no bind, no listen).
 * For SOCK_STREAM the update is expected to fail with EOPNOTSUPP; for any
 * other socket type it is expected to succeed.
 */
static void test_insert_opened(struct test_sockmap_listen *skel __always_unused,
			       int family, int sotype, int mapfd)
{
	const bool is_stream = sotype == SOCK_STREAM;
	const u32 slot = 0;
	u64 fd_val;
	int rc, sock;

	sock = xsocket(family, sotype, 0);
	if (sock == -1)
		return;

	fd_val = sock;
	errno = 0;
	rc = bpf_map_update_elem(mapfd, &slot, &fd_val, BPF_NOEXIST);

	if (is_stream && !(rc && errno == EOPNOTSUPP))
		FAIL_ERRNO("map_update: expected EOPNOTSUPP");
	if (!is_stream && rc)
		FAIL_ERRNO("map_update: expected success");

	xclose(sock);
}
73
/* Insert a socket that is bound to the loopback address but is not
 * listening; the update is expected to fail with EOPNOTSUPP.
 */
static void test_insert_bound(struct test_sockmap_listen *skel __always_unused,
			      int family, int sotype, int mapfd)
{
	struct sockaddr_storage bind_addr;
	socklen_t addrlen = 0;
	const u32 slot = 0;
	u64 fd_val;
	int rc, sock;

	init_addr_loopback(family, &bind_addr, &addrlen);

	sock = xsocket(family, sotype, 0);
	if (sock == -1)
		return;

	/* Only attempt the insert when bind() worked */
	if (!xbind(sock, sockaddr(&bind_addr), addrlen)) {
		fd_val = sock;
		errno = 0;
		rc = bpf_map_update_elem(mapfd, &slot, &fd_val, BPF_NOEXIST);
		if (!(rc && errno == EOPNOTSUPP))
			FAIL_ERRNO("map_update: expected EOPNOTSUPP");
	}

	xclose(sock);
}
101
/* Store a socket obtained from socket_loopback() at key 0, then close it. */
static void test_insert(struct test_sockmap_listen *skel __always_unused,
			int family, int sotype, int mapfd)
{
	const u32 slot = 0;
	u64 fd_val;
	int sock;

	sock = socket_loopback(family, sotype);
	if (sock < 0)
		return;

	fd_val = sock;
	xbpf_map_update_elem(mapfd, &slot, &fd_val, BPF_NOEXIST);

	xclose(sock);
}
118
/* Store a socket at key 0 and delete that entry again while the socket
 * is still open.
 */
static void test_delete_after_insert(struct test_sockmap_listen *skel __always_unused,
				     int family, int sotype, int mapfd)
{
	const u32 slot = 0;
	u64 fd_val;
	int sock;

	sock = socket_loopback(family, sotype);
	if (sock < 0)
		return;

	fd_val = sock;
	xbpf_map_update_elem(mapfd, &slot, &fd_val, BPF_NOEXIST);
	xbpf_map_delete_elem(mapfd, &slot);

	xclose(sock);
}
136
/* Store a socket at key 0, close the socket, then try to delete the entry.
 * The delete is expected to fail with either EINVAL or ENOENT.
 */
static void test_delete_after_close(struct test_sockmap_listen *skel __always_unused,
				    int family, int sotype, int mapfd)
{
	int err, s;
	u64 value;
	u32 key;

	s = socket_loopback(family, sotype);
	if (s < 0)
		return;

	key = 0;
	value = s;
	xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);

	xclose(s);

	errno = 0;
	err = bpf_map_delete_elem(mapfd, &key);
	if (!err || (errno != EINVAL && errno != ENOENT))
		/* SOCKMAP and SOCKHASH return different error codes */
		FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
}
160
/* After storing a socket at key 0, a lookup of that key is expected to
 * return the socket's cookie (as reported by SO_COOKIE).
 */
static void test_lookup_after_insert(struct test_sockmap_listen *skel __always_unused,
				     int family, int sotype, int mapfd)
{
	const u32 slot = 0;
	u64 want, have;
	socklen_t optlen;
	int sock;

	sock = socket_loopback(family, sotype);
	if (sock < 0)
		return;

	have = sock;
	xbpf_map_update_elem(mapfd, &slot, &have, BPF_NOEXIST);

	optlen = sizeof(want);
	xgetsockopt(sock, SOL_SOCKET, SO_COOKIE, &want, &optlen);

	/* Overwrites 'have' with what the map reports for the slot */
	xbpf_map_lookup_elem(mapfd, &slot, &have);

	if (have != want)
		FAIL("map_lookup: have %#llx, want %#llx",
		     (unsigned long long)have, (unsigned long long)want);

	xclose(sock);
}
189
/* Once an entry has been deleted, looking it up is expected to fail
 * with ENOENT.
 */
static void test_lookup_after_delete(struct test_sockmap_listen *skel __always_unused,
				     int family, int sotype, int mapfd)
{
	const u32 slot = 0;
	u64 fd_val;
	int rc, sock;

	sock = socket_loopback(family, sotype);
	if (sock < 0)
		return;

	fd_val = sock;
	xbpf_map_update_elem(mapfd, &slot, &fd_val, BPF_NOEXIST);
	xbpf_map_delete_elem(mapfd, &slot);

	errno = 0;
	rc = bpf_map_lookup_elem(mapfd, &slot, &fd_val);
	if (!(rc && errno == ENOENT))
		FAIL_ERRNO("map_lookup: expected ENOENT");

	xclose(sock);
}
213
/* Create a private one-entry SOCKMAP with a 32-bit value size, store a
 * socket in it, and check that a lookup fails with ENOSPC. The mapfd
 * argument is not used; the test operates on its own map.
 */
static void test_lookup_32_bit_value(struct test_sockmap_listen *skel __always_unused,
				     int family, int sotype, int mapfd)
{
	u32 slot, value32;
	int rc, sock, map32;

	sock = socket_loopback(family, sotype);
	if (sock < 0)
		return;

	map32 = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(slot),
			       sizeof(value32), 1, NULL);
	if (map32 < 0) {
		FAIL_ERRNO("map_create");
	} else {
		slot = 0;
		value32 = sock;
		xbpf_map_update_elem(map32, &slot, &value32, BPF_NOEXIST);

		errno = 0;
		rc = bpf_map_lookup_elem(map32, &slot, &value32);
		if (!(rc && errno == ENOSPC))
			FAIL_ERRNO("map_lookup: expected ENOSPC");

		xclose(map32);
	}

	xclose(sock);
}
244
/* Store one socket at key 0 with BPF_NOEXIST, then replace it with a
 * second socket using BPF_EXIST.
 */
static void test_update_existing(struct test_sockmap_listen *skel __always_unused,
				 int family, int sotype, int mapfd)
{
	const u32 slot = 0;
	int first, second;
	u64 fd_val;

	first = socket_loopback(family, sotype);
	if (first < 0)
		return;

	second = socket_loopback(family, sotype);
	if (second >= 0) {
		fd_val = first;
		xbpf_map_update_elem(mapfd, &slot, &fd_val, BPF_NOEXIST);

		fd_val = second;
		xbpf_map_update_elem(mapfd, &slot, &fd_val, BPF_EXIST);

		xclose(second);
	}

	xclose(first);
}
270
271 /* Exercise the code path where we destroy child sockets that never
272 * got accept()'ed, aka orphans, when parent socket gets closed.
273 */
do_destroy_orphan_child(int family,int sotype,int mapfd)274 static void do_destroy_orphan_child(int family, int sotype, int mapfd)
275 {
276 struct sockaddr_storage addr;
277 socklen_t len;
278 int err, s, c;
279 u64 value;
280 u32 key;
281
282 s = socket_loopback(family, sotype);
283 if (s < 0)
284 return;
285
286 len = sizeof(addr);
287 err = xgetsockname(s, sockaddr(&addr), &len);
288 if (err)
289 goto close_srv;
290
291 key = 0;
292 value = s;
293 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
294
295 c = xsocket(family, sotype, 0);
296 if (c == -1)
297 goto close_srv;
298
299 xconnect(c, sockaddr(&addr), len);
300 xclose(c);
301 close_srv:
302 xclose(s);
303 }
304
test_destroy_orphan_child(struct test_sockmap_listen * skel,int family,int sotype,int mapfd)305 static void test_destroy_orphan_child(struct test_sockmap_listen *skel,
306 int family, int sotype, int mapfd)
307 {
308 int msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
309 int skb_verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
310 const struct test {
311 int progfd;
312 enum bpf_attach_type atype;
313 } tests[] = {
314 { -1, -1 },
315 { msg_verdict, BPF_SK_MSG_VERDICT },
316 { skb_verdict, BPF_SK_SKB_VERDICT },
317 };
318 const struct test *t;
319
320 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
321 if (t->progfd != -1 &&
322 xbpf_prog_attach(t->progfd, mapfd, t->atype, 0) != 0)
323 return;
324
325 do_destroy_orphan_child(family, sotype, mapfd);
326
327 if (t->progfd != -1)
328 xbpf_prog_detach2(t->progfd, mapfd, t->atype);
329 }
330 }
331
332 /* Perform a passive open after removing listening socket from SOCKMAP
333 * to ensure that callbacks get restored properly.
334 */
test_clone_after_delete(struct test_sockmap_listen * skel __always_unused,int family,int sotype,int mapfd)335 static void test_clone_after_delete(struct test_sockmap_listen *skel __always_unused,
336 int family, int sotype, int mapfd)
337 {
338 struct sockaddr_storage addr;
339 socklen_t len;
340 int err, s, c;
341 u64 value;
342 u32 key;
343
344 s = socket_loopback(family, sotype);
345 if (s < 0)
346 return;
347
348 len = sizeof(addr);
349 err = xgetsockname(s, sockaddr(&addr), &len);
350 if (err)
351 goto close_srv;
352
353 key = 0;
354 value = s;
355 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
356 xbpf_map_delete_elem(mapfd, &key);
357
358 c = xsocket(family, sotype, 0);
359 if (c < 0)
360 goto close_srv;
361
362 xconnect(c, sockaddr(&addr), len);
363 xclose(c);
364 close_srv:
365 xclose(s);
366 }
367
368 /* Check that child socket that got created while parent was in a
369 * SOCKMAP, but got accept()'ed only after the parent has been removed
370 * from SOCKMAP, gets cloned without parent psock state or callbacks.
371 */
test_accept_after_delete(struct test_sockmap_listen * skel __always_unused,int family,int sotype,int mapfd)372 static void test_accept_after_delete(struct test_sockmap_listen *skel __always_unused,
373 int family, int sotype, int mapfd)
374 {
375 struct sockaddr_storage addr;
376 const u32 zero = 0;
377 int err, s, c, p;
378 socklen_t len;
379 u64 value;
380
381 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
382 if (s == -1)
383 return;
384
385 len = sizeof(addr);
386 err = xgetsockname(s, sockaddr(&addr), &len);
387 if (err)
388 goto close_srv;
389
390 value = s;
391 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
392 if (err)
393 goto close_srv;
394
395 c = xsocket(family, sotype, 0);
396 if (c == -1)
397 goto close_srv;
398
399 /* Create child while parent is in sockmap */
400 err = xconnect(c, sockaddr(&addr), len);
401 if (err)
402 goto close_cli;
403
404 /* Remove parent from sockmap */
405 err = xbpf_map_delete_elem(mapfd, &zero);
406 if (err)
407 goto close_cli;
408
409 p = xaccept_nonblock(s, NULL, NULL);
410 if (p == -1)
411 goto close_cli;
412
413 /* Check that child sk_user_data is not set */
414 value = p;
415 xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
416
417 xclose(p);
418 close_cli:
419 xclose(c);
420 close_srv:
421 xclose(s);
422 }
423
424 /* Check that child socket that got created and accepted while parent
425 * was in a SOCKMAP is cloned without parent psock state or callbacks.
426 */
test_accept_before_delete(struct test_sockmap_listen * skel __always_unused,int family,int sotype,int mapfd)427 static void test_accept_before_delete(struct test_sockmap_listen *skel __always_unused,
428 int family, int sotype, int mapfd)
429 {
430 struct sockaddr_storage addr;
431 const u32 zero = 0, one = 1;
432 int err, s, c, p;
433 socklen_t len;
434 u64 value;
435
436 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
437 if (s == -1)
438 return;
439
440 len = sizeof(addr);
441 err = xgetsockname(s, sockaddr(&addr), &len);
442 if (err)
443 goto close_srv;
444
445 value = s;
446 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
447 if (err)
448 goto close_srv;
449
450 c = xsocket(family, sotype, 0);
451 if (c == -1)
452 goto close_srv;
453
454 /* Create & accept child while parent is in sockmap */
455 err = xconnect(c, sockaddr(&addr), len);
456 if (err)
457 goto close_cli;
458
459 p = xaccept_nonblock(s, NULL, NULL);
460 if (p == -1)
461 goto close_cli;
462
463 /* Check that child sk_user_data is not set */
464 value = p;
465 xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST);
466
467 xclose(p);
468 close_cli:
469 xclose(c);
470 close_srv:
471 xclose(s);
472 }
473
/* State shared between a test and the helper thread it spawns. */
struct connect_accept_ctx {
	int sockfd;		/* socket the helper thread operates on */
	unsigned done;		/* set to 1 by the thread right before it exits */
	unsigned nr_iter;	/* upper bound on the thread's loop iterations */
};
479
is_thread_done(struct connect_accept_ctx * ctx)480 static bool is_thread_done(struct connect_accept_ctx *ctx)
481 {
482 return READ_ONCE(ctx->done);
483 }
484
connect_accept_thread(void * arg)485 static void *connect_accept_thread(void *arg)
486 {
487 struct connect_accept_ctx *ctx = arg;
488 struct sockaddr_storage addr;
489 int family, socktype;
490 socklen_t len;
491 int err, i, s;
492
493 s = ctx->sockfd;
494
495 len = sizeof(addr);
496 err = xgetsockname(s, sockaddr(&addr), &len);
497 if (err)
498 goto done;
499
500 len = sizeof(family);
501 err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len);
502 if (err)
503 goto done;
504
505 len = sizeof(socktype);
506 err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len);
507 if (err)
508 goto done;
509
510 for (i = 0; i < ctx->nr_iter; i++) {
511 int c, p;
512
513 c = xsocket(family, socktype, 0);
514 if (c < 0)
515 break;
516
517 err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr));
518 if (err) {
519 xclose(c);
520 break;
521 }
522
523 p = xaccept_nonblock(s, NULL, NULL);
524 if (p < 0) {
525 xclose(c);
526 break;
527 }
528
529 xclose(p);
530 xclose(c);
531 }
532 done:
533 WRITE_ONCE(ctx->done, 1);
534 return NULL;
535 }
536
/* Race map insert/delete of a listening socket against a helper thread
 * (connect_accept_thread) that keeps connecting to and accepting from it.
 * The main thread loops update + delete on key 0 until the helper is done
 * or one of the map operations fails.
 */
static void test_syn_recv_insert_delete(struct test_sockmap_listen *skel __always_unused,
					int family, int sotype, int mapfd)
{
	struct connect_accept_ctx ctx = { 0 };
	struct sockaddr_storage srv_addr;
	socklen_t addrlen = sizeof(srv_addr);
	u32 slot = 0;
	pthread_t worker;
	u64 fd_val;
	int srv;

	srv = socket_loopback(family, sotype | SOCK_NONBLOCK);
	if (srv < 0)
		return;

	if (xgetsockname(srv, sockaddr(&srv_addr), &addrlen))
		goto out;

	ctx.nr_iter = 1000;
	ctx.sockfd = srv;

	if (xpthread_create(&worker, NULL, connect_accept_thread, &ctx))
		goto out;

	fd_val = srv;
	while (!is_thread_done(&ctx)) {
		if (xbpf_map_update_elem(mapfd, &slot, &fd_val, BPF_NOEXIST) ||
		    xbpf_map_delete_elem(mapfd, &slot))
			break;
	}

	xpthread_join(worker, NULL);
out:
	xclose(srv);
}
579
listen_thread(void * arg)580 static void *listen_thread(void *arg)
581 {
582 struct sockaddr unspec = { AF_UNSPEC };
583 struct connect_accept_ctx *ctx = arg;
584 int err, i, s;
585
586 s = ctx->sockfd;
587
588 for (i = 0; i < ctx->nr_iter; i++) {
589 err = xlisten(s, 1);
590 if (err)
591 break;
592 err = xconnect(s, &unspec, sizeof(unspec));
593 if (err)
594 break;
595 }
596
597 WRITE_ONCE(ctx->done, 1);
598 return NULL;
599 }
600
/* Race map insert/delete of a socket against a helper thread
 * (listen_thread) that keeps switching it between listen() and
 * connect(AF_UNSPEC).
 *
 * Updates may fail with EOPNOTSUPP and deletes with EINVAL/ENOENT; any
 * other error fails the test and ends the loop.
 */
static void test_race_insert_listen(struct test_sockmap_listen *skel __always_unused,
				    int family, int socktype, int mapfd)
{
	struct connect_accept_ctx ctx = { 0 };
	const u32 zero = 0;
	const int one = 1;
	pthread_t t;
	int err, s;
	u64 value;

	s = xsocket(family, socktype, 0);
	if (s < 0)
		return;

	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
	if (err)
		goto close;

	ctx.sockfd = s;
	ctx.nr_iter = 10000;

	/* Use the reporting wrapper so a thread creation failure is logged
	 * instead of silently skipping the test body.
	 */
	err = xpthread_create(&t, NULL, listen_thread, &ctx);
	if (err)
		goto close;

	value = s;
	while (!is_thread_done(&ctx)) {
		err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
		/* Expecting EOPNOTSUPP before listen() */
		if (err && errno != EOPNOTSUPP) {
			FAIL_ERRNO("map_update");
			break;
		}

		err = bpf_map_delete_elem(mapfd, &zero);
		/* Expecting no entry after unhash on connect(AF_UNSPEC) */
		if (err && errno != EINVAL && errno != ENOENT) {
			FAIL_ERRNO("map_delete");
			break;
		}
	}

	xpthread_join(t, NULL);
close:
	xclose(s);
}
647
zero_verdict_count(int mapfd)648 static void zero_verdict_count(int mapfd)
649 {
650 unsigned int zero = 0;
651 int key;
652
653 key = SK_DROP;
654 xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
655 key = SK_PASS;
656 xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
657 }
658
/* Which side of the connection the redirect tests write to. */
enum redir_mode {
	REDIR_INGRESS,
	REDIR_EGRESS,
};

/* Map a redirect mode to the prefix used in log messages;
 * values outside the enum yield "unknown".
 */
static const char *redir_mode_str(enum redir_mode mode)
{
	if (mode == REDIR_INGRESS)
		return "ingress";
	if (mode == REDIR_EGRESS)
		return "egress";
	return "unknown";
}
675
redir_to_connected(int family,int sotype,int sock_mapfd,int verd_mapfd,enum redir_mode mode)676 static void redir_to_connected(int family, int sotype, int sock_mapfd,
677 int verd_mapfd, enum redir_mode mode)
678 {
679 const char *log_prefix = redir_mode_str(mode);
680 int c0, c1, p0, p1;
681 unsigned int pass;
682 int err, n;
683 u32 key;
684 char b;
685
686 zero_verdict_count(verd_mapfd);
687
688 err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1,
689 &p0, &p1);
690 if (err)
691 return;
692
693 err = add_to_sockmap(sock_mapfd, p0, p1);
694 if (err)
695 goto close;
696
697 n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
698 if (n < 0)
699 FAIL_ERRNO("%s: write", log_prefix);
700 if (n == 0)
701 FAIL("%s: incomplete write", log_prefix);
702 if (n < 1)
703 goto close;
704
705 key = SK_PASS;
706 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
707 if (err)
708 goto close;
709 if (pass != 1)
710 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
711 n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
712 if (n < 0)
713 FAIL_ERRNO("%s: recv_timeout", log_prefix);
714 if (n == 0)
715 FAIL("%s: incomplete recv", log_prefix);
716
717 close:
718 xclose(p1);
719 xclose(c1);
720 xclose(p0);
721 xclose(c0);
722 }
723
test_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)724 static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
725 struct bpf_map *inner_map, int family,
726 int sotype)
727 {
728 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
729 int parser = bpf_program__fd(skel->progs.prog_stream_parser);
730 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
731 int sock_map = bpf_map__fd(inner_map);
732 int err;
733
734 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
735 if (err)
736 return;
737 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
738 if (err)
739 goto detach;
740
741 redir_to_connected(family, sotype, sock_map, verdict_map,
742 REDIR_INGRESS);
743
744 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
745 detach:
746 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
747 }
748
test_msg_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)749 static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
750 struct bpf_map *inner_map, int family,
751 int sotype)
752 {
753 int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
754 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
755 int sock_map = bpf_map__fd(inner_map);
756 int err;
757
758 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
759 if (err)
760 return;
761
762 redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
763
764 xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
765 }
766
test_msg_redir_to_connected_with_link(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)767 static void test_msg_redir_to_connected_with_link(struct test_sockmap_listen *skel,
768 struct bpf_map *inner_map, int family,
769 int sotype)
770 {
771 int prog_msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
772 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
773 int sock_map = bpf_map__fd(inner_map);
774 int link_fd;
775
776 link_fd = bpf_link_create(prog_msg_verdict, sock_map, BPF_SK_MSG_VERDICT, NULL);
777 if (!ASSERT_GE(link_fd, 0, "bpf_link_create"))
778 return;
779
780 redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
781
782 close(link_fd);
783 }
784
redir_to_listening(int family,int sotype,int sock_mapfd,int verd_mapfd,enum redir_mode mode)785 static void redir_to_listening(int family, int sotype, int sock_mapfd,
786 int verd_mapfd, enum redir_mode mode)
787 {
788 const char *log_prefix = redir_mode_str(mode);
789 struct sockaddr_storage addr;
790 int s, c, p, err, n;
791 unsigned int drop;
792 socklen_t len;
793 u32 key;
794
795 zero_verdict_count(verd_mapfd);
796
797 s = socket_loopback(family, sotype | SOCK_NONBLOCK);
798 if (s < 0)
799 return;
800
801 len = sizeof(addr);
802 err = xgetsockname(s, sockaddr(&addr), &len);
803 if (err)
804 goto close_srv;
805
806 c = xsocket(family, sotype, 0);
807 if (c < 0)
808 goto close_srv;
809 err = xconnect(c, sockaddr(&addr), len);
810 if (err)
811 goto close_cli;
812
813 p = xaccept_nonblock(s, NULL, NULL);
814 if (p < 0)
815 goto close_cli;
816
817 err = add_to_sockmap(sock_mapfd, s, p);
818 if (err)
819 goto close_peer;
820
821 n = write(mode == REDIR_INGRESS ? c : p, "a", 1);
822 if (n < 0 && errno != EACCES)
823 FAIL_ERRNO("%s: write", log_prefix);
824 if (n == 0)
825 FAIL("%s: incomplete write", log_prefix);
826 if (n < 1)
827 goto close_peer;
828
829 key = SK_DROP;
830 err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop);
831 if (err)
832 goto close_peer;
833 if (drop != 1)
834 FAIL("%s: want drop count 1, have %d", log_prefix, drop);
835
836 close_peer:
837 xclose(p);
838 close_cli:
839 xclose(c);
840 close_srv:
841 xclose(s);
842 }
843
test_skb_redir_to_listening(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)844 static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
845 struct bpf_map *inner_map, int family,
846 int sotype)
847 {
848 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
849 int parser = bpf_program__fd(skel->progs.prog_stream_parser);
850 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
851 int sock_map = bpf_map__fd(inner_map);
852 int err;
853
854 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
855 if (err)
856 return;
857 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
858 if (err)
859 goto detach;
860
861 redir_to_listening(family, sotype, sock_map, verdict_map,
862 REDIR_INGRESS);
863
864 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
865 detach:
866 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
867 }
868
test_msg_redir_to_listening(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)869 static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
870 struct bpf_map *inner_map, int family,
871 int sotype)
872 {
873 int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
874 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
875 int sock_map = bpf_map__fd(inner_map);
876 int err;
877
878 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
879 if (err)
880 return;
881
882 redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
883
884 xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
885 }
886
test_msg_redir_to_listening_with_link(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)887 static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *skel,
888 struct bpf_map *inner_map, int family,
889 int sotype)
890 {
891 struct bpf_program *verdict = skel->progs.prog_msg_verdict;
892 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
893 int sock_map = bpf_map__fd(inner_map);
894 struct bpf_link *link;
895
896 link = bpf_program__attach_sockmap(verdict, sock_map);
897 if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
898 return;
899
900 redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
901
902 bpf_link__detach(link);
903 }
904
/* Check redirection when the stream parser accepts only part of a write.
 *
 * Stores strlen("abc") at key 0 of the parser map, sends the 4-byte
 * buffer "abc\0" on c1 and expects exactly strlen("abc") bytes to arrive
 * on c0. Key 0 of the parser map is reset to 0 before returning.
 */
static void redir_partial(int family, int sotype, int sock_map, int parser_map)
{
	int c0 = -1, c1 = -1, p0 = -1, p1 = -1;
	int err, n, key, value;
	char buf[] = "abc";
	const int full_len = sizeof(buf);
	const int partial_len = sizeof(buf) - 1;

	key = 0;
	value = partial_len;
	err = xbpf_map_update_elem(parser_map, &key, &value, 0);
	if (err)
		return;

	err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1,
				  &p0, &p1);
	if (err)
		goto clean_parser_map;

	err = add_to_sockmap(sock_map, p0, p1);
	if (err)
		goto close;

	n = xsend(c1, buf, sizeof(buf), 0);
	/* Nothing was sent, so there is nothing to wait for on c0. Bail out
	 * quietly like the other xsend() callers in this file do.
	 */
	if (n < 0)
		goto close;
	/* Compare as signed ints: against size_t, a negative n would be
	 * converted to a huge unsigned value and never look short.
	 */
	if (n < full_len)
		FAIL("incomplete write");

	n = xrecv_nonblock(c0, buf, sizeof(buf), 0);
	if (n != partial_len)
		FAIL("expect %zu, received %d", sizeof(buf) - 1, n);

close:
	xclose(c0);
	xclose(p0);
	xclose(c1);
	xclose(p1);

clean_parser_map:
	key = 0;
	value = 0;
	xbpf_map_update_elem(parser_map, &key, &value, 0);
}
945
test_skb_redir_partial(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family,int sotype)946 static void test_skb_redir_partial(struct test_sockmap_listen *skel,
947 struct bpf_map *inner_map, int family,
948 int sotype)
949 {
950 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
951 int parser = bpf_program__fd(skel->progs.prog_stream_parser);
952 int parser_map = bpf_map__fd(skel->maps.parser_map);
953 int sock_map = bpf_map__fd(inner_map);
954 int err;
955
956 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
957 if (err)
958 return;
959
960 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
961 if (err)
962 goto detach;
963
964 redir_partial(family, sotype, sock_map, parser_map);
965
966 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
967 detach:
968 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
969 }
970
test_reuseport_select_listening(int family,int sotype,int sock_map,int verd_map,int reuseport_prog)971 static void test_reuseport_select_listening(int family, int sotype,
972 int sock_map, int verd_map,
973 int reuseport_prog)
974 {
975 struct sockaddr_storage addr;
976 unsigned int pass;
977 int s, c, err;
978 socklen_t len;
979 u64 value;
980 u32 key;
981
982 zero_verdict_count(verd_map);
983
984 s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
985 reuseport_prog);
986 if (s < 0)
987 return;
988
989 len = sizeof(addr);
990 err = xgetsockname(s, sockaddr(&addr), &len);
991 if (err)
992 goto close_srv;
993
994 key = 0;
995 value = s;
996 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
997 if (err)
998 goto close_srv;
999
1000 c = xsocket(family, sotype, 0);
1001 if (c < 0)
1002 goto close_srv;
1003 err = xconnect(c, sockaddr(&addr), len);
1004 if (err)
1005 goto close_cli;
1006
1007 if (sotype == SOCK_STREAM) {
1008 int p;
1009
1010 p = xaccept_nonblock(s, NULL, NULL);
1011 if (p < 0)
1012 goto close_cli;
1013 xclose(p);
1014 } else {
1015 char b = 'a';
1016 ssize_t n;
1017
1018 n = xsend(c, &b, sizeof(b), 0);
1019 if (n == -1)
1020 goto close_cli;
1021
1022 n = xrecv_nonblock(s, &b, sizeof(b), 0);
1023 if (n == -1)
1024 goto close_cli;
1025 }
1026
1027 key = SK_PASS;
1028 err = xbpf_map_lookup_elem(verd_map, &key, &pass);
1029 if (err)
1030 goto close_cli;
1031 if (pass != 1)
1032 FAIL("want pass count 1, have %d", pass);
1033
1034 close_cli:
1035 xclose(c);
1036 close_srv:
1037 xclose(s);
1038 }
1039
/* Reuseport selection must refuse a connected socket.
 *
 * sock_map[0] first holds the reuseport server socket so that an initial
 * connection (c0 -> p0) can be established. It is then replaced with the
 * connected socket p0. A second client (c1) is expected to see
 * ECONNREFUSED, and the SK_DROP counter in the verdict map is expected to
 * be 1.
 *
 * Every error path releases exactly the sockets opened so far.
 */
static void test_reuseport_select_connected(int family, int sotype,
					    int sock_map, int verd_map,
					    int reuseport_prog)
{
	struct sockaddr_storage addr;
	int s, c0, c1, p0, err;
	unsigned int drop;
	socklen_t len;
	u64 value;
	u32 key;

	zero_verdict_count(verd_map);

	s = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (s < 0)
		return;

	/* Populate sock_map[0] to avoid ENOENT on first connection */
	key = 0;
	value = s;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
	if (err)
		goto close_srv;

	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		goto close_srv;

	c0 = xsocket(family, sotype, 0);
	if (c0 < 0)
		goto close_srv;

	err = xconnect(c0, sockaddr(&addr), len);
	if (err)
		goto close_cli0;

	if (sotype == SOCK_STREAM) {
		p0 = xaccept_nonblock(s, NULL, NULL);
		if (p0 < 0)
			goto close_cli0;
	} else {
		p0 = xsocket(family, sotype, 0);
		if (p0 < 0)
			goto close_cli0;

		/* p0 is open from here on: unwind through close_peer0 */
		len = sizeof(addr);
		err = xgetsockname(c0, sockaddr(&addr), &len);
		if (err)
			goto close_peer0;

		err = xconnect(p0, sockaddr(&addr), len);
		if (err)
			goto close_peer0;
	}

	/* Update sock_map[0] to redirect to a connected socket */
	key = 0;
	value = p0;
	err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST);
	if (err)
		goto close_peer0;

	c1 = xsocket(family, sotype, 0);
	if (c1 < 0)
		goto close_peer0;

	/* c1 is open from here on: unwind through close_cli1 */
	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		goto close_cli1;

	errno = 0;
	err = connect(c1, sockaddr(&addr), len);
	if (sotype == SOCK_DGRAM) {
		char b = 'a';
		ssize_t n;

		n = xsend(c1, &b, sizeof(b), 0);
		if (n == -1)
			goto close_cli1;

		n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
		err = n == -1;
	}
	if (!err || errno != ECONNREFUSED)
		FAIL_ERRNO("connect: expected ECONNREFUSED");

	key = SK_DROP;
	err = xbpf_map_lookup_elem(verd_map, &key, &drop);
	if (err)
		goto close_cli1;
	if (drop != 1)
		FAIL("want drop count 1, have %d", drop);

close_cli1:
	xclose(c1);
close_peer0:
	xclose(p0);
close_cli0:
	xclose(c0);
close_srv:
	xclose(s);
}
1144
/* Check that redirecting across reuseport groups is not allowed: a client
 * targeting one listener is expected to get ECONNREFUSED and the SK_DROP
 * counter in the verdict map is expected to be 1.
 */
static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
					int verd_map, int reuseport_prog)
{
	struct sockaddr_storage dst;
	socklen_t dst_len = sizeof(dst);
	int srv_a, srv_b, cli, rc;
	unsigned int nr_drop;
	u32 verdict = SK_DROP;

	zero_verdict_count(verd_map);

	/* Create two listeners, each in its own reuseport group */
	srv_a = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (srv_a < 0)
		return;

	srv_b = socket_loopback_reuseport(family, sotype, reuseport_prog);
	if (srv_b < 0)
		goto out_a;

	if (add_to_sockmap(sock_map, srv_a, srv_b))
		goto out_b;

	/* Connect to srv_b, reuseport BPF selects srv_a via sock_map[0] */
	if (xgetsockname(srv_b, sockaddr(&dst), &dst_len))
		goto out_b;

	cli = xsocket(family, sotype, 0);
	if (cli < 0)
		goto out_b;

	rc = connect(cli, sockaddr(&dst), dst_len);
	if (sotype == SOCK_DGRAM) {
		char byte = 'a';
		ssize_t n;

		if (xsend(cli, &byte, sizeof(byte), 0) == -1)
			goto out_cli;

		n = recv_timeout(cli, &byte, sizeof(byte), 0, IO_TIMEOUT_SEC);
		rc = (n == -1);
	}
	if (!(rc && errno == ECONNREFUSED)) {
		FAIL_ERRNO("connect: expected ECONNREFUSED");
		goto out_cli;
	}

	/* Expect drop, can't redirect outside of reuseport group */
	if (xbpf_map_lookup_elem(verd_map, &verdict, &nr_drop))
		goto out_cli;
	if (nr_drop != 1)
		FAIL("want drop count 1, have %d", nr_drop);

out_cli:
	xclose(cli);
out_b:
	xclose(srv_b);
out_a:
	xclose(srv_a);
}
1212
/*
 * Build one test-table initializer: the handler, its name as a string
 * literal, then any extra member initializers passed after the handler.
 */
#define TEST(fn, ...) { fn, #fn, __VA_ARGS__ }
1217
test_ops_cleanup(const struct bpf_map * map)1218 static void test_ops_cleanup(const struct bpf_map *map)
1219 {
1220 int err, mapfd;
1221 u32 key;
1222
1223 mapfd = bpf_map__fd(map);
1224
1225 for (key = 0; key < bpf_map__max_entries(map); key++) {
1226 err = bpf_map_delete_elem(mapfd, &key);
1227 if (err && errno != EINVAL && errno != ENOENT)
1228 FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
1229 }
1230 }
1231
/*
 * Map an address family to the label used in subtest names.
 * Families not listed here yield "unknown".
 */
static const char *family_str(sa_family_t family)
{
	if (family == AF_INET)
		return "IPv4";
	if (family == AF_INET6)
		return "IPv6";
	if (family == AF_UNIX)
		return "Unix";
	if (family == AF_VSOCK)
		return "VSOCK";

	return "unknown";
}
1247
map_type_str(const struct bpf_map * map)1248 static const char *map_type_str(const struct bpf_map *map)
1249 {
1250 int type;
1251
1252 if (!map)
1253 return "invalid";
1254 type = bpf_map__type(map);
1255
1256 switch (type) {
1257 case BPF_MAP_TYPE_SOCKMAP:
1258 return "sockmap";
1259 case BPF_MAP_TYPE_SOCKHASH:
1260 return "sockhash";
1261 default:
1262 return "unknown";
1263 }
1264 }
1265
/*
 * Map a socket type to the label used in subtest names:
 * SOCK_DGRAM is "UDP", SOCK_STREAM is "TCP", anything else is "unknown".
 */
static const char *sotype_str(int sotype)
{
	if (sotype == SOCK_DGRAM)
		return "UDP";
	if (sotype == SOCK_STREAM)
		return "TCP";

	return "unknown";
}
1277
test_ops(struct test_sockmap_listen * skel,struct bpf_map * map,int family,int sotype)1278 static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
1279 int family, int sotype)
1280 {
1281 const struct op_test {
1282 void (*fn)(struct test_sockmap_listen *skel,
1283 int family, int sotype, int mapfd);
1284 const char *name;
1285 int sotype;
1286 } tests[] = {
1287 /* insert */
1288 TEST(test_insert_invalid),
1289 TEST(test_insert_opened),
1290 TEST(test_insert_bound, SOCK_STREAM),
1291 TEST(test_insert),
1292 /* delete */
1293 TEST(test_delete_after_insert),
1294 TEST(test_delete_after_close),
1295 /* lookup */
1296 TEST(test_lookup_after_insert),
1297 TEST(test_lookup_after_delete),
1298 TEST(test_lookup_32_bit_value),
1299 /* update */
1300 TEST(test_update_existing),
1301 /* races with insert/delete */
1302 TEST(test_destroy_orphan_child, SOCK_STREAM),
1303 TEST(test_syn_recv_insert_delete, SOCK_STREAM),
1304 TEST(test_race_insert_listen, SOCK_STREAM),
1305 /* child clone */
1306 TEST(test_clone_after_delete, SOCK_STREAM),
1307 TEST(test_accept_after_delete, SOCK_STREAM),
1308 TEST(test_accept_before_delete, SOCK_STREAM),
1309 };
1310 const char *family_name, *map_name, *sotype_name;
1311 const struct op_test *t;
1312 char s[MAX_TEST_NAME];
1313 int map_fd;
1314
1315 family_name = family_str(family);
1316 map_name = map_type_str(map);
1317 sotype_name = sotype_str(sotype);
1318 map_fd = bpf_map__fd(map);
1319
1320 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1321 snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1322 sotype_name, t->name);
1323
1324 if (t->sotype != 0 && t->sotype != sotype)
1325 continue;
1326
1327 if (!test__start_subtest(s))
1328 continue;
1329
1330 t->fn(skel, family, sotype, map_fd);
1331 test_ops_cleanup(map);
1332 }
1333 }
1334
test_redir(struct test_sockmap_listen * skel,struct bpf_map * map,int family,int sotype)1335 static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1336 int family, int sotype)
1337 {
1338 const struct redir_test {
1339 void (*fn)(struct test_sockmap_listen *skel,
1340 struct bpf_map *map, int family, int sotype);
1341 const char *name;
1342 } tests[] = {
1343 TEST(test_skb_redir_to_connected),
1344 TEST(test_skb_redir_to_listening),
1345 TEST(test_skb_redir_partial),
1346 TEST(test_msg_redir_to_connected),
1347 TEST(test_msg_redir_to_connected_with_link),
1348 TEST(test_msg_redir_to_listening),
1349 TEST(test_msg_redir_to_listening_with_link),
1350 };
1351 const char *family_name, *map_name;
1352 const struct redir_test *t;
1353 char s[MAX_TEST_NAME];
1354
1355 family_name = family_str(family);
1356 map_name = map_type_str(map);
1357
1358 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1359 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
1360 t->name);
1361
1362 if (!test__start_subtest(s))
1363 continue;
1364
1365 t->fn(skel, map, family, sotype);
1366 }
1367 }
1368
pairs_redir_to_connected(int cli0,int peer0,int cli1,int peer1,int sock_mapfd,int nop_mapfd,int verd_mapfd,enum redir_mode mode,int send_flags)1369 static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
1370 int sock_mapfd, int nop_mapfd,
1371 int verd_mapfd, enum redir_mode mode,
1372 int send_flags)
1373 {
1374 const char *log_prefix = redir_mode_str(mode);
1375 unsigned int pass;
1376 int err, n;
1377 u32 key;
1378 char b;
1379
1380 zero_verdict_count(verd_mapfd);
1381
1382 err = add_to_sockmap(sock_mapfd, peer0, peer1);
1383 if (err)
1384 return;
1385
1386 if (nop_mapfd >= 0) {
1387 err = add_to_sockmap(nop_mapfd, cli0, cli1);
1388 if (err)
1389 return;
1390 }
1391
1392 /* Last byte is OOB data when send_flags has MSG_OOB bit set */
1393 n = xsend(cli1, "ab", 2, send_flags);
1394 if (n >= 0 && n < 2)
1395 FAIL("%s: incomplete send", log_prefix);
1396 if (n < 2)
1397 return;
1398
1399 key = SK_PASS;
1400 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1401 if (err)
1402 return;
1403 if (pass != 1)
1404 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1405
1406 n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC);
1407 if (n < 0)
1408 FAIL_ERRNO("%s: recv_timeout", log_prefix);
1409 if (n == 0)
1410 FAIL("%s: incomplete recv", log_prefix);
1411
1412 if (send_flags & MSG_OOB) {
1413 /* Check that we can't read OOB while in sockmap */
1414 errno = 0;
1415 n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
1416 if (n != -1 || errno != EOPNOTSUPP)
1417 FAIL("%s: recv(MSG_OOB): expected EOPNOTSUPP: retval=%d errno=%d",
1418 log_prefix, n, errno);
1419
1420 /* Remove peer1 from sockmap */
1421 xbpf_map_delete_elem(sock_mapfd, &(int){ 1 });
1422
1423 /* Check that OOB was dropped on redirect */
1424 errno = 0;
1425 n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
1426 if (n != -1 || errno != EINVAL)
1427 FAIL("%s: recv(MSG_OOB): expected EINVAL: retval=%d errno=%d",
1428 log_prefix, n, errno);
1429 }
1430 }
1431
unix_redir_to_connected(int sotype,int sock_mapfd,int verd_mapfd,enum redir_mode mode)1432 static void unix_redir_to_connected(int sotype, int sock_mapfd,
1433 int verd_mapfd, enum redir_mode mode)
1434 {
1435 int c0, c1, p0, p1;
1436 int sfd[2];
1437
1438 if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1439 return;
1440 c0 = sfd[0], p0 = sfd[1];
1441
1442 if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
1443 goto close0;
1444 c1 = sfd[0], p1 = sfd[1];
1445
1446 pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
1447 mode, NO_FLAGS);
1448
1449 xclose(c1);
1450 xclose(p1);
1451 close0:
1452 xclose(c0);
1453 xclose(p0);
1454 }
1455
unix_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int sotype)1456 static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1457 struct bpf_map *inner_map, int sotype)
1458 {
1459 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1460 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1461 int sock_map = bpf_map__fd(inner_map);
1462 int err;
1463
1464 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1465 if (err)
1466 return;
1467
1468 skel->bss->test_ingress = false;
1469 unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
1470 skel->bss->test_ingress = true;
1471 unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
1472
1473 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1474 }
1475
test_unix_redir(struct test_sockmap_listen * skel,struct bpf_map * map,int sotype)1476 static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1477 int sotype)
1478 {
1479 const char *family_name, *map_name;
1480 char s[MAX_TEST_NAME];
1481
1482 family_name = family_str(AF_UNIX);
1483 map_name = map_type_str(map);
1484 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1485 if (!test__start_subtest(s))
1486 return;
1487 unix_skb_redir_to_connected(skel, map, sotype);
1488 }
1489
/*
 * Returns two connected loopback vsock sockets.
 *
 * Thin wrapper around create_pair() for AF_VSOCK; SOCK_NONBLOCK is always
 * OR-ed into the requested type. The return value is create_pair()'s.
 */
static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
{
	const int type = sotype | SOCK_NONBLOCK;

	return create_pair(AF_VSOCK, type, v0, v1);
}
1495
vsock_unix_redir_connectible(int sock_mapfd,int verd_mapfd,enum redir_mode mode,int sotype)1496 static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
1497 enum redir_mode mode, int sotype)
1498 {
1499 const char *log_prefix = redir_mode_str(mode);
1500 char a = 'a', b = 'b';
1501 int u0, u1, v0, v1;
1502 int sfd[2];
1503 unsigned int pass;
1504 int err, n;
1505 u32 key;
1506
1507 zero_verdict_count(verd_mapfd);
1508
1509 if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd))
1510 return;
1511
1512 u0 = sfd[0];
1513 u1 = sfd[1];
1514
1515 err = vsock_socketpair_connectible(sotype, &v0, &v1);
1516 if (err) {
1517 FAIL("vsock_socketpair_connectible() failed");
1518 goto close_uds;
1519 }
1520
1521 err = add_to_sockmap(sock_mapfd, u0, v0);
1522 if (err) {
1523 FAIL("add_to_sockmap failed");
1524 goto close_vsock;
1525 }
1526
1527 n = write(v1, &a, sizeof(a));
1528 if (n < 0)
1529 FAIL_ERRNO("%s: write", log_prefix);
1530 if (n == 0)
1531 FAIL("%s: incomplete write", log_prefix);
1532 if (n < 1)
1533 goto out;
1534
1535 n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
1536 if (n < 0)
1537 FAIL("%s: recv() err, errno=%d", log_prefix, errno);
1538 if (n == 0)
1539 FAIL("%s: incomplete recv", log_prefix);
1540 if (b != a)
1541 FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b);
1542
1543 key = SK_PASS;
1544 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
1545 if (err)
1546 goto out;
1547 if (pass != 1)
1548 FAIL("%s: want pass count 1, have %d", log_prefix, pass);
1549 out:
1550 key = 0;
1551 bpf_map_delete_elem(sock_mapfd, &key);
1552 key = 1;
1553 bpf_map_delete_elem(sock_mapfd, &key);
1554
1555 close_vsock:
1556 close(v0);
1557 close(v1);
1558
1559 close_uds:
1560 close(u0);
1561 close(u1);
1562 }
1563
vsock_unix_skb_redir_connectible(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int sotype)1564 static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel,
1565 struct bpf_map *inner_map,
1566 int sotype)
1567 {
1568 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1569 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1570 int sock_map = bpf_map__fd(inner_map);
1571 int err;
1572
1573 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1574 if (err)
1575 return;
1576
1577 skel->bss->test_ingress = false;
1578 vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype);
1579 skel->bss->test_ingress = true;
1580 vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype);
1581
1582 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1583 }
1584
test_vsock_redir(struct test_sockmap_listen * skel,struct bpf_map * map)1585 static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map)
1586 {
1587 const char *family_name, *map_name;
1588 char s[MAX_TEST_NAME];
1589
1590 family_name = family_str(AF_VSOCK);
1591 map_name = map_type_str(map);
1592 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1593 if (!test__start_subtest(s))
1594 return;
1595
1596 vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM);
1597 vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET);
1598 }
1599
test_reuseport(struct test_sockmap_listen * skel,struct bpf_map * map,int family,int sotype)1600 static void test_reuseport(struct test_sockmap_listen *skel,
1601 struct bpf_map *map, int family, int sotype)
1602 {
1603 const struct reuseport_test {
1604 void (*fn)(int family, int sotype, int socket_map,
1605 int verdict_map, int reuseport_prog);
1606 const char *name;
1607 int sotype;
1608 } tests[] = {
1609 TEST(test_reuseport_select_listening),
1610 TEST(test_reuseport_select_connected),
1611 TEST(test_reuseport_mixed_groups),
1612 };
1613 int socket_map, verdict_map, reuseport_prog;
1614 const char *family_name, *map_name, *sotype_name;
1615 const struct reuseport_test *t;
1616 char s[MAX_TEST_NAME];
1617
1618 family_name = family_str(family);
1619 map_name = map_type_str(map);
1620 sotype_name = sotype_str(sotype);
1621
1622 socket_map = bpf_map__fd(map);
1623 verdict_map = bpf_map__fd(skel->maps.verdict_map);
1624 reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
1625
1626 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
1627 snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
1628 sotype_name, t->name);
1629
1630 if (t->sotype != 0 && t->sotype != sotype)
1631 continue;
1632
1633 if (!test__start_subtest(s))
1634 continue;
1635
1636 t->fn(family, sotype, socket_map, verdict_map, reuseport_prog);
1637 }
1638 }
1639
/*
 * Thin wrapper around create_pair() that always OR-s SOCK_NONBLOCK into
 * the requested type. The return value is create_pair()'s.
 */
static int inet_socketpair(int family, int type, int *s, int *c)
{
	const int nonblock_type = type | SOCK_NONBLOCK;

	return create_pair(family, nonblock_type, s, c);
}
1644
udp_redir_to_connected(int family,int sock_mapfd,int verd_mapfd,enum redir_mode mode)1645 static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
1646 enum redir_mode mode)
1647 {
1648 int c0, c1, p0, p1;
1649 int err;
1650
1651 err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
1652 if (err)
1653 return;
1654 err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
1655 if (err)
1656 goto close_cli0;
1657
1658 pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
1659 mode, NO_FLAGS);
1660
1661 xclose(c1);
1662 xclose(p1);
1663 close_cli0:
1664 xclose(c0);
1665 xclose(p0);
1666 }
1667
udp_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family)1668 static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
1669 struct bpf_map *inner_map, int family)
1670 {
1671 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1672 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1673 int sock_map = bpf_map__fd(inner_map);
1674 int err;
1675
1676 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1677 if (err)
1678 return;
1679
1680 skel->bss->test_ingress = false;
1681 udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
1682 skel->bss->test_ingress = true;
1683 udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
1684
1685 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1686 }
1687
test_udp_redir(struct test_sockmap_listen * skel,struct bpf_map * map,int family)1688 static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1689 int family)
1690 {
1691 const char *family_name, *map_name;
1692 char s[MAX_TEST_NAME];
1693
1694 family_name = family_str(family);
1695 map_name = map_type_str(map);
1696 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1697 if (!test__start_subtest(s))
1698 return;
1699 udp_skb_redir_to_connected(skel, map, family);
1700 }
1701
inet_unix_redir_to_connected(int family,int type,int sock_mapfd,int verd_mapfd,enum redir_mode mode)1702 static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
1703 int verd_mapfd, enum redir_mode mode)
1704 {
1705 int c0, c1, p0, p1;
1706 int sfd[2];
1707 int err;
1708
1709 if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
1710 return;
1711 c0 = sfd[0], p0 = sfd[1];
1712
1713 err = inet_socketpair(family, type, &p1, &c1);
1714 if (err)
1715 goto close;
1716
1717 pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
1718 mode, NO_FLAGS);
1719
1720 xclose(c1);
1721 xclose(p1);
1722 close:
1723 xclose(c0);
1724 xclose(p0);
1725 }
1726
inet_unix_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family)1727 static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
1728 struct bpf_map *inner_map, int family)
1729 {
1730 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1731 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1732 int sock_map = bpf_map__fd(inner_map);
1733 int err;
1734
1735 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1736 if (err)
1737 return;
1738
1739 skel->bss->test_ingress = false;
1740 inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1741 REDIR_EGRESS);
1742 inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1743 REDIR_EGRESS);
1744 skel->bss->test_ingress = true;
1745 inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
1746 REDIR_INGRESS);
1747 inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
1748 REDIR_INGRESS);
1749
1750 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1751 }
1752
/*
 * Pair sockets from inet_socketpair() (c0/p0) with an AF_UNIX socket pair
 * (c1/p1) and run pairs_redir_to_connected() on them, forwarding nop_mapfd
 * and send_flags unchanged.
 *
 * A socketpair() failure is reported as a test failure instead of being
 * skipped silently, so a broken setup cannot look like a passing subtest.
 * Every socket created here is closed before returning.
 */
static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
					 int nop_mapfd, int verd_mapfd,
					 enum redir_mode mode, int send_flags)
{
	int c0, c1, p0, p1;
	int sfd[2];
	int err;

	err = inet_socketpair(family, type, &p0, &c0);
	if (err)
		return;

	if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd)) {
		FAIL_ERRNO("socketpair");
		goto close_cli0;
	}
	c1 = sfd[0];
	p1 = sfd[1];

	pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, nop_mapfd,
				 verd_mapfd, mode, send_flags);

	xclose(c1);
	xclose(p1);
close_cli0:
	xclose(c0);
	xclose(p0);
}
1778
unix_inet_skb_redir_to_connected(struct test_sockmap_listen * skel,struct bpf_map * inner_map,int family)1779 static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
1780 struct bpf_map *inner_map, int family)
1781 {
1782 int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
1783 int nop_map = bpf_map__fd(skel->maps.nop_map);
1784 int verdict_map = bpf_map__fd(skel->maps.verdict_map);
1785 int sock_map = bpf_map__fd(inner_map);
1786 int err;
1787
1788 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
1789 if (err)
1790 return;
1791
1792 skel->bss->test_ingress = false;
1793 unix_inet_redir_to_connected(family, SOCK_DGRAM,
1794 sock_map, -1, verdict_map,
1795 REDIR_EGRESS, NO_FLAGS);
1796 unix_inet_redir_to_connected(family, SOCK_STREAM,
1797 sock_map, -1, verdict_map,
1798 REDIR_EGRESS, NO_FLAGS);
1799
1800 unix_inet_redir_to_connected(family, SOCK_DGRAM,
1801 sock_map, nop_map, verdict_map,
1802 REDIR_EGRESS, NO_FLAGS);
1803 unix_inet_redir_to_connected(family, SOCK_STREAM,
1804 sock_map, nop_map, verdict_map,
1805 REDIR_EGRESS, NO_FLAGS);
1806
1807 /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
1808 unix_inet_redir_to_connected(family, SOCK_STREAM,
1809 sock_map, nop_map, verdict_map,
1810 REDIR_EGRESS, MSG_OOB);
1811
1812 skel->bss->test_ingress = true;
1813 unix_inet_redir_to_connected(family, SOCK_DGRAM,
1814 sock_map, -1, verdict_map,
1815 REDIR_INGRESS, NO_FLAGS);
1816 unix_inet_redir_to_connected(family, SOCK_STREAM,
1817 sock_map, -1, verdict_map,
1818 REDIR_INGRESS, NO_FLAGS);
1819
1820 unix_inet_redir_to_connected(family, SOCK_DGRAM,
1821 sock_map, nop_map, verdict_map,
1822 REDIR_INGRESS, NO_FLAGS);
1823 unix_inet_redir_to_connected(family, SOCK_STREAM,
1824 sock_map, nop_map, verdict_map,
1825 REDIR_INGRESS, NO_FLAGS);
1826
1827 /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
1828 unix_inet_redir_to_connected(family, SOCK_STREAM,
1829 sock_map, nop_map, verdict_map,
1830 REDIR_INGRESS, MSG_OOB);
1831
1832 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
1833 }
1834
test_udp_unix_redir(struct test_sockmap_listen * skel,struct bpf_map * map,int family)1835 static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
1836 int family)
1837 {
1838 const char *family_name, *map_name;
1839 struct netns_obj *netns;
1840 char s[MAX_TEST_NAME];
1841
1842 family_name = family_str(family);
1843 map_name = map_type_str(map);
1844 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
1845 if (!test__start_subtest(s))
1846 return;
1847
1848 netns = netns_new("sockmap_listen", true);
1849 if (!ASSERT_OK_PTR(netns, "netns_new"))
1850 return;
1851
1852 inet_unix_skb_redir_to_connected(skel, map, family);
1853 unix_inet_skb_redir_to_connected(skel, map, family);
1854
1855 netns_free(netns);
1856 }
1857
/*
 * Run every per-family test group against one map, in a fixed order:
 * map operations (STREAM, DGRAM), redirects (STREAM), reuseport (STREAM,
 * DGRAM), UDP redirect, and the UDP/Unix redirect.
 */
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
		      int family)
{
	static const int sotypes[] = { SOCK_STREAM, SOCK_DGRAM };
	size_t i;

	for (i = 0; i < ARRAY_SIZE(sotypes); i++)
		test_ops(skel, map, family, sotypes[i]);

	test_redir(skel, map, family, SOCK_STREAM);

	for (i = 0; i < ARRAY_SIZE(sotypes); i++)
		test_reuseport(skel, map, family, sotypes[i]);

	test_udp_redir(skel, map, family);
	test_udp_unix_redir(skel, map, family);
}
1869
serial_test_sockmap_listen(void)1870 void serial_test_sockmap_listen(void)
1871 {
1872 struct test_sockmap_listen *skel;
1873
1874 skel = test_sockmap_listen__open_and_load();
1875 if (!skel) {
1876 FAIL("skeleton open/load failed");
1877 return;
1878 }
1879
1880 skel->bss->test_sockmap = true;
1881 run_tests(skel, skel->maps.sock_map, AF_INET);
1882 run_tests(skel, skel->maps.sock_map, AF_INET6);
1883 test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
1884 test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
1885 test_vsock_redir(skel, skel->maps.sock_map);
1886
1887 skel->bss->test_sockmap = false;
1888 run_tests(skel, skel->maps.sock_hash, AF_INET);
1889 run_tests(skel, skel->maps.sock_hash, AF_INET6);
1890 test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
1891 test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
1892 test_vsock_redir(skel, skel->maps.sock_hash);
1893
1894 test_sockmap_listen__destroy(skel);
1895 }
1896