1 // SPDX-License-Identifier: GPL-2.0 2 // Copyright (c) 2020 Cloudflare 3 /* 4 * Test suite for SOCKMAP/SOCKHASH holding listening sockets. 5 * Covers: 6 * 1. BPF map operations - bpf_map_{update,lookup delete}_elem 7 * 2. BPF redirect helpers - bpf_{sk,msg}_redirect_map 8 * 3. BPF reuseport helper - bpf_sk_select_reuseport 9 */ 10 11 #include <linux/compiler.h> 12 #include <errno.h> 13 #include <error.h> 14 #include <limits.h> 15 #include <netinet/in.h> 16 #include <pthread.h> 17 #include <stdlib.h> 18 #include <string.h> 19 #include <sys/select.h> 20 #include <unistd.h> 21 #include <linux/vm_sockets.h> 22 23 #include <bpf/bpf.h> 24 #include <bpf/libbpf.h> 25 26 #include "bpf_util.h" 27 #include "test_progs.h" 28 #include "test_sockmap_listen.skel.h" 29 30 #include "sockmap_helpers.h" 31 32 #define NO_FLAGS 0 33 34 static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused, 35 int family, int sotype, int mapfd) 36 { 37 u32 key = 0; 38 u64 value; 39 int err; 40 41 value = -1; 42 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 43 if (!err || errno != EINVAL) 44 FAIL_ERRNO("map_update: expected EINVAL"); 45 46 value = INT_MAX; 47 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 48 if (!err || errno != EBADF) 49 FAIL_ERRNO("map_update: expected EBADF"); 50 } 51 52 static void test_insert_opened(struct test_sockmap_listen *skel __always_unused, 53 int family, int sotype, int mapfd) 54 { 55 u32 key = 0; 56 u64 value; 57 int err, s; 58 59 s = xsocket(family, sotype, 0); 60 if (s == -1) 61 return; 62 63 errno = 0; 64 value = s; 65 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 66 if (sotype == SOCK_STREAM) { 67 if (!err || errno != EOPNOTSUPP) 68 FAIL_ERRNO("map_update: expected EOPNOTSUPP"); 69 } else if (err) 70 FAIL_ERRNO("map_update: expected success"); 71 xclose(s); 72 } 73 74 static void test_insert_bound(struct test_sockmap_listen *skel __always_unused, 75 int family, int sotype, int mapfd) 76 { 77 struct sockaddr_storage addr; 78 socklen_t len = 0; 79 u32 key = 0; 80 u64 value; 81 int err, s; 82 83 init_addr_loopback(family, &addr, &len); 84 85 s = xsocket(family, sotype, 0); 86 if (s == -1) 87 return; 88 89 err = xbind(s, sockaddr(&addr), len); 90 if (err) 91 goto close; 92 93 errno = 0; 94 value = s; 95 err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 96 if (!err || errno != EOPNOTSUPP) 97 FAIL_ERRNO("map_update: expected EOPNOTSUPP"); 98 close: 99 xclose(s); 100 } 101 102 static void test_insert(struct test_sockmap_listen *skel __always_unused, 103 int family, int sotype, int mapfd) 104 { 105 u64 value; 106 u32 key; 107 int s; 108 109 s = socket_loopback(family, sotype); 110 if (s < 0) 111 return; 112 113 key = 0; 114 value = s; 115 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 116 xclose(s); 117 } 118 119 static void test_delete_after_insert(struct test_sockmap_listen *skel __always_unused, 120 int family, int sotype, int mapfd) 121 { 122 u64 value; 123 u32 key; 124 int s; 125 126 s = socket_loopback(family, sotype); 127 if (s < 0) 128 return; 129 130 key = 0; 131 value = s; 132 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 133 xbpf_map_delete_elem(mapfd, &key); 134 xclose(s); 135 } 136 137 static void test_delete_after_close(struct test_sockmap_listen *skel __always_unused, 138 int family, int sotype, int mapfd) 139 { 140 int err, s; 141 u64 value; 142 u32 key; 143 144 s = socket_loopback(family, sotype); 145 if (s < 0) 146 return; 147 148 key = 0; 149 value = s; 150 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 151 152 xclose(s); 153 154 errno = 0; 155 err = bpf_map_delete_elem(mapfd, &key); 156 if (!err || (errno != EINVAL && errno != ENOENT)) 157 /* SOCKMAP and SOCKHASH return different error codes */ 158 FAIL_ERRNO("map_delete: expected EINVAL/EINVAL"); 159 } 160 161 static void test_lookup_after_insert(struct test_sockmap_listen *skel __always_unused, 162 int family, int sotype, int mapfd) 163 { 164 u64 cookie, value; 165 socklen_t len; 166 u32 key; 167 int s; 168 169 s = socket_loopback(family, sotype); 170 if (s < 0) 171 return; 172 173 key = 0; 174 value = s; 175 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 176 177 len = sizeof(cookie); 178 xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len); 179 180 xbpf_map_lookup_elem(mapfd, &key, &value); 181 182 if (value != cookie) { 183 FAIL("map_lookup: have %#llx, want %#llx", 184 (unsigned long long)value, (unsigned long long)cookie); 185 } 186 187 xclose(s); 188 } 189 190 static void test_lookup_after_delete(struct test_sockmap_listen *skel __always_unused, 191 int family, int sotype, int mapfd) 192 { 193 int err, s; 194 u64 value; 195 u32 key; 196 197 s = socket_loopback(family, sotype); 198 if (s < 0) 199 return; 200 201 key = 0; 202 value = s; 203 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 204 xbpf_map_delete_elem(mapfd, &key); 205 206 errno = 0; 207 err = bpf_map_lookup_elem(mapfd, &key, &value); 208 if (!err || errno != ENOENT) 209 FAIL_ERRNO("map_lookup: expected ENOENT"); 210 211 xclose(s); 212 } 213 214 static void test_lookup_32_bit_value(struct test_sockmap_listen *skel __always_unused, 215 int family, int sotype, int mapfd) 216 { 217 u32 key, value32; 218 int err, s; 219 220 s = socket_loopback(family, sotype); 221 if (s < 0) 222 return; 223 224 mapfd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(key), 225 sizeof(value32), 1, NULL); 226 if (mapfd < 0) { 227 FAIL_ERRNO("map_create"); 228 goto close; 229 } 230 231 key = 0; 232 value32 = s; 233 xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST); 234 235 errno = 0; 236 err = bpf_map_lookup_elem(mapfd, &key, &value32); 237 if (!err || errno != ENOSPC) 238 FAIL_ERRNO("map_lookup: expected ENOSPC"); 239 240 xclose(mapfd); 241 close: 242 xclose(s); 243 } 244 245 static void test_update_existing(struct test_sockmap_listen *skel __always_unused, 246 int family, int sotype, int mapfd) 247 { 248 int s1, s2; 249 u64 value; 250 u32 key; 251 252 s1 = socket_loopback(family, sotype); 253 if (s1 < 0) 254 return; 255 256 s2 = socket_loopback(family, sotype); 257 if (s2 < 0) 258 goto close_s1; 259 260 key = 0; 261 value = s1; 262 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 263 264 value = s2; 265 xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST); 266 xclose(s2); 267 close_s1: 268 xclose(s1); 269 } 270 271 /* Exercise the code path where we destroy child sockets that never 272 * got accept()'ed, aka orphans, when parent socket gets closed. 273 */ 274 static void do_destroy_orphan_child(int family, int sotype, int mapfd) 275 { 276 struct sockaddr_storage addr; 277 socklen_t len; 278 int err, s, c; 279 u64 value; 280 u32 key; 281 282 s = socket_loopback(family, sotype); 283 if (s < 0) 284 return; 285 286 len = sizeof(addr); 287 err = xgetsockname(s, sockaddr(&addr), &len); 288 if (err) 289 goto close_srv; 290 291 key = 0; 292 value = s; 293 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 294 295 c = xsocket(family, sotype, 0); 296 if (c == -1) 297 goto close_srv; 298 299 xconnect(c, sockaddr(&addr), len); 300 xclose(c); 301 close_srv: 302 xclose(s); 303 } 304 305 static void test_destroy_orphan_child(struct test_sockmap_listen *skel, 306 int family, int sotype, int mapfd) 307 { 308 int msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict); 309 int skb_verdict = bpf_program__fd(skel->progs.prog_skb_verdict); 310 const struct test { 311 int progfd; 312 enum bpf_attach_type atype; 313 } tests[] = { 314 { -1, -1 }, 315 { msg_verdict, BPF_SK_MSG_VERDICT }, 316 { skb_verdict, BPF_SK_SKB_VERDICT }, 317 }; 318 const struct test *t; 319 320 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 321 if (t->progfd != -1 && 322 xbpf_prog_attach(t->progfd, mapfd, t->atype, 0) != 0) 323 return; 324 325 do_destroy_orphan_child(family, sotype, mapfd); 326 327 if (t->progfd != -1) 328 xbpf_prog_detach2(t->progfd, mapfd, t->atype); 329 } 330 } 331 332 /* Perform a passive open after removing listening socket from SOCKMAP 333 * to ensure that callbacks get restored properly. 334 */ 335 static void test_clone_after_delete(struct test_sockmap_listen *skel __always_unused, 336 int family, int sotype, int mapfd) 337 { 338 struct sockaddr_storage addr; 339 socklen_t len; 340 int err, s, c; 341 u64 value; 342 u32 key; 343 344 s = socket_loopback(family, sotype); 345 if (s < 0) 346 return; 347 348 len = sizeof(addr); 349 err = xgetsockname(s, sockaddr(&addr), &len); 350 if (err) 351 goto close_srv; 352 353 key = 0; 354 value = s; 355 xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST); 356 xbpf_map_delete_elem(mapfd, &key); 357 358 c = xsocket(family, sotype, 0); 359 if (c < 0) 360 goto close_srv; 361 362 xconnect(c, sockaddr(&addr), len); 363 xclose(c); 364 close_srv: 365 xclose(s); 366 } 367 368 /* Check that child socket that got created while parent was in a 369 * SOCKMAP, but got accept()'ed only after the parent has been removed 370 * from SOCKMAP, gets cloned without parent psock state or callbacks. 371 */ 372 static void test_accept_after_delete(struct test_sockmap_listen *skel __always_unused, 373 int family, int sotype, int mapfd) 374 { 375 struct sockaddr_storage addr; 376 const u32 zero = 0; 377 int err, s, c, p; 378 socklen_t len; 379 u64 value; 380 381 s = socket_loopback(family, sotype | SOCK_NONBLOCK); 382 if (s == -1) 383 return; 384 385 len = sizeof(addr); 386 err = xgetsockname(s, sockaddr(&addr), &len); 387 if (err) 388 goto close_srv; 389 390 value = s; 391 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); 392 if (err) 393 goto close_srv; 394 395 c = xsocket(family, sotype, 0); 396 if (c == -1) 397 goto close_srv; 398 399 /* Create child while parent is in sockmap */ 400 err = xconnect(c, sockaddr(&addr), len); 401 if (err) 402 goto close_cli; 403 404 /* Remove parent from sockmap */ 405 err = xbpf_map_delete_elem(mapfd, &zero); 406 if (err) 407 goto close_cli; 408 409 p = xaccept_nonblock(s, NULL, NULL); 410 if (p == -1) 411 goto close_cli; 412 413 /* Check that child sk_user_data is not set */ 414 value = p; 415 xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); 416 417 xclose(p); 418 close_cli: 419 xclose(c); 420 close_srv: 421 xclose(s); 422 } 423 424 /* Check that child socket that got created and accepted while parent 425 * was in a SOCKMAP is cloned without parent psock state or callbacks. 426 */ 427 static void test_accept_before_delete(struct test_sockmap_listen *skel __always_unused, 428 int family, int sotype, int mapfd) 429 { 430 struct sockaddr_storage addr; 431 const u32 zero = 0, one = 1; 432 int err, s, c, p; 433 socklen_t len; 434 u64 value; 435 436 s = socket_loopback(family, sotype | SOCK_NONBLOCK); 437 if (s == -1) 438 return; 439 440 len = sizeof(addr); 441 err = xgetsockname(s, sockaddr(&addr), &len); 442 if (err) 443 goto close_srv; 444 445 value = s; 446 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); 447 if (err) 448 goto close_srv; 449 450 c = xsocket(family, sotype, 0); 451 if (c == -1) 452 goto close_srv; 453 454 /* Create & accept child while parent is in sockmap */ 455 err = xconnect(c, sockaddr(&addr), len); 456 if (err) 457 goto close_cli; 458 459 p = xaccept_nonblock(s, NULL, NULL); 460 if (p == -1) 461 goto close_cli; 462 463 /* Check that child sk_user_data is not set */ 464 value = p; 465 xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST); 466 467 xclose(p); 468 close_cli: 469 xclose(c); 470 close_srv: 471 xclose(s); 472 } 473 474 struct connect_accept_ctx { 475 int sockfd; 476 unsigned int done; 477 unsigned int nr_iter; 478 }; 479 480 static bool is_thread_done(struct connect_accept_ctx *ctx) 481 { 482 return READ_ONCE(ctx->done); 483 } 484 485 static void *connect_accept_thread(void *arg) 486 { 487 struct connect_accept_ctx *ctx = arg; 488 struct sockaddr_storage addr; 489 int family, socktype; 490 socklen_t len; 491 int err, i, s; 492 493 s = ctx->sockfd; 494 495 len = sizeof(addr); 496 err = xgetsockname(s, sockaddr(&addr), &len); 497 if (err) 498 goto done; 499 500 len = sizeof(family); 501 err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len); 502 if (err) 503 goto done; 504 505 len = sizeof(socktype); 506 err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len); 507 if (err) 508 goto done; 509 510 for (i = 0; i < ctx->nr_iter; i++) { 511 int c, p; 512 513 c = xsocket(family, socktype, 0); 514 if (c < 0) 515 break; 516 517 err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr)); 518 if (err) { 519 xclose(c); 520 break; 521 } 522 523 p = xaccept_nonblock(s, NULL, NULL); 524 if (p < 0) { 525 xclose(c); 526 break; 527 } 528 529 xclose(p); 530 xclose(c); 531 } 532 done: 533 WRITE_ONCE(ctx->done, 1); 534 return NULL; 535 } 536 537 static void test_syn_recv_insert_delete(struct test_sockmap_listen *skel __always_unused, 538 int family, int sotype, int mapfd) 539 { 540 struct connect_accept_ctx ctx = { 0 }; 541 struct sockaddr_storage addr; 542 socklen_t len; 543 u32 zero = 0; 544 pthread_t t; 545 int err, s; 546 u64 value; 547 548 s = socket_loopback(family, sotype | SOCK_NONBLOCK); 549 if (s < 0) 550 return; 551 552 len = sizeof(addr); 553 err = xgetsockname(s, sockaddr(&addr), &len); 554 if (err) 555 goto close; 556 557 ctx.sockfd = s; 558 ctx.nr_iter = 1000; 559 560 err = xpthread_create(&t, NULL, connect_accept_thread, &ctx); 561 if (err) 562 goto close; 563 564 value = s; 565 while (!is_thread_done(&ctx)) { 566 err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); 567 if (err) 568 break; 569 570 err = xbpf_map_delete_elem(mapfd, &zero); 571 if (err) 572 break; 573 } 574 575 xpthread_join(t, NULL); 576 close: 577 xclose(s); 578 } 579 580 static void *listen_thread(void *arg) 581 { 582 struct sockaddr unspec = { AF_UNSPEC }; 583 struct connect_accept_ctx *ctx = arg; 584 int err, i, s; 585 586 s = ctx->sockfd; 587 588 for (i = 0; i < ctx->nr_iter; i++) { 589 err = xlisten(s, 1); 590 if (err) 591 break; 592 err = xconnect(s, &unspec, sizeof(unspec)); 593 if (err) 594 break; 595 } 596 597 WRITE_ONCE(ctx->done, 1); 598 return NULL; 599 } 600 601 static void test_race_insert_listen(struct test_sockmap_listen *skel __always_unused, 602 int family, int socktype, int mapfd) 603 { 604 struct connect_accept_ctx ctx = { 0 }; 605 const u32 zero = 0; 606 const int one = 1; 607 pthread_t t; 608 int err, s; 609 u64 value; 610 611 s = xsocket(family, socktype, 0); 612 if (s < 0) 613 return; 614 615 err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); 616 if (err) 617 goto close; 618 619 ctx.sockfd = s; 620 ctx.nr_iter = 10000; 621 622 err = pthread_create(&t, NULL, listen_thread, &ctx); 623 if (err) 624 goto close; 625 626 value = s; 627 while (!is_thread_done(&ctx)) { 628 err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST); 629 /* Expecting EOPNOTSUPP before listen() */ 630 if (err && errno != EOPNOTSUPP) { 631 FAIL_ERRNO("map_update"); 632 break; 633 } 634 635 err = bpf_map_delete_elem(mapfd, &zero); 636 /* Expecting no entry after unhash on connect(AF_UNSPEC) */ 637 if (err && errno != EINVAL && errno != ENOENT) { 638 FAIL_ERRNO("map_delete"); 639 break; 640 } 641 } 642 643 xpthread_join(t, NULL); 644 close: 645 xclose(s); 646 } 647 648 static void zero_verdict_count(int mapfd) 649 { 650 unsigned int zero = 0; 651 int key; 652 653 key = SK_DROP; 654 xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY); 655 key = SK_PASS; 656 xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY); 657 } 658 659 enum redir_mode { 660 REDIR_INGRESS, 661 REDIR_EGRESS, 662 }; 663 664 static const char *redir_mode_str(enum redir_mode mode) 665 { 666 switch (mode) { 667 case REDIR_INGRESS: 668 return "ingress"; 669 case REDIR_EGRESS: 670 return "egress"; 671 default: 672 return "unknown"; 673 } 674 } 675 676 static void redir_to_connected(int family, int sotype, int sock_mapfd, 677 int verd_mapfd, enum redir_mode mode) 678 { 679 const char *log_prefix = redir_mode_str(mode); 680 int c0, c1, p0, p1; 681 unsigned int pass; 682 int err, n; 683 u32 key; 684 char b; 685 686 zero_verdict_count(verd_mapfd); 687 688 err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1, 689 &p0, &p1); 690 if (err) 691 return; 692 693 err = add_to_sockmap(sock_mapfd, p0, p1); 694 if (err) 695 goto close; 696 697 n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1); 698 if (n < 0) 699 FAIL_ERRNO("%s: write", log_prefix); 700 if (n == 0) 701 FAIL("%s: incomplete write", log_prefix); 702 if (n < 1) 703 goto close; 704 705 key = SK_PASS; 706 err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass); 707 if (err) 708 goto close; 709 if (pass != 1) 710 FAIL("%s: want pass count 1, have %d", log_prefix, pass); 711 n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC); 712 if (n < 0) 713 FAIL_ERRNO("%s: recv_timeout", log_prefix); 714 if (n == 0) 715 FAIL("%s: incomplete recv", log_prefix); 716 717 close: 718 xclose(p1); 719 xclose(c1); 720 xclose(p0); 721 xclose(c0); 722 } 723 724 static void test_skb_redir_to_connected(struct test_sockmap_listen *skel, 725 struct bpf_map *inner_map, int family, 726 int sotype) 727 { 728 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict); 729 int parser = bpf_program__fd(skel->progs.prog_stream_parser); 730 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 731 int sock_map = bpf_map__fd(inner_map); 732 int err; 733 734 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0); 735 if (err) 736 return; 737 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0); 738 if (err) 739 goto detach; 740 741 redir_to_connected(family, sotype, sock_map, verdict_map, 742 REDIR_INGRESS); 743 744 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT); 745 detach: 746 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER); 747 } 748 749 static void test_msg_redir_to_connected(struct test_sockmap_listen *skel, 750 struct bpf_map *inner_map, int family, 751 int sotype) 752 { 753 int verdict = bpf_program__fd(skel->progs.prog_msg_verdict); 754 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 755 int sock_map = bpf_map__fd(inner_map); 756 int err; 757 758 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0); 759 if (err) 760 return; 761 762 redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS); 763 764 xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT); 765 } 766 767 static void test_msg_redir_to_connected_with_link(struct test_sockmap_listen *skel, 768 struct bpf_map *inner_map, int family, 769 int sotype) 770 { 771 int prog_msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict); 772 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 773 int sock_map = bpf_map__fd(inner_map); 774 int link_fd; 775 776 link_fd = bpf_link_create(prog_msg_verdict, sock_map, BPF_SK_MSG_VERDICT, NULL); 777 if (!ASSERT_GE(link_fd, 0, "bpf_link_create")) 778 return; 779 780 redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS); 781 782 close(link_fd); 783 } 784 785 static void redir_to_listening(int family, int sotype, int sock_mapfd, 786 int verd_mapfd, enum redir_mode mode) 787 { 788 const char *log_prefix = redir_mode_str(mode); 789 struct sockaddr_storage addr; 790 int s, c, p, err, n; 791 unsigned int drop; 792 socklen_t len; 793 u32 key; 794 795 zero_verdict_count(verd_mapfd); 796 797 s = socket_loopback(family, sotype | SOCK_NONBLOCK); 798 if (s < 0) 799 return; 800 801 len = sizeof(addr); 802 err = xgetsockname(s, sockaddr(&addr), &len); 803 if (err) 804 goto close_srv; 805 806 c = xsocket(family, sotype, 0); 807 if (c < 0) 808 goto close_srv; 809 err = xconnect(c, sockaddr(&addr), len); 810 if (err) 811 goto close_cli; 812 813 p = xaccept_nonblock(s, NULL, NULL); 814 if (p < 0) 815 goto close_cli; 816 817 err = add_to_sockmap(sock_mapfd, s, p); 818 if (err) 819 goto close_peer; 820 821 n = write(mode == REDIR_INGRESS ? c : p, "a", 1); 822 if (n < 0 && errno != EACCES) 823 FAIL_ERRNO("%s: write", log_prefix); 824 if (n == 0) 825 FAIL("%s: incomplete write", log_prefix); 826 if (n < 1) 827 goto close_peer; 828 829 key = SK_DROP; 830 err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop); 831 if (err) 832 goto close_peer; 833 if (drop != 1) 834 FAIL("%s: want drop count 1, have %d", log_prefix, drop); 835 836 close_peer: 837 xclose(p); 838 close_cli: 839 xclose(c); 840 close_srv: 841 xclose(s); 842 } 843 844 static void test_skb_redir_to_listening(struct test_sockmap_listen *skel, 845 struct bpf_map *inner_map, int family, 846 int sotype) 847 { 848 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict); 849 int parser = bpf_program__fd(skel->progs.prog_stream_parser); 850 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 851 int sock_map = bpf_map__fd(inner_map); 852 int err; 853 854 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0); 855 if (err) 856 return; 857 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0); 858 if (err) 859 goto detach; 860 861 redir_to_listening(family, sotype, sock_map, verdict_map, 862 REDIR_INGRESS); 863 864 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT); 865 detach: 866 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER); 867 } 868 869 static void test_msg_redir_to_listening(struct test_sockmap_listen *skel, 870 struct bpf_map *inner_map, int family, 871 int sotype) 872 { 873 int verdict = bpf_program__fd(skel->progs.prog_msg_verdict); 874 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 875 int sock_map = bpf_map__fd(inner_map); 876 int err; 877 878 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0); 879 if (err) 880 return; 881 882 redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS); 883 884 xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT); 885 } 886 887 static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *skel, 888 struct bpf_map *inner_map, int family, 889 int sotype) 890 { 891 struct bpf_program *verdict = skel->progs.prog_msg_verdict; 892 int verdict_map = bpf_map__fd(skel->maps.verdict_map); 893 int sock_map = bpf_map__fd(inner_map); 894 struct bpf_link *link; 895 896 link = bpf_program__attach_sockmap(verdict, sock_map); 897 if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap")) 898 return; 899 900 redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS); 901 902 bpf_link__detach(link); 903 } 904 905 static void redir_partial(int family, int sotype, int sock_map, int parser_map) 906 { 907 int c0 = -1, c1 = -1, p0 = -1, p1 = -1; 908 int err, n, key, value; 909 char buf[] = "abc"; 910 911 key = 0; 912 value = sizeof(buf) - 1; 913 err = xbpf_map_update_elem(parser_map, &key, &value, 0); 914 if (err) 915 return; 916 917 err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1, 918 &p0, &p1); 919 if (err) 920 goto clean_parser_map; 921 922 err = add_to_sockmap(sock_map, p0, p1); 923 if (err) 924 goto close; 925 926 n = xsend(c1, buf, sizeof(buf), 0); 927 if (n < sizeof(buf)) 928 FAIL("incomplete write"); 929 930 n = xrecv_nonblock(c0, buf, sizeof(buf), 0); 931 if (n != sizeof(buf) - 1) 932 FAIL("expect %zu, received %d", sizeof(buf) - 1, n); 933 934 close: 935 xclose(c0); 936 xclose(p0); 937 xclose(c1); 938 xclose(p1); 939 940 clean_parser_map: 941 key = 0; 942 value = 0; 943 xbpf_map_update_elem(parser_map, &key, &value, 0); 944 } 945 946 static void test_skb_redir_partial(struct test_sockmap_listen *skel, 947 struct bpf_map *inner_map, int family, 948 int sotype) 949 { 950 int verdict = bpf_program__fd(skel->progs.prog_stream_verdict); 951 int parser = bpf_program__fd(skel->progs.prog_stream_parser); 952 int parser_map = bpf_map__fd(skel->maps.parser_map); 953 int sock_map = bpf_map__fd(inner_map); 954 int err; 955 956 err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0); 957 if (err) 958 return; 959 960 err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0); 961 if (err) 962 goto detach; 963 964 redir_partial(family, sotype, sock_map, parser_map); 965 966 xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT); 967 detach: 968 xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER); 969 } 970 971 static void test_reuseport_select_listening(int family, int sotype, 972 int sock_map, int verd_map, 973 int reuseport_prog) 974 { 975 struct sockaddr_storage addr; 976 unsigned int pass; 977 int s, c, err; 978 socklen_t len; 979 u64 value; 980 u32 key; 981 982 zero_verdict_count(verd_map); 983 984 s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK, 985 reuseport_prog); 986 if (s < 0) 987 return; 988 989 len = sizeof(addr); 990 err = xgetsockname(s, sockaddr(&addr), &len); 991 if (err) 992 goto close_srv; 993 994 key = 0; 995 value = s; 996 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); 997 if (err) 998 goto close_srv; 999 1000 c = xsocket(family, sotype, 0); 1001 if (c < 0) 1002 goto close_srv; 1003 err = xconnect(c, sockaddr(&addr), len); 1004 if (err) 1005 goto close_cli; 1006 1007 if (sotype == SOCK_STREAM) { 1008 int p; 1009 1010 p = xaccept_nonblock(s, NULL, NULL); 1011 if (p < 0) 1012 goto close_cli; 1013 xclose(p); 1014 } else { 1015 char b = 'a'; 1016 ssize_t n; 1017 1018 n = xsend(c, &b, sizeof(b), 0); 1019 if (n == -1) 1020 goto close_cli; 1021 1022 n = xrecv_nonblock(s, &b, sizeof(b), 0); 1023 if (n == -1) 1024 goto close_cli; 1025 } 1026 1027 key = SK_PASS; 1028 err = xbpf_map_lookup_elem(verd_map, &key, &pass); 1029 if (err) 1030 goto close_cli; 1031 if (pass != 1) 1032 FAIL("want pass count 1, have %d", pass); 1033 1034 close_cli: 1035 xclose(c); 1036 close_srv: 1037 xclose(s); 1038 } 1039 1040 static void test_reuseport_select_connected(int family, int sotype, 1041 int sock_map, int verd_map, 1042 int reuseport_prog) 1043 { 1044 struct sockaddr_storage addr; 1045 int s, c0, c1, p0, err; 1046 unsigned int drop; 1047 socklen_t len; 1048 u64 value; 1049 u32 key; 1050 1051 zero_verdict_count(verd_map); 1052 1053 s = socket_loopback_reuseport(family, sotype, reuseport_prog); 1054 if (s < 0) 1055 return; 1056 1057 /* Populate sock_map[0] to avoid ENOENT on first connection */ 1058 key = 0; 1059 value = s; 1060 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST); 1061 if (err) 1062 goto close_srv; 1063 1064 len = sizeof(addr); 1065 err = xgetsockname(s, sockaddr(&addr), &len); 1066 if (err) 1067 goto close_srv; 1068 1069 c0 = xsocket(family, sotype, 0); 1070 if (c0 < 0) 1071 goto close_srv; 1072 1073 err = xconnect(c0, sockaddr(&addr), len); 1074 if (err) 1075 goto close_cli0; 1076 1077 if (sotype == SOCK_STREAM) { 1078 p0 = xaccept_nonblock(s, NULL, NULL); 1079 if (p0 < 0) 1080 goto close_cli0; 1081 } else { 1082 p0 = xsocket(family, sotype, 0); 1083 if (p0 < 0) 1084 goto close_cli0; 1085 1086 len = sizeof(addr); 1087 err = xgetsockname(c0, sockaddr(&addr), &len); 1088 if (err) 1089 goto close_cli0; 1090 1091 err = xconnect(p0, sockaddr(&addr), len); 1092 if (err) 1093 goto close_cli0; 1094 } 1095 1096 /* Update sock_map[0] to redirect to a connected socket */ 1097 key = 0; 1098 value = p0; 1099 err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST); 1100 if (err) 1101 goto close_peer0; 1102 1103 c1 = xsocket(family, sotype, 0); 1104 if (c1 < 0) 1105 goto close_peer0; 1106 1107 len = sizeof(addr); 1108 err = xgetsockname(s, sockaddr(&addr), &len); 1109 if (err) 1110 goto close_srv; 1111 1112 errno = 0; 1113 err = connect(c1, sockaddr(&addr), len); 1114 if (sotype == SOCK_DGRAM) { 1115 char b = 'a'; 1116 ssize_t n; 1117 1118 n = xsend(c1, &b, sizeof(b), 0); 1119 if (n == -1) 1120 goto close_cli1; 1121 1122 n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC); 1123 err = n == -1; 1124 } 1125 if (!err || errno != ECONNREFUSED) 1126 FAIL_ERRNO("connect: expected ECONNREFUSED"); 1127 1128 key = SK_DROP; 1129 err = xbpf_map_lookup_elem(verd_map, &key, &drop); 1130 if (err) 1131 goto close_cli1; 1132 if (drop != 1) 1133 FAIL("want drop count 1, have %d", drop); 1134 1135 close_cli1: 1136 xclose(c1); 1137 close_peer0: 1138 xclose(p0); 1139 close_cli0: 1140 xclose(c0); 1141 close_srv: 1142 xclose(s); 1143 } 1144 1145 /* Check that redirecting across reuseport groups is not allowed. */ 1146 static void test_reuseport_mixed_groups(int family, int sotype, int sock_map, 1147 int verd_map, int reuseport_prog) 1148 { 1149 struct sockaddr_storage addr; 1150 int s1, s2, c, err; 1151 unsigned int drop; 1152 socklen_t len; 1153 u32 key; 1154 1155 zero_verdict_count(verd_map); 1156 1157 /* Create two listeners, each in its own reuseport group */ 1158 s1 = socket_loopback_reuseport(family, sotype, reuseport_prog); 1159 if (s1 < 0) 1160 return; 1161 1162 s2 = socket_loopback_reuseport(family, sotype, reuseport_prog); 1163 if (s2 < 0) 1164 goto close_srv1; 1165 1166 err = add_to_sockmap(sock_map, s1, s2); 1167 if (err) 1168 goto close_srv2; 1169 1170 /* Connect to s2, reuseport BPF selects s1 via sock_map[0] */ 1171 len = sizeof(addr); 1172 err = xgetsockname(s2, sockaddr(&addr), &len); 1173 if (err) 1174 goto close_srv2; 1175 1176 c = xsocket(family, sotype, 0); 1177 if (c < 0) 1178 goto close_srv2; 1179 1180 err = connect(c, sockaddr(&addr), len); 1181 if (sotype == SOCK_DGRAM) { 1182 char b = 'a'; 1183 ssize_t n; 1184 1185 n = xsend(c, &b, sizeof(b), 0); 1186 if (n == -1) 1187 goto close_cli; 1188 1189 n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC); 1190 err = n == -1; 1191 } 1192 if (!err || errno != ECONNREFUSED) { 1193 FAIL_ERRNO("connect: expected ECONNREFUSED"); 1194 goto close_cli; 1195 } 1196 1197 /* Expect drop, can't redirect outside of reuseport group */ 1198 key = SK_DROP; 1199 err = xbpf_map_lookup_elem(verd_map, &key, &drop); 1200 if (err) 1201 goto close_cli; 1202 if (drop != 1) 1203 FAIL("want drop count 1, have %d", drop); 1204 1205 close_cli: 1206 xclose(c); 1207 close_srv2: 1208 xclose(s2); 1209 close_srv1: 1210 xclose(s1); 1211 } 1212 1213 #define TEST(fn, ...) \ 1214 { \ 1215 fn, #fn, __VA_ARGS__ \ 1216 } 1217 1218 static void test_ops_cleanup(const struct bpf_map *map) 1219 { 1220 int err, mapfd; 1221 u32 key; 1222 1223 mapfd = bpf_map__fd(map); 1224 1225 for (key = 0; key < bpf_map__max_entries(map); key++) { 1226 err = bpf_map_delete_elem(mapfd, &key); 1227 if (err && errno != EINVAL && errno != ENOENT) 1228 FAIL_ERRNO("map_delete: expected EINVAL/ENOENT"); 1229 } 1230 } 1231 1232 static const char *family_str(sa_family_t family) 1233 { 1234 switch (family) { 1235 case AF_INET: 1236 return "IPv4"; 1237 case AF_INET6: 1238 return "IPv6"; 1239 case AF_UNIX: 1240 return "Unix"; 1241 case AF_VSOCK: 1242 return "VSOCK"; 1243 default: 1244 return "unknown"; 1245 } 1246 } 1247 1248 static const char *map_type_str(const struct bpf_map *map) 1249 { 1250 int type; 1251 1252 if (!map) 1253 return "invalid"; 1254 type = bpf_map__type(map); 1255 1256 switch (type) { 1257 case BPF_MAP_TYPE_SOCKMAP: 1258 return "sockmap"; 1259 case BPF_MAP_TYPE_SOCKHASH: 1260 return "sockhash"; 1261 default: 1262 return "unknown"; 1263 } 1264 } 1265 1266 static const char *sotype_str(int sotype) 1267 { 1268 switch (sotype) { 1269 case SOCK_DGRAM: 1270 return "UDP"; 1271 case SOCK_STREAM: 1272 return "TCP"; 1273 default: 1274 return "unknown"; 1275 } 1276 } 1277 1278 static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map, 1279 int family, int sotype) 1280 { 1281 const struct op_test { 1282 void (*fn)(struct test_sockmap_listen *skel, 1283 int family, int sotype, int mapfd); 1284 const char *name; 1285 int sotype; 1286 } tests[] = { 1287 /* insert */ 1288 TEST(test_insert_invalid), 1289 TEST(test_insert_opened), 1290 TEST(test_insert_bound, SOCK_STREAM), 1291 TEST(test_insert), 1292 /* delete */ 1293 TEST(test_delete_after_insert), 1294 TEST(test_delete_after_close), 1295 /* lookup */ 1296 TEST(test_lookup_after_insert), 1297 TEST(test_lookup_after_delete), 1298 TEST(test_lookup_32_bit_value), 1299 /* update */ 1300 TEST(test_update_existing), 1301 /* races with insert/delete */ 1302 TEST(test_destroy_orphan_child, SOCK_STREAM), 1303 TEST(test_syn_recv_insert_delete, SOCK_STREAM), 1304 TEST(test_race_insert_listen, SOCK_STREAM), 1305 /* child clone */ 1306 TEST(test_clone_after_delete, SOCK_STREAM), 1307 TEST(test_accept_after_delete, SOCK_STREAM), 1308 TEST(test_accept_before_delete, SOCK_STREAM), 1309 }; 1310 const char *family_name, *map_name, *sotype_name; 1311 const struct op_test *t; 1312 char s[MAX_TEST_NAME]; 1313 int map_fd; 1314 1315 family_name = family_str(family); 1316 map_name = map_type_str(map); 1317 sotype_name = sotype_str(sotype); 1318 map_fd = bpf_map__fd(map); 1319 1320 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 1321 snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name, 1322 sotype_name, t->name); 1323 1324 if (t->sotype != 0 && t->sotype != sotype) 1325 continue; 1326 1327 if (!test__start_subtest(s)) 1328 continue; 1329 1330 t->fn(skel, family, sotype, map_fd); 1331 test_ops_cleanup(map); 1332 } 1333 } 1334 1335 static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, 1336 int family, int sotype) 1337 { 1338 const struct redir_test { 1339 void (*fn)(struct test_sockmap_listen *skel, 1340 struct bpf_map *map, int family, int sotype); 1341 const char *name; 1342 } tests[] = { 1343 TEST(test_skb_redir_to_connected), 1344 TEST(test_skb_redir_to_listening), 1345 TEST(test_skb_redir_partial), 1346 TEST(test_msg_redir_to_connected), 1347 TEST(test_msg_redir_to_connected_with_link), 1348 TEST(test_msg_redir_to_listening), 1349 TEST(test_msg_redir_to_listening_with_link), 1350 }; 1351 const char *family_name, *map_name; 1352 const struct redir_test *t; 1353 char s[MAX_TEST_NAME]; 1354 1355 family_name = family_str(family); 1356 map_name = map_type_str(map); 1357 1358 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 1359 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, 1360 t->name); 1361 1362 if (!test__start_subtest(s)) 1363 continue; 1364 1365 t->fn(skel, map, family, sotype); 1366 } 1367 } 1368 1369 static void test_reuseport(struct test_sockmap_listen *skel, 1370 struct bpf_map *map, int family, int sotype) 1371 { 1372 const struct reuseport_test { 1373 void (*fn)(int family, int sotype, int socket_map, 1374 int verdict_map, int reuseport_prog); 1375 const char *name; 1376 int sotype; 1377 } tests[] = { 1378 TEST(test_reuseport_select_listening), 1379 TEST(test_reuseport_select_connected), 1380 TEST(test_reuseport_mixed_groups), 1381 }; 1382 int socket_map, verdict_map, reuseport_prog; 1383 const char *family_name, *map_name, *sotype_name; 1384 const struct reuseport_test *t; 1385 char s[MAX_TEST_NAME]; 1386 1387 family_name = family_str(family); 1388 map_name = map_type_str(map); 1389 sotype_name = sotype_str(sotype); 1390 1391 socket_map = bpf_map__fd(map); 1392 verdict_map = bpf_map__fd(skel->maps.verdict_map); 1393 reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport); 1394 1395 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 1396 snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name, 1397 sotype_name, t->name); 1398 1399 if (t->sotype != 0 && t->sotype != sotype) 1400 continue; 1401 1402 if (!test__start_subtest(s)) 1403 continue; 1404 1405 t->fn(family, sotype, socket_map, verdict_map, reuseport_prog); 1406 } 1407 } 1408 1409 static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map, 1410 int family) 1411 { 1412 test_ops(skel, map, family, SOCK_STREAM); 1413 test_ops(skel, map, family, SOCK_DGRAM); 1414 test_redir(skel, map, family, SOCK_STREAM); 1415 test_reuseport(skel, map, family, SOCK_STREAM); 1416 test_reuseport(skel, map, family, SOCK_DGRAM); 1417 } 1418 1419 void serial_test_sockmap_listen(void) 1420 { 1421 struct test_sockmap_listen *skel; 1422 1423 skel = test_sockmap_listen__open_and_load(); 1424 if (!skel) { 1425 FAIL("skeleton open/load failed"); 1426 return; 1427 } 1428 1429 skel->bss->test_sockmap = true; 1430 run_tests(skel, skel->maps.sock_map, AF_INET); 1431 run_tests(skel, skel->maps.sock_map, AF_INET6); 1432 1433 skel->bss->test_sockmap = false; 1434 run_tests(skel, skel->maps.sock_hash, AF_INET); 1435 run_tests(skel, skel->maps.sock_hash, AF_INET6); 1436 1437 test_sockmap_listen__destroy(skel); 1438 } 1439