1 // SPDX-License-Identifier: GPL-2.0-only 2 #define _GNU_SOURCE 3 4 #include <errno.h> 5 #include <stdbool.h> 6 #include <stdio.h> 7 #include <string.h> 8 #include <unistd.h> 9 #include <sched.h> 10 11 #include <arpa/inet.h> 12 #include <sys/mount.h> 13 #include <sys/stat.h> 14 #include <sys/types.h> 15 #include <sys/un.h> 16 #include <sys/eventfd.h> 17 18 #include <linux/err.h> 19 #include <linux/in.h> 20 #include <linux/in6.h> 21 #include <linux/limits.h> 22 23 #include <linux/ip.h> 24 #include <netinet/udp.h> 25 #include <netinet/tcp.h> 26 #include <net/if.h> 27 28 #include "bpf_util.h" 29 #include "network_helpers.h" 30 #include "test_progs.h" 31 32 #ifdef TRAFFIC_MONITOR 33 /* Prevent pcap.h from including pcap/bpf.h and causing conflicts */ 34 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1 35 #include <pcap/pcap.h> 36 #include <pcap/dlt.h> 37 #endif 38 39 #ifndef IPPROTO_MPTCP 40 #define IPPROTO_MPTCP 262 41 #endif 42 43 #define clean_errno() (errno == 0 ? "None" : strerror(errno)) 44 #define log_err(MSG, ...) ({ \ 45 int __save = errno; \ 46 fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ 47 __FILE__, __LINE__, clean_errno(), \ 48 ##__VA_ARGS__); \ 49 errno = __save; \ 50 }) 51 52 struct ipv4_packet pkt_v4 = { 53 .eth.h_proto = __bpf_constant_htons(ETH_P_IP), 54 .iph.ihl = 5, 55 .iph.protocol = IPPROTO_TCP, 56 .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), 57 .tcp.urg_ptr = 123, 58 .tcp.doff = 5, 59 }; 60 61 struct ipv6_packet pkt_v6 = { 62 .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), 63 .iph.nexthdr = IPPROTO_TCP, 64 .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), 65 .tcp.urg_ptr = 123, 66 .tcp.doff = 5, 67 }; 68 69 static const struct network_helper_opts default_opts; 70 71 int settimeo(int fd, int timeout_ms) 72 { 73 struct timeval timeout = { .tv_sec = 3 }; 74 75 if (timeout_ms > 0) { 76 timeout.tv_sec = timeout_ms / 1000; 77 timeout.tv_usec = (timeout_ms % 1000) * 1000; 78 } 79 80 if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, 81 sizeof(timeout))) { 82 log_err("Failed to set SO_RCVTIMEO"); 83 return -1; 84 } 85 86 if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeout, 87 sizeof(timeout))) { 88 log_err("Failed to set SO_SNDTIMEO"); 89 return -1; 90 } 91 92 return 0; 93 } 94 95 #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; }) 96 97 int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen, 98 const struct network_helper_opts *opts) 99 { 100 int on = 1, fd; 101 102 if (!opts) 103 opts = &default_opts; 104 105 fd = socket(addr->ss_family, type, opts->proto); 106 if (fd < 0) { 107 log_err("Failed to create server socket"); 108 return -1; 109 } 110 111 if (settimeo(fd, opts->timeout_ms)) 112 goto error_close; 113 114 if (type == SOCK_STREAM && 115 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))) { 116 log_err("Failed to enable SO_REUSEADDR"); 117 goto error_close; 118 } 119 120 if (opts->post_socket_cb && 121 opts->post_socket_cb(fd, opts->cb_opts)) { 122 log_err("Failed to call post_socket_cb"); 123 goto error_close; 124 } 125 126 if (bind(fd, (struct sockaddr *)addr, addrlen) < 0) { 127 log_err("Failed to bind socket"); 128 goto error_close; 129 } 130 131 if (type == SOCK_STREAM) { 132 if (listen(fd, opts->backlog ? MAX(opts->backlog, 0) : 1) < 0) { 133 log_err("Failed to listed on socket"); 134 goto error_close; 135 } 136 } 137 138 return fd; 139 140 error_close: 141 save_errno_close(fd); 142 return -1; 143 } 144 145 int start_server_str(int family, int type, const char *addr_str, __u16 port, 146 const struct network_helper_opts *opts) 147 { 148 struct sockaddr_storage addr; 149 socklen_t addrlen; 150 151 if (!opts) 152 opts = &default_opts; 153 154 if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 155 return -1; 156 157 return start_server_addr(type, &addr, addrlen, opts); 158 } 159 160 int start_server(int family, int type, const char *addr_str, __u16 port, 161 int timeout_ms) 162 { 163 struct network_helper_opts opts = { 164 .timeout_ms = timeout_ms, 165 }; 166 167 return start_server_str(family, type, addr_str, port, &opts); 168 } 169 170 static int reuseport_cb(int fd, void *opts) 171 { 172 int on = 1; 173 174 return setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on)); 175 } 176 177 int *start_reuseport_server(int family, int type, const char *addr_str, 178 __u16 port, int timeout_ms, unsigned int nr_listens) 179 { 180 struct network_helper_opts opts = { 181 .timeout_ms = timeout_ms, 182 .post_socket_cb = reuseport_cb, 183 }; 184 struct sockaddr_storage addr; 185 unsigned int nr_fds = 0; 186 socklen_t addrlen; 187 int *fds; 188 189 if (!nr_listens) 190 return NULL; 191 192 if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 193 return NULL; 194 195 fds = malloc(sizeof(*fds) * nr_listens); 196 if (!fds) 197 return NULL; 198 199 fds[0] = start_server_addr(type, &addr, addrlen, &opts); 200 if (fds[0] == -1) 201 goto close_fds; 202 nr_fds = 1; 203 204 if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen)) 205 goto close_fds; 206 207 for (; nr_fds < nr_listens; nr_fds++) { 208 fds[nr_fds] = start_server_addr(type, &addr, addrlen, &opts); 209 if (fds[nr_fds] == -1) 210 goto close_fds; 211 } 212 213 return fds; 214 215 close_fds: 216 free_fds(fds, nr_fds); 217 return NULL; 218 } 219 220 void free_fds(int *fds, unsigned int nr_close_fds) 221 { 222 if (fds) { 223 while (nr_close_fds) 224 close(fds[--nr_close_fds]); 225 free(fds); 226 } 227 } 228 229 int fastopen_connect(int server_fd, const char *data, unsigned int data_len, 230 int timeout_ms) 231 { 232 struct sockaddr_storage addr; 233 socklen_t addrlen = sizeof(addr); 234 struct sockaddr_in *addr_in; 235 int fd, ret; 236 237 if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { 238 log_err("Failed to get server addr"); 239 return -1; 240 } 241 242 addr_in = (struct sockaddr_in *)&addr; 243 fd = socket(addr_in->sin_family, SOCK_STREAM, 0); 244 if (fd < 0) { 245 log_err("Failed to create client socket"); 246 return -1; 247 } 248 249 if (settimeo(fd, timeout_ms)) 250 goto error_close; 251 252 ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr, 253 addrlen); 254 if (ret != data_len) { 255 log_err("sendto(data, %u) != %d\n", data_len, ret); 256 goto error_close; 257 } 258 259 return fd; 260 261 error_close: 262 save_errno_close(fd); 263 return -1; 264 } 265 266 int client_socket(int family, int type, 267 const struct network_helper_opts *opts) 268 { 269 int fd; 270 271 if (!opts) 272 opts = &default_opts; 273 274 fd = socket(family, type, opts->proto); 275 if (fd < 0) { 276 log_err("Failed to create client socket"); 277 return -1; 278 } 279 280 if (settimeo(fd, opts->timeout_ms)) 281 goto error_close; 282 283 if (opts->post_socket_cb && 284 opts->post_socket_cb(fd, opts->cb_opts)) 285 goto error_close; 286 287 return fd; 288 289 error_close: 290 save_errno_close(fd); 291 return -1; 292 } 293 294 int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen, 295 const struct network_helper_opts *opts) 296 { 297 int fd; 298 299 if (!opts) 300 opts = &default_opts; 301 302 fd = client_socket(addr->ss_family, type, opts); 303 if (fd < 0) { 304 log_err("Failed to create client socket"); 305 return -1; 306 } 307 308 if (connect(fd, (const struct sockaddr *)addr, addrlen)) { 309 log_err("Failed to connect to server"); 310 save_errno_close(fd); 311 return -1; 312 } 313 314 return fd; 315 } 316 317 int connect_to_addr_str(int family, int type, const char *addr_str, __u16 port, 318 const struct network_helper_opts *opts) 319 { 320 struct sockaddr_storage addr; 321 socklen_t addrlen; 322 323 if (!opts) 324 opts = &default_opts; 325 326 if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 327 return -1; 328 329 return connect_to_addr(type, &addr, addrlen, opts); 330 } 331 332 int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) 333 { 334 struct sockaddr_storage addr; 335 socklen_t addrlen, optlen; 336 int type; 337 338 if (!opts) 339 opts = &default_opts; 340 341 optlen = sizeof(type); 342 if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) { 343 log_err("getsockopt(SOL_TYPE)"); 344 return -1; 345 } 346 347 addrlen = sizeof(addr); 348 if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { 349 log_err("Failed to get server addr"); 350 return -1; 351 } 352 353 return connect_to_addr(type, &addr, addrlen, opts); 354 } 355 356 int connect_to_fd(int server_fd, int timeout_ms) 357 { 358 struct network_helper_opts opts = { 359 .timeout_ms = timeout_ms, 360 }; 361 socklen_t optlen; 362 int protocol; 363 364 optlen = sizeof(protocol); 365 if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) { 366 log_err("getsockopt(SOL_PROTOCOL)"); 367 return -1; 368 } 369 opts.proto = protocol; 370 371 return connect_to_fd_opts(server_fd, &opts); 372 } 373 374 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms) 375 { 376 struct sockaddr_storage addr; 377 socklen_t len = sizeof(addr); 378 379 if (settimeo(client_fd, timeout_ms)) 380 return -1; 381 382 if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { 383 log_err("Failed to get server addr"); 384 return -1; 385 } 386 387 if (connect(client_fd, (const struct sockaddr *)&addr, len)) { 388 log_err("Failed to connect to server"); 389 return -1; 390 } 391 392 return 0; 393 } 394 395 int make_sockaddr(int family, const char *addr_str, __u16 port, 396 struct sockaddr_storage *addr, socklen_t *len) 397 { 398 if (family == AF_INET) { 399 struct sockaddr_in *sin = (void *)addr; 400 401 memset(addr, 0, sizeof(*sin)); 402 sin->sin_family = AF_INET; 403 sin->sin_port = htons(port); 404 if (addr_str && 405 inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) { 406 log_err("inet_pton(AF_INET, %s)", addr_str); 407 return -1; 408 } 409 if (len) 410 *len = sizeof(*sin); 411 return 0; 412 } else if (family == AF_INET6) { 413 struct sockaddr_in6 *sin6 = (void *)addr; 414 415 memset(addr, 0, sizeof(*sin6)); 416 sin6->sin6_family = AF_INET6; 417 sin6->sin6_port = htons(port); 418 if (addr_str && 419 inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) { 420 log_err("inet_pton(AF_INET6, %s)", addr_str); 421 return -1; 422 } 423 if (len) 424 *len = sizeof(*sin6); 425 return 0; 426 } else if (family == AF_UNIX) { 427 /* Note that we always use abstract unix sockets to avoid having 428 * to clean up leftover files. 429 */ 430 struct sockaddr_un *sun = (void *)addr; 431 432 memset(addr, 0, sizeof(*sun)); 433 sun->sun_family = family; 434 sun->sun_path[0] = 0; 435 strcpy(sun->sun_path + 1, addr_str); 436 if (len) 437 *len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str); 438 return 0; 439 } 440 return -1; 441 } 442 443 char *ping_command(int family) 444 { 445 if (family == AF_INET6) { 446 /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */ 447 if (!system("which ping6 >/dev/null 2>&1")) 448 return "ping6"; 449 else 450 return "ping -6"; 451 } 452 return "ping"; 453 } 454 455 int append_tid(char *str, size_t sz) 456 { 457 size_t end; 458 459 if (!str) 460 return -1; 461 462 end = strlen(str); 463 if (end + 8 > sz) 464 return -1; 465 466 sprintf(&str[end], "%07ld", sys_gettid()); 467 str[end + 7] = '\0'; 468 469 return 0; 470 } 471 472 int remove_netns(const char *name) 473 { 474 char *cmd; 475 int r; 476 477 r = asprintf(&cmd, "ip netns del %s >/dev/null 2>&1", name); 478 if (r < 0) { 479 log_err("Failed to malloc cmd"); 480 return -1; 481 } 482 483 r = system(cmd); 484 free(cmd); 485 return r; 486 } 487 488 int make_netns(const char *name) 489 { 490 char *cmd; 491 int r; 492 493 r = asprintf(&cmd, "ip netns add %s", name); 494 if (r < 0) { 495 log_err("Failed to malloc cmd"); 496 return -1; 497 } 498 499 r = system(cmd); 500 free(cmd); 501 502 if (r) 503 return r; 504 505 r = asprintf(&cmd, "ip -n %s link set lo up", name); 506 if (r < 0) { 507 log_err("Failed to malloc cmd for setting up lo"); 508 remove_netns(name); 509 return -1; 510 } 511 512 r = system(cmd); 513 free(cmd); 514 515 return r; 516 } 517 518 struct nstoken { 519 int orig_netns_fd; 520 }; 521 522 struct nstoken *open_netns(const char *name) 523 { 524 int nsfd; 525 char nspath[PATH_MAX]; 526 int err; 527 struct nstoken *token; 528 529 token = calloc(1, sizeof(struct nstoken)); 530 if (!token) { 531 log_err("Failed to malloc token"); 532 return NULL; 533 } 534 535 token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY); 536 if (token->orig_netns_fd == -1) { 537 log_err("Failed to open(/proc/self/ns/net)"); 538 goto fail; 539 } 540 541 snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); 542 nsfd = open(nspath, O_RDONLY | O_CLOEXEC); 543 if (nsfd == -1) { 544 log_err("Failed to open(%s)", nspath); 545 goto fail; 546 } 547 548 err = setns(nsfd, CLONE_NEWNET); 549 close(nsfd); 550 if (err) { 551 log_err("Failed to setns(nsfd)"); 552 goto fail; 553 } 554 555 return token; 556 fail: 557 if (token->orig_netns_fd != -1) 558 close(token->orig_netns_fd); 559 free(token); 560 return NULL; 561 } 562 563 void close_netns(struct nstoken *token) 564 { 565 if (!token) 566 return; 567 568 if (setns(token->orig_netns_fd, CLONE_NEWNET)) 569 log_err("Failed to setns(orig_netns_fd)"); 570 close(token->orig_netns_fd); 571 free(token); 572 } 573 574 int open_tuntap(const char *dev_name, bool need_mac) 575 { 576 int err = 0; 577 struct ifreq ifr; 578 int fd = open("/dev/net/tun", O_RDWR); 579 580 if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)")) 581 return -1; 582 583 ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); 584 strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1); 585 ifr.ifr_name[IFNAMSIZ - 1] = '\0'; 586 587 err = ioctl(fd, TUNSETIFF, &ifr); 588 if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { 589 close(fd); 590 return -1; 591 } 592 593 err = fcntl(fd, F_SETFL, O_NONBLOCK); 594 if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { 595 close(fd); 596 return -1; 597 } 598 599 return fd; 600 } 601 602 int get_socket_local_port(int sock_fd) 603 { 604 struct sockaddr_storage addr; 605 socklen_t addrlen = sizeof(addr); 606 int err; 607 608 err = getsockname(sock_fd, (struct sockaddr *)&addr, &addrlen); 609 if (err < 0) 610 return err; 611 612 if (addr.ss_family == AF_INET) { 613 struct sockaddr_in *sin = (struct sockaddr_in *)&addr; 614 615 return sin->sin_port; 616 } else if (addr.ss_family == AF_INET6) { 617 struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&addr; 618 619 return sin->sin6_port; 620 } 621 622 return -1; 623 } 624 625 int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param) 626 { 627 struct ifreq ifr = {0}; 628 int sockfd, err; 629 630 sockfd = socket(AF_INET, SOCK_DGRAM, 0); 631 if (sockfd < 0) 632 return -errno; 633 634 memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); 635 636 ring_param->cmd = ETHTOOL_GRINGPARAM; 637 ifr.ifr_data = (char *)ring_param; 638 639 if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) { 640 err = errno; 641 close(sockfd); 642 return -err; 643 } 644 645 close(sockfd); 646 return 0; 647 } 648 649 int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param) 650 { 651 struct ifreq ifr = {0}; 652 int sockfd, err; 653 654 sockfd = socket(AF_INET, SOCK_DGRAM, 0); 655 if (sockfd < 0) 656 return -errno; 657 658 memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); 659 660 ring_param->cmd = ETHTOOL_SRINGPARAM; 661 ifr.ifr_data = (char *)ring_param; 662 663 if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) { 664 err = errno; 665 close(sockfd); 666 return -err; 667 } 668 669 close(sockfd); 670 return 0; 671 } 672 673 struct send_recv_arg { 674 int fd; 675 uint32_t bytes; 676 int stop; 677 }; 678 679 static void *send_recv_server(void *arg) 680 { 681 struct send_recv_arg *a = (struct send_recv_arg *)arg; 682 ssize_t nr_sent = 0, bytes = 0; 683 char batch[1500]; 684 int err = 0, fd; 685 686 fd = accept(a->fd, NULL, NULL); 687 while (fd == -1) { 688 if (errno == EINTR) 689 continue; 690 err = -errno; 691 goto done; 692 } 693 694 if (settimeo(fd, 0)) { 695 err = -errno; 696 goto done; 697 } 698 699 while (bytes < a->bytes && !READ_ONCE(a->stop)) { 700 nr_sent = send(fd, &batch, 701 MIN(a->bytes - bytes, sizeof(batch)), 0); 702 if (nr_sent == -1 && errno == EINTR) 703 continue; 704 if (nr_sent == -1) { 705 err = -errno; 706 break; 707 } 708 bytes += nr_sent; 709 } 710 711 if (bytes != a->bytes) { 712 log_err("send %zd expected %u", bytes, a->bytes); 713 if (!err) 714 err = bytes > a->bytes ? -E2BIG : -EINTR; 715 } 716 717 done: 718 if (fd >= 0) 719 close(fd); 720 if (err) { 721 WRITE_ONCE(a->stop, 1); 722 return ERR_PTR(err); 723 } 724 return NULL; 725 } 726 727 int send_recv_data(int lfd, int fd, uint32_t total_bytes) 728 { 729 ssize_t nr_recv = 0, bytes = 0; 730 struct send_recv_arg arg = { 731 .fd = lfd, 732 .bytes = total_bytes, 733 .stop = 0, 734 }; 735 pthread_t srv_thread; 736 void *thread_ret; 737 char batch[1500]; 738 int err = 0; 739 740 err = pthread_create(&srv_thread, NULL, send_recv_server, (void *)&arg); 741 if (err) { 742 log_err("Failed to pthread_create"); 743 return err; 744 } 745 746 /* recv total_bytes */ 747 while (bytes < total_bytes && !READ_ONCE(arg.stop)) { 748 nr_recv = recv(fd, &batch, 749 MIN(total_bytes - bytes, sizeof(batch)), 0); 750 if (nr_recv == -1 && errno == EINTR) 751 continue; 752 if (nr_recv == -1) { 753 err = -errno; 754 break; 755 } 756 bytes += nr_recv; 757 } 758 759 if (bytes != total_bytes) { 760 log_err("recv %zd expected %u", bytes, total_bytes); 761 if (!err) 762 err = bytes > total_bytes ? -E2BIG : -EINTR; 763 } 764 765 WRITE_ONCE(arg.stop, 1); 766 pthread_join(srv_thread, &thread_ret); 767 if (IS_ERR(thread_ret)) { 768 log_err("Failed in thread_ret %ld", PTR_ERR(thread_ret)); 769 err = err ? : PTR_ERR(thread_ret); 770 } 771 772 return err; 773 } 774 775 int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd) 776 { 777 int ifindex, ret; 778 779 if (!ASSERT_TRUE(ingress_fd >= 0 || egress_fd >= 0, 780 "at least one program fd is valid")) 781 return -1; 782 783 ifindex = if_nametoindex(dev); 784 if (!ASSERT_NEQ(ifindex, 0, "get ifindex")) 785 return -1; 786 787 DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex, 788 .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS); 789 DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, 790 .priority = 1, .prog_fd = ingress_fd); 791 DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, 792 .priority = 1, .prog_fd = egress_fd); 793 794 ret = bpf_tc_hook_create(&hook); 795 if (!ASSERT_OK(ret, "create tc hook")) 796 return ret; 797 798 if (ingress_fd >= 0) { 799 hook.attach_point = BPF_TC_INGRESS; 800 ret = bpf_tc_attach(&hook, &opts1); 801 if (!ASSERT_OK(ret, "bpf_tc_attach")) { 802 bpf_tc_hook_destroy(&hook); 803 return ret; 804 } 805 } 806 807 if (egress_fd >= 0) { 808 hook.attach_point = BPF_TC_EGRESS; 809 ret = bpf_tc_attach(&hook, &opts2); 810 if (!ASSERT_OK(ret, "bpf_tc_attach")) { 811 bpf_tc_hook_destroy(&hook); 812 return ret; 813 } 814 } 815 816 return 0; 817 } 818 819 #ifdef TRAFFIC_MONITOR 820 struct tmonitor_ctx { 821 pcap_t *pcap; 822 pcap_dumper_t *dumper; 823 pthread_t thread; 824 int wake_fd; 825 826 volatile bool done; 827 char pkt_fname[PATH_MAX]; 828 int pcap_fd; 829 }; 830 831 static int __base_pr(const char *format, va_list args) 832 { 833 return vfprintf(stdout, format, args); 834 } 835 836 static tm_print_fn_t __tm_pr = __base_pr; 837 838 tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn) 839 { 840 tm_print_fn_t old_print_fn; 841 842 old_print_fn = __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED); 843 844 return old_print_fn; 845 } 846 847 void tm_print(const char *format, ...) 848 { 849 tm_print_fn_t print_fn; 850 va_list args; 851 852 print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED); 853 if (!print_fn) 854 return; 855 856 va_start(args, format); 857 print_fn(format, args); 858 va_end(args); 859 } 860 861 /* Is this packet captured with a Ethernet protocol type? */ 862 static bool is_ethernet(const u_char *packet) 863 { 864 u16 arphdr_type; 865 866 memcpy(&arphdr_type, packet + 8, 2); 867 arphdr_type = ntohs(arphdr_type); 868 869 /* Except the following cases, the protocol type contains the 870 * Ethernet protocol type for the packet. 871 * 872 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html 873 */ 874 switch (arphdr_type) { 875 case 770: /* ARPHRD_FRAD */ 876 case 778: /* ARPHDR_IPGRE */ 877 case 803: /* ARPHRD_IEEE80211_RADIOTAP */ 878 tm_print("Packet captured: arphdr_type=%d\n", arphdr_type); 879 return false; 880 } 881 return true; 882 } 883 884 static const char * const pkt_types[] = { 885 "In", 886 "B", /* Broadcast */ 887 "M", /* Multicast */ 888 "C", /* Captured with the promiscuous mode */ 889 "Out", 890 }; 891 892 static const char *pkt_type_str(u16 pkt_type) 893 { 894 if (pkt_type < ARRAY_SIZE(pkt_types)) 895 return pkt_types[pkt_type]; 896 return "Unknown"; 897 } 898 899 #define MAX_FLAGS_STRLEN 21 900 /* Show the information of the transport layer in the packet */ 901 static void show_transport(const u_char *packet, u16 len, u32 ifindex, 902 const char *src_addr, const char *dst_addr, 903 u16 proto, bool ipv6, u8 pkt_type) 904 { 905 char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = ""; 906 const char *transport_str; 907 u16 src_port, dst_port; 908 struct udphdr *udp; 909 struct tcphdr *tcp; 910 911 ifname = if_indextoname(ifindex, _ifname); 912 if (!ifname) { 913 snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex); 914 ifname = _ifname; 915 } 916 917 if (proto == IPPROTO_UDP) { 918 udp = (struct udphdr *)packet; 919 src_port = ntohs(udp->source); 920 dst_port = ntohs(udp->dest); 921 transport_str = "UDP"; 922 } else if (proto == IPPROTO_TCP) { 923 tcp = (struct tcphdr *)packet; 924 src_port = ntohs(tcp->source); 925 dst_port = ntohs(tcp->dest); 926 transport_str = "TCP"; 927 } else if (proto == IPPROTO_ICMP) { 928 tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n", 929 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, 930 packet[0], packet[1]); 931 return; 932 } else if (proto == IPPROTO_ICMPV6) { 933 tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n", 934 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, 935 packet[0], packet[1]); 936 return; 937 } else { 938 tm_print("%-7s %-3s %s %s > %s: protocol %d\n", 939 ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4", 940 src_addr, dst_addr, proto); 941 return; 942 } 943 944 /* TCP or UDP*/ 945 946 if (proto == IPPROTO_TCP) 947 snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s", 948 tcp->fin ? ", FIN" : "", 949 tcp->syn ? ", SYN" : "", 950 tcp->rst ? ", RST" : "", 951 tcp->ack ? ", ACK" : ""); 952 953 if (ipv6) 954 tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n", 955 ifname, pkt_type_str(pkt_type), src_addr, src_port, 956 dst_addr, dst_port, transport_str, len, flags); 957 else 958 tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n", 959 ifname, pkt_type_str(pkt_type), src_addr, src_port, 960 dst_addr, dst_port, transport_str, len, flags); 961 } 962 963 static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type) 964 { 965 char src_buf[INET6_ADDRSTRLEN], dst_buf[INET6_ADDRSTRLEN]; 966 struct ipv6hdr *pkt = (struct ipv6hdr *)packet; 967 const char *src, *dst; 968 u_char proto; 969 970 src = inet_ntop(AF_INET6, &pkt->saddr, src_buf, sizeof(src_buf)); 971 if (!src) 972 src = "<invalid>"; 973 dst = inet_ntop(AF_INET6, &pkt->daddr, dst_buf, sizeof(dst_buf)); 974 if (!dst) 975 dst = "<invalid>"; 976 proto = pkt->nexthdr; 977 show_transport(packet + sizeof(struct ipv6hdr), 978 ntohs(pkt->payload_len), 979 ifindex, src, dst, proto, true, pkt_type); 980 } 981 982 static void show_ipv4_packet(const u_char *packet, u32 ifindex, u8 pkt_type) 983 { 984 char src_buf[INET_ADDRSTRLEN], dst_buf[INET_ADDRSTRLEN]; 985 struct iphdr *pkt = (struct iphdr *)packet; 986 const char *src, *dst; 987 u_char proto; 988 989 src = inet_ntop(AF_INET, &pkt->saddr, src_buf, sizeof(src_buf)); 990 if (!src) 991 src = "<invalid>"; 992 dst = inet_ntop(AF_INET, &pkt->daddr, dst_buf, sizeof(dst_buf)); 993 if (!dst) 994 dst = "<invalid>"; 995 proto = pkt->protocol; 996 show_transport(packet + sizeof(struct iphdr), 997 ntohs(pkt->tot_len), 998 ifindex, src, dst, proto, false, pkt_type); 999 } 1000 1001 static void *traffic_monitor_thread(void *arg) 1002 { 1003 char *ifname, _ifname[IF_NAMESIZE]; 1004 const u_char *packet, *payload; 1005 struct tmonitor_ctx *ctx = arg; 1006 pcap_dumper_t *dumper = ctx->dumper; 1007 int fd = ctx->pcap_fd, nfds, r; 1008 int wake_fd = ctx->wake_fd; 1009 struct pcap_pkthdr header; 1010 pcap_t *pcap = ctx->pcap; 1011 u32 ifindex; 1012 fd_set fds; 1013 u16 proto; 1014 u8 ptype; 1015 1016 nfds = (fd > wake_fd ? fd : wake_fd) + 1; 1017 FD_ZERO(&fds); 1018 1019 while (!ctx->done) { 1020 FD_SET(fd, &fds); 1021 FD_SET(wake_fd, &fds); 1022 r = select(nfds, &fds, NULL, NULL, NULL); 1023 if (!r) 1024 continue; 1025 if (r < 0) { 1026 if (errno == EINTR) 1027 continue; 1028 log_err("Fail to select on pcap fd and wake fd"); 1029 break; 1030 } 1031 1032 /* This instance of pcap is non-blocking */ 1033 packet = pcap_next(pcap, &header); 1034 if (!packet) 1035 continue; 1036 1037 /* According to the man page of pcap_dump(), first argument 1038 * is the pcap_dumper_t pointer even it's argument type is 1039 * u_char *. 1040 */ 1041 pcap_dump((u_char *)dumper, &header, packet); 1042 1043 /* Not sure what other types of packets look like. Here, we 1044 * parse only Ethernet and compatible packets. 1045 */ 1046 if (!is_ethernet(packet)) 1047 continue; 1048 1049 /* Skip SLL2 header 1050 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html 1051 * 1052 * Although the document doesn't mention that, the payload 1053 * doesn't include the Ethernet header. The payload starts 1054 * from the first byte of the network layer header. 1055 */ 1056 payload = packet + 20; 1057 1058 memcpy(&proto, packet, 2); 1059 proto = ntohs(proto); 1060 memcpy(&ifindex, packet + 4, 4); 1061 ifindex = ntohl(ifindex); 1062 ptype = packet[10]; 1063 1064 if (proto == ETH_P_IPV6) { 1065 show_ipv6_packet(payload, ifindex, ptype); 1066 } else if (proto == ETH_P_IP) { 1067 show_ipv4_packet(payload, ifindex, ptype); 1068 } else { 1069 ifname = if_indextoname(ifindex, _ifname); 1070 if (!ifname) { 1071 snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex); 1072 ifname = _ifname; 1073 } 1074 1075 tm_print("%-7s %-3s Unknown network protocol type 0x%x\n", 1076 ifname, pkt_type_str(ptype), proto); 1077 } 1078 } 1079 1080 return NULL; 1081 } 1082 1083 /* Prepare the pcap handle to capture packets. 1084 * 1085 * This pcap is non-blocking and immediate mode is enabled to receive 1086 * captured packets as soon as possible. The snaplen is set to 1024 bytes 1087 * to limit the size of captured content. The format of the link-layer 1088 * header is set to DLT_LINUX_SLL2 to enable handling various link-layer 1089 * technologies. 1090 */ 1091 static pcap_t *traffic_monitor_prepare_pcap(void) 1092 { 1093 char errbuf[PCAP_ERRBUF_SIZE]; 1094 pcap_t *pcap; 1095 int r; 1096 1097 /* Listen on all NICs in the namespace */ 1098 pcap = pcap_create("any", errbuf); 1099 if (!pcap) { 1100 log_err("Failed to open pcap: %s", errbuf); 1101 return NULL; 1102 } 1103 /* Limit the size of the packet (first N bytes) */ 1104 r = pcap_set_snaplen(pcap, 1024); 1105 if (r) { 1106 log_err("Failed to set snaplen: %s", pcap_geterr(pcap)); 1107 goto error; 1108 } 1109 /* To receive packets as fast as possible */ 1110 r = pcap_set_immediate_mode(pcap, 1); 1111 if (r) { 1112 log_err("Failed to set immediate mode: %s", pcap_geterr(pcap)); 1113 goto error; 1114 } 1115 r = pcap_setnonblock(pcap, 1, errbuf); 1116 if (r) { 1117 log_err("Failed to set nonblock: %s", errbuf); 1118 goto error; 1119 } 1120 r = pcap_activate(pcap); 1121 if (r) { 1122 log_err("Failed to activate pcap: %s", pcap_geterr(pcap)); 1123 goto error; 1124 } 1125 /* Determine the format of the link-layer header */ 1126 r = pcap_set_datalink(pcap, DLT_LINUX_SLL2); 1127 if (r) { 1128 log_err("Failed to set datalink: %s", pcap_geterr(pcap)); 1129 goto error; 1130 } 1131 1132 return pcap; 1133 error: 1134 pcap_close(pcap); 1135 return NULL; 1136 } 1137 1138 static void encode_test_name(char *buf, size_t len, const char *test_name, const char *subtest_name) 1139 { 1140 char *p; 1141 1142 if (subtest_name) 1143 snprintf(buf, len, "%s__%s", test_name, subtest_name); 1144 else 1145 snprintf(buf, len, "%s", test_name); 1146 while ((p = strchr(buf, '/'))) 1147 *p = '_'; 1148 while ((p = strchr(buf, ' '))) 1149 *p = '_'; 1150 } 1151 1152 #define PCAP_DIR "/tmp/tmon_pcap" 1153 1154 /* Start to monitor the network traffic in the given network namespace. 1155 * 1156 * netns: the name of the network namespace to monitor. If NULL, the 1157 * current network namespace is monitored. 1158 * test_name: the name of the running test. 1159 * subtest_name: the name of the running subtest if there is. It should be 1160 * NULL if it is not a subtest. 1161 * 1162 * This function will start a thread to capture packets going through NICs 1163 * in the give network namespace. 1164 */ 1165 struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name, 1166 const char *subtest_name) 1167 { 1168 struct nstoken *nstoken = NULL; 1169 struct tmonitor_ctx *ctx; 1170 char test_name_buf[64]; 1171 static int tmon_seq; 1172 int r; 1173 1174 if (netns) { 1175 nstoken = open_netns(netns); 1176 if (!nstoken) 1177 return NULL; 1178 } 1179 ctx = malloc(sizeof(*ctx)); 1180 if (!ctx) { 1181 log_err("Failed to malloc ctx"); 1182 goto fail_ctx; 1183 } 1184 memset(ctx, 0, sizeof(*ctx)); 1185 1186 encode_test_name(test_name_buf, sizeof(test_name_buf), test_name, subtest_name); 1187 snprintf(ctx->pkt_fname, sizeof(ctx->pkt_fname), 1188 PCAP_DIR "/packets-%d-%d-%s-%s.log", getpid(), tmon_seq++, 1189 test_name_buf, netns ? netns : "unknown"); 1190 1191 r = mkdir(PCAP_DIR, 0755); 1192 if (r && errno != EEXIST) { 1193 log_err("Failed to create " PCAP_DIR); 1194 goto fail_pcap; 1195 } 1196 1197 ctx->pcap = traffic_monitor_prepare_pcap(); 1198 if (!ctx->pcap) 1199 goto fail_pcap; 1200 ctx->pcap_fd = pcap_get_selectable_fd(ctx->pcap); 1201 if (ctx->pcap_fd < 0) { 1202 log_err("Failed to get pcap fd"); 1203 goto fail_dumper; 1204 } 1205 1206 /* Create a packet file */ 1207 ctx->dumper = pcap_dump_open(ctx->pcap, ctx->pkt_fname); 1208 if (!ctx->dumper) { 1209 log_err("Failed to open pcap dump: %s", ctx->pkt_fname); 1210 goto fail_dumper; 1211 } 1212 1213 /* Create an eventfd to wake up the monitor thread */ 1214 ctx->wake_fd = eventfd(0, 0); 1215 if (ctx->wake_fd < 0) { 1216 log_err("Failed to create eventfd"); 1217 goto fail_eventfd; 1218 } 1219 1220 r = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx); 1221 if (r) { 1222 log_err("Failed to create thread"); 1223 goto fail; 1224 } 1225 1226 close_netns(nstoken); 1227 1228 return ctx; 1229 1230 fail: 1231 close(ctx->wake_fd); 1232 1233 fail_eventfd: 1234 pcap_dump_close(ctx->dumper); 1235 unlink(ctx->pkt_fname); 1236 1237 fail_dumper: 1238 pcap_close(ctx->pcap); 1239 1240 fail_pcap: 1241 free(ctx); 1242 1243 fail_ctx: 1244 close_netns(nstoken); 1245 1246 return NULL; 1247 } 1248 1249 static void traffic_monitor_release(struct tmonitor_ctx *ctx) 1250 { 1251 pcap_close(ctx->pcap); 1252 pcap_dump_close(ctx->dumper); 1253 1254 close(ctx->wake_fd); 1255 1256 free(ctx); 1257 } 1258 1259 /* Stop the network traffic monitor. 1260 * 1261 * ctx: the context returned by traffic_monitor_start() 1262 */ 1263 void traffic_monitor_stop(struct tmonitor_ctx *ctx) 1264 { 1265 __u64 w = 1; 1266 1267 if (!ctx) 1268 return; 1269 1270 /* Stop the monitor thread */ 1271 ctx->done = true; 1272 /* Wake up the background thread. */ 1273 write(ctx->wake_fd, &w, sizeof(w)); 1274 pthread_join(ctx->thread, NULL); 1275 1276 tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1); 1277 1278 traffic_monitor_release(ctx); 1279 } 1280 1281 #endif /* TRAFFIC_MONITOR */ 1282