1 // SPDX-License-Identifier: GPL-2.0-only 2 #define _GNU_SOURCE 3 4 #include <errno.h> 5 #include <stdbool.h> 6 #include <stdio.h> 7 #include <string.h> 8 #include <unistd.h> 9 #include <sched.h> 10 11 #include <arpa/inet.h> 12 #include <sys/mount.h> 13 #include <sys/stat.h> 14 #include <sys/types.h> 15 #include <sys/un.h> 16 #include <sys/eventfd.h> 17 18 #include <linux/err.h> 19 #include <linux/in.h> 20 #include <linux/in6.h> 21 #include <linux/limits.h> 22 23 #include <linux/ip.h> 24 #include <netinet/udp.h> 25 #include <netinet/tcp.h> 26 #include <net/if.h> 27 28 #include "bpf_util.h" 29 #include "network_helpers.h" 30 #include "test_progs.h" 31 32 #ifdef TRAFFIC_MONITOR 33 /* Prevent pcap.h from including pcap/bpf.h and causing conflicts */ 34 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1 35 #include <pcap/pcap.h> 36 #include <pcap/dlt.h> 37 #endif 38 39 #ifndef IPPROTO_MPTCP 40 #define IPPROTO_MPTCP 262 41 #endif 42 43 #define clean_errno() (errno == 0 ? "None" : strerror(errno)) 44 #define log_err(MSG, ...) ({ \ 45 int __save = errno; \ 46 fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ 47 __FILE__, __LINE__, clean_errno(), \ 48 ##__VA_ARGS__); \ 49 errno = __save; \ 50 }) 51 52 struct ipv4_packet pkt_v4 = { 53 .eth.h_proto = __bpf_constant_htons(ETH_P_IP), 54 .iph.ihl = 5, 55 .iph.protocol = IPPROTO_TCP, 56 .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), 57 .tcp.urg_ptr = 123, 58 .tcp.doff = 5, 59 }; 60 61 struct ipv6_packet pkt_v6 = { 62 .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), 63 .iph.nexthdr = IPPROTO_TCP, 64 .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), 65 .tcp.urg_ptr = 123, 66 .tcp.doff = 5, 67 }; 68 69 static const struct network_helper_opts default_opts; 70 71 int settimeo(int fd, int timeout_ms) 72 { 73 struct timeval timeout = { .tv_sec = 3 }; 74 75 if (timeout_ms > 0) { 76 timeout.tv_sec = timeout_ms / 1000; 77 timeout.tv_usec = (timeout_ms % 1000) * 1000; 78 } 79 80 if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, 81 sizeof(timeout))) { 82 log_err("Failed to set SO_RCVTIMEO"); 83 return -1; 84 } 85 86 if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeout, 87 sizeof(timeout))) { 88 log_err("Failed to set SO_SNDTIMEO"); 89 return -1; 90 } 91 92 return 0; 93 } 94 95 #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; }) 96 97 int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen, 98 const struct network_helper_opts *opts) 99 { 100 int fd; 101 102 if (!opts) 103 opts = &default_opts; 104 105 fd = socket(addr->ss_family, type, opts->proto); 106 if (fd < 0) { 107 log_err("Failed to create server socket"); 108 return -1; 109 } 110 111 if (settimeo(fd, opts->timeout_ms)) 112 goto error_close; 113 114 if (opts->post_socket_cb && 115 opts->post_socket_cb(fd, opts->cb_opts)) { 116 log_err("Failed to call post_socket_cb"); 117 goto error_close; 118 } 119 120 if (bind(fd, (struct sockaddr *)addr, addrlen) < 0) { 121 log_err("Failed to bind socket"); 122 goto error_close; 123 } 124 125 if (type == SOCK_STREAM) { 126 if (listen(fd, opts->backlog ? MAX(opts->backlog, 0) : 1) < 0) { 127 log_err("Failed to listed on socket"); 128 goto error_close; 129 } 130 } 131 132 return fd; 133 134 error_close: 135 save_errno_close(fd); 136 return -1; 137 } 138 139 int start_server_str(int family, int type, const char *addr_str, __u16 port, 140 const struct network_helper_opts *opts) 141 { 142 struct sockaddr_storage addr; 143 socklen_t addrlen; 144 145 if (!opts) 146 opts = &default_opts; 147 148 if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 149 return -1; 150 151 return start_server_addr(type, &addr, addrlen, opts); 152 } 153 154 int start_server(int family, int type, const char *addr_str, __u16 port, 155 int timeout_ms) 156 { 157 struct network_helper_opts opts = { 158 .timeout_ms = timeout_ms, 159 }; 160 161 return start_server_str(family, type, addr_str, port, &opts); 162 } 163 164 static int reuseport_cb(int fd, void *opts) 165 { 166 int on = 1; 167 168 return setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on)); 169 } 170 171 int *start_reuseport_server(int family, int type, const char *addr_str, 172 __u16 port, int timeout_ms, unsigned int nr_listens) 173 { 174 struct network_helper_opts opts = { 175 .timeout_ms = timeout_ms, 176 .post_socket_cb = reuseport_cb, 177 }; 178 struct sockaddr_storage addr; 179 unsigned int nr_fds = 0; 180 socklen_t addrlen; 181 int *fds; 182 183 if (!nr_listens) 184 return NULL; 185 186 if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 187 return NULL; 188 189 fds = malloc(sizeof(*fds) * nr_listens); 190 if (!fds) 191 return NULL; 192 193 fds[0] = start_server_addr(type, &addr, addrlen, &opts); 194 if (fds[0] == -1) 195 goto close_fds; 196 nr_fds = 1; 197 198 if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen)) 199 goto close_fds; 200 201 for (; nr_fds < nr_listens; nr_fds++) { 202 fds[nr_fds] = start_server_addr(type, &addr, addrlen, &opts); 203 if (fds[nr_fds] == -1) 204 goto close_fds; 205 } 206 207 return fds; 208 209 close_fds: 210 free_fds(fds, nr_fds); 211 return NULL; 212 } 213 214 void free_fds(int *fds, unsigned int nr_close_fds) 215 { 216 if (fds) { 217 while (nr_close_fds) 218 close(fds[--nr_close_fds]); 219 free(fds); 220 } 221 } 222 223 int fastopen_connect(int server_fd, const char *data, unsigned int data_len, 224 int timeout_ms) 225 { 226 struct sockaddr_storage addr; 227 socklen_t addrlen = sizeof(addr); 228 struct sockaddr_in *addr_in; 229 int fd, ret; 230 231 if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { 232 log_err("Failed to get server addr"); 233 return -1; 234 } 235 236 addr_in = (struct sockaddr_in *)&addr; 237 fd = socket(addr_in->sin_family, SOCK_STREAM, 0); 238 if (fd < 0) { 239 log_err("Failed to create client socket"); 240 return -1; 241 } 242 243 if (settimeo(fd, timeout_ms)) 244 goto error_close; 245 246 ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr, 247 addrlen); 248 if (ret != data_len) { 249 log_err("sendto(data, %u) != %d\n", data_len, ret); 250 goto error_close; 251 } 252 253 return fd; 254 255 error_close: 256 save_errno_close(fd); 257 return -1; 258 } 259 260 int client_socket(int family, int type, 261 const struct network_helper_opts *opts) 262 { 263 int fd; 264 265 if (!opts) 266 opts = &default_opts; 267 268 fd = socket(family, type, opts->proto); 269 if (fd < 0) { 270 log_err("Failed to create client socket"); 271 return -1; 272 } 273 274 if (settimeo(fd, opts->timeout_ms)) 275 goto error_close; 276 277 if (opts->post_socket_cb && 278 opts->post_socket_cb(fd, opts->cb_opts)) 279 goto error_close; 280 281 return fd; 282 283 error_close: 284 save_errno_close(fd); 285 return -1; 286 } 287 288 int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen, 289 const struct network_helper_opts *opts) 290 { 291 int fd; 292 293 if (!opts) 294 opts = &default_opts; 295 296 fd = client_socket(addr->ss_family, type, opts); 297 if (fd < 0) { 298 log_err("Failed to create client socket"); 299 return -1; 300 } 301 302 if (connect(fd, (const struct sockaddr *)addr, addrlen)) { 303 log_err("Failed to connect to server"); 304 save_errno_close(fd); 305 return -1; 306 } 307 308 return fd; 309 } 310 311 int connect_to_addr_str(int family, int type, const char *addr_str, __u16 port, 312 const struct network_helper_opts *opts) 313 { 314 struct sockaddr_storage addr; 315 socklen_t addrlen; 316 317 if (!opts) 318 opts = &default_opts; 319 320 if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 321 return -1; 322 323 return connect_to_addr(type, &addr, addrlen, opts); 324 } 325 326 int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) 327 { 328 struct sockaddr_storage addr; 329 socklen_t addrlen, optlen; 330 int type; 331 332 if (!opts) 333 opts = &default_opts; 334 335 optlen = sizeof(type); 336 if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) { 337 log_err("getsockopt(SOL_TYPE)"); 338 return -1; 339 } 340 341 addrlen = sizeof(addr); 342 if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { 343 log_err("Failed to get server addr"); 344 return -1; 345 } 346 347 return connect_to_addr(type, &addr, addrlen, opts); 348 } 349 350 int connect_to_fd(int server_fd, int timeout_ms) 351 { 352 struct network_helper_opts opts = { 353 .timeout_ms = timeout_ms, 354 }; 355 socklen_t optlen; 356 int protocol; 357 358 optlen = sizeof(protocol); 359 if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) { 360 log_err("getsockopt(SOL_PROTOCOL)"); 361 return -1; 362 } 363 opts.proto = protocol; 364 365 return connect_to_fd_opts(server_fd, &opts); 366 } 367 368 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms) 369 { 370 struct sockaddr_storage addr; 371 socklen_t len = sizeof(addr); 372 373 if (settimeo(client_fd, timeout_ms)) 374 return -1; 375 376 if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { 377 log_err("Failed to get server addr"); 378 return -1; 379 } 380 381 if (connect(client_fd, (const struct sockaddr *)&addr, len)) { 382 log_err("Failed to connect to server"); 383 return -1; 384 } 385 386 return 0; 387 } 388 389 int make_sockaddr(int family, const char *addr_str, __u16 port, 390 struct sockaddr_storage *addr, socklen_t *len) 391 { 392 if (family == AF_INET) { 393 struct sockaddr_in *sin = (void *)addr; 394 395 memset(addr, 0, sizeof(*sin)); 396 sin->sin_family = AF_INET; 397 sin->sin_port = htons(port); 398 if (addr_str && 399 inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) { 400 log_err("inet_pton(AF_INET, %s)", addr_str); 401 return -1; 402 } 403 if (len) 404 *len = sizeof(*sin); 405 return 0; 406 } else if (family == AF_INET6) { 407 struct sockaddr_in6 *sin6 = (void *)addr; 408 409 memset(addr, 0, sizeof(*sin6)); 410 sin6->sin6_family = AF_INET6; 411 sin6->sin6_port = htons(port); 412 if (addr_str && 413 inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) { 414 log_err("inet_pton(AF_INET6, %s)", addr_str); 415 return -1; 416 } 417 if (len) 418 *len = sizeof(*sin6); 419 return 0; 420 } else if (family == AF_UNIX) { 421 /* Note that we always use abstract unix sockets to avoid having 422 * to clean up leftover files. 423 */ 424 struct sockaddr_un *sun = (void *)addr; 425 426 memset(addr, 0, sizeof(*sun)); 427 sun->sun_family = family; 428 sun->sun_path[0] = 0; 429 strcpy(sun->sun_path + 1, addr_str); 430 if (len) 431 *len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str); 432 return 0; 433 } 434 return -1; 435 } 436 437 char *ping_command(int family) 438 { 439 if (family == AF_INET6) { 440 /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */ 441 if (!system("which ping6 >/dev/null 2>&1")) 442 return "ping6"; 443 else 444 return "ping -6"; 445 } 446 return "ping"; 447 } 448 449 int remove_netns(const char *name) 450 { 451 char *cmd; 452 int r; 453 454 r = asprintf(&cmd, "ip netns del %s >/dev/null 2>&1", name); 455 if (r < 0) { 456 log_err("Failed to malloc cmd"); 457 return -1; 458 } 459 460 r = system(cmd); 461 free(cmd); 462 return r; 463 } 464 465 int make_netns(const char *name) 466 { 467 char *cmd; 468 int r; 469 470 r = asprintf(&cmd, "ip netns add %s", name); 471 if (r < 0) { 472 log_err("Failed to malloc cmd"); 473 return -1; 474 } 475 476 r = system(cmd); 477 free(cmd); 478 479 if (r) 480 return r; 481 482 r = asprintf(&cmd, "ip -n %s link set lo up", name); 483 if (r < 0) { 484 log_err("Failed to malloc cmd for setting up lo"); 485 remove_netns(name); 486 return -1; 487 } 488 489 r = system(cmd); 490 free(cmd); 491 492 return r; 493 } 494 495 struct nstoken { 496 int orig_netns_fd; 497 }; 498 499 struct nstoken *open_netns(const char *name) 500 { 501 int nsfd; 502 char nspath[PATH_MAX]; 503 int err; 504 struct nstoken *token; 505 506 token = calloc(1, sizeof(struct nstoken)); 507 if (!token) { 508 log_err("Failed to malloc token"); 509 return NULL; 510 } 511 512 token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY); 513 if (token->orig_netns_fd == -1) { 514 log_err("Failed to open(/proc/self/ns/net)"); 515 goto fail; 516 } 517 518 snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); 519 nsfd = open(nspath, O_RDONLY | O_CLOEXEC); 520 if (nsfd == -1) { 521 log_err("Failed to open(%s)", nspath); 522 goto fail; 523 } 524 525 err = setns(nsfd, CLONE_NEWNET); 526 close(nsfd); 527 if (err) { 528 log_err("Failed to setns(nsfd)"); 529 goto fail; 530 } 531 532 return token; 533 fail: 534 if (token->orig_netns_fd != -1) 535 close(token->orig_netns_fd); 536 free(token); 537 return NULL; 538 } 539 540 void close_netns(struct nstoken *token) 541 { 542 if (!token) 543 return; 544 545 if (setns(token->orig_netns_fd, CLONE_NEWNET)) 546 log_err("Failed to setns(orig_netns_fd)"); 547 close(token->orig_netns_fd); 548 free(token); 549 } 550 551 int open_tuntap(const char *dev_name, bool need_mac) 552 { 553 int err = 0; 554 struct ifreq ifr; 555 int fd = open("/dev/net/tun", O_RDWR); 556 557 if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)")) 558 return -1; 559 560 ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); 561 strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1); 562 ifr.ifr_name[IFNAMSIZ - 1] = '\0'; 563 564 err = ioctl(fd, TUNSETIFF, &ifr); 565 if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { 566 close(fd); 567 return -1; 568 } 569 570 err = fcntl(fd, F_SETFL, O_NONBLOCK); 571 if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { 572 close(fd); 573 return -1; 574 } 575 576 return fd; 577 } 578 579 int get_socket_local_port(int sock_fd) 580 { 581 struct sockaddr_storage addr; 582 socklen_t addrlen = sizeof(addr); 583 int err; 584 585 err = getsockname(sock_fd, (struct sockaddr *)&addr, &addrlen); 586 if (err < 0) 587 return err; 588 589 if (addr.ss_family == AF_INET) { 590 struct sockaddr_in *sin = (struct sockaddr_in *)&addr; 591 592 return sin->sin_port; 593 } else if (addr.ss_family == AF_INET6) { 594 struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&addr; 595 596 return sin->sin6_port; 597 } 598 599 return -1; 600 } 601 602 int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param) 603 { 604 struct ifreq ifr = {0}; 605 int sockfd, err; 606 607 sockfd = socket(AF_INET, SOCK_DGRAM, 0); 608 if (sockfd < 0) 609 return -errno; 610 611 memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); 612 613 ring_param->cmd = ETHTOOL_GRINGPARAM; 614 ifr.ifr_data = (char *)ring_param; 615 616 if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) { 617 err = errno; 618 close(sockfd); 619 return -err; 620 } 621 622 close(sockfd); 623 return 0; 624 } 625 626 int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param) 627 { 628 struct ifreq ifr = {0}; 629 int sockfd, err; 630 631 sockfd = socket(AF_INET, SOCK_DGRAM, 0); 632 if (sockfd < 0) 633 return -errno; 634 635 memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); 636 637 ring_param->cmd = ETHTOOL_SRINGPARAM; 638 ifr.ifr_data = (char *)ring_param; 639 640 if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) { 641 err = errno; 642 close(sockfd); 643 return -err; 644 } 645 646 close(sockfd); 647 return 0; 648 } 649 650 struct send_recv_arg { 651 int fd; 652 uint32_t bytes; 653 int stop; 654 }; 655 656 static void *send_recv_server(void *arg) 657 { 658 struct send_recv_arg *a = (struct send_recv_arg *)arg; 659 ssize_t nr_sent = 0, bytes = 0; 660 char batch[1500]; 661 int err = 0, fd; 662 663 fd = accept(a->fd, NULL, NULL); 664 while (fd == -1) { 665 if (errno == EINTR) 666 continue; 667 err = -errno; 668 goto done; 669 } 670 671 if (settimeo(fd, 0)) { 672 err = -errno; 673 goto done; 674 } 675 676 while (bytes < a->bytes && !READ_ONCE(a->stop)) { 677 nr_sent = send(fd, &batch, 678 MIN(a->bytes - bytes, sizeof(batch)), 0); 679 if (nr_sent == -1 && errno == EINTR) 680 continue; 681 if (nr_sent == -1) { 682 err = -errno; 683 break; 684 } 685 bytes += nr_sent; 686 } 687 688 if (bytes != a->bytes) { 689 log_err("send %zd expected %u", bytes, a->bytes); 690 if (!err) 691 err = bytes > a->bytes ? -E2BIG : -EINTR; 692 } 693 694 done: 695 if (fd >= 0) 696 close(fd); 697 if (err) { 698 WRITE_ONCE(a->stop, 1); 699 return ERR_PTR(err); 700 } 701 return NULL; 702 } 703 704 int send_recv_data(int lfd, int fd, uint32_t total_bytes) 705 { 706 ssize_t nr_recv = 0, bytes = 0; 707 struct send_recv_arg arg = { 708 .fd = lfd, 709 .bytes = total_bytes, 710 .stop = 0, 711 }; 712 pthread_t srv_thread; 713 void *thread_ret; 714 char batch[1500]; 715 int err = 0; 716 717 err = pthread_create(&srv_thread, NULL, send_recv_server, (void *)&arg); 718 if (err) { 719 log_err("Failed to pthread_create"); 720 return err; 721 } 722 723 /* recv total_bytes */ 724 while (bytes < total_bytes && !READ_ONCE(arg.stop)) { 725 nr_recv = recv(fd, &batch, 726 MIN(total_bytes - bytes, sizeof(batch)), 0); 727 if (nr_recv == -1 && errno == EINTR) 728 continue; 729 if (nr_recv == -1) { 730 err = -errno; 731 break; 732 } 733 bytes += nr_recv; 734 } 735 736 if (bytes != total_bytes) { 737 log_err("recv %zd expected %u", bytes, total_bytes); 738 if (!err) 739 err = bytes > total_bytes ? -E2BIG : -EINTR; 740 } 741 742 WRITE_ONCE(arg.stop, 1); 743 pthread_join(srv_thread, &thread_ret); 744 if (IS_ERR(thread_ret)) { 745 log_err("Failed in thread_ret %ld", PTR_ERR(thread_ret)); 746 err = err ? : PTR_ERR(thread_ret); 747 } 748 749 return err; 750 } 751 752 #ifdef TRAFFIC_MONITOR 753 struct tmonitor_ctx { 754 pcap_t *pcap; 755 pcap_dumper_t *dumper; 756 pthread_t thread; 757 int wake_fd; 758 759 volatile bool done; 760 char pkt_fname[PATH_MAX]; 761 int pcap_fd; 762 }; 763 764 /* Is this packet captured with a Ethernet protocol type? */ 765 static bool is_ethernet(const u_char *packet) 766 { 767 u16 arphdr_type; 768 769 memcpy(&arphdr_type, packet + 8, 2); 770 arphdr_type = ntohs(arphdr_type); 771 772 /* Except the following cases, the protocol type contains the 773 * Ethernet protocol type for the packet. 774 * 775 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html 776 */ 777 switch (arphdr_type) { 778 case 770: /* ARPHRD_FRAD */ 779 case 778: /* ARPHDR_IPGRE */ 780 case 803: /* ARPHRD_IEEE80211_RADIOTAP */ 781 printf("Packet captured: arphdr_type=%d\n", arphdr_type); 782 return false; 783 } 784 return true; 785 } 786 787 static const char * const pkt_types[] = { 788 "In", 789 "B", /* Broadcast */ 790 "M", /* Multicast */ 791 "C", /* Captured with the promiscuous mode */ 792 "Out", 793 }; 794 795 static const char *pkt_type_str(u16 pkt_type) 796 { 797 if (pkt_type < ARRAY_SIZE(pkt_types)) 798 return pkt_types[pkt_type]; 799 return "Unknown"; 800 } 801 802 /* Show the information of the transport layer in the packet */ 803 static void show_transport(const u_char *packet, u16 len, u32 ifindex, 804 const char *src_addr, const char *dst_addr, 805 u16 proto, bool ipv6, u8 pkt_type) 806 { 807 char *ifname, _ifname[IF_NAMESIZE]; 808 const char *transport_str; 809 u16 src_port, dst_port; 810 struct udphdr *udp; 811 struct tcphdr *tcp; 812 813 ifname = if_indextoname(ifindex, _ifname); 814 if (!ifname) { 815 snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex); 816 ifname = _ifname; 817 } 818 819 if (proto == IPPROTO_UDP) { 820 udp = (struct udphdr *)packet; 821 src_port = ntohs(udp->source); 822 dst_port = ntohs(udp->dest); 823 transport_str = "UDP"; 824 } else if (proto == IPPROTO_TCP) { 825 tcp = (struct tcphdr *)packet; 826 src_port = ntohs(tcp->source); 827 dst_port = ntohs(tcp->dest); 828 transport_str = "TCP"; 829 } else if (proto == IPPROTO_ICMP) { 830 printf("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n", 831 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, 832 packet[0], packet[1]); 833 return; 834 } else if (proto == IPPROTO_ICMPV6) { 835 printf("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n", 836 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, 837 packet[0], packet[1]); 838 return; 839 } else { 840 printf("%-7s %-3s %s %s > %s: protocol %d\n", 841 ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4", 842 src_addr, dst_addr, proto); 843 return; 844 } 845 846 /* TCP or UDP*/ 847 848 flockfile(stdout); 849 if (ipv6) 850 printf("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d", 851 ifname, pkt_type_str(pkt_type), src_addr, src_port, 852 dst_addr, dst_port, transport_str, len); 853 else 854 printf("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d", 855 ifname, pkt_type_str(pkt_type), src_addr, src_port, 856 dst_addr, dst_port, transport_str, len); 857 858 if (proto == IPPROTO_TCP) { 859 if (tcp->fin) 860 printf(", FIN"); 861 if (tcp->syn) 862 printf(", SYN"); 863 if (tcp->rst) 864 printf(", RST"); 865 if (tcp->ack) 866 printf(", ACK"); 867 } 868 869 printf("\n"); 870 funlockfile(stdout); 871 } 872 873 static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type) 874 { 875 char src_buf[INET6_ADDRSTRLEN], dst_buf[INET6_ADDRSTRLEN]; 876 struct ipv6hdr *pkt = (struct ipv6hdr *)packet; 877 const char *src, *dst; 878 u_char proto; 879 880 src = inet_ntop(AF_INET6, &pkt->saddr, src_buf, sizeof(src_buf)); 881 if (!src) 882 src = "<invalid>"; 883 dst = inet_ntop(AF_INET6, &pkt->daddr, dst_buf, sizeof(dst_buf)); 884 if (!dst) 885 dst = "<invalid>"; 886 proto = pkt->nexthdr; 887 show_transport(packet + sizeof(struct ipv6hdr), 888 ntohs(pkt->payload_len), 889 ifindex, src, dst, proto, true, pkt_type); 890 } 891 892 static void show_ipv4_packet(const u_char *packet, u32 ifindex, u8 pkt_type) 893 { 894 char src_buf[INET_ADDRSTRLEN], dst_buf[INET_ADDRSTRLEN]; 895 struct iphdr *pkt = (struct iphdr *)packet; 896 const char *src, *dst; 897 u_char proto; 898 899 src = inet_ntop(AF_INET, &pkt->saddr, src_buf, sizeof(src_buf)); 900 if (!src) 901 src = "<invalid>"; 902 dst = inet_ntop(AF_INET, &pkt->daddr, dst_buf, sizeof(dst_buf)); 903 if (!dst) 904 dst = "<invalid>"; 905 proto = pkt->protocol; 906 show_transport(packet + sizeof(struct iphdr), 907 ntohs(pkt->tot_len), 908 ifindex, src, dst, proto, false, pkt_type); 909 } 910 911 static void *traffic_monitor_thread(void *arg) 912 { 913 char *ifname, _ifname[IF_NAMESIZE]; 914 const u_char *packet, *payload; 915 struct tmonitor_ctx *ctx = arg; 916 pcap_dumper_t *dumper = ctx->dumper; 917 int fd = ctx->pcap_fd, nfds, r; 918 int wake_fd = ctx->wake_fd; 919 struct pcap_pkthdr header; 920 pcap_t *pcap = ctx->pcap; 921 u32 ifindex; 922 fd_set fds; 923 u16 proto; 924 u8 ptype; 925 926 nfds = (fd > wake_fd ? fd : wake_fd) + 1; 927 FD_ZERO(&fds); 928 929 while (!ctx->done) { 930 FD_SET(fd, &fds); 931 FD_SET(wake_fd, &fds); 932 r = select(nfds, &fds, NULL, NULL, NULL); 933 if (!r) 934 continue; 935 if (r < 0) { 936 if (errno == EINTR) 937 continue; 938 log_err("Fail to select on pcap fd and wake fd"); 939 break; 940 } 941 942 /* This instance of pcap is non-blocking */ 943 packet = pcap_next(pcap, &header); 944 if (!packet) 945 continue; 946 947 /* According to the man page of pcap_dump(), first argument 948 * is the pcap_dumper_t pointer even it's argument type is 949 * u_char *. 950 */ 951 pcap_dump((u_char *)dumper, &header, packet); 952 953 /* Not sure what other types of packets look like. Here, we 954 * parse only Ethernet and compatible packets. 955 */ 956 if (!is_ethernet(packet)) 957 continue; 958 959 /* Skip SLL2 header 960 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html 961 * 962 * Although the document doesn't mention that, the payload 963 * doesn't include the Ethernet header. The payload starts 964 * from the first byte of the network layer header. 965 */ 966 payload = packet + 20; 967 968 memcpy(&proto, packet, 2); 969 proto = ntohs(proto); 970 memcpy(&ifindex, packet + 4, 4); 971 ifindex = ntohl(ifindex); 972 ptype = packet[10]; 973 974 if (proto == ETH_P_IPV6) { 975 show_ipv6_packet(payload, ifindex, ptype); 976 } else if (proto == ETH_P_IP) { 977 show_ipv4_packet(payload, ifindex, ptype); 978 } else { 979 ifname = if_indextoname(ifindex, _ifname); 980 if (!ifname) { 981 snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex); 982 ifname = _ifname; 983 } 984 985 printf("%-7s %-3s Unknown network protocol type 0x%x\n", 986 ifname, pkt_type_str(ptype), proto); 987 } 988 } 989 990 return NULL; 991 } 992 993 /* Prepare the pcap handle to capture packets. 994 * 995 * This pcap is non-blocking and immediate mode is enabled to receive 996 * captured packets as soon as possible. The snaplen is set to 1024 bytes 997 * to limit the size of captured content. The format of the link-layer 998 * header is set to DLT_LINUX_SLL2 to enable handling various link-layer 999 * technologies. 1000 */ 1001 static pcap_t *traffic_monitor_prepare_pcap(void) 1002 { 1003 char errbuf[PCAP_ERRBUF_SIZE]; 1004 pcap_t *pcap; 1005 int r; 1006 1007 /* Listen on all NICs in the namespace */ 1008 pcap = pcap_create("any", errbuf); 1009 if (!pcap) { 1010 log_err("Failed to open pcap: %s", errbuf); 1011 return NULL; 1012 } 1013 /* Limit the size of the packet (first N bytes) */ 1014 r = pcap_set_snaplen(pcap, 1024); 1015 if (r) { 1016 log_err("Failed to set snaplen: %s", pcap_geterr(pcap)); 1017 goto error; 1018 } 1019 /* To receive packets as fast as possible */ 1020 r = pcap_set_immediate_mode(pcap, 1); 1021 if (r) { 1022 log_err("Failed to set immediate mode: %s", pcap_geterr(pcap)); 1023 goto error; 1024 } 1025 r = pcap_setnonblock(pcap, 1, errbuf); 1026 if (r) { 1027 log_err("Failed to set nonblock: %s", errbuf); 1028 goto error; 1029 } 1030 r = pcap_activate(pcap); 1031 if (r) { 1032 log_err("Failed to activate pcap: %s", pcap_geterr(pcap)); 1033 goto error; 1034 } 1035 /* Determine the format of the link-layer header */ 1036 r = pcap_set_datalink(pcap, DLT_LINUX_SLL2); 1037 if (r) { 1038 log_err("Failed to set datalink: %s", pcap_geterr(pcap)); 1039 goto error; 1040 } 1041 1042 return pcap; 1043 error: 1044 pcap_close(pcap); 1045 return NULL; 1046 } 1047 1048 static void encode_test_name(char *buf, size_t len, const char *test_name, const char *subtest_name) 1049 { 1050 char *p; 1051 1052 if (subtest_name) 1053 snprintf(buf, len, "%s__%s", test_name, subtest_name); 1054 else 1055 snprintf(buf, len, "%s", test_name); 1056 while ((p = strchr(buf, '/'))) 1057 *p = '_'; 1058 while ((p = strchr(buf, ' '))) 1059 *p = '_'; 1060 } 1061 1062 #define PCAP_DIR "/tmp/tmon_pcap" 1063 1064 /* Start to monitor the network traffic in the given network namespace. 1065 * 1066 * netns: the name of the network namespace to monitor. If NULL, the 1067 * current network namespace is monitored. 1068 * test_name: the name of the running test. 1069 * subtest_name: the name of the running subtest if there is. It should be 1070 * NULL if it is not a subtest. 1071 * 1072 * This function will start a thread to capture packets going through NICs 1073 * in the give network namespace. 1074 */ 1075 struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name, 1076 const char *subtest_name) 1077 { 1078 struct nstoken *nstoken = NULL; 1079 struct tmonitor_ctx *ctx; 1080 char test_name_buf[64]; 1081 static int tmon_seq; 1082 int r; 1083 1084 if (netns) { 1085 nstoken = open_netns(netns); 1086 if (!nstoken) 1087 return NULL; 1088 } 1089 ctx = malloc(sizeof(*ctx)); 1090 if (!ctx) { 1091 log_err("Failed to malloc ctx"); 1092 goto fail_ctx; 1093 } 1094 memset(ctx, 0, sizeof(*ctx)); 1095 1096 encode_test_name(test_name_buf, sizeof(test_name_buf), test_name, subtest_name); 1097 snprintf(ctx->pkt_fname, sizeof(ctx->pkt_fname), 1098 PCAP_DIR "/packets-%d-%d-%s-%s.log", getpid(), tmon_seq++, 1099 test_name_buf, netns ? netns : "unknown"); 1100 1101 r = mkdir(PCAP_DIR, 0755); 1102 if (r && errno != EEXIST) { 1103 log_err("Failed to create " PCAP_DIR); 1104 goto fail_pcap; 1105 } 1106 1107 ctx->pcap = traffic_monitor_prepare_pcap(); 1108 if (!ctx->pcap) 1109 goto fail_pcap; 1110 ctx->pcap_fd = pcap_get_selectable_fd(ctx->pcap); 1111 if (ctx->pcap_fd < 0) { 1112 log_err("Failed to get pcap fd"); 1113 goto fail_dumper; 1114 } 1115 1116 /* Create a packet file */ 1117 ctx->dumper = pcap_dump_open(ctx->pcap, ctx->pkt_fname); 1118 if (!ctx->dumper) { 1119 log_err("Failed to open pcap dump: %s", ctx->pkt_fname); 1120 goto fail_dumper; 1121 } 1122 1123 /* Create an eventfd to wake up the monitor thread */ 1124 ctx->wake_fd = eventfd(0, 0); 1125 if (ctx->wake_fd < 0) { 1126 log_err("Failed to create eventfd"); 1127 goto fail_eventfd; 1128 } 1129 1130 r = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx); 1131 if (r) { 1132 log_err("Failed to create thread"); 1133 goto fail; 1134 } 1135 1136 close_netns(nstoken); 1137 1138 return ctx; 1139 1140 fail: 1141 close(ctx->wake_fd); 1142 1143 fail_eventfd: 1144 pcap_dump_close(ctx->dumper); 1145 unlink(ctx->pkt_fname); 1146 1147 fail_dumper: 1148 pcap_close(ctx->pcap); 1149 1150 fail_pcap: 1151 free(ctx); 1152 1153 fail_ctx: 1154 close_netns(nstoken); 1155 1156 return NULL; 1157 } 1158 1159 static void traffic_monitor_release(struct tmonitor_ctx *ctx) 1160 { 1161 pcap_close(ctx->pcap); 1162 pcap_dump_close(ctx->dumper); 1163 1164 close(ctx->wake_fd); 1165 1166 free(ctx); 1167 } 1168 1169 /* Stop the network traffic monitor. 1170 * 1171 * ctx: the context returned by traffic_monitor_start() 1172 */ 1173 void traffic_monitor_stop(struct tmonitor_ctx *ctx) 1174 { 1175 __u64 w = 1; 1176 1177 if (!ctx) 1178 return; 1179 1180 /* Stop the monitor thread */ 1181 ctx->done = true; 1182 /* Wake up the background thread. */ 1183 write(ctx->wake_fd, &w, sizeof(w)); 1184 pthread_join(ctx->thread, NULL); 1185 1186 printf("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1); 1187 1188 traffic_monitor_release(ctx); 1189 } 1190 #endif /* TRAFFIC_MONITOR */ 1191