1 // SPDX-License-Identifier: GPL-2.0-only 2 #define _GNU_SOURCE 3 4 #include <errno.h> 5 #include <stdbool.h> 6 #include <stdio.h> 7 #include <string.h> 8 #include <unistd.h> 9 #include <sched.h> 10 11 #include <arpa/inet.h> 12 #include <sys/mount.h> 13 #include <sys/stat.h> 14 #include <sys/types.h> 15 #include <sys/un.h> 16 #include <sys/eventfd.h> 17 18 #include <linux/err.h> 19 #include <linux/in.h> 20 #include <linux/in6.h> 21 #include <linux/limits.h> 22 23 #include <linux/ip.h> 24 #include <netinet/udp.h> 25 #include <netinet/tcp.h> 26 #include <net/if.h> 27 28 #include "bpf_util.h" 29 #include "network_helpers.h" 30 #include "test_progs.h" 31 32 #ifdef TRAFFIC_MONITOR 33 /* Prevent pcap.h from including pcap/bpf.h and causing conflicts */ 34 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1 35 #include <pcap/pcap.h> 36 #include <pcap/dlt.h> 37 #endif 38 39 #ifndef IPPROTO_MPTCP 40 #define IPPROTO_MPTCP 262 41 #endif 42 43 #define clean_errno() (errno == 0 ? "None" : strerror(errno)) 44 #define log_err(MSG, ...) ({ \ 45 int __save = errno; \ 46 fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ 47 __FILE__, __LINE__, clean_errno(), \ 48 ##__VA_ARGS__); \ 49 errno = __save; \ 50 }) 51 52 struct ipv4_packet pkt_v4 = { 53 .eth.h_proto = __bpf_constant_htons(ETH_P_IP), 54 .iph.ihl = 5, 55 .iph.protocol = IPPROTO_TCP, 56 .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), 57 .tcp.urg_ptr = 123, 58 .tcp.doff = 5, 59 }; 60 61 struct ipv6_packet pkt_v6 = { 62 .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), 63 .iph.nexthdr = IPPROTO_TCP, 64 .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), 65 .tcp.urg_ptr = 123, 66 .tcp.doff = 5, 67 }; 68 69 static const struct network_helper_opts default_opts; 70 71 int settimeo(int fd, int timeout_ms) 72 { 73 struct timeval timeout = { .tv_sec = 3 }; 74 75 if (timeout_ms > 0) { 76 timeout.tv_sec = timeout_ms / 1000; 77 timeout.tv_usec = (timeout_ms % 1000) * 1000; 78 } 79 80 if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, 81 sizeof(timeout))) { 82 log_err("Failed to set SO_RCVTIMEO"); 83 return -1; 84 } 85 86 if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeout, 87 sizeof(timeout))) { 88 log_err("Failed to set SO_SNDTIMEO"); 89 return -1; 90 } 91 92 return 0; 93 } 94 95 #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; }) 96 97 int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen, 98 const struct network_helper_opts *opts) 99 { 100 int fd; 101 102 if (!opts) 103 opts = &default_opts; 104 105 fd = socket(addr->ss_family, type, opts->proto); 106 if (fd < 0) { 107 log_err("Failed to create server socket"); 108 return -1; 109 } 110 111 if (settimeo(fd, opts->timeout_ms)) 112 goto error_close; 113 114 if (opts->post_socket_cb && 115 opts->post_socket_cb(fd, opts->cb_opts)) { 116 log_err("Failed to call post_socket_cb"); 117 goto error_close; 118 } 119 120 if (bind(fd, (struct sockaddr *)addr, addrlen) < 0) { 121 log_err("Failed to bind socket"); 122 goto error_close; 123 } 124 125 if (type == SOCK_STREAM) { 126 if (listen(fd, opts->backlog ? MAX(opts->backlog, 0) : 1) < 0) { 127 log_err("Failed to listed on socket"); 128 goto error_close; 129 } 130 } 131 132 return fd; 133 134 error_close: 135 save_errno_close(fd); 136 return -1; 137 } 138 139 int start_server_str(int family, int type, const char *addr_str, __u16 port, 140 const struct network_helper_opts *opts) 141 { 142 struct sockaddr_storage addr; 143 socklen_t addrlen; 144 145 if (!opts) 146 opts = &default_opts; 147 148 if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 149 return -1; 150 151 return start_server_addr(type, &addr, addrlen, opts); 152 } 153 154 int start_server(int family, int type, const char *addr_str, __u16 port, 155 int timeout_ms) 156 { 157 struct network_helper_opts opts = { 158 .timeout_ms = timeout_ms, 159 }; 160 161 return start_server_str(family, type, addr_str, port, &opts); 162 } 163 164 static int reuseport_cb(int fd, void *opts) 165 { 166 int on = 1; 167 168 return setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on)); 169 } 170 171 int *start_reuseport_server(int family, int type, const char *addr_str, 172 __u16 port, int timeout_ms, unsigned int nr_listens) 173 { 174 struct network_helper_opts opts = { 175 .timeout_ms = timeout_ms, 176 .post_socket_cb = reuseport_cb, 177 }; 178 struct sockaddr_storage addr; 179 unsigned int nr_fds = 0; 180 socklen_t addrlen; 181 int *fds; 182 183 if (!nr_listens) 184 return NULL; 185 186 if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 187 return NULL; 188 189 fds = malloc(sizeof(*fds) * nr_listens); 190 if (!fds) 191 return NULL; 192 193 fds[0] = start_server_addr(type, &addr, addrlen, &opts); 194 if (fds[0] == -1) 195 goto close_fds; 196 nr_fds = 1; 197 198 if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen)) 199 goto close_fds; 200 201 for (; nr_fds < nr_listens; nr_fds++) { 202 fds[nr_fds] = start_server_addr(type, &addr, addrlen, &opts); 203 if (fds[nr_fds] == -1) 204 goto close_fds; 205 } 206 207 return fds; 208 209 close_fds: 210 free_fds(fds, nr_fds); 211 return NULL; 212 } 213 214 void free_fds(int *fds, unsigned int nr_close_fds) 215 { 216 if (fds) { 217 while (nr_close_fds) 218 close(fds[--nr_close_fds]); 219 free(fds); 220 } 221 } 222 223 int fastopen_connect(int server_fd, const char *data, unsigned int data_len, 224 int timeout_ms) 225 { 226 struct sockaddr_storage addr; 227 socklen_t addrlen = sizeof(addr); 228 struct sockaddr_in *addr_in; 229 int fd, ret; 230 231 if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { 232 log_err("Failed to get server addr"); 233 return -1; 234 } 235 236 addr_in = (struct sockaddr_in *)&addr; 237 fd = socket(addr_in->sin_family, SOCK_STREAM, 0); 238 if (fd < 0) { 239 log_err("Failed to create client socket"); 240 return -1; 241 } 242 243 if (settimeo(fd, timeout_ms)) 244 goto error_close; 245 246 ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr, 247 addrlen); 248 if (ret != data_len) { 249 log_err("sendto(data, %u) != %d\n", data_len, ret); 250 goto error_close; 251 } 252 253 return fd; 254 255 error_close: 256 save_errno_close(fd); 257 return -1; 258 } 259 260 int client_socket(int family, int type, 261 const struct network_helper_opts *opts) 262 { 263 int fd; 264 265 if (!opts) 266 opts = &default_opts; 267 268 fd = socket(family, type, opts->proto); 269 if (fd < 0) { 270 log_err("Failed to create client socket"); 271 return -1; 272 } 273 274 if (settimeo(fd, opts->timeout_ms)) 275 goto error_close; 276 277 if (opts->post_socket_cb && 278 opts->post_socket_cb(fd, opts->cb_opts)) 279 goto error_close; 280 281 return fd; 282 283 error_close: 284 save_errno_close(fd); 285 return -1; 286 } 287 288 int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen, 289 const struct network_helper_opts *opts) 290 { 291 int fd; 292 293 if (!opts) 294 opts = &default_opts; 295 296 fd = client_socket(addr->ss_family, type, opts); 297 if (fd < 0) { 298 log_err("Failed to create client socket"); 299 return -1; 300 } 301 302 if (connect(fd, (const struct sockaddr *)addr, addrlen)) { 303 log_err("Failed to connect to server"); 304 save_errno_close(fd); 305 return -1; 306 } 307 308 return fd; 309 } 310 311 int connect_to_addr_str(int family, int type, const char *addr_str, __u16 port, 312 const struct network_helper_opts *opts) 313 { 314 struct sockaddr_storage addr; 315 socklen_t addrlen; 316 317 if (!opts) 318 opts = &default_opts; 319 320 if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 321 return -1; 322 323 return connect_to_addr(type, &addr, addrlen, opts); 324 } 325 326 int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) 327 { 328 struct sockaddr_storage addr; 329 socklen_t addrlen, optlen; 330 int type; 331 332 if (!opts) 333 opts = &default_opts; 334 335 optlen = sizeof(type); 336 if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) { 337 log_err("getsockopt(SOL_TYPE)"); 338 return -1; 339 } 340 341 addrlen = sizeof(addr); 342 if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { 343 log_err("Failed to get server addr"); 344 return -1; 345 } 346 347 return connect_to_addr(type, &addr, addrlen, opts); 348 } 349 350 int connect_to_fd(int server_fd, int timeout_ms) 351 { 352 struct network_helper_opts opts = { 353 .timeout_ms = timeout_ms, 354 }; 355 socklen_t optlen; 356 int protocol; 357 358 optlen = sizeof(protocol); 359 if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) { 360 log_err("getsockopt(SOL_PROTOCOL)"); 361 return -1; 362 } 363 opts.proto = protocol; 364 365 return connect_to_fd_opts(server_fd, &opts); 366 } 367 368 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms) 369 { 370 struct sockaddr_storage addr; 371 socklen_t len = sizeof(addr); 372 373 if (settimeo(client_fd, timeout_ms)) 374 return -1; 375 376 if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { 377 log_err("Failed to get server addr"); 378 return -1; 379 } 380 381 if (connect(client_fd, (const struct sockaddr *)&addr, len)) { 382 log_err("Failed to connect to server"); 383 return -1; 384 } 385 386 return 0; 387 } 388 389 int make_sockaddr(int family, const char *addr_str, __u16 port, 390 struct sockaddr_storage *addr, socklen_t *len) 391 { 392 if (family == AF_INET) { 393 struct sockaddr_in *sin = (void *)addr; 394 395 memset(addr, 0, sizeof(*sin)); 396 sin->sin_family = AF_INET; 397 sin->sin_port = htons(port); 398 if (addr_str && 399 inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) { 400 log_err("inet_pton(AF_INET, %s)", addr_str); 401 return -1; 402 } 403 if (len) 404 *len = sizeof(*sin); 405 return 0; 406 } else if (family == AF_INET6) { 407 struct sockaddr_in6 *sin6 = (void *)addr; 408 409 memset(addr, 0, sizeof(*sin6)); 410 sin6->sin6_family = AF_INET6; 411 sin6->sin6_port = htons(port); 412 if (addr_str && 413 inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) { 414 log_err("inet_pton(AF_INET6, %s)", addr_str); 415 return -1; 416 } 417 if (len) 418 *len = sizeof(*sin6); 419 return 0; 420 } else if (family == AF_UNIX) { 421 /* Note that we always use abstract unix sockets to avoid having 422 * to clean up leftover files. 423 */ 424 struct sockaddr_un *sun = (void *)addr; 425 426 memset(addr, 0, sizeof(*sun)); 427 sun->sun_family = family; 428 sun->sun_path[0] = 0; 429 strcpy(sun->sun_path + 1, addr_str); 430 if (len) 431 *len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str); 432 return 0; 433 } 434 return -1; 435 } 436 437 char *ping_command(int family) 438 { 439 if (family == AF_INET6) { 440 /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */ 441 if (!system("which ping6 >/dev/null 2>&1")) 442 return "ping6"; 443 else 444 return "ping -6"; 445 } 446 return "ping"; 447 } 448 449 int append_tid(char *str, size_t sz) 450 { 451 size_t end; 452 453 if (!str) 454 return -1; 455 456 end = strlen(str); 457 if (end + 8 > sz) 458 return -1; 459 460 sprintf(&str[end], "%07d", gettid()); 461 str[end + 7] = '\0'; 462 463 return 0; 464 } 465 466 int remove_netns(const char *name) 467 { 468 char *cmd; 469 int r; 470 471 r = asprintf(&cmd, "ip netns del %s >/dev/null 2>&1", name); 472 if (r < 0) { 473 log_err("Failed to malloc cmd"); 474 return -1; 475 } 476 477 r = system(cmd); 478 free(cmd); 479 return r; 480 } 481 482 int make_netns(const char *name) 483 { 484 char *cmd; 485 int r; 486 487 r = asprintf(&cmd, "ip netns add %s", name); 488 if (r < 0) { 489 log_err("Failed to malloc cmd"); 490 return -1; 491 } 492 493 r = system(cmd); 494 free(cmd); 495 496 if (r) 497 return r; 498 499 r = asprintf(&cmd, "ip -n %s link set lo up", name); 500 if (r < 0) { 501 log_err("Failed to malloc cmd for setting up lo"); 502 remove_netns(name); 503 return -1; 504 } 505 506 r = system(cmd); 507 free(cmd); 508 509 return r; 510 } 511 512 struct nstoken { 513 int orig_netns_fd; 514 }; 515 516 struct nstoken *open_netns(const char *name) 517 { 518 int nsfd; 519 char nspath[PATH_MAX]; 520 int err; 521 struct nstoken *token; 522 523 token = calloc(1, sizeof(struct nstoken)); 524 if (!token) { 525 log_err("Failed to malloc token"); 526 return NULL; 527 } 528 529 token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY); 530 if (token->orig_netns_fd == -1) { 531 log_err("Failed to open(/proc/self/ns/net)"); 532 goto fail; 533 } 534 535 snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); 536 nsfd = open(nspath, O_RDONLY | O_CLOEXEC); 537 if (nsfd == -1) { 538 log_err("Failed to open(%s)", nspath); 539 goto fail; 540 } 541 542 err = setns(nsfd, CLONE_NEWNET); 543 close(nsfd); 544 if (err) { 545 log_err("Failed to setns(nsfd)"); 546 goto fail; 547 } 548 549 return token; 550 fail: 551 if (token->orig_netns_fd != -1) 552 close(token->orig_netns_fd); 553 free(token); 554 return NULL; 555 } 556 557 void close_netns(struct nstoken *token) 558 { 559 if (!token) 560 return; 561 562 if (setns(token->orig_netns_fd, CLONE_NEWNET)) 563 log_err("Failed to setns(orig_netns_fd)"); 564 close(token->orig_netns_fd); 565 free(token); 566 } 567 568 int get_socket_local_port(int sock_fd) 569 { 570 struct sockaddr_storage addr; 571 socklen_t addrlen = sizeof(addr); 572 int err; 573 574 err = getsockname(sock_fd, (struct sockaddr *)&addr, &addrlen); 575 if (err < 0) 576 return err; 577 578 if (addr.ss_family == AF_INET) { 579 struct sockaddr_in *sin = (struct sockaddr_in *)&addr; 580 581 return sin->sin_port; 582 } else if (addr.ss_family == AF_INET6) { 583 struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&addr; 584 585 return sin->sin6_port; 586 } 587 588 return -1; 589 } 590 591 int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param) 592 { 593 struct ifreq ifr = {0}; 594 int sockfd, err; 595 596 sockfd = socket(AF_INET, SOCK_DGRAM, 0); 597 if (sockfd < 0) 598 return -errno; 599 600 memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); 601 602 ring_param->cmd = ETHTOOL_GRINGPARAM; 603 ifr.ifr_data = (char *)ring_param; 604 605 if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) { 606 err = errno; 607 close(sockfd); 608 return -err; 609 } 610 611 close(sockfd); 612 return 0; 613 } 614 615 int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param) 616 { 617 struct ifreq ifr = {0}; 618 int sockfd, err; 619 620 sockfd = socket(AF_INET, SOCK_DGRAM, 0); 621 if (sockfd < 0) 622 return -errno; 623 624 memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); 625 626 ring_param->cmd = ETHTOOL_SRINGPARAM; 627 ifr.ifr_data = (char *)ring_param; 628 629 if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) { 630 err = errno; 631 close(sockfd); 632 return -err; 633 } 634 635 close(sockfd); 636 return 0; 637 } 638 639 struct send_recv_arg { 640 int fd; 641 uint32_t bytes; 642 int stop; 643 }; 644 645 static void *send_recv_server(void *arg) 646 { 647 struct send_recv_arg *a = (struct send_recv_arg *)arg; 648 ssize_t nr_sent = 0, bytes = 0; 649 char batch[1500]; 650 int err = 0, fd; 651 652 fd = accept(a->fd, NULL, NULL); 653 while (fd == -1) { 654 if (errno == EINTR) 655 continue; 656 err = -errno; 657 goto done; 658 } 659 660 if (settimeo(fd, 0)) { 661 err = -errno; 662 goto done; 663 } 664 665 while (bytes < a->bytes && !READ_ONCE(a->stop)) { 666 nr_sent = send(fd, &batch, 667 MIN(a->bytes - bytes, sizeof(batch)), 0); 668 if (nr_sent == -1 && errno == EINTR) 669 continue; 670 if (nr_sent == -1) { 671 err = -errno; 672 break; 673 } 674 bytes += nr_sent; 675 } 676 677 if (bytes != a->bytes) { 678 log_err("send %zd expected %u", bytes, a->bytes); 679 if (!err) 680 err = bytes > a->bytes ? -E2BIG : -EINTR; 681 } 682 683 done: 684 if (fd >= 0) 685 close(fd); 686 if (err) { 687 WRITE_ONCE(a->stop, 1); 688 return ERR_PTR(err); 689 } 690 return NULL; 691 } 692 693 int send_recv_data(int lfd, int fd, uint32_t total_bytes) 694 { 695 ssize_t nr_recv = 0, bytes = 0; 696 struct send_recv_arg arg = { 697 .fd = lfd, 698 .bytes = total_bytes, 699 .stop = 0, 700 }; 701 pthread_t srv_thread; 702 void *thread_ret; 703 char batch[1500]; 704 int err = 0; 705 706 err = pthread_create(&srv_thread, NULL, send_recv_server, (void *)&arg); 707 if (err) { 708 log_err("Failed to pthread_create"); 709 return err; 710 } 711 712 /* recv total_bytes */ 713 while (bytes < total_bytes && !READ_ONCE(arg.stop)) { 714 nr_recv = recv(fd, &batch, 715 MIN(total_bytes - bytes, sizeof(batch)), 0); 716 if (nr_recv == -1 && errno == EINTR) 717 continue; 718 if (nr_recv == -1) { 719 err = -errno; 720 break; 721 } 722 bytes += nr_recv; 723 } 724 725 if (bytes != total_bytes) { 726 log_err("recv %zd expected %u", bytes, total_bytes); 727 if (!err) 728 err = bytes > total_bytes ? -E2BIG : -EINTR; 729 } 730 731 WRITE_ONCE(arg.stop, 1); 732 pthread_join(srv_thread, &thread_ret); 733 if (IS_ERR(thread_ret)) { 734 log_err("Failed in thread_ret %ld", PTR_ERR(thread_ret)); 735 err = err ? : PTR_ERR(thread_ret); 736 } 737 738 return err; 739 } 740 741 #ifdef TRAFFIC_MONITOR 742 struct tmonitor_ctx { 743 pcap_t *pcap; 744 pcap_dumper_t *dumper; 745 pthread_t thread; 746 int wake_fd; 747 748 volatile bool done; 749 char pkt_fname[PATH_MAX]; 750 int pcap_fd; 751 }; 752 753 static int __base_pr(const char *format, va_list args) 754 { 755 return vfprintf(stdout, format, args); 756 } 757 758 static tm_print_fn_t __tm_pr = __base_pr; 759 760 tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn) 761 { 762 tm_print_fn_t old_print_fn; 763 764 old_print_fn = __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED); 765 766 return old_print_fn; 767 } 768 769 void tm_print(const char *format, ...) 770 { 771 tm_print_fn_t print_fn; 772 va_list args; 773 774 print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED); 775 if (!print_fn) 776 return; 777 778 va_start(args, format); 779 print_fn(format, args); 780 va_end(args); 781 } 782 783 /* Is this packet captured with a Ethernet protocol type? */ 784 static bool is_ethernet(const u_char *packet) 785 { 786 u16 arphdr_type; 787 788 memcpy(&arphdr_type, packet + 8, 2); 789 arphdr_type = ntohs(arphdr_type); 790 791 /* Except the following cases, the protocol type contains the 792 * Ethernet protocol type for the packet. 793 * 794 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html 795 */ 796 switch (arphdr_type) { 797 case 770: /* ARPHRD_FRAD */ 798 case 778: /* ARPHDR_IPGRE */ 799 case 803: /* ARPHRD_IEEE80211_RADIOTAP */ 800 tm_print("Packet captured: arphdr_type=%d\n", arphdr_type); 801 return false; 802 } 803 return true; 804 } 805 806 static const char * const pkt_types[] = { 807 "In", 808 "B", /* Broadcast */ 809 "M", /* Multicast */ 810 "C", /* Captured with the promiscuous mode */ 811 "Out", 812 }; 813 814 static const char *pkt_type_str(u16 pkt_type) 815 { 816 if (pkt_type < ARRAY_SIZE(pkt_types)) 817 return pkt_types[pkt_type]; 818 return "Unknown"; 819 } 820 821 #define MAX_FLAGS_STRLEN 21 822 /* Show the information of the transport layer in the packet */ 823 static void show_transport(const u_char *packet, u16 len, u32 ifindex, 824 const char *src_addr, const char *dst_addr, 825 u16 proto, bool ipv6, u8 pkt_type) 826 { 827 char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = ""; 828 const char *transport_str; 829 u16 src_port, dst_port; 830 struct udphdr *udp; 831 struct tcphdr *tcp; 832 833 ifname = if_indextoname(ifindex, _ifname); 834 if (!ifname) { 835 snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex); 836 ifname = _ifname; 837 } 838 839 if (proto == IPPROTO_UDP) { 840 udp = (struct udphdr *)packet; 841 src_port = ntohs(udp->source); 842 dst_port = ntohs(udp->dest); 843 transport_str = "UDP"; 844 } else if (proto == IPPROTO_TCP) { 845 tcp = (struct tcphdr *)packet; 846 src_port = ntohs(tcp->source); 847 dst_port = ntohs(tcp->dest); 848 transport_str = "TCP"; 849 } else if (proto == IPPROTO_ICMP) { 850 tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n", 851 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, 852 packet[0], packet[1]); 853 return; 854 } else if (proto == IPPROTO_ICMPV6) { 855 tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n", 856 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, 857 packet[0], packet[1]); 858 return; 859 } else { 860 tm_print("%-7s %-3s %s %s > %s: protocol %d\n", 861 ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4", 862 src_addr, dst_addr, proto); 863 return; 864 } 865 866 /* TCP or UDP*/ 867 868 if (proto == IPPROTO_TCP) 869 snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s", 870 tcp->fin ? ", FIN" : "", 871 tcp->syn ? ", SYN" : "", 872 tcp->rst ? ", RST" : "", 873 tcp->ack ? ", ACK" : ""); 874 875 if (ipv6) 876 tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n", 877 ifname, pkt_type_str(pkt_type), src_addr, src_port, 878 dst_addr, dst_port, transport_str, len, flags); 879 else 880 tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n", 881 ifname, pkt_type_str(pkt_type), src_addr, src_port, 882 dst_addr, dst_port, transport_str, len, flags); 883 } 884 885 static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type) 886 { 887 char src_buf[INET6_ADDRSTRLEN], dst_buf[INET6_ADDRSTRLEN]; 888 struct ipv6hdr *pkt = (struct ipv6hdr *)packet; 889 const char *src, *dst; 890 u_char proto; 891 892 src = inet_ntop(AF_INET6, &pkt->saddr, src_buf, sizeof(src_buf)); 893 if (!src) 894 src = "<invalid>"; 895 dst = inet_ntop(AF_INET6, &pkt->daddr, dst_buf, sizeof(dst_buf)); 896 if (!dst) 897 dst = "<invalid>"; 898 proto = pkt->nexthdr; 899 show_transport(packet + sizeof(struct ipv6hdr), 900 ntohs(pkt->payload_len), 901 ifindex, src, dst, proto, true, pkt_type); 902 } 903 904 static void show_ipv4_packet(const u_char *packet, u32 ifindex, u8 pkt_type) 905 { 906 char src_buf[INET_ADDRSTRLEN], dst_buf[INET_ADDRSTRLEN]; 907 struct iphdr *pkt = (struct iphdr *)packet; 908 const char *src, *dst; 909 u_char proto; 910 911 src = inet_ntop(AF_INET, &pkt->saddr, src_buf, sizeof(src_buf)); 912 if (!src) 913 src = "<invalid>"; 914 dst = inet_ntop(AF_INET, &pkt->daddr, dst_buf, sizeof(dst_buf)); 915 if (!dst) 916 dst = "<invalid>"; 917 proto = pkt->protocol; 918 show_transport(packet + sizeof(struct iphdr), 919 ntohs(pkt->tot_len), 920 ifindex, src, dst, proto, false, pkt_type); 921 } 922 923 static void *traffic_monitor_thread(void *arg) 924 { 925 char *ifname, _ifname[IF_NAMESIZE]; 926 const u_char *packet, *payload; 927 struct tmonitor_ctx *ctx = arg; 928 pcap_dumper_t *dumper = ctx->dumper; 929 int fd = ctx->pcap_fd, nfds, r; 930 int wake_fd = ctx->wake_fd; 931 struct pcap_pkthdr header; 932 pcap_t *pcap = ctx->pcap; 933 u32 ifindex; 934 fd_set fds; 935 u16 proto; 936 u8 ptype; 937 938 nfds = (fd > wake_fd ? fd : wake_fd) + 1; 939 FD_ZERO(&fds); 940 941 while (!ctx->done) { 942 FD_SET(fd, &fds); 943 FD_SET(wake_fd, &fds); 944 r = select(nfds, &fds, NULL, NULL, NULL); 945 if (!r) 946 continue; 947 if (r < 0) { 948 if (errno == EINTR) 949 continue; 950 log_err("Fail to select on pcap fd and wake fd"); 951 break; 952 } 953 954 /* This instance of pcap is non-blocking */ 955 packet = pcap_next(pcap, &header); 956 if (!packet) 957 continue; 958 959 /* According to the man page of pcap_dump(), first argument 960 * is the pcap_dumper_t pointer even it's argument type is 961 * u_char *. 962 */ 963 pcap_dump((u_char *)dumper, &header, packet); 964 965 /* Not sure what other types of packets look like. Here, we 966 * parse only Ethernet and compatible packets. 967 */ 968 if (!is_ethernet(packet)) 969 continue; 970 971 /* Skip SLL2 header 972 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html 973 * 974 * Although the document doesn't mention that, the payload 975 * doesn't include the Ethernet header. The payload starts 976 * from the first byte of the network layer header. 977 */ 978 payload = packet + 20; 979 980 memcpy(&proto, packet, 2); 981 proto = ntohs(proto); 982 memcpy(&ifindex, packet + 4, 4); 983 ifindex = ntohl(ifindex); 984 ptype = packet[10]; 985 986 if (proto == ETH_P_IPV6) { 987 show_ipv6_packet(payload, ifindex, ptype); 988 } else if (proto == ETH_P_IP) { 989 show_ipv4_packet(payload, ifindex, ptype); 990 } else { 991 ifname = if_indextoname(ifindex, _ifname); 992 if (!ifname) { 993 snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex); 994 ifname = _ifname; 995 } 996 997 tm_print("%-7s %-3s Unknown network protocol type 0x%x\n", 998 ifname, pkt_type_str(ptype), proto); 999 } 1000 } 1001 1002 return NULL; 1003 } 1004 1005 /* Prepare the pcap handle to capture packets. 1006 * 1007 * This pcap is non-blocking and immediate mode is enabled to receive 1008 * captured packets as soon as possible. The snaplen is set to 1024 bytes 1009 * to limit the size of captured content. The format of the link-layer 1010 * header is set to DLT_LINUX_SLL2 to enable handling various link-layer 1011 * technologies. 1012 */ 1013 static pcap_t *traffic_monitor_prepare_pcap(void) 1014 { 1015 char errbuf[PCAP_ERRBUF_SIZE]; 1016 pcap_t *pcap; 1017 int r; 1018 1019 /* Listen on all NICs in the namespace */ 1020 pcap = pcap_create("any", errbuf); 1021 if (!pcap) { 1022 log_err("Failed to open pcap: %s", errbuf); 1023 return NULL; 1024 } 1025 /* Limit the size of the packet (first N bytes) */ 1026 r = pcap_set_snaplen(pcap, 1024); 1027 if (r) { 1028 log_err("Failed to set snaplen: %s", pcap_geterr(pcap)); 1029 goto error; 1030 } 1031 /* To receive packets as fast as possible */ 1032 r = pcap_set_immediate_mode(pcap, 1); 1033 if (r) { 1034 log_err("Failed to set immediate mode: %s", pcap_geterr(pcap)); 1035 goto error; 1036 } 1037 r = pcap_setnonblock(pcap, 1, errbuf); 1038 if (r) { 1039 log_err("Failed to set nonblock: %s", errbuf); 1040 goto error; 1041 } 1042 r = pcap_activate(pcap); 1043 if (r) { 1044 log_err("Failed to activate pcap: %s", pcap_geterr(pcap)); 1045 goto error; 1046 } 1047 /* Determine the format of the link-layer header */ 1048 r = pcap_set_datalink(pcap, DLT_LINUX_SLL2); 1049 if (r) { 1050 log_err("Failed to set datalink: %s", pcap_geterr(pcap)); 1051 goto error; 1052 } 1053 1054 return pcap; 1055 error: 1056 pcap_close(pcap); 1057 return NULL; 1058 } 1059 1060 static void encode_test_name(char *buf, size_t len, const char *test_name, const char *subtest_name) 1061 { 1062 char *p; 1063 1064 if (subtest_name) 1065 snprintf(buf, len, "%s__%s", test_name, subtest_name); 1066 else 1067 snprintf(buf, len, "%s", test_name); 1068 while ((p = strchr(buf, '/'))) 1069 *p = '_'; 1070 while ((p = strchr(buf, ' '))) 1071 *p = '_'; 1072 } 1073 1074 #define PCAP_DIR "/tmp/tmon_pcap" 1075 1076 /* Start to monitor the network traffic in the given network namespace. 1077 * 1078 * netns: the name of the network namespace to monitor. If NULL, the 1079 * current network namespace is monitored. 1080 * test_name: the name of the running test. 1081 * subtest_name: the name of the running subtest if there is. It should be 1082 * NULL if it is not a subtest. 1083 * 1084 * This function will start a thread to capture packets going through NICs 1085 * in the give network namespace. 1086 */ 1087 struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name, 1088 const char *subtest_name) 1089 { 1090 struct nstoken *nstoken = NULL; 1091 struct tmonitor_ctx *ctx; 1092 char test_name_buf[64]; 1093 static int tmon_seq; 1094 int r; 1095 1096 if (netns) { 1097 nstoken = open_netns(netns); 1098 if (!nstoken) 1099 return NULL; 1100 } 1101 ctx = malloc(sizeof(*ctx)); 1102 if (!ctx) { 1103 log_err("Failed to malloc ctx"); 1104 goto fail_ctx; 1105 } 1106 memset(ctx, 0, sizeof(*ctx)); 1107 1108 encode_test_name(test_name_buf, sizeof(test_name_buf), test_name, subtest_name); 1109 snprintf(ctx->pkt_fname, sizeof(ctx->pkt_fname), 1110 PCAP_DIR "/packets-%d-%d-%s-%s.log", getpid(), tmon_seq++, 1111 test_name_buf, netns ? netns : "unknown"); 1112 1113 r = mkdir(PCAP_DIR, 0755); 1114 if (r && errno != EEXIST) { 1115 log_err("Failed to create " PCAP_DIR); 1116 goto fail_pcap; 1117 } 1118 1119 ctx->pcap = traffic_monitor_prepare_pcap(); 1120 if (!ctx->pcap) 1121 goto fail_pcap; 1122 ctx->pcap_fd = pcap_get_selectable_fd(ctx->pcap); 1123 if (ctx->pcap_fd < 0) { 1124 log_err("Failed to get pcap fd"); 1125 goto fail_dumper; 1126 } 1127 1128 /* Create a packet file */ 1129 ctx->dumper = pcap_dump_open(ctx->pcap, ctx->pkt_fname); 1130 if (!ctx->dumper) { 1131 log_err("Failed to open pcap dump: %s", ctx->pkt_fname); 1132 goto fail_dumper; 1133 } 1134 1135 /* Create an eventfd to wake up the monitor thread */ 1136 ctx->wake_fd = eventfd(0, 0); 1137 if (ctx->wake_fd < 0) { 1138 log_err("Failed to create eventfd"); 1139 goto fail_eventfd; 1140 } 1141 1142 r = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx); 1143 if (r) { 1144 log_err("Failed to create thread"); 1145 goto fail; 1146 } 1147 1148 close_netns(nstoken); 1149 1150 return ctx; 1151 1152 fail: 1153 close(ctx->wake_fd); 1154 1155 fail_eventfd: 1156 pcap_dump_close(ctx->dumper); 1157 unlink(ctx->pkt_fname); 1158 1159 fail_dumper: 1160 pcap_close(ctx->pcap); 1161 1162 fail_pcap: 1163 free(ctx); 1164 1165 fail_ctx: 1166 close_netns(nstoken); 1167 1168 return NULL; 1169 } 1170 1171 static void traffic_monitor_release(struct tmonitor_ctx *ctx) 1172 { 1173 pcap_close(ctx->pcap); 1174 pcap_dump_close(ctx->dumper); 1175 1176 close(ctx->wake_fd); 1177 1178 free(ctx); 1179 } 1180 1181 /* Stop the network traffic monitor. 1182 * 1183 * ctx: the context returned by traffic_monitor_start() 1184 */ 1185 void traffic_monitor_stop(struct tmonitor_ctx *ctx) 1186 { 1187 __u64 w = 1; 1188 1189 if (!ctx) 1190 return; 1191 1192 /* Stop the monitor thread */ 1193 ctx->done = true; 1194 /* Wake up the background thread. */ 1195 write(ctx->wake_fd, &w, sizeof(w)); 1196 pthread_join(ctx->thread, NULL); 1197 1198 tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1); 1199 1200 traffic_monitor_release(ctx); 1201 } 1202 1203 #endif /* TRAFFIC_MONITOR */ 1204