1 // SPDX-License-Identifier: GPL-2.0-only 2 #define _GNU_SOURCE 3 4 #include <errno.h> 5 #include <stdbool.h> 6 #include <stdio.h> 7 #include <string.h> 8 #include <unistd.h> 9 #include <sched.h> 10 11 #include <arpa/inet.h> 12 #include <sys/mount.h> 13 #include <sys/stat.h> 14 #include <sys/types.h> 15 #include <sys/un.h> 16 #include <sys/eventfd.h> 17 18 #include <linux/err.h> 19 #include <linux/in.h> 20 #include <linux/in6.h> 21 #include <linux/limits.h> 22 23 #include <linux/ip.h> 24 #include <netinet/udp.h> 25 #include <netinet/tcp.h> 26 #include <net/if.h> 27 28 #include "bpf_util.h" 29 #include "network_helpers.h" 30 #include "test_progs.h" 31 32 #ifdef TRAFFIC_MONITOR 33 /* Prevent pcap.h from including pcap/bpf.h and causing conflicts */ 34 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1 35 #include <pcap/pcap.h> 36 #include <pcap/dlt.h> 37 #endif 38 39 #ifndef IPPROTO_MPTCP 40 #define IPPROTO_MPTCP 262 41 #endif 42 43 #define clean_errno() (errno == 0 ? "None" : strerror(errno)) 44 #define log_err(MSG, ...) ({ \ 45 int __save = errno; \ 46 fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ 47 __FILE__, __LINE__, clean_errno(), \ 48 ##__VA_ARGS__); \ 49 errno = __save; \ 50 }) 51 52 struct ipv4_packet pkt_v4 = { 53 .eth.h_proto = __bpf_constant_htons(ETH_P_IP), 54 .iph.ihl = 5, 55 .iph.protocol = IPPROTO_TCP, 56 .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), 57 .tcp.urg_ptr = 123, 58 .tcp.doff = 5, 59 }; 60 61 struct ipv6_packet pkt_v6 = { 62 .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), 63 .iph.nexthdr = IPPROTO_TCP, 64 .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), 65 .tcp.urg_ptr = 123, 66 .tcp.doff = 5, 67 }; 68 69 static const struct network_helper_opts default_opts; 70 71 int settimeo(int fd, int timeout_ms) 72 { 73 struct timeval timeout = { .tv_sec = 3 }; 74 75 if (timeout_ms > 0) { 76 timeout.tv_sec = timeout_ms / 1000; 77 timeout.tv_usec = (timeout_ms % 1000) * 1000; 78 } 79 80 if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, 81 sizeof(timeout))) { 82 log_err("Failed to set SO_RCVTIMEO"); 83 return -1; 84 } 85 86 if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeout, 87 sizeof(timeout))) { 88 log_err("Failed to set SO_SNDTIMEO"); 89 return -1; 90 } 91 92 return 0; 93 } 94 95 #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; }) 96 97 int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen, 98 const struct network_helper_opts *opts) 99 { 100 int fd; 101 102 if (!opts) 103 opts = &default_opts; 104 105 fd = socket(addr->ss_family, type, opts->proto); 106 if (fd < 0) { 107 log_err("Failed to create server socket"); 108 return -1; 109 } 110 111 if (settimeo(fd, opts->timeout_ms)) 112 goto error_close; 113 114 if (opts->post_socket_cb && 115 opts->post_socket_cb(fd, opts->cb_opts)) { 116 log_err("Failed to call post_socket_cb"); 117 goto error_close; 118 } 119 120 if (bind(fd, (struct sockaddr *)addr, addrlen) < 0) { 121 log_err("Failed to bind socket"); 122 goto error_close; 123 } 124 125 if (type == SOCK_STREAM) { 126 if (listen(fd, opts->backlog ? MAX(opts->backlog, 0) : 1) < 0) { 127 log_err("Failed to listed on socket"); 128 goto error_close; 129 } 130 } 131 132 return fd; 133 134 error_close: 135 save_errno_close(fd); 136 return -1; 137 } 138 139 int start_server_str(int family, int type, const char *addr_str, __u16 port, 140 const struct network_helper_opts *opts) 141 { 142 struct sockaddr_storage addr; 143 socklen_t addrlen; 144 145 if (!opts) 146 opts = &default_opts; 147 148 if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 149 return -1; 150 151 return start_server_addr(type, &addr, addrlen, opts); 152 } 153 154 int start_server(int family, int type, const char *addr_str, __u16 port, 155 int timeout_ms) 156 { 157 struct network_helper_opts opts = { 158 .timeout_ms = timeout_ms, 159 }; 160 161 return start_server_str(family, type, addr_str, port, &opts); 162 } 163 164 static int reuseport_cb(int fd, void *opts) 165 { 166 int on = 1; 167 168 return setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on)); 169 } 170 171 int *start_reuseport_server(int family, int type, const char *addr_str, 172 __u16 port, int timeout_ms, unsigned int nr_listens) 173 { 174 struct network_helper_opts opts = { 175 .timeout_ms = timeout_ms, 176 .post_socket_cb = reuseport_cb, 177 }; 178 struct sockaddr_storage addr; 179 unsigned int nr_fds = 0; 180 socklen_t addrlen; 181 int *fds; 182 183 if (!nr_listens) 184 return NULL; 185 186 if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 187 return NULL; 188 189 fds = malloc(sizeof(*fds) * nr_listens); 190 if (!fds) 191 return NULL; 192 193 fds[0] = start_server_addr(type, &addr, addrlen, &opts); 194 if (fds[0] == -1) 195 goto close_fds; 196 nr_fds = 1; 197 198 if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen)) 199 goto close_fds; 200 201 for (; nr_fds < nr_listens; nr_fds++) { 202 fds[nr_fds] = start_server_addr(type, &addr, addrlen, &opts); 203 if (fds[nr_fds] == -1) 204 goto close_fds; 205 } 206 207 return fds; 208 209 close_fds: 210 free_fds(fds, nr_fds); 211 return NULL; 212 } 213 214 void free_fds(int *fds, unsigned int nr_close_fds) 215 { 216 if (fds) { 217 while (nr_close_fds) 218 close(fds[--nr_close_fds]); 219 free(fds); 220 } 221 } 222 223 int fastopen_connect(int server_fd, const char *data, unsigned int data_len, 224 int timeout_ms) 225 { 226 struct sockaddr_storage addr; 227 socklen_t addrlen = sizeof(addr); 228 struct sockaddr_in *addr_in; 229 int fd, ret; 230 231 if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { 232 log_err("Failed to get server addr"); 233 return -1; 234 } 235 236 addr_in = (struct sockaddr_in *)&addr; 237 fd = socket(addr_in->sin_family, SOCK_STREAM, 0); 238 if (fd < 0) { 239 log_err("Failed to create client socket"); 240 return -1; 241 } 242 243 if (settimeo(fd, timeout_ms)) 244 goto error_close; 245 246 ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr, 247 addrlen); 248 if (ret != data_len) { 249 log_err("sendto(data, %u) != %d\n", data_len, ret); 250 goto error_close; 251 } 252 253 return fd; 254 255 error_close: 256 save_errno_close(fd); 257 return -1; 258 } 259 260 int client_socket(int family, int type, 261 const struct network_helper_opts *opts) 262 { 263 int fd; 264 265 if (!opts) 266 opts = &default_opts; 267 268 fd = socket(family, type, opts->proto); 269 if (fd < 0) { 270 log_err("Failed to create client socket"); 271 return -1; 272 } 273 274 if (settimeo(fd, opts->timeout_ms)) 275 goto error_close; 276 277 if (opts->post_socket_cb && 278 opts->post_socket_cb(fd, opts->cb_opts)) 279 goto error_close; 280 281 return fd; 282 283 error_close: 284 save_errno_close(fd); 285 return -1; 286 } 287 288 int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen, 289 const struct network_helper_opts *opts) 290 { 291 int fd; 292 293 if (!opts) 294 opts = &default_opts; 295 296 fd = client_socket(addr->ss_family, type, opts); 297 if (fd < 0) { 298 log_err("Failed to create client socket"); 299 return -1; 300 } 301 302 if (connect(fd, (const struct sockaddr *)addr, addrlen)) { 303 log_err("Failed to connect to server"); 304 save_errno_close(fd); 305 return -1; 306 } 307 308 return fd; 309 } 310 311 int connect_to_addr_str(int family, int type, const char *addr_str, __u16 port, 312 const struct network_helper_opts *opts) 313 { 314 struct sockaddr_storage addr; 315 socklen_t addrlen; 316 317 if (!opts) 318 opts = &default_opts; 319 320 if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 321 return -1; 322 323 return connect_to_addr(type, &addr, addrlen, opts); 324 } 325 326 int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) 327 { 328 struct sockaddr_storage addr; 329 socklen_t addrlen, optlen; 330 int type; 331 332 if (!opts) 333 opts = &default_opts; 334 335 optlen = sizeof(type); 336 if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) { 337 log_err("getsockopt(SOL_TYPE)"); 338 return -1; 339 } 340 341 addrlen = sizeof(addr); 342 if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { 343 log_err("Failed to get server addr"); 344 return -1; 345 } 346 347 return connect_to_addr(type, &addr, addrlen, opts); 348 } 349 350 int connect_to_fd(int server_fd, int timeout_ms) 351 { 352 struct network_helper_opts opts = { 353 .timeout_ms = timeout_ms, 354 }; 355 socklen_t optlen; 356 int protocol; 357 358 optlen = sizeof(protocol); 359 if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) { 360 log_err("getsockopt(SOL_PROTOCOL)"); 361 return -1; 362 } 363 opts.proto = protocol; 364 365 return connect_to_fd_opts(server_fd, &opts); 366 } 367 368 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms) 369 { 370 struct sockaddr_storage addr; 371 socklen_t len = sizeof(addr); 372 373 if (settimeo(client_fd, timeout_ms)) 374 return -1; 375 376 if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { 377 log_err("Failed to get server addr"); 378 return -1; 379 } 380 381 if (connect(client_fd, (const struct sockaddr *)&addr, len)) { 382 log_err("Failed to connect to server"); 383 return -1; 384 } 385 386 return 0; 387 } 388 389 int make_sockaddr(int family, const char *addr_str, __u16 port, 390 struct sockaddr_storage *addr, socklen_t *len) 391 { 392 if (family == AF_INET) { 393 struct sockaddr_in *sin = (void *)addr; 394 395 memset(addr, 0, sizeof(*sin)); 396 sin->sin_family = AF_INET; 397 sin->sin_port = htons(port); 398 if (addr_str && 399 inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) { 400 log_err("inet_pton(AF_INET, %s)", addr_str); 401 return -1; 402 } 403 if (len) 404 *len = sizeof(*sin); 405 return 0; 406 } else if (family == AF_INET6) { 407 struct sockaddr_in6 *sin6 = (void *)addr; 408 409 memset(addr, 0, sizeof(*sin6)); 410 sin6->sin6_family = AF_INET6; 411 sin6->sin6_port = htons(port); 412 if (addr_str && 413 inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) { 414 log_err("inet_pton(AF_INET6, %s)", addr_str); 415 return -1; 416 } 417 if (len) 418 *len = sizeof(*sin6); 419 return 0; 420 } else if (family == AF_UNIX) { 421 /* Note that we always use abstract unix sockets to avoid having 422 * to clean up leftover files. 423 */ 424 struct sockaddr_un *sun = (void *)addr; 425 426 memset(addr, 0, sizeof(*sun)); 427 sun->sun_family = family; 428 sun->sun_path[0] = 0; 429 strcpy(sun->sun_path + 1, addr_str); 430 if (len) 431 *len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str); 432 return 0; 433 } 434 return -1; 435 } 436 437 char *ping_command(int family) 438 { 439 if (family == AF_INET6) { 440 /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */ 441 if (!system("which ping6 >/dev/null 2>&1")) 442 return "ping6"; 443 else 444 return "ping -6"; 445 } 446 return "ping"; 447 } 448 449 int append_tid(char *str, size_t sz) 450 { 451 size_t end; 452 453 if (!str) 454 return -1; 455 456 end = strlen(str); 457 if (end + 8 > sz) 458 return -1; 459 460 sprintf(&str[end], "%07d", gettid()); 461 str[end + 7] = '\0'; 462 463 return 0; 464 } 465 466 int remove_netns(const char *name) 467 { 468 char *cmd; 469 int r; 470 471 r = asprintf(&cmd, "ip netns del %s >/dev/null 2>&1", name); 472 if (r < 0) { 473 log_err("Failed to malloc cmd"); 474 return -1; 475 } 476 477 r = system(cmd); 478 free(cmd); 479 return r; 480 } 481 482 int make_netns(const char *name) 483 { 484 char *cmd; 485 int r; 486 487 r = asprintf(&cmd, "ip netns add %s", name); 488 if (r < 0) { 489 log_err("Failed to malloc cmd"); 490 return -1; 491 } 492 493 r = system(cmd); 494 free(cmd); 495 496 if (r) 497 return r; 498 499 r = asprintf(&cmd, "ip -n %s link set lo up", name); 500 if (r < 0) { 501 log_err("Failed to malloc cmd for setting up lo"); 502 remove_netns(name); 503 return -1; 504 } 505 506 r = system(cmd); 507 free(cmd); 508 509 return r; 510 } 511 512 struct nstoken { 513 int orig_netns_fd; 514 }; 515 516 struct nstoken *open_netns(const char *name) 517 { 518 int nsfd; 519 char nspath[PATH_MAX]; 520 int err; 521 struct nstoken *token; 522 523 token = calloc(1, sizeof(struct nstoken)); 524 if (!token) { 525 log_err("Failed to malloc token"); 526 return NULL; 527 } 528 529 token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY); 530 if (token->orig_netns_fd == -1) { 531 log_err("Failed to open(/proc/self/ns/net)"); 532 goto fail; 533 } 534 535 snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); 536 nsfd = open(nspath, O_RDONLY | O_CLOEXEC); 537 if (nsfd == -1) { 538 log_err("Failed to open(%s)", nspath); 539 goto fail; 540 } 541 542 err = setns(nsfd, CLONE_NEWNET); 543 close(nsfd); 544 if (err) { 545 log_err("Failed to setns(nsfd)"); 546 goto fail; 547 } 548 549 return token; 550 fail: 551 if (token->orig_netns_fd != -1) 552 close(token->orig_netns_fd); 553 free(token); 554 return NULL; 555 } 556 557 void close_netns(struct nstoken *token) 558 { 559 if (!token) 560 return; 561 562 if (setns(token->orig_netns_fd, CLONE_NEWNET)) 563 log_err("Failed to setns(orig_netns_fd)"); 564 close(token->orig_netns_fd); 565 free(token); 566 } 567 568 int open_tuntap(const char *dev_name, bool need_mac) 569 { 570 int err = 0; 571 struct ifreq ifr; 572 int fd = open("/dev/net/tun", O_RDWR); 573 574 if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)")) 575 return -1; 576 577 ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); 578 strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1); 579 ifr.ifr_name[IFNAMSIZ - 1] = '\0'; 580 581 err = ioctl(fd, TUNSETIFF, &ifr); 582 if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { 583 close(fd); 584 return -1; 585 } 586 587 err = fcntl(fd, F_SETFL, O_NONBLOCK); 588 if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { 589 close(fd); 590 return -1; 591 } 592 593 return fd; 594 } 595 596 int get_socket_local_port(int sock_fd) 597 { 598 struct sockaddr_storage addr; 599 socklen_t addrlen = sizeof(addr); 600 int err; 601 602 err = getsockname(sock_fd, (struct sockaddr *)&addr, &addrlen); 603 if (err < 0) 604 return err; 605 606 if (addr.ss_family == AF_INET) { 607 struct sockaddr_in *sin = (struct sockaddr_in *)&addr; 608 609 return sin->sin_port; 610 } else if (addr.ss_family == AF_INET6) { 611 struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&addr; 612 613 return sin->sin6_port; 614 } 615 616 return -1; 617 } 618 619 int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param) 620 { 621 struct ifreq ifr = {0}; 622 int sockfd, err; 623 624 sockfd = socket(AF_INET, SOCK_DGRAM, 0); 625 if (sockfd < 0) 626 return -errno; 627 628 memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); 629 630 ring_param->cmd = ETHTOOL_GRINGPARAM; 631 ifr.ifr_data = (char *)ring_param; 632 633 if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) { 634 err = errno; 635 close(sockfd); 636 return -err; 637 } 638 639 close(sockfd); 640 return 0; 641 } 642 643 int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param) 644 { 645 struct ifreq ifr = {0}; 646 int sockfd, err; 647 648 sockfd = socket(AF_INET, SOCK_DGRAM, 0); 649 if (sockfd < 0) 650 return -errno; 651 652 memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); 653 654 ring_param->cmd = ETHTOOL_SRINGPARAM; 655 ifr.ifr_data = (char *)ring_param; 656 657 if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) { 658 err = errno; 659 close(sockfd); 660 return -err; 661 } 662 663 close(sockfd); 664 return 0; 665 } 666 667 struct send_recv_arg { 668 int fd; 669 uint32_t bytes; 670 int stop; 671 }; 672 673 static void *send_recv_server(void *arg) 674 { 675 struct send_recv_arg *a = (struct send_recv_arg *)arg; 676 ssize_t nr_sent = 0, bytes = 0; 677 char batch[1500]; 678 int err = 0, fd; 679 680 fd = accept(a->fd, NULL, NULL); 681 while (fd == -1) { 682 if (errno == EINTR) 683 continue; 684 err = -errno; 685 goto done; 686 } 687 688 if (settimeo(fd, 0)) { 689 err = -errno; 690 goto done; 691 } 692 693 while (bytes < a->bytes && !READ_ONCE(a->stop)) { 694 nr_sent = send(fd, &batch, 695 MIN(a->bytes - bytes, sizeof(batch)), 0); 696 if (nr_sent == -1 && errno == EINTR) 697 continue; 698 if (nr_sent == -1) { 699 err = -errno; 700 break; 701 } 702 bytes += nr_sent; 703 } 704 705 if (bytes != a->bytes) { 706 log_err("send %zd expected %u", bytes, a->bytes); 707 if (!err) 708 err = bytes > a->bytes ? -E2BIG : -EINTR; 709 } 710 711 done: 712 if (fd >= 0) 713 close(fd); 714 if (err) { 715 WRITE_ONCE(a->stop, 1); 716 return ERR_PTR(err); 717 } 718 return NULL; 719 } 720 721 int send_recv_data(int lfd, int fd, uint32_t total_bytes) 722 { 723 ssize_t nr_recv = 0, bytes = 0; 724 struct send_recv_arg arg = { 725 .fd = lfd, 726 .bytes = total_bytes, 727 .stop = 0, 728 }; 729 pthread_t srv_thread; 730 void *thread_ret; 731 char batch[1500]; 732 int err = 0; 733 734 err = pthread_create(&srv_thread, NULL, send_recv_server, (void *)&arg); 735 if (err) { 736 log_err("Failed to pthread_create"); 737 return err; 738 } 739 740 /* recv total_bytes */ 741 while (bytes < total_bytes && !READ_ONCE(arg.stop)) { 742 nr_recv = recv(fd, &batch, 743 MIN(total_bytes - bytes, sizeof(batch)), 0); 744 if (nr_recv == -1 && errno == EINTR) 745 continue; 746 if (nr_recv == -1) { 747 err = -errno; 748 break; 749 } 750 bytes += nr_recv; 751 } 752 753 if (bytes != total_bytes) { 754 log_err("recv %zd expected %u", bytes, total_bytes); 755 if (!err) 756 err = bytes > total_bytes ? -E2BIG : -EINTR; 757 } 758 759 WRITE_ONCE(arg.stop, 1); 760 pthread_join(srv_thread, &thread_ret); 761 if (IS_ERR(thread_ret)) { 762 log_err("Failed in thread_ret %ld", PTR_ERR(thread_ret)); 763 err = err ? : PTR_ERR(thread_ret); 764 } 765 766 return err; 767 } 768 769 #ifdef TRAFFIC_MONITOR 770 struct tmonitor_ctx { 771 pcap_t *pcap; 772 pcap_dumper_t *dumper; 773 pthread_t thread; 774 int wake_fd; 775 776 volatile bool done; 777 char pkt_fname[PATH_MAX]; 778 int pcap_fd; 779 }; 780 781 static int __base_pr(const char *format, va_list args) 782 { 783 return vfprintf(stdout, format, args); 784 } 785 786 static tm_print_fn_t __tm_pr = __base_pr; 787 788 tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn) 789 { 790 tm_print_fn_t old_print_fn; 791 792 old_print_fn = __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED); 793 794 return old_print_fn; 795 } 796 797 void tm_print(const char *format, ...) 798 { 799 tm_print_fn_t print_fn; 800 va_list args; 801 802 print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED); 803 if (!print_fn) 804 return; 805 806 va_start(args, format); 807 print_fn(format, args); 808 va_end(args); 809 } 810 811 /* Is this packet captured with a Ethernet protocol type? */ 812 static bool is_ethernet(const u_char *packet) 813 { 814 u16 arphdr_type; 815 816 memcpy(&arphdr_type, packet + 8, 2); 817 arphdr_type = ntohs(arphdr_type); 818 819 /* Except the following cases, the protocol type contains the 820 * Ethernet protocol type for the packet. 821 * 822 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html 823 */ 824 switch (arphdr_type) { 825 case 770: /* ARPHRD_FRAD */ 826 case 778: /* ARPHDR_IPGRE */ 827 case 803: /* ARPHRD_IEEE80211_RADIOTAP */ 828 tm_print("Packet captured: arphdr_type=%d\n", arphdr_type); 829 return false; 830 } 831 return true; 832 } 833 834 static const char * const pkt_types[] = { 835 "In", 836 "B", /* Broadcast */ 837 "M", /* Multicast */ 838 "C", /* Captured with the promiscuous mode */ 839 "Out", 840 }; 841 842 static const char *pkt_type_str(u16 pkt_type) 843 { 844 if (pkt_type < ARRAY_SIZE(pkt_types)) 845 return pkt_types[pkt_type]; 846 return "Unknown"; 847 } 848 849 #define MAX_FLAGS_STRLEN 21 850 /* Show the information of the transport layer in the packet */ 851 static void show_transport(const u_char *packet, u16 len, u32 ifindex, 852 const char *src_addr, const char *dst_addr, 853 u16 proto, bool ipv6, u8 pkt_type) 854 { 855 char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = ""; 856 const char *transport_str; 857 u16 src_port, dst_port; 858 struct udphdr *udp; 859 struct tcphdr *tcp; 860 861 ifname = if_indextoname(ifindex, _ifname); 862 if (!ifname) { 863 snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex); 864 ifname = _ifname; 865 } 866 867 if (proto == IPPROTO_UDP) { 868 udp = (struct udphdr *)packet; 869 src_port = ntohs(udp->source); 870 dst_port = ntohs(udp->dest); 871 transport_str = "UDP"; 872 } else if (proto == IPPROTO_TCP) { 873 tcp = (struct tcphdr *)packet; 874 src_port = ntohs(tcp->source); 875 dst_port = ntohs(tcp->dest); 876 transport_str = "TCP"; 877 } else if (proto == IPPROTO_ICMP) { 878 tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n", 879 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, 880 packet[0], packet[1]); 881 return; 882 } else if (proto == IPPROTO_ICMPV6) { 883 tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n", 884 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, 885 packet[0], packet[1]); 886 return; 887 } else { 888 tm_print("%-7s %-3s %s %s > %s: protocol %d\n", 889 ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4", 890 src_addr, dst_addr, proto); 891 return; 892 } 893 894 /* TCP or UDP*/ 895 896 if (proto == IPPROTO_TCP) 897 snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s", 898 tcp->fin ? ", FIN" : "", 899 tcp->syn ? ", SYN" : "", 900 tcp->rst ? ", RST" : "", 901 tcp->ack ? ", ACK" : ""); 902 903 if (ipv6) 904 tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n", 905 ifname, pkt_type_str(pkt_type), src_addr, src_port, 906 dst_addr, dst_port, transport_str, len, flags); 907 else 908 tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n", 909 ifname, pkt_type_str(pkt_type), src_addr, src_port, 910 dst_addr, dst_port, transport_str, len, flags); 911 } 912 913 static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type) 914 { 915 char src_buf[INET6_ADDRSTRLEN], dst_buf[INET6_ADDRSTRLEN]; 916 struct ipv6hdr *pkt = (struct ipv6hdr *)packet; 917 const char *src, *dst; 918 u_char proto; 919 920 src = inet_ntop(AF_INET6, &pkt->saddr, src_buf, sizeof(src_buf)); 921 if (!src) 922 src = "<invalid>"; 923 dst = inet_ntop(AF_INET6, &pkt->daddr, dst_buf, sizeof(dst_buf)); 924 if (!dst) 925 dst = "<invalid>"; 926 proto = pkt->nexthdr; 927 show_transport(packet + sizeof(struct ipv6hdr), 928 ntohs(pkt->payload_len), 929 ifindex, src, dst, proto, true, pkt_type); 930 } 931 932 static void show_ipv4_packet(const u_char *packet, u32 ifindex, u8 pkt_type) 933 { 934 char src_buf[INET_ADDRSTRLEN], dst_buf[INET_ADDRSTRLEN]; 935 struct iphdr *pkt = (struct iphdr *)packet; 936 const char *src, *dst; 937 u_char proto; 938 939 src = inet_ntop(AF_INET, &pkt->saddr, src_buf, sizeof(src_buf)); 940 if (!src) 941 src = "<invalid>"; 942 dst = inet_ntop(AF_INET, &pkt->daddr, dst_buf, sizeof(dst_buf)); 943 if (!dst) 944 dst = "<invalid>"; 945 proto = pkt->protocol; 946 show_transport(packet + sizeof(struct iphdr), 947 ntohs(pkt->tot_len), 948 ifindex, src, dst, proto, false, pkt_type); 949 } 950 951 static void *traffic_monitor_thread(void *arg) 952 { 953 char *ifname, _ifname[IF_NAMESIZE]; 954 const u_char *packet, *payload; 955 struct tmonitor_ctx *ctx = arg; 956 pcap_dumper_t *dumper = ctx->dumper; 957 int fd = ctx->pcap_fd, nfds, r; 958 int wake_fd = ctx->wake_fd; 959 struct pcap_pkthdr header; 960 pcap_t *pcap = ctx->pcap; 961 u32 ifindex; 962 fd_set fds; 963 u16 proto; 964 u8 ptype; 965 966 nfds = (fd > wake_fd ? fd : wake_fd) + 1; 967 FD_ZERO(&fds); 968 969 while (!ctx->done) { 970 FD_SET(fd, &fds); 971 FD_SET(wake_fd, &fds); 972 r = select(nfds, &fds, NULL, NULL, NULL); 973 if (!r) 974 continue; 975 if (r < 0) { 976 if (errno == EINTR) 977 continue; 978 log_err("Fail to select on pcap fd and wake fd"); 979 break; 980 } 981 982 /* This instance of pcap is non-blocking */ 983 packet = pcap_next(pcap, &header); 984 if (!packet) 985 continue; 986 987 /* According to the man page of pcap_dump(), first argument 988 * is the pcap_dumper_t pointer even it's argument type is 989 * u_char *. 990 */ 991 pcap_dump((u_char *)dumper, &header, packet); 992 993 /* Not sure what other types of packets look like. Here, we 994 * parse only Ethernet and compatible packets. 995 */ 996 if (!is_ethernet(packet)) 997 continue; 998 999 /* Skip SLL2 header 1000 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html 1001 * 1002 * Although the document doesn't mention that, the payload 1003 * doesn't include the Ethernet header. The payload starts 1004 * from the first byte of the network layer header. 1005 */ 1006 payload = packet + 20; 1007 1008 memcpy(&proto, packet, 2); 1009 proto = ntohs(proto); 1010 memcpy(&ifindex, packet + 4, 4); 1011 ifindex = ntohl(ifindex); 1012 ptype = packet[10]; 1013 1014 if (proto == ETH_P_IPV6) { 1015 show_ipv6_packet(payload, ifindex, ptype); 1016 } else if (proto == ETH_P_IP) { 1017 show_ipv4_packet(payload, ifindex, ptype); 1018 } else { 1019 ifname = if_indextoname(ifindex, _ifname); 1020 if (!ifname) { 1021 snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex); 1022 ifname = _ifname; 1023 } 1024 1025 tm_print("%-7s %-3s Unknown network protocol type 0x%x\n", 1026 ifname, pkt_type_str(ptype), proto); 1027 } 1028 } 1029 1030 return NULL; 1031 } 1032 1033 /* Prepare the pcap handle to capture packets. 1034 * 1035 * This pcap is non-blocking and immediate mode is enabled to receive 1036 * captured packets as soon as possible. The snaplen is set to 1024 bytes 1037 * to limit the size of captured content. The format of the link-layer 1038 * header is set to DLT_LINUX_SLL2 to enable handling various link-layer 1039 * technologies. 1040 */ 1041 static pcap_t *traffic_monitor_prepare_pcap(void) 1042 { 1043 char errbuf[PCAP_ERRBUF_SIZE]; 1044 pcap_t *pcap; 1045 int r; 1046 1047 /* Listen on all NICs in the namespace */ 1048 pcap = pcap_create("any", errbuf); 1049 if (!pcap) { 1050 log_err("Failed to open pcap: %s", errbuf); 1051 return NULL; 1052 } 1053 /* Limit the size of the packet (first N bytes) */ 1054 r = pcap_set_snaplen(pcap, 1024); 1055 if (r) { 1056 log_err("Failed to set snaplen: %s", pcap_geterr(pcap)); 1057 goto error; 1058 } 1059 /* To receive packets as fast as possible */ 1060 r = pcap_set_immediate_mode(pcap, 1); 1061 if (r) { 1062 log_err("Failed to set immediate mode: %s", pcap_geterr(pcap)); 1063 goto error; 1064 } 1065 r = pcap_setnonblock(pcap, 1, errbuf); 1066 if (r) { 1067 log_err("Failed to set nonblock: %s", errbuf); 1068 goto error; 1069 } 1070 r = pcap_activate(pcap); 1071 if (r) { 1072 log_err("Failed to activate pcap: %s", pcap_geterr(pcap)); 1073 goto error; 1074 } 1075 /* Determine the format of the link-layer header */ 1076 r = pcap_set_datalink(pcap, DLT_LINUX_SLL2); 1077 if (r) { 1078 log_err("Failed to set datalink: %s", pcap_geterr(pcap)); 1079 goto error; 1080 } 1081 1082 return pcap; 1083 error: 1084 pcap_close(pcap); 1085 return NULL; 1086 } 1087 1088 static void encode_test_name(char *buf, size_t len, const char *test_name, const char *subtest_name) 1089 { 1090 char *p; 1091 1092 if (subtest_name) 1093 snprintf(buf, len, "%s__%s", test_name, subtest_name); 1094 else 1095 snprintf(buf, len, "%s", test_name); 1096 while ((p = strchr(buf, '/'))) 1097 *p = '_'; 1098 while ((p = strchr(buf, ' '))) 1099 *p = '_'; 1100 } 1101 1102 #define PCAP_DIR "/tmp/tmon_pcap" 1103 1104 /* Start to monitor the network traffic in the given network namespace. 1105 * 1106 * netns: the name of the network namespace to monitor. If NULL, the 1107 * current network namespace is monitored. 1108 * test_name: the name of the running test. 1109 * subtest_name: the name of the running subtest if there is. It should be 1110 * NULL if it is not a subtest. 1111 * 1112 * This function will start a thread to capture packets going through NICs 1113 * in the give network namespace. 1114 */ 1115 struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name, 1116 const char *subtest_name) 1117 { 1118 struct nstoken *nstoken = NULL; 1119 struct tmonitor_ctx *ctx; 1120 char test_name_buf[64]; 1121 static int tmon_seq; 1122 int r; 1123 1124 if (netns) { 1125 nstoken = open_netns(netns); 1126 if (!nstoken) 1127 return NULL; 1128 } 1129 ctx = malloc(sizeof(*ctx)); 1130 if (!ctx) { 1131 log_err("Failed to malloc ctx"); 1132 goto fail_ctx; 1133 } 1134 memset(ctx, 0, sizeof(*ctx)); 1135 1136 encode_test_name(test_name_buf, sizeof(test_name_buf), test_name, subtest_name); 1137 snprintf(ctx->pkt_fname, sizeof(ctx->pkt_fname), 1138 PCAP_DIR "/packets-%d-%d-%s-%s.log", getpid(), tmon_seq++, 1139 test_name_buf, netns ? netns : "unknown"); 1140 1141 r = mkdir(PCAP_DIR, 0755); 1142 if (r && errno != EEXIST) { 1143 log_err("Failed to create " PCAP_DIR); 1144 goto fail_pcap; 1145 } 1146 1147 ctx->pcap = traffic_monitor_prepare_pcap(); 1148 if (!ctx->pcap) 1149 goto fail_pcap; 1150 ctx->pcap_fd = pcap_get_selectable_fd(ctx->pcap); 1151 if (ctx->pcap_fd < 0) { 1152 log_err("Failed to get pcap fd"); 1153 goto fail_dumper; 1154 } 1155 1156 /* Create a packet file */ 1157 ctx->dumper = pcap_dump_open(ctx->pcap, ctx->pkt_fname); 1158 if (!ctx->dumper) { 1159 log_err("Failed to open pcap dump: %s", ctx->pkt_fname); 1160 goto fail_dumper; 1161 } 1162 1163 /* Create an eventfd to wake up the monitor thread */ 1164 ctx->wake_fd = eventfd(0, 0); 1165 if (ctx->wake_fd < 0) { 1166 log_err("Failed to create eventfd"); 1167 goto fail_eventfd; 1168 } 1169 1170 r = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx); 1171 if (r) { 1172 log_err("Failed to create thread"); 1173 goto fail; 1174 } 1175 1176 close_netns(nstoken); 1177 1178 return ctx; 1179 1180 fail: 1181 close(ctx->wake_fd); 1182 1183 fail_eventfd: 1184 pcap_dump_close(ctx->dumper); 1185 unlink(ctx->pkt_fname); 1186 1187 fail_dumper: 1188 pcap_close(ctx->pcap); 1189 1190 fail_pcap: 1191 free(ctx); 1192 1193 fail_ctx: 1194 close_netns(nstoken); 1195 1196 return NULL; 1197 } 1198 1199 static void traffic_monitor_release(struct tmonitor_ctx *ctx) 1200 { 1201 pcap_close(ctx->pcap); 1202 pcap_dump_close(ctx->dumper); 1203 1204 close(ctx->wake_fd); 1205 1206 free(ctx); 1207 } 1208 1209 /* Stop the network traffic monitor. 1210 * 1211 * ctx: the context returned by traffic_monitor_start() 1212 */ 1213 void traffic_monitor_stop(struct tmonitor_ctx *ctx) 1214 { 1215 __u64 w = 1; 1216 1217 if (!ctx) 1218 return; 1219 1220 /* Stop the monitor thread */ 1221 ctx->done = true; 1222 /* Wake up the background thread. */ 1223 write(ctx->wake_fd, &w, sizeof(w)); 1224 pthread_join(ctx->thread, NULL); 1225 1226 tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1); 1227 1228 traffic_monitor_release(ctx); 1229 } 1230 1231 #endif /* TRAFFIC_MONITOR */ 1232