1 // SPDX-License-Identifier: GPL-2.0-only 2 #define _GNU_SOURCE 3 4 #include <errno.h> 5 #include <stdbool.h> 6 #include <stdio.h> 7 #include <string.h> 8 #include <unistd.h> 9 #include <sched.h> 10 11 #include <arpa/inet.h> 12 #include <sys/mount.h> 13 #include <sys/stat.h> 14 #include <sys/types.h> 15 #include <sys/un.h> 16 #include <sys/eventfd.h> 17 18 #include <linux/err.h> 19 #include <linux/in.h> 20 #include <linux/in6.h> 21 #include <linux/limits.h> 22 23 #include <linux/ip.h> 24 #include <netinet/udp.h> 25 #include <netinet/tcp.h> 26 #include <net/if.h> 27 28 #include "bpf_util.h" 29 #include "network_helpers.h" 30 #include "test_progs.h" 31 32 #ifdef TRAFFIC_MONITOR 33 /* Prevent pcap.h from including pcap/bpf.h and causing conflicts */ 34 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1 35 #include <pcap/pcap.h> 36 #include <pcap/dlt.h> 37 #endif 38 39 #ifndef IPPROTO_MPTCP 40 #define IPPROTO_MPTCP 262 41 #endif 42 43 #define clean_errno() (errno == 0 ? "None" : strerror(errno)) 44 #define log_err(MSG, ...) ({ \ 45 int __save = errno; \ 46 fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ 47 __FILE__, __LINE__, clean_errno(), \ 48 ##__VA_ARGS__); \ 49 errno = __save; \ 50 }) 51 52 struct ipv4_packet pkt_v4 = { 53 .eth.h_proto = __bpf_constant_htons(ETH_P_IP), 54 .iph.ihl = 5, 55 .iph.protocol = IPPROTO_TCP, 56 .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), 57 .tcp.urg_ptr = 123, 58 .tcp.doff = 5, 59 }; 60 61 struct ipv6_packet pkt_v6 = { 62 .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), 63 .iph.nexthdr = IPPROTO_TCP, 64 .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), 65 .tcp.urg_ptr = 123, 66 .tcp.doff = 5, 67 }; 68 69 static const struct network_helper_opts default_opts; 70 71 int settimeo(int fd, int timeout_ms) 72 { 73 struct timeval timeout = { .tv_sec = 3 }; 74 75 if (timeout_ms > 0) { 76 timeout.tv_sec = timeout_ms / 1000; 77 timeout.tv_usec = (timeout_ms % 1000) * 1000; 78 } 79 80 if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, 81 sizeof(timeout))) { 82 log_err("Failed to set SO_RCVTIMEO"); 83 return -1; 84 } 85 86 if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeout, 87 sizeof(timeout))) { 88 log_err("Failed to set SO_SNDTIMEO"); 89 return -1; 90 } 91 92 return 0; 93 } 94 95 #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; }) 96 97 int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen, 98 const struct network_helper_opts *opts) 99 { 100 int on = 1, fd; 101 102 if (!opts) 103 opts = &default_opts; 104 105 fd = socket(addr->ss_family, type, opts->proto); 106 if (fd < 0) { 107 log_err("Failed to create server socket"); 108 return -1; 109 } 110 111 if (settimeo(fd, opts->timeout_ms)) 112 goto error_close; 113 114 if (type == SOCK_STREAM && 115 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))) { 116 log_err("Failed to enable SO_REUSEADDR"); 117 goto error_close; 118 } 119 120 if (opts->post_socket_cb && 121 opts->post_socket_cb(fd, opts->cb_opts)) { 122 log_err("Failed to call post_socket_cb"); 123 goto error_close; 124 } 125 126 if (bind(fd, (struct sockaddr *)addr, addrlen) < 0) { 127 log_err("Failed to bind socket"); 128 goto error_close; 129 } 130 131 if (type == SOCK_STREAM) { 132 if (listen(fd, opts->backlog ? MAX(opts->backlog, 0) : 1) < 0) { 133 log_err("Failed to listed on socket"); 134 goto error_close; 135 } 136 } 137 138 return fd; 139 140 error_close: 141 save_errno_close(fd); 142 return -1; 143 } 144 145 int start_server_str(int family, int type, const char *addr_str, __u16 port, 146 const struct network_helper_opts *opts) 147 { 148 struct sockaddr_storage addr; 149 socklen_t addrlen; 150 151 if (!opts) 152 opts = &default_opts; 153 154 if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 155 return -1; 156 157 return start_server_addr(type, &addr, addrlen, opts); 158 } 159 160 int start_server(int family, int type, const char *addr_str, __u16 port, 161 int timeout_ms) 162 { 163 struct network_helper_opts opts = { 164 .timeout_ms = timeout_ms, 165 }; 166 167 return start_server_str(family, type, addr_str, port, &opts); 168 } 169 170 static int reuseport_cb(int fd, void *opts) 171 { 172 int on = 1; 173 174 return setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on)); 175 } 176 177 int *start_reuseport_server(int family, int type, const char *addr_str, 178 __u16 port, int timeout_ms, unsigned int nr_listens) 179 { 180 struct network_helper_opts opts = { 181 .timeout_ms = timeout_ms, 182 .post_socket_cb = reuseport_cb, 183 }; 184 struct sockaddr_storage addr; 185 unsigned int nr_fds = 0; 186 socklen_t addrlen; 187 int *fds; 188 189 if (!nr_listens) 190 return NULL; 191 192 if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 193 return NULL; 194 195 fds = malloc(sizeof(*fds) * nr_listens); 196 if (!fds) 197 return NULL; 198 199 fds[0] = start_server_addr(type, &addr, addrlen, &opts); 200 if (fds[0] == -1) 201 goto close_fds; 202 nr_fds = 1; 203 204 if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen)) 205 goto close_fds; 206 207 for (; nr_fds < nr_listens; nr_fds++) { 208 fds[nr_fds] = start_server_addr(type, &addr, addrlen, &opts); 209 if (fds[nr_fds] == -1) 210 goto close_fds; 211 } 212 213 return fds; 214 215 close_fds: 216 free_fds(fds, nr_fds); 217 return NULL; 218 } 219 220 void free_fds(int *fds, unsigned int nr_close_fds) 221 { 222 if (fds) { 223 while (nr_close_fds) 224 close(fds[--nr_close_fds]); 225 free(fds); 226 } 227 } 228 229 int fastopen_connect(int server_fd, const char *data, unsigned int data_len, 230 int timeout_ms) 231 { 232 struct sockaddr_storage addr; 233 socklen_t addrlen = sizeof(addr); 234 struct sockaddr_in *addr_in; 235 int fd, ret; 236 237 if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { 238 log_err("Failed to get server addr"); 239 return -1; 240 } 241 242 addr_in = (struct sockaddr_in *)&addr; 243 fd = socket(addr_in->sin_family, SOCK_STREAM, 0); 244 if (fd < 0) { 245 log_err("Failed to create client socket"); 246 return -1; 247 } 248 249 if (settimeo(fd, timeout_ms)) 250 goto error_close; 251 252 ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr, 253 addrlen); 254 if (ret != data_len) { 255 log_err("sendto(data, %u) != %d\n", data_len, ret); 256 goto error_close; 257 } 258 259 return fd; 260 261 error_close: 262 save_errno_close(fd); 263 return -1; 264 } 265 266 int client_socket(int family, int type, 267 const struct network_helper_opts *opts) 268 { 269 int fd; 270 271 if (!opts) 272 opts = &default_opts; 273 274 fd = socket(family, type, opts->proto); 275 if (fd < 0) { 276 log_err("Failed to create client socket"); 277 return -1; 278 } 279 280 if (settimeo(fd, opts->timeout_ms)) 281 goto error_close; 282 283 if (opts->post_socket_cb && 284 opts->post_socket_cb(fd, opts->cb_opts)) 285 goto error_close; 286 287 return fd; 288 289 error_close: 290 save_errno_close(fd); 291 return -1; 292 } 293 294 int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen, 295 const struct network_helper_opts *opts) 296 { 297 int fd; 298 299 if (!opts) 300 opts = &default_opts; 301 302 fd = client_socket(addr->ss_family, type, opts); 303 if (fd < 0) { 304 log_err("Failed to create client socket"); 305 return -1; 306 } 307 308 if (connect(fd, (const struct sockaddr *)addr, addrlen)) { 309 log_err("Failed to connect to server"); 310 save_errno_close(fd); 311 return -1; 312 } 313 314 return fd; 315 } 316 317 int connect_to_addr_str(int family, int type, const char *addr_str, __u16 port, 318 const struct network_helper_opts *opts) 319 { 320 struct sockaddr_storage addr; 321 socklen_t addrlen; 322 323 if (!opts) 324 opts = &default_opts; 325 326 if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) 327 return -1; 328 329 return connect_to_addr(type, &addr, addrlen, opts); 330 } 331 332 int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) 333 { 334 struct sockaddr_storage addr; 335 socklen_t addrlen, optlen; 336 int type; 337 338 if (!opts) 339 opts = &default_opts; 340 341 optlen = sizeof(type); 342 if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) { 343 log_err("getsockopt(SOL_TYPE)"); 344 return -1; 345 } 346 347 addrlen = sizeof(addr); 348 if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { 349 log_err("Failed to get server addr"); 350 return -1; 351 } 352 353 return connect_to_addr(type, &addr, addrlen, opts); 354 } 355 356 int connect_to_fd(int server_fd, int timeout_ms) 357 { 358 struct network_helper_opts opts = { 359 .timeout_ms = timeout_ms, 360 }; 361 socklen_t optlen; 362 int protocol; 363 364 optlen = sizeof(protocol); 365 if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) { 366 log_err("getsockopt(SOL_PROTOCOL)"); 367 return -1; 368 } 369 opts.proto = protocol; 370 371 return connect_to_fd_opts(server_fd, &opts); 372 } 373 374 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms) 375 { 376 struct sockaddr_storage addr; 377 socklen_t len = sizeof(addr); 378 379 if (settimeo(client_fd, timeout_ms)) 380 return -1; 381 382 if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { 383 log_err("Failed to get server addr"); 384 return -1; 385 } 386 387 if (connect(client_fd, (const struct sockaddr *)&addr, len)) { 388 log_err("Failed to connect to server"); 389 return -1; 390 } 391 392 return 0; 393 } 394 395 int make_sockaddr(int family, const char *addr_str, __u16 port, 396 struct sockaddr_storage *addr, socklen_t *len) 397 { 398 if (family == AF_INET) { 399 struct sockaddr_in *sin = (void *)addr; 400 401 memset(addr, 0, sizeof(*sin)); 402 sin->sin_family = AF_INET; 403 sin->sin_port = htons(port); 404 if (addr_str && 405 inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) { 406 log_err("inet_pton(AF_INET, %s)", addr_str); 407 return -1; 408 } 409 if (len) 410 *len = sizeof(*sin); 411 return 0; 412 } else if (family == AF_INET6) { 413 struct sockaddr_in6 *sin6 = (void *)addr; 414 415 memset(addr, 0, sizeof(*sin6)); 416 sin6->sin6_family = AF_INET6; 417 sin6->sin6_port = htons(port); 418 if (addr_str && 419 inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) { 420 log_err("inet_pton(AF_INET6, %s)", addr_str); 421 return -1; 422 } 423 if (len) 424 *len = sizeof(*sin6); 425 return 0; 426 } else if (family == AF_UNIX) { 427 /* Note that we always use abstract unix sockets to avoid having 428 * to clean up leftover files. 429 */ 430 struct sockaddr_un *sun = (void *)addr; 431 432 memset(addr, 0, sizeof(*sun)); 433 sun->sun_family = family; 434 sun->sun_path[0] = 0; 435 strscpy(sun->sun_path + 1, addr_str, sizeof(sun->sun_path) - 1); 436 if (len) 437 *len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str); 438 return 0; 439 } 440 return -1; 441 } 442 443 char *ping_command(int family) 444 { 445 if (family == AF_INET6) { 446 /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */ 447 if (!system("which ping6 >/dev/null 2>&1")) 448 return "ping6"; 449 else 450 return "ping -6"; 451 } 452 return "ping"; 453 } 454 455 int append_tid(char *str, size_t sz) 456 { 457 size_t end; 458 459 if (!str) 460 return -1; 461 462 end = strlen(str); 463 if (end + 8 > sz) 464 return -1; 465 466 sprintf(&str[end], "%07ld", sys_gettid()); 467 str[end + 7] = '\0'; 468 469 return 0; 470 } 471 472 int remove_netns(const char *name) 473 { 474 char *cmd; 475 int r; 476 477 r = asprintf(&cmd, "ip netns del %s >/dev/null 2>&1", name); 478 if (r < 0) { 479 log_err("Failed to malloc cmd"); 480 return -1; 481 } 482 483 r = system(cmd); 484 free(cmd); 485 return r; 486 } 487 488 int make_netns(const char *name) 489 { 490 char *cmd; 491 int r; 492 493 r = asprintf(&cmd, "ip netns add %s", name); 494 if (r < 0) { 495 log_err("Failed to malloc cmd"); 496 return -1; 497 } 498 499 r = system(cmd); 500 free(cmd); 501 502 if (r) 503 return r; 504 505 r = asprintf(&cmd, "ip -n %s link set lo up", name); 506 if (r < 0) { 507 log_err("Failed to malloc cmd for setting up lo"); 508 remove_netns(name); 509 return -1; 510 } 511 512 r = system(cmd); 513 free(cmd); 514 515 return r; 516 } 517 518 struct nstoken { 519 int orig_netns_fd; 520 }; 521 522 struct nstoken *open_netns(const char *name) 523 { 524 int nsfd; 525 char nspath[PATH_MAX]; 526 int err; 527 struct nstoken *token; 528 529 token = calloc(1, sizeof(struct nstoken)); 530 if (!token) { 531 log_err("Failed to malloc token"); 532 return NULL; 533 } 534 535 token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY); 536 if (token->orig_netns_fd == -1) { 537 log_err("Failed to open(/proc/self/ns/net)"); 538 goto fail; 539 } 540 541 snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); 542 nsfd = open(nspath, O_RDONLY | O_CLOEXEC); 543 if (nsfd == -1) { 544 log_err("Failed to open(%s)", nspath); 545 goto fail; 546 } 547 548 err = setns(nsfd, CLONE_NEWNET); 549 close(nsfd); 550 if (err) { 551 log_err("Failed to setns(nsfd)"); 552 goto fail; 553 } 554 555 return token; 556 fail: 557 if (token->orig_netns_fd != -1) 558 close(token->orig_netns_fd); 559 free(token); 560 return NULL; 561 } 562 563 void close_netns(struct nstoken *token) 564 { 565 if (!token) 566 return; 567 568 if (setns(token->orig_netns_fd, CLONE_NEWNET)) 569 log_err("Failed to setns(orig_netns_fd)"); 570 close(token->orig_netns_fd); 571 free(token); 572 } 573 574 int open_tuntap(const char *dev_name, bool need_mac) 575 { 576 int err = 0; 577 struct ifreq ifr; 578 int fd = open("/dev/net/tun", O_RDWR); 579 580 if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)")) 581 return -1; 582 583 ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); 584 strscpy(ifr.ifr_name, dev_name); 585 586 err = ioctl(fd, TUNSETIFF, &ifr); 587 if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { 588 close(fd); 589 return -1; 590 } 591 592 err = fcntl(fd, F_SETFL, O_NONBLOCK); 593 if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { 594 close(fd); 595 return -1; 596 } 597 598 return fd; 599 } 600 601 int get_socket_local_port(int sock_fd) 602 { 603 struct sockaddr_storage addr; 604 socklen_t addrlen = sizeof(addr); 605 int err; 606 607 err = getsockname(sock_fd, (struct sockaddr *)&addr, &addrlen); 608 if (err < 0) 609 return err; 610 611 if (addr.ss_family == AF_INET) { 612 struct sockaddr_in *sin = (struct sockaddr_in *)&addr; 613 614 return sin->sin_port; 615 } else if (addr.ss_family == AF_INET6) { 616 struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&addr; 617 618 return sin->sin6_port; 619 } 620 621 return -1; 622 } 623 624 int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param) 625 { 626 struct ifreq ifr = {0}; 627 int sockfd, err; 628 629 sockfd = socket(AF_INET, SOCK_DGRAM, 0); 630 if (sockfd < 0) 631 return -errno; 632 633 memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); 634 635 ring_param->cmd = ETHTOOL_GRINGPARAM; 636 ifr.ifr_data = (char *)ring_param; 637 638 if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) { 639 err = errno; 640 close(sockfd); 641 return -err; 642 } 643 644 close(sockfd); 645 return 0; 646 } 647 648 int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param) 649 { 650 struct ifreq ifr = {0}; 651 int sockfd, err; 652 653 sockfd = socket(AF_INET, SOCK_DGRAM, 0); 654 if (sockfd < 0) 655 return -errno; 656 657 memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); 658 659 ring_param->cmd = ETHTOOL_SRINGPARAM; 660 ifr.ifr_data = (char *)ring_param; 661 662 if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) { 663 err = errno; 664 close(sockfd); 665 return -err; 666 } 667 668 close(sockfd); 669 return 0; 670 } 671 672 struct send_recv_arg { 673 int fd; 674 uint32_t bytes; 675 int stop; 676 }; 677 678 static void *send_recv_server(void *arg) 679 { 680 struct send_recv_arg *a = (struct send_recv_arg *)arg; 681 ssize_t nr_sent = 0, bytes = 0; 682 char batch[1500]; 683 int err = 0, fd; 684 685 fd = accept(a->fd, NULL, NULL); 686 while (fd == -1) { 687 if (errno == EINTR) 688 continue; 689 err = -errno; 690 goto done; 691 } 692 693 if (settimeo(fd, 0)) { 694 err = -errno; 695 goto done; 696 } 697 698 while (bytes < a->bytes && !READ_ONCE(a->stop)) { 699 nr_sent = send(fd, &batch, 700 MIN(a->bytes - bytes, sizeof(batch)), 0); 701 if (nr_sent == -1 && errno == EINTR) 702 continue; 703 if (nr_sent == -1) { 704 err = -errno; 705 break; 706 } 707 bytes += nr_sent; 708 } 709 710 if (bytes != a->bytes) { 711 log_err("send %zd expected %u", bytes, a->bytes); 712 if (!err) 713 err = bytes > a->bytes ? -E2BIG : -EINTR; 714 } 715 716 done: 717 if (fd >= 0) 718 close(fd); 719 if (err) { 720 WRITE_ONCE(a->stop, 1); 721 return ERR_PTR(err); 722 } 723 return NULL; 724 } 725 726 int send_recv_data(int lfd, int fd, uint32_t total_bytes) 727 { 728 ssize_t nr_recv = 0, bytes = 0; 729 struct send_recv_arg arg = { 730 .fd = lfd, 731 .bytes = total_bytes, 732 .stop = 0, 733 }; 734 pthread_t srv_thread; 735 void *thread_ret; 736 char batch[1500]; 737 int err = 0; 738 739 err = pthread_create(&srv_thread, NULL, send_recv_server, (void *)&arg); 740 if (err) { 741 log_err("Failed to pthread_create"); 742 return err; 743 } 744 745 /* recv total_bytes */ 746 while (bytes < total_bytes && !READ_ONCE(arg.stop)) { 747 nr_recv = recv(fd, &batch, 748 MIN(total_bytes - bytes, sizeof(batch)), 0); 749 if (nr_recv == -1 && errno == EINTR) 750 continue; 751 if (nr_recv == -1) { 752 err = -errno; 753 break; 754 } 755 bytes += nr_recv; 756 } 757 758 if (bytes != total_bytes) { 759 log_err("recv %zd expected %u", bytes, total_bytes); 760 if (!err) 761 err = bytes > total_bytes ? -E2BIG : -EINTR; 762 } 763 764 WRITE_ONCE(arg.stop, 1); 765 pthread_join(srv_thread, &thread_ret); 766 if (IS_ERR(thread_ret)) { 767 log_err("Failed in thread_ret %ld", PTR_ERR(thread_ret)); 768 err = err ? : PTR_ERR(thread_ret); 769 } 770 771 return err; 772 } 773 774 int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd) 775 { 776 int ifindex, ret; 777 778 if (!ASSERT_TRUE(ingress_fd >= 0 || egress_fd >= 0, 779 "at least one program fd is valid")) 780 return -1; 781 782 ifindex = if_nametoindex(dev); 783 if (!ASSERT_NEQ(ifindex, 0, "get ifindex")) 784 return -1; 785 786 DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex, 787 .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS); 788 DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, 789 .priority = 1, .prog_fd = ingress_fd); 790 DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, 791 .priority = 1, .prog_fd = egress_fd); 792 793 ret = bpf_tc_hook_create(&hook); 794 if (!ASSERT_OK(ret, "create tc hook")) 795 return ret; 796 797 if (ingress_fd >= 0) { 798 hook.attach_point = BPF_TC_INGRESS; 799 ret = bpf_tc_attach(&hook, &opts1); 800 if (!ASSERT_OK(ret, "bpf_tc_attach")) { 801 bpf_tc_hook_destroy(&hook); 802 return ret; 803 } 804 } 805 806 if (egress_fd >= 0) { 807 hook.attach_point = BPF_TC_EGRESS; 808 ret = bpf_tc_attach(&hook, &opts2); 809 if (!ASSERT_OK(ret, "bpf_tc_attach")) { 810 bpf_tc_hook_destroy(&hook); 811 return ret; 812 } 813 } 814 815 return 0; 816 } 817 818 #ifdef TRAFFIC_MONITOR 819 struct tmonitor_ctx { 820 pcap_t *pcap; 821 pcap_dumper_t *dumper; 822 pthread_t thread; 823 int wake_fd; 824 825 volatile bool done; 826 char pkt_fname[PATH_MAX]; 827 int pcap_fd; 828 }; 829 830 static int __base_pr(const char *format, va_list args) 831 { 832 return vfprintf(stdout, format, args); 833 } 834 835 static tm_print_fn_t __tm_pr = __base_pr; 836 837 tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn) 838 { 839 tm_print_fn_t old_print_fn; 840 841 old_print_fn = __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED); 842 843 return old_print_fn; 844 } 845 846 void tm_print(const char *format, ...) 847 { 848 tm_print_fn_t print_fn; 849 va_list args; 850 851 print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED); 852 if (!print_fn) 853 return; 854 855 va_start(args, format); 856 print_fn(format, args); 857 va_end(args); 858 } 859 860 /* Is this packet captured with a Ethernet protocol type? */ 861 static bool is_ethernet(const u_char *packet) 862 { 863 u16 arphdr_type; 864 865 memcpy(&arphdr_type, packet + 8, 2); 866 arphdr_type = ntohs(arphdr_type); 867 868 /* Except the following cases, the protocol type contains the 869 * Ethernet protocol type for the packet. 870 * 871 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html 872 */ 873 switch (arphdr_type) { 874 case 770: /* ARPHRD_FRAD */ 875 case 778: /* ARPHDR_IPGRE */ 876 case 803: /* ARPHRD_IEEE80211_RADIOTAP */ 877 tm_print("Packet captured: arphdr_type=%d\n", arphdr_type); 878 return false; 879 } 880 return true; 881 } 882 883 static const char * const pkt_types[] = { 884 "In", 885 "B", /* Broadcast */ 886 "M", /* Multicast */ 887 "C", /* Captured with the promiscuous mode */ 888 "Out", 889 }; 890 891 static const char *pkt_type_str(u16 pkt_type) 892 { 893 if (pkt_type < ARRAY_SIZE(pkt_types)) 894 return pkt_types[pkt_type]; 895 return "Unknown"; 896 } 897 898 #define MAX_FLAGS_STRLEN 21 899 /* Show the information of the transport layer in the packet */ 900 static void show_transport(const u_char *packet, u16 len, u32 ifindex, 901 const char *src_addr, const char *dst_addr, 902 u16 proto, bool ipv6, u8 pkt_type) 903 { 904 char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = ""; 905 const char *transport_str; 906 u16 src_port, dst_port; 907 struct udphdr *udp; 908 struct tcphdr *tcp; 909 910 ifname = if_indextoname(ifindex, _ifname); 911 if (!ifname) { 912 snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex); 913 ifname = _ifname; 914 } 915 916 if (proto == IPPROTO_UDP) { 917 udp = (struct udphdr *)packet; 918 src_port = ntohs(udp->source); 919 dst_port = ntohs(udp->dest); 920 transport_str = "UDP"; 921 } else if (proto == IPPROTO_TCP) { 922 tcp = (struct tcphdr *)packet; 923 src_port = ntohs(tcp->source); 924 dst_port = ntohs(tcp->dest); 925 transport_str = "TCP"; 926 } else if (proto == IPPROTO_ICMP) { 927 tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n", 928 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, 929 packet[0], packet[1]); 930 return; 931 } else if (proto == IPPROTO_ICMPV6) { 932 tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n", 933 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len, 934 packet[0], packet[1]); 935 return; 936 } else { 937 tm_print("%-7s %-3s %s %s > %s: protocol %d\n", 938 ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4", 939 src_addr, dst_addr, proto); 940 return; 941 } 942 943 /* TCP or UDP*/ 944 945 if (proto == IPPROTO_TCP) 946 snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s", 947 tcp->fin ? ", FIN" : "", 948 tcp->syn ? ", SYN" : "", 949 tcp->rst ? ", RST" : "", 950 tcp->ack ? ", ACK" : ""); 951 952 if (ipv6) 953 tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n", 954 ifname, pkt_type_str(pkt_type), src_addr, src_port, 955 dst_addr, dst_port, transport_str, len, flags); 956 else 957 tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n", 958 ifname, pkt_type_str(pkt_type), src_addr, src_port, 959 dst_addr, dst_port, transport_str, len, flags); 960 } 961 962 static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type) 963 { 964 char src_buf[INET6_ADDRSTRLEN], dst_buf[INET6_ADDRSTRLEN]; 965 struct ipv6hdr *pkt = (struct ipv6hdr *)packet; 966 const char *src, *dst; 967 u_char proto; 968 969 src = inet_ntop(AF_INET6, &pkt->saddr, src_buf, sizeof(src_buf)); 970 if (!src) 971 src = "<invalid>"; 972 dst = inet_ntop(AF_INET6, &pkt->daddr, dst_buf, sizeof(dst_buf)); 973 if (!dst) 974 dst = "<invalid>"; 975 proto = pkt->nexthdr; 976 show_transport(packet + sizeof(struct ipv6hdr), 977 ntohs(pkt->payload_len), 978 ifindex, src, dst, proto, true, pkt_type); 979 } 980 981 static void show_ipv4_packet(const u_char *packet, u32 ifindex, u8 pkt_type) 982 { 983 char src_buf[INET_ADDRSTRLEN], dst_buf[INET_ADDRSTRLEN]; 984 struct iphdr *pkt = (struct iphdr *)packet; 985 const char *src, *dst; 986 u_char proto; 987 988 src = inet_ntop(AF_INET, &pkt->saddr, src_buf, sizeof(src_buf)); 989 if (!src) 990 src = "<invalid>"; 991 dst = inet_ntop(AF_INET, &pkt->daddr, dst_buf, sizeof(dst_buf)); 992 if (!dst) 993 dst = "<invalid>"; 994 proto = pkt->protocol; 995 show_transport(packet + sizeof(struct iphdr), 996 ntohs(pkt->tot_len), 997 ifindex, src, dst, proto, false, pkt_type); 998 } 999 1000 static void *traffic_monitor_thread(void *arg) 1001 { 1002 char *ifname, _ifname[IF_NAMESIZE]; 1003 const u_char *packet, *payload; 1004 struct tmonitor_ctx *ctx = arg; 1005 pcap_dumper_t *dumper = ctx->dumper; 1006 int fd = ctx->pcap_fd, nfds, r; 1007 int wake_fd = ctx->wake_fd; 1008 struct pcap_pkthdr header; 1009 pcap_t *pcap = ctx->pcap; 1010 u32 ifindex; 1011 fd_set fds; 1012 u16 proto; 1013 u8 ptype; 1014 1015 nfds = (fd > wake_fd ? fd : wake_fd) + 1; 1016 FD_ZERO(&fds); 1017 1018 while (!ctx->done) { 1019 FD_SET(fd, &fds); 1020 FD_SET(wake_fd, &fds); 1021 r = select(nfds, &fds, NULL, NULL, NULL); 1022 if (!r) 1023 continue; 1024 if (r < 0) { 1025 if (errno == EINTR) 1026 continue; 1027 log_err("Fail to select on pcap fd and wake fd"); 1028 break; 1029 } 1030 1031 /* This instance of pcap is non-blocking */ 1032 packet = pcap_next(pcap, &header); 1033 if (!packet) 1034 continue; 1035 1036 /* According to the man page of pcap_dump(), first argument 1037 * is the pcap_dumper_t pointer even it's argument type is 1038 * u_char *. 1039 */ 1040 pcap_dump((u_char *)dumper, &header, packet); 1041 1042 /* Not sure what other types of packets look like. Here, we 1043 * parse only Ethernet and compatible packets. 1044 */ 1045 if (!is_ethernet(packet)) 1046 continue; 1047 1048 /* Skip SLL2 header 1049 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html 1050 * 1051 * Although the document doesn't mention that, the payload 1052 * doesn't include the Ethernet header. The payload starts 1053 * from the first byte of the network layer header. 1054 */ 1055 payload = packet + 20; 1056 1057 memcpy(&proto, packet, 2); 1058 proto = ntohs(proto); 1059 memcpy(&ifindex, packet + 4, 4); 1060 ifindex = ntohl(ifindex); 1061 ptype = packet[10]; 1062 1063 if (proto == ETH_P_IPV6) { 1064 show_ipv6_packet(payload, ifindex, ptype); 1065 } else if (proto == ETH_P_IP) { 1066 show_ipv4_packet(payload, ifindex, ptype); 1067 } else { 1068 ifname = if_indextoname(ifindex, _ifname); 1069 if (!ifname) { 1070 snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex); 1071 ifname = _ifname; 1072 } 1073 1074 tm_print("%-7s %-3s Unknown network protocol type 0x%x\n", 1075 ifname, pkt_type_str(ptype), proto); 1076 } 1077 } 1078 1079 return NULL; 1080 } 1081 1082 /* Prepare the pcap handle to capture packets. 1083 * 1084 * This pcap is non-blocking and immediate mode is enabled to receive 1085 * captured packets as soon as possible. The snaplen is set to 1024 bytes 1086 * to limit the size of captured content. The format of the link-layer 1087 * header is set to DLT_LINUX_SLL2 to enable handling various link-layer 1088 * technologies. 1089 */ 1090 static pcap_t *traffic_monitor_prepare_pcap(void) 1091 { 1092 char errbuf[PCAP_ERRBUF_SIZE]; 1093 pcap_t *pcap; 1094 int r; 1095 1096 /* Listen on all NICs in the namespace */ 1097 pcap = pcap_create("any", errbuf); 1098 if (!pcap) { 1099 log_err("Failed to open pcap: %s", errbuf); 1100 return NULL; 1101 } 1102 /* Limit the size of the packet (first N bytes) */ 1103 r = pcap_set_snaplen(pcap, 1024); 1104 if (r) { 1105 log_err("Failed to set snaplen: %s", pcap_geterr(pcap)); 1106 goto error; 1107 } 1108 /* To receive packets as fast as possible */ 1109 r = pcap_set_immediate_mode(pcap, 1); 1110 if (r) { 1111 log_err("Failed to set immediate mode: %s", pcap_geterr(pcap)); 1112 goto error; 1113 } 1114 r = pcap_setnonblock(pcap, 1, errbuf); 1115 if (r) { 1116 log_err("Failed to set nonblock: %s", errbuf); 1117 goto error; 1118 } 1119 r = pcap_activate(pcap); 1120 if (r) { 1121 log_err("Failed to activate pcap: %s", pcap_geterr(pcap)); 1122 goto error; 1123 } 1124 /* Determine the format of the link-layer header */ 1125 r = pcap_set_datalink(pcap, DLT_LINUX_SLL2); 1126 if (r) { 1127 log_err("Failed to set datalink: %s", pcap_geterr(pcap)); 1128 goto error; 1129 } 1130 1131 return pcap; 1132 error: 1133 pcap_close(pcap); 1134 return NULL; 1135 } 1136 1137 static void encode_test_name(char *buf, size_t len, const char *test_name, const char *subtest_name) 1138 { 1139 char *p; 1140 1141 if (subtest_name) 1142 snprintf(buf, len, "%s__%s", test_name, subtest_name); 1143 else 1144 snprintf(buf, len, "%s", test_name); 1145 while ((p = strchr(buf, '/'))) 1146 *p = '_'; 1147 while ((p = strchr(buf, ' '))) 1148 *p = '_'; 1149 } 1150 1151 #define PCAP_DIR "/tmp/tmon_pcap" 1152 1153 /* Start to monitor the network traffic in the given network namespace. 1154 * 1155 * netns: the name of the network namespace to monitor. If NULL, the 1156 * current network namespace is monitored. 1157 * test_name: the name of the running test. 1158 * subtest_name: the name of the running subtest if there is. It should be 1159 * NULL if it is not a subtest. 1160 * 1161 * This function will start a thread to capture packets going through NICs 1162 * in the give network namespace. 1163 */ 1164 struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name, 1165 const char *subtest_name) 1166 { 1167 struct nstoken *nstoken = NULL; 1168 struct tmonitor_ctx *ctx; 1169 char test_name_buf[64]; 1170 static int tmon_seq; 1171 int r; 1172 1173 if (netns) { 1174 nstoken = open_netns(netns); 1175 if (!nstoken) 1176 return NULL; 1177 } 1178 ctx = malloc(sizeof(*ctx)); 1179 if (!ctx) { 1180 log_err("Failed to malloc ctx"); 1181 goto fail_ctx; 1182 } 1183 memset(ctx, 0, sizeof(*ctx)); 1184 1185 encode_test_name(test_name_buf, sizeof(test_name_buf), test_name, subtest_name); 1186 snprintf(ctx->pkt_fname, sizeof(ctx->pkt_fname), 1187 PCAP_DIR "/packets-%d-%d-%s-%s.log", getpid(), tmon_seq++, 1188 test_name_buf, netns ? netns : "unknown"); 1189 1190 r = mkdir(PCAP_DIR, 0755); 1191 if (r && errno != EEXIST) { 1192 log_err("Failed to create " PCAP_DIR); 1193 goto fail_pcap; 1194 } 1195 1196 ctx->pcap = traffic_monitor_prepare_pcap(); 1197 if (!ctx->pcap) 1198 goto fail_pcap; 1199 ctx->pcap_fd = pcap_get_selectable_fd(ctx->pcap); 1200 if (ctx->pcap_fd < 0) { 1201 log_err("Failed to get pcap fd"); 1202 goto fail_dumper; 1203 } 1204 1205 /* Create a packet file */ 1206 ctx->dumper = pcap_dump_open(ctx->pcap, ctx->pkt_fname); 1207 if (!ctx->dumper) { 1208 log_err("Failed to open pcap dump: %s", ctx->pkt_fname); 1209 goto fail_dumper; 1210 } 1211 1212 /* Create an eventfd to wake up the monitor thread */ 1213 ctx->wake_fd = eventfd(0, 0); 1214 if (ctx->wake_fd < 0) { 1215 log_err("Failed to create eventfd"); 1216 goto fail_eventfd; 1217 } 1218 1219 r = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx); 1220 if (r) { 1221 log_err("Failed to create thread"); 1222 goto fail; 1223 } 1224 1225 close_netns(nstoken); 1226 1227 return ctx; 1228 1229 fail: 1230 close(ctx->wake_fd); 1231 1232 fail_eventfd: 1233 pcap_dump_close(ctx->dumper); 1234 unlink(ctx->pkt_fname); 1235 1236 fail_dumper: 1237 pcap_close(ctx->pcap); 1238 1239 fail_pcap: 1240 free(ctx); 1241 1242 fail_ctx: 1243 close_netns(nstoken); 1244 1245 return NULL; 1246 } 1247 1248 static void traffic_monitor_release(struct tmonitor_ctx *ctx) 1249 { 1250 pcap_close(ctx->pcap); 1251 pcap_dump_close(ctx->dumper); 1252 1253 close(ctx->wake_fd); 1254 1255 free(ctx); 1256 } 1257 1258 /* Stop the network traffic monitor. 1259 * 1260 * ctx: the context returned by traffic_monitor_start() 1261 */ 1262 void traffic_monitor_stop(struct tmonitor_ctx *ctx) 1263 { 1264 __u64 w = 1; 1265 1266 if (!ctx) 1267 return; 1268 1269 /* Stop the monitor thread */ 1270 ctx->done = true; 1271 /* Wake up the background thread. */ 1272 write(ctx->wake_fd, &w, sizeof(w)); 1273 pthread_join(ctx->thread, NULL); 1274 1275 tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1); 1276 1277 traffic_monitor_release(ctx); 1278 } 1279 1280 #endif /* TRAFFIC_MONITOR */ 1281