1 // SPDX-License-Identifier: GPL-2.0 2 3 #define _GNU_SOURCE 4 5 #include <errno.h> 6 #include <limits.h> 7 #include <fcntl.h> 8 #include <string.h> 9 #include <stdarg.h> 10 #include <stdbool.h> 11 #include <stdint.h> 12 #include <stdio.h> 13 #include <stdlib.h> 14 #include <strings.h> 15 #include <signal.h> 16 #include <unistd.h> 17 #include <time.h> 18 19 #include <sys/ioctl.h> 20 #include <sys/poll.h> 21 #include <sys/random.h> 22 #include <sys/sendfile.h> 23 #include <sys/stat.h> 24 #include <sys/socket.h> 25 #include <sys/types.h> 26 #include <sys/mman.h> 27 28 #include <arpa/inet.h> 29 30 #include <netdb.h> 31 #include <netinet/in.h> 32 33 #include <linux/tcp.h> 34 #include <linux/time_types.h> 35 #include <linux/sockios.h> 36 37 extern int optind; 38 39 #ifndef IPPROTO_MPTCP 40 #define IPPROTO_MPTCP 262 41 #endif 42 #ifndef TCP_ULP 43 #define TCP_ULP 31 44 #endif 45 46 static int poll_timeout = 10 * 1000; 47 static bool listen_mode; 48 static bool quit; 49 50 enum cfg_mode { 51 CFG_MODE_POLL, 52 CFG_MODE_MMAP, 53 CFG_MODE_SENDFILE, 54 }; 55 56 enum cfg_peek { 57 CFG_NONE_PEEK, 58 CFG_WITH_PEEK, 59 CFG_AFTER_PEEK, 60 }; 61 62 static enum cfg_mode cfg_mode = CFG_MODE_POLL; 63 static enum cfg_peek cfg_peek = CFG_NONE_PEEK; 64 static const char *cfg_host; 65 static const char *cfg_port = "12000"; 66 static int cfg_sock_proto = IPPROTO_MPTCP; 67 static int pf = AF_INET; 68 static int cfg_sndbuf; 69 static int cfg_rcvbuf; 70 static bool cfg_join; 71 static bool cfg_remove; 72 static unsigned int cfg_time; 73 static unsigned int cfg_do_w; 74 static int cfg_wait; 75 static uint32_t cfg_mark; 76 static char *cfg_input; 77 static int cfg_repeat = 1; 78 static int cfg_truncate; 79 static int cfg_rcv_trunc; 80 81 struct cfg_cmsg_types { 82 unsigned int cmsg_enabled:1; 83 unsigned int timestampns:1; 84 unsigned int tcp_inq:1; 85 }; 86 87 struct cfg_sockopt_types { 88 unsigned int transparent:1; 89 unsigned int mptfo:1; 90 }; 91 92 struct tcp_inq_state { 93 unsigned int last; 94 bool expect_eof; 95 }; 96 97 struct wstate { 98 char buf[8192]; 99 unsigned int len; 100 unsigned int off; 101 unsigned int total_len; 102 }; 103 104 static struct tcp_inq_state tcp_inq; 105 106 static struct cfg_cmsg_types cfg_cmsg_types; 107 static struct cfg_sockopt_types cfg_sockopt_types; 108 109 static void die_usage(void) 110 { 111 fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-f offset] [-i file] [-I num] [-j] [-l] " 112 "[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-r num] [-R num] " 113 "[-s MPTCP|TCP] [-S num] [-t num] [-T num] [-w sec] connect_address\n"); 114 fprintf(stderr, "\t-6 use ipv6\n"); 115 fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n"); 116 fprintf(stderr, "\t-f offset -- stop the I/O after receiving and sending the specified amount " 117 "of bytes. If there are unread bytes in the receive queue, that will cause a MPTCP " 118 "fastclose at close/shutdown. If offset is negative, expect the peer to close before " 119 "all the local data as been sent, thus toleration errors on write and EPIPE signals\n"); 120 fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin"); 121 fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num " 122 "incoming connections, in client mode, disconnect and reconnect to the server\n"); 123 fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down " 124 "-- for MPJ tests\n"); 125 fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n"); 126 fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); 127 fprintf(stderr, "\t-M mark -- set socket packet mark\n"); 128 fprintf(stderr, "\t-o option -- test sockopt <option>\n"); 129 fprintf(stderr, "\t-p num -- use port num\n"); 130 fprintf(stderr, 131 "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n"); 132 fprintf(stderr, "\t-r num -- enable slow mode, limiting each write to num bytes " 133 "-- for remove addr tests\n"); 134 fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); 135 fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n"); 136 fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); 137 fprintf(stderr, "\t-t num -- set poll timeout to num\n"); 138 fprintf(stderr, "\t-T num -- set expected runtime to num ms\n"); 139 fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); 140 exit(1); 141 } 142 143 static void xerror(const char *fmt, ...) 144 { 145 va_list ap; 146 147 va_start(ap, fmt); 148 vfprintf(stderr, fmt, ap); 149 va_end(ap); 150 exit(1); 151 } 152 153 static void handle_signal(int nr) 154 { 155 quit = true; 156 } 157 158 static const char *getxinfo_strerr(int err) 159 { 160 if (err == EAI_SYSTEM) 161 return strerror(errno); 162 163 return gai_strerror(err); 164 } 165 166 static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen, 167 char *host, socklen_t hostlen, 168 char *serv, socklen_t servlen) 169 { 170 int flags = NI_NUMERICHOST | NI_NUMERICSERV; 171 int err = getnameinfo(addr, addrlen, host, hostlen, serv, servlen, 172 flags); 173 174 if (err) { 175 const char *errstr = getxinfo_strerr(err); 176 177 fprintf(stderr, "Fatal: getnameinfo: %s\n", errstr); 178 exit(1); 179 } 180 } 181 182 static void xgetaddrinfo(const char *node, const char *service, 183 struct addrinfo *hints, 184 struct addrinfo **res) 185 { 186 int err; 187 188 again: 189 err = getaddrinfo(node, service, hints, res); 190 if (err) { 191 const char *errstr; 192 193 /* glibc starts to support MPTCP since v2.42. 194 * For older versions, use IPPROTO_TCP to resolve, 195 * and use TCP/MPTCP to create socket. 196 * Link: https://sourceware.org/git/?p=glibc.git;a=commit;h=a8e9022e0f82 197 */ 198 if (err == EAI_SOCKTYPE) { 199 hints->ai_protocol = IPPROTO_TCP; 200 goto again; 201 } 202 203 errstr = getxinfo_strerr(err); 204 205 fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", 206 node ? node : "", service ? service : "", errstr); 207 exit(1); 208 } 209 } 210 211 static void set_rcvbuf(int fd, unsigned int size) 212 { 213 int err; 214 215 err = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)); 216 if (err) { 217 perror("set SO_RCVBUF"); 218 exit(1); 219 } 220 } 221 222 static void set_sndbuf(int fd, unsigned int size) 223 { 224 int err; 225 226 err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size)); 227 if (err) { 228 perror("set SO_SNDBUF"); 229 exit(1); 230 } 231 } 232 233 static void set_mark(int fd, uint32_t mark) 234 { 235 int err; 236 237 err = setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); 238 if (err) { 239 perror("set SO_MARK"); 240 exit(1); 241 } 242 } 243 244 static void set_transparent(int fd, int pf) 245 { 246 int one = 1; 247 248 switch (pf) { 249 case AF_INET: 250 if (-1 == setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one))) 251 perror("IP_TRANSPARENT"); 252 break; 253 case AF_INET6: 254 if (-1 == setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one))) 255 perror("IPV6_TRANSPARENT"); 256 break; 257 } 258 } 259 260 static void set_mptfo(int fd, int pf) 261 { 262 int qlen = 25; 263 264 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) 265 perror("TCP_FASTOPEN"); 266 } 267 268 static int do_ulp_so(int sock, const char *name) 269 { 270 return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name)); 271 } 272 273 #define X(m) xerror("%s:%u: %s: failed for proto %d at line %u", __FILE__, __LINE__, (m), proto, line) 274 static void sock_test_tcpulp(int sock, int proto, unsigned int line) 275 { 276 socklen_t buflen = 8; 277 char buf[8] = ""; 278 int ret = getsockopt(sock, IPPROTO_TCP, TCP_ULP, buf, &buflen); 279 280 if (ret != 0) 281 X("getsockopt"); 282 283 if (buflen > 0) { 284 if (strcmp(buf, "mptcp") != 0) 285 xerror("unexpected ULP '%s' for proto %d at line %u", buf, proto, line); 286 ret = do_ulp_so(sock, "tls"); 287 if (ret == 0) 288 X("setsockopt"); 289 } else if (proto == IPPROTO_MPTCP) { 290 ret = do_ulp_so(sock, "tls"); 291 if (ret != -1) 292 X("setsockopt"); 293 } 294 295 ret = do_ulp_so(sock, "mptcp"); 296 if (ret != -1) 297 X("setsockopt"); 298 299 #undef X 300 } 301 302 #define SOCK_TEST_TCPULP(s, p) sock_test_tcpulp((s), (p), __LINE__) 303 304 static int sock_listen_mptcp(const char * const listenaddr, 305 const char * const port) 306 { 307 int sock = -1; 308 struct addrinfo hints = { 309 .ai_protocol = IPPROTO_MPTCP, 310 .ai_socktype = SOCK_STREAM, 311 .ai_flags = AI_PASSIVE | AI_NUMERICHOST 312 }; 313 314 hints.ai_family = pf; 315 316 struct addrinfo *a, *addr; 317 int one = 1; 318 319 xgetaddrinfo(listenaddr, port, &hints, &addr); 320 hints.ai_family = pf; 321 322 for (a = addr; a; a = a->ai_next) { 323 sock = socket(a->ai_family, a->ai_socktype, cfg_sock_proto); 324 if (sock < 0) 325 continue; 326 327 SOCK_TEST_TCPULP(sock, cfg_sock_proto); 328 329 if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, 330 sizeof(one))) 331 perror("setsockopt"); 332 333 if (cfg_sockopt_types.transparent) 334 set_transparent(sock, pf); 335 336 if (cfg_sockopt_types.mptfo) 337 set_mptfo(sock, pf); 338 339 if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) 340 break; /* success */ 341 342 perror("bind"); 343 close(sock); 344 sock = -1; 345 } 346 347 freeaddrinfo(addr); 348 349 if (sock < 0) { 350 fprintf(stderr, "Could not create listen socket\n"); 351 return sock; 352 } 353 354 SOCK_TEST_TCPULP(sock, cfg_sock_proto); 355 356 if (listen(sock, 20)) { 357 perror("listen"); 358 close(sock); 359 return -1; 360 } 361 362 SOCK_TEST_TCPULP(sock, cfg_sock_proto); 363 364 return sock; 365 } 366 367 static int sock_connect_mptcp(const char * const remoteaddr, 368 const char * const port, int proto, 369 struct addrinfo **peer, 370 int infd, struct wstate *winfo) 371 { 372 struct addrinfo hints = { 373 .ai_protocol = IPPROTO_MPTCP, 374 .ai_socktype = SOCK_STREAM, 375 }; 376 struct addrinfo *a, *addr; 377 int syn_copied = 0; 378 int sock = -1; 379 380 hints.ai_family = pf; 381 382 xgetaddrinfo(remoteaddr, port, &hints, &addr); 383 for (a = addr; a; a = a->ai_next) { 384 sock = socket(a->ai_family, a->ai_socktype, proto); 385 if (sock < 0) { 386 perror("socket"); 387 continue; 388 } 389 390 SOCK_TEST_TCPULP(sock, proto); 391 392 if (cfg_mark) 393 set_mark(sock, cfg_mark); 394 395 if (cfg_sockopt_types.mptfo) { 396 if (!winfo->total_len) 397 winfo->total_len = winfo->len = read(infd, winfo->buf, 398 sizeof(winfo->buf)); 399 400 syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN, 401 a->ai_addr, a->ai_addrlen); 402 if (syn_copied >= 0) { 403 winfo->off = syn_copied; 404 winfo->len -= syn_copied; 405 *peer = a; 406 break; /* success */ 407 } 408 } else { 409 if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { 410 *peer = a; 411 break; /* success */ 412 } 413 } 414 if (cfg_sockopt_types.mptfo) { 415 perror("sendto()"); 416 close(sock); 417 sock = -1; 418 } else { 419 perror("connect()"); 420 close(sock); 421 sock = -1; 422 } 423 } 424 425 freeaddrinfo(addr); 426 if (sock != -1) 427 SOCK_TEST_TCPULP(sock, proto); 428 return sock; 429 } 430 431 static size_t do_rnd_write(const int fd, char *buf, const size_t len) 432 { 433 static bool first = true; 434 unsigned int do_w; 435 ssize_t bw; 436 437 do_w = rand() & 0xffff; 438 if (do_w == 0 || do_w > len) 439 do_w = len; 440 441 if (cfg_join && first && do_w > 100) 442 do_w = 100; 443 444 if (cfg_remove && do_w > cfg_do_w) 445 do_w = cfg_do_w; 446 447 bw = write(fd, buf, do_w); 448 if (bw < 0) 449 return bw; 450 451 /* let the join handshake complete, before going on */ 452 if (cfg_join && first) { 453 usleep(200000); 454 first = false; 455 } 456 457 if (cfg_remove) 458 usleep(200000); 459 460 return bw; 461 } 462 463 static size_t do_write(const int fd, char *buf, const size_t len) 464 { 465 size_t offset = 0; 466 467 while (offset < len) { 468 size_t written; 469 ssize_t bw; 470 471 bw = write(fd, buf + offset, len - offset); 472 if (bw < 0) { 473 perror("write"); 474 return 0; 475 } 476 477 written = (size_t)bw; 478 offset += written; 479 } 480 481 return offset; 482 } 483 484 static void process_cmsg(struct msghdr *msgh) 485 { 486 struct __kernel_timespec ts; 487 bool inq_found = false; 488 bool ts_found = false; 489 unsigned int inq = 0; 490 struct cmsghdr *cmsg; 491 492 for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) { 493 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) { 494 memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts)); 495 ts_found = true; 496 continue; 497 } 498 if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) { 499 memcpy(&inq, CMSG_DATA(cmsg), sizeof(inq)); 500 inq_found = true; 501 continue; 502 } 503 504 } 505 506 if (cfg_cmsg_types.timestampns) { 507 if (!ts_found) 508 xerror("TIMESTAMPNS not present\n"); 509 } 510 511 if (cfg_cmsg_types.tcp_inq) { 512 if (!inq_found) 513 xerror("TCP_INQ not present\n"); 514 515 if (inq > 1024) 516 xerror("tcp_inq %u is larger than one kbyte\n", inq); 517 tcp_inq.last = inq; 518 } 519 } 520 521 static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len) 522 { 523 char msg_buf[8192]; 524 struct iovec iov = { 525 .iov_base = buf, 526 .iov_len = len, 527 }; 528 struct msghdr msg = { 529 .msg_iov = &iov, 530 .msg_iovlen = 1, 531 .msg_control = msg_buf, 532 .msg_controllen = sizeof(msg_buf), 533 }; 534 int flags = 0; 535 unsigned int last_hint = tcp_inq.last; 536 int ret = recvmsg(fd, &msg, flags); 537 538 if (ret <= 0) { 539 if (ret == 0 && tcp_inq.expect_eof) 540 return ret; 541 542 if (ret == 0 && cfg_cmsg_types.tcp_inq) 543 if (last_hint != 1 && last_hint != 0) 544 xerror("EOF but last tcp_inq hint was %u\n", last_hint); 545 546 return ret; 547 } 548 549 if (tcp_inq.expect_eof) 550 xerror("expected EOF, last_hint %u, now %u\n", 551 last_hint, tcp_inq.last); 552 553 if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled) 554 xerror("got %lu bytes of cmsg data, expected 0\n", 555 (unsigned long)msg.msg_controllen); 556 557 if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled) 558 xerror("%s\n", "got no cmsg data"); 559 560 if (msg.msg_controllen) 561 process_cmsg(&msg); 562 563 if (cfg_cmsg_types.tcp_inq) { 564 if ((size_t)ret < len && last_hint > (unsigned int)ret) { 565 if (ret + 1 != (int)last_hint) { 566 int next = read(fd, msg_buf, sizeof(msg_buf)); 567 568 xerror("read %u of %u, last_hint was %u tcp_inq hint now %u next_read returned %d/%m\n", 569 ret, (unsigned int)len, last_hint, tcp_inq.last, next); 570 } else { 571 tcp_inq.expect_eof = true; 572 } 573 } 574 } 575 576 return ret; 577 } 578 579 static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) 580 { 581 int ret = 0; 582 char tmp[16384]; 583 size_t cap = rand(); 584 585 cap &= 0xffff; 586 587 if (cap == 0) 588 cap = 1; 589 else if (cap > len) 590 cap = len; 591 592 if (cfg_peek == CFG_WITH_PEEK) { 593 ret = recv(fd, buf, cap, MSG_PEEK); 594 ret = (ret < 0) ? ret : read(fd, tmp, ret); 595 } else if (cfg_peek == CFG_AFTER_PEEK) { 596 ret = recv(fd, buf, cap, MSG_PEEK); 597 ret = (ret < 0) ? ret : read(fd, buf, cap); 598 } else if (cfg_cmsg_types.cmsg_enabled) { 599 ret = do_recvmsg_cmsg(fd, buf, cap); 600 } else { 601 ret = read(fd, buf, cap); 602 } 603 604 return ret; 605 } 606 607 static void set_nonblock(int fd, bool nonblock) 608 { 609 int flags = fcntl(fd, F_GETFL); 610 611 if (flags == -1) 612 return; 613 614 if (nonblock) 615 fcntl(fd, F_SETFL, flags | O_NONBLOCK); 616 else 617 fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); 618 } 619 620 static void shut_wr(int fd) 621 { 622 /* Close our write side, ev. give some time 623 * for address notification and/or checking 624 * the current status 625 */ 626 if (cfg_wait) 627 usleep(cfg_wait); 628 629 shutdown(fd, SHUT_WR); 630 } 631 632 static int copyfd_io_poll(int infd, int peerfd, int outfd, 633 bool *in_closed_after_out, struct wstate *winfo) 634 { 635 struct pollfd fds = { 636 .fd = peerfd, 637 .events = POLLIN | POLLOUT, 638 }; 639 unsigned int total_wlen = 0, total_rlen = 0; 640 641 set_nonblock(peerfd, true); 642 643 for (;;) { 644 char rbuf[8192]; 645 ssize_t len; 646 647 if (fds.events == 0 || quit) 648 break; 649 650 switch (poll(&fds, 1, poll_timeout)) { 651 case -1: 652 if (errno == EINTR) 653 continue; 654 perror("poll"); 655 return 1; 656 case 0: 657 fprintf(stderr, "%s: poll timed out (events: " 658 "POLLIN %u, POLLOUT %u)\n", __func__, 659 fds.events & POLLIN, fds.events & POLLOUT); 660 return 2; 661 } 662 663 if (fds.revents & POLLIN) { 664 ssize_t rb = sizeof(rbuf); 665 666 /* limit the total amount of read data to the trunc value*/ 667 if (cfg_truncate > 0) { 668 if (rb + total_rlen > cfg_truncate) 669 rb = cfg_truncate - total_rlen; 670 len = read(peerfd, rbuf, rb); 671 } else { 672 len = do_rnd_read(peerfd, rbuf, sizeof(rbuf)); 673 } 674 if (len == 0) { 675 /* no more data to receive: 676 * peer has closed its write side 677 */ 678 fds.events &= ~POLLIN; 679 680 if ((fds.events & POLLOUT) == 0) { 681 *in_closed_after_out = true; 682 /* and nothing more to send */ 683 break; 684 } 685 686 /* Else, still have data to transmit */ 687 } else if (len < 0) { 688 if (cfg_rcv_trunc) 689 return 0; 690 perror("read"); 691 return 3; 692 } 693 694 total_rlen += len; 695 do_write(outfd, rbuf, len); 696 } 697 698 if (fds.revents & POLLOUT) { 699 if (winfo->len == 0) { 700 winfo->off = 0; 701 winfo->len = read(infd, winfo->buf, sizeof(winfo->buf)); 702 } 703 704 if (winfo->len > 0) { 705 ssize_t bw; 706 707 /* limit the total amount of written data to the trunc value */ 708 if (cfg_truncate > 0 && winfo->len + total_wlen > cfg_truncate) 709 winfo->len = cfg_truncate - total_wlen; 710 711 bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); 712 if (bw < 0) { 713 if (cfg_rcv_trunc) 714 return 0; 715 perror("write"); 716 return 111; 717 } 718 719 winfo->off += bw; 720 winfo->len -= bw; 721 total_wlen += bw; 722 } else if (winfo->len == 0) { 723 /* We have no more data to send. */ 724 fds.events &= ~POLLOUT; 725 726 if ((fds.events & POLLIN) == 0) 727 /* ... and peer also closed already */ 728 break; 729 730 shut_wr(peerfd); 731 } else { 732 if (errno == EINTR) 733 continue; 734 perror("read"); 735 return 4; 736 } 737 } 738 739 if (fds.revents & (POLLERR | POLLNVAL)) { 740 if (cfg_rcv_trunc) 741 return 0; 742 fprintf(stderr, "Unexpected revents: " 743 "POLLERR/POLLNVAL(%x)\n", fds.revents); 744 return 5; 745 } 746 747 if (cfg_truncate > 0 && total_wlen >= cfg_truncate && 748 total_rlen >= cfg_truncate) 749 break; 750 } 751 752 /* leave some time for late join/announce */ 753 if (cfg_remove && !quit) 754 usleep(cfg_wait); 755 756 return 0; 757 } 758 759 static int do_recvfile(int infd, int outfd) 760 { 761 ssize_t r; 762 763 do { 764 char buf[16384]; 765 766 r = do_rnd_read(infd, buf, sizeof(buf)); 767 if (r > 0) { 768 if (write(outfd, buf, r) != r) 769 break; 770 } else if (r < 0) { 771 perror("read"); 772 } 773 } while (r > 0); 774 775 return (int)r; 776 } 777 778 static int spool_buf(int fd, struct wstate *winfo) 779 { 780 while (winfo->len) { 781 int ret = write(fd, winfo->buf + winfo->off, winfo->len); 782 783 if (ret < 0) { 784 perror("write"); 785 return 4; 786 } 787 winfo->off += ret; 788 winfo->len -= ret; 789 } 790 return 0; 791 } 792 793 static int do_mmap(int infd, int outfd, unsigned int size, 794 struct wstate *winfo) 795 { 796 char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0); 797 ssize_t ret = 0, off = winfo->total_len; 798 size_t rem; 799 800 if (inbuf == MAP_FAILED) { 801 perror("mmap"); 802 return 1; 803 } 804 805 ret = spool_buf(outfd, winfo); 806 if (ret < 0) 807 return ret; 808 809 rem = size - winfo->total_len; 810 811 while (rem > 0) { 812 ret = write(outfd, inbuf + off, rem); 813 814 if (ret < 0) { 815 perror("write"); 816 break; 817 } 818 819 off += ret; 820 rem -= ret; 821 } 822 823 munmap(inbuf, size); 824 return rem; 825 } 826 827 static int get_infd_size(int fd) 828 { 829 struct stat sb; 830 ssize_t count; 831 int err; 832 833 err = fstat(fd, &sb); 834 if (err < 0) { 835 perror("fstat"); 836 return -1; 837 } 838 839 if ((sb.st_mode & S_IFMT) != S_IFREG) { 840 fprintf(stderr, "%s: stdin is not a regular file\n", __func__); 841 return -2; 842 } 843 844 count = sb.st_size; 845 if (count > INT_MAX) { 846 fprintf(stderr, "File too large: %zu\n", count); 847 return -3; 848 } 849 850 return (int)count; 851 } 852 853 static int do_sendfile(int infd, int outfd, unsigned int count, 854 struct wstate *winfo) 855 { 856 int ret = spool_buf(outfd, winfo); 857 858 if (ret < 0) 859 return ret; 860 861 count -= winfo->total_len; 862 863 while (count > 0) { 864 ssize_t r; 865 866 r = sendfile(outfd, infd, NULL, count); 867 if (r < 0) { 868 perror("sendfile"); 869 return 3; 870 } 871 872 count -= r; 873 } 874 875 return 0; 876 } 877 878 static int copyfd_io_mmap(int infd, int peerfd, int outfd, 879 unsigned int size, bool *in_closed_after_out, 880 struct wstate *winfo) 881 { 882 int err; 883 884 if (listen_mode) { 885 err = do_recvfile(peerfd, outfd); 886 if (err) 887 return err; 888 889 err = do_mmap(infd, peerfd, size, winfo); 890 } else { 891 err = do_mmap(infd, peerfd, size, winfo); 892 if (err) 893 return err; 894 895 shut_wr(peerfd); 896 897 err = do_recvfile(peerfd, outfd); 898 *in_closed_after_out = true; 899 } 900 901 return err; 902 } 903 904 static int copyfd_io_sendfile(int infd, int peerfd, int outfd, 905 unsigned int size, bool *in_closed_after_out, struct wstate *winfo) 906 { 907 int err; 908 909 if (listen_mode) { 910 err = do_recvfile(peerfd, outfd); 911 if (err) 912 return err; 913 914 err = do_sendfile(infd, peerfd, size, winfo); 915 } else { 916 err = do_sendfile(infd, peerfd, size, winfo); 917 if (err) 918 return err; 919 920 shut_wr(peerfd); 921 922 err = do_recvfile(peerfd, outfd); 923 *in_closed_after_out = true; 924 } 925 926 return err; 927 } 928 929 static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo) 930 { 931 bool in_closed_after_out = false; 932 struct timespec start, end; 933 int file_size; 934 int ret; 935 936 if (cfg_time && (clock_gettime(CLOCK_MONOTONIC, &start) < 0)) 937 xerror("can not fetch start time %d", errno); 938 939 switch (cfg_mode) { 940 case CFG_MODE_POLL: 941 ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out, 942 winfo); 943 break; 944 945 case CFG_MODE_MMAP: 946 file_size = get_infd_size(infd); 947 if (file_size < 0) 948 return file_size; 949 ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, 950 &in_closed_after_out, winfo); 951 break; 952 953 case CFG_MODE_SENDFILE: 954 file_size = get_infd_size(infd); 955 if (file_size < 0) 956 return file_size; 957 ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, 958 &in_closed_after_out, winfo); 959 break; 960 961 default: 962 fprintf(stderr, "Invalid mode %d\n", cfg_mode); 963 964 die_usage(); 965 return 1; 966 } 967 968 if (ret) 969 return ret; 970 971 if (close_peerfd) 972 close(peerfd); 973 974 if (cfg_time) { 975 unsigned int delta_ms; 976 977 if (clock_gettime(CLOCK_MONOTONIC, &end) < 0) 978 xerror("can not fetch end time %d", errno); 979 delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000; 980 if (delta_ms > cfg_time) { 981 xerror("transfer slower than expected! runtime %d ms, expected %d ms", 982 delta_ms, cfg_time); 983 } 984 985 /* show the runtime only if this end shutdown(wr) before receiving the EOF, 986 * (that is, if this end got the longer runtime) 987 */ 988 if (in_closed_after_out) 989 fprintf(stderr, "%d", delta_ms); 990 } 991 992 return 0; 993 } 994 995 static void check_sockaddr(int pf, struct sockaddr_storage *ss, 996 socklen_t salen) 997 { 998 struct sockaddr_in6 *sin6; 999 struct sockaddr_in *sin; 1000 socklen_t wanted_size = 0; 1001 1002 switch (pf) { 1003 case AF_INET: 1004 wanted_size = sizeof(*sin); 1005 sin = (void *)ss; 1006 if (!sin->sin_port) 1007 fprintf(stderr, "accept: something wrong: ip connection from port 0"); 1008 break; 1009 case AF_INET6: 1010 wanted_size = sizeof(*sin6); 1011 sin6 = (void *)ss; 1012 if (!sin6->sin6_port) 1013 fprintf(stderr, "accept: something wrong: ipv6 connection from port 0"); 1014 break; 1015 default: 1016 fprintf(stderr, "accept: Unknown pf %d, salen %u\n", pf, salen); 1017 return; 1018 } 1019 1020 if (salen != wanted_size) 1021 fprintf(stderr, "accept: size mismatch, got %d expected %d\n", 1022 (int)salen, wanted_size); 1023 1024 if (ss->ss_family != pf) 1025 fprintf(stderr, "accept: pf mismatch, expect %d, ss_family is %d\n", 1026 (int)ss->ss_family, pf); 1027 } 1028 1029 static void check_getpeername(int fd, struct sockaddr_storage *ss, socklen_t salen) 1030 { 1031 struct sockaddr_storage peerss; 1032 socklen_t peersalen = sizeof(peerss); 1033 1034 if (getpeername(fd, (struct sockaddr *)&peerss, &peersalen) < 0) { 1035 perror("getpeername"); 1036 return; 1037 } 1038 1039 if (peersalen != salen) { 1040 fprintf(stderr, "%s: %d vs %d\n", __func__, peersalen, salen); 1041 return; 1042 } 1043 1044 if (memcmp(ss, &peerss, peersalen)) { 1045 char a[INET6_ADDRSTRLEN]; 1046 char b[INET6_ADDRSTRLEN]; 1047 char c[INET6_ADDRSTRLEN]; 1048 char d[INET6_ADDRSTRLEN]; 1049 1050 xgetnameinfo((struct sockaddr *)ss, salen, 1051 a, sizeof(a), b, sizeof(b)); 1052 1053 xgetnameinfo((struct sockaddr *)&peerss, peersalen, 1054 c, sizeof(c), d, sizeof(d)); 1055 1056 fprintf(stderr, "%s: memcmp failure: accept %s vs peername %s, %s vs %s salen %d vs %d\n", 1057 __func__, a, c, b, d, peersalen, salen); 1058 } 1059 } 1060 1061 static void check_getpeername_connect(int fd) 1062 { 1063 struct sockaddr_storage ss; 1064 socklen_t salen = sizeof(ss); 1065 char a[INET6_ADDRSTRLEN]; 1066 char b[INET6_ADDRSTRLEN]; 1067 1068 if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) { 1069 perror("getpeername"); 1070 return; 1071 } 1072 1073 xgetnameinfo((struct sockaddr *)&ss, salen, 1074 a, sizeof(a), b, sizeof(b)); 1075 1076 if (strcmp(cfg_host, a) || strcmp(cfg_port, b)) 1077 fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__, 1078 cfg_host, a, cfg_port, b); 1079 } 1080 1081 static void maybe_close(int fd) 1082 { 1083 unsigned int r = rand(); 1084 1085 if (!(cfg_join || cfg_remove || cfg_repeat > 1) && (r & 1)) 1086 close(fd); 1087 } 1088 1089 int main_loop_s(int listensock) 1090 { 1091 struct sockaddr_storage ss; 1092 struct wstate winfo; 1093 struct pollfd polls; 1094 socklen_t salen; 1095 int remotesock; 1096 int fd = 0; 1097 1098 again: 1099 polls.fd = listensock; 1100 polls.events = POLLIN; 1101 1102 switch (poll(&polls, 1, poll_timeout)) { 1103 case -1: 1104 perror("poll"); 1105 return 1; 1106 case 0: 1107 fprintf(stderr, "%s: timed out\n", __func__); 1108 close(listensock); 1109 return 2; 1110 } 1111 1112 salen = sizeof(ss); 1113 remotesock = accept(listensock, (struct sockaddr *)&ss, &salen); 1114 if (remotesock >= 0) { 1115 maybe_close(listensock); 1116 check_sockaddr(pf, &ss, salen); 1117 check_getpeername(remotesock, &ss, salen); 1118 1119 if (cfg_input) { 1120 fd = open(cfg_input, O_RDONLY); 1121 if (fd < 0) 1122 xerror("can't open %s: %d", cfg_input, errno); 1123 } 1124 1125 SOCK_TEST_TCPULP(remotesock, 0); 1126 1127 memset(&winfo, 0, sizeof(winfo)); 1128 copyfd_io(fd, remotesock, 1, true, &winfo); 1129 } else { 1130 perror("accept"); 1131 return 1; 1132 } 1133 1134 if (cfg_input) 1135 close(fd); 1136 1137 if (--cfg_repeat > 0) 1138 goto again; 1139 1140 return 0; 1141 } 1142 1143 static void init_rng(void) 1144 { 1145 unsigned int foo; 1146 1147 if (getrandom(&foo, sizeof(foo), 0) == -1) { 1148 perror("getrandom"); 1149 exit(1); 1150 } 1151 1152 srand(foo); 1153 } 1154 1155 static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen) 1156 { 1157 int err; 1158 1159 err = setsockopt(fd, level, optname, optval, optlen); 1160 if (err) { 1161 perror("setsockopt"); 1162 exit(1); 1163 } 1164 } 1165 1166 static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg) 1167 { 1168 static const unsigned int on = 1; 1169 1170 if (cmsg->timestampns) 1171 xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on)); 1172 if (cmsg->tcp_inq) 1173 xsetsockopt(fd, IPPROTO_TCP, TCP_INQ, &on, sizeof(on)); 1174 } 1175 1176 static void parse_cmsg_types(const char *type) 1177 { 1178 char *next = strchr(type, ','); 1179 unsigned int len = 0; 1180 1181 cfg_cmsg_types.cmsg_enabled = 1; 1182 1183 if (next) { 1184 parse_cmsg_types(next + 1); 1185 len = next - type; 1186 } else { 1187 len = strlen(type); 1188 } 1189 1190 if (strncmp(type, "TIMESTAMPNS", len) == 0) { 1191 cfg_cmsg_types.timestampns = 1; 1192 return; 1193 } 1194 1195 if (strncmp(type, "TCPINQ", len) == 0) { 1196 cfg_cmsg_types.tcp_inq = 1; 1197 return; 1198 } 1199 1200 fprintf(stderr, "Unrecognized cmsg option %s\n", type); 1201 exit(1); 1202 } 1203 1204 static void parse_setsock_options(const char *name) 1205 { 1206 char *next = strchr(name, ','); 1207 unsigned int len = 0; 1208 1209 if (next) { 1210 parse_setsock_options(next + 1); 1211 len = next - name; 1212 } else { 1213 len = strlen(name); 1214 } 1215 1216 if (strncmp(name, "TRANSPARENT", len) == 0) { 1217 cfg_sockopt_types.transparent = 1; 1218 return; 1219 } 1220 1221 if (strncmp(name, "MPTFO", len) == 0) { 1222 cfg_sockopt_types.mptfo = 1; 1223 return; 1224 } 1225 1226 fprintf(stderr, "Unrecognized setsockopt option %s\n", name); 1227 exit(1); 1228 } 1229 1230 void xdisconnect(int fd) 1231 { 1232 socklen_t addrlen = sizeof(struct sockaddr_storage); 1233 struct sockaddr_storage addr, empty; 1234 int msec_sleep = 10; 1235 void *raw_addr; 1236 int i, cmdlen; 1237 char cmd[128]; 1238 1239 /* get the local address and convert it to string */ 1240 if (getsockname(fd, (struct sockaddr *)&addr, &addrlen) < 0) 1241 xerror("getsockname"); 1242 1243 if (addr.ss_family == AF_INET) 1244 raw_addr = &(((struct sockaddr_in *)&addr)->sin_addr); 1245 else if (addr.ss_family == AF_INET6) 1246 raw_addr = &(((struct sockaddr_in6 *)&addr)->sin6_addr); 1247 else 1248 xerror("bad family"); 1249 1250 strcpy(cmd, "ss -M | grep -q "); 1251 cmdlen = strlen(cmd); 1252 if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen], 1253 sizeof(cmd) - cmdlen)) 1254 xerror("inet_ntop"); 1255 1256 shutdown(fd, SHUT_WR); 1257 1258 /* 1259 * wait until the pending data is completely flushed and all 1260 * the MPTCP sockets reached the closed status. 1261 * disconnect will bypass/ignore/drop any pending data. 1262 */ 1263 for (i = 0; ; i += msec_sleep) { 1264 /* closed socket are not listed by 'ss' */ 1265 if (system(cmd) != 0) 1266 break; 1267 1268 if (i > poll_timeout) 1269 xerror("timeout while waiting for spool to complete"); 1270 usleep(msec_sleep * 1000); 1271 } 1272 1273 memset(&empty, 0, sizeof(empty)); 1274 empty.ss_family = AF_UNSPEC; 1275 if (connect(fd, (struct sockaddr *)&empty, addrlen) < 0) 1276 xerror("can't disconnect: %d", errno); 1277 } 1278 1279 int main_loop(void) 1280 { 1281 int fd = 0, ret, fd_in = 0; 1282 struct addrinfo *peer; 1283 struct wstate winfo; 1284 1285 if (cfg_input && cfg_sockopt_types.mptfo) { 1286 fd_in = open(cfg_input, O_RDONLY); 1287 if (fd_in < 0) 1288 xerror("can't open %s:%d", cfg_input, errno); 1289 } 1290 1291 memset(&winfo, 0, sizeof(winfo)); 1292 fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &winfo); 1293 if (fd < 0) 1294 return 2; 1295 1296 again: 1297 check_getpeername_connect(fd); 1298 1299 SOCK_TEST_TCPULP(fd, cfg_sock_proto); 1300 1301 if (cfg_rcvbuf) 1302 set_rcvbuf(fd, cfg_rcvbuf); 1303 if (cfg_sndbuf) 1304 set_sndbuf(fd, cfg_sndbuf); 1305 if (cfg_cmsg_types.cmsg_enabled) 1306 apply_cmsg_types(fd, &cfg_cmsg_types); 1307 1308 if (cfg_input && !cfg_sockopt_types.mptfo) { 1309 fd_in = open(cfg_input, O_RDONLY); 1310 if (fd_in < 0) 1311 xerror("can't open %s:%d", cfg_input, errno); 1312 } 1313 1314 ret = copyfd_io(fd_in, fd, 1, 0, &winfo); 1315 if (ret) 1316 goto out; 1317 1318 if (cfg_truncate > 0) { 1319 shutdown(fd, SHUT_WR); 1320 } else if (--cfg_repeat > 0) { 1321 xdisconnect(fd); 1322 1323 /* the socket could be unblocking at this point, we need the 1324 * connect to be blocking 1325 */ 1326 set_nonblock(fd, false); 1327 if (connect(fd, peer->ai_addr, peer->ai_addrlen)) 1328 xerror("can't reconnect: %d", errno); 1329 if (cfg_input) 1330 close(fd_in); 1331 memset(&winfo, 0, sizeof(winfo)); 1332 goto again; 1333 } else { 1334 close(fd); 1335 } 1336 1337 out: 1338 if (cfg_input) 1339 close(fd_in); 1340 return ret; 1341 } 1342 1343 int parse_proto(const char *proto) 1344 { 1345 if (!strcasecmp(proto, "MPTCP")) 1346 return IPPROTO_MPTCP; 1347 if (!strcasecmp(proto, "TCP")) 1348 return IPPROTO_TCP; 1349 1350 fprintf(stderr, "Unknown protocol: %s\n.", proto); 1351 die_usage(); 1352 1353 /* silence compiler warning */ 1354 return 0; 1355 } 1356 1357 int parse_mode(const char *mode) 1358 { 1359 if (!strcasecmp(mode, "poll")) 1360 return CFG_MODE_POLL; 1361 if (!strcasecmp(mode, "mmap")) 1362 return CFG_MODE_MMAP; 1363 if (!strcasecmp(mode, "sendfile")) 1364 return CFG_MODE_SENDFILE; 1365 1366 fprintf(stderr, "Unknown test mode: %s\n", mode); 1367 fprintf(stderr, "Supported modes are:\n"); 1368 fprintf(stderr, "\t\t\"poll\" - interleaved read/write using poll()\n"); 1369 fprintf(stderr, "\t\t\"mmap\" - send entire input file (mmap+write), then read response (-l will read input first)\n"); 1370 fprintf(stderr, "\t\t\"sendfile\" - send entire input file (sendfile), then read response (-l will read input first)\n"); 1371 1372 die_usage(); 1373 1374 /* silence compiler warning */ 1375 return 0; 1376 } 1377 1378 int parse_peek(const char *mode) 1379 { 1380 if (!strcasecmp(mode, "saveWithPeek")) 1381 return CFG_WITH_PEEK; 1382 if (!strcasecmp(mode, "saveAfterPeek")) 1383 return CFG_AFTER_PEEK; 1384 1385 fprintf(stderr, "Unknown: %s\n", mode); 1386 fprintf(stderr, "Supported MSG_PEEK mode are:\n"); 1387 fprintf(stderr, 1388 "\t\t\"saveWithPeek\" - recv data with flags 'MSG_PEEK' and save the peek data into file\n"); 1389 fprintf(stderr, 1390 "\t\t\"saveAfterPeek\" - read and save data into file after recv with flags 'MSG_PEEK'\n"); 1391 1392 die_usage(); 1393 1394 /* silence compiler warning */ 1395 return 0; 1396 } 1397 1398 static int parse_int(const char *size) 1399 { 1400 unsigned long s; 1401 1402 errno = 0; 1403 1404 s = strtoul(size, NULL, 0); 1405 1406 if (errno) { 1407 fprintf(stderr, "Invalid sndbuf size %s (%s)\n", 1408 size, strerror(errno)); 1409 die_usage(); 1410 } 1411 1412 if (s > INT_MAX) { 1413 fprintf(stderr, "Invalid sndbuf size %s (%s)\n", 1414 size, strerror(ERANGE)); 1415 die_usage(); 1416 } 1417 1418 return (int)s; 1419 } 1420 1421 static void parse_opts(int argc, char **argv) 1422 { 1423 int c; 1424 1425 while ((c = getopt(argc, argv, "6c:f:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) { 1426 switch (c) { 1427 case 'f': 1428 cfg_truncate = atoi(optarg); 1429 1430 /* when receiving a fastclose, ignore PIPE signals and 1431 * all the I/O errors later in the code 1432 */ 1433 if (cfg_truncate < 0) { 1434 cfg_rcv_trunc = true; 1435 signal(SIGPIPE, handle_signal); 1436 } 1437 break; 1438 case 'j': 1439 cfg_join = true; 1440 cfg_mode = CFG_MODE_POLL; 1441 break; 1442 case 'r': 1443 cfg_remove = true; 1444 cfg_mode = CFG_MODE_POLL; 1445 cfg_wait = 400000; 1446 cfg_do_w = atoi(optarg); 1447 if (cfg_do_w <= 0) 1448 cfg_do_w = 50; 1449 break; 1450 case 'i': 1451 cfg_input = optarg; 1452 break; 1453 case 'I': 1454 cfg_repeat = atoi(optarg); 1455 break; 1456 case 'l': 1457 listen_mode = true; 1458 break; 1459 case 'p': 1460 cfg_port = optarg; 1461 break; 1462 case 's': 1463 cfg_sock_proto = parse_proto(optarg); 1464 break; 1465 case 'h': 1466 die_usage(); 1467 break; 1468 case '6': 1469 pf = AF_INET6; 1470 break; 1471 case 't': 1472 poll_timeout = atoi(optarg) * 1000; 1473 if (poll_timeout <= 0) 1474 poll_timeout = -1; 1475 break; 1476 case 'T': 1477 cfg_time = atoi(optarg); 1478 break; 1479 case 'm': 1480 cfg_mode = parse_mode(optarg); 1481 break; 1482 case 'S': 1483 cfg_sndbuf = parse_int(optarg); 1484 break; 1485 case 'R': 1486 cfg_rcvbuf = parse_int(optarg); 1487 break; 1488 case 'w': 1489 cfg_wait = atoi(optarg)*1000000; 1490 break; 1491 case 'M': 1492 cfg_mark = strtol(optarg, NULL, 0); 1493 break; 1494 case 'P': 1495 cfg_peek = parse_peek(optarg); 1496 break; 1497 case 'c': 1498 parse_cmsg_types(optarg); 1499 break; 1500 case 'o': 1501 parse_setsock_options(optarg); 1502 break; 1503 } 1504 } 1505 1506 if (optind + 1 != argc) 1507 die_usage(); 1508 cfg_host = argv[optind]; 1509 1510 if (strchr(cfg_host, ':')) 1511 pf = AF_INET6; 1512 } 1513 1514 int main(int argc, char *argv[]) 1515 { 1516 init_rng(); 1517 1518 signal(SIGUSR1, handle_signal); 1519 parse_opts(argc, argv); 1520 1521 if (listen_mode) { 1522 int fd = sock_listen_mptcp(cfg_host, cfg_port); 1523 1524 if (fd < 0) 1525 return 1; 1526 1527 if (cfg_rcvbuf) 1528 set_rcvbuf(fd, cfg_rcvbuf); 1529 if (cfg_sndbuf) 1530 set_sndbuf(fd, cfg_sndbuf); 1531 if (cfg_mark) 1532 set_mark(fd, cfg_mark); 1533 if (cfg_cmsg_types.cmsg_enabled) 1534 apply_cmsg_types(fd, &cfg_cmsg_types); 1535 1536 return main_loop_s(fd); 1537 } 1538 1539 return main_loop(); 1540 } 1541