1 // SPDX-License-Identifier: GPL-2.0 2 3 #define _GNU_SOURCE 4 5 #include <errno.h> 6 #include <limits.h> 7 #include <fcntl.h> 8 #include <string.h> 9 #include <stdarg.h> 10 #include <stdbool.h> 11 #include <stdint.h> 12 #include <stdio.h> 13 #include <stdlib.h> 14 #include <strings.h> 15 #include <signal.h> 16 #include <unistd.h> 17 #include <time.h> 18 19 #include <sys/ioctl.h> 20 #include <sys/poll.h> 21 #include <sys/random.h> 22 #include <sys/sendfile.h> 23 #include <sys/stat.h> 24 #include <sys/socket.h> 25 #include <sys/types.h> 26 #include <sys/mman.h> 27 28 #include <arpa/inet.h> 29 30 #include <netdb.h> 31 #include <netinet/in.h> 32 33 #include <linux/tcp.h> 34 #include <linux/time_types.h> 35 #include <linux/sockios.h> 36 37 extern int optind; 38 39 #ifndef IPPROTO_MPTCP 40 #define IPPROTO_MPTCP 262 41 #endif 42 #ifndef TCP_ULP 43 #define TCP_ULP 31 44 #endif 45 46 static int poll_timeout = 10 * 1000; 47 static bool listen_mode; 48 static bool quit; 49 50 enum cfg_mode { 51 CFG_MODE_POLL, 52 CFG_MODE_MMAP, 53 CFG_MODE_SENDFILE, 54 }; 55 56 enum cfg_peek { 57 CFG_NONE_PEEK, 58 CFG_WITH_PEEK, 59 CFG_AFTER_PEEK, 60 }; 61 62 static enum cfg_mode cfg_mode = CFG_MODE_POLL; 63 static enum cfg_peek cfg_peek = CFG_NONE_PEEK; 64 static const char *cfg_host; 65 static const char *cfg_port = "12000"; 66 static int cfg_sock_proto = IPPROTO_MPTCP; 67 static int pf = AF_INET; 68 static int cfg_sndbuf; 69 static int cfg_rcvbuf; 70 static bool cfg_join; 71 static bool cfg_remove; 72 static unsigned int cfg_time; 73 static unsigned int cfg_do_w; 74 static int cfg_wait; 75 static uint32_t cfg_mark; 76 static char *cfg_input; 77 static int cfg_repeat = 1; 78 static int cfg_truncate; 79 static int cfg_rcv_trunc; 80 81 struct cfg_cmsg_types { 82 unsigned int cmsg_enabled:1; 83 unsigned int timestampns:1; 84 unsigned int tcp_inq:1; 85 }; 86 87 struct cfg_sockopt_types { 88 unsigned int transparent:1; 89 unsigned int mptfo:1; 90 }; 91 92 struct tcp_inq_state { 93 unsigned int last; 94 bool expect_eof; 95 }; 96 97 struct wstate { 98 char buf[8192]; 99 unsigned int len; 100 unsigned int off; 101 unsigned int total_len; 102 }; 103 104 static struct tcp_inq_state tcp_inq; 105 106 static struct cfg_cmsg_types cfg_cmsg_types; 107 static struct cfg_sockopt_types cfg_sockopt_types; 108 109 static void die_usage(void) 110 { 111 fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-f offset] [-i file] [-I num] [-j] [-l] " 112 "[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-r num] [-R num] " 113 "[-s MPTCP|TCP] [-S num] [-t num] [-T num] [-w sec] connect_address\n"); 114 fprintf(stderr, "\t-6 use ipv6\n"); 115 fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n"); 116 fprintf(stderr, "\t-f offset -- stop the I/O after receiving and sending the specified amount " 117 "of bytes. If there are unread bytes in the receive queue, that will cause a MPTCP " 118 "fastclose at close/shutdown. If offset is negative, expect the peer to close before " 119 "all the local data as been sent, thus toleration errors on write and EPIPE signals\n"); 120 fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin"); 121 fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num " 122 "incoming connections, in client mode, disconnect and reconnect to the server\n"); 123 fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down " 124 "-- for MPJ tests\n"); 125 fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n"); 126 fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); 127 fprintf(stderr, "\t-M mark -- set socket packet mark\n"); 128 fprintf(stderr, "\t-o option -- test sockopt <option>\n"); 129 fprintf(stderr, "\t-p num -- use port num\n"); 130 fprintf(stderr, 131 "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n"); 132 fprintf(stderr, "\t-r num -- enable slow mode, limiting each write to num bytes " 133 "-- for remove addr tests\n"); 134 fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); 135 fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n"); 136 fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); 137 fprintf(stderr, "\t-t num -- set poll timeout to num\n"); 138 fprintf(stderr, "\t-T num -- set expected runtime to num ms\n"); 139 fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); 140 exit(1); 141 } 142 143 static void xerror(const char *fmt, ...) 144 { 145 va_list ap; 146 147 va_start(ap, fmt); 148 vfprintf(stderr, fmt, ap); 149 va_end(ap); 150 exit(1); 151 } 152 153 static void handle_signal(int nr) 154 { 155 quit = true; 156 } 157 158 static const char *getxinfo_strerr(int err) 159 { 160 if (err == EAI_SYSTEM) 161 return strerror(errno); 162 163 return gai_strerror(err); 164 } 165 166 static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen, 167 char *host, socklen_t hostlen, 168 char *serv, socklen_t servlen) 169 { 170 int flags = NI_NUMERICHOST | NI_NUMERICSERV; 171 int err = getnameinfo(addr, addrlen, host, hostlen, serv, servlen, 172 flags); 173 174 if (err) { 175 const char *errstr = getxinfo_strerr(err); 176 177 fprintf(stderr, "Fatal: getnameinfo: %s\n", errstr); 178 exit(1); 179 } 180 } 181 182 static void xgetaddrinfo(const char *node, const char *service, 183 struct addrinfo *hints, 184 struct addrinfo **res) 185 { 186 again: 187 int err = getaddrinfo(node, service, hints, res); 188 189 if (err) { 190 const char *errstr; 191 192 /* glibc starts to support MPTCP since v2.42. 193 * For older versions, use IPPROTO_TCP to resolve, 194 * and use TCP/MPTCP to create socket. 195 * Link: https://sourceware.org/git/?p=glibc.git;a=commit;h=a8e9022e0f82 196 */ 197 if (err == EAI_SOCKTYPE) { 198 hints->ai_protocol = IPPROTO_TCP; 199 goto again; 200 } 201 202 errstr = getxinfo_strerr(err); 203 204 fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", 205 node ? node : "", service ? service : "", errstr); 206 exit(1); 207 } 208 } 209 210 static void set_rcvbuf(int fd, unsigned int size) 211 { 212 int err; 213 214 err = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)); 215 if (err) { 216 perror("set SO_RCVBUF"); 217 exit(1); 218 } 219 } 220 221 static void set_sndbuf(int fd, unsigned int size) 222 { 223 int err; 224 225 err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size)); 226 if (err) { 227 perror("set SO_SNDBUF"); 228 exit(1); 229 } 230 } 231 232 static void set_mark(int fd, uint32_t mark) 233 { 234 int err; 235 236 err = setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); 237 if (err) { 238 perror("set SO_MARK"); 239 exit(1); 240 } 241 } 242 243 static void set_transparent(int fd, int pf) 244 { 245 int one = 1; 246 247 switch (pf) { 248 case AF_INET: 249 if (-1 == setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one))) 250 perror("IP_TRANSPARENT"); 251 break; 252 case AF_INET6: 253 if (-1 == setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one))) 254 perror("IPV6_TRANSPARENT"); 255 break; 256 } 257 } 258 259 static void set_mptfo(int fd, int pf) 260 { 261 int qlen = 25; 262 263 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) 264 perror("TCP_FASTOPEN"); 265 } 266 267 static int do_ulp_so(int sock, const char *name) 268 { 269 return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name)); 270 } 271 272 #define X(m) xerror("%s:%u: %s: failed for proto %d at line %u", __FILE__, __LINE__, (m), proto, line) 273 static void sock_test_tcpulp(int sock, int proto, unsigned int line) 274 { 275 socklen_t buflen = 8; 276 char buf[8] = ""; 277 int ret = getsockopt(sock, IPPROTO_TCP, TCP_ULP, buf, &buflen); 278 279 if (ret != 0) 280 X("getsockopt"); 281 282 if (buflen > 0) { 283 if (strcmp(buf, "mptcp") != 0) 284 xerror("unexpected ULP '%s' for proto %d at line %u", buf, proto, line); 285 ret = do_ulp_so(sock, "tls"); 286 if (ret == 0) 287 X("setsockopt"); 288 } else if (proto == IPPROTO_MPTCP) { 289 ret = do_ulp_so(sock, "tls"); 290 if (ret != -1) 291 X("setsockopt"); 292 } 293 294 ret = do_ulp_so(sock, "mptcp"); 295 if (ret != -1) 296 X("setsockopt"); 297 298 #undef X 299 } 300 301 #define SOCK_TEST_TCPULP(s, p) sock_test_tcpulp((s), (p), __LINE__) 302 303 static int sock_listen_mptcp(const char * const listenaddr, 304 const char * const port) 305 { 306 int sock = -1; 307 struct addrinfo hints = { 308 .ai_protocol = IPPROTO_MPTCP, 309 .ai_socktype = SOCK_STREAM, 310 .ai_flags = AI_PASSIVE | AI_NUMERICHOST 311 }; 312 313 hints.ai_family = pf; 314 315 struct addrinfo *a, *addr; 316 int one = 1; 317 318 xgetaddrinfo(listenaddr, port, &hints, &addr); 319 hints.ai_family = pf; 320 321 for (a = addr; a; a = a->ai_next) { 322 sock = socket(a->ai_family, a->ai_socktype, cfg_sock_proto); 323 if (sock < 0) 324 continue; 325 326 SOCK_TEST_TCPULP(sock, cfg_sock_proto); 327 328 if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, 329 sizeof(one))) 330 perror("setsockopt"); 331 332 if (cfg_sockopt_types.transparent) 333 set_transparent(sock, pf); 334 335 if (cfg_sockopt_types.mptfo) 336 set_mptfo(sock, pf); 337 338 if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) 339 break; /* success */ 340 341 perror("bind"); 342 close(sock); 343 sock = -1; 344 } 345 346 freeaddrinfo(addr); 347 348 if (sock < 0) { 349 fprintf(stderr, "Could not create listen socket\n"); 350 return sock; 351 } 352 353 SOCK_TEST_TCPULP(sock, cfg_sock_proto); 354 355 if (listen(sock, 20)) { 356 perror("listen"); 357 close(sock); 358 return -1; 359 } 360 361 SOCK_TEST_TCPULP(sock, cfg_sock_proto); 362 363 return sock; 364 } 365 366 static int sock_connect_mptcp(const char * const remoteaddr, 367 const char * const port, int proto, 368 struct addrinfo **peer, 369 int infd, struct wstate *winfo) 370 { 371 struct addrinfo hints = { 372 .ai_protocol = IPPROTO_MPTCP, 373 .ai_socktype = SOCK_STREAM, 374 }; 375 struct addrinfo *a, *addr; 376 int syn_copied = 0; 377 int sock = -1; 378 379 hints.ai_family = pf; 380 381 xgetaddrinfo(remoteaddr, port, &hints, &addr); 382 for (a = addr; a; a = a->ai_next) { 383 sock = socket(a->ai_family, a->ai_socktype, proto); 384 if (sock < 0) { 385 perror("socket"); 386 continue; 387 } 388 389 SOCK_TEST_TCPULP(sock, proto); 390 391 if (cfg_mark) 392 set_mark(sock, cfg_mark); 393 394 if (cfg_sockopt_types.mptfo) { 395 if (!winfo->total_len) 396 winfo->total_len = winfo->len = read(infd, winfo->buf, 397 sizeof(winfo->buf)); 398 399 syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN, 400 a->ai_addr, a->ai_addrlen); 401 if (syn_copied >= 0) { 402 winfo->off = syn_copied; 403 winfo->len -= syn_copied; 404 *peer = a; 405 break; /* success */ 406 } 407 } else { 408 if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { 409 *peer = a; 410 break; /* success */ 411 } 412 } 413 if (cfg_sockopt_types.mptfo) { 414 perror("sendto()"); 415 close(sock); 416 sock = -1; 417 } else { 418 perror("connect()"); 419 close(sock); 420 sock = -1; 421 } 422 } 423 424 freeaddrinfo(addr); 425 if (sock != -1) 426 SOCK_TEST_TCPULP(sock, proto); 427 return sock; 428 } 429 430 static size_t do_rnd_write(const int fd, char *buf, const size_t len) 431 { 432 static bool first = true; 433 unsigned int do_w; 434 ssize_t bw; 435 436 do_w = rand() & 0xffff; 437 if (do_w == 0 || do_w > len) 438 do_w = len; 439 440 if (cfg_join && first && do_w > 100) 441 do_w = 100; 442 443 if (cfg_remove && do_w > cfg_do_w) 444 do_w = cfg_do_w; 445 446 bw = write(fd, buf, do_w); 447 if (bw < 0) 448 return bw; 449 450 /* let the join handshake complete, before going on */ 451 if (cfg_join && first) { 452 usleep(200000); 453 first = false; 454 } 455 456 if (cfg_remove) 457 usleep(200000); 458 459 return bw; 460 } 461 462 static size_t do_write(const int fd, char *buf, const size_t len) 463 { 464 size_t offset = 0; 465 466 while (offset < len) { 467 size_t written; 468 ssize_t bw; 469 470 bw = write(fd, buf + offset, len - offset); 471 if (bw < 0) { 472 perror("write"); 473 return 0; 474 } 475 476 written = (size_t)bw; 477 offset += written; 478 } 479 480 return offset; 481 } 482 483 static void process_cmsg(struct msghdr *msgh) 484 { 485 struct __kernel_timespec ts; 486 bool inq_found = false; 487 bool ts_found = false; 488 unsigned int inq = 0; 489 struct cmsghdr *cmsg; 490 491 for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) { 492 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) { 493 memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts)); 494 ts_found = true; 495 continue; 496 } 497 if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) { 498 memcpy(&inq, CMSG_DATA(cmsg), sizeof(inq)); 499 inq_found = true; 500 continue; 501 } 502 503 } 504 505 if (cfg_cmsg_types.timestampns) { 506 if (!ts_found) 507 xerror("TIMESTAMPNS not present\n"); 508 } 509 510 if (cfg_cmsg_types.tcp_inq) { 511 if (!inq_found) 512 xerror("TCP_INQ not present\n"); 513 514 if (inq > 1024) 515 xerror("tcp_inq %u is larger than one kbyte\n", inq); 516 tcp_inq.last = inq; 517 } 518 } 519 520 static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len) 521 { 522 char msg_buf[8192]; 523 struct iovec iov = { 524 .iov_base = buf, 525 .iov_len = len, 526 }; 527 struct msghdr msg = { 528 .msg_iov = &iov, 529 .msg_iovlen = 1, 530 .msg_control = msg_buf, 531 .msg_controllen = sizeof(msg_buf), 532 }; 533 int flags = 0; 534 unsigned int last_hint = tcp_inq.last; 535 int ret = recvmsg(fd, &msg, flags); 536 537 if (ret <= 0) { 538 if (ret == 0 && tcp_inq.expect_eof) 539 return ret; 540 541 if (ret == 0 && cfg_cmsg_types.tcp_inq) 542 if (last_hint != 1 && last_hint != 0) 543 xerror("EOF but last tcp_inq hint was %u\n", last_hint); 544 545 return ret; 546 } 547 548 if (tcp_inq.expect_eof) 549 xerror("expected EOF, last_hint %u, now %u\n", 550 last_hint, tcp_inq.last); 551 552 if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled) 553 xerror("got %lu bytes of cmsg data, expected 0\n", 554 (unsigned long)msg.msg_controllen); 555 556 if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled) 557 xerror("%s\n", "got no cmsg data"); 558 559 if (msg.msg_controllen) 560 process_cmsg(&msg); 561 562 if (cfg_cmsg_types.tcp_inq) { 563 if ((size_t)ret < len && last_hint > (unsigned int)ret) { 564 if (ret + 1 != (int)last_hint) { 565 int next = read(fd, msg_buf, sizeof(msg_buf)); 566 567 xerror("read %u of %u, last_hint was %u tcp_inq hint now %u next_read returned %d/%m\n", 568 ret, (unsigned int)len, last_hint, tcp_inq.last, next); 569 } else { 570 tcp_inq.expect_eof = true; 571 } 572 } 573 } 574 575 return ret; 576 } 577 578 static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) 579 { 580 int ret = 0; 581 char tmp[16384]; 582 size_t cap = rand(); 583 584 cap &= 0xffff; 585 586 if (cap == 0) 587 cap = 1; 588 else if (cap > len) 589 cap = len; 590 591 if (cfg_peek == CFG_WITH_PEEK) { 592 ret = recv(fd, buf, cap, MSG_PEEK); 593 ret = (ret < 0) ? ret : read(fd, tmp, ret); 594 } else if (cfg_peek == CFG_AFTER_PEEK) { 595 ret = recv(fd, buf, cap, MSG_PEEK); 596 ret = (ret < 0) ? ret : read(fd, buf, cap); 597 } else if (cfg_cmsg_types.cmsg_enabled) { 598 ret = do_recvmsg_cmsg(fd, buf, cap); 599 } else { 600 ret = read(fd, buf, cap); 601 } 602 603 return ret; 604 } 605 606 static void set_nonblock(int fd, bool nonblock) 607 { 608 int flags = fcntl(fd, F_GETFL); 609 610 if (flags == -1) 611 return; 612 613 if (nonblock) 614 fcntl(fd, F_SETFL, flags | O_NONBLOCK); 615 else 616 fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); 617 } 618 619 static void shut_wr(int fd) 620 { 621 /* Close our write side, ev. give some time 622 * for address notification and/or checking 623 * the current status 624 */ 625 if (cfg_wait) 626 usleep(cfg_wait); 627 628 shutdown(fd, SHUT_WR); 629 } 630 631 static int copyfd_io_poll(int infd, int peerfd, int outfd, 632 bool *in_closed_after_out, struct wstate *winfo) 633 { 634 struct pollfd fds = { 635 .fd = peerfd, 636 .events = POLLIN | POLLOUT, 637 }; 638 unsigned int total_wlen = 0, total_rlen = 0; 639 640 set_nonblock(peerfd, true); 641 642 for (;;) { 643 char rbuf[8192]; 644 ssize_t len; 645 646 if (fds.events == 0 || quit) 647 break; 648 649 switch (poll(&fds, 1, poll_timeout)) { 650 case -1: 651 if (errno == EINTR) 652 continue; 653 perror("poll"); 654 return 1; 655 case 0: 656 fprintf(stderr, "%s: poll timed out (events: " 657 "POLLIN %u, POLLOUT %u)\n", __func__, 658 fds.events & POLLIN, fds.events & POLLOUT); 659 return 2; 660 } 661 662 if (fds.revents & POLLIN) { 663 ssize_t rb = sizeof(rbuf); 664 665 /* limit the total amount of read data to the trunc value*/ 666 if (cfg_truncate > 0) { 667 if (rb + total_rlen > cfg_truncate) 668 rb = cfg_truncate - total_rlen; 669 len = read(peerfd, rbuf, rb); 670 } else { 671 len = do_rnd_read(peerfd, rbuf, sizeof(rbuf)); 672 } 673 if (len == 0) { 674 /* no more data to receive: 675 * peer has closed its write side 676 */ 677 fds.events &= ~POLLIN; 678 679 if ((fds.events & POLLOUT) == 0) { 680 *in_closed_after_out = true; 681 /* and nothing more to send */ 682 break; 683 } 684 685 /* Else, still have data to transmit */ 686 } else if (len < 0) { 687 if (cfg_rcv_trunc) 688 return 0; 689 perror("read"); 690 return 3; 691 } 692 693 total_rlen += len; 694 do_write(outfd, rbuf, len); 695 } 696 697 if (fds.revents & POLLOUT) { 698 if (winfo->len == 0) { 699 winfo->off = 0; 700 winfo->len = read(infd, winfo->buf, sizeof(winfo->buf)); 701 } 702 703 if (winfo->len > 0) { 704 ssize_t bw; 705 706 /* limit the total amount of written data to the trunc value */ 707 if (cfg_truncate > 0 && winfo->len + total_wlen > cfg_truncate) 708 winfo->len = cfg_truncate - total_wlen; 709 710 bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); 711 if (bw < 0) { 712 if (cfg_rcv_trunc) 713 return 0; 714 perror("write"); 715 return 111; 716 } 717 718 winfo->off += bw; 719 winfo->len -= bw; 720 total_wlen += bw; 721 } else if (winfo->len == 0) { 722 /* We have no more data to send. */ 723 fds.events &= ~POLLOUT; 724 725 if ((fds.events & POLLIN) == 0) 726 /* ... and peer also closed already */ 727 break; 728 729 shut_wr(peerfd); 730 } else { 731 if (errno == EINTR) 732 continue; 733 perror("read"); 734 return 4; 735 } 736 } 737 738 if (fds.revents & (POLLERR | POLLNVAL)) { 739 if (cfg_rcv_trunc) 740 return 0; 741 fprintf(stderr, "Unexpected revents: " 742 "POLLERR/POLLNVAL(%x)\n", fds.revents); 743 return 5; 744 } 745 746 if (cfg_truncate > 0 && total_wlen >= cfg_truncate && 747 total_rlen >= cfg_truncate) 748 break; 749 } 750 751 /* leave some time for late join/announce */ 752 if (cfg_remove && !quit) 753 usleep(cfg_wait); 754 755 return 0; 756 } 757 758 static int do_recvfile(int infd, int outfd) 759 { 760 ssize_t r; 761 762 do { 763 char buf[16384]; 764 765 r = do_rnd_read(infd, buf, sizeof(buf)); 766 if (r > 0) { 767 if (write(outfd, buf, r) != r) 768 break; 769 } else if (r < 0) { 770 perror("read"); 771 } 772 } while (r > 0); 773 774 return (int)r; 775 } 776 777 static int spool_buf(int fd, struct wstate *winfo) 778 { 779 while (winfo->len) { 780 int ret = write(fd, winfo->buf + winfo->off, winfo->len); 781 782 if (ret < 0) { 783 perror("write"); 784 return 4; 785 } 786 winfo->off += ret; 787 winfo->len -= ret; 788 } 789 return 0; 790 } 791 792 static int do_mmap(int infd, int outfd, unsigned int size, 793 struct wstate *winfo) 794 { 795 char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0); 796 ssize_t ret = 0, off = winfo->total_len; 797 size_t rem; 798 799 if (inbuf == MAP_FAILED) { 800 perror("mmap"); 801 return 1; 802 } 803 804 ret = spool_buf(outfd, winfo); 805 if (ret < 0) 806 return ret; 807 808 rem = size - winfo->total_len; 809 810 while (rem > 0) { 811 ret = write(outfd, inbuf + off, rem); 812 813 if (ret < 0) { 814 perror("write"); 815 break; 816 } 817 818 off += ret; 819 rem -= ret; 820 } 821 822 munmap(inbuf, size); 823 return rem; 824 } 825 826 static int get_infd_size(int fd) 827 { 828 struct stat sb; 829 ssize_t count; 830 int err; 831 832 err = fstat(fd, &sb); 833 if (err < 0) { 834 perror("fstat"); 835 return -1; 836 } 837 838 if ((sb.st_mode & S_IFMT) != S_IFREG) { 839 fprintf(stderr, "%s: stdin is not a regular file\n", __func__); 840 return -2; 841 } 842 843 count = sb.st_size; 844 if (count > INT_MAX) { 845 fprintf(stderr, "File too large: %zu\n", count); 846 return -3; 847 } 848 849 return (int)count; 850 } 851 852 static int do_sendfile(int infd, int outfd, unsigned int count, 853 struct wstate *winfo) 854 { 855 int ret = spool_buf(outfd, winfo); 856 857 if (ret < 0) 858 return ret; 859 860 count -= winfo->total_len; 861 862 while (count > 0) { 863 ssize_t r; 864 865 r = sendfile(outfd, infd, NULL, count); 866 if (r < 0) { 867 perror("sendfile"); 868 return 3; 869 } 870 871 count -= r; 872 } 873 874 return 0; 875 } 876 877 static int copyfd_io_mmap(int infd, int peerfd, int outfd, 878 unsigned int size, bool *in_closed_after_out, 879 struct wstate *winfo) 880 { 881 int err; 882 883 if (listen_mode) { 884 err = do_recvfile(peerfd, outfd); 885 if (err) 886 return err; 887 888 err = do_mmap(infd, peerfd, size, winfo); 889 } else { 890 err = do_mmap(infd, peerfd, size, winfo); 891 if (err) 892 return err; 893 894 shut_wr(peerfd); 895 896 err = do_recvfile(peerfd, outfd); 897 *in_closed_after_out = true; 898 } 899 900 return err; 901 } 902 903 static int copyfd_io_sendfile(int infd, int peerfd, int outfd, 904 unsigned int size, bool *in_closed_after_out, struct wstate *winfo) 905 { 906 int err; 907 908 if (listen_mode) { 909 err = do_recvfile(peerfd, outfd); 910 if (err) 911 return err; 912 913 err = do_sendfile(infd, peerfd, size, winfo); 914 } else { 915 err = do_sendfile(infd, peerfd, size, winfo); 916 if (err) 917 return err; 918 919 shut_wr(peerfd); 920 921 err = do_recvfile(peerfd, outfd); 922 *in_closed_after_out = true; 923 } 924 925 return err; 926 } 927 928 static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo) 929 { 930 bool in_closed_after_out = false; 931 struct timespec start, end; 932 int file_size; 933 int ret; 934 935 if (cfg_time && (clock_gettime(CLOCK_MONOTONIC, &start) < 0)) 936 xerror("can not fetch start time %d", errno); 937 938 switch (cfg_mode) { 939 case CFG_MODE_POLL: 940 ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out, 941 winfo); 942 break; 943 944 case CFG_MODE_MMAP: 945 file_size = get_infd_size(infd); 946 if (file_size < 0) 947 return file_size; 948 ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, 949 &in_closed_after_out, winfo); 950 break; 951 952 case CFG_MODE_SENDFILE: 953 file_size = get_infd_size(infd); 954 if (file_size < 0) 955 return file_size; 956 ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, 957 &in_closed_after_out, winfo); 958 break; 959 960 default: 961 fprintf(stderr, "Invalid mode %d\n", cfg_mode); 962 963 die_usage(); 964 return 1; 965 } 966 967 if (ret) 968 return ret; 969 970 if (close_peerfd) 971 close(peerfd); 972 973 if (cfg_time) { 974 unsigned int delta_ms; 975 976 if (clock_gettime(CLOCK_MONOTONIC, &end) < 0) 977 xerror("can not fetch end time %d", errno); 978 delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000; 979 if (delta_ms > cfg_time) { 980 xerror("transfer slower than expected! runtime %d ms, expected %d ms", 981 delta_ms, cfg_time); 982 } 983 984 /* show the runtime only if this end shutdown(wr) before receiving the EOF, 985 * (that is, if this end got the longer runtime) 986 */ 987 if (in_closed_after_out) 988 fprintf(stderr, "%d", delta_ms); 989 } 990 991 return 0; 992 } 993 994 static void check_sockaddr(int pf, struct sockaddr_storage *ss, 995 socklen_t salen) 996 { 997 struct sockaddr_in6 *sin6; 998 struct sockaddr_in *sin; 999 socklen_t wanted_size = 0; 1000 1001 switch (pf) { 1002 case AF_INET: 1003 wanted_size = sizeof(*sin); 1004 sin = (void *)ss; 1005 if (!sin->sin_port) 1006 fprintf(stderr, "accept: something wrong: ip connection from port 0"); 1007 break; 1008 case AF_INET6: 1009 wanted_size = sizeof(*sin6); 1010 sin6 = (void *)ss; 1011 if (!sin6->sin6_port) 1012 fprintf(stderr, "accept: something wrong: ipv6 connection from port 0"); 1013 break; 1014 default: 1015 fprintf(stderr, "accept: Unknown pf %d, salen %u\n", pf, salen); 1016 return; 1017 } 1018 1019 if (salen != wanted_size) 1020 fprintf(stderr, "accept: size mismatch, got %d expected %d\n", 1021 (int)salen, wanted_size); 1022 1023 if (ss->ss_family != pf) 1024 fprintf(stderr, "accept: pf mismatch, expect %d, ss_family is %d\n", 1025 (int)ss->ss_family, pf); 1026 } 1027 1028 static void check_getpeername(int fd, struct sockaddr_storage *ss, socklen_t salen) 1029 { 1030 struct sockaddr_storage peerss; 1031 socklen_t peersalen = sizeof(peerss); 1032 1033 if (getpeername(fd, (struct sockaddr *)&peerss, &peersalen) < 0) { 1034 perror("getpeername"); 1035 return; 1036 } 1037 1038 if (peersalen != salen) { 1039 fprintf(stderr, "%s: %d vs %d\n", __func__, peersalen, salen); 1040 return; 1041 } 1042 1043 if (memcmp(ss, &peerss, peersalen)) { 1044 char a[INET6_ADDRSTRLEN]; 1045 char b[INET6_ADDRSTRLEN]; 1046 char c[INET6_ADDRSTRLEN]; 1047 char d[INET6_ADDRSTRLEN]; 1048 1049 xgetnameinfo((struct sockaddr *)ss, salen, 1050 a, sizeof(a), b, sizeof(b)); 1051 1052 xgetnameinfo((struct sockaddr *)&peerss, peersalen, 1053 c, sizeof(c), d, sizeof(d)); 1054 1055 fprintf(stderr, "%s: memcmp failure: accept %s vs peername %s, %s vs %s salen %d vs %d\n", 1056 __func__, a, c, b, d, peersalen, salen); 1057 } 1058 } 1059 1060 static void check_getpeername_connect(int fd) 1061 { 1062 struct sockaddr_storage ss; 1063 socklen_t salen = sizeof(ss); 1064 char a[INET6_ADDRSTRLEN]; 1065 char b[INET6_ADDRSTRLEN]; 1066 1067 if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) { 1068 perror("getpeername"); 1069 return; 1070 } 1071 1072 xgetnameinfo((struct sockaddr *)&ss, salen, 1073 a, sizeof(a), b, sizeof(b)); 1074 1075 if (strcmp(cfg_host, a) || strcmp(cfg_port, b)) 1076 fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__, 1077 cfg_host, a, cfg_port, b); 1078 } 1079 1080 static void maybe_close(int fd) 1081 { 1082 unsigned int r = rand(); 1083 1084 if (!(cfg_join || cfg_remove || cfg_repeat > 1) && (r & 1)) 1085 close(fd); 1086 } 1087 1088 int main_loop_s(int listensock) 1089 { 1090 struct sockaddr_storage ss; 1091 struct wstate winfo; 1092 struct pollfd polls; 1093 socklen_t salen; 1094 int remotesock; 1095 int fd = 0; 1096 1097 again: 1098 polls.fd = listensock; 1099 polls.events = POLLIN; 1100 1101 switch (poll(&polls, 1, poll_timeout)) { 1102 case -1: 1103 perror("poll"); 1104 return 1; 1105 case 0: 1106 fprintf(stderr, "%s: timed out\n", __func__); 1107 close(listensock); 1108 return 2; 1109 } 1110 1111 salen = sizeof(ss); 1112 remotesock = accept(listensock, (struct sockaddr *)&ss, &salen); 1113 if (remotesock >= 0) { 1114 maybe_close(listensock); 1115 check_sockaddr(pf, &ss, salen); 1116 check_getpeername(remotesock, &ss, salen); 1117 1118 if (cfg_input) { 1119 fd = open(cfg_input, O_RDONLY); 1120 if (fd < 0) 1121 xerror("can't open %s: %d", cfg_input, errno); 1122 } 1123 1124 SOCK_TEST_TCPULP(remotesock, 0); 1125 1126 memset(&winfo, 0, sizeof(winfo)); 1127 copyfd_io(fd, remotesock, 1, true, &winfo); 1128 } else { 1129 perror("accept"); 1130 return 1; 1131 } 1132 1133 if (cfg_input) 1134 close(fd); 1135 1136 if (--cfg_repeat > 0) 1137 goto again; 1138 1139 return 0; 1140 } 1141 1142 static void init_rng(void) 1143 { 1144 unsigned int foo; 1145 1146 if (getrandom(&foo, sizeof(foo), 0) == -1) { 1147 perror("getrandom"); 1148 exit(1); 1149 } 1150 1151 srand(foo); 1152 } 1153 1154 static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen) 1155 { 1156 int err; 1157 1158 err = setsockopt(fd, level, optname, optval, optlen); 1159 if (err) { 1160 perror("setsockopt"); 1161 exit(1); 1162 } 1163 } 1164 1165 static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg) 1166 { 1167 static const unsigned int on = 1; 1168 1169 if (cmsg->timestampns) 1170 xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on)); 1171 if (cmsg->tcp_inq) 1172 xsetsockopt(fd, IPPROTO_TCP, TCP_INQ, &on, sizeof(on)); 1173 } 1174 1175 static void parse_cmsg_types(const char *type) 1176 { 1177 char *next = strchr(type, ','); 1178 unsigned int len = 0; 1179 1180 cfg_cmsg_types.cmsg_enabled = 1; 1181 1182 if (next) { 1183 parse_cmsg_types(next + 1); 1184 len = next - type; 1185 } else { 1186 len = strlen(type); 1187 } 1188 1189 if (strncmp(type, "TIMESTAMPNS", len) == 0) { 1190 cfg_cmsg_types.timestampns = 1; 1191 return; 1192 } 1193 1194 if (strncmp(type, "TCPINQ", len) == 0) { 1195 cfg_cmsg_types.tcp_inq = 1; 1196 return; 1197 } 1198 1199 fprintf(stderr, "Unrecognized cmsg option %s\n", type); 1200 exit(1); 1201 } 1202 1203 static void parse_setsock_options(const char *name) 1204 { 1205 char *next = strchr(name, ','); 1206 unsigned int len = 0; 1207 1208 if (next) { 1209 parse_setsock_options(next + 1); 1210 len = next - name; 1211 } else { 1212 len = strlen(name); 1213 } 1214 1215 if (strncmp(name, "TRANSPARENT", len) == 0) { 1216 cfg_sockopt_types.transparent = 1; 1217 return; 1218 } 1219 1220 if (strncmp(name, "MPTFO", len) == 0) { 1221 cfg_sockopt_types.mptfo = 1; 1222 return; 1223 } 1224 1225 fprintf(stderr, "Unrecognized setsockopt option %s\n", name); 1226 exit(1); 1227 } 1228 1229 void xdisconnect(int fd) 1230 { 1231 socklen_t addrlen = sizeof(struct sockaddr_storage); 1232 struct sockaddr_storage addr, empty; 1233 int msec_sleep = 10; 1234 void *raw_addr; 1235 int i, cmdlen; 1236 char cmd[128]; 1237 1238 /* get the local address and convert it to string */ 1239 if (getsockname(fd, (struct sockaddr *)&addr, &addrlen) < 0) 1240 xerror("getsockname"); 1241 1242 if (addr.ss_family == AF_INET) 1243 raw_addr = &(((struct sockaddr_in *)&addr)->sin_addr); 1244 else if (addr.ss_family == AF_INET6) 1245 raw_addr = &(((struct sockaddr_in6 *)&addr)->sin6_addr); 1246 else 1247 xerror("bad family"); 1248 1249 strcpy(cmd, "ss -M | grep -q "); 1250 cmdlen = strlen(cmd); 1251 if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen], 1252 sizeof(cmd) - cmdlen)) 1253 xerror("inet_ntop"); 1254 1255 shutdown(fd, SHUT_WR); 1256 1257 /* 1258 * wait until the pending data is completely flushed and all 1259 * the MPTCP sockets reached the closed status. 1260 * disconnect will bypass/ignore/drop any pending data. 1261 */ 1262 for (i = 0; ; i += msec_sleep) { 1263 /* closed socket are not listed by 'ss' */ 1264 if (system(cmd) != 0) 1265 break; 1266 1267 if (i > poll_timeout) 1268 xerror("timeout while waiting for spool to complete"); 1269 usleep(msec_sleep * 1000); 1270 } 1271 1272 memset(&empty, 0, sizeof(empty)); 1273 empty.ss_family = AF_UNSPEC; 1274 if (connect(fd, (struct sockaddr *)&empty, addrlen) < 0) 1275 xerror("can't disconnect: %d", errno); 1276 } 1277 1278 int main_loop(void) 1279 { 1280 int fd = 0, ret, fd_in = 0; 1281 struct addrinfo *peer; 1282 struct wstate winfo; 1283 1284 if (cfg_input && cfg_sockopt_types.mptfo) { 1285 fd_in = open(cfg_input, O_RDONLY); 1286 if (fd_in < 0) 1287 xerror("can't open %s:%d", cfg_input, errno); 1288 } 1289 1290 memset(&winfo, 0, sizeof(winfo)); 1291 fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &winfo); 1292 if (fd < 0) 1293 return 2; 1294 1295 again: 1296 check_getpeername_connect(fd); 1297 1298 SOCK_TEST_TCPULP(fd, cfg_sock_proto); 1299 1300 if (cfg_rcvbuf) 1301 set_rcvbuf(fd, cfg_rcvbuf); 1302 if (cfg_sndbuf) 1303 set_sndbuf(fd, cfg_sndbuf); 1304 if (cfg_cmsg_types.cmsg_enabled) 1305 apply_cmsg_types(fd, &cfg_cmsg_types); 1306 1307 if (cfg_input && !cfg_sockopt_types.mptfo) { 1308 fd_in = open(cfg_input, O_RDONLY); 1309 if (fd_in < 0) 1310 xerror("can't open %s:%d", cfg_input, errno); 1311 } 1312 1313 ret = copyfd_io(fd_in, fd, 1, 0, &winfo); 1314 if (ret) 1315 goto out; 1316 1317 if (cfg_truncate > 0) { 1318 shutdown(fd, SHUT_WR); 1319 } else if (--cfg_repeat > 0) { 1320 xdisconnect(fd); 1321 1322 /* the socket could be unblocking at this point, we need the 1323 * connect to be blocking 1324 */ 1325 set_nonblock(fd, false); 1326 if (connect(fd, peer->ai_addr, peer->ai_addrlen)) 1327 xerror("can't reconnect: %d", errno); 1328 if (cfg_input) 1329 close(fd_in); 1330 memset(&winfo, 0, sizeof(winfo)); 1331 goto again; 1332 } else { 1333 close(fd); 1334 } 1335 1336 out: 1337 if (cfg_input) 1338 close(fd_in); 1339 return ret; 1340 } 1341 1342 int parse_proto(const char *proto) 1343 { 1344 if (!strcasecmp(proto, "MPTCP")) 1345 return IPPROTO_MPTCP; 1346 if (!strcasecmp(proto, "TCP")) 1347 return IPPROTO_TCP; 1348 1349 fprintf(stderr, "Unknown protocol: %s\n.", proto); 1350 die_usage(); 1351 1352 /* silence compiler warning */ 1353 return 0; 1354 } 1355 1356 int parse_mode(const char *mode) 1357 { 1358 if (!strcasecmp(mode, "poll")) 1359 return CFG_MODE_POLL; 1360 if (!strcasecmp(mode, "mmap")) 1361 return CFG_MODE_MMAP; 1362 if (!strcasecmp(mode, "sendfile")) 1363 return CFG_MODE_SENDFILE; 1364 1365 fprintf(stderr, "Unknown test mode: %s\n", mode); 1366 fprintf(stderr, "Supported modes are:\n"); 1367 fprintf(stderr, "\t\t\"poll\" - interleaved read/write using poll()\n"); 1368 fprintf(stderr, "\t\t\"mmap\" - send entire input file (mmap+write), then read response (-l will read input first)\n"); 1369 fprintf(stderr, "\t\t\"sendfile\" - send entire input file (sendfile), then read response (-l will read input first)\n"); 1370 1371 die_usage(); 1372 1373 /* silence compiler warning */ 1374 return 0; 1375 } 1376 1377 int parse_peek(const char *mode) 1378 { 1379 if (!strcasecmp(mode, "saveWithPeek")) 1380 return CFG_WITH_PEEK; 1381 if (!strcasecmp(mode, "saveAfterPeek")) 1382 return CFG_AFTER_PEEK; 1383 1384 fprintf(stderr, "Unknown: %s\n", mode); 1385 fprintf(stderr, "Supported MSG_PEEK mode are:\n"); 1386 fprintf(stderr, 1387 "\t\t\"saveWithPeek\" - recv data with flags 'MSG_PEEK' and save the peek data into file\n"); 1388 fprintf(stderr, 1389 "\t\t\"saveAfterPeek\" - read and save data into file after recv with flags 'MSG_PEEK'\n"); 1390 1391 die_usage(); 1392 1393 /* silence compiler warning */ 1394 return 0; 1395 } 1396 1397 static int parse_int(const char *size) 1398 { 1399 unsigned long s; 1400 1401 errno = 0; 1402 1403 s = strtoul(size, NULL, 0); 1404 1405 if (errno) { 1406 fprintf(stderr, "Invalid sndbuf size %s (%s)\n", 1407 size, strerror(errno)); 1408 die_usage(); 1409 } 1410 1411 if (s > INT_MAX) { 1412 fprintf(stderr, "Invalid sndbuf size %s (%s)\n", 1413 size, strerror(ERANGE)); 1414 die_usage(); 1415 } 1416 1417 return (int)s; 1418 } 1419 1420 static void parse_opts(int argc, char **argv) 1421 { 1422 int c; 1423 1424 while ((c = getopt(argc, argv, "6c:f:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) { 1425 switch (c) { 1426 case 'f': 1427 cfg_truncate = atoi(optarg); 1428 1429 /* when receiving a fastclose, ignore PIPE signals and 1430 * all the I/O errors later in the code 1431 */ 1432 if (cfg_truncate < 0) { 1433 cfg_rcv_trunc = true; 1434 signal(SIGPIPE, handle_signal); 1435 } 1436 break; 1437 case 'j': 1438 cfg_join = true; 1439 cfg_mode = CFG_MODE_POLL; 1440 break; 1441 case 'r': 1442 cfg_remove = true; 1443 cfg_mode = CFG_MODE_POLL; 1444 cfg_wait = 400000; 1445 cfg_do_w = atoi(optarg); 1446 if (cfg_do_w <= 0) 1447 cfg_do_w = 50; 1448 break; 1449 case 'i': 1450 cfg_input = optarg; 1451 break; 1452 case 'I': 1453 cfg_repeat = atoi(optarg); 1454 break; 1455 case 'l': 1456 listen_mode = true; 1457 break; 1458 case 'p': 1459 cfg_port = optarg; 1460 break; 1461 case 's': 1462 cfg_sock_proto = parse_proto(optarg); 1463 break; 1464 case 'h': 1465 die_usage(); 1466 break; 1467 case '6': 1468 pf = AF_INET6; 1469 break; 1470 case 't': 1471 poll_timeout = atoi(optarg) * 1000; 1472 if (poll_timeout <= 0) 1473 poll_timeout = -1; 1474 break; 1475 case 'T': 1476 cfg_time = atoi(optarg); 1477 break; 1478 case 'm': 1479 cfg_mode = parse_mode(optarg); 1480 break; 1481 case 'S': 1482 cfg_sndbuf = parse_int(optarg); 1483 break; 1484 case 'R': 1485 cfg_rcvbuf = parse_int(optarg); 1486 break; 1487 case 'w': 1488 cfg_wait = atoi(optarg)*1000000; 1489 break; 1490 case 'M': 1491 cfg_mark = strtol(optarg, NULL, 0); 1492 break; 1493 case 'P': 1494 cfg_peek = parse_peek(optarg); 1495 break; 1496 case 'c': 1497 parse_cmsg_types(optarg); 1498 break; 1499 case 'o': 1500 parse_setsock_options(optarg); 1501 break; 1502 } 1503 } 1504 1505 if (optind + 1 != argc) 1506 die_usage(); 1507 cfg_host = argv[optind]; 1508 1509 if (strchr(cfg_host, ':')) 1510 pf = AF_INET6; 1511 } 1512 1513 int main(int argc, char *argv[]) 1514 { 1515 init_rng(); 1516 1517 signal(SIGUSR1, handle_signal); 1518 parse_opts(argc, argv); 1519 1520 if (listen_mode) { 1521 int fd = sock_listen_mptcp(cfg_host, cfg_port); 1522 1523 if (fd < 0) 1524 return 1; 1525 1526 if (cfg_rcvbuf) 1527 set_rcvbuf(fd, cfg_rcvbuf); 1528 if (cfg_sndbuf) 1529 set_sndbuf(fd, cfg_sndbuf); 1530 if (cfg_mark) 1531 set_mark(fd, cfg_mark); 1532 if (cfg_cmsg_types.cmsg_enabled) 1533 apply_cmsg_types(fd, &cfg_cmsg_types); 1534 1535 return main_loop_s(fd); 1536 } 1537 1538 return main_loop(); 1539 } 1540