1 // SPDX-License-Identifier: GPL-2.0 2 3 #define _GNU_SOURCE 4 5 #include <errno.h> 6 #include <limits.h> 7 #include <fcntl.h> 8 #include <string.h> 9 #include <stdarg.h> 10 #include <stdbool.h> 11 #include <stdint.h> 12 #include <stdio.h> 13 #include <stdlib.h> 14 #include <strings.h> 15 #include <signal.h> 16 #include <unistd.h> 17 #include <time.h> 18 19 #include <sys/ioctl.h> 20 #include <sys/poll.h> 21 #include <sys/random.h> 22 #include <sys/sendfile.h> 23 #include <sys/stat.h> 24 #include <sys/socket.h> 25 #include <sys/types.h> 26 #include <sys/mman.h> 27 28 #include <arpa/inet.h> 29 30 #include <netdb.h> 31 #include <netinet/in.h> 32 33 #include <linux/tcp.h> 34 #include <linux/time_types.h> 35 #include <linux/sockios.h> 36 37 extern int optind; 38 39 #ifndef IPPROTO_MPTCP 40 #define IPPROTO_MPTCP 262 41 #endif 42 #ifndef TCP_ULP 43 #define TCP_ULP 31 44 #endif 45 46 static int poll_timeout = 10 * 1000; 47 static bool listen_mode; 48 static bool quit; 49 50 enum cfg_mode { 51 CFG_MODE_POLL, 52 CFG_MODE_MMAP, 53 CFG_MODE_SENDFILE, 54 }; 55 56 enum cfg_peek { 57 CFG_NONE_PEEK, 58 CFG_WITH_PEEK, 59 CFG_AFTER_PEEK, 60 }; 61 62 static enum cfg_mode cfg_mode = CFG_MODE_POLL; 63 static enum cfg_peek cfg_peek = CFG_NONE_PEEK; 64 static const char *cfg_host; 65 static const char *cfg_port = "12000"; 66 static int cfg_sock_proto = IPPROTO_MPTCP; 67 static int pf = AF_INET; 68 static int cfg_sndbuf; 69 static int cfg_rcvbuf; 70 static bool cfg_join; 71 static bool cfg_remove; 72 static unsigned int cfg_time; 73 static unsigned int cfg_do_w; 74 static int cfg_wait; 75 static uint32_t cfg_mark; 76 static char *cfg_input; 77 static int cfg_repeat = 1; 78 static int cfg_truncate; 79 static int cfg_rcv_trunc; 80 81 struct cfg_cmsg_types { 82 unsigned int cmsg_enabled:1; 83 unsigned int timestampns:1; 84 unsigned int tcp_inq:1; 85 }; 86 87 struct cfg_sockopt_types { 88 unsigned int transparent:1; 89 unsigned int mptfo:1; 90 }; 91 92 struct tcp_inq_state { 93 unsigned int last; 94 bool expect_eof; 95 }; 96 97 struct wstate { 98 char buf[8192]; 99 unsigned int len; 100 unsigned int off; 101 unsigned int total_len; 102 }; 103 104 static struct tcp_inq_state tcp_inq; 105 106 static struct cfg_cmsg_types cfg_cmsg_types; 107 static struct cfg_sockopt_types cfg_sockopt_types; 108 109 static void die_usage(void) 110 { 111 fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-f offset] [-i file] [-I num] [-j] [-l] " 112 "[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-r num] [-R num] " 113 "[-s MPTCP|TCP] [-S num] [-t num] [-T num] [-w sec] connect_address\n"); 114 fprintf(stderr, "\t-6 use ipv6\n"); 115 fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n"); 116 fprintf(stderr, "\t-f offset -- stop the I/O after receiving and sending the specified amount " 117 "of bytes. If there are unread bytes in the receive queue, that will cause a MPTCP " 118 "fastclose at close/shutdown. If offset is negative, expect the peer to close before " 119 "all the local data as been sent, thus toleration errors on write and EPIPE signals\n"); 120 fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin"); 121 fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num " 122 "incoming connections, in client mode, disconnect and reconnect to the server\n"); 123 fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down " 124 "-- for MPJ tests\n"); 125 fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n"); 126 fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); 127 fprintf(stderr, "\t-M mark -- set socket packet mark\n"); 128 fprintf(stderr, "\t-o option -- test sockopt <option>\n"); 129 fprintf(stderr, "\t-p num -- use port num\n"); 130 fprintf(stderr, 131 "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n"); 132 fprintf(stderr, "\t-r num -- enable slow mode, limiting each write to num bytes " 133 "-- for remove addr tests\n"); 134 fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); 135 fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n"); 136 fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); 137 fprintf(stderr, "\t-t num -- set poll timeout to num\n"); 138 fprintf(stderr, "\t-T num -- set expected runtime to num ms\n"); 139 fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); 140 exit(1); 141 } 142 143 static void xerror(const char *fmt, ...) 144 { 145 va_list ap; 146 147 va_start(ap, fmt); 148 vfprintf(stderr, fmt, ap); 149 va_end(ap); 150 exit(1); 151 } 152 153 static void handle_signal(int nr) 154 { 155 quit = true; 156 } 157 158 static const char *getxinfo_strerr(int err) 159 { 160 if (err == EAI_SYSTEM) 161 return strerror(errno); 162 163 return gai_strerror(err); 164 } 165 166 static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen, 167 char *host, socklen_t hostlen, 168 char *serv, socklen_t servlen) 169 { 170 int flags = NI_NUMERICHOST | NI_NUMERICSERV; 171 int err = getnameinfo(addr, addrlen, host, hostlen, serv, servlen, 172 flags); 173 174 if (err) { 175 const char *errstr = getxinfo_strerr(err); 176 177 fprintf(stderr, "Fatal: getnameinfo: %s\n", errstr); 178 exit(1); 179 } 180 } 181 182 static void xgetaddrinfo(const char *node, const char *service, 183 const struct addrinfo *hints, 184 struct addrinfo **res) 185 { 186 int err = getaddrinfo(node, service, hints, res); 187 188 if (err) { 189 const char *errstr = getxinfo_strerr(err); 190 191 fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", 192 node ? node : "", service ? service : "", errstr); 193 exit(1); 194 } 195 } 196 197 static void set_rcvbuf(int fd, unsigned int size) 198 { 199 int err; 200 201 err = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)); 202 if (err) { 203 perror("set SO_RCVBUF"); 204 exit(1); 205 } 206 } 207 208 static void set_sndbuf(int fd, unsigned int size) 209 { 210 int err; 211 212 err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size)); 213 if (err) { 214 perror("set SO_SNDBUF"); 215 exit(1); 216 } 217 } 218 219 static void set_mark(int fd, uint32_t mark) 220 { 221 int err; 222 223 err = setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); 224 if (err) { 225 perror("set SO_MARK"); 226 exit(1); 227 } 228 } 229 230 static void set_transparent(int fd, int pf) 231 { 232 int one = 1; 233 234 switch (pf) { 235 case AF_INET: 236 if (-1 == setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one))) 237 perror("IP_TRANSPARENT"); 238 break; 239 case AF_INET6: 240 if (-1 == setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one))) 241 perror("IPV6_TRANSPARENT"); 242 break; 243 } 244 } 245 246 static void set_mptfo(int fd, int pf) 247 { 248 int qlen = 25; 249 250 if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1) 251 perror("TCP_FASTOPEN"); 252 } 253 254 static int do_ulp_so(int sock, const char *name) 255 { 256 return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name)); 257 } 258 259 #define X(m) xerror("%s:%u: %s: failed for proto %d at line %u", __FILE__, __LINE__, (m), proto, line) 260 static void sock_test_tcpulp(int sock, int proto, unsigned int line) 261 { 262 socklen_t buflen = 8; 263 char buf[8] = ""; 264 int ret = getsockopt(sock, IPPROTO_TCP, TCP_ULP, buf, &buflen); 265 266 if (ret != 0) 267 X("getsockopt"); 268 269 if (buflen > 0) { 270 if (strcmp(buf, "mptcp") != 0) 271 xerror("unexpected ULP '%s' for proto %d at line %u", buf, proto, line); 272 ret = do_ulp_so(sock, "tls"); 273 if (ret == 0) 274 X("setsockopt"); 275 } else if (proto == IPPROTO_MPTCP) { 276 ret = do_ulp_so(sock, "tls"); 277 if (ret != -1) 278 X("setsockopt"); 279 } 280 281 ret = do_ulp_so(sock, "mptcp"); 282 if (ret != -1) 283 X("setsockopt"); 284 285 #undef X 286 } 287 288 #define SOCK_TEST_TCPULP(s, p) sock_test_tcpulp((s), (p), __LINE__) 289 290 static int sock_listen_mptcp(const char * const listenaddr, 291 const char * const port) 292 { 293 int sock = -1; 294 struct addrinfo hints = { 295 .ai_protocol = IPPROTO_TCP, 296 .ai_socktype = SOCK_STREAM, 297 .ai_flags = AI_PASSIVE | AI_NUMERICHOST 298 }; 299 300 hints.ai_family = pf; 301 302 struct addrinfo *a, *addr; 303 int one = 1; 304 305 xgetaddrinfo(listenaddr, port, &hints, &addr); 306 hints.ai_family = pf; 307 308 for (a = addr; a; a = a->ai_next) { 309 sock = socket(a->ai_family, a->ai_socktype, cfg_sock_proto); 310 if (sock < 0) 311 continue; 312 313 SOCK_TEST_TCPULP(sock, cfg_sock_proto); 314 315 if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, 316 sizeof(one))) 317 perror("setsockopt"); 318 319 if (cfg_sockopt_types.transparent) 320 set_transparent(sock, pf); 321 322 if (cfg_sockopt_types.mptfo) 323 set_mptfo(sock, pf); 324 325 if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) 326 break; /* success */ 327 328 perror("bind"); 329 close(sock); 330 sock = -1; 331 } 332 333 freeaddrinfo(addr); 334 335 if (sock < 0) { 336 fprintf(stderr, "Could not create listen socket\n"); 337 return sock; 338 } 339 340 SOCK_TEST_TCPULP(sock, cfg_sock_proto); 341 342 if (listen(sock, 20)) { 343 perror("listen"); 344 close(sock); 345 return -1; 346 } 347 348 SOCK_TEST_TCPULP(sock, cfg_sock_proto); 349 350 return sock; 351 } 352 353 static int sock_connect_mptcp(const char * const remoteaddr, 354 const char * const port, int proto, 355 struct addrinfo **peer, 356 int infd, struct wstate *winfo) 357 { 358 struct addrinfo hints = { 359 .ai_protocol = IPPROTO_TCP, 360 .ai_socktype = SOCK_STREAM, 361 }; 362 struct addrinfo *a, *addr; 363 int syn_copied = 0; 364 int sock = -1; 365 366 hints.ai_family = pf; 367 368 xgetaddrinfo(remoteaddr, port, &hints, &addr); 369 for (a = addr; a; a = a->ai_next) { 370 sock = socket(a->ai_family, a->ai_socktype, proto); 371 if (sock < 0) { 372 perror("socket"); 373 continue; 374 } 375 376 SOCK_TEST_TCPULP(sock, proto); 377 378 if (cfg_mark) 379 set_mark(sock, cfg_mark); 380 381 if (cfg_sockopt_types.mptfo) { 382 if (!winfo->total_len) 383 winfo->total_len = winfo->len = read(infd, winfo->buf, 384 sizeof(winfo->buf)); 385 386 syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN, 387 a->ai_addr, a->ai_addrlen); 388 if (syn_copied >= 0) { 389 winfo->off = syn_copied; 390 winfo->len -= syn_copied; 391 *peer = a; 392 break; /* success */ 393 } 394 } else { 395 if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) { 396 *peer = a; 397 break; /* success */ 398 } 399 } 400 if (cfg_sockopt_types.mptfo) { 401 perror("sendto()"); 402 close(sock); 403 sock = -1; 404 } else { 405 perror("connect()"); 406 close(sock); 407 sock = -1; 408 } 409 } 410 411 freeaddrinfo(addr); 412 if (sock != -1) 413 SOCK_TEST_TCPULP(sock, proto); 414 return sock; 415 } 416 417 static size_t do_rnd_write(const int fd, char *buf, const size_t len) 418 { 419 static bool first = true; 420 unsigned int do_w; 421 ssize_t bw; 422 423 do_w = rand() & 0xffff; 424 if (do_w == 0 || do_w > len) 425 do_w = len; 426 427 if (cfg_join && first && do_w > 100) 428 do_w = 100; 429 430 if (cfg_remove && do_w > cfg_do_w) 431 do_w = cfg_do_w; 432 433 bw = write(fd, buf, do_w); 434 if (bw < 0) 435 return bw; 436 437 /* let the join handshake complete, before going on */ 438 if (cfg_join && first) { 439 usleep(200000); 440 first = false; 441 } 442 443 if (cfg_remove) 444 usleep(200000); 445 446 return bw; 447 } 448 449 static size_t do_write(const int fd, char *buf, const size_t len) 450 { 451 size_t offset = 0; 452 453 while (offset < len) { 454 size_t written; 455 ssize_t bw; 456 457 bw = write(fd, buf + offset, len - offset); 458 if (bw < 0) { 459 perror("write"); 460 return 0; 461 } 462 463 written = (size_t)bw; 464 offset += written; 465 } 466 467 return offset; 468 } 469 470 static void process_cmsg(struct msghdr *msgh) 471 { 472 struct __kernel_timespec ts; 473 bool inq_found = false; 474 bool ts_found = false; 475 unsigned int inq = 0; 476 struct cmsghdr *cmsg; 477 478 for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) { 479 if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) { 480 memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts)); 481 ts_found = true; 482 continue; 483 } 484 if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) { 485 memcpy(&inq, CMSG_DATA(cmsg), sizeof(inq)); 486 inq_found = true; 487 continue; 488 } 489 490 } 491 492 if (cfg_cmsg_types.timestampns) { 493 if (!ts_found) 494 xerror("TIMESTAMPNS not present\n"); 495 } 496 497 if (cfg_cmsg_types.tcp_inq) { 498 if (!inq_found) 499 xerror("TCP_INQ not present\n"); 500 501 if (inq > 1024) 502 xerror("tcp_inq %u is larger than one kbyte\n", inq); 503 tcp_inq.last = inq; 504 } 505 } 506 507 static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len) 508 { 509 char msg_buf[8192]; 510 struct iovec iov = { 511 .iov_base = buf, 512 .iov_len = len, 513 }; 514 struct msghdr msg = { 515 .msg_iov = &iov, 516 .msg_iovlen = 1, 517 .msg_control = msg_buf, 518 .msg_controllen = sizeof(msg_buf), 519 }; 520 int flags = 0; 521 unsigned int last_hint = tcp_inq.last; 522 int ret = recvmsg(fd, &msg, flags); 523 524 if (ret <= 0) { 525 if (ret == 0 && tcp_inq.expect_eof) 526 return ret; 527 528 if (ret == 0 && cfg_cmsg_types.tcp_inq) 529 if (last_hint != 1 && last_hint != 0) 530 xerror("EOF but last tcp_inq hint was %u\n", last_hint); 531 532 return ret; 533 } 534 535 if (tcp_inq.expect_eof) 536 xerror("expected EOF, last_hint %u, now %u\n", 537 last_hint, tcp_inq.last); 538 539 if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled) 540 xerror("got %lu bytes of cmsg data, expected 0\n", 541 (unsigned long)msg.msg_controllen); 542 543 if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled) 544 xerror("%s\n", "got no cmsg data"); 545 546 if (msg.msg_controllen) 547 process_cmsg(&msg); 548 549 if (cfg_cmsg_types.tcp_inq) { 550 if ((size_t)ret < len && last_hint > (unsigned int)ret) { 551 if (ret + 1 != (int)last_hint) { 552 int next = read(fd, msg_buf, sizeof(msg_buf)); 553 554 xerror("read %u of %u, last_hint was %u tcp_inq hint now %u next_read returned %d/%m\n", 555 ret, (unsigned int)len, last_hint, tcp_inq.last, next); 556 } else { 557 tcp_inq.expect_eof = true; 558 } 559 } 560 } 561 562 return ret; 563 } 564 565 static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) 566 { 567 int ret = 0; 568 char tmp[16384]; 569 size_t cap = rand(); 570 571 cap &= 0xffff; 572 573 if (cap == 0) 574 cap = 1; 575 else if (cap > len) 576 cap = len; 577 578 if (cfg_peek == CFG_WITH_PEEK) { 579 ret = recv(fd, buf, cap, MSG_PEEK); 580 ret = (ret < 0) ? ret : read(fd, tmp, ret); 581 } else if (cfg_peek == CFG_AFTER_PEEK) { 582 ret = recv(fd, buf, cap, MSG_PEEK); 583 ret = (ret < 0) ? ret : read(fd, buf, cap); 584 } else if (cfg_cmsg_types.cmsg_enabled) { 585 ret = do_recvmsg_cmsg(fd, buf, cap); 586 } else { 587 ret = read(fd, buf, cap); 588 } 589 590 return ret; 591 } 592 593 static void set_nonblock(int fd, bool nonblock) 594 { 595 int flags = fcntl(fd, F_GETFL); 596 597 if (flags == -1) 598 return; 599 600 if (nonblock) 601 fcntl(fd, F_SETFL, flags | O_NONBLOCK); 602 else 603 fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); 604 } 605 606 static void shut_wr(int fd) 607 { 608 /* Close our write side, ev. give some time 609 * for address notification and/or checking 610 * the current status 611 */ 612 if (cfg_wait) 613 usleep(cfg_wait); 614 615 shutdown(fd, SHUT_WR); 616 } 617 618 static int copyfd_io_poll(int infd, int peerfd, int outfd, 619 bool *in_closed_after_out, struct wstate *winfo) 620 { 621 struct pollfd fds = { 622 .fd = peerfd, 623 .events = POLLIN | POLLOUT, 624 }; 625 unsigned int total_wlen = 0, total_rlen = 0; 626 627 set_nonblock(peerfd, true); 628 629 for (;;) { 630 char rbuf[8192]; 631 ssize_t len; 632 633 if (fds.events == 0 || quit) 634 break; 635 636 switch (poll(&fds, 1, poll_timeout)) { 637 case -1: 638 if (errno == EINTR) 639 continue; 640 perror("poll"); 641 return 1; 642 case 0: 643 fprintf(stderr, "%s: poll timed out (events: " 644 "POLLIN %u, POLLOUT %u)\n", __func__, 645 fds.events & POLLIN, fds.events & POLLOUT); 646 return 2; 647 } 648 649 if (fds.revents & POLLIN) { 650 ssize_t rb = sizeof(rbuf); 651 652 /* limit the total amount of read data to the trunc value*/ 653 if (cfg_truncate > 0) { 654 if (rb + total_rlen > cfg_truncate) 655 rb = cfg_truncate - total_rlen; 656 len = read(peerfd, rbuf, rb); 657 } else { 658 len = do_rnd_read(peerfd, rbuf, sizeof(rbuf)); 659 } 660 if (len == 0) { 661 /* no more data to receive: 662 * peer has closed its write side 663 */ 664 fds.events &= ~POLLIN; 665 666 if ((fds.events & POLLOUT) == 0) { 667 *in_closed_after_out = true; 668 /* and nothing more to send */ 669 break; 670 } 671 672 /* Else, still have data to transmit */ 673 } else if (len < 0) { 674 if (cfg_rcv_trunc) 675 return 0; 676 perror("read"); 677 return 3; 678 } 679 680 total_rlen += len; 681 do_write(outfd, rbuf, len); 682 } 683 684 if (fds.revents & POLLOUT) { 685 if (winfo->len == 0) { 686 winfo->off = 0; 687 winfo->len = read(infd, winfo->buf, sizeof(winfo->buf)); 688 } 689 690 if (winfo->len > 0) { 691 ssize_t bw; 692 693 /* limit the total amount of written data to the trunc value */ 694 if (cfg_truncate > 0 && winfo->len + total_wlen > cfg_truncate) 695 winfo->len = cfg_truncate - total_wlen; 696 697 bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); 698 if (bw < 0) { 699 if (cfg_rcv_trunc) 700 return 0; 701 perror("write"); 702 return 111; 703 } 704 705 winfo->off += bw; 706 winfo->len -= bw; 707 total_wlen += bw; 708 } else if (winfo->len == 0) { 709 /* We have no more data to send. */ 710 fds.events &= ~POLLOUT; 711 712 if ((fds.events & POLLIN) == 0) 713 /* ... and peer also closed already */ 714 break; 715 716 shut_wr(peerfd); 717 } else { 718 if (errno == EINTR) 719 continue; 720 perror("read"); 721 return 4; 722 } 723 } 724 725 if (fds.revents & (POLLERR | POLLNVAL)) { 726 if (cfg_rcv_trunc) 727 return 0; 728 fprintf(stderr, "Unexpected revents: " 729 "POLLERR/POLLNVAL(%x)\n", fds.revents); 730 return 5; 731 } 732 733 if (cfg_truncate > 0 && total_wlen >= cfg_truncate && 734 total_rlen >= cfg_truncate) 735 break; 736 } 737 738 /* leave some time for late join/announce */ 739 if (cfg_remove && !quit) 740 usleep(cfg_wait); 741 742 return 0; 743 } 744 745 static int do_recvfile(int infd, int outfd) 746 { 747 ssize_t r; 748 749 do { 750 char buf[16384]; 751 752 r = do_rnd_read(infd, buf, sizeof(buf)); 753 if (r > 0) { 754 if (write(outfd, buf, r) != r) 755 break; 756 } else if (r < 0) { 757 perror("read"); 758 } 759 } while (r > 0); 760 761 return (int)r; 762 } 763 764 static int spool_buf(int fd, struct wstate *winfo) 765 { 766 while (winfo->len) { 767 int ret = write(fd, winfo->buf + winfo->off, winfo->len); 768 769 if (ret < 0) { 770 perror("write"); 771 return 4; 772 } 773 winfo->off += ret; 774 winfo->len -= ret; 775 } 776 return 0; 777 } 778 779 static int do_mmap(int infd, int outfd, unsigned int size, 780 struct wstate *winfo) 781 { 782 char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0); 783 ssize_t ret = 0, off = winfo->total_len; 784 size_t rem; 785 786 if (inbuf == MAP_FAILED) { 787 perror("mmap"); 788 return 1; 789 } 790 791 ret = spool_buf(outfd, winfo); 792 if (ret < 0) 793 return ret; 794 795 rem = size - winfo->total_len; 796 797 while (rem > 0) { 798 ret = write(outfd, inbuf + off, rem); 799 800 if (ret < 0) { 801 perror("write"); 802 break; 803 } 804 805 off += ret; 806 rem -= ret; 807 } 808 809 munmap(inbuf, size); 810 return rem; 811 } 812 813 static int get_infd_size(int fd) 814 { 815 struct stat sb; 816 ssize_t count; 817 int err; 818 819 err = fstat(fd, &sb); 820 if (err < 0) { 821 perror("fstat"); 822 return -1; 823 } 824 825 if ((sb.st_mode & S_IFMT) != S_IFREG) { 826 fprintf(stderr, "%s: stdin is not a regular file\n", __func__); 827 return -2; 828 } 829 830 count = sb.st_size; 831 if (count > INT_MAX) { 832 fprintf(stderr, "File too large: %zu\n", count); 833 return -3; 834 } 835 836 return (int)count; 837 } 838 839 static int do_sendfile(int infd, int outfd, unsigned int count, 840 struct wstate *winfo) 841 { 842 int ret = spool_buf(outfd, winfo); 843 844 if (ret < 0) 845 return ret; 846 847 count -= winfo->total_len; 848 849 while (count > 0) { 850 ssize_t r; 851 852 r = sendfile(outfd, infd, NULL, count); 853 if (r < 0) { 854 perror("sendfile"); 855 return 3; 856 } 857 858 count -= r; 859 } 860 861 return 0; 862 } 863 864 static int copyfd_io_mmap(int infd, int peerfd, int outfd, 865 unsigned int size, bool *in_closed_after_out, 866 struct wstate *winfo) 867 { 868 int err; 869 870 if (listen_mode) { 871 err = do_recvfile(peerfd, outfd); 872 if (err) 873 return err; 874 875 err = do_mmap(infd, peerfd, size, winfo); 876 } else { 877 err = do_mmap(infd, peerfd, size, winfo); 878 if (err) 879 return err; 880 881 shut_wr(peerfd); 882 883 err = do_recvfile(peerfd, outfd); 884 *in_closed_after_out = true; 885 } 886 887 return err; 888 } 889 890 static int copyfd_io_sendfile(int infd, int peerfd, int outfd, 891 unsigned int size, bool *in_closed_after_out, struct wstate *winfo) 892 { 893 int err; 894 895 if (listen_mode) { 896 err = do_recvfile(peerfd, outfd); 897 if (err) 898 return err; 899 900 err = do_sendfile(infd, peerfd, size, winfo); 901 } else { 902 err = do_sendfile(infd, peerfd, size, winfo); 903 if (err) 904 return err; 905 906 shut_wr(peerfd); 907 908 err = do_recvfile(peerfd, outfd); 909 *in_closed_after_out = true; 910 } 911 912 return err; 913 } 914 915 static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo) 916 { 917 bool in_closed_after_out = false; 918 struct timespec start, end; 919 int file_size; 920 int ret; 921 922 if (cfg_time && (clock_gettime(CLOCK_MONOTONIC, &start) < 0)) 923 xerror("can not fetch start time %d", errno); 924 925 switch (cfg_mode) { 926 case CFG_MODE_POLL: 927 ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out, 928 winfo); 929 break; 930 931 case CFG_MODE_MMAP: 932 file_size = get_infd_size(infd); 933 if (file_size < 0) 934 return file_size; 935 ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, 936 &in_closed_after_out, winfo); 937 break; 938 939 case CFG_MODE_SENDFILE: 940 file_size = get_infd_size(infd); 941 if (file_size < 0) 942 return file_size; 943 ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, 944 &in_closed_after_out, winfo); 945 break; 946 947 default: 948 fprintf(stderr, "Invalid mode %d\n", cfg_mode); 949 950 die_usage(); 951 return 1; 952 } 953 954 if (ret) 955 return ret; 956 957 if (close_peerfd) 958 close(peerfd); 959 960 if (cfg_time) { 961 unsigned int delta_ms; 962 963 if (clock_gettime(CLOCK_MONOTONIC, &end) < 0) 964 xerror("can not fetch end time %d", errno); 965 delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000; 966 if (delta_ms > cfg_time) { 967 xerror("transfer slower than expected! runtime %d ms, expected %d ms", 968 delta_ms, cfg_time); 969 } 970 971 /* show the runtime only if this end shutdown(wr) before receiving the EOF, 972 * (that is, if this end got the longer runtime) 973 */ 974 if (in_closed_after_out) 975 fprintf(stderr, "%d", delta_ms); 976 } 977 978 return 0; 979 } 980 981 static void check_sockaddr(int pf, struct sockaddr_storage *ss, 982 socklen_t salen) 983 { 984 struct sockaddr_in6 *sin6; 985 struct sockaddr_in *sin; 986 socklen_t wanted_size = 0; 987 988 switch (pf) { 989 case AF_INET: 990 wanted_size = sizeof(*sin); 991 sin = (void *)ss; 992 if (!sin->sin_port) 993 fprintf(stderr, "accept: something wrong: ip connection from port 0"); 994 break; 995 case AF_INET6: 996 wanted_size = sizeof(*sin6); 997 sin6 = (void *)ss; 998 if (!sin6->sin6_port) 999 fprintf(stderr, "accept: something wrong: ipv6 connection from port 0"); 1000 break; 1001 default: 1002 fprintf(stderr, "accept: Unknown pf %d, salen %u\n", pf, salen); 1003 return; 1004 } 1005 1006 if (salen != wanted_size) 1007 fprintf(stderr, "accept: size mismatch, got %d expected %d\n", 1008 (int)salen, wanted_size); 1009 1010 if (ss->ss_family != pf) 1011 fprintf(stderr, "accept: pf mismatch, expect %d, ss_family is %d\n", 1012 (int)ss->ss_family, pf); 1013 } 1014 1015 static void check_getpeername(int fd, struct sockaddr_storage *ss, socklen_t salen) 1016 { 1017 struct sockaddr_storage peerss; 1018 socklen_t peersalen = sizeof(peerss); 1019 1020 if (getpeername(fd, (struct sockaddr *)&peerss, &peersalen) < 0) { 1021 perror("getpeername"); 1022 return; 1023 } 1024 1025 if (peersalen != salen) { 1026 fprintf(stderr, "%s: %d vs %d\n", __func__, peersalen, salen); 1027 return; 1028 } 1029 1030 if (memcmp(ss, &peerss, peersalen)) { 1031 char a[INET6_ADDRSTRLEN]; 1032 char b[INET6_ADDRSTRLEN]; 1033 char c[INET6_ADDRSTRLEN]; 1034 char d[INET6_ADDRSTRLEN]; 1035 1036 xgetnameinfo((struct sockaddr *)ss, salen, 1037 a, sizeof(a), b, sizeof(b)); 1038 1039 xgetnameinfo((struct sockaddr *)&peerss, peersalen, 1040 c, sizeof(c), d, sizeof(d)); 1041 1042 fprintf(stderr, "%s: memcmp failure: accept %s vs peername %s, %s vs %s salen %d vs %d\n", 1043 __func__, a, c, b, d, peersalen, salen); 1044 } 1045 } 1046 1047 static void check_getpeername_connect(int fd) 1048 { 1049 struct sockaddr_storage ss; 1050 socklen_t salen = sizeof(ss); 1051 char a[INET6_ADDRSTRLEN]; 1052 char b[INET6_ADDRSTRLEN]; 1053 1054 if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) { 1055 perror("getpeername"); 1056 return; 1057 } 1058 1059 xgetnameinfo((struct sockaddr *)&ss, salen, 1060 a, sizeof(a), b, sizeof(b)); 1061 1062 if (strcmp(cfg_host, a) || strcmp(cfg_port, b)) 1063 fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__, 1064 cfg_host, a, cfg_port, b); 1065 } 1066 1067 static void maybe_close(int fd) 1068 { 1069 unsigned int r = rand(); 1070 1071 if (!(cfg_join || cfg_remove || cfg_repeat > 1) && (r & 1)) 1072 close(fd); 1073 } 1074 1075 int main_loop_s(int listensock) 1076 { 1077 struct sockaddr_storage ss; 1078 struct wstate winfo; 1079 struct pollfd polls; 1080 socklen_t salen; 1081 int remotesock; 1082 int fd = 0; 1083 1084 again: 1085 polls.fd = listensock; 1086 polls.events = POLLIN; 1087 1088 switch (poll(&polls, 1, poll_timeout)) { 1089 case -1: 1090 perror("poll"); 1091 return 1; 1092 case 0: 1093 fprintf(stderr, "%s: timed out\n", __func__); 1094 close(listensock); 1095 return 2; 1096 } 1097 1098 salen = sizeof(ss); 1099 remotesock = accept(listensock, (struct sockaddr *)&ss, &salen); 1100 if (remotesock >= 0) { 1101 maybe_close(listensock); 1102 check_sockaddr(pf, &ss, salen); 1103 check_getpeername(remotesock, &ss, salen); 1104 1105 if (cfg_input) { 1106 fd = open(cfg_input, O_RDONLY); 1107 if (fd < 0) 1108 xerror("can't open %s: %d", cfg_input, errno); 1109 } 1110 1111 SOCK_TEST_TCPULP(remotesock, 0); 1112 1113 memset(&winfo, 0, sizeof(winfo)); 1114 copyfd_io(fd, remotesock, 1, true, &winfo); 1115 } else { 1116 perror("accept"); 1117 return 1; 1118 } 1119 1120 if (cfg_input) 1121 close(fd); 1122 1123 if (--cfg_repeat > 0) 1124 goto again; 1125 1126 return 0; 1127 } 1128 1129 static void init_rng(void) 1130 { 1131 unsigned int foo; 1132 1133 if (getrandom(&foo, sizeof(foo), 0) == -1) { 1134 perror("getrandom"); 1135 exit(1); 1136 } 1137 1138 srand(foo); 1139 } 1140 1141 static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen) 1142 { 1143 int err; 1144 1145 err = setsockopt(fd, level, optname, optval, optlen); 1146 if (err) { 1147 perror("setsockopt"); 1148 exit(1); 1149 } 1150 } 1151 1152 static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg) 1153 { 1154 static const unsigned int on = 1; 1155 1156 if (cmsg->timestampns) 1157 xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on)); 1158 if (cmsg->tcp_inq) 1159 xsetsockopt(fd, IPPROTO_TCP, TCP_INQ, &on, sizeof(on)); 1160 } 1161 1162 static void parse_cmsg_types(const char *type) 1163 { 1164 char *next = strchr(type, ','); 1165 unsigned int len = 0; 1166 1167 cfg_cmsg_types.cmsg_enabled = 1; 1168 1169 if (next) { 1170 parse_cmsg_types(next + 1); 1171 len = next - type; 1172 } else { 1173 len = strlen(type); 1174 } 1175 1176 if (strncmp(type, "TIMESTAMPNS", len) == 0) { 1177 cfg_cmsg_types.timestampns = 1; 1178 return; 1179 } 1180 1181 if (strncmp(type, "TCPINQ", len) == 0) { 1182 cfg_cmsg_types.tcp_inq = 1; 1183 return; 1184 } 1185 1186 fprintf(stderr, "Unrecognized cmsg option %s\n", type); 1187 exit(1); 1188 } 1189 1190 static void parse_setsock_options(const char *name) 1191 { 1192 char *next = strchr(name, ','); 1193 unsigned int len = 0; 1194 1195 if (next) { 1196 parse_setsock_options(next + 1); 1197 len = next - name; 1198 } else { 1199 len = strlen(name); 1200 } 1201 1202 if (strncmp(name, "TRANSPARENT", len) == 0) { 1203 cfg_sockopt_types.transparent = 1; 1204 return; 1205 } 1206 1207 if (strncmp(name, "MPTFO", len) == 0) { 1208 cfg_sockopt_types.mptfo = 1; 1209 return; 1210 } 1211 1212 fprintf(stderr, "Unrecognized setsockopt option %s\n", name); 1213 exit(1); 1214 } 1215 1216 void xdisconnect(int fd) 1217 { 1218 socklen_t addrlen = sizeof(struct sockaddr_storage); 1219 struct sockaddr_storage addr, empty; 1220 int msec_sleep = 10; 1221 void *raw_addr; 1222 int i, cmdlen; 1223 char cmd[128]; 1224 1225 /* get the local address and convert it to string */ 1226 if (getsockname(fd, (struct sockaddr *)&addr, &addrlen) < 0) 1227 xerror("getsockname"); 1228 1229 if (addr.ss_family == AF_INET) 1230 raw_addr = &(((struct sockaddr_in *)&addr)->sin_addr); 1231 else if (addr.ss_family == AF_INET6) 1232 raw_addr = &(((struct sockaddr_in6 *)&addr)->sin6_addr); 1233 else 1234 xerror("bad family"); 1235 1236 strcpy(cmd, "ss -M | grep -q "); 1237 cmdlen = strlen(cmd); 1238 if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen], 1239 sizeof(cmd) - cmdlen)) 1240 xerror("inet_ntop"); 1241 1242 shutdown(fd, SHUT_WR); 1243 1244 /* 1245 * wait until the pending data is completely flushed and all 1246 * the MPTCP sockets reached the closed status. 1247 * disconnect will bypass/ignore/drop any pending data. 1248 */ 1249 for (i = 0; ; i += msec_sleep) { 1250 /* closed socket are not listed by 'ss' */ 1251 if (system(cmd) != 0) 1252 break; 1253 1254 if (i > poll_timeout) 1255 xerror("timeout while waiting for spool to complete"); 1256 usleep(msec_sleep * 1000); 1257 } 1258 1259 memset(&empty, 0, sizeof(empty)); 1260 empty.ss_family = AF_UNSPEC; 1261 if (connect(fd, (struct sockaddr *)&empty, addrlen) < 0) 1262 xerror("can't disconnect: %d", errno); 1263 } 1264 1265 int main_loop(void) 1266 { 1267 int fd = 0, ret, fd_in = 0; 1268 struct addrinfo *peer; 1269 struct wstate winfo; 1270 1271 if (cfg_input && cfg_sockopt_types.mptfo) { 1272 fd_in = open(cfg_input, O_RDONLY); 1273 if (fd < 0) 1274 xerror("can't open %s:%d", cfg_input, errno); 1275 } 1276 1277 memset(&winfo, 0, sizeof(winfo)); 1278 fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &winfo); 1279 if (fd < 0) 1280 return 2; 1281 1282 again: 1283 check_getpeername_connect(fd); 1284 1285 SOCK_TEST_TCPULP(fd, cfg_sock_proto); 1286 1287 if (cfg_rcvbuf) 1288 set_rcvbuf(fd, cfg_rcvbuf); 1289 if (cfg_sndbuf) 1290 set_sndbuf(fd, cfg_sndbuf); 1291 if (cfg_cmsg_types.cmsg_enabled) 1292 apply_cmsg_types(fd, &cfg_cmsg_types); 1293 1294 if (cfg_input && !cfg_sockopt_types.mptfo) { 1295 fd_in = open(cfg_input, O_RDONLY); 1296 if (fd < 0) 1297 xerror("can't open %s:%d", cfg_input, errno); 1298 } 1299 1300 ret = copyfd_io(fd_in, fd, 1, 0, &winfo); 1301 if (ret) 1302 return ret; 1303 1304 if (cfg_truncate > 0) { 1305 xdisconnect(fd); 1306 } else if (--cfg_repeat > 0) { 1307 xdisconnect(fd); 1308 1309 /* the socket could be unblocking at this point, we need the 1310 * connect to be blocking 1311 */ 1312 set_nonblock(fd, false); 1313 if (connect(fd, peer->ai_addr, peer->ai_addrlen)) 1314 xerror("can't reconnect: %d", errno); 1315 if (cfg_input) 1316 close(fd_in); 1317 memset(&winfo, 0, sizeof(winfo)); 1318 goto again; 1319 } else { 1320 close(fd); 1321 } 1322 1323 return 0; 1324 } 1325 1326 int parse_proto(const char *proto) 1327 { 1328 if (!strcasecmp(proto, "MPTCP")) 1329 return IPPROTO_MPTCP; 1330 if (!strcasecmp(proto, "TCP")) 1331 return IPPROTO_TCP; 1332 1333 fprintf(stderr, "Unknown protocol: %s\n.", proto); 1334 die_usage(); 1335 1336 /* silence compiler warning */ 1337 return 0; 1338 } 1339 1340 int parse_mode(const char *mode) 1341 { 1342 if (!strcasecmp(mode, "poll")) 1343 return CFG_MODE_POLL; 1344 if (!strcasecmp(mode, "mmap")) 1345 return CFG_MODE_MMAP; 1346 if (!strcasecmp(mode, "sendfile")) 1347 return CFG_MODE_SENDFILE; 1348 1349 fprintf(stderr, "Unknown test mode: %s\n", mode); 1350 fprintf(stderr, "Supported modes are:\n"); 1351 fprintf(stderr, "\t\t\"poll\" - interleaved read/write using poll()\n"); 1352 fprintf(stderr, "\t\t\"mmap\" - send entire input file (mmap+write), then read response (-l will read input first)\n"); 1353 fprintf(stderr, "\t\t\"sendfile\" - send entire input file (sendfile), then read response (-l will read input first)\n"); 1354 1355 die_usage(); 1356 1357 /* silence compiler warning */ 1358 return 0; 1359 } 1360 1361 int parse_peek(const char *mode) 1362 { 1363 if (!strcasecmp(mode, "saveWithPeek")) 1364 return CFG_WITH_PEEK; 1365 if (!strcasecmp(mode, "saveAfterPeek")) 1366 return CFG_AFTER_PEEK; 1367 1368 fprintf(stderr, "Unknown: %s\n", mode); 1369 fprintf(stderr, "Supported MSG_PEEK mode are:\n"); 1370 fprintf(stderr, 1371 "\t\t\"saveWithPeek\" - recv data with flags 'MSG_PEEK' and save the peek data into file\n"); 1372 fprintf(stderr, 1373 "\t\t\"saveAfterPeek\" - read and save data into file after recv with flags 'MSG_PEEK'\n"); 1374 1375 die_usage(); 1376 1377 /* silence compiler warning */ 1378 return 0; 1379 } 1380 1381 static int parse_int(const char *size) 1382 { 1383 unsigned long s; 1384 1385 errno = 0; 1386 1387 s = strtoul(size, NULL, 0); 1388 1389 if (errno) { 1390 fprintf(stderr, "Invalid sndbuf size %s (%s)\n", 1391 size, strerror(errno)); 1392 die_usage(); 1393 } 1394 1395 if (s > INT_MAX) { 1396 fprintf(stderr, "Invalid sndbuf size %s (%s)\n", 1397 size, strerror(ERANGE)); 1398 die_usage(); 1399 } 1400 1401 return (int)s; 1402 } 1403 1404 static void parse_opts(int argc, char **argv) 1405 { 1406 int c; 1407 1408 while ((c = getopt(argc, argv, "6c:f:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) { 1409 switch (c) { 1410 case 'f': 1411 cfg_truncate = atoi(optarg); 1412 1413 /* when receiving a fastclose, ignore PIPE signals and 1414 * all the I/O errors later in the code 1415 */ 1416 if (cfg_truncate < 0) { 1417 cfg_rcv_trunc = true; 1418 signal(SIGPIPE, handle_signal); 1419 } 1420 break; 1421 case 'j': 1422 cfg_join = true; 1423 cfg_mode = CFG_MODE_POLL; 1424 break; 1425 case 'r': 1426 cfg_remove = true; 1427 cfg_mode = CFG_MODE_POLL; 1428 cfg_wait = 400000; 1429 cfg_do_w = atoi(optarg); 1430 if (cfg_do_w <= 0) 1431 cfg_do_w = 50; 1432 break; 1433 case 'i': 1434 cfg_input = optarg; 1435 break; 1436 case 'I': 1437 cfg_repeat = atoi(optarg); 1438 break; 1439 case 'l': 1440 listen_mode = true; 1441 break; 1442 case 'p': 1443 cfg_port = optarg; 1444 break; 1445 case 's': 1446 cfg_sock_proto = parse_proto(optarg); 1447 break; 1448 case 'h': 1449 die_usage(); 1450 break; 1451 case '6': 1452 pf = AF_INET6; 1453 break; 1454 case 't': 1455 poll_timeout = atoi(optarg) * 1000; 1456 if (poll_timeout <= 0) 1457 poll_timeout = -1; 1458 break; 1459 case 'T': 1460 cfg_time = atoi(optarg); 1461 break; 1462 case 'm': 1463 cfg_mode = parse_mode(optarg); 1464 break; 1465 case 'S': 1466 cfg_sndbuf = parse_int(optarg); 1467 break; 1468 case 'R': 1469 cfg_rcvbuf = parse_int(optarg); 1470 break; 1471 case 'w': 1472 cfg_wait = atoi(optarg)*1000000; 1473 break; 1474 case 'M': 1475 cfg_mark = strtol(optarg, NULL, 0); 1476 break; 1477 case 'P': 1478 cfg_peek = parse_peek(optarg); 1479 break; 1480 case 'c': 1481 parse_cmsg_types(optarg); 1482 break; 1483 case 'o': 1484 parse_setsock_options(optarg); 1485 break; 1486 } 1487 } 1488 1489 if (optind + 1 != argc) 1490 die_usage(); 1491 cfg_host = argv[optind]; 1492 1493 if (strchr(cfg_host, ':')) 1494 pf = AF_INET6; 1495 } 1496 1497 int main(int argc, char *argv[]) 1498 { 1499 init_rng(); 1500 1501 signal(SIGUSR1, handle_signal); 1502 parse_opts(argc, argv); 1503 1504 if (listen_mode) { 1505 int fd = sock_listen_mptcp(cfg_host, cfg_port); 1506 1507 if (fd < 0) 1508 return 1; 1509 1510 if (cfg_rcvbuf) 1511 set_rcvbuf(fd, cfg_rcvbuf); 1512 if (cfg_sndbuf) 1513 set_sndbuf(fd, cfg_sndbuf); 1514 if (cfg_mark) 1515 set_mark(fd, cfg_mark); 1516 if (cfg_cmsg_types.cmsg_enabled) 1517 apply_cmsg_types(fd, &cfg_cmsg_types); 1518 1519 return main_loop_s(fd); 1520 } 1521 1522 return main_loop(); 1523 } 1524