1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2021, Red Hat. 5 */ 6 7 #define pr_fmt(fmt) "MPTCP: " fmt 8 9 #include <linux/kernel.h> 10 #include <linux/module.h> 11 #include <net/sock.h> 12 #include <net/protocol.h> 13 #include <net/tcp.h> 14 #include <net/mptcp.h> 15 #include "protocol.h" 16 17 #define MIN_INFO_OPTLEN_SIZE 16 18 #define MIN_FULL_INFO_OPTLEN_SIZE 40 19 20 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) 21 { 22 msk_owned_by_me(msk); 23 24 if (likely(!__mptcp_check_fallback(msk))) 25 return NULL; 26 27 return msk->first; 28 } 29 30 static u32 sockopt_seq_reset(const struct sock *sk) 31 { 32 sock_owned_by_me(sk); 33 34 /* Highbits contain state. Allows to distinguish sockopt_seq 35 * of listener and established: 36 * s0 = new_listener() 37 * sockopt(s0) - seq is 1 38 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) 39 * sockopt(s0) - seq increments to 2 on s0 40 * sockopt(s1) // seq increments to 2 on s1 (different option) 41 * new ssk completes join, inherits options from s0 // seq 2 42 * Needs sync from mptcp join logic, but ssk->seq == msk->seq 43 * 44 * Set High order bits to sk_state so ssk->seq == msk->seq test 45 * will fail. 46 */ 47 48 return (u32)sk->sk_state << 24u; 49 } 50 51 static void sockopt_seq_inc(struct mptcp_sock *msk) 52 { 53 u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; 54 55 msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; 56 } 57 58 static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, 59 unsigned int optlen, int *val) 60 { 61 if (optlen < sizeof(int)) 62 return -EINVAL; 63 64 if (copy_from_sockptr(val, optval, sizeof(*val))) 65 return -EFAULT; 66 67 return 0; 68 } 69 70 static void __mptcp_subflow_set_rcvbuf(struct sock *ssk, int val) 71 { 72 WRITE_ONCE(ssk->sk_rcvbuf, val); 73 tcp_set_rcvbuf(ssk, val); 74 } 75 76 static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) 77 { 78 struct mptcp_subflow_context *subflow; 79 struct sock *sk = (struct sock *)msk; 80 81 lock_sock(sk); 82 sockopt_seq_inc(msk); 83 84 mptcp_for_each_subflow(msk, subflow) { 85 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 86 bool slow = lock_sock_fast(ssk); 87 88 switch (optname) { 89 case SO_DEBUG: 90 sock_valbool_flag(ssk, SOCK_DBG, !!val); 91 break; 92 case SO_KEEPALIVE: 93 if (ssk->sk_prot->keepalive) 94 ssk->sk_prot->keepalive(ssk, !!val); 95 sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); 96 break; 97 case SO_PRIORITY: 98 WRITE_ONCE(ssk->sk_priority, val); 99 break; 100 case SO_SNDBUF: 101 case SO_SNDBUFFORCE: 102 ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; 103 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 104 mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; 105 break; 106 case SO_RCVBUF: 107 case SO_RCVBUFFORCE: 108 ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; 109 __mptcp_subflow_set_rcvbuf(ssk, sk->sk_rcvbuf); 110 break; 111 case SO_MARK: 112 if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { 113 WRITE_ONCE(ssk->sk_mark, sk->sk_mark); 114 sk_dst_reset(ssk); 115 } 116 break; 117 case SO_INCOMING_CPU: 118 WRITE_ONCE(ssk->sk_incoming_cpu, val); 119 break; 120 } 121 122 subflow->setsockopt_seq = msk->setsockopt_seq; 123 unlock_sock_fast(ssk, slow); 124 } 125 126 release_sock(sk); 127 } 128 129 static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) 130 { 131 sockptr_t optval = KERNEL_SOCKPTR(&val); 132 struct sock *sk = (struct sock *)msk; 133 int ret; 134 135 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 136 optval, sizeof(val)); 137 if (ret) 138 return ret; 139 140 mptcp_sol_socket_sync_intval(msk, optname, val); 141 return 0; 142 } 143 144 static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) 145 { 146 struct sock *sk = (struct sock *)msk; 147 148 WRITE_ONCE(sk->sk_incoming_cpu, val); 149 150 mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); 151 } 152 153 static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) 154 { 155 sockptr_t optval = KERNEL_SOCKPTR(&val); 156 struct mptcp_subflow_context *subflow; 157 struct sock *sk = (struct sock *)msk; 158 int ret; 159 160 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 161 optval, sizeof(val)); 162 if (ret) 163 return ret; 164 165 lock_sock(sk); 166 mptcp_for_each_subflow(msk, subflow) { 167 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 168 169 lock_sock(ssk); 170 sock_set_timestamp(ssk, optname, !!val); 171 release_sock(ssk); 172 } 173 174 release_sock(sk); 175 return 0; 176 } 177 178 static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, 179 sockptr_t optval, 180 unsigned int optlen) 181 { 182 int val, ret; 183 184 ret = mptcp_get_int_option(msk, optval, optlen, &val); 185 if (ret) 186 return ret; 187 188 switch (optname) { 189 case SO_KEEPALIVE: 190 case SO_DEBUG: 191 case SO_MARK: 192 case SO_PRIORITY: 193 case SO_SNDBUF: 194 case SO_SNDBUFFORCE: 195 case SO_RCVBUF: 196 case SO_RCVBUFFORCE: 197 return mptcp_sol_socket_intval(msk, optname, val); 198 case SO_INCOMING_CPU: 199 mptcp_so_incoming_cpu(msk, val); 200 return 0; 201 case SO_TIMESTAMP_OLD: 202 case SO_TIMESTAMP_NEW: 203 case SO_TIMESTAMPNS_OLD: 204 case SO_TIMESTAMPNS_NEW: 205 return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); 206 } 207 208 return -ENOPROTOOPT; 209 } 210 211 static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, 212 int optname, 213 sockptr_t optval, 214 unsigned int optlen) 215 { 216 struct mptcp_subflow_context *subflow; 217 struct sock *sk = (struct sock *)msk; 218 struct so_timestamping timestamping; 219 int ret; 220 221 if (optlen == sizeof(timestamping)) { 222 if (copy_from_sockptr(×tamping, optval, 223 sizeof(timestamping))) 224 return -EFAULT; 225 } else if (optlen == sizeof(int)) { 226 memset(×tamping, 0, sizeof(timestamping)); 227 228 if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) 229 return -EFAULT; 230 } else { 231 return -EINVAL; 232 } 233 234 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 235 KERNEL_SOCKPTR(×tamping), 236 sizeof(timestamping)); 237 if (ret) 238 return ret; 239 240 lock_sock(sk); 241 242 mptcp_for_each_subflow(msk, subflow) { 243 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 244 int err; 245 246 lock_sock(ssk); 247 err = sock_set_timestamping(ssk, optname, timestamping); 248 release_sock(ssk); 249 250 if (err < 0 && ret == 0) 251 ret = err; 252 } 253 254 release_sock(sk); 255 256 return ret; 257 } 258 259 static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, 260 unsigned int optlen) 261 { 262 struct mptcp_subflow_context *subflow; 263 struct sock *sk = (struct sock *)msk; 264 struct linger ling; 265 sockptr_t kopt; 266 int ret; 267 268 if (optlen < sizeof(ling)) 269 return -EINVAL; 270 271 if (copy_from_sockptr(&ling, optval, sizeof(ling))) 272 return -EFAULT; 273 274 kopt = KERNEL_SOCKPTR(&ling); 275 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); 276 if (ret) 277 return ret; 278 279 lock_sock(sk); 280 sockopt_seq_inc(msk); 281 mptcp_for_each_subflow(msk, subflow) { 282 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 283 bool slow = lock_sock_fast(ssk); 284 285 if (!ling.l_onoff) { 286 sock_reset_flag(ssk, SOCK_LINGER); 287 } else { 288 ssk->sk_lingertime = sk->sk_lingertime; 289 sock_set_flag(ssk, SOCK_LINGER); 290 } 291 292 subflow->setsockopt_seq = msk->setsockopt_seq; 293 unlock_sock_fast(ssk, slow); 294 } 295 296 release_sock(sk); 297 return 0; 298 } 299 300 static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, 301 sockptr_t optval, unsigned int optlen) 302 { 303 struct sock *sk = (struct sock *)msk; 304 struct sock *ssk; 305 int ret; 306 307 switch (optname) { 308 case SO_REUSEPORT: 309 case SO_REUSEADDR: 310 case SO_BINDTODEVICE: 311 case SO_BINDTOIFINDEX: 312 lock_sock(sk); 313 ssk = __mptcp_nmpc_sk(msk); 314 if (IS_ERR(ssk)) { 315 release_sock(sk); 316 return PTR_ERR(ssk); 317 } 318 319 ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen); 320 if (ret == 0) { 321 if (optname == SO_REUSEPORT) 322 sk->sk_reuseport = ssk->sk_reuseport; 323 else if (optname == SO_REUSEADDR) 324 sk->sk_reuse = ssk->sk_reuse; 325 else if (optname == SO_BINDTODEVICE) 326 sk->sk_bound_dev_if = ssk->sk_bound_dev_if; 327 else if (optname == SO_BINDTOIFINDEX) 328 sk->sk_bound_dev_if = ssk->sk_bound_dev_if; 329 } 330 release_sock(sk); 331 return ret; 332 case SO_KEEPALIVE: 333 case SO_PRIORITY: 334 case SO_SNDBUF: 335 case SO_SNDBUFFORCE: 336 case SO_RCVBUF: 337 case SO_RCVBUFFORCE: 338 case SO_MARK: 339 case SO_INCOMING_CPU: 340 case SO_DEBUG: 341 case SO_TIMESTAMP_OLD: 342 case SO_TIMESTAMP_NEW: 343 case SO_TIMESTAMPNS_OLD: 344 case SO_TIMESTAMPNS_NEW: 345 return mptcp_setsockopt_sol_socket_int(msk, optname, optval, 346 optlen); 347 case SO_TIMESTAMPING_OLD: 348 case SO_TIMESTAMPING_NEW: 349 return mptcp_setsockopt_sol_socket_timestamping(msk, optname, 350 optval, optlen); 351 case SO_LINGER: 352 return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); 353 case SO_RCVLOWAT: 354 case SO_RCVTIMEO_OLD: 355 case SO_RCVTIMEO_NEW: 356 case SO_SNDTIMEO_OLD: 357 case SO_SNDTIMEO_NEW: 358 case SO_BUSY_POLL: 359 case SO_PREFER_BUSY_POLL: 360 case SO_BUSY_POLL_BUDGET: 361 /* No need to copy: only relevant for msk */ 362 return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); 363 case SO_NO_CHECK: 364 case SO_DONTROUTE: 365 case SO_BROADCAST: 366 case SO_BSDCOMPAT: 367 case SO_PASSCRED: 368 case SO_PASSPIDFD: 369 case SO_PASSSEC: 370 case SO_RXQ_OVFL: 371 case SO_WIFI_STATUS: 372 case SO_NOFCS: 373 case SO_SELECT_ERR_QUEUE: 374 return 0; 375 } 376 377 /* SO_OOBINLINE is not supported, let's avoid the related mess 378 * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, 379 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, 380 * we must be careful with subflows 381 * 382 * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks 383 * explicitly the sk_protocol field 384 * 385 * SO_PEEK_OFF is unsupported, as it is for plain TCP 386 * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows 387 * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, 388 * but likely needs careful design 389 * 390 * SO_ZEROCOPY is currently unsupported, TODO in sndmsg 391 * SO_TXTIME is currently unsupported 392 */ 393 394 return -EOPNOTSUPP; 395 } 396 397 static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, 398 sockptr_t optval, unsigned int optlen) 399 { 400 struct sock *sk = (struct sock *)msk; 401 int ret = -EOPNOTSUPP; 402 struct sock *ssk; 403 404 switch (optname) { 405 case IPV6_V6ONLY: 406 case IPV6_TRANSPARENT: 407 case IPV6_FREEBIND: 408 lock_sock(sk); 409 ssk = __mptcp_nmpc_sk(msk); 410 if (IS_ERR(ssk)) { 411 release_sock(sk); 412 return PTR_ERR(ssk); 413 } 414 415 ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen); 416 if (ret != 0) { 417 release_sock(sk); 418 return ret; 419 } 420 421 sockopt_seq_inc(msk); 422 423 switch (optname) { 424 case IPV6_V6ONLY: 425 sk->sk_ipv6only = ssk->sk_ipv6only; 426 break; 427 case IPV6_TRANSPARENT: 428 inet_assign_bit(TRANSPARENT, sk, 429 inet_test_bit(TRANSPARENT, ssk)); 430 break; 431 case IPV6_FREEBIND: 432 inet_assign_bit(FREEBIND, sk, 433 inet_test_bit(FREEBIND, ssk)); 434 break; 435 } 436 437 release_sock(sk); 438 break; 439 } 440 441 return ret; 442 } 443 444 static bool mptcp_supported_sockopt(int level, int optname) 445 { 446 if (level == SOL_IP) { 447 switch (optname) { 448 /* should work fine */ 449 case IP_FREEBIND: 450 case IP_TRANSPARENT: 451 case IP_BIND_ADDRESS_NO_PORT: 452 case IP_LOCAL_PORT_RANGE: 453 454 /* the following are control cmsg related */ 455 case IP_PKTINFO: 456 case IP_RECVTTL: 457 case IP_RECVTOS: 458 case IP_RECVOPTS: 459 case IP_RETOPTS: 460 case IP_PASSSEC: 461 case IP_RECVORIGDSTADDR: 462 case IP_CHECKSUM: 463 case IP_RECVFRAGSIZE: 464 465 /* common stuff that need some love */ 466 case IP_TOS: 467 case IP_TTL: 468 case IP_MTU_DISCOVER: 469 case IP_RECVERR: 470 471 /* possibly less common may deserve some love */ 472 case IP_MINTTL: 473 474 /* the following is apparently a no-op for plain TCP */ 475 case IP_RECVERR_RFC4884: 476 return true; 477 } 478 479 /* IP_OPTIONS is not supported, needs subflow care */ 480 /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ 481 /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, 482 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, 483 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, 484 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, 485 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, 486 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal 487 * with mcast stuff 488 */ 489 /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ 490 return false; 491 } 492 if (level == SOL_IPV6) { 493 switch (optname) { 494 case IPV6_V6ONLY: 495 496 /* the following are control cmsg related */ 497 case IPV6_RECVPKTINFO: 498 case IPV6_2292PKTINFO: 499 case IPV6_RECVHOPLIMIT: 500 case IPV6_2292HOPLIMIT: 501 case IPV6_RECVRTHDR: 502 case IPV6_2292RTHDR: 503 case IPV6_RECVHOPOPTS: 504 case IPV6_2292HOPOPTS: 505 case IPV6_RECVDSTOPTS: 506 case IPV6_2292DSTOPTS: 507 case IPV6_RECVTCLASS: 508 case IPV6_FLOWINFO: 509 case IPV6_RECVPATHMTU: 510 case IPV6_RECVORIGDSTADDR: 511 case IPV6_RECVFRAGSIZE: 512 513 /* the following ones need some love but are quite common */ 514 case IPV6_TCLASS: 515 case IPV6_TRANSPARENT: 516 case IPV6_FREEBIND: 517 case IPV6_PKTINFO: 518 case IPV6_2292PKTOPTIONS: 519 case IPV6_UNICAST_HOPS: 520 case IPV6_MTU_DISCOVER: 521 case IPV6_MTU: 522 case IPV6_RECVERR: 523 case IPV6_FLOWINFO_SEND: 524 case IPV6_FLOWLABEL_MGR: 525 case IPV6_MINHOPCOUNT: 526 case IPV6_DONTFRAG: 527 case IPV6_AUTOFLOWLABEL: 528 529 /* the following one is a no-op for plain TCP */ 530 case IPV6_RECVERR_RFC4884: 531 return true; 532 } 533 534 /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are 535 * not supported 536 */ 537 /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, 538 * IPV6_MULTICAST_IF, IPV6_ADDRFORM, 539 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, 540 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, 541 * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, 542 * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER 543 * are not supported better not deal with mcast 544 */ 545 /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ 546 547 /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ 548 /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ 549 return false; 550 } 551 if (level == SOL_TCP) { 552 switch (optname) { 553 /* the following are no-op or should work just fine */ 554 case TCP_THIN_DUPACK: 555 case TCP_DEFER_ACCEPT: 556 557 /* the following need some love */ 558 case TCP_MAXSEG: 559 case TCP_NODELAY: 560 case TCP_THIN_LINEAR_TIMEOUTS: 561 case TCP_CONGESTION: 562 case TCP_CORK: 563 case TCP_KEEPIDLE: 564 case TCP_KEEPINTVL: 565 case TCP_KEEPCNT: 566 case TCP_SYNCNT: 567 case TCP_SAVE_SYN: 568 case TCP_LINGER2: 569 case TCP_WINDOW_CLAMP: 570 case TCP_QUICKACK: 571 case TCP_USER_TIMEOUT: 572 case TCP_TIMESTAMP: 573 case TCP_NOTSENT_LOWAT: 574 case TCP_TX_DELAY: 575 case TCP_INQ: 576 case TCP_FASTOPEN: 577 case TCP_FASTOPEN_CONNECT: 578 case TCP_FASTOPEN_KEY: 579 case TCP_FASTOPEN_NO_COOKIE: 580 return true; 581 } 582 583 /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ 584 585 /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, 586 * TCP_REPAIR_WINDOW are not supported, better avoid this mess 587 */ 588 } 589 return false; 590 } 591 592 static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, 593 unsigned int optlen) 594 { 595 struct mptcp_subflow_context *subflow; 596 struct sock *sk = (struct sock *)msk; 597 char name[TCP_CA_NAME_MAX]; 598 bool cap_net_admin; 599 int ret; 600 601 if (optlen < 1) 602 return -EINVAL; 603 604 ret = strncpy_from_sockptr(name, optval, 605 min_t(long, TCP_CA_NAME_MAX - 1, optlen)); 606 if (ret < 0) 607 return -EFAULT; 608 609 name[ret] = 0; 610 611 cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); 612 613 ret = 0; 614 lock_sock(sk); 615 sockopt_seq_inc(msk); 616 mptcp_for_each_subflow(msk, subflow) { 617 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 618 int err; 619 620 lock_sock(ssk); 621 err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); 622 if (err < 0 && ret == 0) 623 ret = err; 624 subflow->setsockopt_seq = msk->setsockopt_seq; 625 release_sock(ssk); 626 } 627 628 if (ret == 0) 629 strscpy(msk->ca_name, name, sizeof(msk->ca_name)); 630 631 release_sock(sk); 632 return ret; 633 } 634 635 static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max, 636 int (*set_val)(struct sock *, int), 637 int *msk_val, int val) 638 { 639 struct mptcp_subflow_context *subflow; 640 int err = 0; 641 642 mptcp_for_each_subflow(msk, subflow) { 643 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 644 int ret; 645 646 lock_sock(ssk); 647 ret = set_val(ssk, val); 648 err = err ? : ret; 649 release_sock(ssk); 650 } 651 652 if (!err) { 653 *msk_val = val; 654 sockopt_seq_inc(msk); 655 } 656 657 return err; 658 } 659 660 static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val) 661 { 662 struct mptcp_subflow_context *subflow; 663 struct sock *sk = (struct sock *)msk; 664 665 sockopt_seq_inc(msk); 666 msk->cork = !!val; 667 mptcp_for_each_subflow(msk, subflow) { 668 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 669 670 lock_sock(ssk); 671 __tcp_sock_set_cork(ssk, !!val); 672 release_sock(ssk); 673 } 674 if (!val) 675 mptcp_check_and_set_pending(sk); 676 677 return 0; 678 } 679 680 static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val) 681 { 682 struct mptcp_subflow_context *subflow; 683 struct sock *sk = (struct sock *)msk; 684 685 sockopt_seq_inc(msk); 686 msk->nodelay = !!val; 687 mptcp_for_each_subflow(msk, subflow) { 688 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 689 690 lock_sock(ssk); 691 __tcp_sock_set_nodelay(ssk, !!val); 692 release_sock(ssk); 693 } 694 if (val) 695 mptcp_check_and_set_pending(sk); 696 return 0; 697 } 698 699 static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname, 700 sockptr_t optval, unsigned int optlen) 701 { 702 struct sock *sk = (struct sock *)msk; 703 struct sock *ssk; 704 int err; 705 706 err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 707 if (err != 0) 708 return err; 709 710 lock_sock(sk); 711 712 ssk = __mptcp_nmpc_sk(msk); 713 if (IS_ERR(ssk)) { 714 release_sock(sk); 715 return PTR_ERR(ssk); 716 } 717 718 switch (optname) { 719 case IP_FREEBIND: 720 inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); 721 break; 722 case IP_TRANSPARENT: 723 inet_assign_bit(TRANSPARENT, ssk, 724 inet_test_bit(TRANSPARENT, sk)); 725 break; 726 case IP_BIND_ADDRESS_NO_PORT: 727 inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, 728 inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); 729 break; 730 case IP_LOCAL_PORT_RANGE: 731 WRITE_ONCE(inet_sk(ssk)->local_port_range, 732 READ_ONCE(inet_sk(sk)->local_port_range)); 733 break; 734 default: 735 release_sock(sk); 736 WARN_ON_ONCE(1); 737 return -EOPNOTSUPP; 738 } 739 740 sockopt_seq_inc(msk); 741 release_sock(sk); 742 return 0; 743 } 744 745 static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, 746 sockptr_t optval, unsigned int optlen) 747 { 748 struct mptcp_subflow_context *subflow; 749 struct sock *sk = (struct sock *)msk; 750 int err, val; 751 752 err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 753 754 if (err != 0) 755 return err; 756 757 lock_sock(sk); 758 sockopt_seq_inc(msk); 759 val = READ_ONCE(inet_sk(sk)->tos); 760 mptcp_for_each_subflow(msk, subflow) { 761 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 762 bool slow; 763 764 slow = lock_sock_fast(ssk); 765 __ip_sock_set_tos(ssk, val); 766 unlock_sock_fast(ssk, slow); 767 } 768 release_sock(sk); 769 770 return 0; 771 } 772 773 static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, 774 sockptr_t optval, unsigned int optlen) 775 { 776 switch (optname) { 777 case IP_FREEBIND: 778 case IP_TRANSPARENT: 779 case IP_BIND_ADDRESS_NO_PORT: 780 case IP_LOCAL_PORT_RANGE: 781 return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen); 782 case IP_TOS: 783 return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); 784 } 785 786 return -EOPNOTSUPP; 787 } 788 789 static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, 790 sockptr_t optval, unsigned int optlen) 791 { 792 struct sock *sk = (struct sock *)msk; 793 struct sock *ssk; 794 int ret; 795 796 /* Limit to first subflow, before the connection establishment */ 797 lock_sock(sk); 798 ssk = __mptcp_nmpc_sk(msk); 799 if (IS_ERR(ssk)) { 800 ret = PTR_ERR(ssk); 801 goto unlock; 802 } 803 804 ret = tcp_setsockopt(ssk, level, optname, optval, optlen); 805 806 unlock: 807 release_sock(sk); 808 return ret; 809 } 810 811 static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level, 812 int optname, sockptr_t optval, 813 unsigned int optlen) 814 { 815 struct mptcp_subflow_context *subflow; 816 int ret = 0; 817 818 mptcp_for_each_subflow(msk, subflow) { 819 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 820 int err; 821 822 err = tcp_setsockopt(ssk, level, optname, optval, optlen); 823 if (err < 0 && ret == 0) 824 ret = err; 825 } 826 827 if (!ret) 828 sockopt_seq_inc(msk); 829 830 return ret; 831 } 832 833 static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 834 sockptr_t optval, unsigned int optlen) 835 { 836 struct sock *sk = (void *)msk; 837 int ret, val; 838 839 switch (optname) { 840 case TCP_ULP: 841 return -EOPNOTSUPP; 842 case TCP_CONGESTION: 843 return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); 844 case TCP_DEFER_ACCEPT: 845 /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ 846 mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); 847 return 0; 848 case TCP_FASTOPEN: 849 case TCP_FASTOPEN_CONNECT: 850 case TCP_FASTOPEN_KEY: 851 case TCP_FASTOPEN_NO_COOKIE: 852 return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, 853 optval, optlen); 854 } 855 856 ret = mptcp_get_int_option(msk, optval, optlen, &val); 857 if (ret) 858 return ret; 859 860 lock_sock(sk); 861 switch (optname) { 862 case TCP_INQ: 863 if (val < 0 || val > 1) 864 ret = -EINVAL; 865 else 866 msk->recvmsg_inq = !!val; 867 break; 868 case TCP_NOTSENT_LOWAT: 869 WRITE_ONCE(msk->notsent_lowat, val); 870 mptcp_write_space(sk); 871 break; 872 case TCP_CORK: 873 ret = __mptcp_setsockopt_sol_tcp_cork(msk, val); 874 break; 875 case TCP_NODELAY: 876 ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val); 877 break; 878 case TCP_KEEPIDLE: 879 ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE, 880 &tcp_sock_set_keepidle_locked, 881 &msk->keepalive_idle, val); 882 break; 883 case TCP_KEEPINTVL: 884 ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL, 885 &tcp_sock_set_keepintvl, 886 &msk->keepalive_intvl, val); 887 break; 888 case TCP_KEEPCNT: 889 ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT, 890 &tcp_sock_set_keepcnt, 891 &msk->keepalive_cnt, 892 val); 893 break; 894 case TCP_MAXSEG: 895 msk->maxseg = val; 896 ret = mptcp_setsockopt_all_sf(msk, SOL_TCP, optname, optval, 897 optlen); 898 break; 899 default: 900 ret = -ENOPROTOOPT; 901 } 902 903 release_sock(sk); 904 return ret; 905 } 906 907 int mptcp_setsockopt(struct sock *sk, int level, int optname, 908 sockptr_t optval, unsigned int optlen) 909 { 910 struct mptcp_sock *msk = mptcp_sk(sk); 911 struct sock *ssk; 912 913 pr_debug("msk=%p\n", msk); 914 915 if (level == SOL_SOCKET) 916 return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); 917 918 if (!mptcp_supported_sockopt(level, optname)) 919 return -ENOPROTOOPT; 920 921 /* @@ the meaning of setsockopt() when the socket is connected and 922 * there are multiple subflows is not yet defined. It is up to the 923 * MPTCP-level socket to configure the subflows until the subflow 924 * is in TCP fallback, when TCP socket options are passed through 925 * to the one remaining subflow. 926 */ 927 lock_sock(sk); 928 ssk = __mptcp_tcp_fallback(msk); 929 release_sock(sk); 930 if (ssk) 931 return tcp_setsockopt(ssk, level, optname, optval, optlen); 932 933 if (level == SOL_IP) 934 return mptcp_setsockopt_v4(msk, optname, optval, optlen); 935 936 if (level == SOL_IPV6) 937 return mptcp_setsockopt_v6(msk, optname, optval, optlen); 938 939 if (level == SOL_TCP) 940 return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); 941 942 return -EOPNOTSUPP; 943 } 944 945 static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, 946 char __user *optval, int __user *optlen) 947 { 948 struct sock *sk = (struct sock *)msk; 949 struct sock *ssk; 950 int ret; 951 952 lock_sock(sk); 953 ssk = msk->first; 954 if (ssk) 955 goto get; 956 957 ssk = __mptcp_nmpc_sk(msk); 958 if (IS_ERR(ssk)) { 959 ret = PTR_ERR(ssk); 960 goto out; 961 } 962 963 get: 964 ret = tcp_getsockopt(ssk, level, optname, optval, optlen); 965 966 out: 967 release_sock(sk); 968 return ret; 969 } 970 971 void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) 972 { 973 struct sock *sk = (struct sock *)msk; 974 u32 flags = 0; 975 bool slow; 976 u32 now; 977 978 memset(info, 0, sizeof(*info)); 979 980 info->mptcpi_extra_subflows = READ_ONCE(msk->pm.extra_subflows); 981 info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); 982 info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); 983 info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); 984 985 if (inet_sk_state_load(sk) == TCP_LISTEN) 986 return; 987 988 /* The following limits only make sense for the in-kernel PM */ 989 if (mptcp_pm_is_kernel(msk)) { 990 info->mptcpi_limit_extra_subflows = 991 mptcp_pm_get_limit_extra_subflows(msk); 992 info->mptcpi_endp_signal_max = 993 mptcp_pm_get_endp_signal_max(msk); 994 info->mptcpi_limit_add_addr_accepted = 995 mptcp_pm_get_limit_add_addr_accepted(msk); 996 info->mptcpi_endp_subflow_max = 997 mptcp_pm_get_endp_subflow_max(msk); 998 info->mptcpi_endp_laminar_max = 999 mptcp_pm_get_endp_laminar_max(msk); 1000 info->mptcpi_endp_fullmesh_max = 1001 mptcp_pm_get_endp_fullmesh_max(msk); 1002 } 1003 1004 if (__mptcp_check_fallback(msk)) 1005 flags |= MPTCP_INFO_FLAG_FALLBACK; 1006 if (READ_ONCE(msk->can_ack)) 1007 flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; 1008 info->mptcpi_flags = flags; 1009 1010 slow = lock_sock_fast(sk); 1011 info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); 1012 info->mptcpi_token = msk->token; 1013 info->mptcpi_write_seq = msk->write_seq; 1014 info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; 1015 info->mptcpi_bytes_sent = msk->bytes_sent; 1016 info->mptcpi_bytes_received = msk->bytes_received; 1017 info->mptcpi_bytes_retrans = msk->bytes_retrans; 1018 info->mptcpi_subflows_total = info->mptcpi_extra_subflows + 1019 __mptcp_has_initial_subflow(msk); 1020 now = tcp_jiffies32; 1021 info->mptcpi_last_data_sent = jiffies_to_msecs(now - msk->last_data_sent); 1022 info->mptcpi_last_data_recv = jiffies_to_msecs(now - msk->last_data_recv); 1023 unlock_sock_fast(sk, slow); 1024 1025 mptcp_data_lock(sk); 1026 info->mptcpi_last_ack_recv = jiffies_to_msecs(now - msk->last_ack_recv); 1027 info->mptcpi_snd_una = msk->snd_una; 1028 info->mptcpi_rcv_nxt = msk->ack_seq; 1029 info->mptcpi_bytes_acked = msk->bytes_acked; 1030 mptcp_data_unlock(sk); 1031 } 1032 EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); 1033 1034 static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) 1035 { 1036 struct mptcp_info m_info; 1037 int len; 1038 1039 if (get_user(len, optlen)) 1040 return -EFAULT; 1041 1042 /* When used only to check if a fallback to TCP happened. */ 1043 if (len == 0) 1044 return 0; 1045 1046 len = min_t(unsigned int, len, sizeof(struct mptcp_info)); 1047 1048 mptcp_diag_fill_info(msk, &m_info); 1049 1050 if (put_user(len, optlen)) 1051 return -EFAULT; 1052 1053 if (copy_to_user(optval, &m_info, len)) 1054 return -EFAULT; 1055 1056 return 0; 1057 } 1058 1059 static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, 1060 char __user *optval, 1061 u32 copied, 1062 int __user *optlen) 1063 { 1064 u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); 1065 1066 if (copied) 1067 copied += sfd->size_subflow_data; 1068 else 1069 copied = copylen; 1070 1071 if (put_user(copied, optlen)) 1072 return -EFAULT; 1073 1074 if (copy_to_user(optval, sfd, copylen)) 1075 return -EFAULT; 1076 1077 return 0; 1078 } 1079 1080 static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, 1081 char __user *optval, 1082 int __user *optlen) 1083 { 1084 int len, copylen; 1085 1086 if (get_user(len, optlen)) 1087 return -EFAULT; 1088 1089 /* if mptcp_subflow_data size is changed, need to adjust 1090 * this function to deal with programs using old version. 1091 */ 1092 BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); 1093 1094 if (len < MIN_INFO_OPTLEN_SIZE) 1095 return -EINVAL; 1096 1097 memset(sfd, 0, sizeof(*sfd)); 1098 1099 copylen = min_t(unsigned int, len, sizeof(*sfd)); 1100 if (copy_from_user(sfd, optval, copylen)) 1101 return -EFAULT; 1102 1103 /* size_subflow_data is u32, but len is signed */ 1104 if (sfd->size_subflow_data > INT_MAX || 1105 sfd->size_user > INT_MAX) 1106 return -EINVAL; 1107 1108 if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || 1109 sfd->size_subflow_data > len) 1110 return -EINVAL; 1111 1112 if (sfd->num_subflows || sfd->size_kernel) 1113 return -EINVAL; 1114 1115 return len - sfd->size_subflow_data; 1116 } 1117 1118 static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, 1119 int __user *optlen) 1120 { 1121 struct mptcp_subflow_context *subflow; 1122 struct sock *sk = (struct sock *)msk; 1123 unsigned int sfcount = 0, copied = 0; 1124 struct mptcp_subflow_data sfd; 1125 char __user *infoptr; 1126 int len; 1127 1128 len = mptcp_get_subflow_data(&sfd, optval, optlen); 1129 if (len < 0) 1130 return len; 1131 1132 sfd.size_kernel = sizeof(struct tcp_info); 1133 sfd.size_user = min_t(unsigned int, sfd.size_user, 1134 sizeof(struct tcp_info)); 1135 1136 infoptr = optval + sfd.size_subflow_data; 1137 1138 lock_sock(sk); 1139 1140 mptcp_for_each_subflow(msk, subflow) { 1141 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1142 1143 ++sfcount; 1144 1145 if (len && len >= sfd.size_user) { 1146 struct tcp_info info; 1147 1148 tcp_get_info(ssk, &info); 1149 1150 if (copy_to_user(infoptr, &info, sfd.size_user)) { 1151 release_sock(sk); 1152 return -EFAULT; 1153 } 1154 1155 infoptr += sfd.size_user; 1156 copied += sfd.size_user; 1157 len -= sfd.size_user; 1158 } 1159 } 1160 1161 release_sock(sk); 1162 1163 sfd.num_subflows = sfcount; 1164 1165 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 1166 return -EFAULT; 1167 1168 return 0; 1169 } 1170 1171 static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) 1172 { 1173 const struct inet_sock *inet = inet_sk(sk); 1174 1175 memset(a, 0, sizeof(*a)); 1176 1177 if (sk->sk_family == AF_INET) { 1178 a->sin_local.sin_family = AF_INET; 1179 a->sin_local.sin_port = inet->inet_sport; 1180 a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; 1181 1182 if (!a->sin_local.sin_addr.s_addr) 1183 a->sin_local.sin_addr.s_addr = inet->inet_saddr; 1184 1185 a->sin_remote.sin_family = AF_INET; 1186 a->sin_remote.sin_port = inet->inet_dport; 1187 a->sin_remote.sin_addr.s_addr = inet->inet_daddr; 1188 #if IS_ENABLED(CONFIG_IPV6) 1189 } else if (sk->sk_family == AF_INET6) { 1190 const struct ipv6_pinfo *np = inet6_sk(sk); 1191 1192 if (WARN_ON_ONCE(!np)) 1193 return; 1194 1195 a->sin6_local.sin6_family = AF_INET6; 1196 a->sin6_local.sin6_port = inet->inet_sport; 1197 1198 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 1199 a->sin6_local.sin6_addr = np->saddr; 1200 else 1201 a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; 1202 1203 a->sin6_remote.sin6_family = AF_INET6; 1204 a->sin6_remote.sin6_port = inet->inet_dport; 1205 a->sin6_remote.sin6_addr = sk->sk_v6_daddr; 1206 #endif 1207 } 1208 } 1209 1210 static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, 1211 int __user *optlen) 1212 { 1213 struct mptcp_subflow_context *subflow; 1214 struct sock *sk = (struct sock *)msk; 1215 unsigned int sfcount = 0, copied = 0; 1216 struct mptcp_subflow_data sfd; 1217 char __user *addrptr; 1218 int len; 1219 1220 len = mptcp_get_subflow_data(&sfd, optval, optlen); 1221 if (len < 0) 1222 return len; 1223 1224 sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); 1225 sfd.size_user = min_t(unsigned int, sfd.size_user, 1226 sizeof(struct mptcp_subflow_addrs)); 1227 1228 addrptr = optval + sfd.size_subflow_data; 1229 1230 lock_sock(sk); 1231 1232 mptcp_for_each_subflow(msk, subflow) { 1233 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1234 1235 ++sfcount; 1236 1237 if (len && len >= sfd.size_user) { 1238 struct mptcp_subflow_addrs a; 1239 1240 mptcp_get_sub_addrs(ssk, &a); 1241 1242 if (copy_to_user(addrptr, &a, sfd.size_user)) { 1243 release_sock(sk); 1244 return -EFAULT; 1245 } 1246 1247 addrptr += sfd.size_user; 1248 copied += sfd.size_user; 1249 len -= sfd.size_user; 1250 } 1251 } 1252 1253 release_sock(sk); 1254 1255 sfd.num_subflows = sfcount; 1256 1257 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 1258 return -EFAULT; 1259 1260 return 0; 1261 } 1262 1263 static int mptcp_get_full_info(struct mptcp_full_info *mfi, 1264 char __user *optval, 1265 int __user *optlen) 1266 { 1267 int len; 1268 1269 BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != 1270 MIN_FULL_INFO_OPTLEN_SIZE); 1271 1272 if (get_user(len, optlen)) 1273 return -EFAULT; 1274 1275 if (len < MIN_FULL_INFO_OPTLEN_SIZE) 1276 return -EINVAL; 1277 1278 memset(mfi, 0, sizeof(*mfi)); 1279 if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) 1280 return -EFAULT; 1281 1282 if (mfi->size_tcpinfo_kernel || 1283 mfi->size_sfinfo_kernel || 1284 mfi->num_subflows) 1285 return -EINVAL; 1286 1287 if (mfi->size_sfinfo_user > INT_MAX || 1288 mfi->size_tcpinfo_user > INT_MAX) 1289 return -EINVAL; 1290 1291 return len - MIN_FULL_INFO_OPTLEN_SIZE; 1292 } 1293 1294 static int mptcp_put_full_info(struct mptcp_full_info *mfi, 1295 char __user *optval, 1296 u32 copylen, 1297 int __user *optlen) 1298 { 1299 copylen += MIN_FULL_INFO_OPTLEN_SIZE; 1300 if (put_user(copylen, optlen)) 1301 return -EFAULT; 1302 1303 if (copy_to_user(optval, mfi, copylen)) 1304 return -EFAULT; 1305 return 0; 1306 } 1307 1308 static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, 1309 int __user *optlen) 1310 { 1311 unsigned int sfcount = 0, copylen = 0; 1312 struct mptcp_subflow_context *subflow; 1313 struct sock *sk = (struct sock *)msk; 1314 void __user *tcpinfoptr, *sfinfoptr; 1315 struct mptcp_full_info mfi; 1316 int len; 1317 1318 len = mptcp_get_full_info(&mfi, optval, optlen); 1319 if (len < 0) 1320 return len; 1321 1322 /* don't bother filling the mptcp info if there is not enough 1323 * user-space-provided storage 1324 */ 1325 if (len > 0) { 1326 mptcp_diag_fill_info(msk, &mfi.mptcp_info); 1327 copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); 1328 } 1329 1330 mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); 1331 mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, 1332 sizeof(struct tcp_info)); 1333 sfinfoptr = u64_to_user_ptr(mfi.subflow_info); 1334 mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); 1335 mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, 1336 sizeof(struct mptcp_subflow_info)); 1337 tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); 1338 1339 lock_sock(sk); 1340 mptcp_for_each_subflow(msk, subflow) { 1341 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1342 struct mptcp_subflow_info sfinfo; 1343 struct tcp_info tcp_info; 1344 1345 if (sfcount++ >= mfi.size_arrays_user) 1346 continue; 1347 1348 /* fetch addr/tcp_info only if the user space buffers 1349 * are wide enough 1350 */ 1351 memset(&sfinfo, 0, sizeof(sfinfo)); 1352 sfinfo.id = subflow->subflow_id; 1353 if (mfi.size_sfinfo_user > 1354 offsetof(struct mptcp_subflow_info, addrs)) 1355 mptcp_get_sub_addrs(ssk, &sfinfo.addrs); 1356 if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) 1357 goto fail_release; 1358 1359 if (mfi.size_tcpinfo_user) { 1360 tcp_get_info(ssk, &tcp_info); 1361 if (copy_to_user(tcpinfoptr, &tcp_info, 1362 mfi.size_tcpinfo_user)) 1363 goto fail_release; 1364 } 1365 1366 tcpinfoptr += mfi.size_tcpinfo_user; 1367 sfinfoptr += mfi.size_sfinfo_user; 1368 } 1369 release_sock(sk); 1370 1371 mfi.num_subflows = sfcount; 1372 if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) 1373 return -EFAULT; 1374 1375 return 0; 1376 1377 fail_release: 1378 release_sock(sk); 1379 return -EFAULT; 1380 } 1381 1382 static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, 1383 int __user *optlen, int val) 1384 { 1385 int len; 1386 1387 if (get_user(len, optlen)) 1388 return -EFAULT; 1389 if (len < 0) 1390 return -EINVAL; 1391 1392 if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { 1393 unsigned char ucval = (unsigned char)val; 1394 1395 len = 1; 1396 if (put_user(len, optlen)) 1397 return -EFAULT; 1398 if (copy_to_user(optval, &ucval, 1)) 1399 return -EFAULT; 1400 } else { 1401 len = min_t(unsigned int, len, sizeof(int)); 1402 if (put_user(len, optlen)) 1403 return -EFAULT; 1404 if (copy_to_user(optval, &val, len)) 1405 return -EFAULT; 1406 } 1407 1408 return 0; 1409 } 1410 1411 static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 1412 char __user *optval, int __user *optlen) 1413 { 1414 struct sock *sk = (void *)msk; 1415 1416 switch (optname) { 1417 case TCP_ULP: 1418 case TCP_CONGESTION: 1419 case TCP_INFO: 1420 case TCP_CC_INFO: 1421 case TCP_DEFER_ACCEPT: 1422 case TCP_FASTOPEN: 1423 case TCP_FASTOPEN_CONNECT: 1424 case TCP_FASTOPEN_KEY: 1425 case TCP_FASTOPEN_NO_COOKIE: 1426 return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, 1427 optval, optlen); 1428 case TCP_INQ: 1429 return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq); 1430 case TCP_CORK: 1431 return mptcp_put_int_option(msk, optval, optlen, msk->cork); 1432 case TCP_NODELAY: 1433 return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); 1434 case TCP_KEEPIDLE: 1435 return mptcp_put_int_option(msk, optval, optlen, 1436 msk->keepalive_idle ? : 1437 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ); 1438 case TCP_KEEPINTVL: 1439 return mptcp_put_int_option(msk, optval, optlen, 1440 msk->keepalive_intvl ? : 1441 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ); 1442 case TCP_KEEPCNT: 1443 return mptcp_put_int_option(msk, optval, optlen, 1444 msk->keepalive_cnt ? : 1445 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes)); 1446 case TCP_NOTSENT_LOWAT: 1447 return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat); 1448 case TCP_IS_MPTCP: 1449 return mptcp_put_int_option(msk, optval, optlen, 1); 1450 case TCP_MAXSEG: 1451 return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, 1452 optval, optlen); 1453 } 1454 return -EOPNOTSUPP; 1455 } 1456 1457 static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, 1458 char __user *optval, int __user *optlen) 1459 { 1460 struct sock *sk = (void *)msk; 1461 1462 switch (optname) { 1463 case IP_TOS: 1464 return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); 1465 case IP_FREEBIND: 1466 return mptcp_put_int_option(msk, optval, optlen, 1467 inet_test_bit(FREEBIND, sk)); 1468 case IP_TRANSPARENT: 1469 return mptcp_put_int_option(msk, optval, optlen, 1470 inet_test_bit(TRANSPARENT, sk)); 1471 case IP_BIND_ADDRESS_NO_PORT: 1472 return mptcp_put_int_option(msk, optval, optlen, 1473 inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); 1474 case IP_LOCAL_PORT_RANGE: 1475 return mptcp_put_int_option(msk, optval, optlen, 1476 READ_ONCE(inet_sk(sk)->local_port_range)); 1477 } 1478 1479 return -EOPNOTSUPP; 1480 } 1481 1482 static int mptcp_getsockopt_v6(struct mptcp_sock *msk, int optname, 1483 char __user *optval, int __user *optlen) 1484 { 1485 struct sock *sk = (void *)msk; 1486 1487 switch (optname) { 1488 case IPV6_V6ONLY: 1489 return mptcp_put_int_option(msk, optval, optlen, 1490 sk->sk_ipv6only); 1491 case IPV6_TRANSPARENT: 1492 return mptcp_put_int_option(msk, optval, optlen, 1493 inet_test_bit(TRANSPARENT, sk)); 1494 case IPV6_FREEBIND: 1495 return mptcp_put_int_option(msk, optval, optlen, 1496 inet_test_bit(FREEBIND, sk)); 1497 } 1498 1499 return -EOPNOTSUPP; 1500 } 1501 1502 static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, 1503 char __user *optval, int __user *optlen) 1504 { 1505 switch (optname) { 1506 case MPTCP_INFO: 1507 return mptcp_getsockopt_info(msk, optval, optlen); 1508 case MPTCP_FULL_INFO: 1509 return mptcp_getsockopt_full_info(msk, optval, optlen); 1510 case MPTCP_TCPINFO: 1511 return mptcp_getsockopt_tcpinfo(msk, optval, optlen); 1512 case MPTCP_SUBFLOW_ADDRS: 1513 return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); 1514 } 1515 1516 return -EOPNOTSUPP; 1517 } 1518 1519 int mptcp_getsockopt(struct sock *sk, int level, int optname, 1520 char __user *optval, int __user *option) 1521 { 1522 struct mptcp_sock *msk = mptcp_sk(sk); 1523 struct sock *ssk; 1524 1525 pr_debug("msk=%p\n", msk); 1526 1527 /* @@ the meaning of setsockopt() when the socket is connected and 1528 * there are multiple subflows is not yet defined. It is up to the 1529 * MPTCP-level socket to configure the subflows until the subflow 1530 * is in TCP fallback, when socket options are passed through 1531 * to the one remaining subflow. 1532 */ 1533 lock_sock(sk); 1534 ssk = __mptcp_tcp_fallback(msk); 1535 release_sock(sk); 1536 if (ssk) 1537 return tcp_getsockopt(ssk, level, optname, optval, option); 1538 1539 if (level == SOL_IP) 1540 return mptcp_getsockopt_v4(msk, optname, optval, option); 1541 if (level == SOL_IPV6) 1542 return mptcp_getsockopt_v6(msk, optname, optval, option); 1543 if (level == SOL_TCP) 1544 return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); 1545 if (level == SOL_MPTCP) 1546 return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); 1547 return -EOPNOTSUPP; 1548 } 1549 1550 static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) 1551 { 1552 static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; 1553 struct sock *sk = (struct sock *)msk; 1554 bool keep_open; 1555 1556 keep_open = sock_flag(sk, SOCK_KEEPOPEN); 1557 if (ssk->sk_prot->keepalive) 1558 ssk->sk_prot->keepalive(ssk, keep_open); 1559 sock_valbool_flag(ssk, SOCK_KEEPOPEN, keep_open); 1560 1561 ssk->sk_priority = sk->sk_priority; 1562 ssk->sk_bound_dev_if = sk->sk_bound_dev_if; 1563 ssk->sk_incoming_cpu = sk->sk_incoming_cpu; 1564 ssk->sk_ipv6only = sk->sk_ipv6only; 1565 __ip_sock_set_tos(ssk, inet_sk(sk)->tos); 1566 1567 if (sk->sk_userlocks & tx_rx_locks) { 1568 ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; 1569 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) { 1570 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 1571 mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; 1572 } 1573 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1574 __mptcp_subflow_set_rcvbuf(ssk, sk->sk_rcvbuf); 1575 } 1576 1577 if (sock_flag(sk, SOCK_LINGER)) { 1578 ssk->sk_lingertime = sk->sk_lingertime; 1579 sock_set_flag(ssk, SOCK_LINGER); 1580 } else { 1581 sock_reset_flag(ssk, SOCK_LINGER); 1582 } 1583 1584 if (sk->sk_mark != ssk->sk_mark) { 1585 ssk->sk_mark = sk->sk_mark; 1586 sk_dst_reset(ssk); 1587 } 1588 1589 sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); 1590 1591 if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) 1592 tcp_set_congestion_control(ssk, msk->ca_name, false, true); 1593 __tcp_sock_set_cork(ssk, !!msk->cork); 1594 __tcp_sock_set_nodelay(ssk, !!msk->nodelay); 1595 tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle); 1596 tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl); 1597 tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt); 1598 tcp_sock_set_maxseg(ssk, msk->maxseg); 1599 1600 inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); 1601 inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); 1602 inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); 1603 WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range)); 1604 } 1605 1606 void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) 1607 { 1608 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 1609 1610 msk_owned_by_me(msk); 1611 1612 ssk->sk_rcvlowat = 0; 1613 1614 /* subflows must ignore any latency-related settings: will not affect 1615 * the user-space - only the msk is relevant - but will foul the 1616 * mptcp scheduler 1617 */ 1618 tcp_sk(ssk)->notsent_lowat = UINT_MAX; 1619 1620 if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { 1621 sync_socket_options(msk, ssk); 1622 1623 subflow->setsockopt_seq = msk->setsockopt_seq; 1624 } 1625 } 1626 1627 /* unfortunately this is different enough from the tcp version so 1628 * that we can't factor it out 1629 */ 1630 int mptcp_set_rcvlowat(struct sock *sk, int val) 1631 { 1632 struct mptcp_subflow_context *subflow; 1633 int space, cap; 1634 1635 /* bpf can land here with a wrong sk type */ 1636 if (sk->sk_protocol == IPPROTO_TCP) 1637 return -EINVAL; 1638 1639 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1640 cap = sk->sk_rcvbuf >> 1; 1641 else 1642 cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; 1643 val = min(val, cap); 1644 WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); 1645 1646 /* Check if we need to signal EPOLLIN right now */ 1647 if (mptcp_epollin_ready(sk)) 1648 sk->sk_data_ready(sk); 1649 1650 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1651 return 0; 1652 1653 space = mptcp_space_from_win(sk, val); 1654 if (space <= sk->sk_rcvbuf) 1655 return 0; 1656 1657 /* propagate the rcvbuf changes to all the subflows */ 1658 WRITE_ONCE(sk->sk_rcvbuf, space); 1659 mptcp_for_each_subflow(mptcp_sk(sk), subflow) { 1660 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1661 bool slow; 1662 1663 slow = lock_sock_fast(ssk); 1664 WRITE_ONCE(ssk->sk_rcvbuf, space); 1665 WRITE_ONCE(tcp_sk(ssk)->window_clamp, val); 1666 unlock_sock_fast(ssk, slow); 1667 } 1668 return 0; 1669 } 1670