1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2021, Red Hat. 5 */ 6 7 #define pr_fmt(fmt) "MPTCP: " fmt 8 9 #include <linux/kernel.h> 10 #include <linux/module.h> 11 #include <net/sock.h> 12 #include <net/protocol.h> 13 #include <net/tcp.h> 14 #include <net/mptcp.h> 15 #include "protocol.h" 16 17 #define MIN_INFO_OPTLEN_SIZE 16 18 #define MIN_FULL_INFO_OPTLEN_SIZE 40 19 20 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) 21 { 22 msk_owned_by_me(msk); 23 24 if (likely(!__mptcp_check_fallback(msk))) 25 return NULL; 26 27 return msk->first; 28 } 29 30 static u32 sockopt_seq_reset(const struct sock *sk) 31 { 32 sock_owned_by_me(sk); 33 34 /* Highbits contain state. Allows to distinguish sockopt_seq 35 * of listener and established: 36 * s0 = new_listener() 37 * sockopt(s0) - seq is 1 38 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) 39 * sockopt(s0) - seq increments to 2 on s0 40 * sockopt(s1) // seq increments to 2 on s1 (different option) 41 * new ssk completes join, inherits options from s0 // seq 2 42 * Needs sync from mptcp join logic, but ssk->seq == msk->seq 43 * 44 * Set High order bits to sk_state so ssk->seq == msk->seq test 45 * will fail. 46 */ 47 48 return (u32)sk->sk_state << 24u; 49 } 50 51 static void sockopt_seq_inc(struct mptcp_sock *msk) 52 { 53 u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; 54 55 msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; 56 } 57 58 static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, 59 unsigned int optlen, int *val) 60 { 61 if (optlen < sizeof(int)) 62 return -EINVAL; 63 64 if (copy_from_sockptr(val, optval, sizeof(*val))) 65 return -EFAULT; 66 67 return 0; 68 } 69 70 static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) 71 { 72 struct mptcp_subflow_context *subflow; 73 struct sock *sk = (struct sock *)msk; 74 75 lock_sock(sk); 76 sockopt_seq_inc(msk); 77 78 mptcp_for_each_subflow(msk, subflow) { 79 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 80 bool slow = lock_sock_fast(ssk); 81 82 switch (optname) { 83 case SO_DEBUG: 84 sock_valbool_flag(ssk, SOCK_DBG, !!val); 85 break; 86 case SO_KEEPALIVE: 87 if (ssk->sk_prot->keepalive) 88 ssk->sk_prot->keepalive(ssk, !!val); 89 sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); 90 break; 91 case SO_PRIORITY: 92 WRITE_ONCE(ssk->sk_priority, val); 93 break; 94 case SO_SNDBUF: 95 case SO_SNDBUFFORCE: 96 ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; 97 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 98 mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; 99 break; 100 case SO_RCVBUF: 101 case SO_RCVBUFFORCE: 102 ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; 103 WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); 104 break; 105 case SO_MARK: 106 if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { 107 WRITE_ONCE(ssk->sk_mark, sk->sk_mark); 108 sk_dst_reset(ssk); 109 } 110 break; 111 case SO_INCOMING_CPU: 112 WRITE_ONCE(ssk->sk_incoming_cpu, val); 113 break; 114 } 115 116 subflow->setsockopt_seq = msk->setsockopt_seq; 117 unlock_sock_fast(ssk, slow); 118 } 119 120 release_sock(sk); 121 } 122 123 static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) 124 { 125 sockptr_t optval = KERNEL_SOCKPTR(&val); 126 struct sock *sk = (struct sock *)msk; 127 int ret; 128 129 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 130 optval, sizeof(val)); 131 if (ret) 132 return ret; 133 134 mptcp_sol_socket_sync_intval(msk, optname, val); 135 return 0; 136 } 137 138 static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) 139 { 140 struct sock *sk = (struct sock *)msk; 141 142 WRITE_ONCE(sk->sk_incoming_cpu, val); 143 144 mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); 145 } 146 147 static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) 148 { 149 sockptr_t optval = KERNEL_SOCKPTR(&val); 150 struct mptcp_subflow_context *subflow; 151 struct sock *sk = (struct sock *)msk; 152 int ret; 153 154 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 155 optval, sizeof(val)); 156 if (ret) 157 return ret; 158 159 lock_sock(sk); 160 mptcp_for_each_subflow(msk, subflow) { 161 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 162 163 lock_sock(ssk); 164 sock_set_timestamp(ssk, optname, !!val); 165 release_sock(ssk); 166 } 167 168 release_sock(sk); 169 return 0; 170 } 171 172 static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, 173 sockptr_t optval, 174 unsigned int optlen) 175 { 176 int val, ret; 177 178 ret = mptcp_get_int_option(msk, optval, optlen, &val); 179 if (ret) 180 return ret; 181 182 switch (optname) { 183 case SO_KEEPALIVE: 184 case SO_DEBUG: 185 case SO_MARK: 186 case SO_PRIORITY: 187 case SO_SNDBUF: 188 case SO_SNDBUFFORCE: 189 case SO_RCVBUF: 190 case SO_RCVBUFFORCE: 191 return mptcp_sol_socket_intval(msk, optname, val); 192 case SO_INCOMING_CPU: 193 mptcp_so_incoming_cpu(msk, val); 194 return 0; 195 case SO_TIMESTAMP_OLD: 196 case SO_TIMESTAMP_NEW: 197 case SO_TIMESTAMPNS_OLD: 198 case SO_TIMESTAMPNS_NEW: 199 return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); 200 } 201 202 return -ENOPROTOOPT; 203 } 204 205 static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, 206 int optname, 207 sockptr_t optval, 208 unsigned int optlen) 209 { 210 struct mptcp_subflow_context *subflow; 211 struct sock *sk = (struct sock *)msk; 212 struct so_timestamping timestamping; 213 int ret; 214 215 if (optlen == sizeof(timestamping)) { 216 if (copy_from_sockptr(×tamping, optval, 217 sizeof(timestamping))) 218 return -EFAULT; 219 } else if (optlen == sizeof(int)) { 220 memset(×tamping, 0, sizeof(timestamping)); 221 222 if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) 223 return -EFAULT; 224 } else { 225 return -EINVAL; 226 } 227 228 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 229 KERNEL_SOCKPTR(×tamping), 230 sizeof(timestamping)); 231 if (ret) 232 return ret; 233 234 lock_sock(sk); 235 236 mptcp_for_each_subflow(msk, subflow) { 237 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 238 239 lock_sock(ssk); 240 sock_set_timestamping(ssk, optname, timestamping); 241 release_sock(ssk); 242 } 243 244 release_sock(sk); 245 246 return 0; 247 } 248 249 static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, 250 unsigned int optlen) 251 { 252 struct mptcp_subflow_context *subflow; 253 struct sock *sk = (struct sock *)msk; 254 struct linger ling; 255 sockptr_t kopt; 256 int ret; 257 258 if (optlen < sizeof(ling)) 259 return -EINVAL; 260 261 if (copy_from_sockptr(&ling, optval, sizeof(ling))) 262 return -EFAULT; 263 264 kopt = KERNEL_SOCKPTR(&ling); 265 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); 266 if (ret) 267 return ret; 268 269 lock_sock(sk); 270 sockopt_seq_inc(msk); 271 mptcp_for_each_subflow(msk, subflow) { 272 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 273 bool slow = lock_sock_fast(ssk); 274 275 if (!ling.l_onoff) { 276 sock_reset_flag(ssk, SOCK_LINGER); 277 } else { 278 ssk->sk_lingertime = sk->sk_lingertime; 279 sock_set_flag(ssk, SOCK_LINGER); 280 } 281 282 subflow->setsockopt_seq = msk->setsockopt_seq; 283 unlock_sock_fast(ssk, slow); 284 } 285 286 release_sock(sk); 287 return 0; 288 } 289 290 static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, 291 sockptr_t optval, unsigned int optlen) 292 { 293 struct sock *sk = (struct sock *)msk; 294 struct sock *ssk; 295 int ret; 296 297 switch (optname) { 298 case SO_REUSEPORT: 299 case SO_REUSEADDR: 300 case SO_BINDTODEVICE: 301 case SO_BINDTOIFINDEX: 302 lock_sock(sk); 303 ssk = __mptcp_nmpc_sk(msk); 304 if (IS_ERR(ssk)) { 305 release_sock(sk); 306 return PTR_ERR(ssk); 307 } 308 309 ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen); 310 if (ret == 0) { 311 if (optname == SO_REUSEPORT) 312 sk->sk_reuseport = ssk->sk_reuseport; 313 else if (optname == SO_REUSEADDR) 314 sk->sk_reuse = ssk->sk_reuse; 315 else if (optname == SO_BINDTODEVICE) 316 sk->sk_bound_dev_if = ssk->sk_bound_dev_if; 317 else if (optname == SO_BINDTOIFINDEX) 318 sk->sk_bound_dev_if = ssk->sk_bound_dev_if; 319 } 320 release_sock(sk); 321 return ret; 322 case SO_KEEPALIVE: 323 case SO_PRIORITY: 324 case SO_SNDBUF: 325 case SO_SNDBUFFORCE: 326 case SO_RCVBUF: 327 case SO_RCVBUFFORCE: 328 case SO_MARK: 329 case SO_INCOMING_CPU: 330 case SO_DEBUG: 331 case SO_TIMESTAMP_OLD: 332 case SO_TIMESTAMP_NEW: 333 case SO_TIMESTAMPNS_OLD: 334 case SO_TIMESTAMPNS_NEW: 335 return mptcp_setsockopt_sol_socket_int(msk, optname, optval, 336 optlen); 337 case SO_TIMESTAMPING_OLD: 338 case SO_TIMESTAMPING_NEW: 339 return mptcp_setsockopt_sol_socket_timestamping(msk, optname, 340 optval, optlen); 341 case SO_LINGER: 342 return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); 343 case SO_RCVLOWAT: 344 case SO_RCVTIMEO_OLD: 345 case SO_RCVTIMEO_NEW: 346 case SO_SNDTIMEO_OLD: 347 case SO_SNDTIMEO_NEW: 348 case SO_BUSY_POLL: 349 case SO_PREFER_BUSY_POLL: 350 case SO_BUSY_POLL_BUDGET: 351 /* No need to copy: only relevant for msk */ 352 return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); 353 case SO_NO_CHECK: 354 case SO_DONTROUTE: 355 case SO_BROADCAST: 356 case SO_BSDCOMPAT: 357 case SO_PASSCRED: 358 case SO_PASSPIDFD: 359 case SO_PASSSEC: 360 case SO_RXQ_OVFL: 361 case SO_WIFI_STATUS: 362 case SO_NOFCS: 363 case SO_SELECT_ERR_QUEUE: 364 return 0; 365 } 366 367 /* SO_OOBINLINE is not supported, let's avoid the related mess 368 * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, 369 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, 370 * we must be careful with subflows 371 * 372 * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks 373 * explicitly the sk_protocol field 374 * 375 * SO_PEEK_OFF is unsupported, as it is for plain TCP 376 * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows 377 * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, 378 * but likely needs careful design 379 * 380 * SO_ZEROCOPY is currently unsupported, TODO in sndmsg 381 * SO_TXTIME is currently unsupported 382 */ 383 384 return -EOPNOTSUPP; 385 } 386 387 static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, 388 sockptr_t optval, unsigned int optlen) 389 { 390 struct sock *sk = (struct sock *)msk; 391 int ret = -EOPNOTSUPP; 392 struct sock *ssk; 393 394 switch (optname) { 395 case IPV6_V6ONLY: 396 case IPV6_TRANSPARENT: 397 case IPV6_FREEBIND: 398 lock_sock(sk); 399 ssk = __mptcp_nmpc_sk(msk); 400 if (IS_ERR(ssk)) { 401 release_sock(sk); 402 return PTR_ERR(ssk); 403 } 404 405 ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen); 406 if (ret != 0) { 407 release_sock(sk); 408 return ret; 409 } 410 411 sockopt_seq_inc(msk); 412 413 switch (optname) { 414 case IPV6_V6ONLY: 415 sk->sk_ipv6only = ssk->sk_ipv6only; 416 break; 417 case IPV6_TRANSPARENT: 418 inet_assign_bit(TRANSPARENT, sk, 419 inet_test_bit(TRANSPARENT, ssk)); 420 break; 421 case IPV6_FREEBIND: 422 inet_assign_bit(FREEBIND, sk, 423 inet_test_bit(FREEBIND, ssk)); 424 break; 425 } 426 427 release_sock(sk); 428 break; 429 } 430 431 return ret; 432 } 433 434 static bool mptcp_supported_sockopt(int level, int optname) 435 { 436 if (level == SOL_IP) { 437 switch (optname) { 438 /* should work fine */ 439 case IP_FREEBIND: 440 case IP_TRANSPARENT: 441 case IP_BIND_ADDRESS_NO_PORT: 442 case IP_LOCAL_PORT_RANGE: 443 444 /* the following are control cmsg related */ 445 case IP_PKTINFO: 446 case IP_RECVTTL: 447 case IP_RECVTOS: 448 case IP_RECVOPTS: 449 case IP_RETOPTS: 450 case IP_PASSSEC: 451 case IP_RECVORIGDSTADDR: 452 case IP_CHECKSUM: 453 case IP_RECVFRAGSIZE: 454 455 /* common stuff that need some love */ 456 case IP_TOS: 457 case IP_TTL: 458 case IP_MTU_DISCOVER: 459 case IP_RECVERR: 460 461 /* possibly less common may deserve some love */ 462 case IP_MINTTL: 463 464 /* the following is apparently a no-op for plain TCP */ 465 case IP_RECVERR_RFC4884: 466 return true; 467 } 468 469 /* IP_OPTIONS is not supported, needs subflow care */ 470 /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ 471 /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, 472 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, 473 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, 474 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, 475 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, 476 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal 477 * with mcast stuff 478 */ 479 /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ 480 return false; 481 } 482 if (level == SOL_IPV6) { 483 switch (optname) { 484 case IPV6_V6ONLY: 485 486 /* the following are control cmsg related */ 487 case IPV6_RECVPKTINFO: 488 case IPV6_2292PKTINFO: 489 case IPV6_RECVHOPLIMIT: 490 case IPV6_2292HOPLIMIT: 491 case IPV6_RECVRTHDR: 492 case IPV6_2292RTHDR: 493 case IPV6_RECVHOPOPTS: 494 case IPV6_2292HOPOPTS: 495 case IPV6_RECVDSTOPTS: 496 case IPV6_2292DSTOPTS: 497 case IPV6_RECVTCLASS: 498 case IPV6_FLOWINFO: 499 case IPV6_RECVPATHMTU: 500 case IPV6_RECVORIGDSTADDR: 501 case IPV6_RECVFRAGSIZE: 502 503 /* the following ones need some love but are quite common */ 504 case IPV6_TCLASS: 505 case IPV6_TRANSPARENT: 506 case IPV6_FREEBIND: 507 case IPV6_PKTINFO: 508 case IPV6_2292PKTOPTIONS: 509 case IPV6_UNICAST_HOPS: 510 case IPV6_MTU_DISCOVER: 511 case IPV6_MTU: 512 case IPV6_RECVERR: 513 case IPV6_FLOWINFO_SEND: 514 case IPV6_FLOWLABEL_MGR: 515 case IPV6_MINHOPCOUNT: 516 case IPV6_DONTFRAG: 517 case IPV6_AUTOFLOWLABEL: 518 519 /* the following one is a no-op for plain TCP */ 520 case IPV6_RECVERR_RFC4884: 521 return true; 522 } 523 524 /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are 525 * not supported 526 */ 527 /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, 528 * IPV6_MULTICAST_IF, IPV6_ADDRFORM, 529 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, 530 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, 531 * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, 532 * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER 533 * are not supported better not deal with mcast 534 */ 535 /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ 536 537 /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ 538 /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ 539 return false; 540 } 541 if (level == SOL_TCP) { 542 switch (optname) { 543 /* the following are no-op or should work just fine */ 544 case TCP_THIN_DUPACK: 545 case TCP_DEFER_ACCEPT: 546 547 /* the following need some love */ 548 case TCP_MAXSEG: 549 case TCP_NODELAY: 550 case TCP_THIN_LINEAR_TIMEOUTS: 551 case TCP_CONGESTION: 552 case TCP_CORK: 553 case TCP_KEEPIDLE: 554 case TCP_KEEPINTVL: 555 case TCP_KEEPCNT: 556 case TCP_SYNCNT: 557 case TCP_SAVE_SYN: 558 case TCP_LINGER2: 559 case TCP_WINDOW_CLAMP: 560 case TCP_QUICKACK: 561 case TCP_USER_TIMEOUT: 562 case TCP_TIMESTAMP: 563 case TCP_NOTSENT_LOWAT: 564 case TCP_TX_DELAY: 565 case TCP_INQ: 566 case TCP_FASTOPEN: 567 case TCP_FASTOPEN_CONNECT: 568 case TCP_FASTOPEN_KEY: 569 case TCP_FASTOPEN_NO_COOKIE: 570 return true; 571 } 572 573 /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ 574 575 /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, 576 * TCP_REPAIR_WINDOW are not supported, better avoid this mess 577 */ 578 } 579 return false; 580 } 581 582 static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, 583 unsigned int optlen) 584 { 585 struct mptcp_subflow_context *subflow; 586 struct sock *sk = (struct sock *)msk; 587 char name[TCP_CA_NAME_MAX]; 588 bool cap_net_admin; 589 int ret; 590 591 if (optlen < 1) 592 return -EINVAL; 593 594 ret = strncpy_from_sockptr(name, optval, 595 min_t(long, TCP_CA_NAME_MAX - 1, optlen)); 596 if (ret < 0) 597 return -EFAULT; 598 599 name[ret] = 0; 600 601 cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); 602 603 ret = 0; 604 lock_sock(sk); 605 sockopt_seq_inc(msk); 606 mptcp_for_each_subflow(msk, subflow) { 607 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 608 int err; 609 610 lock_sock(ssk); 611 err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); 612 if (err < 0 && ret == 0) 613 ret = err; 614 subflow->setsockopt_seq = msk->setsockopt_seq; 615 release_sock(ssk); 616 } 617 618 if (ret == 0) 619 strscpy(msk->ca_name, name, sizeof(msk->ca_name)); 620 621 release_sock(sk); 622 return ret; 623 } 624 625 static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max, 626 int (*set_val)(struct sock *, int), 627 int *msk_val, int val) 628 { 629 struct mptcp_subflow_context *subflow; 630 int err = 0; 631 632 mptcp_for_each_subflow(msk, subflow) { 633 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 634 int ret; 635 636 lock_sock(ssk); 637 ret = set_val(ssk, val); 638 err = err ? : ret; 639 release_sock(ssk); 640 } 641 642 if (!err) { 643 *msk_val = val; 644 sockopt_seq_inc(msk); 645 } 646 647 return err; 648 } 649 650 static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val) 651 { 652 struct mptcp_subflow_context *subflow; 653 struct sock *sk = (struct sock *)msk; 654 655 sockopt_seq_inc(msk); 656 msk->cork = !!val; 657 mptcp_for_each_subflow(msk, subflow) { 658 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 659 660 lock_sock(ssk); 661 __tcp_sock_set_cork(ssk, !!val); 662 release_sock(ssk); 663 } 664 if (!val) 665 mptcp_check_and_set_pending(sk); 666 667 return 0; 668 } 669 670 static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val) 671 { 672 struct mptcp_subflow_context *subflow; 673 struct sock *sk = (struct sock *)msk; 674 675 sockopt_seq_inc(msk); 676 msk->nodelay = !!val; 677 mptcp_for_each_subflow(msk, subflow) { 678 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 679 680 lock_sock(ssk); 681 __tcp_sock_set_nodelay(ssk, !!val); 682 release_sock(ssk); 683 } 684 if (val) 685 mptcp_check_and_set_pending(sk); 686 return 0; 687 } 688 689 static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname, 690 sockptr_t optval, unsigned int optlen) 691 { 692 struct sock *sk = (struct sock *)msk; 693 struct sock *ssk; 694 int err; 695 696 err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 697 if (err != 0) 698 return err; 699 700 lock_sock(sk); 701 702 ssk = __mptcp_nmpc_sk(msk); 703 if (IS_ERR(ssk)) { 704 release_sock(sk); 705 return PTR_ERR(ssk); 706 } 707 708 switch (optname) { 709 case IP_FREEBIND: 710 inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); 711 break; 712 case IP_TRANSPARENT: 713 inet_assign_bit(TRANSPARENT, ssk, 714 inet_test_bit(TRANSPARENT, sk)); 715 break; 716 case IP_BIND_ADDRESS_NO_PORT: 717 inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, 718 inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); 719 break; 720 case IP_LOCAL_PORT_RANGE: 721 WRITE_ONCE(inet_sk(ssk)->local_port_range, 722 READ_ONCE(inet_sk(sk)->local_port_range)); 723 break; 724 default: 725 release_sock(sk); 726 WARN_ON_ONCE(1); 727 return -EOPNOTSUPP; 728 } 729 730 sockopt_seq_inc(msk); 731 release_sock(sk); 732 return 0; 733 } 734 735 static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, 736 sockptr_t optval, unsigned int optlen) 737 { 738 struct mptcp_subflow_context *subflow; 739 struct sock *sk = (struct sock *)msk; 740 int err, val; 741 742 err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 743 744 if (err != 0) 745 return err; 746 747 lock_sock(sk); 748 sockopt_seq_inc(msk); 749 val = READ_ONCE(inet_sk(sk)->tos); 750 mptcp_for_each_subflow(msk, subflow) { 751 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 752 bool slow; 753 754 slow = lock_sock_fast(ssk); 755 __ip_sock_set_tos(ssk, val); 756 unlock_sock_fast(ssk, slow); 757 } 758 release_sock(sk); 759 760 return 0; 761 } 762 763 static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, 764 sockptr_t optval, unsigned int optlen) 765 { 766 switch (optname) { 767 case IP_FREEBIND: 768 case IP_TRANSPARENT: 769 case IP_BIND_ADDRESS_NO_PORT: 770 case IP_LOCAL_PORT_RANGE: 771 return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen); 772 case IP_TOS: 773 return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); 774 } 775 776 return -EOPNOTSUPP; 777 } 778 779 static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, 780 sockptr_t optval, unsigned int optlen) 781 { 782 struct sock *sk = (struct sock *)msk; 783 struct sock *ssk; 784 int ret; 785 786 /* Limit to first subflow, before the connection establishment */ 787 lock_sock(sk); 788 ssk = __mptcp_nmpc_sk(msk); 789 if (IS_ERR(ssk)) { 790 ret = PTR_ERR(ssk); 791 goto unlock; 792 } 793 794 ret = tcp_setsockopt(ssk, level, optname, optval, optlen); 795 796 unlock: 797 release_sock(sk); 798 return ret; 799 } 800 801 static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level, 802 int optname, sockptr_t optval, 803 unsigned int optlen) 804 { 805 struct mptcp_subflow_context *subflow; 806 int ret = 0; 807 808 mptcp_for_each_subflow(msk, subflow) { 809 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 810 811 ret = tcp_setsockopt(ssk, level, optname, optval, optlen); 812 if (ret) 813 break; 814 } 815 816 if (!ret) 817 sockopt_seq_inc(msk); 818 819 return ret; 820 } 821 822 static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 823 sockptr_t optval, unsigned int optlen) 824 { 825 struct sock *sk = (void *)msk; 826 int ret, val; 827 828 switch (optname) { 829 case TCP_ULP: 830 return -EOPNOTSUPP; 831 case TCP_CONGESTION: 832 return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); 833 case TCP_DEFER_ACCEPT: 834 /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ 835 mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); 836 return 0; 837 case TCP_FASTOPEN: 838 case TCP_FASTOPEN_CONNECT: 839 case TCP_FASTOPEN_KEY: 840 case TCP_FASTOPEN_NO_COOKIE: 841 return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, 842 optval, optlen); 843 } 844 845 ret = mptcp_get_int_option(msk, optval, optlen, &val); 846 if (ret) 847 return ret; 848 849 lock_sock(sk); 850 switch (optname) { 851 case TCP_INQ: 852 if (val < 0 || val > 1) 853 ret = -EINVAL; 854 else 855 msk->recvmsg_inq = !!val; 856 break; 857 case TCP_NOTSENT_LOWAT: 858 WRITE_ONCE(msk->notsent_lowat, val); 859 mptcp_write_space(sk); 860 break; 861 case TCP_CORK: 862 ret = __mptcp_setsockopt_sol_tcp_cork(msk, val); 863 break; 864 case TCP_NODELAY: 865 ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val); 866 break; 867 case TCP_KEEPIDLE: 868 ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE, 869 &tcp_sock_set_keepidle_locked, 870 &msk->keepalive_idle, val); 871 break; 872 case TCP_KEEPINTVL: 873 ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL, 874 &tcp_sock_set_keepintvl, 875 &msk->keepalive_intvl, val); 876 break; 877 case TCP_KEEPCNT: 878 ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT, 879 &tcp_sock_set_keepcnt, 880 &msk->keepalive_cnt, 881 val); 882 break; 883 case TCP_MAXSEG: 884 msk->maxseg = val; 885 ret = mptcp_setsockopt_all_sf(msk, SOL_TCP, optname, optval, 886 optlen); 887 break; 888 default: 889 ret = -ENOPROTOOPT; 890 } 891 892 release_sock(sk); 893 return ret; 894 } 895 896 int mptcp_setsockopt(struct sock *sk, int level, int optname, 897 sockptr_t optval, unsigned int optlen) 898 { 899 struct mptcp_sock *msk = mptcp_sk(sk); 900 struct sock *ssk; 901 902 pr_debug("msk=%p\n", msk); 903 904 if (level == SOL_SOCKET) 905 return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); 906 907 if (!mptcp_supported_sockopt(level, optname)) 908 return -ENOPROTOOPT; 909 910 /* @@ the meaning of setsockopt() when the socket is connected and 911 * there are multiple subflows is not yet defined. It is up to the 912 * MPTCP-level socket to configure the subflows until the subflow 913 * is in TCP fallback, when TCP socket options are passed through 914 * to the one remaining subflow. 915 */ 916 lock_sock(sk); 917 ssk = __mptcp_tcp_fallback(msk); 918 release_sock(sk); 919 if (ssk) 920 return tcp_setsockopt(ssk, level, optname, optval, optlen); 921 922 if (level == SOL_IP) 923 return mptcp_setsockopt_v4(msk, optname, optval, optlen); 924 925 if (level == SOL_IPV6) 926 return mptcp_setsockopt_v6(msk, optname, optval, optlen); 927 928 if (level == SOL_TCP) 929 return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); 930 931 return -EOPNOTSUPP; 932 } 933 934 static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, 935 char __user *optval, int __user *optlen) 936 { 937 struct sock *sk = (struct sock *)msk; 938 struct sock *ssk; 939 int ret; 940 941 lock_sock(sk); 942 ssk = msk->first; 943 if (ssk) 944 goto get; 945 946 ssk = __mptcp_nmpc_sk(msk); 947 if (IS_ERR(ssk)) { 948 ret = PTR_ERR(ssk); 949 goto out; 950 } 951 952 get: 953 ret = tcp_getsockopt(ssk, level, optname, optval, optlen); 954 955 out: 956 release_sock(sk); 957 return ret; 958 } 959 960 void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) 961 { 962 struct sock *sk = (struct sock *)msk; 963 u32 flags = 0; 964 bool slow; 965 u32 now; 966 967 memset(info, 0, sizeof(*info)); 968 969 info->mptcpi_extra_subflows = READ_ONCE(msk->pm.extra_subflows); 970 info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); 971 info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); 972 info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); 973 974 if (inet_sk_state_load(sk) == TCP_LISTEN) 975 return; 976 977 /* The following limits only make sense for the in-kernel PM */ 978 if (mptcp_pm_is_kernel(msk)) { 979 info->mptcpi_limit_extra_subflows = 980 mptcp_pm_get_limit_extra_subflows(msk); 981 info->mptcpi_endp_signal_max = 982 mptcp_pm_get_endp_signal_max(msk); 983 info->mptcpi_limit_add_addr_accepted = 984 mptcp_pm_get_limit_add_addr_accepted(msk); 985 info->mptcpi_endp_subflow_max = 986 mptcp_pm_get_endp_subflow_max(msk); 987 info->mptcpi_endp_laminar_max = 988 mptcp_pm_get_endp_laminar_max(msk); 989 info->mptcpi_endp_fullmesh_max = 990 mptcp_pm_get_endp_fullmesh_max(msk); 991 } 992 993 if (__mptcp_check_fallback(msk)) 994 flags |= MPTCP_INFO_FLAG_FALLBACK; 995 if (READ_ONCE(msk->can_ack)) 996 flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; 997 info->mptcpi_flags = flags; 998 999 slow = lock_sock_fast(sk); 1000 info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); 1001 info->mptcpi_token = msk->token; 1002 info->mptcpi_write_seq = msk->write_seq; 1003 info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; 1004 info->mptcpi_bytes_sent = msk->bytes_sent; 1005 info->mptcpi_bytes_received = msk->bytes_received; 1006 info->mptcpi_bytes_retrans = msk->bytes_retrans; 1007 info->mptcpi_subflows_total = info->mptcpi_extra_subflows + 1008 __mptcp_has_initial_subflow(msk); 1009 now = tcp_jiffies32; 1010 info->mptcpi_last_data_sent = jiffies_to_msecs(now - msk->last_data_sent); 1011 info->mptcpi_last_data_recv = jiffies_to_msecs(now - msk->last_data_recv); 1012 unlock_sock_fast(sk, slow); 1013 1014 mptcp_data_lock(sk); 1015 info->mptcpi_last_ack_recv = jiffies_to_msecs(now - msk->last_ack_recv); 1016 info->mptcpi_snd_una = msk->snd_una; 1017 info->mptcpi_rcv_nxt = msk->ack_seq; 1018 info->mptcpi_bytes_acked = msk->bytes_acked; 1019 mptcp_data_unlock(sk); 1020 } 1021 EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); 1022 1023 static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) 1024 { 1025 struct mptcp_info m_info; 1026 int len; 1027 1028 if (get_user(len, optlen)) 1029 return -EFAULT; 1030 1031 /* When used only to check if a fallback to TCP happened. */ 1032 if (len == 0) 1033 return 0; 1034 1035 len = min_t(unsigned int, len, sizeof(struct mptcp_info)); 1036 1037 mptcp_diag_fill_info(msk, &m_info); 1038 1039 if (put_user(len, optlen)) 1040 return -EFAULT; 1041 1042 if (copy_to_user(optval, &m_info, len)) 1043 return -EFAULT; 1044 1045 return 0; 1046 } 1047 1048 static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, 1049 char __user *optval, 1050 u32 copied, 1051 int __user *optlen) 1052 { 1053 u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); 1054 1055 if (copied) 1056 copied += sfd->size_subflow_data; 1057 else 1058 copied = copylen; 1059 1060 if (put_user(copied, optlen)) 1061 return -EFAULT; 1062 1063 if (copy_to_user(optval, sfd, copylen)) 1064 return -EFAULT; 1065 1066 return 0; 1067 } 1068 1069 static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, 1070 char __user *optval, 1071 int __user *optlen) 1072 { 1073 int len, copylen; 1074 1075 if (get_user(len, optlen)) 1076 return -EFAULT; 1077 1078 /* if mptcp_subflow_data size is changed, need to adjust 1079 * this function to deal with programs using old version. 1080 */ 1081 BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); 1082 1083 if (len < MIN_INFO_OPTLEN_SIZE) 1084 return -EINVAL; 1085 1086 memset(sfd, 0, sizeof(*sfd)); 1087 1088 copylen = min_t(unsigned int, len, sizeof(*sfd)); 1089 if (copy_from_user(sfd, optval, copylen)) 1090 return -EFAULT; 1091 1092 /* size_subflow_data is u32, but len is signed */ 1093 if (sfd->size_subflow_data > INT_MAX || 1094 sfd->size_user > INT_MAX) 1095 return -EINVAL; 1096 1097 if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || 1098 sfd->size_subflow_data > len) 1099 return -EINVAL; 1100 1101 if (sfd->num_subflows || sfd->size_kernel) 1102 return -EINVAL; 1103 1104 return len - sfd->size_subflow_data; 1105 } 1106 1107 static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, 1108 int __user *optlen) 1109 { 1110 struct mptcp_subflow_context *subflow; 1111 struct sock *sk = (struct sock *)msk; 1112 unsigned int sfcount = 0, copied = 0; 1113 struct mptcp_subflow_data sfd; 1114 char __user *infoptr; 1115 int len; 1116 1117 len = mptcp_get_subflow_data(&sfd, optval, optlen); 1118 if (len < 0) 1119 return len; 1120 1121 sfd.size_kernel = sizeof(struct tcp_info); 1122 sfd.size_user = min_t(unsigned int, sfd.size_user, 1123 sizeof(struct tcp_info)); 1124 1125 infoptr = optval + sfd.size_subflow_data; 1126 1127 lock_sock(sk); 1128 1129 mptcp_for_each_subflow(msk, subflow) { 1130 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1131 1132 ++sfcount; 1133 1134 if (len && len >= sfd.size_user) { 1135 struct tcp_info info; 1136 1137 tcp_get_info(ssk, &info); 1138 1139 if (copy_to_user(infoptr, &info, sfd.size_user)) { 1140 release_sock(sk); 1141 return -EFAULT; 1142 } 1143 1144 infoptr += sfd.size_user; 1145 copied += sfd.size_user; 1146 len -= sfd.size_user; 1147 } 1148 } 1149 1150 release_sock(sk); 1151 1152 sfd.num_subflows = sfcount; 1153 1154 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 1155 return -EFAULT; 1156 1157 return 0; 1158 } 1159 1160 static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) 1161 { 1162 const struct inet_sock *inet = inet_sk(sk); 1163 1164 memset(a, 0, sizeof(*a)); 1165 1166 if (sk->sk_family == AF_INET) { 1167 a->sin_local.sin_family = AF_INET; 1168 a->sin_local.sin_port = inet->inet_sport; 1169 a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; 1170 1171 if (!a->sin_local.sin_addr.s_addr) 1172 a->sin_local.sin_addr.s_addr = inet->inet_saddr; 1173 1174 a->sin_remote.sin_family = AF_INET; 1175 a->sin_remote.sin_port = inet->inet_dport; 1176 a->sin_remote.sin_addr.s_addr = inet->inet_daddr; 1177 #if IS_ENABLED(CONFIG_IPV6) 1178 } else if (sk->sk_family == AF_INET6) { 1179 const struct ipv6_pinfo *np = inet6_sk(sk); 1180 1181 if (WARN_ON_ONCE(!np)) 1182 return; 1183 1184 a->sin6_local.sin6_family = AF_INET6; 1185 a->sin6_local.sin6_port = inet->inet_sport; 1186 1187 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 1188 a->sin6_local.sin6_addr = np->saddr; 1189 else 1190 a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; 1191 1192 a->sin6_remote.sin6_family = AF_INET6; 1193 a->sin6_remote.sin6_port = inet->inet_dport; 1194 a->sin6_remote.sin6_addr = sk->sk_v6_daddr; 1195 #endif 1196 } 1197 } 1198 1199 static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, 1200 int __user *optlen) 1201 { 1202 struct mptcp_subflow_context *subflow; 1203 struct sock *sk = (struct sock *)msk; 1204 unsigned int sfcount = 0, copied = 0; 1205 struct mptcp_subflow_data sfd; 1206 char __user *addrptr; 1207 int len; 1208 1209 len = mptcp_get_subflow_data(&sfd, optval, optlen); 1210 if (len < 0) 1211 return len; 1212 1213 sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); 1214 sfd.size_user = min_t(unsigned int, sfd.size_user, 1215 sizeof(struct mptcp_subflow_addrs)); 1216 1217 addrptr = optval + sfd.size_subflow_data; 1218 1219 lock_sock(sk); 1220 1221 mptcp_for_each_subflow(msk, subflow) { 1222 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1223 1224 ++sfcount; 1225 1226 if (len && len >= sfd.size_user) { 1227 struct mptcp_subflow_addrs a; 1228 1229 mptcp_get_sub_addrs(ssk, &a); 1230 1231 if (copy_to_user(addrptr, &a, sfd.size_user)) { 1232 release_sock(sk); 1233 return -EFAULT; 1234 } 1235 1236 addrptr += sfd.size_user; 1237 copied += sfd.size_user; 1238 len -= sfd.size_user; 1239 } 1240 } 1241 1242 release_sock(sk); 1243 1244 sfd.num_subflows = sfcount; 1245 1246 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 1247 return -EFAULT; 1248 1249 return 0; 1250 } 1251 1252 static int mptcp_get_full_info(struct mptcp_full_info *mfi, 1253 char __user *optval, 1254 int __user *optlen) 1255 { 1256 int len; 1257 1258 BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != 1259 MIN_FULL_INFO_OPTLEN_SIZE); 1260 1261 if (get_user(len, optlen)) 1262 return -EFAULT; 1263 1264 if (len < MIN_FULL_INFO_OPTLEN_SIZE) 1265 return -EINVAL; 1266 1267 memset(mfi, 0, sizeof(*mfi)); 1268 if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) 1269 return -EFAULT; 1270 1271 if (mfi->size_tcpinfo_kernel || 1272 mfi->size_sfinfo_kernel || 1273 mfi->num_subflows) 1274 return -EINVAL; 1275 1276 if (mfi->size_sfinfo_user > INT_MAX || 1277 mfi->size_tcpinfo_user > INT_MAX) 1278 return -EINVAL; 1279 1280 return len - MIN_FULL_INFO_OPTLEN_SIZE; 1281 } 1282 1283 static int mptcp_put_full_info(struct mptcp_full_info *mfi, 1284 char __user *optval, 1285 u32 copylen, 1286 int __user *optlen) 1287 { 1288 copylen += MIN_FULL_INFO_OPTLEN_SIZE; 1289 if (put_user(copylen, optlen)) 1290 return -EFAULT; 1291 1292 if (copy_to_user(optval, mfi, copylen)) 1293 return -EFAULT; 1294 return 0; 1295 } 1296 1297 static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, 1298 int __user *optlen) 1299 { 1300 unsigned int sfcount = 0, copylen = 0; 1301 struct mptcp_subflow_context *subflow; 1302 struct sock *sk = (struct sock *)msk; 1303 void __user *tcpinfoptr, *sfinfoptr; 1304 struct mptcp_full_info mfi; 1305 int len; 1306 1307 len = mptcp_get_full_info(&mfi, optval, optlen); 1308 if (len < 0) 1309 return len; 1310 1311 /* don't bother filling the mptcp info if there is not enough 1312 * user-space-provided storage 1313 */ 1314 if (len > 0) { 1315 mptcp_diag_fill_info(msk, &mfi.mptcp_info); 1316 copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); 1317 } 1318 1319 mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); 1320 mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, 1321 sizeof(struct tcp_info)); 1322 sfinfoptr = u64_to_user_ptr(mfi.subflow_info); 1323 mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); 1324 mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, 1325 sizeof(struct mptcp_subflow_info)); 1326 tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); 1327 1328 lock_sock(sk); 1329 mptcp_for_each_subflow(msk, subflow) { 1330 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1331 struct mptcp_subflow_info sfinfo; 1332 struct tcp_info tcp_info; 1333 1334 if (sfcount++ >= mfi.size_arrays_user) 1335 continue; 1336 1337 /* fetch addr/tcp_info only if the user space buffers 1338 * are wide enough 1339 */ 1340 memset(&sfinfo, 0, sizeof(sfinfo)); 1341 sfinfo.id = subflow->subflow_id; 1342 if (mfi.size_sfinfo_user > 1343 offsetof(struct mptcp_subflow_info, addrs)) 1344 mptcp_get_sub_addrs(ssk, &sfinfo.addrs); 1345 if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) 1346 goto fail_release; 1347 1348 if (mfi.size_tcpinfo_user) { 1349 tcp_get_info(ssk, &tcp_info); 1350 if (copy_to_user(tcpinfoptr, &tcp_info, 1351 mfi.size_tcpinfo_user)) 1352 goto fail_release; 1353 } 1354 1355 tcpinfoptr += mfi.size_tcpinfo_user; 1356 sfinfoptr += mfi.size_sfinfo_user; 1357 } 1358 release_sock(sk); 1359 1360 mfi.num_subflows = sfcount; 1361 if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) 1362 return -EFAULT; 1363 1364 return 0; 1365 1366 fail_release: 1367 release_sock(sk); 1368 return -EFAULT; 1369 } 1370 1371 static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, 1372 int __user *optlen, int val) 1373 { 1374 int len; 1375 1376 if (get_user(len, optlen)) 1377 return -EFAULT; 1378 if (len < 0) 1379 return -EINVAL; 1380 1381 if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { 1382 unsigned char ucval = (unsigned char)val; 1383 1384 len = 1; 1385 if (put_user(len, optlen)) 1386 return -EFAULT; 1387 if (copy_to_user(optval, &ucval, 1)) 1388 return -EFAULT; 1389 } else { 1390 len = min_t(unsigned int, len, sizeof(int)); 1391 if (put_user(len, optlen)) 1392 return -EFAULT; 1393 if (copy_to_user(optval, &val, len)) 1394 return -EFAULT; 1395 } 1396 1397 return 0; 1398 } 1399 1400 static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 1401 char __user *optval, int __user *optlen) 1402 { 1403 struct sock *sk = (void *)msk; 1404 1405 switch (optname) { 1406 case TCP_ULP: 1407 case TCP_CONGESTION: 1408 case TCP_INFO: 1409 case TCP_CC_INFO: 1410 case TCP_DEFER_ACCEPT: 1411 case TCP_FASTOPEN: 1412 case TCP_FASTOPEN_CONNECT: 1413 case TCP_FASTOPEN_KEY: 1414 case TCP_FASTOPEN_NO_COOKIE: 1415 return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, 1416 optval, optlen); 1417 case TCP_INQ: 1418 return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq); 1419 case TCP_CORK: 1420 return mptcp_put_int_option(msk, optval, optlen, msk->cork); 1421 case TCP_NODELAY: 1422 return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); 1423 case TCP_KEEPIDLE: 1424 return mptcp_put_int_option(msk, optval, optlen, 1425 msk->keepalive_idle ? : 1426 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ); 1427 case TCP_KEEPINTVL: 1428 return mptcp_put_int_option(msk, optval, optlen, 1429 msk->keepalive_intvl ? : 1430 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ); 1431 case TCP_KEEPCNT: 1432 return mptcp_put_int_option(msk, optval, optlen, 1433 msk->keepalive_cnt ? : 1434 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes)); 1435 case TCP_NOTSENT_LOWAT: 1436 return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat); 1437 case TCP_IS_MPTCP: 1438 return mptcp_put_int_option(msk, optval, optlen, 1); 1439 case TCP_MAXSEG: 1440 return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, 1441 optval, optlen); 1442 } 1443 return -EOPNOTSUPP; 1444 } 1445 1446 static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, 1447 char __user *optval, int __user *optlen) 1448 { 1449 struct sock *sk = (void *)msk; 1450 1451 switch (optname) { 1452 case IP_TOS: 1453 return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); 1454 case IP_FREEBIND: 1455 return mptcp_put_int_option(msk, optval, optlen, 1456 inet_test_bit(FREEBIND, sk)); 1457 case IP_TRANSPARENT: 1458 return mptcp_put_int_option(msk, optval, optlen, 1459 inet_test_bit(TRANSPARENT, sk)); 1460 case IP_BIND_ADDRESS_NO_PORT: 1461 return mptcp_put_int_option(msk, optval, optlen, 1462 inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); 1463 case IP_LOCAL_PORT_RANGE: 1464 return mptcp_put_int_option(msk, optval, optlen, 1465 READ_ONCE(inet_sk(sk)->local_port_range)); 1466 } 1467 1468 return -EOPNOTSUPP; 1469 } 1470 1471 static int mptcp_getsockopt_v6(struct mptcp_sock *msk, int optname, 1472 char __user *optval, int __user *optlen) 1473 { 1474 struct sock *sk = (void *)msk; 1475 1476 switch (optname) { 1477 case IPV6_V6ONLY: 1478 return mptcp_put_int_option(msk, optval, optlen, 1479 sk->sk_ipv6only); 1480 case IPV6_TRANSPARENT: 1481 return mptcp_put_int_option(msk, optval, optlen, 1482 inet_test_bit(TRANSPARENT, sk)); 1483 case IPV6_FREEBIND: 1484 return mptcp_put_int_option(msk, optval, optlen, 1485 inet_test_bit(FREEBIND, sk)); 1486 } 1487 1488 return -EOPNOTSUPP; 1489 } 1490 1491 static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, 1492 char __user *optval, int __user *optlen) 1493 { 1494 switch (optname) { 1495 case MPTCP_INFO: 1496 return mptcp_getsockopt_info(msk, optval, optlen); 1497 case MPTCP_FULL_INFO: 1498 return mptcp_getsockopt_full_info(msk, optval, optlen); 1499 case MPTCP_TCPINFO: 1500 return mptcp_getsockopt_tcpinfo(msk, optval, optlen); 1501 case MPTCP_SUBFLOW_ADDRS: 1502 return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); 1503 } 1504 1505 return -EOPNOTSUPP; 1506 } 1507 1508 int mptcp_getsockopt(struct sock *sk, int level, int optname, 1509 char __user *optval, int __user *option) 1510 { 1511 struct mptcp_sock *msk = mptcp_sk(sk); 1512 struct sock *ssk; 1513 1514 pr_debug("msk=%p\n", msk); 1515 1516 /* @@ the meaning of setsockopt() when the socket is connected and 1517 * there are multiple subflows is not yet defined. It is up to the 1518 * MPTCP-level socket to configure the subflows until the subflow 1519 * is in TCP fallback, when socket options are passed through 1520 * to the one remaining subflow. 1521 */ 1522 lock_sock(sk); 1523 ssk = __mptcp_tcp_fallback(msk); 1524 release_sock(sk); 1525 if (ssk) 1526 return tcp_getsockopt(ssk, level, optname, optval, option); 1527 1528 if (level == SOL_IP) 1529 return mptcp_getsockopt_v4(msk, optname, optval, option); 1530 if (level == SOL_IPV6) 1531 return mptcp_getsockopt_v6(msk, optname, optval, option); 1532 if (level == SOL_TCP) 1533 return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); 1534 if (level == SOL_MPTCP) 1535 return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); 1536 return -EOPNOTSUPP; 1537 } 1538 1539 static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) 1540 { 1541 static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; 1542 struct sock *sk = (struct sock *)msk; 1543 bool keep_open; 1544 1545 keep_open = sock_flag(sk, SOCK_KEEPOPEN); 1546 if (ssk->sk_prot->keepalive) 1547 ssk->sk_prot->keepalive(ssk, keep_open); 1548 sock_valbool_flag(ssk, SOCK_KEEPOPEN, keep_open); 1549 1550 ssk->sk_priority = sk->sk_priority; 1551 ssk->sk_bound_dev_if = sk->sk_bound_dev_if; 1552 ssk->sk_incoming_cpu = sk->sk_incoming_cpu; 1553 ssk->sk_ipv6only = sk->sk_ipv6only; 1554 __ip_sock_set_tos(ssk, inet_sk(sk)->tos); 1555 1556 if (sk->sk_userlocks & tx_rx_locks) { 1557 ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; 1558 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) { 1559 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 1560 mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; 1561 } 1562 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1563 WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); 1564 } 1565 1566 if (sock_flag(sk, SOCK_LINGER)) { 1567 ssk->sk_lingertime = sk->sk_lingertime; 1568 sock_set_flag(ssk, SOCK_LINGER); 1569 } else { 1570 sock_reset_flag(ssk, SOCK_LINGER); 1571 } 1572 1573 if (sk->sk_mark != ssk->sk_mark) { 1574 ssk->sk_mark = sk->sk_mark; 1575 sk_dst_reset(ssk); 1576 } 1577 1578 sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); 1579 1580 if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) 1581 tcp_set_congestion_control(ssk, msk->ca_name, false, true); 1582 __tcp_sock_set_cork(ssk, !!msk->cork); 1583 __tcp_sock_set_nodelay(ssk, !!msk->nodelay); 1584 tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle); 1585 tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl); 1586 tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt); 1587 tcp_sock_set_maxseg(ssk, msk->maxseg); 1588 1589 inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); 1590 inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); 1591 inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); 1592 WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range)); 1593 } 1594 1595 void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) 1596 { 1597 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 1598 1599 msk_owned_by_me(msk); 1600 1601 ssk->sk_rcvlowat = 0; 1602 1603 /* subflows must ignore any latency-related settings: will not affect 1604 * the user-space - only the msk is relevant - but will foul the 1605 * mptcp scheduler 1606 */ 1607 tcp_sk(ssk)->notsent_lowat = UINT_MAX; 1608 1609 if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { 1610 sync_socket_options(msk, ssk); 1611 1612 subflow->setsockopt_seq = msk->setsockopt_seq; 1613 } 1614 } 1615 1616 /* unfortunately this is different enough from the tcp version so 1617 * that we can't factor it out 1618 */ 1619 int mptcp_set_rcvlowat(struct sock *sk, int val) 1620 { 1621 struct mptcp_subflow_context *subflow; 1622 int space, cap; 1623 1624 /* bpf can land here with a wrong sk type */ 1625 if (sk->sk_protocol == IPPROTO_TCP) 1626 return -EINVAL; 1627 1628 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1629 cap = sk->sk_rcvbuf >> 1; 1630 else 1631 cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; 1632 val = min(val, cap); 1633 WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); 1634 1635 /* Check if we need to signal EPOLLIN right now */ 1636 if (mptcp_epollin_ready(sk)) 1637 sk->sk_data_ready(sk); 1638 1639 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1640 return 0; 1641 1642 space = mptcp_space_from_win(sk, val); 1643 if (space <= sk->sk_rcvbuf) 1644 return 0; 1645 1646 /* propagate the rcvbuf changes to all the subflows */ 1647 WRITE_ONCE(sk->sk_rcvbuf, space); 1648 mptcp_for_each_subflow(mptcp_sk(sk), subflow) { 1649 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1650 bool slow; 1651 1652 slow = lock_sock_fast(ssk); 1653 WRITE_ONCE(ssk->sk_rcvbuf, space); 1654 WRITE_ONCE(tcp_sk(ssk)->window_clamp, val); 1655 unlock_sock_fast(ssk, slow); 1656 } 1657 return 0; 1658 } 1659