1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2021, Red Hat. 5 */ 6 7 #define pr_fmt(fmt) "MPTCP: " fmt 8 9 #include <linux/kernel.h> 10 #include <linux/module.h> 11 #include <net/sock.h> 12 #include <net/protocol.h> 13 #include <net/tcp.h> 14 #include <net/mptcp.h> 15 #include "protocol.h" 16 17 #define MIN_INFO_OPTLEN_SIZE 16 18 #define MIN_FULL_INFO_OPTLEN_SIZE 40 19 20 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) 21 { 22 msk_owned_by_me(msk); 23 24 if (likely(!__mptcp_check_fallback(msk))) 25 return NULL; 26 27 return msk->first; 28 } 29 30 static u32 sockopt_seq_reset(const struct sock *sk) 31 { 32 sock_owned_by_me(sk); 33 34 /* Highbits contain state. Allows to distinguish sockopt_seq 35 * of listener and established: 36 * s0 = new_listener() 37 * sockopt(s0) - seq is 1 38 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) 39 * sockopt(s0) - seq increments to 2 on s0 40 * sockopt(s1) // seq increments to 2 on s1 (different option) 41 * new ssk completes join, inherits options from s0 // seq 2 42 * Needs sync from mptcp join logic, but ssk->seq == msk->seq 43 * 44 * Set High order bits to sk_state so ssk->seq == msk->seq test 45 * will fail. 46 */ 47 48 return (u32)sk->sk_state << 24u; 49 } 50 51 static void sockopt_seq_inc(struct mptcp_sock *msk) 52 { 53 u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; 54 55 msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; 56 } 57 58 static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, 59 unsigned int optlen, int *val) 60 { 61 if (optlen < sizeof(int)) 62 return -EINVAL; 63 64 if (copy_from_sockptr(val, optval, sizeof(*val))) 65 return -EFAULT; 66 67 return 0; 68 } 69 70 static void __mptcp_subflow_set_rcvbuf(struct sock *ssk, int val) 71 { 72 WRITE_ONCE(ssk->sk_rcvbuf, val); 73 tcp_set_rcvbuf(ssk, val); 74 } 75 76 static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) 77 { 78 struct mptcp_subflow_context *subflow; 79 struct sock *sk = (struct sock *)msk; 80 81 lock_sock(sk); 82 sockopt_seq_inc(msk); 83 84 mptcp_for_each_subflow(msk, subflow) { 85 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 86 bool slow = lock_sock_fast(ssk); 87 88 switch (optname) { 89 case SO_DEBUG: 90 sock_valbool_flag(ssk, SOCK_DBG, !!val); 91 break; 92 case SO_KEEPALIVE: 93 if (ssk->sk_prot->keepalive) 94 ssk->sk_prot->keepalive(ssk, !!val); 95 sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); 96 break; 97 case SO_PRIORITY: 98 WRITE_ONCE(ssk->sk_priority, val); 99 break; 100 case SO_SNDBUF: 101 case SO_SNDBUFFORCE: 102 ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; 103 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 104 mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; 105 break; 106 case SO_RCVBUF: 107 case SO_RCVBUFFORCE: 108 ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; 109 __mptcp_subflow_set_rcvbuf(ssk, sk->sk_rcvbuf); 110 break; 111 case SO_MARK: 112 if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { 113 WRITE_ONCE(ssk->sk_mark, sk->sk_mark); 114 sk_dst_reset(ssk); 115 } 116 break; 117 case SO_INCOMING_CPU: 118 WRITE_ONCE(ssk->sk_incoming_cpu, val); 119 break; 120 } 121 122 subflow->setsockopt_seq = msk->setsockopt_seq; 123 unlock_sock_fast(ssk, slow); 124 } 125 126 release_sock(sk); 127 } 128 129 static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) 130 { 131 sockptr_t optval = KERNEL_SOCKPTR(&val); 132 struct sock *sk = (struct sock *)msk; 133 int ret; 134 135 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 136 optval, sizeof(val)); 137 if (ret) 138 return ret; 139 140 mptcp_sol_socket_sync_intval(msk, optname, val); 141 return 0; 142 } 143 144 static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) 145 { 146 struct sock *sk = (struct sock *)msk; 147 148 WRITE_ONCE(sk->sk_incoming_cpu, val); 149 150 mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); 151 } 152 153 static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) 154 { 155 sockptr_t optval = KERNEL_SOCKPTR(&val); 156 struct mptcp_subflow_context *subflow; 157 struct sock *sk = (struct sock *)msk; 158 int ret; 159 160 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 161 optval, sizeof(val)); 162 if (ret) 163 return ret; 164 165 lock_sock(sk); 166 mptcp_for_each_subflow(msk, subflow) { 167 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 168 169 lock_sock(ssk); 170 sock_set_timestamp(ssk, optname, !!val); 171 release_sock(ssk); 172 } 173 174 release_sock(sk); 175 return 0; 176 } 177 178 static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, 179 sockptr_t optval, 180 unsigned int optlen) 181 { 182 int val, ret; 183 184 ret = mptcp_get_int_option(msk, optval, optlen, &val); 185 if (ret) 186 return ret; 187 188 switch (optname) { 189 case SO_KEEPALIVE: 190 case SO_DEBUG: 191 case SO_MARK: 192 case SO_PRIORITY: 193 case SO_SNDBUF: 194 case SO_SNDBUFFORCE: 195 case SO_RCVBUF: 196 case SO_RCVBUFFORCE: 197 return mptcp_sol_socket_intval(msk, optname, val); 198 case SO_INCOMING_CPU: 199 mptcp_so_incoming_cpu(msk, val); 200 return 0; 201 case SO_TIMESTAMP_OLD: 202 case SO_TIMESTAMP_NEW: 203 case SO_TIMESTAMPNS_OLD: 204 case SO_TIMESTAMPNS_NEW: 205 return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); 206 } 207 208 return -ENOPROTOOPT; 209 } 210 211 static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, 212 int optname, 213 sockptr_t optval, 214 unsigned int optlen) 215 { 216 struct mptcp_subflow_context *subflow; 217 struct sock *sk = (struct sock *)msk; 218 struct so_timestamping timestamping; 219 int ret; 220 221 if (optlen == sizeof(timestamping)) { 222 if (copy_from_sockptr(×tamping, optval, 223 sizeof(timestamping))) 224 return -EFAULT; 225 } else if (optlen == sizeof(int)) { 226 memset(×tamping, 0, sizeof(timestamping)); 227 228 if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) 229 return -EFAULT; 230 } else { 231 return -EINVAL; 232 } 233 234 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 235 KERNEL_SOCKPTR(×tamping), 236 sizeof(timestamping)); 237 if (ret) 238 return ret; 239 240 lock_sock(sk); 241 242 mptcp_for_each_subflow(msk, subflow) { 243 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 244 245 lock_sock(ssk); 246 sock_set_timestamping(ssk, optname, timestamping); 247 release_sock(ssk); 248 } 249 250 release_sock(sk); 251 252 return 0; 253 } 254 255 static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, 256 unsigned int optlen) 257 { 258 struct mptcp_subflow_context *subflow; 259 struct sock *sk = (struct sock *)msk; 260 struct linger ling; 261 sockptr_t kopt; 262 int ret; 263 264 if (optlen < sizeof(ling)) 265 return -EINVAL; 266 267 if (copy_from_sockptr(&ling, optval, sizeof(ling))) 268 return -EFAULT; 269 270 kopt = KERNEL_SOCKPTR(&ling); 271 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); 272 if (ret) 273 return ret; 274 275 lock_sock(sk); 276 sockopt_seq_inc(msk); 277 mptcp_for_each_subflow(msk, subflow) { 278 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 279 bool slow = lock_sock_fast(ssk); 280 281 if (!ling.l_onoff) { 282 sock_reset_flag(ssk, SOCK_LINGER); 283 } else { 284 ssk->sk_lingertime = sk->sk_lingertime; 285 sock_set_flag(ssk, SOCK_LINGER); 286 } 287 288 subflow->setsockopt_seq = msk->setsockopt_seq; 289 unlock_sock_fast(ssk, slow); 290 } 291 292 release_sock(sk); 293 return 0; 294 } 295 296 static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, 297 sockptr_t optval, unsigned int optlen) 298 { 299 struct sock *sk = (struct sock *)msk; 300 struct sock *ssk; 301 int ret; 302 303 switch (optname) { 304 case SO_REUSEPORT: 305 case SO_REUSEADDR: 306 case SO_BINDTODEVICE: 307 case SO_BINDTOIFINDEX: 308 lock_sock(sk); 309 ssk = __mptcp_nmpc_sk(msk); 310 if (IS_ERR(ssk)) { 311 release_sock(sk); 312 return PTR_ERR(ssk); 313 } 314 315 ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen); 316 if (ret == 0) { 317 if (optname == SO_REUSEPORT) 318 sk->sk_reuseport = ssk->sk_reuseport; 319 else if (optname == SO_REUSEADDR) 320 sk->sk_reuse = ssk->sk_reuse; 321 else if (optname == SO_BINDTODEVICE) 322 sk->sk_bound_dev_if = ssk->sk_bound_dev_if; 323 else if (optname == SO_BINDTOIFINDEX) 324 sk->sk_bound_dev_if = ssk->sk_bound_dev_if; 325 } 326 release_sock(sk); 327 return ret; 328 case SO_KEEPALIVE: 329 case SO_PRIORITY: 330 case SO_SNDBUF: 331 case SO_SNDBUFFORCE: 332 case SO_RCVBUF: 333 case SO_RCVBUFFORCE: 334 case SO_MARK: 335 case SO_INCOMING_CPU: 336 case SO_DEBUG: 337 case SO_TIMESTAMP_OLD: 338 case SO_TIMESTAMP_NEW: 339 case SO_TIMESTAMPNS_OLD: 340 case SO_TIMESTAMPNS_NEW: 341 return mptcp_setsockopt_sol_socket_int(msk, optname, optval, 342 optlen); 343 case SO_TIMESTAMPING_OLD: 344 case SO_TIMESTAMPING_NEW: 345 return mptcp_setsockopt_sol_socket_timestamping(msk, optname, 346 optval, optlen); 347 case SO_LINGER: 348 return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); 349 case SO_RCVLOWAT: 350 case SO_RCVTIMEO_OLD: 351 case SO_RCVTIMEO_NEW: 352 case SO_SNDTIMEO_OLD: 353 case SO_SNDTIMEO_NEW: 354 case SO_BUSY_POLL: 355 case SO_PREFER_BUSY_POLL: 356 case SO_BUSY_POLL_BUDGET: 357 /* No need to copy: only relevant for msk */ 358 return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); 359 case SO_NO_CHECK: 360 case SO_DONTROUTE: 361 case SO_BROADCAST: 362 case SO_BSDCOMPAT: 363 case SO_PASSCRED: 364 case SO_PASSPIDFD: 365 case SO_PASSSEC: 366 case SO_RXQ_OVFL: 367 case SO_WIFI_STATUS: 368 case SO_NOFCS: 369 case SO_SELECT_ERR_QUEUE: 370 return 0; 371 } 372 373 /* SO_OOBINLINE is not supported, let's avoid the related mess 374 * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, 375 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, 376 * we must be careful with subflows 377 * 378 * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks 379 * explicitly the sk_protocol field 380 * 381 * SO_PEEK_OFF is unsupported, as it is for plain TCP 382 * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows 383 * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, 384 * but likely needs careful design 385 * 386 * SO_ZEROCOPY is currently unsupported, TODO in sndmsg 387 * SO_TXTIME is currently unsupported 388 */ 389 390 return -EOPNOTSUPP; 391 } 392 393 static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, 394 sockptr_t optval, unsigned int optlen) 395 { 396 struct sock *sk = (struct sock *)msk; 397 int ret = -EOPNOTSUPP; 398 struct sock *ssk; 399 400 switch (optname) { 401 case IPV6_V6ONLY: 402 case IPV6_TRANSPARENT: 403 case IPV6_FREEBIND: 404 lock_sock(sk); 405 ssk = __mptcp_nmpc_sk(msk); 406 if (IS_ERR(ssk)) { 407 release_sock(sk); 408 return PTR_ERR(ssk); 409 } 410 411 ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen); 412 if (ret != 0) { 413 release_sock(sk); 414 return ret; 415 } 416 417 sockopt_seq_inc(msk); 418 419 switch (optname) { 420 case IPV6_V6ONLY: 421 sk->sk_ipv6only = ssk->sk_ipv6only; 422 break; 423 case IPV6_TRANSPARENT: 424 inet_assign_bit(TRANSPARENT, sk, 425 inet_test_bit(TRANSPARENT, ssk)); 426 break; 427 case IPV6_FREEBIND: 428 inet_assign_bit(FREEBIND, sk, 429 inet_test_bit(FREEBIND, ssk)); 430 break; 431 } 432 433 release_sock(sk); 434 break; 435 } 436 437 return ret; 438 } 439 440 static bool mptcp_supported_sockopt(int level, int optname) 441 { 442 if (level == SOL_IP) { 443 switch (optname) { 444 /* should work fine */ 445 case IP_FREEBIND: 446 case IP_TRANSPARENT: 447 case IP_BIND_ADDRESS_NO_PORT: 448 case IP_LOCAL_PORT_RANGE: 449 450 /* the following are control cmsg related */ 451 case IP_PKTINFO: 452 case IP_RECVTTL: 453 case IP_RECVTOS: 454 case IP_RECVOPTS: 455 case IP_RETOPTS: 456 case IP_PASSSEC: 457 case IP_RECVORIGDSTADDR: 458 case IP_CHECKSUM: 459 case IP_RECVFRAGSIZE: 460 461 /* common stuff that need some love */ 462 case IP_TOS: 463 case IP_TTL: 464 case IP_MTU_DISCOVER: 465 case IP_RECVERR: 466 467 /* possibly less common may deserve some love */ 468 case IP_MINTTL: 469 470 /* the following is apparently a no-op for plain TCP */ 471 case IP_RECVERR_RFC4884: 472 return true; 473 } 474 475 /* IP_OPTIONS is not supported, needs subflow care */ 476 /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ 477 /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, 478 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, 479 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, 480 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, 481 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, 482 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal 483 * with mcast stuff 484 */ 485 /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ 486 return false; 487 } 488 if (level == SOL_IPV6) { 489 switch (optname) { 490 case IPV6_V6ONLY: 491 492 /* the following are control cmsg related */ 493 case IPV6_RECVPKTINFO: 494 case IPV6_2292PKTINFO: 495 case IPV6_RECVHOPLIMIT: 496 case IPV6_2292HOPLIMIT: 497 case IPV6_RECVRTHDR: 498 case IPV6_2292RTHDR: 499 case IPV6_RECVHOPOPTS: 500 case IPV6_2292HOPOPTS: 501 case IPV6_RECVDSTOPTS: 502 case IPV6_2292DSTOPTS: 503 case IPV6_RECVTCLASS: 504 case IPV6_FLOWINFO: 505 case IPV6_RECVPATHMTU: 506 case IPV6_RECVORIGDSTADDR: 507 case IPV6_RECVFRAGSIZE: 508 509 /* the following ones need some love but are quite common */ 510 case IPV6_TCLASS: 511 case IPV6_TRANSPARENT: 512 case IPV6_FREEBIND: 513 case IPV6_PKTINFO: 514 case IPV6_2292PKTOPTIONS: 515 case IPV6_UNICAST_HOPS: 516 case IPV6_MTU_DISCOVER: 517 case IPV6_MTU: 518 case IPV6_RECVERR: 519 case IPV6_FLOWINFO_SEND: 520 case IPV6_FLOWLABEL_MGR: 521 case IPV6_MINHOPCOUNT: 522 case IPV6_DONTFRAG: 523 case IPV6_AUTOFLOWLABEL: 524 525 /* the following one is a no-op for plain TCP */ 526 case IPV6_RECVERR_RFC4884: 527 return true; 528 } 529 530 /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are 531 * not supported 532 */ 533 /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, 534 * IPV6_MULTICAST_IF, IPV6_ADDRFORM, 535 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, 536 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, 537 * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, 538 * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER 539 * are not supported better not deal with mcast 540 */ 541 /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ 542 543 /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ 544 /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ 545 return false; 546 } 547 if (level == SOL_TCP) { 548 switch (optname) { 549 /* the following are no-op or should work just fine */ 550 case TCP_THIN_DUPACK: 551 case TCP_DEFER_ACCEPT: 552 553 /* the following need some love */ 554 case TCP_MAXSEG: 555 case TCP_NODELAY: 556 case TCP_THIN_LINEAR_TIMEOUTS: 557 case TCP_CONGESTION: 558 case TCP_CORK: 559 case TCP_KEEPIDLE: 560 case TCP_KEEPINTVL: 561 case TCP_KEEPCNT: 562 case TCP_SYNCNT: 563 case TCP_SAVE_SYN: 564 case TCP_LINGER2: 565 case TCP_WINDOW_CLAMP: 566 case TCP_QUICKACK: 567 case TCP_USER_TIMEOUT: 568 case TCP_TIMESTAMP: 569 case TCP_NOTSENT_LOWAT: 570 case TCP_TX_DELAY: 571 case TCP_INQ: 572 case TCP_FASTOPEN: 573 case TCP_FASTOPEN_CONNECT: 574 case TCP_FASTOPEN_KEY: 575 case TCP_FASTOPEN_NO_COOKIE: 576 return true; 577 } 578 579 /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ 580 581 /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, 582 * TCP_REPAIR_WINDOW are not supported, better avoid this mess 583 */ 584 } 585 return false; 586 } 587 588 static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, 589 unsigned int optlen) 590 { 591 struct mptcp_subflow_context *subflow; 592 struct sock *sk = (struct sock *)msk; 593 char name[TCP_CA_NAME_MAX]; 594 bool cap_net_admin; 595 int ret; 596 597 if (optlen < 1) 598 return -EINVAL; 599 600 ret = strncpy_from_sockptr(name, optval, 601 min_t(long, TCP_CA_NAME_MAX - 1, optlen)); 602 if (ret < 0) 603 return -EFAULT; 604 605 name[ret] = 0; 606 607 cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); 608 609 ret = 0; 610 lock_sock(sk); 611 sockopt_seq_inc(msk); 612 mptcp_for_each_subflow(msk, subflow) { 613 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 614 int err; 615 616 lock_sock(ssk); 617 err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); 618 if (err < 0 && ret == 0) 619 ret = err; 620 subflow->setsockopt_seq = msk->setsockopt_seq; 621 release_sock(ssk); 622 } 623 624 if (ret == 0) 625 strscpy(msk->ca_name, name, sizeof(msk->ca_name)); 626 627 release_sock(sk); 628 return ret; 629 } 630 631 static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max, 632 int (*set_val)(struct sock *, int), 633 int *msk_val, int val) 634 { 635 struct mptcp_subflow_context *subflow; 636 int err = 0; 637 638 mptcp_for_each_subflow(msk, subflow) { 639 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 640 int ret; 641 642 lock_sock(ssk); 643 ret = set_val(ssk, val); 644 err = err ? : ret; 645 release_sock(ssk); 646 } 647 648 if (!err) { 649 *msk_val = val; 650 sockopt_seq_inc(msk); 651 } 652 653 return err; 654 } 655 656 static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val) 657 { 658 struct mptcp_subflow_context *subflow; 659 struct sock *sk = (struct sock *)msk; 660 661 sockopt_seq_inc(msk); 662 msk->cork = !!val; 663 mptcp_for_each_subflow(msk, subflow) { 664 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 665 666 lock_sock(ssk); 667 __tcp_sock_set_cork(ssk, !!val); 668 release_sock(ssk); 669 } 670 if (!val) 671 mptcp_check_and_set_pending(sk); 672 673 return 0; 674 } 675 676 static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val) 677 { 678 struct mptcp_subflow_context *subflow; 679 struct sock *sk = (struct sock *)msk; 680 681 sockopt_seq_inc(msk); 682 msk->nodelay = !!val; 683 mptcp_for_each_subflow(msk, subflow) { 684 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 685 686 lock_sock(ssk); 687 __tcp_sock_set_nodelay(ssk, !!val); 688 release_sock(ssk); 689 } 690 if (val) 691 mptcp_check_and_set_pending(sk); 692 return 0; 693 } 694 695 static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname, 696 sockptr_t optval, unsigned int optlen) 697 { 698 struct sock *sk = (struct sock *)msk; 699 struct sock *ssk; 700 int err; 701 702 err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 703 if (err != 0) 704 return err; 705 706 lock_sock(sk); 707 708 ssk = __mptcp_nmpc_sk(msk); 709 if (IS_ERR(ssk)) { 710 release_sock(sk); 711 return PTR_ERR(ssk); 712 } 713 714 switch (optname) { 715 case IP_FREEBIND: 716 inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); 717 break; 718 case IP_TRANSPARENT: 719 inet_assign_bit(TRANSPARENT, ssk, 720 inet_test_bit(TRANSPARENT, sk)); 721 break; 722 case IP_BIND_ADDRESS_NO_PORT: 723 inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, 724 inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); 725 break; 726 case IP_LOCAL_PORT_RANGE: 727 WRITE_ONCE(inet_sk(ssk)->local_port_range, 728 READ_ONCE(inet_sk(sk)->local_port_range)); 729 break; 730 default: 731 release_sock(sk); 732 WARN_ON_ONCE(1); 733 return -EOPNOTSUPP; 734 } 735 736 sockopt_seq_inc(msk); 737 release_sock(sk); 738 return 0; 739 } 740 741 static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, 742 sockptr_t optval, unsigned int optlen) 743 { 744 struct mptcp_subflow_context *subflow; 745 struct sock *sk = (struct sock *)msk; 746 int err, val; 747 748 err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 749 750 if (err != 0) 751 return err; 752 753 lock_sock(sk); 754 sockopt_seq_inc(msk); 755 val = READ_ONCE(inet_sk(sk)->tos); 756 mptcp_for_each_subflow(msk, subflow) { 757 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 758 bool slow; 759 760 slow = lock_sock_fast(ssk); 761 __ip_sock_set_tos(ssk, val); 762 unlock_sock_fast(ssk, slow); 763 } 764 release_sock(sk); 765 766 return 0; 767 } 768 769 static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, 770 sockptr_t optval, unsigned int optlen) 771 { 772 switch (optname) { 773 case IP_FREEBIND: 774 case IP_TRANSPARENT: 775 case IP_BIND_ADDRESS_NO_PORT: 776 case IP_LOCAL_PORT_RANGE: 777 return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen); 778 case IP_TOS: 779 return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); 780 } 781 782 return -EOPNOTSUPP; 783 } 784 785 static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, 786 sockptr_t optval, unsigned int optlen) 787 { 788 struct sock *sk = (struct sock *)msk; 789 struct sock *ssk; 790 int ret; 791 792 /* Limit to first subflow, before the connection establishment */ 793 lock_sock(sk); 794 ssk = __mptcp_nmpc_sk(msk); 795 if (IS_ERR(ssk)) { 796 ret = PTR_ERR(ssk); 797 goto unlock; 798 } 799 800 ret = tcp_setsockopt(ssk, level, optname, optval, optlen); 801 802 unlock: 803 release_sock(sk); 804 return ret; 805 } 806 807 static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level, 808 int optname, sockptr_t optval, 809 unsigned int optlen) 810 { 811 struct mptcp_subflow_context *subflow; 812 int ret = 0; 813 814 mptcp_for_each_subflow(msk, subflow) { 815 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 816 817 ret = tcp_setsockopt(ssk, level, optname, optval, optlen); 818 if (ret) 819 break; 820 } 821 822 if (!ret) 823 sockopt_seq_inc(msk); 824 825 return ret; 826 } 827 828 static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 829 sockptr_t optval, unsigned int optlen) 830 { 831 struct sock *sk = (void *)msk; 832 int ret, val; 833 834 switch (optname) { 835 case TCP_ULP: 836 return -EOPNOTSUPP; 837 case TCP_CONGESTION: 838 return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); 839 case TCP_DEFER_ACCEPT: 840 /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ 841 mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); 842 return 0; 843 case TCP_FASTOPEN: 844 case TCP_FASTOPEN_CONNECT: 845 case TCP_FASTOPEN_KEY: 846 case TCP_FASTOPEN_NO_COOKIE: 847 return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, 848 optval, optlen); 849 } 850 851 ret = mptcp_get_int_option(msk, optval, optlen, &val); 852 if (ret) 853 return ret; 854 855 lock_sock(sk); 856 switch (optname) { 857 case TCP_INQ: 858 if (val < 0 || val > 1) 859 ret = -EINVAL; 860 else 861 msk->recvmsg_inq = !!val; 862 break; 863 case TCP_NOTSENT_LOWAT: 864 WRITE_ONCE(msk->notsent_lowat, val); 865 mptcp_write_space(sk); 866 break; 867 case TCP_CORK: 868 ret = __mptcp_setsockopt_sol_tcp_cork(msk, val); 869 break; 870 case TCP_NODELAY: 871 ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val); 872 break; 873 case TCP_KEEPIDLE: 874 ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE, 875 &tcp_sock_set_keepidle_locked, 876 &msk->keepalive_idle, val); 877 break; 878 case TCP_KEEPINTVL: 879 ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL, 880 &tcp_sock_set_keepintvl, 881 &msk->keepalive_intvl, val); 882 break; 883 case TCP_KEEPCNT: 884 ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT, 885 &tcp_sock_set_keepcnt, 886 &msk->keepalive_cnt, 887 val); 888 break; 889 case TCP_MAXSEG: 890 msk->maxseg = val; 891 ret = mptcp_setsockopt_all_sf(msk, SOL_TCP, optname, optval, 892 optlen); 893 break; 894 default: 895 ret = -ENOPROTOOPT; 896 } 897 898 release_sock(sk); 899 return ret; 900 } 901 902 int mptcp_setsockopt(struct sock *sk, int level, int optname, 903 sockptr_t optval, unsigned int optlen) 904 { 905 struct mptcp_sock *msk = mptcp_sk(sk); 906 struct sock *ssk; 907 908 pr_debug("msk=%p\n", msk); 909 910 if (level == SOL_SOCKET) 911 return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); 912 913 if (!mptcp_supported_sockopt(level, optname)) 914 return -ENOPROTOOPT; 915 916 /* @@ the meaning of setsockopt() when the socket is connected and 917 * there are multiple subflows is not yet defined. It is up to the 918 * MPTCP-level socket to configure the subflows until the subflow 919 * is in TCP fallback, when TCP socket options are passed through 920 * to the one remaining subflow. 921 */ 922 lock_sock(sk); 923 ssk = __mptcp_tcp_fallback(msk); 924 release_sock(sk); 925 if (ssk) 926 return tcp_setsockopt(ssk, level, optname, optval, optlen); 927 928 if (level == SOL_IP) 929 return mptcp_setsockopt_v4(msk, optname, optval, optlen); 930 931 if (level == SOL_IPV6) 932 return mptcp_setsockopt_v6(msk, optname, optval, optlen); 933 934 if (level == SOL_TCP) 935 return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); 936 937 return -EOPNOTSUPP; 938 } 939 940 static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, 941 char __user *optval, int __user *optlen) 942 { 943 struct sock *sk = (struct sock *)msk; 944 struct sock *ssk; 945 int ret; 946 947 lock_sock(sk); 948 ssk = msk->first; 949 if (ssk) 950 goto get; 951 952 ssk = __mptcp_nmpc_sk(msk); 953 if (IS_ERR(ssk)) { 954 ret = PTR_ERR(ssk); 955 goto out; 956 } 957 958 get: 959 ret = tcp_getsockopt(ssk, level, optname, optval, optlen); 960 961 out: 962 release_sock(sk); 963 return ret; 964 } 965 966 void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) 967 { 968 struct sock *sk = (struct sock *)msk; 969 u32 flags = 0; 970 bool slow; 971 u32 now; 972 973 memset(info, 0, sizeof(*info)); 974 975 info->mptcpi_extra_subflows = READ_ONCE(msk->pm.extra_subflows); 976 info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); 977 info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); 978 info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); 979 980 if (inet_sk_state_load(sk) == TCP_LISTEN) 981 return; 982 983 /* The following limits only make sense for the in-kernel PM */ 984 if (mptcp_pm_is_kernel(msk)) { 985 info->mptcpi_limit_extra_subflows = 986 mptcp_pm_get_limit_extra_subflows(msk); 987 info->mptcpi_endp_signal_max = 988 mptcp_pm_get_endp_signal_max(msk); 989 info->mptcpi_limit_add_addr_accepted = 990 mptcp_pm_get_limit_add_addr_accepted(msk); 991 info->mptcpi_endp_subflow_max = 992 mptcp_pm_get_endp_subflow_max(msk); 993 info->mptcpi_endp_laminar_max = 994 mptcp_pm_get_endp_laminar_max(msk); 995 info->mptcpi_endp_fullmesh_max = 996 mptcp_pm_get_endp_fullmesh_max(msk); 997 } 998 999 if (__mptcp_check_fallback(msk)) 1000 flags |= MPTCP_INFO_FLAG_FALLBACK; 1001 if (READ_ONCE(msk->can_ack)) 1002 flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; 1003 info->mptcpi_flags = flags; 1004 1005 slow = lock_sock_fast(sk); 1006 info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); 1007 info->mptcpi_token = msk->token; 1008 info->mptcpi_write_seq = msk->write_seq; 1009 info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; 1010 info->mptcpi_bytes_sent = msk->bytes_sent; 1011 info->mptcpi_bytes_received = msk->bytes_received; 1012 info->mptcpi_bytes_retrans = msk->bytes_retrans; 1013 info->mptcpi_subflows_total = info->mptcpi_extra_subflows + 1014 __mptcp_has_initial_subflow(msk); 1015 now = tcp_jiffies32; 1016 info->mptcpi_last_data_sent = jiffies_to_msecs(now - msk->last_data_sent); 1017 info->mptcpi_last_data_recv = jiffies_to_msecs(now - msk->last_data_recv); 1018 unlock_sock_fast(sk, slow); 1019 1020 mptcp_data_lock(sk); 1021 info->mptcpi_last_ack_recv = jiffies_to_msecs(now - msk->last_ack_recv); 1022 info->mptcpi_snd_una = msk->snd_una; 1023 info->mptcpi_rcv_nxt = msk->ack_seq; 1024 info->mptcpi_bytes_acked = msk->bytes_acked; 1025 mptcp_data_unlock(sk); 1026 } 1027 EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); 1028 1029 static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) 1030 { 1031 struct mptcp_info m_info; 1032 int len; 1033 1034 if (get_user(len, optlen)) 1035 return -EFAULT; 1036 1037 /* When used only to check if a fallback to TCP happened. */ 1038 if (len == 0) 1039 return 0; 1040 1041 len = min_t(unsigned int, len, sizeof(struct mptcp_info)); 1042 1043 mptcp_diag_fill_info(msk, &m_info); 1044 1045 if (put_user(len, optlen)) 1046 return -EFAULT; 1047 1048 if (copy_to_user(optval, &m_info, len)) 1049 return -EFAULT; 1050 1051 return 0; 1052 } 1053 1054 static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, 1055 char __user *optval, 1056 u32 copied, 1057 int __user *optlen) 1058 { 1059 u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); 1060 1061 if (copied) 1062 copied += sfd->size_subflow_data; 1063 else 1064 copied = copylen; 1065 1066 if (put_user(copied, optlen)) 1067 return -EFAULT; 1068 1069 if (copy_to_user(optval, sfd, copylen)) 1070 return -EFAULT; 1071 1072 return 0; 1073 } 1074 1075 static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, 1076 char __user *optval, 1077 int __user *optlen) 1078 { 1079 int len, copylen; 1080 1081 if (get_user(len, optlen)) 1082 return -EFAULT; 1083 1084 /* if mptcp_subflow_data size is changed, need to adjust 1085 * this function to deal with programs using old version. 1086 */ 1087 BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); 1088 1089 if (len < MIN_INFO_OPTLEN_SIZE) 1090 return -EINVAL; 1091 1092 memset(sfd, 0, sizeof(*sfd)); 1093 1094 copylen = min_t(unsigned int, len, sizeof(*sfd)); 1095 if (copy_from_user(sfd, optval, copylen)) 1096 return -EFAULT; 1097 1098 /* size_subflow_data is u32, but len is signed */ 1099 if (sfd->size_subflow_data > INT_MAX || 1100 sfd->size_user > INT_MAX) 1101 return -EINVAL; 1102 1103 if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || 1104 sfd->size_subflow_data > len) 1105 return -EINVAL; 1106 1107 if (sfd->num_subflows || sfd->size_kernel) 1108 return -EINVAL; 1109 1110 return len - sfd->size_subflow_data; 1111 } 1112 1113 static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, 1114 int __user *optlen) 1115 { 1116 struct mptcp_subflow_context *subflow; 1117 struct sock *sk = (struct sock *)msk; 1118 unsigned int sfcount = 0, copied = 0; 1119 struct mptcp_subflow_data sfd; 1120 char __user *infoptr; 1121 int len; 1122 1123 len = mptcp_get_subflow_data(&sfd, optval, optlen); 1124 if (len < 0) 1125 return len; 1126 1127 sfd.size_kernel = sizeof(struct tcp_info); 1128 sfd.size_user = min_t(unsigned int, sfd.size_user, 1129 sizeof(struct tcp_info)); 1130 1131 infoptr = optval + sfd.size_subflow_data; 1132 1133 lock_sock(sk); 1134 1135 mptcp_for_each_subflow(msk, subflow) { 1136 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1137 1138 ++sfcount; 1139 1140 if (len && len >= sfd.size_user) { 1141 struct tcp_info info; 1142 1143 tcp_get_info(ssk, &info); 1144 1145 if (copy_to_user(infoptr, &info, sfd.size_user)) { 1146 release_sock(sk); 1147 return -EFAULT; 1148 } 1149 1150 infoptr += sfd.size_user; 1151 copied += sfd.size_user; 1152 len -= sfd.size_user; 1153 } 1154 } 1155 1156 release_sock(sk); 1157 1158 sfd.num_subflows = sfcount; 1159 1160 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 1161 return -EFAULT; 1162 1163 return 0; 1164 } 1165 1166 static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) 1167 { 1168 const struct inet_sock *inet = inet_sk(sk); 1169 1170 memset(a, 0, sizeof(*a)); 1171 1172 if (sk->sk_family == AF_INET) { 1173 a->sin_local.sin_family = AF_INET; 1174 a->sin_local.sin_port = inet->inet_sport; 1175 a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; 1176 1177 if (!a->sin_local.sin_addr.s_addr) 1178 a->sin_local.sin_addr.s_addr = inet->inet_saddr; 1179 1180 a->sin_remote.sin_family = AF_INET; 1181 a->sin_remote.sin_port = inet->inet_dport; 1182 a->sin_remote.sin_addr.s_addr = inet->inet_daddr; 1183 #if IS_ENABLED(CONFIG_IPV6) 1184 } else if (sk->sk_family == AF_INET6) { 1185 const struct ipv6_pinfo *np = inet6_sk(sk); 1186 1187 if (WARN_ON_ONCE(!np)) 1188 return; 1189 1190 a->sin6_local.sin6_family = AF_INET6; 1191 a->sin6_local.sin6_port = inet->inet_sport; 1192 1193 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 1194 a->sin6_local.sin6_addr = np->saddr; 1195 else 1196 a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; 1197 1198 a->sin6_remote.sin6_family = AF_INET6; 1199 a->sin6_remote.sin6_port = inet->inet_dport; 1200 a->sin6_remote.sin6_addr = sk->sk_v6_daddr; 1201 #endif 1202 } 1203 } 1204 1205 static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, 1206 int __user *optlen) 1207 { 1208 struct mptcp_subflow_context *subflow; 1209 struct sock *sk = (struct sock *)msk; 1210 unsigned int sfcount = 0, copied = 0; 1211 struct mptcp_subflow_data sfd; 1212 char __user *addrptr; 1213 int len; 1214 1215 len = mptcp_get_subflow_data(&sfd, optval, optlen); 1216 if (len < 0) 1217 return len; 1218 1219 sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); 1220 sfd.size_user = min_t(unsigned int, sfd.size_user, 1221 sizeof(struct mptcp_subflow_addrs)); 1222 1223 addrptr = optval + sfd.size_subflow_data; 1224 1225 lock_sock(sk); 1226 1227 mptcp_for_each_subflow(msk, subflow) { 1228 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1229 1230 ++sfcount; 1231 1232 if (len && len >= sfd.size_user) { 1233 struct mptcp_subflow_addrs a; 1234 1235 mptcp_get_sub_addrs(ssk, &a); 1236 1237 if (copy_to_user(addrptr, &a, sfd.size_user)) { 1238 release_sock(sk); 1239 return -EFAULT; 1240 } 1241 1242 addrptr += sfd.size_user; 1243 copied += sfd.size_user; 1244 len -= sfd.size_user; 1245 } 1246 } 1247 1248 release_sock(sk); 1249 1250 sfd.num_subflows = sfcount; 1251 1252 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 1253 return -EFAULT; 1254 1255 return 0; 1256 } 1257 1258 static int mptcp_get_full_info(struct mptcp_full_info *mfi, 1259 char __user *optval, 1260 int __user *optlen) 1261 { 1262 int len; 1263 1264 BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != 1265 MIN_FULL_INFO_OPTLEN_SIZE); 1266 1267 if (get_user(len, optlen)) 1268 return -EFAULT; 1269 1270 if (len < MIN_FULL_INFO_OPTLEN_SIZE) 1271 return -EINVAL; 1272 1273 memset(mfi, 0, sizeof(*mfi)); 1274 if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) 1275 return -EFAULT; 1276 1277 if (mfi->size_tcpinfo_kernel || 1278 mfi->size_sfinfo_kernel || 1279 mfi->num_subflows) 1280 return -EINVAL; 1281 1282 if (mfi->size_sfinfo_user > INT_MAX || 1283 mfi->size_tcpinfo_user > INT_MAX) 1284 return -EINVAL; 1285 1286 return len - MIN_FULL_INFO_OPTLEN_SIZE; 1287 } 1288 1289 static int mptcp_put_full_info(struct mptcp_full_info *mfi, 1290 char __user *optval, 1291 u32 copylen, 1292 int __user *optlen) 1293 { 1294 copylen += MIN_FULL_INFO_OPTLEN_SIZE; 1295 if (put_user(copylen, optlen)) 1296 return -EFAULT; 1297 1298 if (copy_to_user(optval, mfi, copylen)) 1299 return -EFAULT; 1300 return 0; 1301 } 1302 1303 static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, 1304 int __user *optlen) 1305 { 1306 unsigned int sfcount = 0, copylen = 0; 1307 struct mptcp_subflow_context *subflow; 1308 struct sock *sk = (struct sock *)msk; 1309 void __user *tcpinfoptr, *sfinfoptr; 1310 struct mptcp_full_info mfi; 1311 int len; 1312 1313 len = mptcp_get_full_info(&mfi, optval, optlen); 1314 if (len < 0) 1315 return len; 1316 1317 /* don't bother filling the mptcp info if there is not enough 1318 * user-space-provided storage 1319 */ 1320 if (len > 0) { 1321 mptcp_diag_fill_info(msk, &mfi.mptcp_info); 1322 copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); 1323 } 1324 1325 mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); 1326 mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, 1327 sizeof(struct tcp_info)); 1328 sfinfoptr = u64_to_user_ptr(mfi.subflow_info); 1329 mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); 1330 mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, 1331 sizeof(struct mptcp_subflow_info)); 1332 tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); 1333 1334 lock_sock(sk); 1335 mptcp_for_each_subflow(msk, subflow) { 1336 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1337 struct mptcp_subflow_info sfinfo; 1338 struct tcp_info tcp_info; 1339 1340 if (sfcount++ >= mfi.size_arrays_user) 1341 continue; 1342 1343 /* fetch addr/tcp_info only if the user space buffers 1344 * are wide enough 1345 */ 1346 memset(&sfinfo, 0, sizeof(sfinfo)); 1347 sfinfo.id = subflow->subflow_id; 1348 if (mfi.size_sfinfo_user > 1349 offsetof(struct mptcp_subflow_info, addrs)) 1350 mptcp_get_sub_addrs(ssk, &sfinfo.addrs); 1351 if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) 1352 goto fail_release; 1353 1354 if (mfi.size_tcpinfo_user) { 1355 tcp_get_info(ssk, &tcp_info); 1356 if (copy_to_user(tcpinfoptr, &tcp_info, 1357 mfi.size_tcpinfo_user)) 1358 goto fail_release; 1359 } 1360 1361 tcpinfoptr += mfi.size_tcpinfo_user; 1362 sfinfoptr += mfi.size_sfinfo_user; 1363 } 1364 release_sock(sk); 1365 1366 mfi.num_subflows = sfcount; 1367 if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) 1368 return -EFAULT; 1369 1370 return 0; 1371 1372 fail_release: 1373 release_sock(sk); 1374 return -EFAULT; 1375 } 1376 1377 static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, 1378 int __user *optlen, int val) 1379 { 1380 int len; 1381 1382 if (get_user(len, optlen)) 1383 return -EFAULT; 1384 if (len < 0) 1385 return -EINVAL; 1386 1387 if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { 1388 unsigned char ucval = (unsigned char)val; 1389 1390 len = 1; 1391 if (put_user(len, optlen)) 1392 return -EFAULT; 1393 if (copy_to_user(optval, &ucval, 1)) 1394 return -EFAULT; 1395 } else { 1396 len = min_t(unsigned int, len, sizeof(int)); 1397 if (put_user(len, optlen)) 1398 return -EFAULT; 1399 if (copy_to_user(optval, &val, len)) 1400 return -EFAULT; 1401 } 1402 1403 return 0; 1404 } 1405 1406 static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 1407 char __user *optval, int __user *optlen) 1408 { 1409 struct sock *sk = (void *)msk; 1410 1411 switch (optname) { 1412 case TCP_ULP: 1413 case TCP_CONGESTION: 1414 case TCP_INFO: 1415 case TCP_CC_INFO: 1416 case TCP_DEFER_ACCEPT: 1417 case TCP_FASTOPEN: 1418 case TCP_FASTOPEN_CONNECT: 1419 case TCP_FASTOPEN_KEY: 1420 case TCP_FASTOPEN_NO_COOKIE: 1421 return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, 1422 optval, optlen); 1423 case TCP_INQ: 1424 return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq); 1425 case TCP_CORK: 1426 return mptcp_put_int_option(msk, optval, optlen, msk->cork); 1427 case TCP_NODELAY: 1428 return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); 1429 case TCP_KEEPIDLE: 1430 return mptcp_put_int_option(msk, optval, optlen, 1431 msk->keepalive_idle ? : 1432 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ); 1433 case TCP_KEEPINTVL: 1434 return mptcp_put_int_option(msk, optval, optlen, 1435 msk->keepalive_intvl ? : 1436 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ); 1437 case TCP_KEEPCNT: 1438 return mptcp_put_int_option(msk, optval, optlen, 1439 msk->keepalive_cnt ? : 1440 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes)); 1441 case TCP_NOTSENT_LOWAT: 1442 return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat); 1443 case TCP_IS_MPTCP: 1444 return mptcp_put_int_option(msk, optval, optlen, 1); 1445 case TCP_MAXSEG: 1446 return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, 1447 optval, optlen); 1448 } 1449 return -EOPNOTSUPP; 1450 } 1451 1452 static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, 1453 char __user *optval, int __user *optlen) 1454 { 1455 struct sock *sk = (void *)msk; 1456 1457 switch (optname) { 1458 case IP_TOS: 1459 return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); 1460 case IP_FREEBIND: 1461 return mptcp_put_int_option(msk, optval, optlen, 1462 inet_test_bit(FREEBIND, sk)); 1463 case IP_TRANSPARENT: 1464 return mptcp_put_int_option(msk, optval, optlen, 1465 inet_test_bit(TRANSPARENT, sk)); 1466 case IP_BIND_ADDRESS_NO_PORT: 1467 return mptcp_put_int_option(msk, optval, optlen, 1468 inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); 1469 case IP_LOCAL_PORT_RANGE: 1470 return mptcp_put_int_option(msk, optval, optlen, 1471 READ_ONCE(inet_sk(sk)->local_port_range)); 1472 } 1473 1474 return -EOPNOTSUPP; 1475 } 1476 1477 static int mptcp_getsockopt_v6(struct mptcp_sock *msk, int optname, 1478 char __user *optval, int __user *optlen) 1479 { 1480 struct sock *sk = (void *)msk; 1481 1482 switch (optname) { 1483 case IPV6_V6ONLY: 1484 return mptcp_put_int_option(msk, optval, optlen, 1485 sk->sk_ipv6only); 1486 case IPV6_TRANSPARENT: 1487 return mptcp_put_int_option(msk, optval, optlen, 1488 inet_test_bit(TRANSPARENT, sk)); 1489 case IPV6_FREEBIND: 1490 return mptcp_put_int_option(msk, optval, optlen, 1491 inet_test_bit(FREEBIND, sk)); 1492 } 1493 1494 return -EOPNOTSUPP; 1495 } 1496 1497 static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, 1498 char __user *optval, int __user *optlen) 1499 { 1500 switch (optname) { 1501 case MPTCP_INFO: 1502 return mptcp_getsockopt_info(msk, optval, optlen); 1503 case MPTCP_FULL_INFO: 1504 return mptcp_getsockopt_full_info(msk, optval, optlen); 1505 case MPTCP_TCPINFO: 1506 return mptcp_getsockopt_tcpinfo(msk, optval, optlen); 1507 case MPTCP_SUBFLOW_ADDRS: 1508 return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); 1509 } 1510 1511 return -EOPNOTSUPP; 1512 } 1513 1514 int mptcp_getsockopt(struct sock *sk, int level, int optname, 1515 char __user *optval, int __user *option) 1516 { 1517 struct mptcp_sock *msk = mptcp_sk(sk); 1518 struct sock *ssk; 1519 1520 pr_debug("msk=%p\n", msk); 1521 1522 /* @@ the meaning of setsockopt() when the socket is connected and 1523 * there are multiple subflows is not yet defined. It is up to the 1524 * MPTCP-level socket to configure the subflows until the subflow 1525 * is in TCP fallback, when socket options are passed through 1526 * to the one remaining subflow. 1527 */ 1528 lock_sock(sk); 1529 ssk = __mptcp_tcp_fallback(msk); 1530 release_sock(sk); 1531 if (ssk) 1532 return tcp_getsockopt(ssk, level, optname, optval, option); 1533 1534 if (level == SOL_IP) 1535 return mptcp_getsockopt_v4(msk, optname, optval, option); 1536 if (level == SOL_IPV6) 1537 return mptcp_getsockopt_v6(msk, optname, optval, option); 1538 if (level == SOL_TCP) 1539 return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); 1540 if (level == SOL_MPTCP) 1541 return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); 1542 return -EOPNOTSUPP; 1543 } 1544 1545 static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) 1546 { 1547 static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; 1548 struct sock *sk = (struct sock *)msk; 1549 bool keep_open; 1550 1551 keep_open = sock_flag(sk, SOCK_KEEPOPEN); 1552 if (ssk->sk_prot->keepalive) 1553 ssk->sk_prot->keepalive(ssk, keep_open); 1554 sock_valbool_flag(ssk, SOCK_KEEPOPEN, keep_open); 1555 1556 ssk->sk_priority = sk->sk_priority; 1557 ssk->sk_bound_dev_if = sk->sk_bound_dev_if; 1558 ssk->sk_incoming_cpu = sk->sk_incoming_cpu; 1559 ssk->sk_ipv6only = sk->sk_ipv6only; 1560 __ip_sock_set_tos(ssk, inet_sk(sk)->tos); 1561 1562 if (sk->sk_userlocks & tx_rx_locks) { 1563 ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; 1564 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) { 1565 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 1566 mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; 1567 } 1568 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1569 __mptcp_subflow_set_rcvbuf(ssk, sk->sk_rcvbuf); 1570 } 1571 1572 if (sock_flag(sk, SOCK_LINGER)) { 1573 ssk->sk_lingertime = sk->sk_lingertime; 1574 sock_set_flag(ssk, SOCK_LINGER); 1575 } else { 1576 sock_reset_flag(ssk, SOCK_LINGER); 1577 } 1578 1579 if (sk->sk_mark != ssk->sk_mark) { 1580 ssk->sk_mark = sk->sk_mark; 1581 sk_dst_reset(ssk); 1582 } 1583 1584 sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); 1585 1586 if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) 1587 tcp_set_congestion_control(ssk, msk->ca_name, false, true); 1588 __tcp_sock_set_cork(ssk, !!msk->cork); 1589 __tcp_sock_set_nodelay(ssk, !!msk->nodelay); 1590 tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle); 1591 tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl); 1592 tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt); 1593 tcp_sock_set_maxseg(ssk, msk->maxseg); 1594 1595 inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); 1596 inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); 1597 inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); 1598 WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range)); 1599 } 1600 1601 void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) 1602 { 1603 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 1604 1605 msk_owned_by_me(msk); 1606 1607 ssk->sk_rcvlowat = 0; 1608 1609 /* subflows must ignore any latency-related settings: will not affect 1610 * the user-space - only the msk is relevant - but will foul the 1611 * mptcp scheduler 1612 */ 1613 tcp_sk(ssk)->notsent_lowat = UINT_MAX; 1614 1615 if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { 1616 sync_socket_options(msk, ssk); 1617 1618 subflow->setsockopt_seq = msk->setsockopt_seq; 1619 } 1620 } 1621 1622 /* unfortunately this is different enough from the tcp version so 1623 * that we can't factor it out 1624 */ 1625 int mptcp_set_rcvlowat(struct sock *sk, int val) 1626 { 1627 struct mptcp_subflow_context *subflow; 1628 int space, cap; 1629 1630 /* bpf can land here with a wrong sk type */ 1631 if (sk->sk_protocol == IPPROTO_TCP) 1632 return -EINVAL; 1633 1634 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1635 cap = sk->sk_rcvbuf >> 1; 1636 else 1637 cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; 1638 val = min(val, cap); 1639 WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); 1640 1641 /* Check if we need to signal EPOLLIN right now */ 1642 if (mptcp_epollin_ready(sk)) 1643 sk->sk_data_ready(sk); 1644 1645 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1646 return 0; 1647 1648 space = mptcp_space_from_win(sk, val); 1649 if (space <= sk->sk_rcvbuf) 1650 return 0; 1651 1652 /* propagate the rcvbuf changes to all the subflows */ 1653 WRITE_ONCE(sk->sk_rcvbuf, space); 1654 mptcp_for_each_subflow(mptcp_sk(sk), subflow) { 1655 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1656 bool slow; 1657 1658 slow = lock_sock_fast(ssk); 1659 WRITE_ONCE(ssk->sk_rcvbuf, space); 1660 WRITE_ONCE(tcp_sk(ssk)->window_clamp, val); 1661 unlock_sock_fast(ssk, slow); 1662 } 1663 return 0; 1664 } 1665