1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2021, Red Hat. 5 */ 6 7 #define pr_fmt(fmt) "MPTCP: " fmt 8 9 #include <linux/kernel.h> 10 #include <linux/module.h> 11 #include <net/sock.h> 12 #include <net/protocol.h> 13 #include <net/tcp.h> 14 #include <net/mptcp.h> 15 #include "protocol.h" 16 17 #define MIN_INFO_OPTLEN_SIZE 16 18 #define MIN_FULL_INFO_OPTLEN_SIZE 40 19 20 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) 21 { 22 msk_owned_by_me(msk); 23 24 if (likely(!__mptcp_check_fallback(msk))) 25 return NULL; 26 27 return msk->first; 28 } 29 30 static u32 sockopt_seq_reset(const struct sock *sk) 31 { 32 sock_owned_by_me(sk); 33 34 /* Highbits contain state. Allows to distinguish sockopt_seq 35 * of listener and established: 36 * s0 = new_listener() 37 * sockopt(s0) - seq is 1 38 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) 39 * sockopt(s0) - seq increments to 2 on s0 40 * sockopt(s1) // seq increments to 2 on s1 (different option) 41 * new ssk completes join, inherits options from s0 // seq 2 42 * Needs sync from mptcp join logic, but ssk->seq == msk->seq 43 * 44 * Set High order bits to sk_state so ssk->seq == msk->seq test 45 * will fail. 46 */ 47 48 return (u32)sk->sk_state << 24u; 49 } 50 51 static void sockopt_seq_inc(struct mptcp_sock *msk) 52 { 53 u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; 54 55 msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; 56 } 57 58 static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, 59 unsigned int optlen, int *val) 60 { 61 if (optlen < sizeof(int)) 62 return -EINVAL; 63 64 if (copy_from_sockptr(val, optval, sizeof(*val))) 65 return -EFAULT; 66 67 return 0; 68 } 69 70 static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) 71 { 72 struct mptcp_subflow_context *subflow; 73 struct sock *sk = (struct sock *)msk; 74 75 lock_sock(sk); 76 sockopt_seq_inc(msk); 77 78 mptcp_for_each_subflow(msk, subflow) { 79 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 80 bool slow = lock_sock_fast(ssk); 81 82 switch (optname) { 83 case SO_DEBUG: 84 sock_valbool_flag(ssk, SOCK_DBG, !!val); 85 break; 86 case SO_KEEPALIVE: 87 if (ssk->sk_prot->keepalive) 88 ssk->sk_prot->keepalive(ssk, !!val); 89 sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); 90 break; 91 case SO_PRIORITY: 92 WRITE_ONCE(ssk->sk_priority, val); 93 break; 94 case SO_SNDBUF: 95 case SO_SNDBUFFORCE: 96 ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; 97 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 98 mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; 99 break; 100 case SO_RCVBUF: 101 case SO_RCVBUFFORCE: 102 ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; 103 WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); 104 break; 105 case SO_MARK: 106 if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { 107 WRITE_ONCE(ssk->sk_mark, sk->sk_mark); 108 sk_dst_reset(ssk); 109 } 110 break; 111 case SO_INCOMING_CPU: 112 WRITE_ONCE(ssk->sk_incoming_cpu, val); 113 break; 114 } 115 116 subflow->setsockopt_seq = msk->setsockopt_seq; 117 unlock_sock_fast(ssk, slow); 118 } 119 120 release_sock(sk); 121 } 122 123 static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) 124 { 125 sockptr_t optval = KERNEL_SOCKPTR(&val); 126 struct sock *sk = (struct sock *)msk; 127 int ret; 128 129 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 130 optval, sizeof(val)); 131 if (ret) 132 return ret; 133 134 mptcp_sol_socket_sync_intval(msk, optname, val); 135 return 0; 136 } 137 138 static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) 139 { 140 struct sock *sk = (struct sock *)msk; 141 142 WRITE_ONCE(sk->sk_incoming_cpu, val); 143 144 mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); 145 } 146 147 static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) 148 { 149 sockptr_t optval = KERNEL_SOCKPTR(&val); 150 struct mptcp_subflow_context *subflow; 151 struct sock *sk = (struct sock *)msk; 152 int ret; 153 154 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 155 optval, sizeof(val)); 156 if (ret) 157 return ret; 158 159 lock_sock(sk); 160 mptcp_for_each_subflow(msk, subflow) { 161 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 162 bool slow = lock_sock_fast(ssk); 163 164 sock_set_timestamp(sk, optname, !!val); 165 unlock_sock_fast(ssk, slow); 166 } 167 168 release_sock(sk); 169 return 0; 170 } 171 172 static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, 173 sockptr_t optval, 174 unsigned int optlen) 175 { 176 int val, ret; 177 178 ret = mptcp_get_int_option(msk, optval, optlen, &val); 179 if (ret) 180 return ret; 181 182 switch (optname) { 183 case SO_KEEPALIVE: 184 mptcp_sol_socket_sync_intval(msk, optname, val); 185 return 0; 186 case SO_DEBUG: 187 case SO_MARK: 188 case SO_PRIORITY: 189 case SO_SNDBUF: 190 case SO_SNDBUFFORCE: 191 case SO_RCVBUF: 192 case SO_RCVBUFFORCE: 193 return mptcp_sol_socket_intval(msk, optname, val); 194 case SO_INCOMING_CPU: 195 mptcp_so_incoming_cpu(msk, val); 196 return 0; 197 case SO_TIMESTAMP_OLD: 198 case SO_TIMESTAMP_NEW: 199 case SO_TIMESTAMPNS_OLD: 200 case SO_TIMESTAMPNS_NEW: 201 return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); 202 } 203 204 return -ENOPROTOOPT; 205 } 206 207 static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, 208 int optname, 209 sockptr_t optval, 210 unsigned int optlen) 211 { 212 struct mptcp_subflow_context *subflow; 213 struct sock *sk = (struct sock *)msk; 214 struct so_timestamping timestamping; 215 int ret; 216 217 if (optlen == sizeof(timestamping)) { 218 if (copy_from_sockptr(×tamping, optval, 219 sizeof(timestamping))) 220 return -EFAULT; 221 } else if (optlen == sizeof(int)) { 222 memset(×tamping, 0, sizeof(timestamping)); 223 224 if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) 225 return -EFAULT; 226 } else { 227 return -EINVAL; 228 } 229 230 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 231 KERNEL_SOCKPTR(×tamping), 232 sizeof(timestamping)); 233 if (ret) 234 return ret; 235 236 lock_sock(sk); 237 238 mptcp_for_each_subflow(msk, subflow) { 239 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 240 bool slow = lock_sock_fast(ssk); 241 242 sock_set_timestamping(sk, optname, timestamping); 243 unlock_sock_fast(ssk, slow); 244 } 245 246 release_sock(sk); 247 248 return 0; 249 } 250 251 static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, 252 unsigned int optlen) 253 { 254 struct mptcp_subflow_context *subflow; 255 struct sock *sk = (struct sock *)msk; 256 struct linger ling; 257 sockptr_t kopt; 258 int ret; 259 260 if (optlen < sizeof(ling)) 261 return -EINVAL; 262 263 if (copy_from_sockptr(&ling, optval, sizeof(ling))) 264 return -EFAULT; 265 266 kopt = KERNEL_SOCKPTR(&ling); 267 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); 268 if (ret) 269 return ret; 270 271 lock_sock(sk); 272 sockopt_seq_inc(msk); 273 mptcp_for_each_subflow(msk, subflow) { 274 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 275 bool slow = lock_sock_fast(ssk); 276 277 if (!ling.l_onoff) { 278 sock_reset_flag(ssk, SOCK_LINGER); 279 } else { 280 ssk->sk_lingertime = sk->sk_lingertime; 281 sock_set_flag(ssk, SOCK_LINGER); 282 } 283 284 subflow->setsockopt_seq = msk->setsockopt_seq; 285 unlock_sock_fast(ssk, slow); 286 } 287 288 release_sock(sk); 289 return 0; 290 } 291 292 static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, 293 sockptr_t optval, unsigned int optlen) 294 { 295 struct sock *sk = (struct sock *)msk; 296 struct sock *ssk; 297 int ret; 298 299 switch (optname) { 300 case SO_REUSEPORT: 301 case SO_REUSEADDR: 302 case SO_BINDTODEVICE: 303 case SO_BINDTOIFINDEX: 304 lock_sock(sk); 305 ssk = __mptcp_nmpc_sk(msk); 306 if (IS_ERR(ssk)) { 307 release_sock(sk); 308 return PTR_ERR(ssk); 309 } 310 311 ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen); 312 if (ret == 0) { 313 if (optname == SO_REUSEPORT) 314 sk->sk_reuseport = ssk->sk_reuseport; 315 else if (optname == SO_REUSEADDR) 316 sk->sk_reuse = ssk->sk_reuse; 317 else if (optname == SO_BINDTODEVICE) 318 sk->sk_bound_dev_if = ssk->sk_bound_dev_if; 319 else if (optname == SO_BINDTOIFINDEX) 320 sk->sk_bound_dev_if = ssk->sk_bound_dev_if; 321 } 322 release_sock(sk); 323 return ret; 324 case SO_KEEPALIVE: 325 case SO_PRIORITY: 326 case SO_SNDBUF: 327 case SO_SNDBUFFORCE: 328 case SO_RCVBUF: 329 case SO_RCVBUFFORCE: 330 case SO_MARK: 331 case SO_INCOMING_CPU: 332 case SO_DEBUG: 333 case SO_TIMESTAMP_OLD: 334 case SO_TIMESTAMP_NEW: 335 case SO_TIMESTAMPNS_OLD: 336 case SO_TIMESTAMPNS_NEW: 337 return mptcp_setsockopt_sol_socket_int(msk, optname, optval, 338 optlen); 339 case SO_TIMESTAMPING_OLD: 340 case SO_TIMESTAMPING_NEW: 341 return mptcp_setsockopt_sol_socket_timestamping(msk, optname, 342 optval, optlen); 343 case SO_LINGER: 344 return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); 345 case SO_RCVLOWAT: 346 case SO_RCVTIMEO_OLD: 347 case SO_RCVTIMEO_NEW: 348 case SO_SNDTIMEO_OLD: 349 case SO_SNDTIMEO_NEW: 350 case SO_BUSY_POLL: 351 case SO_PREFER_BUSY_POLL: 352 case SO_BUSY_POLL_BUDGET: 353 /* No need to copy: only relevant for msk */ 354 return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); 355 case SO_NO_CHECK: 356 case SO_DONTROUTE: 357 case SO_BROADCAST: 358 case SO_BSDCOMPAT: 359 case SO_PASSCRED: 360 case SO_PASSPIDFD: 361 case SO_PASSSEC: 362 case SO_RXQ_OVFL: 363 case SO_WIFI_STATUS: 364 case SO_NOFCS: 365 case SO_SELECT_ERR_QUEUE: 366 return 0; 367 } 368 369 /* SO_OOBINLINE is not supported, let's avoid the related mess 370 * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, 371 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, 372 * we must be careful with subflows 373 * 374 * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks 375 * explicitly the sk_protocol field 376 * 377 * SO_PEEK_OFF is unsupported, as it is for plain TCP 378 * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows 379 * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, 380 * but likely needs careful design 381 * 382 * SO_ZEROCOPY is currently unsupported, TODO in sndmsg 383 * SO_TXTIME is currently unsupported 384 */ 385 386 return -EOPNOTSUPP; 387 } 388 389 static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, 390 sockptr_t optval, unsigned int optlen) 391 { 392 struct sock *sk = (struct sock *)msk; 393 int ret = -EOPNOTSUPP; 394 struct sock *ssk; 395 396 switch (optname) { 397 case IPV6_V6ONLY: 398 case IPV6_TRANSPARENT: 399 case IPV6_FREEBIND: 400 lock_sock(sk); 401 ssk = __mptcp_nmpc_sk(msk); 402 if (IS_ERR(ssk)) { 403 release_sock(sk); 404 return PTR_ERR(ssk); 405 } 406 407 ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen); 408 if (ret != 0) { 409 release_sock(sk); 410 return ret; 411 } 412 413 sockopt_seq_inc(msk); 414 415 switch (optname) { 416 case IPV6_V6ONLY: 417 sk->sk_ipv6only = ssk->sk_ipv6only; 418 break; 419 case IPV6_TRANSPARENT: 420 inet_assign_bit(TRANSPARENT, sk, 421 inet_test_bit(TRANSPARENT, ssk)); 422 break; 423 case IPV6_FREEBIND: 424 inet_assign_bit(FREEBIND, sk, 425 inet_test_bit(FREEBIND, ssk)); 426 break; 427 } 428 429 release_sock(sk); 430 break; 431 } 432 433 return ret; 434 } 435 436 static bool mptcp_supported_sockopt(int level, int optname) 437 { 438 if (level == SOL_IP) { 439 switch (optname) { 440 /* should work fine */ 441 case IP_FREEBIND: 442 case IP_TRANSPARENT: 443 case IP_BIND_ADDRESS_NO_PORT: 444 case IP_LOCAL_PORT_RANGE: 445 446 /* the following are control cmsg related */ 447 case IP_PKTINFO: 448 case IP_RECVTTL: 449 case IP_RECVTOS: 450 case IP_RECVOPTS: 451 case IP_RETOPTS: 452 case IP_PASSSEC: 453 case IP_RECVORIGDSTADDR: 454 case IP_CHECKSUM: 455 case IP_RECVFRAGSIZE: 456 457 /* common stuff that need some love */ 458 case IP_TOS: 459 case IP_TTL: 460 case IP_MTU_DISCOVER: 461 case IP_RECVERR: 462 463 /* possibly less common may deserve some love */ 464 case IP_MINTTL: 465 466 /* the following is apparently a no-op for plain TCP */ 467 case IP_RECVERR_RFC4884: 468 return true; 469 } 470 471 /* IP_OPTIONS is not supported, needs subflow care */ 472 /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ 473 /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, 474 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, 475 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, 476 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, 477 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, 478 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal 479 * with mcast stuff 480 */ 481 /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ 482 return false; 483 } 484 if (level == SOL_IPV6) { 485 switch (optname) { 486 case IPV6_V6ONLY: 487 488 /* the following are control cmsg related */ 489 case IPV6_RECVPKTINFO: 490 case IPV6_2292PKTINFO: 491 case IPV6_RECVHOPLIMIT: 492 case IPV6_2292HOPLIMIT: 493 case IPV6_RECVRTHDR: 494 case IPV6_2292RTHDR: 495 case IPV6_RECVHOPOPTS: 496 case IPV6_2292HOPOPTS: 497 case IPV6_RECVDSTOPTS: 498 case IPV6_2292DSTOPTS: 499 case IPV6_RECVTCLASS: 500 case IPV6_FLOWINFO: 501 case IPV6_RECVPATHMTU: 502 case IPV6_RECVORIGDSTADDR: 503 case IPV6_RECVFRAGSIZE: 504 505 /* the following ones need some love but are quite common */ 506 case IPV6_TCLASS: 507 case IPV6_TRANSPARENT: 508 case IPV6_FREEBIND: 509 case IPV6_PKTINFO: 510 case IPV6_2292PKTOPTIONS: 511 case IPV6_UNICAST_HOPS: 512 case IPV6_MTU_DISCOVER: 513 case IPV6_MTU: 514 case IPV6_RECVERR: 515 case IPV6_FLOWINFO_SEND: 516 case IPV6_FLOWLABEL_MGR: 517 case IPV6_MINHOPCOUNT: 518 case IPV6_DONTFRAG: 519 case IPV6_AUTOFLOWLABEL: 520 521 /* the following one is a no-op for plain TCP */ 522 case IPV6_RECVERR_RFC4884: 523 return true; 524 } 525 526 /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are 527 * not supported 528 */ 529 /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, 530 * IPV6_MULTICAST_IF, IPV6_ADDRFORM, 531 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, 532 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, 533 * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, 534 * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER 535 * are not supported better not deal with mcast 536 */ 537 /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ 538 539 /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ 540 /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ 541 return false; 542 } 543 if (level == SOL_TCP) { 544 switch (optname) { 545 /* the following are no-op or should work just fine */ 546 case TCP_THIN_DUPACK: 547 case TCP_DEFER_ACCEPT: 548 549 /* the following need some love */ 550 case TCP_MAXSEG: 551 case TCP_NODELAY: 552 case TCP_THIN_LINEAR_TIMEOUTS: 553 case TCP_CONGESTION: 554 case TCP_CORK: 555 case TCP_KEEPIDLE: 556 case TCP_KEEPINTVL: 557 case TCP_KEEPCNT: 558 case TCP_SYNCNT: 559 case TCP_SAVE_SYN: 560 case TCP_LINGER2: 561 case TCP_WINDOW_CLAMP: 562 case TCP_QUICKACK: 563 case TCP_USER_TIMEOUT: 564 case TCP_TIMESTAMP: 565 case TCP_NOTSENT_LOWAT: 566 case TCP_TX_DELAY: 567 case TCP_INQ: 568 case TCP_FASTOPEN: 569 case TCP_FASTOPEN_CONNECT: 570 case TCP_FASTOPEN_KEY: 571 case TCP_FASTOPEN_NO_COOKIE: 572 return true; 573 } 574 575 /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ 576 577 /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, 578 * TCP_REPAIR_WINDOW are not supported, better avoid this mess 579 */ 580 } 581 return false; 582 } 583 584 static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, 585 unsigned int optlen) 586 { 587 struct mptcp_subflow_context *subflow; 588 struct sock *sk = (struct sock *)msk; 589 char name[TCP_CA_NAME_MAX]; 590 bool cap_net_admin; 591 int ret; 592 593 if (optlen < 1) 594 return -EINVAL; 595 596 ret = strncpy_from_sockptr(name, optval, 597 min_t(long, TCP_CA_NAME_MAX - 1, optlen)); 598 if (ret < 0) 599 return -EFAULT; 600 601 name[ret] = 0; 602 603 cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); 604 605 ret = 0; 606 lock_sock(sk); 607 sockopt_seq_inc(msk); 608 mptcp_for_each_subflow(msk, subflow) { 609 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 610 int err; 611 612 lock_sock(ssk); 613 err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); 614 if (err < 0 && ret == 0) 615 ret = err; 616 subflow->setsockopt_seq = msk->setsockopt_seq; 617 release_sock(ssk); 618 } 619 620 if (ret == 0) 621 strcpy(msk->ca_name, name); 622 623 release_sock(sk); 624 return ret; 625 } 626 627 static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval, 628 unsigned int optlen) 629 { 630 struct mptcp_subflow_context *subflow; 631 struct sock *sk = (struct sock *)msk; 632 int val; 633 634 if (optlen < sizeof(int)) 635 return -EINVAL; 636 637 if (copy_from_sockptr(&val, optval, sizeof(val))) 638 return -EFAULT; 639 640 lock_sock(sk); 641 sockopt_seq_inc(msk); 642 msk->cork = !!val; 643 mptcp_for_each_subflow(msk, subflow) { 644 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 645 646 lock_sock(ssk); 647 __tcp_sock_set_cork(ssk, !!val); 648 release_sock(ssk); 649 } 650 if (!val) 651 mptcp_check_and_set_pending(sk); 652 release_sock(sk); 653 654 return 0; 655 } 656 657 static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval, 658 unsigned int optlen) 659 { 660 struct mptcp_subflow_context *subflow; 661 struct sock *sk = (struct sock *)msk; 662 int val; 663 664 if (optlen < sizeof(int)) 665 return -EINVAL; 666 667 if (copy_from_sockptr(&val, optval, sizeof(val))) 668 return -EFAULT; 669 670 lock_sock(sk); 671 sockopt_seq_inc(msk); 672 msk->nodelay = !!val; 673 mptcp_for_each_subflow(msk, subflow) { 674 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 675 676 lock_sock(ssk); 677 __tcp_sock_set_nodelay(ssk, !!val); 678 release_sock(ssk); 679 } 680 if (val) 681 mptcp_check_and_set_pending(sk); 682 release_sock(sk); 683 684 return 0; 685 } 686 687 static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname, 688 sockptr_t optval, unsigned int optlen) 689 { 690 struct sock *sk = (struct sock *)msk; 691 struct sock *ssk; 692 int err; 693 694 err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 695 if (err != 0) 696 return err; 697 698 lock_sock(sk); 699 700 ssk = __mptcp_nmpc_sk(msk); 701 if (IS_ERR(ssk)) { 702 release_sock(sk); 703 return PTR_ERR(ssk); 704 } 705 706 switch (optname) { 707 case IP_FREEBIND: 708 inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); 709 break; 710 case IP_TRANSPARENT: 711 inet_assign_bit(TRANSPARENT, ssk, 712 inet_test_bit(TRANSPARENT, sk)); 713 break; 714 case IP_BIND_ADDRESS_NO_PORT: 715 inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, 716 inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); 717 break; 718 case IP_LOCAL_PORT_RANGE: 719 WRITE_ONCE(inet_sk(ssk)->local_port_range, 720 READ_ONCE(inet_sk(sk)->local_port_range)); 721 break; 722 default: 723 release_sock(sk); 724 WARN_ON_ONCE(1); 725 return -EOPNOTSUPP; 726 } 727 728 sockopt_seq_inc(msk); 729 release_sock(sk); 730 return 0; 731 } 732 733 static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, 734 sockptr_t optval, unsigned int optlen) 735 { 736 struct mptcp_subflow_context *subflow; 737 struct sock *sk = (struct sock *)msk; 738 int err, val; 739 740 err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 741 742 if (err != 0) 743 return err; 744 745 lock_sock(sk); 746 sockopt_seq_inc(msk); 747 val = READ_ONCE(inet_sk(sk)->tos); 748 mptcp_for_each_subflow(msk, subflow) { 749 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 750 bool slow; 751 752 slow = lock_sock_fast(ssk); 753 __ip_sock_set_tos(ssk, val); 754 unlock_sock_fast(ssk, slow); 755 } 756 release_sock(sk); 757 758 return 0; 759 } 760 761 static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, 762 sockptr_t optval, unsigned int optlen) 763 { 764 switch (optname) { 765 case IP_FREEBIND: 766 case IP_TRANSPARENT: 767 case IP_BIND_ADDRESS_NO_PORT: 768 case IP_LOCAL_PORT_RANGE: 769 return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen); 770 case IP_TOS: 771 return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); 772 } 773 774 return -EOPNOTSUPP; 775 } 776 777 static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, 778 sockptr_t optval, unsigned int optlen) 779 { 780 struct sock *sk = (struct sock *)msk; 781 struct sock *ssk; 782 int ret; 783 784 /* Limit to first subflow, before the connection establishment */ 785 lock_sock(sk); 786 ssk = __mptcp_nmpc_sk(msk); 787 if (IS_ERR(ssk)) { 788 ret = PTR_ERR(ssk); 789 goto unlock; 790 } 791 792 ret = tcp_setsockopt(ssk, level, optname, optval, optlen); 793 794 unlock: 795 release_sock(sk); 796 return ret; 797 } 798 799 static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 800 sockptr_t optval, unsigned int optlen) 801 { 802 struct sock *sk = (void *)msk; 803 int ret, val; 804 805 switch (optname) { 806 case TCP_INQ: 807 ret = mptcp_get_int_option(msk, optval, optlen, &val); 808 if (ret) 809 return ret; 810 if (val < 0 || val > 1) 811 return -EINVAL; 812 813 lock_sock(sk); 814 msk->recvmsg_inq = !!val; 815 release_sock(sk); 816 return 0; 817 case TCP_ULP: 818 return -EOPNOTSUPP; 819 case TCP_CONGESTION: 820 return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); 821 case TCP_CORK: 822 return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen); 823 case TCP_NODELAY: 824 return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); 825 case TCP_DEFER_ACCEPT: 826 /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ 827 mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); 828 return 0; 829 case TCP_FASTOPEN: 830 case TCP_FASTOPEN_CONNECT: 831 case TCP_FASTOPEN_KEY: 832 case TCP_FASTOPEN_NO_COOKIE: 833 return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, 834 optval, optlen); 835 } 836 837 return -EOPNOTSUPP; 838 } 839 840 int mptcp_setsockopt(struct sock *sk, int level, int optname, 841 sockptr_t optval, unsigned int optlen) 842 { 843 struct mptcp_sock *msk = mptcp_sk(sk); 844 struct sock *ssk; 845 846 pr_debug("msk=%p", msk); 847 848 if (level == SOL_SOCKET) 849 return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); 850 851 if (!mptcp_supported_sockopt(level, optname)) 852 return -ENOPROTOOPT; 853 854 /* @@ the meaning of setsockopt() when the socket is connected and 855 * there are multiple subflows is not yet defined. It is up to the 856 * MPTCP-level socket to configure the subflows until the subflow 857 * is in TCP fallback, when TCP socket options are passed through 858 * to the one remaining subflow. 859 */ 860 lock_sock(sk); 861 ssk = __mptcp_tcp_fallback(msk); 862 release_sock(sk); 863 if (ssk) 864 return tcp_setsockopt(ssk, level, optname, optval, optlen); 865 866 if (level == SOL_IP) 867 return mptcp_setsockopt_v4(msk, optname, optval, optlen); 868 869 if (level == SOL_IPV6) 870 return mptcp_setsockopt_v6(msk, optname, optval, optlen); 871 872 if (level == SOL_TCP) 873 return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); 874 875 return -EOPNOTSUPP; 876 } 877 878 static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, 879 char __user *optval, int __user *optlen) 880 { 881 struct sock *sk = (struct sock *)msk; 882 struct sock *ssk; 883 int ret; 884 885 lock_sock(sk); 886 ssk = msk->first; 887 if (ssk) { 888 ret = tcp_getsockopt(ssk, level, optname, optval, optlen); 889 goto out; 890 } 891 892 ssk = __mptcp_nmpc_sk(msk); 893 if (IS_ERR(ssk)) { 894 ret = PTR_ERR(ssk); 895 goto out; 896 } 897 898 ret = tcp_getsockopt(ssk, level, optname, optval, optlen); 899 900 out: 901 release_sock(sk); 902 return ret; 903 } 904 905 void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) 906 { 907 struct sock *sk = (struct sock *)msk; 908 u32 flags = 0; 909 bool slow; 910 911 memset(info, 0, sizeof(*info)); 912 913 info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); 914 info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); 915 info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); 916 info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); 917 918 if (inet_sk_state_load(sk) == TCP_LISTEN) 919 return; 920 921 /* The following limits only make sense for the in-kernel PM */ 922 if (mptcp_pm_is_kernel(msk)) { 923 info->mptcpi_subflows_max = 924 mptcp_pm_get_subflows_max(msk); 925 info->mptcpi_add_addr_signal_max = 926 mptcp_pm_get_add_addr_signal_max(msk); 927 info->mptcpi_add_addr_accepted_max = 928 mptcp_pm_get_add_addr_accept_max(msk); 929 info->mptcpi_local_addr_max = 930 mptcp_pm_get_local_addr_max(msk); 931 } 932 933 if (__mptcp_check_fallback(msk)) 934 flags |= MPTCP_INFO_FLAG_FALLBACK; 935 if (READ_ONCE(msk->can_ack)) 936 flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; 937 info->mptcpi_flags = flags; 938 mptcp_data_lock(sk); 939 info->mptcpi_snd_una = msk->snd_una; 940 info->mptcpi_rcv_nxt = msk->ack_seq; 941 info->mptcpi_bytes_acked = msk->bytes_acked; 942 mptcp_data_unlock(sk); 943 944 slow = lock_sock_fast(sk); 945 info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); 946 info->mptcpi_token = msk->token; 947 info->mptcpi_write_seq = msk->write_seq; 948 info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; 949 info->mptcpi_bytes_sent = msk->bytes_sent; 950 info->mptcpi_bytes_received = msk->bytes_received; 951 info->mptcpi_bytes_retrans = msk->bytes_retrans; 952 info->mptcpi_subflows_total = info->mptcpi_subflows + 953 __mptcp_has_initial_subflow(msk); 954 unlock_sock_fast(sk, slow); 955 } 956 EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); 957 958 static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) 959 { 960 struct mptcp_info m_info; 961 int len; 962 963 if (get_user(len, optlen)) 964 return -EFAULT; 965 966 len = min_t(unsigned int, len, sizeof(struct mptcp_info)); 967 968 mptcp_diag_fill_info(msk, &m_info); 969 970 if (put_user(len, optlen)) 971 return -EFAULT; 972 973 if (copy_to_user(optval, &m_info, len)) 974 return -EFAULT; 975 976 return 0; 977 } 978 979 static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, 980 char __user *optval, 981 u32 copied, 982 int __user *optlen) 983 { 984 u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); 985 986 if (copied) 987 copied += sfd->size_subflow_data; 988 else 989 copied = copylen; 990 991 if (put_user(copied, optlen)) 992 return -EFAULT; 993 994 if (copy_to_user(optval, sfd, copylen)) 995 return -EFAULT; 996 997 return 0; 998 } 999 1000 static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, 1001 char __user *optval, 1002 int __user *optlen) 1003 { 1004 int len, copylen; 1005 1006 if (get_user(len, optlen)) 1007 return -EFAULT; 1008 1009 /* if mptcp_subflow_data size is changed, need to adjust 1010 * this function to deal with programs using old version. 1011 */ 1012 BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); 1013 1014 if (len < MIN_INFO_OPTLEN_SIZE) 1015 return -EINVAL; 1016 1017 memset(sfd, 0, sizeof(*sfd)); 1018 1019 copylen = min_t(unsigned int, len, sizeof(*sfd)); 1020 if (copy_from_user(sfd, optval, copylen)) 1021 return -EFAULT; 1022 1023 /* size_subflow_data is u32, but len is signed */ 1024 if (sfd->size_subflow_data > INT_MAX || 1025 sfd->size_user > INT_MAX) 1026 return -EINVAL; 1027 1028 if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || 1029 sfd->size_subflow_data > len) 1030 return -EINVAL; 1031 1032 if (sfd->num_subflows || sfd->size_kernel) 1033 return -EINVAL; 1034 1035 return len - sfd->size_subflow_data; 1036 } 1037 1038 static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, 1039 int __user *optlen) 1040 { 1041 struct mptcp_subflow_context *subflow; 1042 struct sock *sk = (struct sock *)msk; 1043 unsigned int sfcount = 0, copied = 0; 1044 struct mptcp_subflow_data sfd; 1045 char __user *infoptr; 1046 int len; 1047 1048 len = mptcp_get_subflow_data(&sfd, optval, optlen); 1049 if (len < 0) 1050 return len; 1051 1052 sfd.size_kernel = sizeof(struct tcp_info); 1053 sfd.size_user = min_t(unsigned int, sfd.size_user, 1054 sizeof(struct tcp_info)); 1055 1056 infoptr = optval + sfd.size_subflow_data; 1057 1058 lock_sock(sk); 1059 1060 mptcp_for_each_subflow(msk, subflow) { 1061 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1062 1063 ++sfcount; 1064 1065 if (len && len >= sfd.size_user) { 1066 struct tcp_info info; 1067 1068 tcp_get_info(ssk, &info); 1069 1070 if (copy_to_user(infoptr, &info, sfd.size_user)) { 1071 release_sock(sk); 1072 return -EFAULT; 1073 } 1074 1075 infoptr += sfd.size_user; 1076 copied += sfd.size_user; 1077 len -= sfd.size_user; 1078 } 1079 } 1080 1081 release_sock(sk); 1082 1083 sfd.num_subflows = sfcount; 1084 1085 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 1086 return -EFAULT; 1087 1088 return 0; 1089 } 1090 1091 static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) 1092 { 1093 const struct inet_sock *inet = inet_sk(sk); 1094 1095 memset(a, 0, sizeof(*a)); 1096 1097 if (sk->sk_family == AF_INET) { 1098 a->sin_local.sin_family = AF_INET; 1099 a->sin_local.sin_port = inet->inet_sport; 1100 a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; 1101 1102 if (!a->sin_local.sin_addr.s_addr) 1103 a->sin_local.sin_addr.s_addr = inet->inet_saddr; 1104 1105 a->sin_remote.sin_family = AF_INET; 1106 a->sin_remote.sin_port = inet->inet_dport; 1107 a->sin_remote.sin_addr.s_addr = inet->inet_daddr; 1108 #if IS_ENABLED(CONFIG_IPV6) 1109 } else if (sk->sk_family == AF_INET6) { 1110 const struct ipv6_pinfo *np = inet6_sk(sk); 1111 1112 if (WARN_ON_ONCE(!np)) 1113 return; 1114 1115 a->sin6_local.sin6_family = AF_INET6; 1116 a->sin6_local.sin6_port = inet->inet_sport; 1117 1118 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 1119 a->sin6_local.sin6_addr = np->saddr; 1120 else 1121 a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; 1122 1123 a->sin6_remote.sin6_family = AF_INET6; 1124 a->sin6_remote.sin6_port = inet->inet_dport; 1125 a->sin6_remote.sin6_addr = sk->sk_v6_daddr; 1126 #endif 1127 } 1128 } 1129 1130 static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, 1131 int __user *optlen) 1132 { 1133 struct mptcp_subflow_context *subflow; 1134 struct sock *sk = (struct sock *)msk; 1135 unsigned int sfcount = 0, copied = 0; 1136 struct mptcp_subflow_data sfd; 1137 char __user *addrptr; 1138 int len; 1139 1140 len = mptcp_get_subflow_data(&sfd, optval, optlen); 1141 if (len < 0) 1142 return len; 1143 1144 sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); 1145 sfd.size_user = min_t(unsigned int, sfd.size_user, 1146 sizeof(struct mptcp_subflow_addrs)); 1147 1148 addrptr = optval + sfd.size_subflow_data; 1149 1150 lock_sock(sk); 1151 1152 mptcp_for_each_subflow(msk, subflow) { 1153 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1154 1155 ++sfcount; 1156 1157 if (len && len >= sfd.size_user) { 1158 struct mptcp_subflow_addrs a; 1159 1160 mptcp_get_sub_addrs(ssk, &a); 1161 1162 if (copy_to_user(addrptr, &a, sfd.size_user)) { 1163 release_sock(sk); 1164 return -EFAULT; 1165 } 1166 1167 addrptr += sfd.size_user; 1168 copied += sfd.size_user; 1169 len -= sfd.size_user; 1170 } 1171 } 1172 1173 release_sock(sk); 1174 1175 sfd.num_subflows = sfcount; 1176 1177 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 1178 return -EFAULT; 1179 1180 return 0; 1181 } 1182 1183 static int mptcp_get_full_info(struct mptcp_full_info *mfi, 1184 char __user *optval, 1185 int __user *optlen) 1186 { 1187 int len; 1188 1189 BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != 1190 MIN_FULL_INFO_OPTLEN_SIZE); 1191 1192 if (get_user(len, optlen)) 1193 return -EFAULT; 1194 1195 if (len < MIN_FULL_INFO_OPTLEN_SIZE) 1196 return -EINVAL; 1197 1198 memset(mfi, 0, sizeof(*mfi)); 1199 if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) 1200 return -EFAULT; 1201 1202 if (mfi->size_tcpinfo_kernel || 1203 mfi->size_sfinfo_kernel || 1204 mfi->num_subflows) 1205 return -EINVAL; 1206 1207 if (mfi->size_sfinfo_user > INT_MAX || 1208 mfi->size_tcpinfo_user > INT_MAX) 1209 return -EINVAL; 1210 1211 return len - MIN_FULL_INFO_OPTLEN_SIZE; 1212 } 1213 1214 static int mptcp_put_full_info(struct mptcp_full_info *mfi, 1215 char __user *optval, 1216 u32 copylen, 1217 int __user *optlen) 1218 { 1219 copylen += MIN_FULL_INFO_OPTLEN_SIZE; 1220 if (put_user(copylen, optlen)) 1221 return -EFAULT; 1222 1223 if (copy_to_user(optval, mfi, copylen)) 1224 return -EFAULT; 1225 return 0; 1226 } 1227 1228 static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, 1229 int __user *optlen) 1230 { 1231 unsigned int sfcount = 0, copylen = 0; 1232 struct mptcp_subflow_context *subflow; 1233 struct sock *sk = (struct sock *)msk; 1234 void __user *tcpinfoptr, *sfinfoptr; 1235 struct mptcp_full_info mfi; 1236 int len; 1237 1238 len = mptcp_get_full_info(&mfi, optval, optlen); 1239 if (len < 0) 1240 return len; 1241 1242 /* don't bother filling the mptcp info if there is not enough 1243 * user-space-provided storage 1244 */ 1245 if (len > 0) { 1246 mptcp_diag_fill_info(msk, &mfi.mptcp_info); 1247 copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); 1248 } 1249 1250 mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); 1251 mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, 1252 sizeof(struct tcp_info)); 1253 sfinfoptr = u64_to_user_ptr(mfi.subflow_info); 1254 mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); 1255 mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, 1256 sizeof(struct mptcp_subflow_info)); 1257 tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); 1258 1259 lock_sock(sk); 1260 mptcp_for_each_subflow(msk, subflow) { 1261 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1262 struct mptcp_subflow_info sfinfo; 1263 struct tcp_info tcp_info; 1264 1265 if (sfcount++ >= mfi.size_arrays_user) 1266 continue; 1267 1268 /* fetch addr/tcp_info only if the user space buffers 1269 * are wide enough 1270 */ 1271 memset(&sfinfo, 0, sizeof(sfinfo)); 1272 sfinfo.id = subflow->subflow_id; 1273 if (mfi.size_sfinfo_user > 1274 offsetof(struct mptcp_subflow_info, addrs)) 1275 mptcp_get_sub_addrs(ssk, &sfinfo.addrs); 1276 if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) 1277 goto fail_release; 1278 1279 if (mfi.size_tcpinfo_user) { 1280 tcp_get_info(ssk, &tcp_info); 1281 if (copy_to_user(tcpinfoptr, &tcp_info, 1282 mfi.size_tcpinfo_user)) 1283 goto fail_release; 1284 } 1285 1286 tcpinfoptr += mfi.size_tcpinfo_user; 1287 sfinfoptr += mfi.size_sfinfo_user; 1288 } 1289 release_sock(sk); 1290 1291 mfi.num_subflows = sfcount; 1292 if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) 1293 return -EFAULT; 1294 1295 return 0; 1296 1297 fail_release: 1298 release_sock(sk); 1299 return -EFAULT; 1300 } 1301 1302 static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, 1303 int __user *optlen, int val) 1304 { 1305 int len; 1306 1307 if (get_user(len, optlen)) 1308 return -EFAULT; 1309 if (len < 0) 1310 return -EINVAL; 1311 1312 if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { 1313 unsigned char ucval = (unsigned char)val; 1314 1315 len = 1; 1316 if (put_user(len, optlen)) 1317 return -EFAULT; 1318 if (copy_to_user(optval, &ucval, 1)) 1319 return -EFAULT; 1320 } else { 1321 len = min_t(unsigned int, len, sizeof(int)); 1322 if (put_user(len, optlen)) 1323 return -EFAULT; 1324 if (copy_to_user(optval, &val, len)) 1325 return -EFAULT; 1326 } 1327 1328 return 0; 1329 } 1330 1331 static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 1332 char __user *optval, int __user *optlen) 1333 { 1334 switch (optname) { 1335 case TCP_ULP: 1336 case TCP_CONGESTION: 1337 case TCP_INFO: 1338 case TCP_CC_INFO: 1339 case TCP_DEFER_ACCEPT: 1340 case TCP_FASTOPEN: 1341 case TCP_FASTOPEN_CONNECT: 1342 case TCP_FASTOPEN_KEY: 1343 case TCP_FASTOPEN_NO_COOKIE: 1344 return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, 1345 optval, optlen); 1346 case TCP_INQ: 1347 return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq); 1348 case TCP_CORK: 1349 return mptcp_put_int_option(msk, optval, optlen, msk->cork); 1350 case TCP_NODELAY: 1351 return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); 1352 } 1353 return -EOPNOTSUPP; 1354 } 1355 1356 static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, 1357 char __user *optval, int __user *optlen) 1358 { 1359 struct sock *sk = (void *)msk; 1360 1361 switch (optname) { 1362 case IP_TOS: 1363 return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos)); 1364 case IP_BIND_ADDRESS_NO_PORT: 1365 return mptcp_put_int_option(msk, optval, optlen, 1366 inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); 1367 case IP_LOCAL_PORT_RANGE: 1368 return mptcp_put_int_option(msk, optval, optlen, 1369 READ_ONCE(inet_sk(sk)->local_port_range)); 1370 } 1371 1372 return -EOPNOTSUPP; 1373 } 1374 1375 static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, 1376 char __user *optval, int __user *optlen) 1377 { 1378 switch (optname) { 1379 case MPTCP_INFO: 1380 return mptcp_getsockopt_info(msk, optval, optlen); 1381 case MPTCP_FULL_INFO: 1382 return mptcp_getsockopt_full_info(msk, optval, optlen); 1383 case MPTCP_TCPINFO: 1384 return mptcp_getsockopt_tcpinfo(msk, optval, optlen); 1385 case MPTCP_SUBFLOW_ADDRS: 1386 return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); 1387 } 1388 1389 return -EOPNOTSUPP; 1390 } 1391 1392 int mptcp_getsockopt(struct sock *sk, int level, int optname, 1393 char __user *optval, int __user *option) 1394 { 1395 struct mptcp_sock *msk = mptcp_sk(sk); 1396 struct sock *ssk; 1397 1398 pr_debug("msk=%p", msk); 1399 1400 /* @@ the meaning of setsockopt() when the socket is connected and 1401 * there are multiple subflows is not yet defined. It is up to the 1402 * MPTCP-level socket to configure the subflows until the subflow 1403 * is in TCP fallback, when socket options are passed through 1404 * to the one remaining subflow. 1405 */ 1406 lock_sock(sk); 1407 ssk = __mptcp_tcp_fallback(msk); 1408 release_sock(sk); 1409 if (ssk) 1410 return tcp_getsockopt(ssk, level, optname, optval, option); 1411 1412 if (level == SOL_IP) 1413 return mptcp_getsockopt_v4(msk, optname, optval, option); 1414 if (level == SOL_TCP) 1415 return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); 1416 if (level == SOL_MPTCP) 1417 return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); 1418 return -EOPNOTSUPP; 1419 } 1420 1421 static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) 1422 { 1423 static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; 1424 struct sock *sk = (struct sock *)msk; 1425 1426 if (ssk->sk_prot->keepalive) { 1427 if (sock_flag(sk, SOCK_KEEPOPEN)) 1428 ssk->sk_prot->keepalive(ssk, 1); 1429 else 1430 ssk->sk_prot->keepalive(ssk, 0); 1431 } 1432 1433 ssk->sk_priority = sk->sk_priority; 1434 ssk->sk_bound_dev_if = sk->sk_bound_dev_if; 1435 ssk->sk_incoming_cpu = sk->sk_incoming_cpu; 1436 ssk->sk_ipv6only = sk->sk_ipv6only; 1437 __ip_sock_set_tos(ssk, inet_sk(sk)->tos); 1438 1439 if (sk->sk_userlocks & tx_rx_locks) { 1440 ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; 1441 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) { 1442 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 1443 mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; 1444 } 1445 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1446 WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); 1447 } 1448 1449 if (sock_flag(sk, SOCK_LINGER)) { 1450 ssk->sk_lingertime = sk->sk_lingertime; 1451 sock_set_flag(ssk, SOCK_LINGER); 1452 } else { 1453 sock_reset_flag(ssk, SOCK_LINGER); 1454 } 1455 1456 if (sk->sk_mark != ssk->sk_mark) { 1457 ssk->sk_mark = sk->sk_mark; 1458 sk_dst_reset(ssk); 1459 } 1460 1461 sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); 1462 1463 if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) 1464 tcp_set_congestion_control(ssk, msk->ca_name, false, true); 1465 __tcp_sock_set_cork(ssk, !!msk->cork); 1466 __tcp_sock_set_nodelay(ssk, !!msk->nodelay); 1467 1468 inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); 1469 inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); 1470 inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk)); 1471 WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range)); 1472 } 1473 1474 void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) 1475 { 1476 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 1477 1478 msk_owned_by_me(msk); 1479 1480 ssk->sk_rcvlowat = 0; 1481 1482 /* subflows must ignore any latency-related settings: will not affect 1483 * the user-space - only the msk is relevant - but will foul the 1484 * mptcp scheduler 1485 */ 1486 tcp_sk(ssk)->notsent_lowat = UINT_MAX; 1487 1488 if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { 1489 sync_socket_options(msk, ssk); 1490 1491 subflow->setsockopt_seq = msk->setsockopt_seq; 1492 } 1493 } 1494 1495 /* unfortunately this is different enough from the tcp version so 1496 * that we can't factor it out 1497 */ 1498 int mptcp_set_rcvlowat(struct sock *sk, int val) 1499 { 1500 struct mptcp_subflow_context *subflow; 1501 int space, cap; 1502 1503 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1504 cap = sk->sk_rcvbuf >> 1; 1505 else 1506 cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; 1507 val = min(val, cap); 1508 WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); 1509 1510 /* Check if we need to signal EPOLLIN right now */ 1511 if (mptcp_epollin_ready(sk)) 1512 sk->sk_data_ready(sk); 1513 1514 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1515 return 0; 1516 1517 space = __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, val); 1518 if (space <= sk->sk_rcvbuf) 1519 return 0; 1520 1521 /* propagate the rcvbuf changes to all the subflows */ 1522 WRITE_ONCE(sk->sk_rcvbuf, space); 1523 mptcp_for_each_subflow(mptcp_sk(sk), subflow) { 1524 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1525 bool slow; 1526 1527 slow = lock_sock_fast(ssk); 1528 WRITE_ONCE(ssk->sk_rcvbuf, space); 1529 tcp_sk(ssk)->window_clamp = val; 1530 unlock_sock_fast(ssk, slow); 1531 } 1532 return 0; 1533 } 1534