1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2021, Red Hat. 5 */ 6 7 #define pr_fmt(fmt) "MPTCP: " fmt 8 9 #include <linux/kernel.h> 10 #include <linux/module.h> 11 #include <net/sock.h> 12 #include <net/protocol.h> 13 #include <net/tcp.h> 14 #include <net/mptcp.h> 15 #include "protocol.h" 16 17 #define MIN_INFO_OPTLEN_SIZE 16 18 19 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) 20 { 21 sock_owned_by_me((const struct sock *)msk); 22 23 if (likely(!__mptcp_check_fallback(msk))) 24 return NULL; 25 26 return msk->first; 27 } 28 29 static u32 sockopt_seq_reset(const struct sock *sk) 30 { 31 sock_owned_by_me(sk); 32 33 /* Highbits contain state. Allows to distinguish sockopt_seq 34 * of listener and established: 35 * s0 = new_listener() 36 * sockopt(s0) - seq is 1 37 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) 38 * sockopt(s0) - seq increments to 2 on s0 39 * sockopt(s1) // seq increments to 2 on s1 (different option) 40 * new ssk completes join, inherits options from s0 // seq 2 41 * Needs sync from mptcp join logic, but ssk->seq == msk->seq 42 * 43 * Set High order bits to sk_state so ssk->seq == msk->seq test 44 * will fail. 45 */ 46 47 return (u32)sk->sk_state << 24u; 48 } 49 50 static void sockopt_seq_inc(struct mptcp_sock *msk) 51 { 52 u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; 53 54 msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; 55 } 56 57 static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, 58 unsigned int optlen, int *val) 59 { 60 if (optlen < sizeof(int)) 61 return -EINVAL; 62 63 if (copy_from_sockptr(val, optval, sizeof(*val))) 64 return -EFAULT; 65 66 return 0; 67 } 68 69 static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) 70 { 71 struct mptcp_subflow_context *subflow; 72 struct sock *sk = (struct sock *)msk; 73 74 lock_sock(sk); 75 sockopt_seq_inc(msk); 76 77 mptcp_for_each_subflow(msk, subflow) { 78 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 79 bool slow = lock_sock_fast(ssk); 80 81 switch (optname) { 82 case SO_DEBUG: 83 sock_valbool_flag(ssk, SOCK_DBG, !!val); 84 break; 85 case SO_KEEPALIVE: 86 if (ssk->sk_prot->keepalive) 87 ssk->sk_prot->keepalive(ssk, !!val); 88 sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); 89 break; 90 case SO_PRIORITY: 91 ssk->sk_priority = val; 92 break; 93 case SO_SNDBUF: 94 case SO_SNDBUFFORCE: 95 ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; 96 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 97 break; 98 case SO_RCVBUF: 99 case SO_RCVBUFFORCE: 100 ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; 101 WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); 102 break; 103 case SO_MARK: 104 if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { 105 ssk->sk_mark = sk->sk_mark; 106 sk_dst_reset(ssk); 107 } 108 break; 109 case SO_INCOMING_CPU: 110 WRITE_ONCE(ssk->sk_incoming_cpu, val); 111 break; 112 } 113 114 subflow->setsockopt_seq = msk->setsockopt_seq; 115 unlock_sock_fast(ssk, slow); 116 } 117 118 release_sock(sk); 119 } 120 121 static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) 122 { 123 sockptr_t optval = KERNEL_SOCKPTR(&val); 124 struct sock *sk = (struct sock *)msk; 125 int ret; 126 127 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 128 optval, sizeof(val)); 129 if (ret) 130 return ret; 131 132 mptcp_sol_socket_sync_intval(msk, optname, val); 133 return 0; 134 } 135 136 static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) 137 { 138 struct sock *sk = (struct sock *)msk; 139 140 WRITE_ONCE(sk->sk_incoming_cpu, val); 141 142 mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); 143 } 144 145 static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) 146 { 147 sockptr_t optval = KERNEL_SOCKPTR(&val); 148 struct mptcp_subflow_context *subflow; 149 struct sock *sk = (struct sock *)msk; 150 int ret; 151 152 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 153 optval, sizeof(val)); 154 if (ret) 155 return ret; 156 157 lock_sock(sk); 158 mptcp_for_each_subflow(msk, subflow) { 159 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 160 bool slow = lock_sock_fast(ssk); 161 162 sock_set_timestamp(sk, optname, !!val); 163 unlock_sock_fast(ssk, slow); 164 } 165 166 release_sock(sk); 167 return 0; 168 } 169 170 static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, 171 sockptr_t optval, 172 unsigned int optlen) 173 { 174 int val, ret; 175 176 ret = mptcp_get_int_option(msk, optval, optlen, &val); 177 if (ret) 178 return ret; 179 180 switch (optname) { 181 case SO_KEEPALIVE: 182 mptcp_sol_socket_sync_intval(msk, optname, val); 183 return 0; 184 case SO_DEBUG: 185 case SO_MARK: 186 case SO_PRIORITY: 187 case SO_SNDBUF: 188 case SO_SNDBUFFORCE: 189 case SO_RCVBUF: 190 case SO_RCVBUFFORCE: 191 return mptcp_sol_socket_intval(msk, optname, val); 192 case SO_INCOMING_CPU: 193 mptcp_so_incoming_cpu(msk, val); 194 return 0; 195 case SO_TIMESTAMP_OLD: 196 case SO_TIMESTAMP_NEW: 197 case SO_TIMESTAMPNS_OLD: 198 case SO_TIMESTAMPNS_NEW: 199 return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); 200 } 201 202 return -ENOPROTOOPT; 203 } 204 205 static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, 206 int optname, 207 sockptr_t optval, 208 unsigned int optlen) 209 { 210 struct mptcp_subflow_context *subflow; 211 struct sock *sk = (struct sock *)msk; 212 struct so_timestamping timestamping; 213 int ret; 214 215 if (optlen == sizeof(timestamping)) { 216 if (copy_from_sockptr(×tamping, optval, 217 sizeof(timestamping))) 218 return -EFAULT; 219 } else if (optlen == sizeof(int)) { 220 memset(×tamping, 0, sizeof(timestamping)); 221 222 if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) 223 return -EFAULT; 224 } else { 225 return -EINVAL; 226 } 227 228 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 229 KERNEL_SOCKPTR(×tamping), 230 sizeof(timestamping)); 231 if (ret) 232 return ret; 233 234 lock_sock(sk); 235 236 mptcp_for_each_subflow(msk, subflow) { 237 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 238 bool slow = lock_sock_fast(ssk); 239 240 sock_set_timestamping(sk, optname, timestamping); 241 unlock_sock_fast(ssk, slow); 242 } 243 244 release_sock(sk); 245 246 return 0; 247 } 248 249 static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, 250 unsigned int optlen) 251 { 252 struct mptcp_subflow_context *subflow; 253 struct sock *sk = (struct sock *)msk; 254 struct linger ling; 255 sockptr_t kopt; 256 int ret; 257 258 if (optlen < sizeof(ling)) 259 return -EINVAL; 260 261 if (copy_from_sockptr(&ling, optval, sizeof(ling))) 262 return -EFAULT; 263 264 kopt = KERNEL_SOCKPTR(&ling); 265 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); 266 if (ret) 267 return ret; 268 269 lock_sock(sk); 270 sockopt_seq_inc(msk); 271 mptcp_for_each_subflow(msk, subflow) { 272 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 273 bool slow = lock_sock_fast(ssk); 274 275 if (!ling.l_onoff) { 276 sock_reset_flag(ssk, SOCK_LINGER); 277 } else { 278 ssk->sk_lingertime = sk->sk_lingertime; 279 sock_set_flag(ssk, SOCK_LINGER); 280 } 281 282 subflow->setsockopt_seq = msk->setsockopt_seq; 283 unlock_sock_fast(ssk, slow); 284 } 285 286 release_sock(sk); 287 return 0; 288 } 289 290 static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, 291 sockptr_t optval, unsigned int optlen) 292 { 293 struct sock *sk = (struct sock *)msk; 294 struct socket *ssock; 295 int ret; 296 297 switch (optname) { 298 case SO_REUSEPORT: 299 case SO_REUSEADDR: 300 case SO_BINDTODEVICE: 301 case SO_BINDTOIFINDEX: 302 lock_sock(sk); 303 ssock = __mptcp_nmpc_socket(msk); 304 if (!ssock) { 305 release_sock(sk); 306 return -EINVAL; 307 } 308 309 ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); 310 if (ret == 0) { 311 if (optname == SO_REUSEPORT) 312 sk->sk_reuseport = ssock->sk->sk_reuseport; 313 else if (optname == SO_REUSEADDR) 314 sk->sk_reuse = ssock->sk->sk_reuse; 315 else if (optname == SO_BINDTODEVICE) 316 sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; 317 else if (optname == SO_BINDTOIFINDEX) 318 sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; 319 } 320 release_sock(sk); 321 return ret; 322 case SO_KEEPALIVE: 323 case SO_PRIORITY: 324 case SO_SNDBUF: 325 case SO_SNDBUFFORCE: 326 case SO_RCVBUF: 327 case SO_RCVBUFFORCE: 328 case SO_MARK: 329 case SO_INCOMING_CPU: 330 case SO_DEBUG: 331 case SO_TIMESTAMP_OLD: 332 case SO_TIMESTAMP_NEW: 333 case SO_TIMESTAMPNS_OLD: 334 case SO_TIMESTAMPNS_NEW: 335 return mptcp_setsockopt_sol_socket_int(msk, optname, optval, 336 optlen); 337 case SO_TIMESTAMPING_OLD: 338 case SO_TIMESTAMPING_NEW: 339 return mptcp_setsockopt_sol_socket_timestamping(msk, optname, 340 optval, optlen); 341 case SO_LINGER: 342 return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); 343 case SO_RCVLOWAT: 344 case SO_RCVTIMEO_OLD: 345 case SO_RCVTIMEO_NEW: 346 case SO_SNDTIMEO_OLD: 347 case SO_SNDTIMEO_NEW: 348 case SO_BUSY_POLL: 349 case SO_PREFER_BUSY_POLL: 350 case SO_BUSY_POLL_BUDGET: 351 /* No need to copy: only relevant for msk */ 352 return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); 353 case SO_NO_CHECK: 354 case SO_DONTROUTE: 355 case SO_BROADCAST: 356 case SO_BSDCOMPAT: 357 case SO_PASSCRED: 358 case SO_PASSSEC: 359 case SO_RXQ_OVFL: 360 case SO_WIFI_STATUS: 361 case SO_NOFCS: 362 case SO_SELECT_ERR_QUEUE: 363 return 0; 364 } 365 366 /* SO_OOBINLINE is not supported, let's avoid the related mess 367 * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, 368 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, 369 * we must be careful with subflows 370 * 371 * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks 372 * explicitly the sk_protocol field 373 * 374 * SO_PEEK_OFF is unsupported, as it is for plain TCP 375 * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows 376 * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, 377 * but likely needs careful design 378 * 379 * SO_ZEROCOPY is currently unsupported, TODO in sndmsg 380 * SO_TXTIME is currently unsupported 381 */ 382 383 return -EOPNOTSUPP; 384 } 385 386 static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, 387 sockptr_t optval, unsigned int optlen) 388 { 389 struct sock *sk = (struct sock *)msk; 390 int ret = -EOPNOTSUPP; 391 struct socket *ssock; 392 393 switch (optname) { 394 case IPV6_V6ONLY: 395 case IPV6_TRANSPARENT: 396 case IPV6_FREEBIND: 397 lock_sock(sk); 398 ssock = __mptcp_nmpc_socket(msk); 399 if (!ssock) { 400 release_sock(sk); 401 return -EINVAL; 402 } 403 404 ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); 405 if (ret != 0) { 406 release_sock(sk); 407 return ret; 408 } 409 410 sockopt_seq_inc(msk); 411 412 switch (optname) { 413 case IPV6_V6ONLY: 414 sk->sk_ipv6only = ssock->sk->sk_ipv6only; 415 break; 416 case IPV6_TRANSPARENT: 417 inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent; 418 break; 419 case IPV6_FREEBIND: 420 inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind; 421 break; 422 } 423 424 release_sock(sk); 425 break; 426 } 427 428 return ret; 429 } 430 431 static bool mptcp_supported_sockopt(int level, int optname) 432 { 433 if (level == SOL_IP) { 434 switch (optname) { 435 /* should work fine */ 436 case IP_FREEBIND: 437 case IP_TRANSPARENT: 438 439 /* the following are control cmsg related */ 440 case IP_PKTINFO: 441 case IP_RECVTTL: 442 case IP_RECVTOS: 443 case IP_RECVOPTS: 444 case IP_RETOPTS: 445 case IP_PASSSEC: 446 case IP_RECVORIGDSTADDR: 447 case IP_CHECKSUM: 448 case IP_RECVFRAGSIZE: 449 450 /* common stuff that need some love */ 451 case IP_TOS: 452 case IP_TTL: 453 case IP_BIND_ADDRESS_NO_PORT: 454 case IP_MTU_DISCOVER: 455 case IP_RECVERR: 456 457 /* possibly less common may deserve some love */ 458 case IP_MINTTL: 459 460 /* the following is apparently a no-op for plain TCP */ 461 case IP_RECVERR_RFC4884: 462 return true; 463 } 464 465 /* IP_OPTIONS is not supported, needs subflow care */ 466 /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ 467 /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, 468 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, 469 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, 470 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, 471 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, 472 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal 473 * with mcast stuff 474 */ 475 /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ 476 return false; 477 } 478 if (level == SOL_IPV6) { 479 switch (optname) { 480 case IPV6_V6ONLY: 481 482 /* the following are control cmsg related */ 483 case IPV6_RECVPKTINFO: 484 case IPV6_2292PKTINFO: 485 case IPV6_RECVHOPLIMIT: 486 case IPV6_2292HOPLIMIT: 487 case IPV6_RECVRTHDR: 488 case IPV6_2292RTHDR: 489 case IPV6_RECVHOPOPTS: 490 case IPV6_2292HOPOPTS: 491 case IPV6_RECVDSTOPTS: 492 case IPV6_2292DSTOPTS: 493 case IPV6_RECVTCLASS: 494 case IPV6_FLOWINFO: 495 case IPV6_RECVPATHMTU: 496 case IPV6_RECVORIGDSTADDR: 497 case IPV6_RECVFRAGSIZE: 498 499 /* the following ones need some love but are quite common */ 500 case IPV6_TCLASS: 501 case IPV6_TRANSPARENT: 502 case IPV6_FREEBIND: 503 case IPV6_PKTINFO: 504 case IPV6_2292PKTOPTIONS: 505 case IPV6_UNICAST_HOPS: 506 case IPV6_MTU_DISCOVER: 507 case IPV6_MTU: 508 case IPV6_RECVERR: 509 case IPV6_FLOWINFO_SEND: 510 case IPV6_FLOWLABEL_MGR: 511 case IPV6_MINHOPCOUNT: 512 case IPV6_DONTFRAG: 513 case IPV6_AUTOFLOWLABEL: 514 515 /* the following one is a no-op for plain TCP */ 516 case IPV6_RECVERR_RFC4884: 517 return true; 518 } 519 520 /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are 521 * not supported 522 */ 523 /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, 524 * IPV6_MULTICAST_IF, IPV6_ADDRFORM, 525 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, 526 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, 527 * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, 528 * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER 529 * are not supported better not deal with mcast 530 */ 531 /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ 532 533 /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ 534 /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ 535 return false; 536 } 537 if (level == SOL_TCP) { 538 switch (optname) { 539 /* the following are no-op or should work just fine */ 540 case TCP_THIN_DUPACK: 541 case TCP_DEFER_ACCEPT: 542 543 /* the following need some love */ 544 case TCP_MAXSEG: 545 case TCP_NODELAY: 546 case TCP_THIN_LINEAR_TIMEOUTS: 547 case TCP_CONGESTION: 548 case TCP_CORK: 549 case TCP_KEEPIDLE: 550 case TCP_KEEPINTVL: 551 case TCP_KEEPCNT: 552 case TCP_SYNCNT: 553 case TCP_SAVE_SYN: 554 case TCP_LINGER2: 555 case TCP_WINDOW_CLAMP: 556 case TCP_QUICKACK: 557 case TCP_USER_TIMEOUT: 558 case TCP_TIMESTAMP: 559 case TCP_NOTSENT_LOWAT: 560 case TCP_TX_DELAY: 561 case TCP_INQ: 562 return true; 563 } 564 565 /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ 566 567 /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, 568 * TCP_REPAIR_WINDOW are not supported, better avoid this mess 569 */ 570 /* TCP_FASTOPEN_KEY, TCP_FASTOPEN TCP_FASTOPEN_CONNECT, TCP_FASTOPEN_NO_COOKIE, 571 * are not supported fastopen is currently unsupported 572 */ 573 } 574 return false; 575 } 576 577 static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, 578 unsigned int optlen) 579 { 580 struct mptcp_subflow_context *subflow; 581 struct sock *sk = (struct sock *)msk; 582 char name[TCP_CA_NAME_MAX]; 583 bool cap_net_admin; 584 int ret; 585 586 if (optlen < 1) 587 return -EINVAL; 588 589 ret = strncpy_from_sockptr(name, optval, 590 min_t(long, TCP_CA_NAME_MAX - 1, optlen)); 591 if (ret < 0) 592 return -EFAULT; 593 594 name[ret] = 0; 595 596 cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); 597 598 ret = 0; 599 lock_sock(sk); 600 sockopt_seq_inc(msk); 601 mptcp_for_each_subflow(msk, subflow) { 602 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 603 int err; 604 605 lock_sock(ssk); 606 err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); 607 if (err < 0 && ret == 0) 608 ret = err; 609 subflow->setsockopt_seq = msk->setsockopt_seq; 610 release_sock(ssk); 611 } 612 613 if (ret == 0) 614 strcpy(msk->ca_name, name); 615 616 release_sock(sk); 617 return ret; 618 } 619 620 static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval, 621 unsigned int optlen) 622 { 623 struct mptcp_subflow_context *subflow; 624 struct sock *sk = (struct sock *)msk; 625 int val; 626 627 if (optlen < sizeof(int)) 628 return -EINVAL; 629 630 if (copy_from_sockptr(&val, optval, sizeof(val))) 631 return -EFAULT; 632 633 lock_sock(sk); 634 sockopt_seq_inc(msk); 635 msk->cork = !!val; 636 mptcp_for_each_subflow(msk, subflow) { 637 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 638 639 lock_sock(ssk); 640 __tcp_sock_set_cork(ssk, !!val); 641 release_sock(ssk); 642 } 643 if (!val) 644 mptcp_check_and_set_pending(sk); 645 release_sock(sk); 646 647 return 0; 648 } 649 650 static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval, 651 unsigned int optlen) 652 { 653 struct mptcp_subflow_context *subflow; 654 struct sock *sk = (struct sock *)msk; 655 int val; 656 657 if (optlen < sizeof(int)) 658 return -EINVAL; 659 660 if (copy_from_sockptr(&val, optval, sizeof(val))) 661 return -EFAULT; 662 663 lock_sock(sk); 664 sockopt_seq_inc(msk); 665 msk->nodelay = !!val; 666 mptcp_for_each_subflow(msk, subflow) { 667 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 668 669 lock_sock(ssk); 670 __tcp_sock_set_nodelay(ssk, !!val); 671 release_sock(ssk); 672 } 673 if (val) 674 mptcp_check_and_set_pending(sk); 675 release_sock(sk); 676 677 return 0; 678 } 679 680 static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, 681 sockptr_t optval, unsigned int optlen) 682 { 683 struct sock *sk = (struct sock *)msk; 684 struct inet_sock *issk; 685 struct socket *ssock; 686 int err; 687 688 err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 689 if (err != 0) 690 return err; 691 692 lock_sock(sk); 693 694 ssock = __mptcp_nmpc_socket(msk); 695 if (!ssock) { 696 release_sock(sk); 697 return -EINVAL; 698 } 699 700 issk = inet_sk(ssock->sk); 701 702 switch (optname) { 703 case IP_FREEBIND: 704 issk->freebind = inet_sk(sk)->freebind; 705 break; 706 case IP_TRANSPARENT: 707 issk->transparent = inet_sk(sk)->transparent; 708 break; 709 default: 710 release_sock(sk); 711 WARN_ON_ONCE(1); 712 return -EOPNOTSUPP; 713 } 714 715 sockopt_seq_inc(msk); 716 release_sock(sk); 717 return 0; 718 } 719 720 static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, 721 sockptr_t optval, unsigned int optlen) 722 { 723 struct mptcp_subflow_context *subflow; 724 struct sock *sk = (struct sock *)msk; 725 int err, val; 726 727 err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 728 729 if (err != 0) 730 return err; 731 732 lock_sock(sk); 733 sockopt_seq_inc(msk); 734 val = inet_sk(sk)->tos; 735 mptcp_for_each_subflow(msk, subflow) { 736 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 737 738 __ip_sock_set_tos(ssk, val); 739 } 740 release_sock(sk); 741 742 return err; 743 } 744 745 static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, 746 sockptr_t optval, unsigned int optlen) 747 { 748 switch (optname) { 749 case IP_FREEBIND: 750 case IP_TRANSPARENT: 751 return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); 752 case IP_TOS: 753 return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); 754 } 755 756 return -EOPNOTSUPP; 757 } 758 759 static int mptcp_setsockopt_sol_tcp_defer(struct mptcp_sock *msk, sockptr_t optval, 760 unsigned int optlen) 761 { 762 struct socket *listener; 763 764 listener = __mptcp_nmpc_socket(msk); 765 if (!listener) 766 return 0; /* TCP_DEFER_ACCEPT does not fail */ 767 768 return tcp_setsockopt(listener->sk, SOL_TCP, TCP_DEFER_ACCEPT, optval, optlen); 769 } 770 771 static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 772 sockptr_t optval, unsigned int optlen) 773 { 774 struct sock *sk = (void *)msk; 775 int ret, val; 776 777 switch (optname) { 778 case TCP_INQ: 779 ret = mptcp_get_int_option(msk, optval, optlen, &val); 780 if (ret) 781 return ret; 782 if (val < 0 || val > 1) 783 return -EINVAL; 784 785 lock_sock(sk); 786 msk->recvmsg_inq = !!val; 787 release_sock(sk); 788 return 0; 789 case TCP_ULP: 790 return -EOPNOTSUPP; 791 case TCP_CONGESTION: 792 return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); 793 case TCP_CORK: 794 return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen); 795 case TCP_NODELAY: 796 return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); 797 case TCP_DEFER_ACCEPT: 798 return mptcp_setsockopt_sol_tcp_defer(msk, optval, optlen); 799 } 800 801 return -EOPNOTSUPP; 802 } 803 804 int mptcp_setsockopt(struct sock *sk, int level, int optname, 805 sockptr_t optval, unsigned int optlen) 806 { 807 struct mptcp_sock *msk = mptcp_sk(sk); 808 struct sock *ssk; 809 810 pr_debug("msk=%p", msk); 811 812 if (level == SOL_SOCKET) 813 return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); 814 815 if (!mptcp_supported_sockopt(level, optname)) 816 return -ENOPROTOOPT; 817 818 /* @@ the meaning of setsockopt() when the socket is connected and 819 * there are multiple subflows is not yet defined. It is up to the 820 * MPTCP-level socket to configure the subflows until the subflow 821 * is in TCP fallback, when TCP socket options are passed through 822 * to the one remaining subflow. 823 */ 824 lock_sock(sk); 825 ssk = __mptcp_tcp_fallback(msk); 826 release_sock(sk); 827 if (ssk) 828 return tcp_setsockopt(ssk, level, optname, optval, optlen); 829 830 if (level == SOL_IP) 831 return mptcp_setsockopt_v4(msk, optname, optval, optlen); 832 833 if (level == SOL_IPV6) 834 return mptcp_setsockopt_v6(msk, optname, optval, optlen); 835 836 if (level == SOL_TCP) 837 return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); 838 839 return -EOPNOTSUPP; 840 } 841 842 static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, 843 char __user *optval, int __user *optlen) 844 { 845 struct sock *sk = (struct sock *)msk; 846 struct socket *ssock; 847 int ret = -EINVAL; 848 struct sock *ssk; 849 850 lock_sock(sk); 851 ssk = msk->first; 852 if (ssk) { 853 ret = tcp_getsockopt(ssk, level, optname, optval, optlen); 854 goto out; 855 } 856 857 ssock = __mptcp_nmpc_socket(msk); 858 if (!ssock) 859 goto out; 860 861 ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen); 862 863 out: 864 release_sock(sk); 865 return ret; 866 } 867 868 void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) 869 { 870 u32 flags = 0; 871 u8 val; 872 873 memset(info, 0, sizeof(*info)); 874 875 info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); 876 info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); 877 info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); 878 info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); 879 info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk); 880 val = mptcp_pm_get_add_addr_signal_max(msk); 881 info->mptcpi_add_addr_signal_max = val; 882 val = mptcp_pm_get_add_addr_accept_max(msk); 883 info->mptcpi_add_addr_accepted_max = val; 884 info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk); 885 if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) 886 flags |= MPTCP_INFO_FLAG_FALLBACK; 887 if (READ_ONCE(msk->can_ack)) 888 flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; 889 info->mptcpi_flags = flags; 890 info->mptcpi_token = READ_ONCE(msk->token); 891 info->mptcpi_write_seq = READ_ONCE(msk->write_seq); 892 info->mptcpi_snd_una = READ_ONCE(msk->snd_una); 893 info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); 894 info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); 895 } 896 EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); 897 898 static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) 899 { 900 struct mptcp_info m_info; 901 int len; 902 903 if (get_user(len, optlen)) 904 return -EFAULT; 905 906 len = min_t(unsigned int, len, sizeof(struct mptcp_info)); 907 908 mptcp_diag_fill_info(msk, &m_info); 909 910 if (put_user(len, optlen)) 911 return -EFAULT; 912 913 if (copy_to_user(optval, &m_info, len)) 914 return -EFAULT; 915 916 return 0; 917 } 918 919 static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, 920 char __user *optval, 921 u32 copied, 922 int __user *optlen) 923 { 924 u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); 925 926 if (copied) 927 copied += sfd->size_subflow_data; 928 else 929 copied = copylen; 930 931 if (put_user(copied, optlen)) 932 return -EFAULT; 933 934 if (copy_to_user(optval, sfd, copylen)) 935 return -EFAULT; 936 937 return 0; 938 } 939 940 static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, 941 char __user *optval, int __user *optlen) 942 { 943 int len, copylen; 944 945 if (get_user(len, optlen)) 946 return -EFAULT; 947 948 /* if mptcp_subflow_data size is changed, need to adjust 949 * this function to deal with programs using old version. 950 */ 951 BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); 952 953 if (len < MIN_INFO_OPTLEN_SIZE) 954 return -EINVAL; 955 956 memset(sfd, 0, sizeof(*sfd)); 957 958 copylen = min_t(unsigned int, len, sizeof(*sfd)); 959 if (copy_from_user(sfd, optval, copylen)) 960 return -EFAULT; 961 962 /* size_subflow_data is u32, but len is signed */ 963 if (sfd->size_subflow_data > INT_MAX || 964 sfd->size_user > INT_MAX) 965 return -EINVAL; 966 967 if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || 968 sfd->size_subflow_data > len) 969 return -EINVAL; 970 971 if (sfd->num_subflows || sfd->size_kernel) 972 return -EINVAL; 973 974 return len - sfd->size_subflow_data; 975 } 976 977 static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, 978 int __user *optlen) 979 { 980 struct mptcp_subflow_context *subflow; 981 struct sock *sk = &msk->sk.icsk_inet.sk; 982 unsigned int sfcount = 0, copied = 0; 983 struct mptcp_subflow_data sfd; 984 char __user *infoptr; 985 int len; 986 987 len = mptcp_get_subflow_data(&sfd, optval, optlen); 988 if (len < 0) 989 return len; 990 991 sfd.size_kernel = sizeof(struct tcp_info); 992 sfd.size_user = min_t(unsigned int, sfd.size_user, 993 sizeof(struct tcp_info)); 994 995 infoptr = optval + sfd.size_subflow_data; 996 997 lock_sock(sk); 998 999 mptcp_for_each_subflow(msk, subflow) { 1000 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1001 1002 ++sfcount; 1003 1004 if (len && len >= sfd.size_user) { 1005 struct tcp_info info; 1006 1007 tcp_get_info(ssk, &info); 1008 1009 if (copy_to_user(infoptr, &info, sfd.size_user)) { 1010 release_sock(sk); 1011 return -EFAULT; 1012 } 1013 1014 infoptr += sfd.size_user; 1015 copied += sfd.size_user; 1016 len -= sfd.size_user; 1017 } 1018 } 1019 1020 release_sock(sk); 1021 1022 sfd.num_subflows = sfcount; 1023 1024 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 1025 return -EFAULT; 1026 1027 return 0; 1028 } 1029 1030 static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) 1031 { 1032 struct inet_sock *inet = inet_sk(sk); 1033 1034 memset(a, 0, sizeof(*a)); 1035 1036 if (sk->sk_family == AF_INET) { 1037 a->sin_local.sin_family = AF_INET; 1038 a->sin_local.sin_port = inet->inet_sport; 1039 a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; 1040 1041 if (!a->sin_local.sin_addr.s_addr) 1042 a->sin_local.sin_addr.s_addr = inet->inet_saddr; 1043 1044 a->sin_remote.sin_family = AF_INET; 1045 a->sin_remote.sin_port = inet->inet_dport; 1046 a->sin_remote.sin_addr.s_addr = inet->inet_daddr; 1047 #if IS_ENABLED(CONFIG_IPV6) 1048 } else if (sk->sk_family == AF_INET6) { 1049 const struct ipv6_pinfo *np = inet6_sk(sk); 1050 1051 if (WARN_ON_ONCE(!np)) 1052 return; 1053 1054 a->sin6_local.sin6_family = AF_INET6; 1055 a->sin6_local.sin6_port = inet->inet_sport; 1056 1057 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 1058 a->sin6_local.sin6_addr = np->saddr; 1059 else 1060 a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; 1061 1062 a->sin6_remote.sin6_family = AF_INET6; 1063 a->sin6_remote.sin6_port = inet->inet_dport; 1064 a->sin6_remote.sin6_addr = sk->sk_v6_daddr; 1065 #endif 1066 } 1067 } 1068 1069 static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, 1070 int __user *optlen) 1071 { 1072 struct sock *sk = &msk->sk.icsk_inet.sk; 1073 struct mptcp_subflow_context *subflow; 1074 unsigned int sfcount = 0, copied = 0; 1075 struct mptcp_subflow_data sfd; 1076 char __user *addrptr; 1077 int len; 1078 1079 len = mptcp_get_subflow_data(&sfd, optval, optlen); 1080 if (len < 0) 1081 return len; 1082 1083 sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); 1084 sfd.size_user = min_t(unsigned int, sfd.size_user, 1085 sizeof(struct mptcp_subflow_addrs)); 1086 1087 addrptr = optval + sfd.size_subflow_data; 1088 1089 lock_sock(sk); 1090 1091 mptcp_for_each_subflow(msk, subflow) { 1092 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1093 1094 ++sfcount; 1095 1096 if (len && len >= sfd.size_user) { 1097 struct mptcp_subflow_addrs a; 1098 1099 mptcp_get_sub_addrs(ssk, &a); 1100 1101 if (copy_to_user(addrptr, &a, sfd.size_user)) { 1102 release_sock(sk); 1103 return -EFAULT; 1104 } 1105 1106 addrptr += sfd.size_user; 1107 copied += sfd.size_user; 1108 len -= sfd.size_user; 1109 } 1110 } 1111 1112 release_sock(sk); 1113 1114 sfd.num_subflows = sfcount; 1115 1116 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 1117 return -EFAULT; 1118 1119 return 0; 1120 } 1121 1122 static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, 1123 int __user *optlen, int val) 1124 { 1125 int len; 1126 1127 if (get_user(len, optlen)) 1128 return -EFAULT; 1129 if (len < 0) 1130 return -EINVAL; 1131 1132 if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { 1133 unsigned char ucval = (unsigned char)val; 1134 1135 len = 1; 1136 if (put_user(len, optlen)) 1137 return -EFAULT; 1138 if (copy_to_user(optval, &ucval, 1)) 1139 return -EFAULT; 1140 } else { 1141 len = min_t(unsigned int, len, sizeof(int)); 1142 if (put_user(len, optlen)) 1143 return -EFAULT; 1144 if (copy_to_user(optval, &val, len)) 1145 return -EFAULT; 1146 } 1147 1148 return 0; 1149 } 1150 1151 static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 1152 char __user *optval, int __user *optlen) 1153 { 1154 switch (optname) { 1155 case TCP_ULP: 1156 case TCP_CONGESTION: 1157 case TCP_INFO: 1158 case TCP_CC_INFO: 1159 case TCP_DEFER_ACCEPT: 1160 return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, 1161 optval, optlen); 1162 case TCP_INQ: 1163 return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq); 1164 case TCP_CORK: 1165 return mptcp_put_int_option(msk, optval, optlen, msk->cork); 1166 case TCP_NODELAY: 1167 return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); 1168 } 1169 return -EOPNOTSUPP; 1170 } 1171 1172 static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, 1173 char __user *optval, int __user *optlen) 1174 { 1175 struct sock *sk = (void *)msk; 1176 1177 switch (optname) { 1178 case IP_TOS: 1179 return mptcp_put_int_option(msk, optval, optlen, inet_sk(sk)->tos); 1180 } 1181 1182 return -EOPNOTSUPP; 1183 } 1184 1185 static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, 1186 char __user *optval, int __user *optlen) 1187 { 1188 switch (optname) { 1189 case MPTCP_INFO: 1190 return mptcp_getsockopt_info(msk, optval, optlen); 1191 case MPTCP_TCPINFO: 1192 return mptcp_getsockopt_tcpinfo(msk, optval, optlen); 1193 case MPTCP_SUBFLOW_ADDRS: 1194 return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); 1195 } 1196 1197 return -EOPNOTSUPP; 1198 } 1199 1200 int mptcp_getsockopt(struct sock *sk, int level, int optname, 1201 char __user *optval, int __user *option) 1202 { 1203 struct mptcp_sock *msk = mptcp_sk(sk); 1204 struct sock *ssk; 1205 1206 pr_debug("msk=%p", msk); 1207 1208 /* @@ the meaning of setsockopt() when the socket is connected and 1209 * there are multiple subflows is not yet defined. It is up to the 1210 * MPTCP-level socket to configure the subflows until the subflow 1211 * is in TCP fallback, when socket options are passed through 1212 * to the one remaining subflow. 1213 */ 1214 lock_sock(sk); 1215 ssk = __mptcp_tcp_fallback(msk); 1216 release_sock(sk); 1217 if (ssk) 1218 return tcp_getsockopt(ssk, level, optname, optval, option); 1219 1220 if (level == SOL_IP) 1221 return mptcp_getsockopt_v4(msk, optname, optval, option); 1222 if (level == SOL_TCP) 1223 return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); 1224 if (level == SOL_MPTCP) 1225 return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); 1226 return -EOPNOTSUPP; 1227 } 1228 1229 static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) 1230 { 1231 static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; 1232 struct sock *sk = (struct sock *)msk; 1233 1234 if (ssk->sk_prot->keepalive) { 1235 if (sock_flag(sk, SOCK_KEEPOPEN)) 1236 ssk->sk_prot->keepalive(ssk, 1); 1237 else 1238 ssk->sk_prot->keepalive(ssk, 0); 1239 } 1240 1241 ssk->sk_priority = sk->sk_priority; 1242 ssk->sk_bound_dev_if = sk->sk_bound_dev_if; 1243 ssk->sk_incoming_cpu = sk->sk_incoming_cpu; 1244 __ip_sock_set_tos(ssk, inet_sk(sk)->tos); 1245 1246 if (sk->sk_userlocks & tx_rx_locks) { 1247 ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; 1248 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) 1249 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 1250 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1251 WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); 1252 } 1253 1254 if (sock_flag(sk, SOCK_LINGER)) { 1255 ssk->sk_lingertime = sk->sk_lingertime; 1256 sock_set_flag(ssk, SOCK_LINGER); 1257 } else { 1258 sock_reset_flag(ssk, SOCK_LINGER); 1259 } 1260 1261 if (sk->sk_mark != ssk->sk_mark) { 1262 ssk->sk_mark = sk->sk_mark; 1263 sk_dst_reset(ssk); 1264 } 1265 1266 sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); 1267 1268 if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) 1269 tcp_set_congestion_control(ssk, msk->ca_name, false, true); 1270 __tcp_sock_set_cork(ssk, !!msk->cork); 1271 __tcp_sock_set_nodelay(ssk, !!msk->nodelay); 1272 1273 inet_sk(ssk)->transparent = inet_sk(sk)->transparent; 1274 inet_sk(ssk)->freebind = inet_sk(sk)->freebind; 1275 } 1276 1277 static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) 1278 { 1279 bool slow = lock_sock_fast(ssk); 1280 1281 sync_socket_options(msk, ssk); 1282 1283 unlock_sock_fast(ssk, slow); 1284 } 1285 1286 void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) 1287 { 1288 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 1289 1290 msk_owned_by_me(msk); 1291 1292 if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { 1293 __mptcp_sockopt_sync(msk, ssk); 1294 1295 subflow->setsockopt_seq = msk->setsockopt_seq; 1296 } 1297 } 1298 1299 void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) 1300 { 1301 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 1302 1303 msk_owned_by_me(msk); 1304 1305 if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { 1306 sync_socket_options(msk, ssk); 1307 1308 subflow->setsockopt_seq = msk->setsockopt_seq; 1309 } 1310 } 1311