1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2021, Red Hat. 5 */ 6 7 #define pr_fmt(fmt) "MPTCP: " fmt 8 9 #include <linux/kernel.h> 10 #include <linux/module.h> 11 #include <net/sock.h> 12 #include <net/protocol.h> 13 #include <net/tcp.h> 14 #include <net/mptcp.h> 15 #include "protocol.h" 16 17 #define MIN_INFO_OPTLEN_SIZE 16 18 19 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) 20 { 21 sock_owned_by_me((const struct sock *)msk); 22 23 if (likely(!__mptcp_check_fallback(msk))) 24 return NULL; 25 26 return msk->first; 27 } 28 29 static u32 sockopt_seq_reset(const struct sock *sk) 30 { 31 sock_owned_by_me(sk); 32 33 /* Highbits contain state. Allows to distinguish sockopt_seq 34 * of listener and established: 35 * s0 = new_listener() 36 * sockopt(s0) - seq is 1 37 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) 38 * sockopt(s0) - seq increments to 2 on s0 39 * sockopt(s1) // seq increments to 2 on s1 (different option) 40 * new ssk completes join, inherits options from s0 // seq 2 41 * Needs sync from mptcp join logic, but ssk->seq == msk->seq 42 * 43 * Set High order bits to sk_state so ssk->seq == msk->seq test 44 * will fail. 45 */ 46 47 return (u32)sk->sk_state << 24u; 48 } 49 50 static void sockopt_seq_inc(struct mptcp_sock *msk) 51 { 52 u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; 53 54 msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; 55 } 56 57 static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, 58 unsigned int optlen, int *val) 59 { 60 if (optlen < sizeof(int)) 61 return -EINVAL; 62 63 if (copy_from_sockptr(val, optval, sizeof(*val))) 64 return -EFAULT; 65 66 return 0; 67 } 68 69 static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) 70 { 71 struct mptcp_subflow_context *subflow; 72 struct sock *sk = (struct sock *)msk; 73 74 lock_sock(sk); 75 sockopt_seq_inc(msk); 76 77 mptcp_for_each_subflow(msk, subflow) { 78 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 79 bool slow = lock_sock_fast(ssk); 80 81 switch (optname) { 82 case SO_DEBUG: 83 sock_valbool_flag(ssk, SOCK_DBG, !!val); 84 break; 85 case SO_KEEPALIVE: 86 if (ssk->sk_prot->keepalive) 87 ssk->sk_prot->keepalive(ssk, !!val); 88 sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); 89 break; 90 case SO_PRIORITY: 91 ssk->sk_priority = val; 92 break; 93 case SO_SNDBUF: 94 case SO_SNDBUFFORCE: 95 ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; 96 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 97 break; 98 case SO_RCVBUF: 99 case SO_RCVBUFFORCE: 100 ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; 101 WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); 102 break; 103 case SO_MARK: 104 if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { 105 ssk->sk_mark = sk->sk_mark; 106 sk_dst_reset(ssk); 107 } 108 break; 109 case SO_INCOMING_CPU: 110 WRITE_ONCE(ssk->sk_incoming_cpu, val); 111 break; 112 } 113 114 subflow->setsockopt_seq = msk->setsockopt_seq; 115 unlock_sock_fast(ssk, slow); 116 } 117 118 release_sock(sk); 119 } 120 121 static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) 122 { 123 sockptr_t optval = KERNEL_SOCKPTR(&val); 124 struct sock *sk = (struct sock *)msk; 125 int ret; 126 127 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 128 optval, sizeof(val)); 129 if (ret) 130 return ret; 131 132 mptcp_sol_socket_sync_intval(msk, optname, val); 133 return 0; 134 } 135 136 static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) 137 { 138 struct sock *sk = (struct sock *)msk; 139 140 WRITE_ONCE(sk->sk_incoming_cpu, val); 141 142 mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); 143 } 144 145 static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) 146 { 147 sockptr_t optval = KERNEL_SOCKPTR(&val); 148 struct mptcp_subflow_context *subflow; 149 struct sock *sk = (struct sock *)msk; 150 int ret; 151 152 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 153 optval, sizeof(val)); 154 if (ret) 155 return ret; 156 157 lock_sock(sk); 158 mptcp_for_each_subflow(msk, subflow) { 159 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 160 bool slow = lock_sock_fast(ssk); 161 162 sock_set_timestamp(sk, optname, !!val); 163 unlock_sock_fast(ssk, slow); 164 } 165 166 release_sock(sk); 167 return 0; 168 } 169 170 static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, 171 sockptr_t optval, 172 unsigned int optlen) 173 { 174 int val, ret; 175 176 ret = mptcp_get_int_option(msk, optval, optlen, &val); 177 if (ret) 178 return ret; 179 180 switch (optname) { 181 case SO_KEEPALIVE: 182 mptcp_sol_socket_sync_intval(msk, optname, val); 183 return 0; 184 case SO_DEBUG: 185 case SO_MARK: 186 case SO_PRIORITY: 187 case SO_SNDBUF: 188 case SO_SNDBUFFORCE: 189 case SO_RCVBUF: 190 case SO_RCVBUFFORCE: 191 return mptcp_sol_socket_intval(msk, optname, val); 192 case SO_INCOMING_CPU: 193 mptcp_so_incoming_cpu(msk, val); 194 return 0; 195 case SO_TIMESTAMP_OLD: 196 case SO_TIMESTAMP_NEW: 197 case SO_TIMESTAMPNS_OLD: 198 case SO_TIMESTAMPNS_NEW: 199 return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); 200 } 201 202 return -ENOPROTOOPT; 203 } 204 205 static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, 206 int optname, 207 sockptr_t optval, 208 unsigned int optlen) 209 { 210 struct mptcp_subflow_context *subflow; 211 struct sock *sk = (struct sock *)msk; 212 struct so_timestamping timestamping; 213 int ret; 214 215 if (optlen == sizeof(timestamping)) { 216 if (copy_from_sockptr(×tamping, optval, 217 sizeof(timestamping))) 218 return -EFAULT; 219 } else if (optlen == sizeof(int)) { 220 memset(×tamping, 0, sizeof(timestamping)); 221 222 if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) 223 return -EFAULT; 224 } else { 225 return -EINVAL; 226 } 227 228 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 229 KERNEL_SOCKPTR(×tamping), 230 sizeof(timestamping)); 231 if (ret) 232 return ret; 233 234 lock_sock(sk); 235 236 mptcp_for_each_subflow(msk, subflow) { 237 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 238 bool slow = lock_sock_fast(ssk); 239 240 sock_set_timestamping(sk, optname, timestamping); 241 unlock_sock_fast(ssk, slow); 242 } 243 244 release_sock(sk); 245 246 return 0; 247 } 248 249 static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, 250 unsigned int optlen) 251 { 252 struct mptcp_subflow_context *subflow; 253 struct sock *sk = (struct sock *)msk; 254 struct linger ling; 255 sockptr_t kopt; 256 int ret; 257 258 if (optlen < sizeof(ling)) 259 return -EINVAL; 260 261 if (copy_from_sockptr(&ling, optval, sizeof(ling))) 262 return -EFAULT; 263 264 kopt = KERNEL_SOCKPTR(&ling); 265 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); 266 if (ret) 267 return ret; 268 269 lock_sock(sk); 270 sockopt_seq_inc(msk); 271 mptcp_for_each_subflow(msk, subflow) { 272 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 273 bool slow = lock_sock_fast(ssk); 274 275 if (!ling.l_onoff) { 276 sock_reset_flag(ssk, SOCK_LINGER); 277 } else { 278 ssk->sk_lingertime = sk->sk_lingertime; 279 sock_set_flag(ssk, SOCK_LINGER); 280 } 281 282 subflow->setsockopt_seq = msk->setsockopt_seq; 283 unlock_sock_fast(ssk, slow); 284 } 285 286 release_sock(sk); 287 return 0; 288 } 289 290 static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, 291 sockptr_t optval, unsigned int optlen) 292 { 293 struct sock *sk = (struct sock *)msk; 294 struct socket *ssock; 295 int ret; 296 297 switch (optname) { 298 case SO_REUSEPORT: 299 case SO_REUSEADDR: 300 case SO_BINDTODEVICE: 301 case SO_BINDTOIFINDEX: 302 lock_sock(sk); 303 ssock = __mptcp_nmpc_socket(msk); 304 if (!ssock) { 305 release_sock(sk); 306 return -EINVAL; 307 } 308 309 ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); 310 if (ret == 0) { 311 if (optname == SO_REUSEPORT) 312 sk->sk_reuseport = ssock->sk->sk_reuseport; 313 else if (optname == SO_REUSEADDR) 314 sk->sk_reuse = ssock->sk->sk_reuse; 315 else if (optname == SO_BINDTODEVICE) 316 sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; 317 else if (optname == SO_BINDTOIFINDEX) 318 sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; 319 } 320 release_sock(sk); 321 return ret; 322 case SO_KEEPALIVE: 323 case SO_PRIORITY: 324 case SO_SNDBUF: 325 case SO_SNDBUFFORCE: 326 case SO_RCVBUF: 327 case SO_RCVBUFFORCE: 328 case SO_MARK: 329 case SO_INCOMING_CPU: 330 case SO_DEBUG: 331 case SO_TIMESTAMP_OLD: 332 case SO_TIMESTAMP_NEW: 333 case SO_TIMESTAMPNS_OLD: 334 case SO_TIMESTAMPNS_NEW: 335 return mptcp_setsockopt_sol_socket_int(msk, optname, optval, 336 optlen); 337 case SO_TIMESTAMPING_OLD: 338 case SO_TIMESTAMPING_NEW: 339 return mptcp_setsockopt_sol_socket_timestamping(msk, optname, 340 optval, optlen); 341 case SO_LINGER: 342 return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); 343 case SO_RCVLOWAT: 344 case SO_RCVTIMEO_OLD: 345 case SO_RCVTIMEO_NEW: 346 case SO_BUSY_POLL: 347 case SO_PREFER_BUSY_POLL: 348 case SO_BUSY_POLL_BUDGET: 349 /* No need to copy: only relevant for msk */ 350 return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); 351 case SO_NO_CHECK: 352 case SO_DONTROUTE: 353 case SO_BROADCAST: 354 case SO_BSDCOMPAT: 355 case SO_PASSCRED: 356 case SO_PASSSEC: 357 case SO_RXQ_OVFL: 358 case SO_WIFI_STATUS: 359 case SO_NOFCS: 360 case SO_SELECT_ERR_QUEUE: 361 return 0; 362 } 363 364 /* SO_OOBINLINE is not supported, let's avoid the related mess 365 * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, 366 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, 367 * we must be careful with subflows 368 * 369 * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks 370 * explicitly the sk_protocol field 371 * 372 * SO_PEEK_OFF is unsupported, as it is for plain TCP 373 * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows 374 * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, 375 * but likely needs careful design 376 * 377 * SO_ZEROCOPY is currently unsupported, TODO in sndmsg 378 * SO_TXTIME is currently unsupported 379 */ 380 381 return -EOPNOTSUPP; 382 } 383 384 static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, 385 sockptr_t optval, unsigned int optlen) 386 { 387 struct sock *sk = (struct sock *)msk; 388 int ret = -EOPNOTSUPP; 389 struct socket *ssock; 390 391 switch (optname) { 392 case IPV6_V6ONLY: 393 lock_sock(sk); 394 ssock = __mptcp_nmpc_socket(msk); 395 if (!ssock) { 396 release_sock(sk); 397 return -EINVAL; 398 } 399 400 ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); 401 if (ret == 0) 402 sk->sk_ipv6only = ssock->sk->sk_ipv6only; 403 404 release_sock(sk); 405 break; 406 } 407 408 return ret; 409 } 410 411 static bool mptcp_supported_sockopt(int level, int optname) 412 { 413 if (level == SOL_IP) { 414 switch (optname) { 415 /* should work fine */ 416 case IP_FREEBIND: 417 case IP_TRANSPARENT: 418 419 /* the following are control cmsg related */ 420 case IP_PKTINFO: 421 case IP_RECVTTL: 422 case IP_RECVTOS: 423 case IP_RECVOPTS: 424 case IP_RETOPTS: 425 case IP_PASSSEC: 426 case IP_RECVORIGDSTADDR: 427 case IP_CHECKSUM: 428 case IP_RECVFRAGSIZE: 429 430 /* common stuff that need some love */ 431 case IP_TOS: 432 case IP_TTL: 433 case IP_BIND_ADDRESS_NO_PORT: 434 case IP_MTU_DISCOVER: 435 case IP_RECVERR: 436 437 /* possibly less common may deserve some love */ 438 case IP_MINTTL: 439 440 /* the following is apparently a no-op for plain TCP */ 441 case IP_RECVERR_RFC4884: 442 return true; 443 } 444 445 /* IP_OPTIONS is not supported, needs subflow care */ 446 /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ 447 /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, 448 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, 449 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, 450 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, 451 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, 452 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal 453 * with mcast stuff 454 */ 455 /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ 456 return false; 457 } 458 if (level == SOL_IPV6) { 459 switch (optname) { 460 case IPV6_V6ONLY: 461 462 /* the following are control cmsg related */ 463 case IPV6_RECVPKTINFO: 464 case IPV6_2292PKTINFO: 465 case IPV6_RECVHOPLIMIT: 466 case IPV6_2292HOPLIMIT: 467 case IPV6_RECVRTHDR: 468 case IPV6_2292RTHDR: 469 case IPV6_RECVHOPOPTS: 470 case IPV6_2292HOPOPTS: 471 case IPV6_RECVDSTOPTS: 472 case IPV6_2292DSTOPTS: 473 case IPV6_RECVTCLASS: 474 case IPV6_FLOWINFO: 475 case IPV6_RECVPATHMTU: 476 case IPV6_RECVORIGDSTADDR: 477 case IPV6_RECVFRAGSIZE: 478 479 /* the following ones need some love but are quite common */ 480 case IPV6_TCLASS: 481 case IPV6_TRANSPARENT: 482 case IPV6_FREEBIND: 483 case IPV6_PKTINFO: 484 case IPV6_2292PKTOPTIONS: 485 case IPV6_UNICAST_HOPS: 486 case IPV6_MTU_DISCOVER: 487 case IPV6_MTU: 488 case IPV6_RECVERR: 489 case IPV6_FLOWINFO_SEND: 490 case IPV6_FLOWLABEL_MGR: 491 case IPV6_MINHOPCOUNT: 492 case IPV6_DONTFRAG: 493 case IPV6_AUTOFLOWLABEL: 494 495 /* the following one is a no-op for plain TCP */ 496 case IPV6_RECVERR_RFC4884: 497 return true; 498 } 499 500 /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are 501 * not supported 502 */ 503 /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, 504 * IPV6_MULTICAST_IF, IPV6_ADDRFORM, 505 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, 506 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, 507 * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, 508 * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER 509 * are not supported better not deal with mcast 510 */ 511 /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ 512 513 /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ 514 /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ 515 return false; 516 } 517 if (level == SOL_TCP) { 518 switch (optname) { 519 /* the following are no-op or should work just fine */ 520 case TCP_THIN_DUPACK: 521 case TCP_DEFER_ACCEPT: 522 523 /* the following need some love */ 524 case TCP_MAXSEG: 525 case TCP_NODELAY: 526 case TCP_THIN_LINEAR_TIMEOUTS: 527 case TCP_CONGESTION: 528 case TCP_CORK: 529 case TCP_KEEPIDLE: 530 case TCP_KEEPINTVL: 531 case TCP_KEEPCNT: 532 case TCP_SYNCNT: 533 case TCP_SAVE_SYN: 534 case TCP_LINGER2: 535 case TCP_WINDOW_CLAMP: 536 case TCP_QUICKACK: 537 case TCP_USER_TIMEOUT: 538 case TCP_TIMESTAMP: 539 case TCP_NOTSENT_LOWAT: 540 case TCP_TX_DELAY: 541 return true; 542 } 543 544 /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ 545 546 /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, 547 * TCP_REPAIR_WINDOW are not supported, better avoid this mess 548 */ 549 /* TCP_FASTOPEN_KEY, TCP_FASTOPEN TCP_FASTOPEN_CONNECT, TCP_FASTOPEN_NO_COOKIE, 550 * are not supported fastopen is currently unsupported 551 */ 552 /* TCP_INQ is currently unsupported, needs some recvmsg work */ 553 } 554 return false; 555 } 556 557 static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, 558 unsigned int optlen) 559 { 560 struct mptcp_subflow_context *subflow; 561 struct sock *sk = (struct sock *)msk; 562 char name[TCP_CA_NAME_MAX]; 563 bool cap_net_admin; 564 int ret; 565 566 if (optlen < 1) 567 return -EINVAL; 568 569 ret = strncpy_from_sockptr(name, optval, 570 min_t(long, TCP_CA_NAME_MAX - 1, optlen)); 571 if (ret < 0) 572 return -EFAULT; 573 574 name[ret] = 0; 575 576 cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); 577 578 ret = 0; 579 lock_sock(sk); 580 sockopt_seq_inc(msk); 581 mptcp_for_each_subflow(msk, subflow) { 582 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 583 int err; 584 585 lock_sock(ssk); 586 err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); 587 if (err < 0 && ret == 0) 588 ret = err; 589 subflow->setsockopt_seq = msk->setsockopt_seq; 590 release_sock(ssk); 591 } 592 593 if (ret == 0) 594 strcpy(msk->ca_name, name); 595 596 release_sock(sk); 597 return ret; 598 } 599 600 static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 601 sockptr_t optval, unsigned int optlen) 602 { 603 switch (optname) { 604 case TCP_ULP: 605 return -EOPNOTSUPP; 606 case TCP_CONGESTION: 607 return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); 608 } 609 610 return -EOPNOTSUPP; 611 } 612 613 int mptcp_setsockopt(struct sock *sk, int level, int optname, 614 sockptr_t optval, unsigned int optlen) 615 { 616 struct mptcp_sock *msk = mptcp_sk(sk); 617 struct sock *ssk; 618 619 pr_debug("msk=%p", msk); 620 621 if (level == SOL_SOCKET) 622 return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); 623 624 if (!mptcp_supported_sockopt(level, optname)) 625 return -ENOPROTOOPT; 626 627 /* @@ the meaning of setsockopt() when the socket is connected and 628 * there are multiple subflows is not yet defined. It is up to the 629 * MPTCP-level socket to configure the subflows until the subflow 630 * is in TCP fallback, when TCP socket options are passed through 631 * to the one remaining subflow. 632 */ 633 lock_sock(sk); 634 ssk = __mptcp_tcp_fallback(msk); 635 release_sock(sk); 636 if (ssk) 637 return tcp_setsockopt(ssk, level, optname, optval, optlen); 638 639 if (level == SOL_IPV6) 640 return mptcp_setsockopt_v6(msk, optname, optval, optlen); 641 642 if (level == SOL_TCP) 643 return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); 644 645 return -EOPNOTSUPP; 646 } 647 648 static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, 649 char __user *optval, int __user *optlen) 650 { 651 struct sock *sk = (struct sock *)msk; 652 struct socket *ssock; 653 int ret = -EINVAL; 654 struct sock *ssk; 655 656 lock_sock(sk); 657 ssk = msk->first; 658 if (ssk) { 659 ret = tcp_getsockopt(ssk, level, optname, optval, optlen); 660 goto out; 661 } 662 663 ssock = __mptcp_nmpc_socket(msk); 664 if (!ssock) 665 goto out; 666 667 ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen); 668 669 out: 670 release_sock(sk); 671 return ret; 672 } 673 674 void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) 675 { 676 struct sock *sk = &msk->sk.icsk_inet.sk; 677 u32 flags = 0; 678 bool slow; 679 u8 val; 680 681 memset(info, 0, sizeof(*info)); 682 683 slow = lock_sock_fast(sk); 684 685 info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); 686 info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); 687 info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); 688 info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); 689 info->mptcpi_subflows_max = mptcp_pm_get_subflows_max(msk); 690 val = mptcp_pm_get_add_addr_signal_max(msk); 691 info->mptcpi_add_addr_signal_max = val; 692 val = mptcp_pm_get_add_addr_accept_max(msk); 693 info->mptcpi_add_addr_accepted_max = val; 694 info->mptcpi_local_addr_max = mptcp_pm_get_local_addr_max(msk); 695 if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) 696 flags |= MPTCP_INFO_FLAG_FALLBACK; 697 if (READ_ONCE(msk->can_ack)) 698 flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; 699 info->mptcpi_flags = flags; 700 info->mptcpi_token = READ_ONCE(msk->token); 701 info->mptcpi_write_seq = READ_ONCE(msk->write_seq); 702 info->mptcpi_snd_una = READ_ONCE(msk->snd_una); 703 info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq); 704 info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); 705 706 unlock_sock_fast(sk, slow); 707 } 708 EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); 709 710 static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) 711 { 712 struct mptcp_info m_info; 713 int len; 714 715 if (get_user(len, optlen)) 716 return -EFAULT; 717 718 len = min_t(unsigned int, len, sizeof(struct mptcp_info)); 719 720 mptcp_diag_fill_info(msk, &m_info); 721 722 if (put_user(len, optlen)) 723 return -EFAULT; 724 725 if (copy_to_user(optval, &m_info, len)) 726 return -EFAULT; 727 728 return 0; 729 } 730 731 static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, 732 char __user *optval, 733 u32 copied, 734 int __user *optlen) 735 { 736 u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); 737 738 if (copied) 739 copied += sfd->size_subflow_data; 740 else 741 copied = copylen; 742 743 if (put_user(copied, optlen)) 744 return -EFAULT; 745 746 if (copy_to_user(optval, sfd, copylen)) 747 return -EFAULT; 748 749 return 0; 750 } 751 752 static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, 753 char __user *optval, int __user *optlen) 754 { 755 int len, copylen; 756 757 if (get_user(len, optlen)) 758 return -EFAULT; 759 760 /* if mptcp_subflow_data size is changed, need to adjust 761 * this function to deal with programs using old version. 762 */ 763 BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); 764 765 if (len < MIN_INFO_OPTLEN_SIZE) 766 return -EINVAL; 767 768 memset(sfd, 0, sizeof(*sfd)); 769 770 copylen = min_t(unsigned int, len, sizeof(*sfd)); 771 if (copy_from_user(sfd, optval, copylen)) 772 return -EFAULT; 773 774 /* size_subflow_data is u32, but len is signed */ 775 if (sfd->size_subflow_data > INT_MAX || 776 sfd->size_user > INT_MAX) 777 return -EINVAL; 778 779 if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || 780 sfd->size_subflow_data > len) 781 return -EINVAL; 782 783 if (sfd->num_subflows || sfd->size_kernel) 784 return -EINVAL; 785 786 return len - sfd->size_subflow_data; 787 } 788 789 static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, 790 int __user *optlen) 791 { 792 struct mptcp_subflow_context *subflow; 793 struct sock *sk = &msk->sk.icsk_inet.sk; 794 unsigned int sfcount = 0, copied = 0; 795 struct mptcp_subflow_data sfd; 796 char __user *infoptr; 797 int len; 798 799 len = mptcp_get_subflow_data(&sfd, optval, optlen); 800 if (len < 0) 801 return len; 802 803 sfd.size_kernel = sizeof(struct tcp_info); 804 sfd.size_user = min_t(unsigned int, sfd.size_user, 805 sizeof(struct tcp_info)); 806 807 infoptr = optval + sfd.size_subflow_data; 808 809 lock_sock(sk); 810 811 mptcp_for_each_subflow(msk, subflow) { 812 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 813 814 ++sfcount; 815 816 if (len && len >= sfd.size_user) { 817 struct tcp_info info; 818 819 tcp_get_info(ssk, &info); 820 821 if (copy_to_user(infoptr, &info, sfd.size_user)) { 822 release_sock(sk); 823 return -EFAULT; 824 } 825 826 infoptr += sfd.size_user; 827 copied += sfd.size_user; 828 len -= sfd.size_user; 829 } 830 } 831 832 release_sock(sk); 833 834 sfd.num_subflows = sfcount; 835 836 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 837 return -EFAULT; 838 839 return 0; 840 } 841 842 static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) 843 { 844 struct inet_sock *inet = inet_sk(sk); 845 846 memset(a, 0, sizeof(*a)); 847 848 if (sk->sk_family == AF_INET) { 849 a->sin_local.sin_family = AF_INET; 850 a->sin_local.sin_port = inet->inet_sport; 851 a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; 852 853 if (!a->sin_local.sin_addr.s_addr) 854 a->sin_local.sin_addr.s_addr = inet->inet_saddr; 855 856 a->sin_remote.sin_family = AF_INET; 857 a->sin_remote.sin_port = inet->inet_dport; 858 a->sin_remote.sin_addr.s_addr = inet->inet_daddr; 859 #if IS_ENABLED(CONFIG_IPV6) 860 } else if (sk->sk_family == AF_INET6) { 861 const struct ipv6_pinfo *np = inet6_sk(sk); 862 863 if (WARN_ON_ONCE(!np)) 864 return; 865 866 a->sin6_local.sin6_family = AF_INET6; 867 a->sin6_local.sin6_port = inet->inet_sport; 868 869 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 870 a->sin6_local.sin6_addr = np->saddr; 871 else 872 a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; 873 874 a->sin6_remote.sin6_family = AF_INET6; 875 a->sin6_remote.sin6_port = inet->inet_dport; 876 a->sin6_remote.sin6_addr = sk->sk_v6_daddr; 877 #endif 878 } 879 } 880 881 static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, 882 int __user *optlen) 883 { 884 struct sock *sk = &msk->sk.icsk_inet.sk; 885 struct mptcp_subflow_context *subflow; 886 unsigned int sfcount = 0, copied = 0; 887 struct mptcp_subflow_data sfd; 888 char __user *addrptr; 889 int len; 890 891 len = mptcp_get_subflow_data(&sfd, optval, optlen); 892 if (len < 0) 893 return len; 894 895 sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); 896 sfd.size_user = min_t(unsigned int, sfd.size_user, 897 sizeof(struct mptcp_subflow_addrs)); 898 899 addrptr = optval + sfd.size_subflow_data; 900 901 lock_sock(sk); 902 903 mptcp_for_each_subflow(msk, subflow) { 904 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 905 906 ++sfcount; 907 908 if (len && len >= sfd.size_user) { 909 struct mptcp_subflow_addrs a; 910 911 mptcp_get_sub_addrs(ssk, &a); 912 913 if (copy_to_user(addrptr, &a, sfd.size_user)) { 914 release_sock(sk); 915 return -EFAULT; 916 } 917 918 addrptr += sfd.size_user; 919 copied += sfd.size_user; 920 len -= sfd.size_user; 921 } 922 } 923 924 release_sock(sk); 925 926 sfd.num_subflows = sfcount; 927 928 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 929 return -EFAULT; 930 931 return 0; 932 } 933 934 static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 935 char __user *optval, int __user *optlen) 936 { 937 switch (optname) { 938 case TCP_ULP: 939 case TCP_CONGESTION: 940 case TCP_INFO: 941 case TCP_CC_INFO: 942 return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, 943 optval, optlen); 944 } 945 return -EOPNOTSUPP; 946 } 947 948 static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, 949 char __user *optval, int __user *optlen) 950 { 951 switch (optname) { 952 case MPTCP_INFO: 953 return mptcp_getsockopt_info(msk, optval, optlen); 954 case MPTCP_TCPINFO: 955 return mptcp_getsockopt_tcpinfo(msk, optval, optlen); 956 case MPTCP_SUBFLOW_ADDRS: 957 return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); 958 } 959 960 return -EOPNOTSUPP; 961 } 962 963 int mptcp_getsockopt(struct sock *sk, int level, int optname, 964 char __user *optval, int __user *option) 965 { 966 struct mptcp_sock *msk = mptcp_sk(sk); 967 struct sock *ssk; 968 969 pr_debug("msk=%p", msk); 970 971 /* @@ the meaning of setsockopt() when the socket is connected and 972 * there are multiple subflows is not yet defined. It is up to the 973 * MPTCP-level socket to configure the subflows until the subflow 974 * is in TCP fallback, when socket options are passed through 975 * to the one remaining subflow. 976 */ 977 lock_sock(sk); 978 ssk = __mptcp_tcp_fallback(msk); 979 release_sock(sk); 980 if (ssk) 981 return tcp_getsockopt(ssk, level, optname, optval, option); 982 983 if (level == SOL_TCP) 984 return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); 985 if (level == SOL_MPTCP) 986 return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); 987 return -EOPNOTSUPP; 988 } 989 990 static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) 991 { 992 static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; 993 struct sock *sk = (struct sock *)msk; 994 995 if (ssk->sk_prot->keepalive) { 996 if (sock_flag(sk, SOCK_KEEPOPEN)) 997 ssk->sk_prot->keepalive(ssk, 1); 998 else 999 ssk->sk_prot->keepalive(ssk, 0); 1000 } 1001 1002 ssk->sk_priority = sk->sk_priority; 1003 ssk->sk_bound_dev_if = sk->sk_bound_dev_if; 1004 ssk->sk_incoming_cpu = sk->sk_incoming_cpu; 1005 1006 if (sk->sk_userlocks & tx_rx_locks) { 1007 ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; 1008 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) 1009 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 1010 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1011 WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); 1012 } 1013 1014 if (sock_flag(sk, SOCK_LINGER)) { 1015 ssk->sk_lingertime = sk->sk_lingertime; 1016 sock_set_flag(ssk, SOCK_LINGER); 1017 } else { 1018 sock_reset_flag(ssk, SOCK_LINGER); 1019 } 1020 1021 if (sk->sk_mark != ssk->sk_mark) { 1022 ssk->sk_mark = sk->sk_mark; 1023 sk_dst_reset(ssk); 1024 } 1025 1026 sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); 1027 1028 if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) 1029 tcp_set_congestion_control(ssk, msk->ca_name, false, true); 1030 } 1031 1032 static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) 1033 { 1034 bool slow = lock_sock_fast(ssk); 1035 1036 sync_socket_options(msk, ssk); 1037 1038 unlock_sock_fast(ssk, slow); 1039 } 1040 1041 void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) 1042 { 1043 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 1044 1045 msk_owned_by_me(msk); 1046 1047 if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { 1048 __mptcp_sockopt_sync(msk, ssk); 1049 1050 subflow->setsockopt_seq = msk->setsockopt_seq; 1051 } 1052 } 1053 1054 void mptcp_sockopt_sync_all(struct mptcp_sock *msk) 1055 { 1056 struct mptcp_subflow_context *subflow; 1057 struct sock *sk = (struct sock *)msk; 1058 u32 seq; 1059 1060 seq = sockopt_seq_reset(sk); 1061 1062 mptcp_for_each_subflow(msk, subflow) { 1063 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1064 u32 sseq = READ_ONCE(subflow->setsockopt_seq); 1065 1066 if (sseq != msk->setsockopt_seq) { 1067 __mptcp_sockopt_sync(msk, ssk); 1068 WRITE_ONCE(subflow->setsockopt_seq, seq); 1069 } else if (sseq != seq) { 1070 WRITE_ONCE(subflow->setsockopt_seq, seq); 1071 } 1072 1073 cond_resched(); 1074 } 1075 1076 msk->setsockopt_seq = seq; 1077 } 1078