// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2021, Red Hat.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <net/sock.h>
#include <net/protocol.h>
#include <net/tcp.h>
#include <net/mptcp.h>
#include "protocol.h"

static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
	sock_owned_by_me((const struct sock *)msk);

	if (likely(!__mptcp_check_fallback(msk)))
		return NULL;

	return msk->first;
}

static u32 sockopt_seq_reset(const struct sock *sk)
{
	sock_owned_by_me(sk);

	/* Highbits contain state. Allows to distinguish sockopt_seq
	 * of listener and established:
	 * s0 = new_listener()
	 * sockopt(s0) - seq is 1
	 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0)
	 * sockopt(s0) - seq increments to 2 on s0
	 * sockopt(s1) // seq increments to 2 on s1 (different option)
	 * new ssk completes join, inherits options from s0 // seq 2
	 * Needs sync from mptcp join logic, but ssk->seq == msk->seq
	 *
	 * Set high-order bits to sk_state so the ssk->seq == msk->seq test
	 * will fail.
	 */

	return (u32)sk->sk_state << 24u;
}

static void sockopt_seq_inc(struct mptcp_sock *msk)
{
	u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff;

	msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq;
}

static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval,
				unsigned int optlen, int *val)
{
	if (optlen < sizeof(int))
		return -EINVAL;

	if (copy_from_sockptr(val, optval, sizeof(*val)))
		return -EFAULT;

	return 0;
}

static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;

	lock_sock(sk);
	sockopt_seq_inc(msk);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		bool slow = lock_sock_fast(ssk);

		switch (optname) {
		case SO_DEBUG:
			sock_valbool_flag(ssk, SOCK_DBG, !!val);
			break;
		case SO_KEEPALIVE:
			if (ssk->sk_prot->keepalive)
				ssk->sk_prot->keepalive(ssk, !!val);
			sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val);
			break;
		case SO_PRIORITY:
			ssk->sk_priority = val;
			break;
		case SO_SNDBUF:
		case SO_SNDBUFFORCE:
			ssk->sk_userlocks |= SOCK_SNDBUF_LOCK;
			WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
			break;
		case SO_RCVBUF:
		case SO_RCVBUFFORCE:
			ssk->sk_userlocks |= SOCK_RCVBUF_LOCK;
			WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
			break;
		case SO_MARK:
			if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
				ssk->sk_mark = sk->sk_mark;
				sk_dst_reset(ssk);
			}
			break;
		case SO_INCOMING_CPU:
			WRITE_ONCE(ssk->sk_incoming_cpu, val);
			break;
		}

		subflow->setsockopt_seq = msk->setsockopt_seq;
		unlock_sock_fast(ssk, slow);
	}

	release_sock(sk);
}

static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val)
{
	sockptr_t optval = KERNEL_SOCKPTR(&val);
	struct sock *sk = (struct sock *)msk;
	int ret;

	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
			      optval, sizeof(val));
	if (ret)
		return ret;

	mptcp_sol_socket_sync_intval(msk, optname, val);
	return 0;
}
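/* Usage sketch (user space, not part of this file): a single setsockopt()
 * call on an MPTCP socket is applied to the msk itself and then fanned
 * out to every subflow by the helpers above, e.g.:
 *
 *	int val = 1;
 *	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
 *
 *	setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val));
 */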
static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val)
{
	struct sock *sk = (struct sock *)msk;

	WRITE_ONCE(sk->sk_incoming_cpu, val);

	mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val);
}

static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val)
{
	sockptr_t optval = KERNEL_SOCKPTR(&val);
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	int ret;

	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
			      optval, sizeof(val));
	if (ret)
		return ret;

	lock_sock(sk);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		bool slow = lock_sock_fast(ssk);

		/* act on the subflow socket that is locked here, not the msk */
		sock_set_timestamp(ssk, optname, !!val);
		unlock_sock_fast(ssk, slow);
	}

	release_sock(sk);
	return 0;
}

static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname,
					   sockptr_t optval,
					   unsigned int optlen)
{
	int val, ret;

	ret = mptcp_get_int_option(msk, optval, optlen, &val);
	if (ret)
		return ret;

	switch (optname) {
	case SO_KEEPALIVE:
		mptcp_sol_socket_sync_intval(msk, optname, val);
		return 0;
	case SO_DEBUG:
	case SO_MARK:
	case SO_PRIORITY:
	case SO_SNDBUF:
	case SO_SNDBUFFORCE:
	case SO_RCVBUF:
	case SO_RCVBUFFORCE:
		return mptcp_sol_socket_intval(msk, optname, val);
	case SO_INCOMING_CPU:
		mptcp_so_incoming_cpu(msk, val);
		return 0;
	case SO_TIMESTAMP_OLD:
	case SO_TIMESTAMP_NEW:
	case SO_TIMESTAMPNS_OLD:
	case SO_TIMESTAMPNS_NEW:
		return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val);
	}

	return -ENOPROTOOPT;
}

static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk,
						    int optname,
						    sockptr_t optval,
						    unsigned int optlen)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	struct so_timestamping timestamping;
	int ret;

	if (optlen == sizeof(timestamping)) {
		if (copy_from_sockptr(&timestamping, optval,
				      sizeof(timestamping)))
			return -EFAULT;
	} else if (optlen == sizeof(int)) {
		memset(&timestamping, 0, sizeof(timestamping));

		if (copy_from_sockptr(&timestamping.flags, optval, sizeof(int)))
			return -EFAULT;
	} else {
		return -EINVAL;
	}

	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname,
			      KERNEL_SOCKPTR(&timestamping),
			      sizeof(timestamping));
	if (ret)
		return ret;

	lock_sock(sk);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		bool slow = lock_sock_fast(ssk);

		/* act on the subflow socket that is locked here, not the msk */
		sock_set_timestamping(ssk, optname, timestamping);
		unlock_sock_fast(ssk, slow);
	}

	release_sock(sk);

	return 0;
}

static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval,
					      unsigned int optlen)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	struct linger ling;
	sockptr_t kopt;
	int ret;

	if (optlen < sizeof(ling))
		return -EINVAL;

	if (copy_from_sockptr(&ling, optval, sizeof(ling)))
		return -EFAULT;

	kopt = KERNEL_SOCKPTR(&ling);
	ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling));
	if (ret)
		return ret;

	lock_sock(sk);
	sockopt_seq_inc(msk);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		bool slow = lock_sock_fast(ssk);

		if (!ling.l_onoff) {
			sock_reset_flag(ssk, SOCK_LINGER);
		} else {
			ssk->sk_lingertime = sk->sk_lingertime;
			sock_set_flag(ssk, SOCK_LINGER);
		}

		subflow->setsockopt_seq = msk->setsockopt_seq;
		unlock_sock_fast(ssk, slow);
	}

	release_sock(sk);
	return 0;
}
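/* Note on the pattern used above: lock the msk, bump msk->setsockopt_seq,
 * then lock_sock_fast() each subflow while copying the value and stamping
 * subflow->setsockopt_seq. A subflow whose seq does not match the msk one
 * (e.g. one that joins later) is brought up to date by mptcp_sockopt_sync()
 * at the bottom of this file.
 */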
static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
				       sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct socket *ssock;
	int ret;

	switch (optname) {
	case SO_REUSEPORT:
	case SO_REUSEADDR:
	case SO_BINDTODEVICE:
	case SO_BINDTOIFINDEX:
		lock_sock(sk);
		ssock = __mptcp_nmpc_socket(msk);
		if (!ssock) {
			release_sock(sk);
			return -EINVAL;
		}

		ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen);
		if (ret == 0) {
			if (optname == SO_REUSEPORT)
				sk->sk_reuseport = ssock->sk->sk_reuseport;
			else if (optname == SO_REUSEADDR)
				sk->sk_reuse = ssock->sk->sk_reuse;
			else if (optname == SO_BINDTODEVICE ||
				 optname == SO_BINDTOIFINDEX)
				sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if;
		}
		release_sock(sk);
		return ret;
	case SO_KEEPALIVE:
	case SO_PRIORITY:
	case SO_SNDBUF:
	case SO_SNDBUFFORCE:
	case SO_RCVBUF:
	case SO_RCVBUFFORCE:
	case SO_MARK:
	case SO_INCOMING_CPU:
	case SO_DEBUG:
	case SO_TIMESTAMP_OLD:
	case SO_TIMESTAMP_NEW:
	case SO_TIMESTAMPNS_OLD:
	case SO_TIMESTAMPNS_NEW:
		return mptcp_setsockopt_sol_socket_int(msk, optname, optval,
						       optlen);
	case SO_TIMESTAMPING_OLD:
	case SO_TIMESTAMPING_NEW:
		return mptcp_setsockopt_sol_socket_timestamping(msk, optname,
								optval, optlen);
	case SO_LINGER:
		return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen);
	case SO_RCVLOWAT:
	case SO_RCVTIMEO_OLD:
	case SO_RCVTIMEO_NEW:
	case SO_BUSY_POLL:
	case SO_PREFER_BUSY_POLL:
	case SO_BUSY_POLL_BUDGET:
		/* No need to copy: only relevant for msk */
		return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen);
	case SO_NO_CHECK:
	case SO_DONTROUTE:
	case SO_BROADCAST:
	case SO_BSDCOMPAT:
	case SO_PASSCRED:
	case SO_PASSSEC:
	case SO_RXQ_OVFL:
	case SO_WIFI_STATUS:
	case SO_NOFCS:
	case SO_SELECT_ERR_QUEUE:
		return 0;
	}

	/* SO_OOBINLINE is not supported, let's avoid the related mess.
	 * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF,
	 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER:
	 * we must be careful with subflows.
	 *
	 * SO_ATTACH_REUSEPORT_EBPF is not supported, as it checks the
	 * sk_protocol field explicitly.
	 *
	 * SO_PEEK_OFF is unsupported, as it is for plain TCP.
	 * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows.
	 * SO_CNX_ADVICE is currently unsupported, could possibly be relevant,
	 * but likely needs careful design.
	 *
	 * SO_ZEROCOPY is currently unsupported, TODO in sndmsg.
	 * SO_TXTIME is currently unsupported.
	 */

	return -EOPNOTSUPP;
}

static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname,
			       sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = (struct sock *)msk;
	int ret = -EOPNOTSUPP;
	struct socket *ssock;

	switch (optname) {
	case IPV6_V6ONLY:
		lock_sock(sk);
		ssock = __mptcp_nmpc_socket(msk);
		if (!ssock) {
			release_sock(sk);
			return -EINVAL;
		}

		ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen);
		if (ret == 0)
			sk->sk_ipv6only = ssock->sk->sk_ipv6only;

		release_sock(sk);
		break;
	}

	return ret;
}
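/* Sketch (user space, hypothetical fd): options such as SO_REUSEADDR or
 * IPV6_V6ONLY only matter before bind()/connect(), so they are applied to
 * the initial subflow socket returned by __mptcp_nmpc_socket() and the
 * result is then mirrored into the corresponding msk fields:
 *
 *	int on = 1;
 *
 *	setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
 */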
static bool mptcp_supported_sockopt(int level, int optname)
{
	if (level == SOL_IP) {
		switch (optname) {
		/* should work fine */
		case IP_FREEBIND:
		case IP_TRANSPARENT:

		/* the following are control cmsg related */
		case IP_PKTINFO:
		case IP_RECVTTL:
		case IP_RECVTOS:
		case IP_RECVOPTS:
		case IP_RETOPTS:
		case IP_PASSSEC:
		case IP_RECVORIGDSTADDR:
		case IP_CHECKSUM:
		case IP_RECVFRAGSIZE:

		/* common stuff that needs some love */
		case IP_TOS:
		case IP_TTL:
		case IP_BIND_ADDRESS_NO_PORT:
		case IP_MTU_DISCOVER:
		case IP_RECVERR:

		/* possibly less common, may deserve some love */
		case IP_MINTTL:

		/* the following is apparently a no-op for plain TCP */
		case IP_RECVERR_RFC4884:
			return true;
		}

		/* IP_OPTIONS is not supported, needs subflow care */
		/* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */
		/* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF,
		 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP,
		 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE,
		 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, MCAST_JOIN_SOURCE_GROUP,
		 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE,
		 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not
		 * to deal with mcast stuff
		 */
		/* IP_IPSEC_POLICY, IP_XFRM_POLICY are not supported, unrelated here */
		return false;
	}
	if (level == SOL_IPV6) {
		switch (optname) {
		case IPV6_V6ONLY:

		/* the following are control cmsg related */
		case IPV6_RECVPKTINFO:
		case IPV6_2292PKTINFO:
		case IPV6_RECVHOPLIMIT:
		case IPV6_2292HOPLIMIT:
		case IPV6_RECVRTHDR:
		case IPV6_2292RTHDR:
		case IPV6_RECVHOPOPTS:
		case IPV6_2292HOPOPTS:
		case IPV6_RECVDSTOPTS:
		case IPV6_2292DSTOPTS:
		case IPV6_RECVTCLASS:
		case IPV6_FLOWINFO:
		case IPV6_RECVPATHMTU:
		case IPV6_RECVORIGDSTADDR:
		case IPV6_RECVFRAGSIZE:

		/* the following ones need some love but are quite common */
		case IPV6_TCLASS:
		case IPV6_TRANSPARENT:
		case IPV6_FREEBIND:
		case IPV6_PKTINFO:
		case IPV6_2292PKTOPTIONS:
		case IPV6_UNICAST_HOPS:
		case IPV6_MTU_DISCOVER:
		case IPV6_MTU:
		case IPV6_RECVERR:
		case IPV6_FLOWINFO_SEND:
		case IPV6_FLOWLABEL_MGR:
		case IPV6_MINHOPCOUNT:
		case IPV6_DONTFRAG:
		case IPV6_AUTOFLOWLABEL:

		/* the following one is a no-op for plain TCP */
		case IPV6_RECVERR_RFC4884:
			return true;
		}

		/* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are
		 * not supported
		 */
		/* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF,
		 * IPV6_MULTICAST_IF, IPV6_ADDRFORM,
		 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST,
		 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP,
		 * MCAST_LEAVE_GROUP, MCAST_JOIN_SOURCE_GROUP,
		 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE,
		 * MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER are not supported,
		 * better not to deal with mcast
		 */
		/* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported,
		 * since they are evil
		 */

		/* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */
		/* IPV6_ADDR_PREFERENCES is not supported, we must be careful
		 * with subflows
		 */
		return false;
	}
	if (level == SOL_TCP) {
		switch (optname) {
		/* the following are no-op or should work just fine */
		case TCP_THIN_DUPACK:
		case TCP_DEFER_ACCEPT:

		/* the following need some love */
		case TCP_MAXSEG:
		case TCP_NODELAY:
		case TCP_THIN_LINEAR_TIMEOUTS:
		case TCP_CONGESTION:
		case TCP_ULP:
		case TCP_CORK:
		case TCP_KEEPIDLE:
		case TCP_KEEPINTVL:
		case TCP_KEEPCNT:
		case TCP_SYNCNT:
		case TCP_SAVE_SYN:
		case TCP_LINGER2:
		case TCP_WINDOW_CLAMP:
		case TCP_QUICKACK:
		case TCP_USER_TIMEOUT:
		case TCP_TIMESTAMP:
		case TCP_NOTSENT_LOWAT:
		case TCP_TX_DELAY:
			return true;
		}

		/* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported: MD5 is not
		 * compatible with MPTCP
		 */

		/* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS,
		 * TCP_REPAIR_WINDOW are not supported, better avoid this mess
		 */
		/* TCP_FASTOPEN_KEY, TCP_FASTOPEN, TCP_FASTOPEN_CONNECT,
		 * TCP_FASTOPEN_NO_COOKIE are not supported: fastopen is
		 * currently unsupported
		 */
		/* TCP_INQ is currently unsupported, needs some recvmsg work */
	}
	return false;
}
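/* Probe sketch (user space): an option rejected by mptcp_supported_sockopt()
 * surfaces as -ENOPROTOOPT from mptcp_setsockopt() below, e.g. TCP_MD5SIG:
 *
 *	struct tcp_md5sig md5 = {};
 *
 *	if (setsockopt(fd, SOL_TCP, TCP_MD5SIG, &md5, sizeof(md5)) < 0)
 *		assert(errno == ENOPROTOOPT);
 */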
static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval,
					       unsigned int optlen)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	char name[TCP_CA_NAME_MAX];
	bool cap_net_admin;
	int ret;

	if (optlen < 1)
		return -EINVAL;

	ret = strncpy_from_sockptr(name, optval,
				   min_t(long, TCP_CA_NAME_MAX - 1, optlen));
	if (ret < 0)
		return -EFAULT;

	name[ret] = 0;

	cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN);

	ret = 0;
	lock_sock(sk);
	sockopt_seq_inc(msk);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		int err;

		lock_sock(ssk);
		err = tcp_set_congestion_control(ssk, name, true, cap_net_admin);
		if (err < 0 && ret == 0)
			ret = err;
		subflow->setsockopt_seq = msk->setsockopt_seq;
		release_sock(ssk);
	}

	if (ret == 0)
		strcpy(msk->ca_name, name);

	release_sock(sk);
	return ret;
}

static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
				    sockptr_t optval, unsigned int optlen)
{
	switch (optname) {
	case TCP_ULP:
		return -EOPNOTSUPP;
	case TCP_CONGESTION:
		return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen);
	}

	return -EOPNOTSUPP;
}

int mptcp_setsockopt(struct sock *sk, int level, int optname,
		     sockptr_t optval, unsigned int optlen)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct sock *ssk;

	pr_debug("msk=%p", msk);

	if (level == SOL_SOCKET)
		return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);

	if (!mptcp_supported_sockopt(level, optname))
		return -ENOPROTOOPT;

	/* @@ the meaning of setsockopt() when the socket is connected and
	 * there are multiple subflows is not yet defined. It is up to the
	 * MPTCP-level socket to configure the subflows until the connection
	 * is in TCP fallback, when TCP socket options are passed through
	 * to the one remaining subflow.
	 */
	lock_sock(sk);
	ssk = __mptcp_tcp_fallback(msk);
	release_sock(sk);
	if (ssk)
		return tcp_setsockopt(ssk, level, optname, optval, optlen);

	if (level == SOL_IPV6)
		return mptcp_setsockopt_v6(msk, optname, optval, optlen);

	if (level == SOL_TCP)
		return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen);

	return -EOPNOTSUPP;
}
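/* Example (user space): TCP_CONGESTION is the only SOL_TCP option accepted
 * by mptcp_setsockopt_sol_tcp(); the algorithm is set on every existing
 * subflow and cached in msk->ca_name for subflows created afterwards:
 *
 *	setsockopt(fd, SOL_TCP, TCP_CONGESTION, "cubic", sizeof("cubic"));
 */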
static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
					  char __user *optval, int __user *optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct socket *ssock;
	int ret = -EINVAL;
	struct sock *ssk;

	lock_sock(sk);
	ssk = msk->first;
	if (ssk) {
		ret = tcp_getsockopt(ssk, level, optname, optval, optlen);
		goto out;
	}

	ssock = __mptcp_nmpc_socket(msk);
	if (!ssock)
		goto out;

	ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen);

out:
	release_sock(sk);
	return ret;
}

static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
				    char __user *optval, int __user *optlen)
{
	switch (optname) {
	case TCP_ULP:
	case TCP_CONGESTION:
	case TCP_INFO:
	case TCP_CC_INFO:
		return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname,
						      optval, optlen);
	}
	return -EOPNOTSUPP;
}

int mptcp_getsockopt(struct sock *sk, int level, int optname,
		     char __user *optval, int __user *option)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct sock *ssk;

	pr_debug("msk=%p", msk);

	/* @@ the meaning of getsockopt() when the socket is connected and
	 * there are multiple subflows is not yet defined. It is up to the
	 * MPTCP-level socket to configure the subflows until the connection
	 * is in TCP fallback, when socket options are passed through
	 * to the one remaining subflow.
	 */
	lock_sock(sk);
	ssk = __mptcp_tcp_fallback(msk);
	release_sock(sk);
	if (ssk)
		return tcp_getsockopt(ssk, level, optname, optval, option);

	if (level == SOL_TCP)
		return mptcp_getsockopt_sol_tcp(msk, optname, optval, option);
	return -EOPNOTSUPP;
}
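/* Example (user space): TCP_INFO and friends are answered from the first
 * subflow only, so the returned counters describe a single TCP flow, not
 * the aggregate MPTCP connection:
 *
 *	struct tcp_info ti;
 *	socklen_t len = sizeof(ti);
 *
 *	getsockopt(fd, SOL_TCP, TCP_INFO, &ti, &len);
 */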
static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
{
	static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK;
	struct sock *sk = (struct sock *)msk;

	if (ssk->sk_prot->keepalive) {
		if (sock_flag(sk, SOCK_KEEPOPEN))
			ssk->sk_prot->keepalive(ssk, 1);
		else
			ssk->sk_prot->keepalive(ssk, 0);
	}

	ssk->sk_priority = sk->sk_priority;
	ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
	ssk->sk_incoming_cpu = sk->sk_incoming_cpu;

	if (sk->sk_userlocks & tx_rx_locks) {
		ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;
		if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
			WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf);
		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
			WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf);
	}

	if (sock_flag(sk, SOCK_LINGER)) {
		ssk->sk_lingertime = sk->sk_lingertime;
		sock_set_flag(ssk, SOCK_LINGER);
	} else {
		sock_reset_flag(ssk, SOCK_LINGER);
	}

	if (sk->sk_mark != ssk->sk_mark) {
		ssk->sk_mark = sk->sk_mark;
		sk_dst_reset(ssk);
	}

	sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG));

	if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops)
		tcp_set_congestion_control(ssk, msk->ca_name, false, true);
}

static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
{
	bool slow = lock_sock_fast(ssk);

	sync_socket_options(msk, ssk);

	unlock_sock_fast(ssk, slow);
}

void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);

	msk_owned_by_me(msk);

	if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
		__mptcp_sockopt_sync(msk, ssk);

		subflow->setsockopt_seq = msk->setsockopt_seq;
	}
}

void mptcp_sockopt_sync_all(struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	u32 seq;

	seq = sockopt_seq_reset(sk);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		u32 sseq = READ_ONCE(subflow->setsockopt_seq);

		if (sseq != msk->setsockopt_seq) {
			__mptcp_sockopt_sync(msk, ssk);
			WRITE_ONCE(subflow->setsockopt_seq, seq);
		} else if (sseq != seq) {
			WRITE_ONCE(subflow->setsockopt_seq, seq);
		}

		cond_resched();
	}

	msk->setsockopt_seq = seq;
}
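/* Summary of the sync entry points above: mptcp_sockopt_sync() refreshes a
 * single subflow whose setsockopt_seq lags behind the msk one (e.g. a
 * freshly joined subflow), while mptcp_sockopt_sync_all() walks every
 * subflow and also resets the seq to the current sk_state-tagged value
 * from sockopt_seq_reset(), so no spurious sync is triggered until the
 * next setsockopt() call.
 */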