1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2025, Matthieu Baerts. 5 */ 6 7 #define pr_fmt(fmt) "MPTCP: " fmt 8 9 #include <net/netns/generic.h> 10 11 #include "protocol.h" 12 #include "mib.h" 13 #include "mptcp_pm_gen.h" 14 15 static int pm_nl_pernet_id; 16 17 struct pm_nl_pernet { 18 /* protects pernet updates */ 19 spinlock_t lock; 20 struct list_head local_addr_list; 21 unsigned int addrs; 22 unsigned int stale_loss_cnt; 23 unsigned int add_addr_signal_max; 24 unsigned int add_addr_accept_max; 25 unsigned int local_addr_max; 26 unsigned int subflows_max; 27 unsigned int next_id; 28 DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 29 }; 30 31 #define MPTCP_PM_ADDR_MAX 8 32 33 static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) 34 { 35 return net_generic(net, pm_nl_pernet_id); 36 } 37 38 static struct pm_nl_pernet * 39 pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) 40 { 41 return pm_nl_get_pernet(sock_net((struct sock *)msk)); 42 } 43 44 static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) 45 { 46 return pm_nl_get_pernet(genl_info_net(info)); 47 } 48 49 unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) 50 { 51 const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 52 53 return READ_ONCE(pernet->add_addr_signal_max); 54 } 55 EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max); 56 57 unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk) 58 { 59 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 60 61 return READ_ONCE(pernet->add_addr_accept_max); 62 } 63 EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max); 64 65 unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk) 66 { 67 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 68 69 return READ_ONCE(pernet->subflows_max); 70 } 71 EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max); 72 73 unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk) 74 { 75 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 76 77 return READ_ONCE(pernet->local_addr_max); 78 } 79 EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max); 80 81 static bool lookup_subflow_by_daddr(const struct list_head *list, 82 const struct mptcp_addr_info *daddr) 83 { 84 struct mptcp_subflow_context *subflow; 85 struct mptcp_addr_info cur; 86 87 list_for_each_entry(subflow, list, node) { 88 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 89 90 if (!((1 << inet_sk_state_load(ssk)) & 91 (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV))) 92 continue; 93 94 mptcp_remote_address((struct sock_common *)ssk, &cur); 95 if (mptcp_addresses_equal(&cur, daddr, daddr->port)) 96 return true; 97 } 98 99 return false; 100 } 101 102 static bool 103 select_local_address(const struct pm_nl_pernet *pernet, 104 const struct mptcp_sock *msk, 105 struct mptcp_pm_local *new_local) 106 { 107 struct mptcp_pm_addr_entry *entry; 108 bool found = false; 109 110 msk_owned_by_me(msk); 111 112 rcu_read_lock(); 113 list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 114 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) 115 continue; 116 117 if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) 118 continue; 119 120 new_local->addr = entry->addr; 121 new_local->flags = entry->flags; 122 new_local->ifindex = entry->ifindex; 123 found = true; 124 break; 125 } 126 rcu_read_unlock(); 127 128 return found; 129 } 130 131 static bool 132 select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, 133 struct mptcp_pm_local *new_local) 134 { 135 struct mptcp_pm_addr_entry *entry; 136 bool found = false; 137 138 rcu_read_lock(); 139 /* do not keep any additional per socket state, just signal 140 * the address list in order. 141 * Note: removal from the local address list during the msk life-cycle 142 * can lead to additional addresses not being announced. 143 */ 144 list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 145 if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) 146 continue; 147 148 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) 149 continue; 150 151 new_local->addr = entry->addr; 152 new_local->flags = entry->flags; 153 new_local->ifindex = entry->ifindex; 154 found = true; 155 break; 156 } 157 rcu_read_unlock(); 158 159 return found; 160 } 161 162 /* Fill all the remote addresses into the array addrs[], 163 * and return the array size. 164 */ 165 static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, 166 struct mptcp_addr_info *local, 167 bool fullmesh, 168 struct mptcp_addr_info *addrs) 169 { 170 bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); 171 struct sock *sk = (struct sock *)msk, *ssk; 172 struct mptcp_subflow_context *subflow; 173 struct mptcp_addr_info remote = { 0 }; 174 unsigned int subflows_max; 175 int i = 0; 176 177 subflows_max = mptcp_pm_get_subflows_max(msk); 178 mptcp_remote_address((struct sock_common *)sk, &remote); 179 180 /* Non-fullmesh endpoint, fill in the single entry 181 * corresponding to the primary MPC subflow remote address 182 */ 183 if (!fullmesh) { 184 if (deny_id0) 185 return 0; 186 187 if (!mptcp_pm_addr_families_match(sk, local, &remote)) 188 return 0; 189 190 msk->pm.subflows++; 191 addrs[i++] = remote; 192 } else { 193 DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 194 195 /* Forbid creation of new subflows matching existing 196 * ones, possibly already created by incoming ADD_ADDR 197 */ 198 bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 199 mptcp_for_each_subflow(msk, subflow) 200 if (READ_ONCE(subflow->local_id) == local->id) 201 __set_bit(subflow->remote_id, unavail_id); 202 203 mptcp_for_each_subflow(msk, subflow) { 204 ssk = mptcp_subflow_tcp_sock(subflow); 205 mptcp_remote_address((struct sock_common *)ssk, &addrs[i]); 206 addrs[i].id = READ_ONCE(subflow->remote_id); 207 if (deny_id0 && !addrs[i].id) 208 continue; 209 210 if (test_bit(addrs[i].id, unavail_id)) 211 continue; 212 213 if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) 214 continue; 215 216 if (msk->pm.subflows < subflows_max) { 217 /* forbid creating multiple address towards 218 * this id 219 */ 220 __set_bit(addrs[i].id, unavail_id); 221 msk->pm.subflows++; 222 i++; 223 } 224 } 225 } 226 227 return i; 228 } 229 230 static struct mptcp_pm_addr_entry * 231 __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) 232 { 233 struct mptcp_pm_addr_entry *entry; 234 235 list_for_each_entry_rcu(entry, &pernet->local_addr_list, list, 236 lockdep_is_held(&pernet->lock)) { 237 if (entry->addr.id == id) 238 return entry; 239 } 240 return NULL; 241 } 242 243 static struct mptcp_pm_addr_entry * 244 __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) 245 { 246 struct mptcp_pm_addr_entry *entry; 247 248 list_for_each_entry_rcu(entry, &pernet->local_addr_list, list, 249 lockdep_is_held(&pernet->lock)) { 250 if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) 251 return entry; 252 } 253 return NULL; 254 } 255 256 static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) 257 { 258 struct sock *sk = (struct sock *)msk; 259 unsigned int add_addr_signal_max; 260 bool signal_and_subflow = false; 261 unsigned int local_addr_max; 262 struct pm_nl_pernet *pernet; 263 struct mptcp_pm_local local; 264 unsigned int subflows_max; 265 266 pernet = pm_nl_get_pernet(sock_net(sk)); 267 268 add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk); 269 local_addr_max = mptcp_pm_get_local_addr_max(msk); 270 subflows_max = mptcp_pm_get_subflows_max(msk); 271 272 /* do lazy endpoint usage accounting for the MPC subflows */ 273 if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) { 274 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first); 275 struct mptcp_pm_addr_entry *entry; 276 struct mptcp_addr_info mpc_addr; 277 bool backup = false; 278 279 mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); 280 rcu_read_lock(); 281 entry = __lookup_addr(pernet, &mpc_addr); 282 if (entry) { 283 __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); 284 msk->mpc_endpoint_id = entry->addr.id; 285 backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 286 } 287 rcu_read_unlock(); 288 289 if (backup) 290 mptcp_pm_send_ack(msk, subflow, true, backup); 291 292 msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED); 293 } 294 295 pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", 296 msk->pm.local_addr_used, local_addr_max, 297 msk->pm.add_addr_signaled, add_addr_signal_max, 298 msk->pm.subflows, subflows_max); 299 300 /* check first for announce */ 301 if (msk->pm.add_addr_signaled < add_addr_signal_max) { 302 /* due to racing events on both ends we can reach here while 303 * previous add address is still running: if we invoke now 304 * mptcp_pm_announce_addr(), that will fail and the 305 * corresponding id will be marked as used. 306 * Instead let the PM machinery reschedule us when the 307 * current address announce will be completed. 308 */ 309 if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) 310 return; 311 312 if (!select_signal_address(pernet, msk, &local)) 313 goto subflow; 314 315 /* If the alloc fails, we are on memory pressure, not worth 316 * continuing, and trying to create subflows. 317 */ 318 if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) 319 return; 320 321 __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); 322 msk->pm.add_addr_signaled++; 323 324 /* Special case for ID0: set the correct ID */ 325 if (local.addr.id == msk->mpc_endpoint_id) 326 local.addr.id = 0; 327 328 mptcp_pm_announce_addr(msk, &local.addr, false); 329 mptcp_pm_addr_send_ack(msk); 330 331 if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) 332 signal_and_subflow = true; 333 } 334 335 subflow: 336 /* check if should create a new subflow */ 337 while (msk->pm.local_addr_used < local_addr_max && 338 msk->pm.subflows < subflows_max) { 339 struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; 340 bool fullmesh; 341 int i, nr; 342 343 if (signal_and_subflow) 344 signal_and_subflow = false; 345 else if (!select_local_address(pernet, msk, &local)) 346 break; 347 348 fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); 349 350 __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); 351 352 /* Special case for ID0: set the correct ID */ 353 if (local.addr.id == msk->mpc_endpoint_id) 354 local.addr.id = 0; 355 else /* local_addr_used is not decr for ID 0 */ 356 msk->pm.local_addr_used++; 357 358 nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); 359 if (nr == 0) 360 continue; 361 362 spin_unlock_bh(&msk->pm.lock); 363 for (i = 0; i < nr; i++) 364 __mptcp_subflow_connect(sk, &local, &addrs[i]); 365 spin_lock_bh(&msk->pm.lock); 366 } 367 mptcp_pm_nl_check_work_pending(msk); 368 } 369 370 static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) 371 { 372 mptcp_pm_create_subflow_or_signal_addr(msk); 373 } 374 375 static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) 376 { 377 mptcp_pm_create_subflow_or_signal_addr(msk); 378 } 379 380 /* Fill all the local addresses into the array addrs[], 381 * and return the array size. 382 */ 383 static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, 384 struct mptcp_addr_info *remote, 385 struct mptcp_pm_local *locals) 386 { 387 struct sock *sk = (struct sock *)msk; 388 struct mptcp_pm_addr_entry *entry; 389 struct mptcp_addr_info mpc_addr; 390 struct pm_nl_pernet *pernet; 391 unsigned int subflows_max; 392 int i = 0; 393 394 pernet = pm_nl_get_pernet_from_msk(msk); 395 subflows_max = mptcp_pm_get_subflows_max(msk); 396 397 mptcp_local_address((struct sock_common *)msk, &mpc_addr); 398 399 rcu_read_lock(); 400 list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 401 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) 402 continue; 403 404 if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote)) 405 continue; 406 407 if (msk->pm.subflows < subflows_max) { 408 locals[i].addr = entry->addr; 409 locals[i].flags = entry->flags; 410 locals[i].ifindex = entry->ifindex; 411 412 /* Special case for ID0: set the correct ID */ 413 if (mptcp_addresses_equal(&locals[i].addr, &mpc_addr, locals[i].addr.port)) 414 locals[i].addr.id = 0; 415 416 msk->pm.subflows++; 417 i++; 418 } 419 } 420 rcu_read_unlock(); 421 422 /* If the array is empty, fill in the single 423 * 'IPADDRANY' local address 424 */ 425 if (!i) { 426 memset(&locals[i], 0, sizeof(locals[i])); 427 locals[i].addr.family = 428 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 429 remote->family == AF_INET6 && 430 ipv6_addr_v4mapped(&remote->addr6) ? AF_INET : 431 #endif 432 remote->family; 433 434 if (!mptcp_pm_addr_families_match(sk, &locals[i].addr, remote)) 435 return 0; 436 437 msk->pm.subflows++; 438 i++; 439 } 440 441 return i; 442 } 443 444 static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) 445 { 446 struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX]; 447 struct sock *sk = (struct sock *)msk; 448 unsigned int add_addr_accept_max; 449 struct mptcp_addr_info remote; 450 unsigned int subflows_max; 451 bool sf_created = false; 452 int i, nr; 453 454 add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); 455 subflows_max = mptcp_pm_get_subflows_max(msk); 456 457 pr_debug("accepted %d:%d remote family %d\n", 458 msk->pm.add_addr_accepted, add_addr_accept_max, 459 msk->pm.remote.family); 460 461 remote = msk->pm.remote; 462 mptcp_pm_announce_addr(msk, &remote, true); 463 mptcp_pm_addr_send_ack(msk); 464 465 if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) 466 return; 467 468 /* pick id 0 port, if none is provided the remote address */ 469 if (!remote.port) 470 remote.port = sk->sk_dport; 471 472 /* connect to the specified remote address, using whatever 473 * local address the routing configuration will pick. 474 */ 475 nr = fill_local_addresses_vec(msk, &remote, locals); 476 if (nr == 0) 477 return; 478 479 spin_unlock_bh(&msk->pm.lock); 480 for (i = 0; i < nr; i++) 481 if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0) 482 sf_created = true; 483 spin_lock_bh(&msk->pm.lock); 484 485 if (sf_created) { 486 /* add_addr_accepted is not decr for ID 0 */ 487 if (remote.id) 488 msk->pm.add_addr_accepted++; 489 if (msk->pm.add_addr_accepted >= add_addr_accept_max || 490 msk->pm.subflows >= subflows_max) 491 WRITE_ONCE(msk->pm.accept_addr, false); 492 } 493 } 494 495 void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id) 496 { 497 if (rm_id && WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { 498 /* Note: if the subflow has been closed before, this 499 * add_addr_accepted counter will not be decremented. 500 */ 501 if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) 502 WRITE_ONCE(msk->pm.accept_addr, true); 503 } 504 } 505 506 static bool address_use_port(struct mptcp_pm_addr_entry *entry) 507 { 508 return (entry->flags & 509 (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) == 510 MPTCP_PM_ADDR_FLAG_SIGNAL; 511 } 512 513 /* caller must ensure the RCU grace period is already elapsed */ 514 static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) 515 { 516 if (entry->lsk) 517 sock_release(entry->lsk); 518 kfree(entry); 519 } 520 521 static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, 522 struct mptcp_pm_addr_entry *entry, 523 bool needs_id, bool replace) 524 { 525 struct mptcp_pm_addr_entry *cur, *del_entry = NULL; 526 unsigned int addr_max; 527 int ret = -EINVAL; 528 529 spin_lock_bh(&pernet->lock); 530 /* to keep the code simple, don't do IDR-like allocation for address ID, 531 * just bail when we exceed limits 532 */ 533 if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID) 534 pernet->next_id = 1; 535 if (pernet->addrs >= MPTCP_PM_ADDR_MAX) { 536 ret = -ERANGE; 537 goto out; 538 } 539 if (test_bit(entry->addr.id, pernet->id_bitmap)) { 540 ret = -EBUSY; 541 goto out; 542 } 543 544 /* do not insert duplicate address, differentiate on port only 545 * singled addresses 546 */ 547 if (!address_use_port(entry)) 548 entry->addr.port = 0; 549 list_for_each_entry(cur, &pernet->local_addr_list, list) { 550 if (mptcp_addresses_equal(&cur->addr, &entry->addr, 551 cur->addr.port || entry->addr.port)) { 552 /* allow replacing the exiting endpoint only if such 553 * endpoint is an implicit one and the user-space 554 * did not provide an endpoint id 555 */ 556 if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) { 557 ret = -EEXIST; 558 goto out; 559 } 560 if (entry->addr.id) 561 goto out; 562 563 /* allow callers that only need to look up the local 564 * addr's id to skip replacement. This allows them to 565 * avoid calling synchronize_rcu in the packet recv 566 * path. 567 */ 568 if (!replace) { 569 kfree(entry); 570 ret = cur->addr.id; 571 goto out; 572 } 573 574 pernet->addrs--; 575 entry->addr.id = cur->addr.id; 576 list_del_rcu(&cur->list); 577 del_entry = cur; 578 break; 579 } 580 } 581 582 if (!entry->addr.id && needs_id) { 583 find_next: 584 entry->addr.id = find_next_zero_bit(pernet->id_bitmap, 585 MPTCP_PM_MAX_ADDR_ID + 1, 586 pernet->next_id); 587 if (!entry->addr.id && pernet->next_id != 1) { 588 pernet->next_id = 1; 589 goto find_next; 590 } 591 } 592 593 if (!entry->addr.id && needs_id) 594 goto out; 595 596 __set_bit(entry->addr.id, pernet->id_bitmap); 597 if (entry->addr.id > pernet->next_id) 598 pernet->next_id = entry->addr.id; 599 600 if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { 601 addr_max = pernet->add_addr_signal_max; 602 WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1); 603 } 604 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 605 addr_max = pernet->local_addr_max; 606 WRITE_ONCE(pernet->local_addr_max, addr_max + 1); 607 } 608 609 pernet->addrs++; 610 if (!entry->addr.port) 611 list_add_tail_rcu(&entry->list, &pernet->local_addr_list); 612 else 613 list_add_rcu(&entry->list, &pernet->local_addr_list); 614 ret = entry->addr.id; 615 616 out: 617 spin_unlock_bh(&pernet->lock); 618 619 /* just replaced an existing entry, free it */ 620 if (del_entry) { 621 synchronize_rcu(); 622 __mptcp_pm_release_addr_entry(del_entry); 623 } 624 return ret; 625 } 626 627 static struct lock_class_key mptcp_slock_keys[2]; 628 static struct lock_class_key mptcp_keys[2]; 629 630 static int mptcp_pm_nl_create_listen_socket(struct sock *sk, 631 struct mptcp_pm_addr_entry *entry) 632 { 633 bool is_ipv6 = sk->sk_family == AF_INET6; 634 int addrlen = sizeof(struct sockaddr_in); 635 struct sockaddr_storage addr; 636 struct sock *newsk, *ssk; 637 int backlog = 1024; 638 int err; 639 640 err = sock_create_kern(sock_net(sk), entry->addr.family, 641 SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk); 642 if (err) 643 return err; 644 645 newsk = entry->lsk->sk; 646 if (!newsk) 647 return -EINVAL; 648 649 /* The subflow socket lock is acquired in a nested to the msk one 650 * in several places, even by the TCP stack, and this msk is a kernel 651 * socket: lockdep complains. Instead of propagating the _nested 652 * modifiers in several places, re-init the lock class for the msk 653 * socket to an mptcp specific one. 654 */ 655 sock_lock_init_class_and_name(newsk, 656 is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET", 657 &mptcp_slock_keys[is_ipv6], 658 is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET", 659 &mptcp_keys[is_ipv6]); 660 661 lock_sock(newsk); 662 ssk = __mptcp_nmpc_sk(mptcp_sk(newsk)); 663 release_sock(newsk); 664 if (IS_ERR(ssk)) 665 return PTR_ERR(ssk); 666 667 mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); 668 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 669 if (entry->addr.family == AF_INET6) 670 addrlen = sizeof(struct sockaddr_in6); 671 #endif 672 if (ssk->sk_family == AF_INET) 673 err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); 674 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 675 else if (ssk->sk_family == AF_INET6) 676 err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); 677 #endif 678 if (err) 679 return err; 680 681 /* We don't use mptcp_set_state() here because it needs to be called 682 * under the msk socket lock. For the moment, that will not bring 683 * anything more than only calling inet_sk_state_store(), because the 684 * old status is known (TCP_CLOSE). 685 */ 686 inet_sk_state_store(newsk, TCP_LISTEN); 687 lock_sock(ssk); 688 WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true); 689 err = __inet_listen_sk(ssk, backlog); 690 if (!err) 691 mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); 692 release_sock(ssk); 693 return err; 694 } 695 696 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, 697 struct mptcp_pm_addr_entry *skc) 698 { 699 struct mptcp_pm_addr_entry *entry; 700 struct pm_nl_pernet *pernet; 701 int ret; 702 703 pernet = pm_nl_get_pernet_from_msk(msk); 704 705 rcu_read_lock(); 706 entry = __lookup_addr(pernet, &skc->addr); 707 ret = entry ? entry->addr.id : -1; 708 rcu_read_unlock(); 709 if (ret >= 0) 710 return ret; 711 712 /* address not found, add to local list */ 713 entry = kmemdup(skc, sizeof(*skc), GFP_ATOMIC); 714 if (!entry) 715 return -ENOMEM; 716 717 entry->addr.port = 0; 718 ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); 719 if (ret < 0) 720 kfree(entry); 721 722 return ret; 723 } 724 725 bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) 726 { 727 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 728 struct mptcp_pm_addr_entry *entry; 729 bool backup; 730 731 rcu_read_lock(); 732 entry = __lookup_addr(pernet, skc); 733 backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 734 rcu_read_unlock(); 735 736 return backup; 737 } 738 739 static int mptcp_nl_add_subflow_or_signal_addr(struct net *net, 740 struct mptcp_addr_info *addr) 741 { 742 struct mptcp_sock *msk; 743 long s_slot = 0, s_num = 0; 744 745 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 746 struct sock *sk = (struct sock *)msk; 747 struct mptcp_addr_info mpc_addr; 748 749 if (!READ_ONCE(msk->fully_established) || 750 mptcp_pm_is_userspace(msk)) 751 goto next; 752 753 /* if the endp linked to the init sf is re-added with a != ID */ 754 mptcp_local_address((struct sock_common *)msk, &mpc_addr); 755 756 lock_sock(sk); 757 spin_lock_bh(&msk->pm.lock); 758 if (mptcp_addresses_equal(addr, &mpc_addr, addr->port)) 759 msk->mpc_endpoint_id = addr->id; 760 mptcp_pm_create_subflow_or_signal_addr(msk); 761 spin_unlock_bh(&msk->pm.lock); 762 release_sock(sk); 763 764 next: 765 sock_put(sk); 766 cond_resched(); 767 } 768 769 return 0; 770 } 771 772 static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, 773 struct genl_info *info) 774 { 775 struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; 776 777 if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, 778 mptcp_pm_address_nl_policy, info->extack) && 779 tb[MPTCP_PM_ADDR_ATTR_ID]) 780 return true; 781 return false; 782 } 783 784 /* Add an MPTCP endpoint */ 785 int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) 786 { 787 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 788 struct mptcp_pm_addr_entry addr, *entry; 789 struct nlattr *attr; 790 int ret; 791 792 if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) 793 return -EINVAL; 794 795 attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; 796 ret = mptcp_pm_parse_entry(attr, info, true, &addr); 797 if (ret < 0) 798 return ret; 799 800 if (addr.addr.port && !address_use_port(&addr)) { 801 NL_SET_ERR_MSG_ATTR(info->extack, attr, 802 "flags must have signal and not subflow when using port"); 803 return -EINVAL; 804 } 805 806 if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL && 807 addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { 808 NL_SET_ERR_MSG_ATTR(info->extack, attr, 809 "flags mustn't have both signal and fullmesh"); 810 return -EINVAL; 811 } 812 813 if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { 814 NL_SET_ERR_MSG_ATTR(info->extack, attr, 815 "can't create IMPLICIT endpoint"); 816 return -EINVAL; 817 } 818 819 entry = kmemdup(&addr, sizeof(addr), GFP_KERNEL_ACCOUNT); 820 if (!entry) { 821 GENL_SET_ERR_MSG(info, "can't allocate addr"); 822 return -ENOMEM; 823 } 824 825 if (entry->addr.port) { 826 ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); 827 if (ret) { 828 GENL_SET_ERR_MSG_FMT(info, "create listen socket error: %d", ret); 829 goto out_free; 830 } 831 } 832 ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, 833 !mptcp_pm_has_addr_attr_id(attr, info), 834 true); 835 if (ret < 0) { 836 GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); 837 goto out_free; 838 } 839 840 mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk), &entry->addr); 841 return 0; 842 843 out_free: 844 __mptcp_pm_release_addr_entry(entry); 845 return ret; 846 } 847 848 static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk, 849 const struct mptcp_addr_info *addr) 850 { 851 return msk->mpc_endpoint_id == addr->id ? 0 : addr->id; 852 } 853 854 static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, 855 const struct mptcp_addr_info *addr, 856 bool force) 857 { 858 struct mptcp_rm_list list = { .nr = 0 }; 859 bool ret; 860 861 list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); 862 863 ret = mptcp_remove_anno_list_by_saddr(msk, addr); 864 if (ret || force) { 865 spin_lock_bh(&msk->pm.lock); 866 if (ret) { 867 __set_bit(addr->id, msk->pm.id_avail_bitmap); 868 msk->pm.add_addr_signaled--; 869 } 870 mptcp_pm_remove_addr(msk, &list); 871 spin_unlock_bh(&msk->pm.lock); 872 } 873 return ret; 874 } 875 876 static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id) 877 { 878 /* If it was marked as used, and not ID 0, decrement local_addr_used */ 879 if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) && 880 id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0)) 881 msk->pm.local_addr_used--; 882 } 883 884 static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, 885 const struct mptcp_pm_addr_entry *entry) 886 { 887 const struct mptcp_addr_info *addr = &entry->addr; 888 struct mptcp_rm_list list = { .nr = 1 }; 889 long s_slot = 0, s_num = 0; 890 struct mptcp_sock *msk; 891 892 pr_debug("remove_id=%d\n", addr->id); 893 894 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 895 struct sock *sk = (struct sock *)msk; 896 bool remove_subflow; 897 898 if (mptcp_pm_is_userspace(msk)) 899 goto next; 900 901 lock_sock(sk); 902 remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr); 903 mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && 904 !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); 905 906 list.ids[0] = mptcp_endp_get_local_id(msk, addr); 907 if (remove_subflow) { 908 spin_lock_bh(&msk->pm.lock); 909 mptcp_pm_rm_subflow(msk, &list); 910 spin_unlock_bh(&msk->pm.lock); 911 } 912 913 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 914 spin_lock_bh(&msk->pm.lock); 915 __mark_subflow_endp_available(msk, list.ids[0]); 916 spin_unlock_bh(&msk->pm.lock); 917 } 918 919 if (msk->mpc_endpoint_id == entry->addr.id) 920 msk->mpc_endpoint_id = 0; 921 release_sock(sk); 922 923 next: 924 sock_put(sk); 925 cond_resched(); 926 } 927 928 return 0; 929 } 930 931 static int mptcp_nl_remove_id_zero_address(struct net *net, 932 struct mptcp_addr_info *addr) 933 { 934 struct mptcp_rm_list list = { .nr = 0 }; 935 long s_slot = 0, s_num = 0; 936 struct mptcp_sock *msk; 937 938 list.ids[list.nr++] = 0; 939 940 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 941 struct sock *sk = (struct sock *)msk; 942 struct mptcp_addr_info msk_local; 943 944 if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) 945 goto next; 946 947 mptcp_local_address((struct sock_common *)msk, &msk_local); 948 if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) 949 goto next; 950 951 lock_sock(sk); 952 spin_lock_bh(&msk->pm.lock); 953 mptcp_pm_remove_addr(msk, &list); 954 mptcp_pm_rm_subflow(msk, &list); 955 __mark_subflow_endp_available(msk, 0); 956 spin_unlock_bh(&msk->pm.lock); 957 release_sock(sk); 958 959 next: 960 sock_put(sk); 961 cond_resched(); 962 } 963 964 return 0; 965 } 966 967 /* Remove an MPTCP endpoint */ 968 int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) 969 { 970 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 971 struct mptcp_pm_addr_entry addr, *entry; 972 unsigned int addr_max; 973 struct nlattr *attr; 974 int ret; 975 976 if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) 977 return -EINVAL; 978 979 attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; 980 ret = mptcp_pm_parse_entry(attr, info, false, &addr); 981 if (ret < 0) 982 return ret; 983 984 /* the zero id address is special: the first address used by the msk 985 * always gets such an id, so different subflows can have different zero 986 * id addresses. Additionally zero id is not accounted for in id_bitmap. 987 * Let's use an 'mptcp_rm_list' instead of the common remove code. 988 */ 989 if (addr.addr.id == 0) 990 return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr); 991 992 spin_lock_bh(&pernet->lock); 993 entry = __lookup_addr_by_id(pernet, addr.addr.id); 994 if (!entry) { 995 NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); 996 spin_unlock_bh(&pernet->lock); 997 return -EINVAL; 998 } 999 if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { 1000 addr_max = pernet->add_addr_signal_max; 1001 WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1); 1002 } 1003 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 1004 addr_max = pernet->local_addr_max; 1005 WRITE_ONCE(pernet->local_addr_max, addr_max - 1); 1006 } 1007 1008 pernet->addrs--; 1009 list_del_rcu(&entry->list); 1010 __clear_bit(entry->addr.id, pernet->id_bitmap); 1011 spin_unlock_bh(&pernet->lock); 1012 1013 mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry); 1014 synchronize_rcu(); 1015 __mptcp_pm_release_addr_entry(entry); 1016 1017 return ret; 1018 } 1019 1020 static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk, 1021 struct list_head *rm_list) 1022 { 1023 struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; 1024 struct mptcp_pm_addr_entry *entry; 1025 1026 list_for_each_entry(entry, rm_list, list) { 1027 if (slist.nr < MPTCP_RM_IDS_MAX && 1028 mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) 1029 slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); 1030 1031 if (alist.nr < MPTCP_RM_IDS_MAX && 1032 mptcp_remove_anno_list_by_saddr(msk, &entry->addr)) 1033 alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); 1034 } 1035 1036 spin_lock_bh(&msk->pm.lock); 1037 if (alist.nr) { 1038 msk->pm.add_addr_signaled -= alist.nr; 1039 mptcp_pm_remove_addr(msk, &alist); 1040 } 1041 if (slist.nr) 1042 mptcp_pm_rm_subflow(msk, &slist); 1043 /* Reset counters: maybe some subflows have been removed before */ 1044 bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 1045 msk->pm.local_addr_used = 0; 1046 spin_unlock_bh(&msk->pm.lock); 1047 } 1048 1049 static void mptcp_nl_flush_addrs_list(struct net *net, 1050 struct list_head *rm_list) 1051 { 1052 long s_slot = 0, s_num = 0; 1053 struct mptcp_sock *msk; 1054 1055 if (list_empty(rm_list)) 1056 return; 1057 1058 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1059 struct sock *sk = (struct sock *)msk; 1060 1061 if (!mptcp_pm_is_userspace(msk)) { 1062 lock_sock(sk); 1063 mptcp_pm_flush_addrs_and_subflows(msk, rm_list); 1064 release_sock(sk); 1065 } 1066 1067 sock_put(sk); 1068 cond_resched(); 1069 } 1070 } 1071 1072 /* caller must ensure the RCU grace period is already elapsed */ 1073 static void __flush_addrs(struct list_head *list) 1074 { 1075 while (!list_empty(list)) { 1076 struct mptcp_pm_addr_entry *cur; 1077 1078 cur = list_entry(list->next, 1079 struct mptcp_pm_addr_entry, list); 1080 list_del_rcu(&cur->list); 1081 __mptcp_pm_release_addr_entry(cur); 1082 } 1083 } 1084 1085 static void __reset_counters(struct pm_nl_pernet *pernet) 1086 { 1087 WRITE_ONCE(pernet->add_addr_signal_max, 0); 1088 WRITE_ONCE(pernet->add_addr_accept_max, 0); 1089 WRITE_ONCE(pernet->local_addr_max, 0); 1090 pernet->addrs = 0; 1091 } 1092 1093 int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) 1094 { 1095 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1096 LIST_HEAD(free_list); 1097 1098 spin_lock_bh(&pernet->lock); 1099 list_splice_init(&pernet->local_addr_list, &free_list); 1100 __reset_counters(pernet); 1101 pernet->next_id = 1; 1102 bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 1103 spin_unlock_bh(&pernet->lock); 1104 mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list); 1105 synchronize_rcu(); 1106 __flush_addrs(&free_list); 1107 return 0; 1108 } 1109 1110 int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, 1111 struct genl_info *info) 1112 { 1113 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1114 struct mptcp_pm_addr_entry *entry; 1115 int ret = -EINVAL; 1116 1117 rcu_read_lock(); 1118 entry = __lookup_addr_by_id(pernet, id); 1119 if (entry) { 1120 *addr = *entry; 1121 ret = 0; 1122 } 1123 rcu_read_unlock(); 1124 1125 return ret; 1126 } 1127 1128 int mptcp_pm_nl_dump_addr(struct sk_buff *msg, 1129 struct netlink_callback *cb) 1130 { 1131 struct net *net = sock_net(msg->sk); 1132 struct mptcp_pm_addr_entry *entry; 1133 struct pm_nl_pernet *pernet; 1134 int id = cb->args[0]; 1135 int i; 1136 1137 pernet = pm_nl_get_pernet(net); 1138 1139 rcu_read_lock(); 1140 for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { 1141 if (test_bit(i, pernet->id_bitmap)) { 1142 entry = __lookup_addr_by_id(pernet, i); 1143 if (!entry) 1144 break; 1145 1146 if (entry->addr.id <= id) 1147 continue; 1148 1149 if (mptcp_pm_genl_fill_addr(msg, cb, entry) < 0) 1150 break; 1151 1152 id = entry->addr.id; 1153 } 1154 } 1155 rcu_read_unlock(); 1156 1157 cb->args[0] = id; 1158 return msg->len; 1159 } 1160 1161 static int parse_limit(struct genl_info *info, int id, unsigned int *limit) 1162 { 1163 struct nlattr *attr = info->attrs[id]; 1164 1165 if (!attr) 1166 return 0; 1167 1168 *limit = nla_get_u32(attr); 1169 if (*limit > MPTCP_PM_ADDR_MAX) { 1170 NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr, 1171 "limit greater than maximum (%u)", 1172 MPTCP_PM_ADDR_MAX); 1173 return -EINVAL; 1174 } 1175 return 0; 1176 } 1177 1178 int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info) 1179 { 1180 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1181 unsigned int rcv_addrs, subflows; 1182 int ret; 1183 1184 spin_lock_bh(&pernet->lock); 1185 rcv_addrs = pernet->add_addr_accept_max; 1186 ret = parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs); 1187 if (ret) 1188 goto unlock; 1189 1190 subflows = pernet->subflows_max; 1191 ret = parse_limit(info, MPTCP_PM_ATTR_SUBFLOWS, &subflows); 1192 if (ret) 1193 goto unlock; 1194 1195 WRITE_ONCE(pernet->add_addr_accept_max, rcv_addrs); 1196 WRITE_ONCE(pernet->subflows_max, subflows); 1197 1198 unlock: 1199 spin_unlock_bh(&pernet->lock); 1200 return ret; 1201 } 1202 1203 int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info) 1204 { 1205 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1206 struct sk_buff *msg; 1207 void *reply; 1208 1209 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1210 if (!msg) 1211 return -ENOMEM; 1212 1213 reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, 1214 MPTCP_PM_CMD_GET_LIMITS); 1215 if (!reply) 1216 goto fail; 1217 1218 if (nla_put_u32(msg, MPTCP_PM_ATTR_RCV_ADD_ADDRS, 1219 READ_ONCE(pernet->add_addr_accept_max))) 1220 goto fail; 1221 1222 if (nla_put_u32(msg, MPTCP_PM_ATTR_SUBFLOWS, 1223 READ_ONCE(pernet->subflows_max))) 1224 goto fail; 1225 1226 genlmsg_end(msg, reply); 1227 return genlmsg_reply(msg, info); 1228 1229 fail: 1230 GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); 1231 nlmsg_free(msg); 1232 return -EMSGSIZE; 1233 } 1234 1235 static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, 1236 struct mptcp_addr_info *addr) 1237 { 1238 struct mptcp_rm_list list = { .nr = 0 }; 1239 1240 list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); 1241 1242 spin_lock_bh(&msk->pm.lock); 1243 mptcp_pm_rm_subflow(msk, &list); 1244 __mark_subflow_endp_available(msk, list.ids[0]); 1245 mptcp_pm_create_subflow_or_signal_addr(msk); 1246 spin_unlock_bh(&msk->pm.lock); 1247 } 1248 1249 static void mptcp_pm_nl_set_flags_all(struct net *net, 1250 struct mptcp_pm_addr_entry *local, 1251 u8 changed) 1252 { 1253 u8 is_subflow = !!(local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW); 1254 u8 bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 1255 long s_slot = 0, s_num = 0; 1256 struct mptcp_sock *msk; 1257 1258 if (changed == MPTCP_PM_ADDR_FLAG_FULLMESH && !is_subflow) 1259 return; 1260 1261 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1262 struct sock *sk = (struct sock *)msk; 1263 1264 if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) 1265 goto next; 1266 1267 lock_sock(sk); 1268 if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) 1269 mptcp_pm_mp_prio_send_ack(msk, &local->addr, NULL, bkup); 1270 /* Subflows will only be recreated if the SUBFLOW flag is set */ 1271 if (is_subflow && (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)) 1272 mptcp_pm_nl_fullmesh(msk, &local->addr); 1273 release_sock(sk); 1274 1275 next: 1276 sock_put(sk); 1277 cond_resched(); 1278 } 1279 } 1280 1281 int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, 1282 struct genl_info *info) 1283 { 1284 struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; 1285 u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | 1286 MPTCP_PM_ADDR_FLAG_FULLMESH; 1287 struct net *net = genl_info_net(info); 1288 struct mptcp_pm_addr_entry *entry; 1289 struct pm_nl_pernet *pernet; 1290 u8 lookup_by_id = 0; 1291 1292 pernet = pm_nl_get_pernet(net); 1293 1294 if (local->addr.family == AF_UNSPEC) { 1295 lookup_by_id = 1; 1296 if (!local->addr.id) { 1297 NL_SET_ERR_MSG_ATTR(info->extack, attr, 1298 "missing address ID"); 1299 return -EOPNOTSUPP; 1300 } 1301 } 1302 1303 spin_lock_bh(&pernet->lock); 1304 entry = lookup_by_id ? __lookup_addr_by_id(pernet, local->addr.id) : 1305 __lookup_addr(pernet, &local->addr); 1306 if (!entry) { 1307 spin_unlock_bh(&pernet->lock); 1308 NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); 1309 return -EINVAL; 1310 } 1311 if ((local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && 1312 (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | 1313 MPTCP_PM_ADDR_FLAG_IMPLICIT))) { 1314 spin_unlock_bh(&pernet->lock); 1315 NL_SET_ERR_MSG_ATTR(info->extack, attr, "invalid addr flags"); 1316 return -EINVAL; 1317 } 1318 1319 changed = (local->flags ^ entry->flags) & mask; 1320 entry->flags = (entry->flags & ~mask) | (local->flags & mask); 1321 *local = *entry; 1322 spin_unlock_bh(&pernet->lock); 1323 1324 mptcp_pm_nl_set_flags_all(net, local, changed); 1325 return 0; 1326 } 1327 1328 bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk) 1329 { 1330 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 1331 1332 if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) || 1333 (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap, 1334 MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) { 1335 WRITE_ONCE(msk->pm.work_pending, false); 1336 return false; 1337 } 1338 return true; 1339 } 1340 1341 /* Called under PM lock */ 1342 void __mptcp_pm_kernel_worker(struct mptcp_sock *msk) 1343 { 1344 struct mptcp_pm_data *pm = &msk->pm; 1345 1346 if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { 1347 pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); 1348 mptcp_pm_nl_add_addr_received(msk); 1349 } 1350 if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { 1351 pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); 1352 mptcp_pm_nl_fully_established(msk); 1353 } 1354 if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { 1355 pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); 1356 mptcp_pm_nl_subflow_established(msk); 1357 } 1358 } 1359 1360 static int __net_init pm_nl_init_net(struct net *net) 1361 { 1362 struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); 1363 1364 INIT_LIST_HEAD_RCU(&pernet->local_addr_list); 1365 1366 /* Cit. 2 subflows ought to be enough for anybody. */ 1367 pernet->subflows_max = 2; 1368 pernet->next_id = 1; 1369 pernet->stale_loss_cnt = 4; 1370 spin_lock_init(&pernet->lock); 1371 1372 /* No need to initialize other pernet fields, the struct is zeroed at 1373 * allocation time. 1374 */ 1375 1376 return 0; 1377 } 1378 1379 static void __net_exit pm_nl_exit_net(struct list_head *net_list) 1380 { 1381 struct net *net; 1382 1383 list_for_each_entry(net, net_list, exit_list) { 1384 struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); 1385 1386 /* net is removed from namespace list, can't race with 1387 * other modifiers, also netns core already waited for a 1388 * RCU grace period. 1389 */ 1390 __flush_addrs(&pernet->local_addr_list); 1391 } 1392 } 1393 1394 static struct pernet_operations mptcp_pm_pernet_ops = { 1395 .init = pm_nl_init_net, 1396 .exit_batch = pm_nl_exit_net, 1397 .id = &pm_nl_pernet_id, 1398 .size = sizeof(struct pm_nl_pernet), 1399 }; 1400 1401 struct mptcp_pm_ops mptcp_pm_kernel = { 1402 .name = "kernel", 1403 .owner = THIS_MODULE, 1404 }; 1405 1406 void __init mptcp_pm_kernel_register(void) 1407 { 1408 if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0) 1409 panic("Failed to register MPTCP PM pernet subsystem.\n"); 1410 1411 mptcp_pm_register(&mptcp_pm_kernel); 1412 } 1413