1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2025, Matthieu Baerts. 5 */ 6 7 #define pr_fmt(fmt) "MPTCP: " fmt 8 9 #include <net/netns/generic.h> 10 11 #include "protocol.h" 12 #include "mib.h" 13 #include "mptcp_pm_gen.h" 14 15 static int pm_nl_pernet_id; 16 17 struct pm_nl_pernet { 18 /* protects pernet updates */ 19 spinlock_t lock; 20 struct list_head endp_list; 21 u8 endpoints; 22 u8 endp_signal_max; 23 u8 endp_subflow_max; 24 u8 endp_laminar_max; 25 u8 limit_add_addr_accepted; 26 u8 limit_extra_subflows; 27 u8 next_id; 28 DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 29 }; 30 31 #define MPTCP_PM_ADDR_MAX 8 32 33 static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) 34 { 35 return net_generic(net, pm_nl_pernet_id); 36 } 37 38 static struct pm_nl_pernet * 39 pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) 40 { 41 return pm_nl_get_pernet(sock_net((struct sock *)msk)); 42 } 43 44 static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) 45 { 46 return pm_nl_get_pernet(genl_info_net(info)); 47 } 48 49 u8 mptcp_pm_get_endp_signal_max(const struct mptcp_sock *msk) 50 { 51 const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 52 53 return READ_ONCE(pernet->endp_signal_max); 54 } 55 EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_signal_max); 56 57 u8 mptcp_pm_get_endp_subflow_max(const struct mptcp_sock *msk) 58 { 59 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 60 61 return READ_ONCE(pernet->endp_subflow_max); 62 } 63 EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_subflow_max); 64 65 u8 mptcp_pm_get_endp_laminar_max(const struct mptcp_sock *msk) 66 { 67 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 68 69 return READ_ONCE(pernet->endp_laminar_max); 70 } 71 EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_laminar_max); 72 73 u8 mptcp_pm_get_limit_add_addr_accepted(const struct mptcp_sock *msk) 74 { 75 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 76 77 return READ_ONCE(pernet->limit_add_addr_accepted); 78 } 79 EXPORT_SYMBOL_GPL(mptcp_pm_get_limit_add_addr_accepted); 80 81 u8 mptcp_pm_get_limit_extra_subflows(const struct mptcp_sock *msk) 82 { 83 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 84 85 return READ_ONCE(pernet->limit_extra_subflows); 86 } 87 EXPORT_SYMBOL_GPL(mptcp_pm_get_limit_extra_subflows); 88 89 static bool lookup_subflow_by_daddr(const struct list_head *list, 90 const struct mptcp_addr_info *daddr) 91 { 92 struct mptcp_subflow_context *subflow; 93 struct mptcp_addr_info cur; 94 95 list_for_each_entry(subflow, list, node) { 96 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 97 98 if (!((1 << inet_sk_state_load(ssk)) & 99 (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV))) 100 continue; 101 102 mptcp_remote_address((struct sock_common *)ssk, &cur); 103 if (mptcp_addresses_equal(&cur, daddr, daddr->port)) 104 return true; 105 } 106 107 return false; 108 } 109 110 static bool 111 select_local_address(const struct pm_nl_pernet *pernet, 112 const struct mptcp_sock *msk, 113 struct mptcp_pm_local *new_local) 114 { 115 struct mptcp_pm_addr_entry *entry; 116 bool found = false; 117 118 msk_owned_by_me(msk); 119 120 rcu_read_lock(); 121 list_for_each_entry_rcu(entry, &pernet->endp_list, list) { 122 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) 123 continue; 124 125 if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) 126 continue; 127 128 new_local->addr = entry->addr; 129 new_local->flags = entry->flags; 130 new_local->ifindex = entry->ifindex; 131 found = true; 132 break; 133 } 134 rcu_read_unlock(); 135 136 return found; 137 } 138 139 static bool 140 select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, 141 struct mptcp_pm_local *new_local) 142 { 143 struct mptcp_pm_addr_entry *entry; 144 bool found = false; 145 146 rcu_read_lock(); 147 /* do not keep any additional per socket state, just signal 148 * the address list in order. 149 * Note: removal from the local address list during the msk life-cycle 150 * can lead to additional addresses not being announced. 151 */ 152 list_for_each_entry_rcu(entry, &pernet->endp_list, list) { 153 if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) 154 continue; 155 156 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) 157 continue; 158 159 new_local->addr = entry->addr; 160 new_local->flags = entry->flags; 161 new_local->ifindex = entry->ifindex; 162 found = true; 163 break; 164 } 165 rcu_read_unlock(); 166 167 return found; 168 } 169 170 static unsigned int 171 fill_remote_addr(struct mptcp_sock *msk, struct mptcp_addr_info *local, 172 struct mptcp_addr_info *addrs) 173 { 174 bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); 175 struct mptcp_addr_info remote = { 0 }; 176 struct sock *sk = (struct sock *)msk; 177 178 if (deny_id0) 179 return 0; 180 181 mptcp_remote_address((struct sock_common *)sk, &remote); 182 183 if (!mptcp_pm_addr_families_match(sk, local, &remote)) 184 return 0; 185 186 msk->pm.extra_subflows++; 187 *addrs = remote; 188 189 return 1; 190 } 191 192 static unsigned int 193 fill_remote_addresses_fullmesh(struct mptcp_sock *msk, 194 struct mptcp_addr_info *local, 195 struct mptcp_addr_info *addrs) 196 { 197 u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk); 198 bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); 199 DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 200 struct sock *sk = (struct sock *)msk, *ssk; 201 struct mptcp_subflow_context *subflow; 202 int i = 0; 203 204 /* Forbid creation of new subflows matching existing ones, possibly 205 * already created by incoming ADD_ADDR 206 */ 207 bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 208 mptcp_for_each_subflow(msk, subflow) 209 if (READ_ONCE(subflow->local_id) == local->id) 210 __set_bit(subflow->remote_id, unavail_id); 211 212 mptcp_for_each_subflow(msk, subflow) { 213 ssk = mptcp_subflow_tcp_sock(subflow); 214 mptcp_remote_address((struct sock_common *)ssk, &addrs[i]); 215 addrs[i].id = READ_ONCE(subflow->remote_id); 216 if (deny_id0 && !addrs[i].id) 217 continue; 218 219 if (test_bit(addrs[i].id, unavail_id)) 220 continue; 221 222 if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) 223 continue; 224 225 /* forbid creating multiple address towards this id */ 226 __set_bit(addrs[i].id, unavail_id); 227 msk->pm.extra_subflows++; 228 i++; 229 230 if (msk->pm.extra_subflows >= limit_extra_subflows) 231 break; 232 } 233 234 return i; 235 } 236 237 /* Fill all the remote addresses into the array addrs[], 238 * and return the array size. 239 */ 240 static unsigned int 241 fill_remote_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *local, 242 bool fullmesh, struct mptcp_addr_info *addrs) 243 { 244 /* Non-fullmesh: fill in the single entry corresponding to the primary 245 * MPC subflow remote address, and return 1, corresponding to 1 entry. 246 */ 247 if (!fullmesh) 248 return fill_remote_addr(msk, local, addrs); 249 250 /* Fullmesh endpoint: fill all possible remote addresses */ 251 return fill_remote_addresses_fullmesh(msk, local, addrs); 252 } 253 254 static struct mptcp_pm_addr_entry * 255 __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) 256 { 257 struct mptcp_pm_addr_entry *entry; 258 259 list_for_each_entry_rcu(entry, &pernet->endp_list, list, 260 lockdep_is_held(&pernet->lock)) { 261 if (entry->addr.id == id) 262 return entry; 263 } 264 return NULL; 265 } 266 267 static struct mptcp_pm_addr_entry * 268 __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) 269 { 270 struct mptcp_pm_addr_entry *entry; 271 272 list_for_each_entry_rcu(entry, &pernet->endp_list, list, 273 lockdep_is_held(&pernet->lock)) { 274 if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) 275 return entry; 276 } 277 return NULL; 278 } 279 280 static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk, 281 const struct mptcp_addr_info *addr) 282 { 283 return msk->mpc_endpoint_id == addr->id ? 0 : addr->id; 284 } 285 286 /* Set mpc_endpoint_id, and send MP_PRIO for ID0 if needed */ 287 static void mptcp_mpc_endpoint_setup(struct mptcp_sock *msk) 288 { 289 struct mptcp_subflow_context *subflow; 290 struct mptcp_pm_addr_entry *entry; 291 struct mptcp_addr_info mpc_addr; 292 struct pm_nl_pernet *pernet; 293 bool backup = false; 294 295 /* do lazy endpoint usage accounting for the MPC subflows */ 296 if (likely(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED)) || 297 !msk->first) 298 return; 299 300 subflow = mptcp_subflow_ctx(msk->first); 301 pernet = pm_nl_get_pernet_from_msk(msk); 302 303 mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); 304 rcu_read_lock(); 305 entry = __lookup_addr(pernet, &mpc_addr); 306 if (entry) { 307 __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); 308 msk->mpc_endpoint_id = entry->addr.id; 309 backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 310 } 311 rcu_read_unlock(); 312 313 /* Send MP_PRIO */ 314 if (backup) 315 mptcp_pm_send_ack(msk, subflow, true, backup); 316 317 msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED); 318 } 319 320 static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) 321 { 322 u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk); 323 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 324 u8 endp_subflow_max = mptcp_pm_get_endp_subflow_max(msk); 325 u8 endp_signal_max = mptcp_pm_get_endp_signal_max(msk); 326 struct sock *sk = (struct sock *)msk; 327 bool signal_and_subflow = false; 328 struct mptcp_pm_local local; 329 330 mptcp_mpc_endpoint_setup(msk); 331 332 pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", 333 msk->pm.local_addr_used, endp_subflow_max, 334 msk->pm.add_addr_signaled, endp_signal_max, 335 msk->pm.extra_subflows, limit_extra_subflows); 336 337 /* check first for announce */ 338 if (msk->pm.add_addr_signaled < endp_signal_max) { 339 /* due to racing events on both ends we can reach here while 340 * previous add address is still running: if we invoke now 341 * mptcp_pm_announce_addr(), that will fail and the 342 * corresponding id will be marked as used. 343 * Instead let the PM machinery reschedule us when the 344 * current address announce will be completed. 345 */ 346 if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) 347 return; 348 349 if (!select_signal_address(pernet, msk, &local)) 350 goto subflow; 351 352 /* If the alloc fails, we are on memory pressure, not worth 353 * continuing, and trying to create subflows. 354 */ 355 if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) 356 return; 357 358 __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); 359 msk->pm.add_addr_signaled++; 360 361 /* Special case for ID0: set the correct ID */ 362 if (local.addr.id == msk->mpc_endpoint_id) 363 local.addr.id = 0; 364 365 mptcp_pm_announce_addr(msk, &local.addr, false); 366 mptcp_pm_addr_send_ack(msk); 367 368 if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) 369 signal_and_subflow = true; 370 } 371 372 subflow: 373 /* No need to try establishing subflows to remote id0 if not allowed */ 374 if (mptcp_pm_add_addr_c_flag_case(msk)) 375 goto exit; 376 377 /* check if should create a new subflow */ 378 while (msk->pm.local_addr_used < endp_subflow_max && 379 msk->pm.extra_subflows < limit_extra_subflows) { 380 struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; 381 bool fullmesh; 382 int i, nr; 383 384 if (signal_and_subflow) 385 signal_and_subflow = false; 386 else if (!select_local_address(pernet, msk, &local)) 387 break; 388 389 fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); 390 391 __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); 392 393 /* Special case for ID0: set the correct ID */ 394 if (local.addr.id == msk->mpc_endpoint_id) 395 local.addr.id = 0; 396 else /* local_addr_used is not decr for ID 0 */ 397 msk->pm.local_addr_used++; 398 399 nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); 400 if (nr == 0) 401 continue; 402 403 spin_unlock_bh(&msk->pm.lock); 404 for (i = 0; i < nr; i++) 405 __mptcp_subflow_connect(sk, &local, &addrs[i]); 406 spin_lock_bh(&msk->pm.lock); 407 } 408 409 exit: 410 mptcp_pm_nl_check_work_pending(msk); 411 } 412 413 static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) 414 { 415 mptcp_pm_create_subflow_or_signal_addr(msk); 416 } 417 418 static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) 419 { 420 mptcp_pm_create_subflow_or_signal_addr(msk); 421 } 422 423 static unsigned int 424 fill_local_addresses_vec_fullmesh(struct mptcp_sock *msk, 425 struct mptcp_addr_info *remote, 426 struct mptcp_pm_local *locals, 427 bool c_flag_case) 428 { 429 u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk); 430 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 431 struct sock *sk = (struct sock *)msk; 432 struct mptcp_pm_addr_entry *entry; 433 struct mptcp_pm_local *local; 434 int i = 0; 435 436 rcu_read_lock(); 437 list_for_each_entry_rcu(entry, &pernet->endp_list, list) { 438 bool is_id0; 439 440 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) 441 continue; 442 443 if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote)) 444 continue; 445 446 local = &locals[i]; 447 local->addr = entry->addr; 448 local->flags = entry->flags; 449 local->ifindex = entry->ifindex; 450 451 is_id0 = local->addr.id == msk->mpc_endpoint_id; 452 453 if (c_flag_case && 454 (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) { 455 __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); 456 457 if (!is_id0) 458 msk->pm.local_addr_used++; 459 } 460 461 /* Special case for ID0: set the correct ID */ 462 if (is_id0) 463 local->addr.id = 0; 464 465 msk->pm.extra_subflows++; 466 i++; 467 468 if (msk->pm.extra_subflows >= limit_extra_subflows) 469 break; 470 } 471 rcu_read_unlock(); 472 473 return i; 474 } 475 476 static unsigned int 477 fill_local_laminar_endp(struct mptcp_sock *msk, struct mptcp_addr_info *remote, 478 struct mptcp_pm_local *locals) 479 { 480 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 481 DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 482 struct mptcp_subflow_context *subflow; 483 struct sock *sk = (struct sock *)msk; 484 struct mptcp_pm_addr_entry *entry; 485 struct mptcp_pm_local *local; 486 int found = 0; 487 488 /* Forbid creation of new subflows matching existing ones, possibly 489 * already created by 'subflow' endpoints 490 */ 491 bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 492 mptcp_for_each_subflow(msk, subflow) { 493 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 494 495 if ((1 << inet_sk_state_load(ssk)) & 496 (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | 497 TCPF_CLOSE)) 498 continue; 499 500 __set_bit(subflow_get_local_id(subflow), unavail_id); 501 } 502 503 rcu_read_lock(); 504 list_for_each_entry_rcu(entry, &pernet->endp_list, list) { 505 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR)) 506 continue; 507 508 if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote)) 509 continue; 510 511 if (test_bit(mptcp_endp_get_local_id(msk, &entry->addr), 512 unavail_id)) 513 continue; 514 515 local = &locals[0]; 516 local->addr = entry->addr; 517 local->flags = entry->flags; 518 local->ifindex = entry->ifindex; 519 520 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 521 __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); 522 523 if (local->addr.id != msk->mpc_endpoint_id) 524 msk->pm.local_addr_used++; 525 } 526 527 msk->pm.extra_subflows++; 528 found = 1; 529 break; 530 } 531 rcu_read_unlock(); 532 533 return found; 534 } 535 536 static unsigned int 537 fill_local_addresses_vec_c_flag(struct mptcp_sock *msk, 538 struct mptcp_addr_info *remote, 539 struct mptcp_pm_local *locals) 540 { 541 u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk); 542 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 543 u8 endp_subflow_max = mptcp_pm_get_endp_subflow_max(msk); 544 struct sock *sk = (struct sock *)msk; 545 struct mptcp_pm_local *local; 546 int i = 0; 547 548 while (msk->pm.local_addr_used < endp_subflow_max) { 549 local = &locals[i]; 550 551 if (!select_local_address(pernet, msk, local)) 552 break; 553 554 __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); 555 556 if (!mptcp_pm_addr_families_match(sk, &local->addr, remote)) 557 continue; 558 559 if (local->addr.id == msk->mpc_endpoint_id) 560 continue; 561 562 msk->pm.local_addr_used++; 563 msk->pm.extra_subflows++; 564 i++; 565 566 if (msk->pm.extra_subflows >= limit_extra_subflows) 567 break; 568 } 569 570 return i; 571 } 572 573 static unsigned int 574 fill_local_address_any(struct mptcp_sock *msk, struct mptcp_addr_info *remote, 575 struct mptcp_pm_local *local) 576 { 577 struct sock *sk = (struct sock *)msk; 578 579 memset(local, 0, sizeof(*local)); 580 local->addr.family = 581 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 582 remote->family == AF_INET6 && 583 ipv6_addr_v4mapped(&remote->addr6) ? AF_INET : 584 #endif 585 remote->family; 586 587 if (!mptcp_pm_addr_families_match(sk, &local->addr, remote)) 588 return 0; 589 590 msk->pm.extra_subflows++; 591 592 return 1; 593 } 594 595 /* Fill all the local addresses into the array addrs[], 596 * and return the array size. 597 */ 598 static unsigned int 599 fill_local_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *remote, 600 struct mptcp_pm_local *locals) 601 { 602 bool c_flag_case = remote->id && mptcp_pm_add_addr_c_flag_case(msk); 603 int i; 604 605 /* If there is at least one MPTCP endpoint with a fullmesh flag */ 606 i = fill_local_addresses_vec_fullmesh(msk, remote, locals, c_flag_case); 607 if (i) 608 return i; 609 610 /* If there is at least one MPTCP endpoint with a laminar flag */ 611 if (mptcp_pm_get_endp_laminar_max(msk)) 612 return fill_local_laminar_endp(msk, remote, locals); 613 614 /* Special case: peer sets the C flag, accept one ADD_ADDR if default 615 * limits are used -- accepting no ADD_ADDR -- and use subflow endpoints 616 */ 617 if (c_flag_case) 618 return fill_local_addresses_vec_c_flag(msk, remote, locals); 619 620 /* No special case: fill in the single 'IPADDRANY' local address */ 621 return fill_local_address_any(msk, remote, &locals[0]); 622 } 623 624 static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) 625 { 626 u8 limit_add_addr_accepted = mptcp_pm_get_limit_add_addr_accepted(msk); 627 u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk); 628 struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX]; 629 struct sock *sk = (struct sock *)msk; 630 struct mptcp_addr_info remote; 631 bool sf_created = false; 632 int i, nr; 633 634 pr_debug("accepted %d:%d remote family %d\n", 635 msk->pm.add_addr_accepted, limit_add_addr_accepted, 636 msk->pm.remote.family); 637 638 remote = msk->pm.remote; 639 mptcp_pm_announce_addr(msk, &remote, true); 640 mptcp_pm_addr_send_ack(msk); 641 mptcp_mpc_endpoint_setup(msk); 642 643 if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) 644 return; 645 646 /* pick id 0 port, if none is provided the remote address */ 647 if (!remote.port) 648 remote.port = sk->sk_dport; 649 650 /* connect to the specified remote address, using whatever 651 * local address the routing configuration will pick. 652 */ 653 nr = fill_local_addresses_vec(msk, &remote, locals); 654 if (nr == 0) 655 return; 656 657 spin_unlock_bh(&msk->pm.lock); 658 for (i = 0; i < nr; i++) 659 if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0) 660 sf_created = true; 661 spin_lock_bh(&msk->pm.lock); 662 663 if (sf_created) { 664 /* add_addr_accepted is not decr for ID 0 */ 665 if (remote.id) 666 msk->pm.add_addr_accepted++; 667 if (msk->pm.add_addr_accepted >= limit_add_addr_accepted || 668 msk->pm.extra_subflows >= limit_extra_subflows) 669 WRITE_ONCE(msk->pm.accept_addr, false); 670 } 671 } 672 673 void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id) 674 { 675 if (rm_id && WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { 676 u8 limit_add_addr_accepted = 677 mptcp_pm_get_limit_add_addr_accepted(msk); 678 679 /* Note: if the subflow has been closed before, this 680 * add_addr_accepted counter will not be decremented. 681 */ 682 if (--msk->pm.add_addr_accepted < limit_add_addr_accepted) 683 WRITE_ONCE(msk->pm.accept_addr, true); 684 } 685 } 686 687 static bool address_use_port(struct mptcp_pm_addr_entry *entry) 688 { 689 return (entry->flags & 690 (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) == 691 MPTCP_PM_ADDR_FLAG_SIGNAL; 692 } 693 694 /* caller must ensure the RCU grace period is already elapsed */ 695 static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) 696 { 697 if (entry->lsk) 698 sock_release(entry->lsk); 699 kfree(entry); 700 } 701 702 static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, 703 struct mptcp_pm_addr_entry *entry, 704 bool needs_id, bool replace) 705 { 706 struct mptcp_pm_addr_entry *cur, *del_entry = NULL; 707 int ret = -EINVAL; 708 u8 addr_max; 709 710 spin_lock_bh(&pernet->lock); 711 /* to keep the code simple, don't do IDR-like allocation for address ID, 712 * just bail when we exceed limits 713 */ 714 if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID) 715 pernet->next_id = 1; 716 if (pernet->endpoints >= MPTCP_PM_ADDR_MAX) { 717 ret = -ERANGE; 718 goto out; 719 } 720 if (test_bit(entry->addr.id, pernet->id_bitmap)) { 721 ret = -EBUSY; 722 goto out; 723 } 724 725 /* do not insert duplicate address, differentiate on port only 726 * singled addresses 727 */ 728 if (!address_use_port(entry)) 729 entry->addr.port = 0; 730 list_for_each_entry(cur, &pernet->endp_list, list) { 731 if (mptcp_addresses_equal(&cur->addr, &entry->addr, 732 cur->addr.port || entry->addr.port)) { 733 /* allow replacing the exiting endpoint only if such 734 * endpoint is an implicit one and the user-space 735 * did not provide an endpoint id 736 */ 737 if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) { 738 ret = -EEXIST; 739 goto out; 740 } 741 if (entry->addr.id) 742 goto out; 743 744 /* allow callers that only need to look up the local 745 * addr's id to skip replacement. This allows them to 746 * avoid calling synchronize_rcu in the packet recv 747 * path. 748 */ 749 if (!replace) { 750 kfree(entry); 751 ret = cur->addr.id; 752 goto out; 753 } 754 755 pernet->endpoints--; 756 entry->addr.id = cur->addr.id; 757 list_del_rcu(&cur->list); 758 del_entry = cur; 759 break; 760 } 761 } 762 763 if (!entry->addr.id && needs_id) { 764 find_next: 765 entry->addr.id = find_next_zero_bit(pernet->id_bitmap, 766 MPTCP_PM_MAX_ADDR_ID + 1, 767 pernet->next_id); 768 if (!entry->addr.id && pernet->next_id != 1) { 769 pernet->next_id = 1; 770 goto find_next; 771 } 772 } 773 774 if (!entry->addr.id && needs_id) 775 goto out; 776 777 __set_bit(entry->addr.id, pernet->id_bitmap); 778 if (entry->addr.id > pernet->next_id) 779 pernet->next_id = entry->addr.id; 780 781 if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { 782 addr_max = pernet->endp_signal_max; 783 WRITE_ONCE(pernet->endp_signal_max, addr_max + 1); 784 } 785 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 786 addr_max = pernet->endp_subflow_max; 787 WRITE_ONCE(pernet->endp_subflow_max, addr_max + 1); 788 } 789 if (entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR) { 790 addr_max = pernet->endp_laminar_max; 791 WRITE_ONCE(pernet->endp_laminar_max, addr_max + 1); 792 } 793 794 pernet->endpoints++; 795 if (!entry->addr.port) 796 list_add_tail_rcu(&entry->list, &pernet->endp_list); 797 else 798 list_add_rcu(&entry->list, &pernet->endp_list); 799 ret = entry->addr.id; 800 801 out: 802 spin_unlock_bh(&pernet->lock); 803 804 /* just replaced an existing entry, free it */ 805 if (del_entry) { 806 synchronize_rcu(); 807 __mptcp_pm_release_addr_entry(del_entry); 808 } 809 return ret; 810 } 811 812 static struct lock_class_key mptcp_slock_keys[2]; 813 static struct lock_class_key mptcp_keys[2]; 814 815 static int mptcp_pm_nl_create_listen_socket(struct sock *sk, 816 struct mptcp_pm_addr_entry *entry) 817 { 818 bool is_ipv6 = sk->sk_family == AF_INET6; 819 int addrlen = sizeof(struct sockaddr_in); 820 struct sockaddr_storage addr; 821 struct sock *newsk, *ssk; 822 int backlog = 1024; 823 int err; 824 825 err = sock_create_kern(sock_net(sk), entry->addr.family, 826 SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk); 827 if (err) 828 return err; 829 830 newsk = entry->lsk->sk; 831 if (!newsk) 832 return -EINVAL; 833 834 /* The subflow socket lock is acquired in a nested to the msk one 835 * in several places, even by the TCP stack, and this msk is a kernel 836 * socket: lockdep complains. Instead of propagating the _nested 837 * modifiers in several places, re-init the lock class for the msk 838 * socket to an mptcp specific one. 839 */ 840 sock_lock_init_class_and_name(newsk, 841 is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET", 842 &mptcp_slock_keys[is_ipv6], 843 is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET", 844 &mptcp_keys[is_ipv6]); 845 846 lock_sock(newsk); 847 ssk = __mptcp_nmpc_sk(mptcp_sk(newsk)); 848 release_sock(newsk); 849 if (IS_ERR(ssk)) 850 return PTR_ERR(ssk); 851 852 mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); 853 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 854 if (entry->addr.family == AF_INET6) 855 addrlen = sizeof(struct sockaddr_in6); 856 #endif 857 if (ssk->sk_family == AF_INET) 858 err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); 859 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 860 else if (ssk->sk_family == AF_INET6) 861 err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); 862 #endif 863 if (err) 864 return err; 865 866 /* We don't use mptcp_set_state() here because it needs to be called 867 * under the msk socket lock. For the moment, that will not bring 868 * anything more than only calling inet_sk_state_store(), because the 869 * old status is known (TCP_CLOSE). 870 */ 871 inet_sk_state_store(newsk, TCP_LISTEN); 872 lock_sock(ssk); 873 WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true); 874 err = __inet_listen_sk(ssk, backlog); 875 if (!err) 876 mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); 877 release_sock(ssk); 878 return err; 879 } 880 881 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, 882 struct mptcp_pm_addr_entry *skc) 883 { 884 struct mptcp_pm_addr_entry *entry; 885 struct pm_nl_pernet *pernet; 886 int ret; 887 888 pernet = pm_nl_get_pernet_from_msk(msk); 889 890 rcu_read_lock(); 891 entry = __lookup_addr(pernet, &skc->addr); 892 ret = entry ? entry->addr.id : -1; 893 rcu_read_unlock(); 894 if (ret >= 0) 895 return ret; 896 897 /* address not found, add to local list */ 898 entry = kmemdup(skc, sizeof(*skc), GFP_ATOMIC); 899 if (!entry) 900 return -ENOMEM; 901 902 entry->addr.port = 0; 903 ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); 904 if (ret < 0) 905 kfree(entry); 906 907 return ret; 908 } 909 910 bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) 911 { 912 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 913 struct mptcp_pm_addr_entry *entry; 914 bool backup; 915 916 rcu_read_lock(); 917 entry = __lookup_addr(pernet, skc); 918 backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 919 rcu_read_unlock(); 920 921 return backup; 922 } 923 924 static int mptcp_nl_add_subflow_or_signal_addr(struct net *net, 925 struct mptcp_addr_info *addr) 926 { 927 struct mptcp_sock *msk; 928 long s_slot = 0, s_num = 0; 929 930 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 931 struct sock *sk = (struct sock *)msk; 932 struct mptcp_addr_info mpc_addr; 933 934 if (!READ_ONCE(msk->fully_established) || 935 mptcp_pm_is_userspace(msk)) 936 goto next; 937 938 /* if the endp linked to the init sf is re-added with a != ID */ 939 mptcp_local_address((struct sock_common *)msk, &mpc_addr); 940 941 lock_sock(sk); 942 spin_lock_bh(&msk->pm.lock); 943 if (mptcp_addresses_equal(addr, &mpc_addr, addr->port)) 944 msk->mpc_endpoint_id = addr->id; 945 mptcp_pm_create_subflow_or_signal_addr(msk); 946 spin_unlock_bh(&msk->pm.lock); 947 release_sock(sk); 948 949 next: 950 sock_put(sk); 951 cond_resched(); 952 } 953 954 return 0; 955 } 956 957 static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, 958 struct genl_info *info) 959 { 960 struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; 961 962 if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, 963 mptcp_pm_address_nl_policy, info->extack) && 964 tb[MPTCP_PM_ADDR_ATTR_ID]) 965 return true; 966 return false; 967 } 968 969 /* Add an MPTCP endpoint */ 970 int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) 971 { 972 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 973 struct mptcp_pm_addr_entry addr, *entry; 974 struct nlattr *attr; 975 int ret; 976 977 if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) 978 return -EINVAL; 979 980 attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; 981 ret = mptcp_pm_parse_entry(attr, info, true, &addr); 982 if (ret < 0) 983 return ret; 984 985 if (addr.addr.port && !address_use_port(&addr)) { 986 NL_SET_ERR_MSG_ATTR(info->extack, attr, 987 "flags must have signal and not subflow when using port"); 988 return -EINVAL; 989 } 990 991 if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL && 992 addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { 993 NL_SET_ERR_MSG_ATTR(info->extack, attr, 994 "flags mustn't have both signal and fullmesh"); 995 return -EINVAL; 996 } 997 998 if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { 999 NL_SET_ERR_MSG_ATTR(info->extack, attr, 1000 "can't create IMPLICIT endpoint"); 1001 return -EINVAL; 1002 } 1003 1004 entry = kmemdup(&addr, sizeof(addr), GFP_KERNEL_ACCOUNT); 1005 if (!entry) { 1006 GENL_SET_ERR_MSG(info, "can't allocate addr"); 1007 return -ENOMEM; 1008 } 1009 1010 if (entry->addr.port) { 1011 ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); 1012 if (ret) { 1013 GENL_SET_ERR_MSG_FMT(info, "create listen socket error: %d", ret); 1014 goto out_free; 1015 } 1016 } 1017 ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, 1018 !mptcp_pm_has_addr_attr_id(attr, info), 1019 true); 1020 if (ret < 0) { 1021 GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); 1022 goto out_free; 1023 } 1024 1025 mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk), &entry->addr); 1026 return 0; 1027 1028 out_free: 1029 __mptcp_pm_release_addr_entry(entry); 1030 return ret; 1031 } 1032 1033 static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, 1034 const struct mptcp_addr_info *addr, 1035 bool force) 1036 { 1037 struct mptcp_rm_list list = { .nr = 0 }; 1038 bool ret; 1039 1040 list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); 1041 1042 ret = mptcp_remove_anno_list_by_saddr(msk, addr); 1043 if (ret || force) { 1044 spin_lock_bh(&msk->pm.lock); 1045 if (ret) { 1046 __set_bit(addr->id, msk->pm.id_avail_bitmap); 1047 msk->pm.add_addr_signaled--; 1048 } 1049 mptcp_pm_remove_addr(msk, &list); 1050 spin_unlock_bh(&msk->pm.lock); 1051 } 1052 return ret; 1053 } 1054 1055 static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id) 1056 { 1057 /* If it was marked as used, and not ID 0, decrement local_addr_used */ 1058 if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) && 1059 id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0)) 1060 msk->pm.local_addr_used--; 1061 } 1062 1063 static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, 1064 const struct mptcp_pm_addr_entry *entry) 1065 { 1066 const struct mptcp_addr_info *addr = &entry->addr; 1067 struct mptcp_rm_list list = { .nr = 1 }; 1068 long s_slot = 0, s_num = 0; 1069 struct mptcp_sock *msk; 1070 1071 pr_debug("remove_id=%d\n", addr->id); 1072 1073 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1074 struct sock *sk = (struct sock *)msk; 1075 bool remove_subflow; 1076 1077 if (mptcp_pm_is_userspace(msk)) 1078 goto next; 1079 1080 lock_sock(sk); 1081 remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr); 1082 mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && 1083 !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); 1084 1085 list.ids[0] = mptcp_endp_get_local_id(msk, addr); 1086 if (remove_subflow) { 1087 spin_lock_bh(&msk->pm.lock); 1088 mptcp_pm_rm_subflow(msk, &list); 1089 spin_unlock_bh(&msk->pm.lock); 1090 } 1091 1092 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 1093 spin_lock_bh(&msk->pm.lock); 1094 __mark_subflow_endp_available(msk, list.ids[0]); 1095 spin_unlock_bh(&msk->pm.lock); 1096 } 1097 1098 if (msk->mpc_endpoint_id == entry->addr.id) 1099 msk->mpc_endpoint_id = 0; 1100 release_sock(sk); 1101 1102 next: 1103 sock_put(sk); 1104 cond_resched(); 1105 } 1106 1107 return 0; 1108 } 1109 1110 static int mptcp_nl_remove_id_zero_address(struct net *net, 1111 struct mptcp_addr_info *addr) 1112 { 1113 struct mptcp_rm_list list = { .nr = 0 }; 1114 long s_slot = 0, s_num = 0; 1115 struct mptcp_sock *msk; 1116 1117 list.ids[list.nr++] = 0; 1118 1119 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1120 struct sock *sk = (struct sock *)msk; 1121 struct mptcp_addr_info msk_local; 1122 1123 if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) 1124 goto next; 1125 1126 mptcp_local_address((struct sock_common *)msk, &msk_local); 1127 if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) 1128 goto next; 1129 1130 lock_sock(sk); 1131 spin_lock_bh(&msk->pm.lock); 1132 mptcp_pm_remove_addr(msk, &list); 1133 mptcp_pm_rm_subflow(msk, &list); 1134 __mark_subflow_endp_available(msk, 0); 1135 spin_unlock_bh(&msk->pm.lock); 1136 release_sock(sk); 1137 1138 next: 1139 sock_put(sk); 1140 cond_resched(); 1141 } 1142 1143 return 0; 1144 } 1145 1146 /* Remove an MPTCP endpoint */ 1147 int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) 1148 { 1149 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1150 struct mptcp_pm_addr_entry addr, *entry; 1151 struct nlattr *attr; 1152 u8 addr_max; 1153 int ret; 1154 1155 if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) 1156 return -EINVAL; 1157 1158 attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; 1159 ret = mptcp_pm_parse_entry(attr, info, false, &addr); 1160 if (ret < 0) 1161 return ret; 1162 1163 /* the zero id address is special: the first address used by the msk 1164 * always gets such an id, so different subflows can have different zero 1165 * id addresses. Additionally zero id is not accounted for in id_bitmap. 1166 * Let's use an 'mptcp_rm_list' instead of the common remove code. 1167 */ 1168 if (addr.addr.id == 0) 1169 return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr); 1170 1171 spin_lock_bh(&pernet->lock); 1172 entry = __lookup_addr_by_id(pernet, addr.addr.id); 1173 if (!entry) { 1174 NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); 1175 spin_unlock_bh(&pernet->lock); 1176 return -EINVAL; 1177 } 1178 if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { 1179 addr_max = pernet->endp_signal_max; 1180 WRITE_ONCE(pernet->endp_signal_max, addr_max - 1); 1181 } 1182 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 1183 addr_max = pernet->endp_subflow_max; 1184 WRITE_ONCE(pernet->endp_subflow_max, addr_max - 1); 1185 } 1186 if (entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR) { 1187 addr_max = pernet->endp_laminar_max; 1188 WRITE_ONCE(pernet->endp_laminar_max, addr_max - 1); 1189 } 1190 1191 pernet->endpoints--; 1192 list_del_rcu(&entry->list); 1193 __clear_bit(entry->addr.id, pernet->id_bitmap); 1194 spin_unlock_bh(&pernet->lock); 1195 1196 mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry); 1197 synchronize_rcu(); 1198 __mptcp_pm_release_addr_entry(entry); 1199 1200 return ret; 1201 } 1202 1203 static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk, 1204 struct list_head *rm_list) 1205 { 1206 struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; 1207 struct mptcp_pm_addr_entry *entry; 1208 1209 list_for_each_entry(entry, rm_list, list) { 1210 if (slist.nr < MPTCP_RM_IDS_MAX && 1211 mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) 1212 slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); 1213 1214 if (alist.nr < MPTCP_RM_IDS_MAX && 1215 mptcp_remove_anno_list_by_saddr(msk, &entry->addr)) 1216 alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); 1217 } 1218 1219 spin_lock_bh(&msk->pm.lock); 1220 if (alist.nr) { 1221 msk->pm.add_addr_signaled -= alist.nr; 1222 mptcp_pm_remove_addr(msk, &alist); 1223 } 1224 if (slist.nr) 1225 mptcp_pm_rm_subflow(msk, &slist); 1226 /* Reset counters: maybe some subflows have been removed before */ 1227 bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 1228 msk->pm.local_addr_used = 0; 1229 spin_unlock_bh(&msk->pm.lock); 1230 } 1231 1232 static void mptcp_nl_flush_addrs_list(struct net *net, 1233 struct list_head *rm_list) 1234 { 1235 long s_slot = 0, s_num = 0; 1236 struct mptcp_sock *msk; 1237 1238 if (list_empty(rm_list)) 1239 return; 1240 1241 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1242 struct sock *sk = (struct sock *)msk; 1243 1244 if (!mptcp_pm_is_userspace(msk)) { 1245 lock_sock(sk); 1246 mptcp_pm_flush_addrs_and_subflows(msk, rm_list); 1247 release_sock(sk); 1248 } 1249 1250 sock_put(sk); 1251 cond_resched(); 1252 } 1253 } 1254 1255 /* caller must ensure the RCU grace period is already elapsed */ 1256 static void __flush_addrs(struct list_head *list) 1257 { 1258 while (!list_empty(list)) { 1259 struct mptcp_pm_addr_entry *cur; 1260 1261 cur = list_entry(list->next, 1262 struct mptcp_pm_addr_entry, list); 1263 list_del_rcu(&cur->list); 1264 __mptcp_pm_release_addr_entry(cur); 1265 } 1266 } 1267 1268 static void __reset_counters(struct pm_nl_pernet *pernet) 1269 { 1270 WRITE_ONCE(pernet->endp_signal_max, 0); 1271 WRITE_ONCE(pernet->endp_subflow_max, 0); 1272 WRITE_ONCE(pernet->endp_laminar_max, 0); 1273 pernet->endpoints = 0; 1274 } 1275 1276 int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) 1277 { 1278 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1279 LIST_HEAD(free_list); 1280 1281 spin_lock_bh(&pernet->lock); 1282 list_splice_init(&pernet->endp_list, &free_list); 1283 __reset_counters(pernet); 1284 pernet->next_id = 1; 1285 bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 1286 spin_unlock_bh(&pernet->lock); 1287 mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list); 1288 synchronize_rcu(); 1289 __flush_addrs(&free_list); 1290 return 0; 1291 } 1292 1293 int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, 1294 struct genl_info *info) 1295 { 1296 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1297 struct mptcp_pm_addr_entry *entry; 1298 int ret = -EINVAL; 1299 1300 rcu_read_lock(); 1301 entry = __lookup_addr_by_id(pernet, id); 1302 if (entry) { 1303 *addr = *entry; 1304 ret = 0; 1305 } 1306 rcu_read_unlock(); 1307 1308 return ret; 1309 } 1310 1311 int mptcp_pm_nl_dump_addr(struct sk_buff *msg, 1312 struct netlink_callback *cb) 1313 { 1314 struct net *net = sock_net(msg->sk); 1315 struct mptcp_pm_addr_entry *entry; 1316 struct pm_nl_pernet *pernet; 1317 int id = cb->args[0]; 1318 int i; 1319 1320 pernet = pm_nl_get_pernet(net); 1321 1322 rcu_read_lock(); 1323 for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { 1324 if (test_bit(i, pernet->id_bitmap)) { 1325 entry = __lookup_addr_by_id(pernet, i); 1326 if (!entry) 1327 break; 1328 1329 if (entry->addr.id <= id) 1330 continue; 1331 1332 if (mptcp_pm_genl_fill_addr(msg, cb, entry) < 0) 1333 break; 1334 1335 id = entry->addr.id; 1336 } 1337 } 1338 rcu_read_unlock(); 1339 1340 cb->args[0] = id; 1341 return msg->len; 1342 } 1343 1344 static int parse_limit(struct genl_info *info, int id, unsigned int *limit) 1345 { 1346 struct nlattr *attr = info->attrs[id]; 1347 1348 if (!attr) 1349 return 0; 1350 1351 *limit = nla_get_u32(attr); 1352 if (*limit > MPTCP_PM_ADDR_MAX) { 1353 NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr, 1354 "limit greater than maximum (%u)", 1355 MPTCP_PM_ADDR_MAX); 1356 return -EINVAL; 1357 } 1358 return 0; 1359 } 1360 1361 int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info) 1362 { 1363 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1364 unsigned int rcv_addrs, subflows; 1365 int ret; 1366 1367 spin_lock_bh(&pernet->lock); 1368 rcv_addrs = pernet->limit_add_addr_accepted; 1369 ret = parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs); 1370 if (ret) 1371 goto unlock; 1372 1373 subflows = pernet->limit_extra_subflows; 1374 ret = parse_limit(info, MPTCP_PM_ATTR_SUBFLOWS, &subflows); 1375 if (ret) 1376 goto unlock; 1377 1378 WRITE_ONCE(pernet->limit_add_addr_accepted, rcv_addrs); 1379 WRITE_ONCE(pernet->limit_extra_subflows, subflows); 1380 1381 unlock: 1382 spin_unlock_bh(&pernet->lock); 1383 return ret; 1384 } 1385 1386 int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info) 1387 { 1388 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1389 struct sk_buff *msg; 1390 void *reply; 1391 1392 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1393 if (!msg) 1394 return -ENOMEM; 1395 1396 reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, 1397 MPTCP_PM_CMD_GET_LIMITS); 1398 if (!reply) 1399 goto fail; 1400 1401 if (nla_put_u32(msg, MPTCP_PM_ATTR_RCV_ADD_ADDRS, 1402 READ_ONCE(pernet->limit_add_addr_accepted))) 1403 goto fail; 1404 1405 if (nla_put_u32(msg, MPTCP_PM_ATTR_SUBFLOWS, 1406 READ_ONCE(pernet->limit_extra_subflows))) 1407 goto fail; 1408 1409 genlmsg_end(msg, reply); 1410 return genlmsg_reply(msg, info); 1411 1412 fail: 1413 GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); 1414 nlmsg_free(msg); 1415 return -EMSGSIZE; 1416 } 1417 1418 static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, 1419 struct mptcp_addr_info *addr) 1420 { 1421 struct mptcp_rm_list list = { .nr = 0 }; 1422 1423 list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); 1424 1425 spin_lock_bh(&msk->pm.lock); 1426 mptcp_pm_rm_subflow(msk, &list); 1427 __mark_subflow_endp_available(msk, list.ids[0]); 1428 mptcp_pm_create_subflow_or_signal_addr(msk); 1429 spin_unlock_bh(&msk->pm.lock); 1430 } 1431 1432 static void mptcp_pm_nl_set_flags_all(struct net *net, 1433 struct mptcp_pm_addr_entry *local, 1434 u8 changed) 1435 { 1436 u8 is_subflow = !!(local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW); 1437 u8 bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 1438 long s_slot = 0, s_num = 0; 1439 struct mptcp_sock *msk; 1440 1441 if (changed == MPTCP_PM_ADDR_FLAG_FULLMESH && !is_subflow) 1442 return; 1443 1444 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1445 struct sock *sk = (struct sock *)msk; 1446 1447 if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) 1448 goto next; 1449 1450 lock_sock(sk); 1451 if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) 1452 mptcp_pm_mp_prio_send_ack(msk, &local->addr, NULL, bkup); 1453 /* Subflows will only be recreated if the SUBFLOW flag is set */ 1454 if (is_subflow && (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)) 1455 mptcp_pm_nl_fullmesh(msk, &local->addr); 1456 release_sock(sk); 1457 1458 next: 1459 sock_put(sk); 1460 cond_resched(); 1461 } 1462 } 1463 1464 int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, 1465 struct genl_info *info) 1466 { 1467 struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; 1468 u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | 1469 MPTCP_PM_ADDR_FLAG_FULLMESH; 1470 struct net *net = genl_info_net(info); 1471 struct mptcp_pm_addr_entry *entry; 1472 struct pm_nl_pernet *pernet; 1473 u8 lookup_by_id = 0; 1474 1475 pernet = pm_nl_get_pernet(net); 1476 1477 if (local->addr.family == AF_UNSPEC) { 1478 lookup_by_id = 1; 1479 if (!local->addr.id) { 1480 NL_SET_ERR_MSG_ATTR(info->extack, attr, 1481 "missing address ID"); 1482 return -EOPNOTSUPP; 1483 } 1484 } 1485 1486 spin_lock_bh(&pernet->lock); 1487 entry = lookup_by_id ? __lookup_addr_by_id(pernet, local->addr.id) : 1488 __lookup_addr(pernet, &local->addr); 1489 if (!entry) { 1490 spin_unlock_bh(&pernet->lock); 1491 NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); 1492 return -EINVAL; 1493 } 1494 if ((local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && 1495 (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | 1496 MPTCP_PM_ADDR_FLAG_IMPLICIT))) { 1497 spin_unlock_bh(&pernet->lock); 1498 NL_SET_ERR_MSG_ATTR(info->extack, attr, "invalid addr flags"); 1499 return -EINVAL; 1500 } 1501 1502 changed = (local->flags ^ entry->flags) & mask; 1503 entry->flags = (entry->flags & ~mask) | (local->flags & mask); 1504 *local = *entry; 1505 spin_unlock_bh(&pernet->lock); 1506 1507 mptcp_pm_nl_set_flags_all(net, local, changed); 1508 return 0; 1509 } 1510 1511 bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk) 1512 { 1513 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 1514 1515 if (msk->pm.extra_subflows == mptcp_pm_get_limit_extra_subflows(msk) || 1516 (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap, 1517 MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) { 1518 WRITE_ONCE(msk->pm.work_pending, false); 1519 return false; 1520 } 1521 return true; 1522 } 1523 1524 /* Called under PM lock */ 1525 void __mptcp_pm_kernel_worker(struct mptcp_sock *msk) 1526 { 1527 struct mptcp_pm_data *pm = &msk->pm; 1528 1529 if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { 1530 pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); 1531 mptcp_pm_nl_add_addr_received(msk); 1532 } 1533 if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { 1534 pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); 1535 mptcp_pm_nl_fully_established(msk); 1536 } 1537 if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { 1538 pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); 1539 mptcp_pm_nl_subflow_established(msk); 1540 } 1541 } 1542 1543 static int __net_init pm_nl_init_net(struct net *net) 1544 { 1545 struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); 1546 1547 INIT_LIST_HEAD_RCU(&pernet->endp_list); 1548 1549 /* Cit. 2 subflows ought to be enough for anybody. */ 1550 pernet->limit_extra_subflows = 2; 1551 pernet->next_id = 1; 1552 spin_lock_init(&pernet->lock); 1553 1554 /* No need to initialize other pernet fields, the struct is zeroed at 1555 * allocation time. 1556 */ 1557 1558 return 0; 1559 } 1560 1561 static void __net_exit pm_nl_exit_net(struct list_head *net_list) 1562 { 1563 struct net *net; 1564 1565 list_for_each_entry(net, net_list, exit_list) { 1566 struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); 1567 1568 /* net is removed from namespace list, can't race with 1569 * other modifiers, also netns core already waited for a 1570 * RCU grace period. 1571 */ 1572 __flush_addrs(&pernet->endp_list); 1573 } 1574 } 1575 1576 static struct pernet_operations mptcp_pm_pernet_ops = { 1577 .init = pm_nl_init_net, 1578 .exit_batch = pm_nl_exit_net, 1579 .id = &pm_nl_pernet_id, 1580 .size = sizeof(struct pm_nl_pernet), 1581 }; 1582 1583 struct mptcp_pm_ops mptcp_pm_kernel = { 1584 .name = "kernel", 1585 .owner = THIS_MODULE, 1586 }; 1587 1588 void __init mptcp_pm_kernel_register(void) 1589 { 1590 if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0) 1591 panic("Failed to register MPTCP PM pernet subsystem.\n"); 1592 1593 mptcp_pm_register(&mptcp_pm_kernel); 1594 } 1595