1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2025, Matthieu Baerts. 5 */ 6 7 #define pr_fmt(fmt) "MPTCP: " fmt 8 9 #include <net/netns/generic.h> 10 11 #include "protocol.h" 12 #include "mib.h" 13 #include "mptcp_pm_gen.h" 14 15 static int pm_nl_pernet_id; 16 17 struct pm_nl_pernet { 18 /* protects pernet updates */ 19 spinlock_t lock; 20 struct list_head endp_list; 21 u8 endpoints; 22 u8 endp_signal_max; 23 u8 endp_subflow_max; 24 u8 endp_laminar_max; 25 u8 limit_add_addr_accepted; 26 u8 limit_extra_subflows; 27 u8 next_id; 28 DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 29 }; 30 31 #define MPTCP_PM_ADDR_MAX 8 32 33 static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) 34 { 35 return net_generic(net, pm_nl_pernet_id); 36 } 37 38 static struct pm_nl_pernet * 39 pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) 40 { 41 return pm_nl_get_pernet(sock_net((struct sock *)msk)); 42 } 43 44 static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) 45 { 46 return pm_nl_get_pernet(genl_info_net(info)); 47 } 48 49 u8 mptcp_pm_get_endp_signal_max(const struct mptcp_sock *msk) 50 { 51 const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 52 53 return READ_ONCE(pernet->endp_signal_max); 54 } 55 EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_signal_max); 56 57 u8 mptcp_pm_get_endp_subflow_max(const struct mptcp_sock *msk) 58 { 59 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 60 61 return READ_ONCE(pernet->endp_subflow_max); 62 } 63 EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_subflow_max); 64 65 u8 mptcp_pm_get_endp_laminar_max(const struct mptcp_sock *msk) 66 { 67 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 68 69 return READ_ONCE(pernet->endp_laminar_max); 70 } 71 EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_laminar_max); 72 73 u8 mptcp_pm_get_limit_add_addr_accepted(const struct mptcp_sock *msk) 74 { 75 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 76 77 return READ_ONCE(pernet->limit_add_addr_accepted); 78 } 79 EXPORT_SYMBOL_GPL(mptcp_pm_get_limit_add_addr_accepted); 80 81 u8 mptcp_pm_get_limit_extra_subflows(const struct mptcp_sock *msk) 82 { 83 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 84 85 return READ_ONCE(pernet->limit_extra_subflows); 86 } 87 EXPORT_SYMBOL_GPL(mptcp_pm_get_limit_extra_subflows); 88 89 static bool lookup_subflow_by_daddr(const struct list_head *list, 90 const struct mptcp_addr_info *daddr) 91 { 92 struct mptcp_subflow_context *subflow; 93 struct mptcp_addr_info cur; 94 95 list_for_each_entry(subflow, list, node) { 96 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 97 98 if (!((1 << inet_sk_state_load(ssk)) & 99 (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV))) 100 continue; 101 102 mptcp_remote_address((struct sock_common *)ssk, &cur); 103 if (mptcp_addresses_equal(&cur, daddr, daddr->port)) 104 return true; 105 } 106 107 return false; 108 } 109 110 static bool 111 select_local_address(const struct pm_nl_pernet *pernet, 112 const struct mptcp_sock *msk, 113 struct mptcp_pm_local *new_local) 114 { 115 struct mptcp_pm_addr_entry *entry; 116 bool found = false; 117 118 msk_owned_by_me(msk); 119 120 rcu_read_lock(); 121 list_for_each_entry_rcu(entry, &pernet->endp_list, list) { 122 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) 123 continue; 124 125 if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) 126 continue; 127 128 new_local->addr = entry->addr; 129 new_local->flags = entry->flags; 130 new_local->ifindex = entry->ifindex; 131 found = true; 132 break; 133 } 134 rcu_read_unlock(); 135 136 return found; 137 } 138 139 static bool 140 select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, 141 struct mptcp_pm_local *new_local) 142 { 143 struct mptcp_pm_addr_entry *entry; 144 bool found = false; 145 146 rcu_read_lock(); 147 /* do not keep any additional per socket state, just signal 148 * the address list in order. 149 * Note: removal from the local address list during the msk life-cycle 150 * can lead to additional addresses not being announced. 151 */ 152 list_for_each_entry_rcu(entry, &pernet->endp_list, list) { 153 if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) 154 continue; 155 156 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) 157 continue; 158 159 new_local->addr = entry->addr; 160 new_local->flags = entry->flags; 161 new_local->ifindex = entry->ifindex; 162 found = true; 163 break; 164 } 165 rcu_read_unlock(); 166 167 return found; 168 } 169 170 static unsigned int 171 fill_remote_addr(struct mptcp_sock *msk, struct mptcp_addr_info *local, 172 struct mptcp_addr_info *addrs) 173 { 174 bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); 175 struct mptcp_addr_info remote = { 0 }; 176 struct sock *sk = (struct sock *)msk; 177 178 if (deny_id0) 179 return 0; 180 181 mptcp_remote_address((struct sock_common *)sk, &remote); 182 183 if (!mptcp_pm_addr_families_match(sk, local, &remote)) 184 return 0; 185 186 msk->pm.extra_subflows++; 187 *addrs = remote; 188 189 return 1; 190 } 191 192 static unsigned int 193 fill_remote_addresses_fullmesh(struct mptcp_sock *msk, 194 struct mptcp_addr_info *local, 195 struct mptcp_addr_info *addrs) 196 { 197 u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk); 198 bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); 199 DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 200 struct sock *sk = (struct sock *)msk, *ssk; 201 struct mptcp_subflow_context *subflow; 202 int i = 0; 203 204 /* Forbid creation of new subflows matching existing ones, possibly 205 * already created by incoming ADD_ADDR 206 */ 207 bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 208 mptcp_for_each_subflow(msk, subflow) 209 if (READ_ONCE(subflow->local_id) == local->id) 210 __set_bit(subflow->remote_id, unavail_id); 211 212 mptcp_for_each_subflow(msk, subflow) { 213 ssk = mptcp_subflow_tcp_sock(subflow); 214 mptcp_remote_address((struct sock_common *)ssk, &addrs[i]); 215 addrs[i].id = READ_ONCE(subflow->remote_id); 216 if (deny_id0 && !addrs[i].id) 217 continue; 218 219 if (test_bit(addrs[i].id, unavail_id)) 220 continue; 221 222 if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) 223 continue; 224 225 /* forbid creating multiple address towards this id */ 226 __set_bit(addrs[i].id, unavail_id); 227 msk->pm.extra_subflows++; 228 i++; 229 230 if (msk->pm.extra_subflows >= limit_extra_subflows) 231 break; 232 } 233 234 return i; 235 } 236 237 /* Fill all the remote addresses into the array addrs[], 238 * and return the array size. 239 */ 240 static unsigned int 241 fill_remote_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *local, 242 bool fullmesh, struct mptcp_addr_info *addrs) 243 { 244 /* Non-fullmesh: fill in the single entry corresponding to the primary 245 * MPC subflow remote address, and return 1, corresponding to 1 entry. 246 */ 247 if (!fullmesh) 248 return fill_remote_addr(msk, local, addrs); 249 250 /* Fullmesh endpoint: fill all possible remote addresses */ 251 return fill_remote_addresses_fullmesh(msk, local, addrs); 252 } 253 254 static struct mptcp_pm_addr_entry * 255 __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) 256 { 257 struct mptcp_pm_addr_entry *entry; 258 259 list_for_each_entry_rcu(entry, &pernet->endp_list, list, 260 lockdep_is_held(&pernet->lock)) { 261 if (entry->addr.id == id) 262 return entry; 263 } 264 return NULL; 265 } 266 267 static struct mptcp_pm_addr_entry * 268 __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) 269 { 270 struct mptcp_pm_addr_entry *entry; 271 272 list_for_each_entry_rcu(entry, &pernet->endp_list, list, 273 lockdep_is_held(&pernet->lock)) { 274 if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) 275 return entry; 276 } 277 return NULL; 278 } 279 280 static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk, 281 const struct mptcp_addr_info *addr) 282 { 283 return msk->mpc_endpoint_id == addr->id ? 0 : addr->id; 284 } 285 286 /* Set mpc_endpoint_id, and send MP_PRIO for ID0 if needed */ 287 static void mptcp_mpc_endpoint_setup(struct mptcp_sock *msk) 288 { 289 struct mptcp_subflow_context *subflow; 290 struct mptcp_pm_addr_entry *entry; 291 struct mptcp_addr_info mpc_addr; 292 struct pm_nl_pernet *pernet; 293 bool backup = false; 294 295 /* do lazy endpoint usage accounting for the MPC subflows */ 296 if (likely(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED)) || 297 !msk->first) 298 return; 299 300 subflow = mptcp_subflow_ctx(msk->first); 301 pernet = pm_nl_get_pernet_from_msk(msk); 302 303 mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); 304 rcu_read_lock(); 305 entry = __lookup_addr(pernet, &mpc_addr); 306 if (entry) { 307 __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); 308 msk->mpc_endpoint_id = entry->addr.id; 309 backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 310 } 311 rcu_read_unlock(); 312 313 /* Send MP_PRIO */ 314 if (backup) 315 mptcp_pm_send_ack(msk, subflow, true, backup); 316 317 msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED); 318 } 319 320 static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) 321 { 322 u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk); 323 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 324 u8 endp_subflow_max = mptcp_pm_get_endp_subflow_max(msk); 325 u8 endp_signal_max = mptcp_pm_get_endp_signal_max(msk); 326 struct sock *sk = (struct sock *)msk; 327 bool signal_and_subflow = false; 328 struct mptcp_pm_local local; 329 330 mptcp_mpc_endpoint_setup(msk); 331 332 pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", 333 msk->pm.local_addr_used, endp_subflow_max, 334 msk->pm.add_addr_signaled, endp_signal_max, 335 msk->pm.extra_subflows, limit_extra_subflows); 336 337 /* check first for announce */ 338 if (msk->pm.add_addr_signaled < endp_signal_max) { 339 /* due to racing events on both ends we can reach here while 340 * previous add address is still running: if we invoke now 341 * mptcp_pm_announce_addr(), that will fail and the 342 * corresponding id will be marked as used. 343 * Instead let the PM machinery reschedule us when the 344 * current address announce will be completed. 345 */ 346 if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) 347 return; 348 349 if (!select_signal_address(pernet, msk, &local)) 350 goto subflow; 351 352 /* If the alloc fails, we are on memory pressure, not worth 353 * continuing, and trying to create subflows. 354 */ 355 if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) 356 return; 357 358 __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); 359 msk->pm.add_addr_signaled++; 360 361 /* Special case for ID0: set the correct ID */ 362 if (local.addr.id == msk->mpc_endpoint_id) 363 local.addr.id = 0; 364 365 mptcp_pm_announce_addr(msk, &local.addr, false); 366 mptcp_pm_addr_send_ack(msk); 367 368 if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) 369 signal_and_subflow = true; 370 } 371 372 subflow: 373 /* check if should create a new subflow */ 374 while (msk->pm.local_addr_used < endp_subflow_max && 375 msk->pm.extra_subflows < limit_extra_subflows) { 376 struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; 377 bool fullmesh; 378 int i, nr; 379 380 if (signal_and_subflow) 381 signal_and_subflow = false; 382 else if (!select_local_address(pernet, msk, &local)) 383 break; 384 385 fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); 386 387 __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); 388 389 /* Special case for ID0: set the correct ID */ 390 if (local.addr.id == msk->mpc_endpoint_id) 391 local.addr.id = 0; 392 else /* local_addr_used is not decr for ID 0 */ 393 msk->pm.local_addr_used++; 394 395 nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); 396 if (nr == 0) 397 continue; 398 399 spin_unlock_bh(&msk->pm.lock); 400 for (i = 0; i < nr; i++) 401 __mptcp_subflow_connect(sk, &local, &addrs[i]); 402 spin_lock_bh(&msk->pm.lock); 403 } 404 mptcp_pm_nl_check_work_pending(msk); 405 } 406 407 static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) 408 { 409 mptcp_pm_create_subflow_or_signal_addr(msk); 410 } 411 412 static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) 413 { 414 mptcp_pm_create_subflow_or_signal_addr(msk); 415 } 416 417 static unsigned int 418 fill_local_addresses_vec_fullmesh(struct mptcp_sock *msk, 419 struct mptcp_addr_info *remote, 420 struct mptcp_pm_local *locals, 421 bool c_flag_case) 422 { 423 u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk); 424 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 425 struct sock *sk = (struct sock *)msk; 426 struct mptcp_pm_addr_entry *entry; 427 struct mptcp_pm_local *local; 428 int i = 0; 429 430 rcu_read_lock(); 431 list_for_each_entry_rcu(entry, &pernet->endp_list, list) { 432 bool is_id0; 433 434 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) 435 continue; 436 437 if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote)) 438 continue; 439 440 local = &locals[i]; 441 local->addr = entry->addr; 442 local->flags = entry->flags; 443 local->ifindex = entry->ifindex; 444 445 is_id0 = local->addr.id == msk->mpc_endpoint_id; 446 447 if (c_flag_case && 448 (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) { 449 __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); 450 451 if (!is_id0) 452 msk->pm.local_addr_used++; 453 } 454 455 /* Special case for ID0: set the correct ID */ 456 if (is_id0) 457 local->addr.id = 0; 458 459 msk->pm.extra_subflows++; 460 i++; 461 462 if (msk->pm.extra_subflows >= limit_extra_subflows) 463 break; 464 } 465 rcu_read_unlock(); 466 467 return i; 468 } 469 470 static unsigned int 471 fill_local_laminar_endp(struct mptcp_sock *msk, struct mptcp_addr_info *remote, 472 struct mptcp_pm_local *locals) 473 { 474 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 475 DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 476 struct mptcp_subflow_context *subflow; 477 struct sock *sk = (struct sock *)msk; 478 struct mptcp_pm_addr_entry *entry; 479 struct mptcp_pm_local *local; 480 int found = 0; 481 482 /* Forbid creation of new subflows matching existing ones, possibly 483 * already created by 'subflow' endpoints 484 */ 485 bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 486 mptcp_for_each_subflow(msk, subflow) { 487 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 488 489 if ((1 << inet_sk_state_load(ssk)) & 490 (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | 491 TCPF_CLOSE)) 492 continue; 493 494 __set_bit(subflow_get_local_id(subflow), unavail_id); 495 } 496 497 rcu_read_lock(); 498 list_for_each_entry_rcu(entry, &pernet->endp_list, list) { 499 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR)) 500 continue; 501 502 if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote)) 503 continue; 504 505 if (test_bit(mptcp_endp_get_local_id(msk, &entry->addr), 506 unavail_id)) 507 continue; 508 509 local = &locals[0]; 510 local->addr = entry->addr; 511 local->flags = entry->flags; 512 local->ifindex = entry->ifindex; 513 514 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 515 __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); 516 517 if (local->addr.id != msk->mpc_endpoint_id) 518 msk->pm.local_addr_used++; 519 } 520 521 msk->pm.extra_subflows++; 522 found = 1; 523 break; 524 } 525 rcu_read_unlock(); 526 527 return found; 528 } 529 530 static unsigned int 531 fill_local_addresses_vec_c_flag(struct mptcp_sock *msk, 532 struct mptcp_addr_info *remote, 533 struct mptcp_pm_local *locals) 534 { 535 u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk); 536 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 537 u8 endp_subflow_max = mptcp_pm_get_endp_subflow_max(msk); 538 struct sock *sk = (struct sock *)msk; 539 struct mptcp_pm_local *local; 540 int i = 0; 541 542 while (msk->pm.local_addr_used < endp_subflow_max) { 543 local = &locals[i]; 544 545 if (!select_local_address(pernet, msk, local)) 546 break; 547 548 __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); 549 550 if (!mptcp_pm_addr_families_match(sk, &local->addr, remote)) 551 continue; 552 553 if (local->addr.id == msk->mpc_endpoint_id) 554 continue; 555 556 msk->pm.local_addr_used++; 557 msk->pm.extra_subflows++; 558 i++; 559 560 if (msk->pm.extra_subflows >= limit_extra_subflows) 561 break; 562 } 563 564 return i; 565 } 566 567 static unsigned int 568 fill_local_address_any(struct mptcp_sock *msk, struct mptcp_addr_info *remote, 569 struct mptcp_pm_local *local) 570 { 571 struct sock *sk = (struct sock *)msk; 572 573 memset(local, 0, sizeof(*local)); 574 local->addr.family = 575 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 576 remote->family == AF_INET6 && 577 ipv6_addr_v4mapped(&remote->addr6) ? AF_INET : 578 #endif 579 remote->family; 580 581 if (!mptcp_pm_addr_families_match(sk, &local->addr, remote)) 582 return 0; 583 584 msk->pm.extra_subflows++; 585 586 return 1; 587 } 588 589 /* Fill all the local addresses into the array addrs[], 590 * and return the array size. 591 */ 592 static unsigned int 593 fill_local_addresses_vec(struct mptcp_sock *msk, struct mptcp_addr_info *remote, 594 struct mptcp_pm_local *locals) 595 { 596 bool c_flag_case = remote->id && mptcp_pm_add_addr_c_flag_case(msk); 597 int i; 598 599 /* If there is at least one MPTCP endpoint with a fullmesh flag */ 600 i = fill_local_addresses_vec_fullmesh(msk, remote, locals, c_flag_case); 601 if (i) 602 return i; 603 604 /* If there is at least one MPTCP endpoint with a laminar flag */ 605 if (mptcp_pm_get_endp_laminar_max(msk)) 606 return fill_local_laminar_endp(msk, remote, locals); 607 608 /* Special case: peer sets the C flag, accept one ADD_ADDR if default 609 * limits are used -- accepting no ADD_ADDR -- and use subflow endpoints 610 */ 611 if (c_flag_case) 612 return fill_local_addresses_vec_c_flag(msk, remote, locals); 613 614 /* No special case: fill in the single 'IPADDRANY' local address */ 615 return fill_local_address_any(msk, remote, &locals[0]); 616 } 617 618 static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) 619 { 620 u8 limit_add_addr_accepted = mptcp_pm_get_limit_add_addr_accepted(msk); 621 u8 limit_extra_subflows = mptcp_pm_get_limit_extra_subflows(msk); 622 struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX]; 623 struct sock *sk = (struct sock *)msk; 624 struct mptcp_addr_info remote; 625 bool sf_created = false; 626 int i, nr; 627 628 pr_debug("accepted %d:%d remote family %d\n", 629 msk->pm.add_addr_accepted, limit_add_addr_accepted, 630 msk->pm.remote.family); 631 632 remote = msk->pm.remote; 633 mptcp_pm_announce_addr(msk, &remote, true); 634 mptcp_pm_addr_send_ack(msk); 635 mptcp_mpc_endpoint_setup(msk); 636 637 if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) 638 return; 639 640 /* pick id 0 port, if none is provided the remote address */ 641 if (!remote.port) 642 remote.port = sk->sk_dport; 643 644 /* connect to the specified remote address, using whatever 645 * local address the routing configuration will pick. 646 */ 647 nr = fill_local_addresses_vec(msk, &remote, locals); 648 if (nr == 0) 649 return; 650 651 spin_unlock_bh(&msk->pm.lock); 652 for (i = 0; i < nr; i++) 653 if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0) 654 sf_created = true; 655 spin_lock_bh(&msk->pm.lock); 656 657 if (sf_created) { 658 /* add_addr_accepted is not decr for ID 0 */ 659 if (remote.id) 660 msk->pm.add_addr_accepted++; 661 if (msk->pm.add_addr_accepted >= limit_add_addr_accepted || 662 msk->pm.extra_subflows >= limit_extra_subflows) 663 WRITE_ONCE(msk->pm.accept_addr, false); 664 } 665 } 666 667 void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id) 668 { 669 if (rm_id && WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { 670 u8 limit_add_addr_accepted = 671 mptcp_pm_get_limit_add_addr_accepted(msk); 672 673 /* Note: if the subflow has been closed before, this 674 * add_addr_accepted counter will not be decremented. 675 */ 676 if (--msk->pm.add_addr_accepted < limit_add_addr_accepted) 677 WRITE_ONCE(msk->pm.accept_addr, true); 678 } 679 } 680 681 static bool address_use_port(struct mptcp_pm_addr_entry *entry) 682 { 683 return (entry->flags & 684 (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) == 685 MPTCP_PM_ADDR_FLAG_SIGNAL; 686 } 687 688 /* caller must ensure the RCU grace period is already elapsed */ 689 static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) 690 { 691 if (entry->lsk) 692 sock_release(entry->lsk); 693 kfree(entry); 694 } 695 696 static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, 697 struct mptcp_pm_addr_entry *entry, 698 bool needs_id, bool replace) 699 { 700 struct mptcp_pm_addr_entry *cur, *del_entry = NULL; 701 int ret = -EINVAL; 702 u8 addr_max; 703 704 spin_lock_bh(&pernet->lock); 705 /* to keep the code simple, don't do IDR-like allocation for address ID, 706 * just bail when we exceed limits 707 */ 708 if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID) 709 pernet->next_id = 1; 710 if (pernet->endpoints >= MPTCP_PM_ADDR_MAX) { 711 ret = -ERANGE; 712 goto out; 713 } 714 if (test_bit(entry->addr.id, pernet->id_bitmap)) { 715 ret = -EBUSY; 716 goto out; 717 } 718 719 /* do not insert duplicate address, differentiate on port only 720 * singled addresses 721 */ 722 if (!address_use_port(entry)) 723 entry->addr.port = 0; 724 list_for_each_entry(cur, &pernet->endp_list, list) { 725 if (mptcp_addresses_equal(&cur->addr, &entry->addr, 726 cur->addr.port || entry->addr.port)) { 727 /* allow replacing the exiting endpoint only if such 728 * endpoint is an implicit one and the user-space 729 * did not provide an endpoint id 730 */ 731 if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) { 732 ret = -EEXIST; 733 goto out; 734 } 735 if (entry->addr.id) 736 goto out; 737 738 /* allow callers that only need to look up the local 739 * addr's id to skip replacement. This allows them to 740 * avoid calling synchronize_rcu in the packet recv 741 * path. 742 */ 743 if (!replace) { 744 kfree(entry); 745 ret = cur->addr.id; 746 goto out; 747 } 748 749 pernet->endpoints--; 750 entry->addr.id = cur->addr.id; 751 list_del_rcu(&cur->list); 752 del_entry = cur; 753 break; 754 } 755 } 756 757 if (!entry->addr.id && needs_id) { 758 find_next: 759 entry->addr.id = find_next_zero_bit(pernet->id_bitmap, 760 MPTCP_PM_MAX_ADDR_ID + 1, 761 pernet->next_id); 762 if (!entry->addr.id && pernet->next_id != 1) { 763 pernet->next_id = 1; 764 goto find_next; 765 } 766 } 767 768 if (!entry->addr.id && needs_id) 769 goto out; 770 771 __set_bit(entry->addr.id, pernet->id_bitmap); 772 if (entry->addr.id > pernet->next_id) 773 pernet->next_id = entry->addr.id; 774 775 if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { 776 addr_max = pernet->endp_signal_max; 777 WRITE_ONCE(pernet->endp_signal_max, addr_max + 1); 778 } 779 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 780 addr_max = pernet->endp_subflow_max; 781 WRITE_ONCE(pernet->endp_subflow_max, addr_max + 1); 782 } 783 if (entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR) { 784 addr_max = pernet->endp_laminar_max; 785 WRITE_ONCE(pernet->endp_laminar_max, addr_max + 1); 786 } 787 788 pernet->endpoints++; 789 if (!entry->addr.port) 790 list_add_tail_rcu(&entry->list, &pernet->endp_list); 791 else 792 list_add_rcu(&entry->list, &pernet->endp_list); 793 ret = entry->addr.id; 794 795 out: 796 spin_unlock_bh(&pernet->lock); 797 798 /* just replaced an existing entry, free it */ 799 if (del_entry) { 800 synchronize_rcu(); 801 __mptcp_pm_release_addr_entry(del_entry); 802 } 803 return ret; 804 } 805 806 static struct lock_class_key mptcp_slock_keys[2]; 807 static struct lock_class_key mptcp_keys[2]; 808 809 static int mptcp_pm_nl_create_listen_socket(struct sock *sk, 810 struct mptcp_pm_addr_entry *entry) 811 { 812 bool is_ipv6 = sk->sk_family == AF_INET6; 813 int addrlen = sizeof(struct sockaddr_in); 814 struct sockaddr_storage addr; 815 struct sock *newsk, *ssk; 816 int backlog = 1024; 817 int err; 818 819 err = sock_create_kern(sock_net(sk), entry->addr.family, 820 SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk); 821 if (err) 822 return err; 823 824 newsk = entry->lsk->sk; 825 if (!newsk) 826 return -EINVAL; 827 828 /* The subflow socket lock is acquired in a nested to the msk one 829 * in several places, even by the TCP stack, and this msk is a kernel 830 * socket: lockdep complains. Instead of propagating the _nested 831 * modifiers in several places, re-init the lock class for the msk 832 * socket to an mptcp specific one. 833 */ 834 sock_lock_init_class_and_name(newsk, 835 is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET", 836 &mptcp_slock_keys[is_ipv6], 837 is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET", 838 &mptcp_keys[is_ipv6]); 839 840 lock_sock(newsk); 841 ssk = __mptcp_nmpc_sk(mptcp_sk(newsk)); 842 release_sock(newsk); 843 if (IS_ERR(ssk)) 844 return PTR_ERR(ssk); 845 846 mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); 847 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 848 if (entry->addr.family == AF_INET6) 849 addrlen = sizeof(struct sockaddr_in6); 850 #endif 851 if (ssk->sk_family == AF_INET) 852 err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); 853 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 854 else if (ssk->sk_family == AF_INET6) 855 err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); 856 #endif 857 if (err) 858 return err; 859 860 /* We don't use mptcp_set_state() here because it needs to be called 861 * under the msk socket lock. For the moment, that will not bring 862 * anything more than only calling inet_sk_state_store(), because the 863 * old status is known (TCP_CLOSE). 864 */ 865 inet_sk_state_store(newsk, TCP_LISTEN); 866 lock_sock(ssk); 867 WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true); 868 err = __inet_listen_sk(ssk, backlog); 869 if (!err) 870 mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); 871 release_sock(ssk); 872 return err; 873 } 874 875 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, 876 struct mptcp_pm_addr_entry *skc) 877 { 878 struct mptcp_pm_addr_entry *entry; 879 struct pm_nl_pernet *pernet; 880 int ret; 881 882 pernet = pm_nl_get_pernet_from_msk(msk); 883 884 rcu_read_lock(); 885 entry = __lookup_addr(pernet, &skc->addr); 886 ret = entry ? entry->addr.id : -1; 887 rcu_read_unlock(); 888 if (ret >= 0) 889 return ret; 890 891 /* address not found, add to local list */ 892 entry = kmemdup(skc, sizeof(*skc), GFP_ATOMIC); 893 if (!entry) 894 return -ENOMEM; 895 896 entry->addr.port = 0; 897 ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); 898 if (ret < 0) 899 kfree(entry); 900 901 return ret; 902 } 903 904 bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) 905 { 906 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 907 struct mptcp_pm_addr_entry *entry; 908 bool backup; 909 910 rcu_read_lock(); 911 entry = __lookup_addr(pernet, skc); 912 backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 913 rcu_read_unlock(); 914 915 return backup; 916 } 917 918 static int mptcp_nl_add_subflow_or_signal_addr(struct net *net, 919 struct mptcp_addr_info *addr) 920 { 921 struct mptcp_sock *msk; 922 long s_slot = 0, s_num = 0; 923 924 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 925 struct sock *sk = (struct sock *)msk; 926 struct mptcp_addr_info mpc_addr; 927 928 if (!READ_ONCE(msk->fully_established) || 929 mptcp_pm_is_userspace(msk)) 930 goto next; 931 932 /* if the endp linked to the init sf is re-added with a != ID */ 933 mptcp_local_address((struct sock_common *)msk, &mpc_addr); 934 935 lock_sock(sk); 936 spin_lock_bh(&msk->pm.lock); 937 if (mptcp_addresses_equal(addr, &mpc_addr, addr->port)) 938 msk->mpc_endpoint_id = addr->id; 939 mptcp_pm_create_subflow_or_signal_addr(msk); 940 spin_unlock_bh(&msk->pm.lock); 941 release_sock(sk); 942 943 next: 944 sock_put(sk); 945 cond_resched(); 946 } 947 948 return 0; 949 } 950 951 static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, 952 struct genl_info *info) 953 { 954 struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; 955 956 if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, 957 mptcp_pm_address_nl_policy, info->extack) && 958 tb[MPTCP_PM_ADDR_ATTR_ID]) 959 return true; 960 return false; 961 } 962 963 /* Add an MPTCP endpoint */ 964 int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) 965 { 966 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 967 struct mptcp_pm_addr_entry addr, *entry; 968 struct nlattr *attr; 969 int ret; 970 971 if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) 972 return -EINVAL; 973 974 attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; 975 ret = mptcp_pm_parse_entry(attr, info, true, &addr); 976 if (ret < 0) 977 return ret; 978 979 if (addr.addr.port && !address_use_port(&addr)) { 980 NL_SET_ERR_MSG_ATTR(info->extack, attr, 981 "flags must have signal and not subflow when using port"); 982 return -EINVAL; 983 } 984 985 if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL && 986 addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { 987 NL_SET_ERR_MSG_ATTR(info->extack, attr, 988 "flags mustn't have both signal and fullmesh"); 989 return -EINVAL; 990 } 991 992 if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { 993 NL_SET_ERR_MSG_ATTR(info->extack, attr, 994 "can't create IMPLICIT endpoint"); 995 return -EINVAL; 996 } 997 998 entry = kmemdup(&addr, sizeof(addr), GFP_KERNEL_ACCOUNT); 999 if (!entry) { 1000 GENL_SET_ERR_MSG(info, "can't allocate addr"); 1001 return -ENOMEM; 1002 } 1003 1004 if (entry->addr.port) { 1005 ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); 1006 if (ret) { 1007 GENL_SET_ERR_MSG_FMT(info, "create listen socket error: %d", ret); 1008 goto out_free; 1009 } 1010 } 1011 ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, 1012 !mptcp_pm_has_addr_attr_id(attr, info), 1013 true); 1014 if (ret < 0) { 1015 GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); 1016 goto out_free; 1017 } 1018 1019 mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk), &entry->addr); 1020 return 0; 1021 1022 out_free: 1023 __mptcp_pm_release_addr_entry(entry); 1024 return ret; 1025 } 1026 1027 static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, 1028 const struct mptcp_addr_info *addr, 1029 bool force) 1030 { 1031 struct mptcp_rm_list list = { .nr = 0 }; 1032 bool ret; 1033 1034 list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); 1035 1036 ret = mptcp_remove_anno_list_by_saddr(msk, addr); 1037 if (ret || force) { 1038 spin_lock_bh(&msk->pm.lock); 1039 if (ret) { 1040 __set_bit(addr->id, msk->pm.id_avail_bitmap); 1041 msk->pm.add_addr_signaled--; 1042 } 1043 mptcp_pm_remove_addr(msk, &list); 1044 spin_unlock_bh(&msk->pm.lock); 1045 } 1046 return ret; 1047 } 1048 1049 static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id) 1050 { 1051 /* If it was marked as used, and not ID 0, decrement local_addr_used */ 1052 if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) && 1053 id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0)) 1054 msk->pm.local_addr_used--; 1055 } 1056 1057 static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, 1058 const struct mptcp_pm_addr_entry *entry) 1059 { 1060 const struct mptcp_addr_info *addr = &entry->addr; 1061 struct mptcp_rm_list list = { .nr = 1 }; 1062 long s_slot = 0, s_num = 0; 1063 struct mptcp_sock *msk; 1064 1065 pr_debug("remove_id=%d\n", addr->id); 1066 1067 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1068 struct sock *sk = (struct sock *)msk; 1069 bool remove_subflow; 1070 1071 if (mptcp_pm_is_userspace(msk)) 1072 goto next; 1073 1074 lock_sock(sk); 1075 remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr); 1076 mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && 1077 !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); 1078 1079 list.ids[0] = mptcp_endp_get_local_id(msk, addr); 1080 if (remove_subflow) { 1081 spin_lock_bh(&msk->pm.lock); 1082 mptcp_pm_rm_subflow(msk, &list); 1083 spin_unlock_bh(&msk->pm.lock); 1084 } 1085 1086 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 1087 spin_lock_bh(&msk->pm.lock); 1088 __mark_subflow_endp_available(msk, list.ids[0]); 1089 spin_unlock_bh(&msk->pm.lock); 1090 } 1091 1092 if (msk->mpc_endpoint_id == entry->addr.id) 1093 msk->mpc_endpoint_id = 0; 1094 release_sock(sk); 1095 1096 next: 1097 sock_put(sk); 1098 cond_resched(); 1099 } 1100 1101 return 0; 1102 } 1103 1104 static int mptcp_nl_remove_id_zero_address(struct net *net, 1105 struct mptcp_addr_info *addr) 1106 { 1107 struct mptcp_rm_list list = { .nr = 0 }; 1108 long s_slot = 0, s_num = 0; 1109 struct mptcp_sock *msk; 1110 1111 list.ids[list.nr++] = 0; 1112 1113 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1114 struct sock *sk = (struct sock *)msk; 1115 struct mptcp_addr_info msk_local; 1116 1117 if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) 1118 goto next; 1119 1120 mptcp_local_address((struct sock_common *)msk, &msk_local); 1121 if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) 1122 goto next; 1123 1124 lock_sock(sk); 1125 spin_lock_bh(&msk->pm.lock); 1126 mptcp_pm_remove_addr(msk, &list); 1127 mptcp_pm_rm_subflow(msk, &list); 1128 __mark_subflow_endp_available(msk, 0); 1129 spin_unlock_bh(&msk->pm.lock); 1130 release_sock(sk); 1131 1132 next: 1133 sock_put(sk); 1134 cond_resched(); 1135 } 1136 1137 return 0; 1138 } 1139 1140 /* Remove an MPTCP endpoint */ 1141 int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) 1142 { 1143 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1144 struct mptcp_pm_addr_entry addr, *entry; 1145 struct nlattr *attr; 1146 u8 addr_max; 1147 int ret; 1148 1149 if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) 1150 return -EINVAL; 1151 1152 attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; 1153 ret = mptcp_pm_parse_entry(attr, info, false, &addr); 1154 if (ret < 0) 1155 return ret; 1156 1157 /* the zero id address is special: the first address used by the msk 1158 * always gets such an id, so different subflows can have different zero 1159 * id addresses. Additionally zero id is not accounted for in id_bitmap. 1160 * Let's use an 'mptcp_rm_list' instead of the common remove code. 1161 */ 1162 if (addr.addr.id == 0) 1163 return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr); 1164 1165 spin_lock_bh(&pernet->lock); 1166 entry = __lookup_addr_by_id(pernet, addr.addr.id); 1167 if (!entry) { 1168 NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); 1169 spin_unlock_bh(&pernet->lock); 1170 return -EINVAL; 1171 } 1172 if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { 1173 addr_max = pernet->endp_signal_max; 1174 WRITE_ONCE(pernet->endp_signal_max, addr_max - 1); 1175 } 1176 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 1177 addr_max = pernet->endp_subflow_max; 1178 WRITE_ONCE(pernet->endp_subflow_max, addr_max - 1); 1179 } 1180 if (entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR) { 1181 addr_max = pernet->endp_laminar_max; 1182 WRITE_ONCE(pernet->endp_laminar_max, addr_max - 1); 1183 } 1184 1185 pernet->endpoints--; 1186 list_del_rcu(&entry->list); 1187 __clear_bit(entry->addr.id, pernet->id_bitmap); 1188 spin_unlock_bh(&pernet->lock); 1189 1190 mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry); 1191 synchronize_rcu(); 1192 __mptcp_pm_release_addr_entry(entry); 1193 1194 return ret; 1195 } 1196 1197 static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk, 1198 struct list_head *rm_list) 1199 { 1200 struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; 1201 struct mptcp_pm_addr_entry *entry; 1202 1203 list_for_each_entry(entry, rm_list, list) { 1204 if (slist.nr < MPTCP_RM_IDS_MAX && 1205 mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) 1206 slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); 1207 1208 if (alist.nr < MPTCP_RM_IDS_MAX && 1209 mptcp_remove_anno_list_by_saddr(msk, &entry->addr)) 1210 alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); 1211 } 1212 1213 spin_lock_bh(&msk->pm.lock); 1214 if (alist.nr) { 1215 msk->pm.add_addr_signaled -= alist.nr; 1216 mptcp_pm_remove_addr(msk, &alist); 1217 } 1218 if (slist.nr) 1219 mptcp_pm_rm_subflow(msk, &slist); 1220 /* Reset counters: maybe some subflows have been removed before */ 1221 bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 1222 msk->pm.local_addr_used = 0; 1223 spin_unlock_bh(&msk->pm.lock); 1224 } 1225 1226 static void mptcp_nl_flush_addrs_list(struct net *net, 1227 struct list_head *rm_list) 1228 { 1229 long s_slot = 0, s_num = 0; 1230 struct mptcp_sock *msk; 1231 1232 if (list_empty(rm_list)) 1233 return; 1234 1235 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1236 struct sock *sk = (struct sock *)msk; 1237 1238 if (!mptcp_pm_is_userspace(msk)) { 1239 lock_sock(sk); 1240 mptcp_pm_flush_addrs_and_subflows(msk, rm_list); 1241 release_sock(sk); 1242 } 1243 1244 sock_put(sk); 1245 cond_resched(); 1246 } 1247 } 1248 1249 /* caller must ensure the RCU grace period is already elapsed */ 1250 static void __flush_addrs(struct list_head *list) 1251 { 1252 while (!list_empty(list)) { 1253 struct mptcp_pm_addr_entry *cur; 1254 1255 cur = list_entry(list->next, 1256 struct mptcp_pm_addr_entry, list); 1257 list_del_rcu(&cur->list); 1258 __mptcp_pm_release_addr_entry(cur); 1259 } 1260 } 1261 1262 static void __reset_counters(struct pm_nl_pernet *pernet) 1263 { 1264 WRITE_ONCE(pernet->endp_signal_max, 0); 1265 WRITE_ONCE(pernet->endp_subflow_max, 0); 1266 WRITE_ONCE(pernet->endp_laminar_max, 0); 1267 pernet->endpoints = 0; 1268 } 1269 1270 int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) 1271 { 1272 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1273 LIST_HEAD(free_list); 1274 1275 spin_lock_bh(&pernet->lock); 1276 list_splice_init(&pernet->endp_list, &free_list); 1277 __reset_counters(pernet); 1278 pernet->next_id = 1; 1279 bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 1280 spin_unlock_bh(&pernet->lock); 1281 mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list); 1282 synchronize_rcu(); 1283 __flush_addrs(&free_list); 1284 return 0; 1285 } 1286 1287 int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, 1288 struct genl_info *info) 1289 { 1290 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1291 struct mptcp_pm_addr_entry *entry; 1292 int ret = -EINVAL; 1293 1294 rcu_read_lock(); 1295 entry = __lookup_addr_by_id(pernet, id); 1296 if (entry) { 1297 *addr = *entry; 1298 ret = 0; 1299 } 1300 rcu_read_unlock(); 1301 1302 return ret; 1303 } 1304 1305 int mptcp_pm_nl_dump_addr(struct sk_buff *msg, 1306 struct netlink_callback *cb) 1307 { 1308 struct net *net = sock_net(msg->sk); 1309 struct mptcp_pm_addr_entry *entry; 1310 struct pm_nl_pernet *pernet; 1311 int id = cb->args[0]; 1312 int i; 1313 1314 pernet = pm_nl_get_pernet(net); 1315 1316 rcu_read_lock(); 1317 for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { 1318 if (test_bit(i, pernet->id_bitmap)) { 1319 entry = __lookup_addr_by_id(pernet, i); 1320 if (!entry) 1321 break; 1322 1323 if (entry->addr.id <= id) 1324 continue; 1325 1326 if (mptcp_pm_genl_fill_addr(msg, cb, entry) < 0) 1327 break; 1328 1329 id = entry->addr.id; 1330 } 1331 } 1332 rcu_read_unlock(); 1333 1334 cb->args[0] = id; 1335 return msg->len; 1336 } 1337 1338 static int parse_limit(struct genl_info *info, int id, unsigned int *limit) 1339 { 1340 struct nlattr *attr = info->attrs[id]; 1341 1342 if (!attr) 1343 return 0; 1344 1345 *limit = nla_get_u32(attr); 1346 if (*limit > MPTCP_PM_ADDR_MAX) { 1347 NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr, 1348 "limit greater than maximum (%u)", 1349 MPTCP_PM_ADDR_MAX); 1350 return -EINVAL; 1351 } 1352 return 0; 1353 } 1354 1355 int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info) 1356 { 1357 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1358 unsigned int rcv_addrs, subflows; 1359 int ret; 1360 1361 spin_lock_bh(&pernet->lock); 1362 rcv_addrs = pernet->limit_add_addr_accepted; 1363 ret = parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs); 1364 if (ret) 1365 goto unlock; 1366 1367 subflows = pernet->limit_extra_subflows; 1368 ret = parse_limit(info, MPTCP_PM_ATTR_SUBFLOWS, &subflows); 1369 if (ret) 1370 goto unlock; 1371 1372 WRITE_ONCE(pernet->limit_add_addr_accepted, rcv_addrs); 1373 WRITE_ONCE(pernet->limit_extra_subflows, subflows); 1374 1375 unlock: 1376 spin_unlock_bh(&pernet->lock); 1377 return ret; 1378 } 1379 1380 int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info) 1381 { 1382 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1383 struct sk_buff *msg; 1384 void *reply; 1385 1386 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1387 if (!msg) 1388 return -ENOMEM; 1389 1390 reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, 1391 MPTCP_PM_CMD_GET_LIMITS); 1392 if (!reply) 1393 goto fail; 1394 1395 if (nla_put_u32(msg, MPTCP_PM_ATTR_RCV_ADD_ADDRS, 1396 READ_ONCE(pernet->limit_add_addr_accepted))) 1397 goto fail; 1398 1399 if (nla_put_u32(msg, MPTCP_PM_ATTR_SUBFLOWS, 1400 READ_ONCE(pernet->limit_extra_subflows))) 1401 goto fail; 1402 1403 genlmsg_end(msg, reply); 1404 return genlmsg_reply(msg, info); 1405 1406 fail: 1407 GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); 1408 nlmsg_free(msg); 1409 return -EMSGSIZE; 1410 } 1411 1412 static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, 1413 struct mptcp_addr_info *addr) 1414 { 1415 struct mptcp_rm_list list = { .nr = 0 }; 1416 1417 list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); 1418 1419 spin_lock_bh(&msk->pm.lock); 1420 mptcp_pm_rm_subflow(msk, &list); 1421 __mark_subflow_endp_available(msk, list.ids[0]); 1422 mptcp_pm_create_subflow_or_signal_addr(msk); 1423 spin_unlock_bh(&msk->pm.lock); 1424 } 1425 1426 static void mptcp_pm_nl_set_flags_all(struct net *net, 1427 struct mptcp_pm_addr_entry *local, 1428 u8 changed) 1429 { 1430 u8 is_subflow = !!(local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW); 1431 u8 bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 1432 long s_slot = 0, s_num = 0; 1433 struct mptcp_sock *msk; 1434 1435 if (changed == MPTCP_PM_ADDR_FLAG_FULLMESH && !is_subflow) 1436 return; 1437 1438 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1439 struct sock *sk = (struct sock *)msk; 1440 1441 if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) 1442 goto next; 1443 1444 lock_sock(sk); 1445 if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) 1446 mptcp_pm_mp_prio_send_ack(msk, &local->addr, NULL, bkup); 1447 /* Subflows will only be recreated if the SUBFLOW flag is set */ 1448 if (is_subflow && (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)) 1449 mptcp_pm_nl_fullmesh(msk, &local->addr); 1450 release_sock(sk); 1451 1452 next: 1453 sock_put(sk); 1454 cond_resched(); 1455 } 1456 } 1457 1458 int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, 1459 struct genl_info *info) 1460 { 1461 struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; 1462 u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | 1463 MPTCP_PM_ADDR_FLAG_FULLMESH; 1464 struct net *net = genl_info_net(info); 1465 struct mptcp_pm_addr_entry *entry; 1466 struct pm_nl_pernet *pernet; 1467 u8 lookup_by_id = 0; 1468 1469 pernet = pm_nl_get_pernet(net); 1470 1471 if (local->addr.family == AF_UNSPEC) { 1472 lookup_by_id = 1; 1473 if (!local->addr.id) { 1474 NL_SET_ERR_MSG_ATTR(info->extack, attr, 1475 "missing address ID"); 1476 return -EOPNOTSUPP; 1477 } 1478 } 1479 1480 spin_lock_bh(&pernet->lock); 1481 entry = lookup_by_id ? __lookup_addr_by_id(pernet, local->addr.id) : 1482 __lookup_addr(pernet, &local->addr); 1483 if (!entry) { 1484 spin_unlock_bh(&pernet->lock); 1485 NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); 1486 return -EINVAL; 1487 } 1488 if ((local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && 1489 (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | 1490 MPTCP_PM_ADDR_FLAG_IMPLICIT))) { 1491 spin_unlock_bh(&pernet->lock); 1492 NL_SET_ERR_MSG_ATTR(info->extack, attr, "invalid addr flags"); 1493 return -EINVAL; 1494 } 1495 1496 changed = (local->flags ^ entry->flags) & mask; 1497 entry->flags = (entry->flags & ~mask) | (local->flags & mask); 1498 *local = *entry; 1499 spin_unlock_bh(&pernet->lock); 1500 1501 mptcp_pm_nl_set_flags_all(net, local, changed); 1502 return 0; 1503 } 1504 1505 bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk) 1506 { 1507 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 1508 1509 if (msk->pm.extra_subflows == mptcp_pm_get_limit_extra_subflows(msk) || 1510 (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap, 1511 MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) { 1512 WRITE_ONCE(msk->pm.work_pending, false); 1513 return false; 1514 } 1515 return true; 1516 } 1517 1518 /* Called under PM lock */ 1519 void __mptcp_pm_kernel_worker(struct mptcp_sock *msk) 1520 { 1521 struct mptcp_pm_data *pm = &msk->pm; 1522 1523 if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { 1524 pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); 1525 mptcp_pm_nl_add_addr_received(msk); 1526 } 1527 if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { 1528 pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); 1529 mptcp_pm_nl_fully_established(msk); 1530 } 1531 if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { 1532 pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); 1533 mptcp_pm_nl_subflow_established(msk); 1534 } 1535 } 1536 1537 static int __net_init pm_nl_init_net(struct net *net) 1538 { 1539 struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); 1540 1541 INIT_LIST_HEAD_RCU(&pernet->endp_list); 1542 1543 /* Cit. 2 subflows ought to be enough for anybody. */ 1544 pernet->limit_extra_subflows = 2; 1545 pernet->next_id = 1; 1546 spin_lock_init(&pernet->lock); 1547 1548 /* No need to initialize other pernet fields, the struct is zeroed at 1549 * allocation time. 1550 */ 1551 1552 return 0; 1553 } 1554 1555 static void __net_exit pm_nl_exit_net(struct list_head *net_list) 1556 { 1557 struct net *net; 1558 1559 list_for_each_entry(net, net_list, exit_list) { 1560 struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); 1561 1562 /* net is removed from namespace list, can't race with 1563 * other modifiers, also netns core already waited for a 1564 * RCU grace period. 1565 */ 1566 __flush_addrs(&pernet->endp_list); 1567 } 1568 } 1569 1570 static struct pernet_operations mptcp_pm_pernet_ops = { 1571 .init = pm_nl_init_net, 1572 .exit_batch = pm_nl_exit_net, 1573 .id = &pm_nl_pernet_id, 1574 .size = sizeof(struct pm_nl_pernet), 1575 }; 1576 1577 struct mptcp_pm_ops mptcp_pm_kernel = { 1578 .name = "kernel", 1579 .owner = THIS_MODULE, 1580 }; 1581 1582 void __init mptcp_pm_kernel_register(void) 1583 { 1584 if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0) 1585 panic("Failed to register MPTCP PM pernet subsystem.\n"); 1586 1587 mptcp_pm_register(&mptcp_pm_kernel); 1588 } 1589