// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2019, Intel Corporation.
 */
#define pr_fmt(fmt) "MPTCP: " fmt

#include "protocol.h"
#include "mib.h"

#define ADD_ADDR_RETRANS_MAX	3

struct mptcp_pm_add_entry {
	struct list_head	list;
	struct mptcp_addr_info	addr;
	u8			retrans_times;
	struct timer_list	add_timer;
	struct mptcp_sock	*sock;
};

/* path manager helpers */

/* if sk is ipv4 or ipv6_only allows only same-family local and remote addresses,
 * otherwise allow any matching local/remote pair
 */
bool mptcp_pm_addr_families_match(const struct sock *sk,
				  const struct mptcp_addr_info *loc,
				  const struct mptcp_addr_info *rem)
{
	bool mptcp_is_v4 = sk->sk_family == AF_INET;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	bool loc_is_v4 = loc->family == AF_INET || ipv6_addr_v4mapped(&loc->addr6);
	bool rem_is_v4 = rem->family == AF_INET || ipv6_addr_v4mapped(&rem->addr6);

	if (mptcp_is_v4)
		return loc_is_v4 && rem_is_v4;

	if (ipv6_only_sock(sk))
		return !loc_is_v4 && !rem_is_v4;

	return loc_is_v4 == rem_is_v4;
#else
	return mptcp_is_v4 && loc->family == AF_INET && rem->family == AF_INET;
#endif
}

bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
			   const struct mptcp_addr_info *b, bool use_port)
{
	bool addr_equals = false;

	if (a->family == b->family) {
		if (a->family == AF_INET)
			addr_equals = a->addr.s_addr == b->addr.s_addr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
		else
			addr_equals = ipv6_addr_equal(&a->addr6, &b->addr6);
	} else if (a->family == AF_INET) {
		if (ipv6_addr_v4mapped(&b->addr6))
			addr_equals = a->addr.s_addr == b->addr6.s6_addr32[3];
	} else if (b->family == AF_INET) {
		if (ipv6_addr_v4mapped(&a->addr6))
			addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr;
#endif
	}

	if (!addr_equals)
		return false;
	if (!use_port)
		return true;

	return a->port == b->port;
}

void mptcp_local_address(const struct sock_common *skc,
			 struct mptcp_addr_info *addr)
{
	addr->family = skc->skc_family;
	addr->port = htons(skc->skc_num);
	if (addr->family == AF_INET)
		addr->addr.s_addr = skc->skc_rcv_saddr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->family == AF_INET6)
		addr->addr6 = skc->skc_v6_rcv_saddr;
#endif
}

void mptcp_remote_address(const struct sock_common *skc,
			  struct mptcp_addr_info *addr)
{
	addr->family = skc->skc_family;
	addr->port = skc->skc_dport;
	if (addr->family == AF_INET)
		addr->addr.s_addr = skc->skc_daddr;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->family == AF_INET6)
		addr->addr6 = skc->skc_v6_daddr;
#endif
}

static bool mptcp_pm_is_init_remote_addr(struct mptcp_sock *msk,
					 const struct mptcp_addr_info *remote)
{
	struct mptcp_addr_info mpc_remote;

	mptcp_remote_address((struct sock_common *)msk, &mpc_remote);
	return mptcp_addresses_equal(&mpc_remote, remote, remote->port);
}

bool mptcp_lookup_subflow_by_saddr(const struct list_head *list,
				   const struct mptcp_addr_info *saddr)
{
	struct mptcp_subflow_context *subflow;
	struct mptcp_addr_info cur;
	struct sock_common *skc;

	list_for_each_entry(subflow, list, node) {
		skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow);

		mptcp_local_address(skc, &cur);
		if (mptcp_addresses_equal(&cur, saddr, saddr->port))
			return true;
	}

	return false;
}

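/* Look up an announced (ADD_ADDR) entry matching @addr, port included.
 * Illustrative call pattern only, not taken from this file: callers must hold
 * the PM lock, e.g.
 *
 *	spin_lock_bh(&msk->pm.lock);
 *	entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
 *	spin_unlock_bh(&msk->pm.lock);
 */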
static struct mptcp_pm_add_entry *
mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
				const struct mptcp_addr_info *addr)
{
	struct mptcp_pm_add_entry *entry;

	lockdep_assert_held(&msk->pm.lock);

	list_for_each_entry(entry, &msk->pm.anno_list, list) {
		if (mptcp_addresses_equal(&entry->addr, addr, true))
			return entry;
	}

	return NULL;
}

bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk,
				     const struct mptcp_addr_info *addr)
{
	struct mptcp_pm_add_entry *entry;

	entry = mptcp_pm_del_add_timer(msk, addr, false);
	kfree(entry);
	return entry;
}

bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk)
{
	struct mptcp_pm_add_entry *entry;
	struct mptcp_addr_info saddr;
	bool ret = false;

	mptcp_local_address((struct sock_common *)sk, &saddr);

	spin_lock_bh(&msk->pm.lock);
	list_for_each_entry(entry, &msk->pm.anno_list, list) {
		if (mptcp_addresses_equal(&entry->addr, &saddr, true)) {
			ret = true;
			goto out;
		}
	}

out:
	spin_unlock_bh(&msk->pm.lock);
	return ret;
}

static void __mptcp_pm_send_ack(struct mptcp_sock *msk,
				struct mptcp_subflow_context *subflow,
				bool prio, bool backup)
{
	struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
	bool slow;

	pr_debug("send ack for %s\n",
		 prio ? "mp_prio" :
		 (mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr"));

	slow = lock_sock_fast(ssk);
	if (prio) {
		subflow->send_mp_prio = 1;
		subflow->request_bkup = backup;
	}

	__mptcp_subflow_send_ack(ssk);
	unlock_sock_fast(ssk, slow);
}

void mptcp_pm_send_ack(struct mptcp_sock *msk,
		       struct mptcp_subflow_context *subflow,
		       bool prio, bool backup)
{
	spin_unlock_bh(&msk->pm.lock);
	__mptcp_pm_send_ack(msk, subflow, prio, backup);
	spin_lock_bh(&msk->pm.lock);
}

void mptcp_pm_addr_send_ack(struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow, *alt = NULL;

	msk_owned_by_me(msk);
	lockdep_assert_held(&msk->pm.lock);

	if (!mptcp_pm_should_add_signal(msk) &&
	    !mptcp_pm_should_rm_signal(msk))
		return;

	mptcp_for_each_subflow(msk, subflow) {
		if (__mptcp_subflow_active(subflow)) {
			if (!subflow->stale) {
				mptcp_pm_send_ack(msk, subflow, false, false);
				return;
			}

			if (!alt)
				alt = subflow;
		}
	}

	if (alt)
		mptcp_pm_send_ack(msk, alt, false, false);
}

int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk,
			      struct mptcp_addr_info *addr,
			      struct mptcp_addr_info *rem,
			      u8 bkup)
{
	struct mptcp_subflow_context *subflow;

	pr_debug("bkup=%d\n", bkup);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		struct mptcp_addr_info local, remote;

		mptcp_local_address((struct sock_common *)ssk, &local);
		if (!mptcp_addresses_equal(&local, addr, addr->port))
			continue;

		if (rem && rem->family != AF_UNSPEC) {
			mptcp_remote_address((struct sock_common *)ssk, &remote);
			if (!mptcp_addresses_equal(&remote, rem, rem->port))
				continue;
		}

		__mptcp_pm_send_ack(msk, subflow, true, bkup);
		return 0;
	}

	return -EINVAL;
}

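/* ADD_ADDR retransmission timer: re-announce the address up to
 * ADD_ADDR_RETRANS_MAX times, re-arming with the per-netns ADD_ADDR timeout,
 * until the entry is removed via mptcp_pm_del_add_timer(). Entries and their
 * timers are created by mptcp_pm_alloc_anno_list() below.
 */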
static void mptcp_pm_add_timer(struct timer_list *timer)
{
	struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer);
	struct mptcp_sock *msk = entry->sock;
	struct sock *sk = (struct sock *)msk;

	pr_debug("msk=%p\n", msk);

	if (!msk)
		return;

	if (inet_sk_state_load(sk) == TCP_CLOSE)
		return;

	if (!entry->addr.id)
		return;

	if (mptcp_pm_should_add_signal_addr(msk)) {
		sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
		goto out;
	}

	spin_lock_bh(&msk->pm.lock);

	if (!mptcp_pm_should_add_signal_addr(msk)) {
		pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id);
		mptcp_pm_announce_addr(msk, &entry->addr, false);
		mptcp_pm_add_addr_send_ack(msk);
		entry->retrans_times++;
	}

	if (entry->retrans_times < ADD_ADDR_RETRANS_MAX)
		sk_reset_timer(sk, timer,
			       jiffies + mptcp_get_add_addr_timeout(sock_net(sk)));

	spin_unlock_bh(&msk->pm.lock);

	if (entry->retrans_times == ADD_ADDR_RETRANS_MAX)
		mptcp_pm_subflow_established(msk);

out:
	__sock_put(sk);
}

struct mptcp_pm_add_entry *
mptcp_pm_del_add_timer(struct mptcp_sock *msk,
		       const struct mptcp_addr_info *addr, bool check_id)
{
	struct mptcp_pm_add_entry *entry;
	struct sock *sk = (struct sock *)msk;
	struct timer_list *add_timer = NULL;

	spin_lock_bh(&msk->pm.lock);
	entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
	if (entry && (!check_id || entry->addr.id == addr->id)) {
		entry->retrans_times = ADD_ADDR_RETRANS_MAX;
		add_timer = &entry->add_timer;
	}
	if (!check_id && entry)
		list_del(&entry->list);
	spin_unlock_bh(&msk->pm.lock);

	/* no lock, because sk_stop_timer_sync() is calling del_timer_sync() */
	if (add_timer)
		sk_stop_timer_sync(sk, add_timer);

	return entry;
}

bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
			      const struct mptcp_addr_info *addr)
{
	struct mptcp_pm_add_entry *add_entry = NULL;
	struct sock *sk = (struct sock *)msk;
	struct net *net = sock_net(sk);

	lockdep_assert_held(&msk->pm.lock);

	add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr);

	if (add_entry) {
		if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk)))
			return false;

		sk_reset_timer(sk, &add_entry->add_timer,
			       jiffies + mptcp_get_add_addr_timeout(net));
		return true;
	}

	add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC);
	if (!add_entry)
		return false;

	list_add(&add_entry->list, &msk->pm.anno_list);

	add_entry->addr = *addr;
	add_entry->sock = msk;
	add_entry->retrans_times = 0;

	timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0);
	sk_reset_timer(sk, &add_entry->add_timer,
		       jiffies + mptcp_get_add_addr_timeout(net));

	return true;
}

static void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
{
	struct mptcp_pm_add_entry *entry, *tmp;
	struct sock *sk = (struct sock *)msk;
	LIST_HEAD(free_list);

	pr_debug("msk=%p\n", msk);

	spin_lock_bh(&msk->pm.lock);
	list_splice_init(&msk->pm.anno_list, &free_list);
	spin_unlock_bh(&msk->pm.lock);

	list_for_each_entry_safe(entry, tmp, &free_list, list) {
		sk_stop_timer_sync(sk, &entry->add_timer);
		kfree(entry);
	}
}

/* path manager command handlers */

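/* Queue an ADD_ADDR (or its echo) for transmission: the address is stored in
 * pm->local (pm->remote for an echo) and the corresponding
 * MPTCP_ADD_ADDR_SIGNAL or MPTCP_ADD_ADDR_ECHO bit is set in pm->addr_signal;
 * the option writer later consumes it via mptcp_pm_add_addr_signal().
 */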
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
			   const struct mptcp_addr_info *addr,
			   bool echo)
{
	u8 add_addr = READ_ONCE(msk->pm.addr_signal);

	pr_debug("msk=%p, local_id=%d, echo=%d\n", msk, addr->id, echo);

	lockdep_assert_held(&msk->pm.lock);

	if (add_addr &
	    (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) {
		MPTCP_INC_STATS(sock_net((struct sock *)msk),
				echo ? MPTCP_MIB_ECHOADDTXDROP : MPTCP_MIB_ADDADDRTXDROP);
		return -EINVAL;
	}

	if (echo) {
		msk->pm.remote = *addr;
		add_addr |= BIT(MPTCP_ADD_ADDR_ECHO);
	} else {
		msk->pm.local = *addr;
		add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL);
	}
	WRITE_ONCE(msk->pm.addr_signal, add_addr);
	return 0;
}

int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list)
{
	u8 rm_addr = READ_ONCE(msk->pm.addr_signal);

	pr_debug("msk=%p, rm_list_nr=%d\n", msk, rm_list->nr);

	if (rm_addr) {
		MPTCP_ADD_STATS(sock_net((struct sock *)msk),
				MPTCP_MIB_RMADDRTXDROP, rm_list->nr);
		return -EINVAL;
	}

	msk->pm.rm_list_tx = *rm_list;
	rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL);
	WRITE_ONCE(msk->pm.addr_signal, rm_addr);
	mptcp_pm_addr_send_ack(msk);
	return 0;
}

/* path manager event handlers
 *
 * Most events are reported to the worker via pm->status bits: handlers set a
 * bit under the PM lock with mptcp_pm_schedule_work() and mptcp_pm_worker()
 * later clears and serves it while holding the msk socket lock.
 */

void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side)
{
	struct mptcp_pm_data *pm = &msk->pm;

	pr_debug("msk=%p, token=%u side=%d\n", msk, READ_ONCE(msk->token), server_side);

	WRITE_ONCE(pm->server_side, server_side);
	mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);
}

bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk)
{
	struct mptcp_pm_data *pm = &msk->pm;
	unsigned int subflows_max;
	int ret = 0;

	if (mptcp_pm_is_userspace(msk)) {
		if (mptcp_userspace_pm_active(msk)) {
			spin_lock_bh(&pm->lock);
			pm->subflows++;
			spin_unlock_bh(&pm->lock);
			return true;
		}
		return false;
	}

	subflows_max = mptcp_pm_get_subflows_max(msk);

	pr_debug("msk=%p subflows=%d max=%d allow=%d\n", msk, pm->subflows,
		 subflows_max, READ_ONCE(pm->accept_subflow));

	/* try to avoid acquiring the lock below */
	if (!READ_ONCE(pm->accept_subflow))
		return false;

	spin_lock_bh(&pm->lock);
	if (READ_ONCE(pm->accept_subflow)) {
		ret = pm->subflows < subflows_max;
		if (ret && ++pm->subflows == subflows_max)
			WRITE_ONCE(pm->accept_subflow, false);
	}
	spin_unlock_bh(&pm->lock);

	return ret;
}

/* return true if the new status bit is currently cleared, that is, this event
 * can be served, eventually by an already scheduled work
 */
static bool mptcp_pm_schedule_work(struct mptcp_sock *msk,
				   enum mptcp_pm_status new_status)
{
	pr_debug("msk=%p status=%x new=%lx\n", msk, msk->pm.status,
		 BIT(new_status));
	if (msk->pm.status & BIT(new_status))
		return false;

	msk->pm.status |= BIT(new_status);
	mptcp_schedule_work((struct sock *)msk);
	return true;
}

void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk)
{
	struct mptcp_pm_data *pm = &msk->pm;
	bool announce = false;

	pr_debug("msk=%p\n", msk);

	spin_lock_bh(&pm->lock);

	/* mptcp_pm_fully_established() can be invoked by multiple
	 * racing paths - accept() and check_fully_established()
	 * be sure to serve this event only once.
	 */
	if (READ_ONCE(pm->work_pending) &&
	    !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)))
		mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED);

	if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0)
		announce = true;

	msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED);
	spin_unlock_bh(&pm->lock);

	if (announce)
		mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, GFP_ATOMIC);
}

void mptcp_pm_connection_closed(struct mptcp_sock *msk)
{
	pr_debug("msk=%p\n", msk);

	if (msk->token)
		mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
}

void mptcp_pm_subflow_established(struct mptcp_sock *msk)
{
	struct mptcp_pm_data *pm = &msk->pm;

	pr_debug("msk=%p\n", msk);

	if (!READ_ONCE(pm->work_pending))
		return;

	spin_lock_bh(&pm->lock);

	if (READ_ONCE(pm->work_pending))
		mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

	spin_unlock_bh(&pm->lock);
}

void mptcp_pm_subflow_check_next(struct mptcp_sock *msk,
				 const struct mptcp_subflow_context *subflow)
{
	struct mptcp_pm_data *pm = &msk->pm;
	bool update_subflows;

	update_subflows = subflow->request_join || subflow->mp_join;
	if (mptcp_pm_is_userspace(msk)) {
		if (update_subflows) {
			spin_lock_bh(&pm->lock);
			pm->subflows--;
			spin_unlock_bh(&pm->lock);
		}
		return;
	}

	if (!READ_ONCE(pm->work_pending) && !update_subflows)
		return;

	spin_lock_bh(&pm->lock);
	if (update_subflows)
		__mptcp_pm_close_subflow(msk);

	/* Even if this subflow is not really established, tell the PM to try
	 * to pick the next ones, if possible.
	 */
	if (mptcp_pm_nl_check_work_pending(msk))
		mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

	spin_unlock_bh(&pm->lock);
}

void mptcp_pm_add_addr_received(const struct sock *ssk,
				const struct mptcp_addr_info *addr)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
	struct mptcp_pm_data *pm = &msk->pm;

	pr_debug("msk=%p remote_id=%d accept=%d\n", msk, addr->id,
		 READ_ONCE(pm->accept_addr));

	mptcp_event_addr_announced(ssk, addr);

	spin_lock_bh(&pm->lock);

	if (mptcp_pm_is_userspace(msk)) {
		if (mptcp_userspace_pm_active(msk)) {
			mptcp_pm_announce_addr(msk, addr, true);
			mptcp_pm_add_addr_send_ack(msk);
		} else {
			__MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
		}
	/* id0 should not have a different address */
	} else if ((addr->id == 0 && !mptcp_pm_is_init_remote_addr(msk, addr)) ||
		   (addr->id > 0 && !READ_ONCE(pm->accept_addr))) {
		mptcp_pm_announce_addr(msk, addr, true);
		mptcp_pm_add_addr_send_ack(msk);
	} else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
		pm->remote = *addr;
	} else {
		__MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP);
	}

	spin_unlock_bh(&pm->lock);
}

void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk,
			      const struct mptcp_addr_info *addr)
{
	struct mptcp_pm_data *pm = &msk->pm;

	pr_debug("msk=%p\n", msk);

	if (!READ_ONCE(pm->work_pending))
		return;

	spin_lock_bh(&pm->lock);

	if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending))
		mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED);

	spin_unlock_bh(&pm->lock);
}

/* Sending the ADD_ADDR/echo ack is deferred to the worker: only the
 * MPTCP_PM_ADD_ADDR_SEND_ACK status bit is set here; the ack itself is
 * emitted by mptcp_pm_addr_send_ack() on an active, preferably non-stale,
 * subflow.
 */
void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk)
{
	if (!mptcp_pm_should_add_signal(msk))
		return;

	mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK);
}

static void mptcp_pm_rm_addr_or_subflow(struct mptcp_sock *msk,
					const struct mptcp_rm_list *rm_list,
					enum linux_mptcp_mib_field rm_type)
{
	struct mptcp_subflow_context *subflow, *tmp;
	struct sock *sk = (struct sock *)msk;
	u8 i;

	pr_debug("%s rm_list_nr %d\n",
		 rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr);

	msk_owned_by_me(msk);

	if (sk->sk_state == TCP_LISTEN)
		return;

	if (!rm_list->nr)
		return;

	if (list_empty(&msk->conn_list))
		return;

	for (i = 0; i < rm_list->nr; i++) {
		u8 rm_id = rm_list->ids[i];
		bool removed = false;

		mptcp_for_each_subflow_safe(msk, subflow, tmp) {
			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
			u8 remote_id = READ_ONCE(subflow->remote_id);
			int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
			u8 id = subflow_get_local_id(subflow);

			if ((1 << inet_sk_state_load(ssk)) &
			    (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | TCPF_CLOSE))
				continue;
			if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id)
				continue;
			if (rm_type == MPTCP_MIB_RMSUBFLOW && id != rm_id)
				continue;

			pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u\n",
				 rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
				 i, rm_id, id, remote_id, msk->mpc_endpoint_id);
			spin_unlock_bh(&msk->pm.lock);
			mptcp_subflow_shutdown(sk, ssk, how);
			removed |= subflow->request_join;

			/* the following takes care of updating the subflows counter */
			mptcp_close_ssk(sk, ssk, subflow);
			spin_lock_bh(&msk->pm.lock);

			if (rm_type == MPTCP_MIB_RMSUBFLOW)
				__MPTCP_INC_STATS(sock_net(sk), rm_type);
		}

		if (rm_type == MPTCP_MIB_RMADDR) {
			__MPTCP_INC_STATS(sock_net(sk), rm_type);
			if (removed && mptcp_pm_is_kernel(msk))
				mptcp_pm_nl_rm_addr(msk, rm_id);
		}
	}
}

static void mptcp_pm_rm_addr_recv(struct mptcp_sock *msk)
{
	mptcp_pm_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR);
}

void mptcp_pm_rm_subflow(struct mptcp_sock *msk,
			 const struct mptcp_rm_list *rm_list)
{
	mptcp_pm_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW);
}

void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
			       const struct mptcp_rm_list *rm_list)
{
	struct mptcp_pm_data *pm = &msk->pm;
	u8 i;

	pr_debug("msk=%p remote_ids_nr=%d\n", msk, rm_list->nr);

	for (i = 0; i < rm_list->nr; i++)
		mptcp_event_addr_removed(msk, rm_list->ids[i]);

	spin_lock_bh(&pm->lock);
	if (mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED))
		pm->rm_list_rx = *rm_list;
	else
		__MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_RMADDRDROP);
	spin_unlock_bh(&pm->lock);
}

void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct sock *sk = subflow->conn;
	struct mptcp_sock *msk;

	pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup);
	msk = mptcp_sk(sk);
	if (subflow->backup != bkup)
		subflow->backup = bkup;

	mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC);
}

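/* MP_FAIL handling: if no MP_FAIL timeout is pending on this subflow, reply
 * with an MP_FAIL plus an infinite mapping; otherwise treat it as the response
 * to a previously sent MP_FAIL and clear the pending timeout. Nothing is done
 * once infinite fallback is no longer allowed.
 */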
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	pr_debug("fail_seq=%llu\n", fail_seq);

	if (!READ_ONCE(msk->allow_infinite_fallback))
		return;

	if (!subflow->fail_tout) {
		pr_debug("send MP_FAIL response and infinite map\n");

		subflow->send_mp_fail = 1;
		subflow->send_infinite_map = 1;
		tcp_send_ack(sk);
	} else {
		pr_debug("MP_FAIL response received\n");
		WRITE_ONCE(subflow->fail_tout, 0);
	}
}

bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
			      unsigned int opt_size, unsigned int remaining,
			      struct mptcp_addr_info *addr, bool *echo,
			      bool *drop_other_suboptions)
{
	int ret = false;
	u8 add_addr;
	u8 family;
	bool port;

	spin_lock_bh(&msk->pm.lock);

	/* double check after the lock is acquired */
	if (!mptcp_pm_should_add_signal(msk))
		goto out_unlock;

	/* always drop every other options for pure ack ADD_ADDR; this is a
	 * plain dup-ack from TCP perspective. The other MPTCP-relevant info,
	 * if any, will be carried by the 'original' TCP ack
	 */
	if (skb && skb_is_tcp_pure_ack(skb)) {
		remaining += opt_size;
		*drop_other_suboptions = true;
	}

	*echo = mptcp_pm_should_add_signal_echo(msk);
	port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port);

	family = *echo ? msk->pm.remote.family : msk->pm.local.family;
	if (remaining < mptcp_add_addr_len(family, *echo, port))
		goto out_unlock;

	if (*echo) {
		*addr = msk->pm.remote;
		add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO);
	} else {
		*addr = msk->pm.local;
		add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL);
	}
	WRITE_ONCE(msk->pm.addr_signal, add_addr);
	ret = true;

out_unlock:
	spin_unlock_bh(&msk->pm.lock);
	return ret;
}

bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
			     struct mptcp_rm_list *rm_list)
{
	int ret = false, len;
	u8 rm_addr;

	spin_lock_bh(&msk->pm.lock);

	/* double check after the lock is acquired */
	if (!mptcp_pm_should_rm_signal(msk))
		goto out_unlock;

	rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL);
	len = mptcp_rm_addr_len(&msk->pm.rm_list_tx);
	if (len < 0) {
		WRITE_ONCE(msk->pm.addr_signal, rm_addr);
		goto out_unlock;
	}
	if (remaining < len)
		goto out_unlock;

	*rm_list = msk->pm.rm_list_tx;
	WRITE_ONCE(msk->pm.addr_signal, rm_addr);
	ret = true;

out_unlock:
	spin_unlock_bh(&msk->pm.lock);
	return ret;
}

int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
	struct mptcp_pm_addr_entry skc_local = { 0 };
	struct mptcp_addr_info msk_local;

	if (WARN_ON_ONCE(!msk))
		return -1;

	/* The 0 ID mapping is defined by the first subflow, copied into the msk
	 * addr
	 */
	mptcp_local_address((struct sock_common *)msk, &msk_local);
	mptcp_local_address((struct sock_common *)skc, &skc_local.addr);
	if (mptcp_addresses_equal(&msk_local, &skc_local.addr, false))
		return 0;

	skc_local.addr.id = 0;
	skc_local.flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;

	if (mptcp_pm_is_userspace(msk))
		return mptcp_userspace_pm_get_local_id(msk, &skc_local);
	return mptcp_pm_nl_get_local_id(msk, &skc_local);
}

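/* Tell whether the local address used by @skc is flagged as a backup one,
 * delegating the lookup to the userspace or the in-kernel PM as appropriate.
 */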
bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc)
{
	struct mptcp_addr_info skc_local;

	mptcp_local_address((struct sock_common *)skc, &skc_local);

	if (mptcp_pm_is_userspace(msk))
		return mptcp_userspace_pm_is_backup(msk, &skc_local);

	return mptcp_pm_nl_is_backup(msk, &skc_local);
}

static void mptcp_pm_subflows_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
{
	struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk);
	struct sock *sk = (struct sock *)msk;
	unsigned int active_max_loss_cnt;
	struct net *net = sock_net(sk);
	unsigned int stale_loss_cnt;
	bool slow;

	stale_loss_cnt = mptcp_stale_loss_cnt(net);
	if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt)
		return;

	/* look for another available subflow not in loss state */
	active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1);
	mptcp_for_each_subflow(msk, iter) {
		if (iter != subflow && mptcp_subflow_active(iter) &&
		    iter->stale_count < active_max_loss_cnt) {
			/* we have some alternatives, try to mark this subflow as idle ...*/
			slow = lock_sock_fast(ssk);
			if (!tcp_rtx_and_write_queues_empty(ssk)) {
				subflow->stale = 1;
				__mptcp_retransmit_pending_data(sk);
				MPTCP_INC_STATS(net, MPTCP_MIB_SUBFLOWSTALE);
			}
			unlock_sock_fast(ssk, slow);

			/* always try to push the pending data regardless of re-injections:
			 * we can possibly use backup subflows now, and subflow selection
			 * is cheap under the msk socket lock
			 */
			__mptcp_push_pending(sk, 0);
			return;
		}
	}
}

void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp);

	/* keep track of rtx periods with no progress */
	if (!subflow->stale_count) {
		subflow->stale_rcv_tstamp = rcv_tstamp;
		subflow->stale_count++;
	} else if (subflow->stale_rcv_tstamp == rcv_tstamp) {
		if (subflow->stale_count < U8_MAX)
			subflow->stale_count++;
		mptcp_pm_subflows_chk_stale(msk, ssk);
	} else {
		subflow->stale_count = 0;
		mptcp_subflow_set_active(subflow);
	}
}

void mptcp_pm_worker(struct mptcp_sock *msk)
{
	struct mptcp_pm_data *pm = &msk->pm;

	msk_owned_by_me(msk);

	if (!(pm->status & MPTCP_PM_WORK_MASK))
		return;

	spin_lock_bh(&msk->pm.lock);

	pr_debug("msk=%p status=%x\n", msk, pm->status);
	if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) {
		pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK);
		mptcp_pm_addr_send_ack(msk);
	}
	if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) {
		pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED);
		mptcp_pm_rm_addr_recv(msk);
	}
	__mptcp_pm_kernel_worker(msk);

	spin_unlock_bh(&msk->pm.lock);
}

void mptcp_pm_destroy(struct mptcp_sock *msk)
{
	mptcp_pm_free_anno_list(msk);

	if (mptcp_pm_is_userspace(msk))
		mptcp_userspace_pm_free_local_addr_list(msk);
}

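/* Reset the PM state of the given msk: counters and pending signals are
 * cleared; work_pending, accept_addr and accept_subflow are derived from the
 * configured limits only when the in-kernel PM is in use.
 */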
void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
	u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
	struct mptcp_pm_data *pm = &msk->pm;

	pm->add_addr_signaled = 0;
	pm->add_addr_accepted = 0;
	pm->local_addr_used = 0;
	pm->subflows = 0;
	pm->rm_list_tx.nr = 0;
	pm->rm_list_rx.nr = 0;
	WRITE_ONCE(pm->pm_type, pm_type);

	if (pm_type == MPTCP_PM_TYPE_KERNEL) {
		bool subflows_allowed = !!mptcp_pm_get_subflows_max(msk);

		/* pm->work_pending must only be set to 'true' when
		 * pm->pm_type is set to MPTCP_PM_TYPE_KERNEL
		 */
		WRITE_ONCE(pm->work_pending,
			   (!!mptcp_pm_get_local_addr_max(msk) &&
			    subflows_allowed) ||
			   !!mptcp_pm_get_add_addr_signal_max(msk));
		WRITE_ONCE(pm->accept_addr,
			   !!mptcp_pm_get_add_addr_accept_max(msk) &&
			   subflows_allowed);
		WRITE_ONCE(pm->accept_subflow, subflows_allowed);
	} else {
		WRITE_ONCE(pm->work_pending, 0);
		WRITE_ONCE(pm->accept_addr, 0);
		WRITE_ONCE(pm->accept_subflow, 0);
	}

	WRITE_ONCE(pm->addr_signal, 0);
	WRITE_ONCE(pm->remote_deny_join_id0, false);
	pm->status = 0;
	bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
}

void mptcp_pm_data_init(struct mptcp_sock *msk)
{
	spin_lock_init(&msk->pm.lock);
	INIT_LIST_HEAD(&msk->pm.anno_list);
	INIT_LIST_HEAD(&msk->pm.userspace_pm_local_addr_list);
	mptcp_pm_data_reset(msk);
}

void __init mptcp_pm_init(void)
{
	mptcp_pm_nl_init();
}