1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2019, Intel Corporation. 5 */ 6 #define pr_fmt(fmt) "MPTCP: " fmt 7 8 #include <linux/kernel.h> 9 #include <net/tcp.h> 10 #include <net/mptcp.h> 11 #include "protocol.h" 12 13 #include "mib.h" 14 15 /* path manager command handlers */ 16 17 int mptcp_pm_announce_addr(struct mptcp_sock *msk, 18 const struct mptcp_addr_info *addr, 19 bool echo) 20 { 21 u8 add_addr = READ_ONCE(msk->pm.addr_signal); 22 23 pr_debug("msk=%p, local_id=%d, echo=%d", msk, addr->id, echo); 24 25 lockdep_assert_held(&msk->pm.lock); 26 27 if (add_addr & 28 (echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) { 29 pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo); 30 return -EINVAL; 31 } 32 33 if (echo) { 34 msk->pm.remote = *addr; 35 add_addr |= BIT(MPTCP_ADD_ADDR_ECHO); 36 } else { 37 msk->pm.local = *addr; 38 add_addr |= BIT(MPTCP_ADD_ADDR_SIGNAL); 39 } 40 WRITE_ONCE(msk->pm.addr_signal, add_addr); 41 return 0; 42 } 43 44 int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) 45 { 46 u8 rm_addr = READ_ONCE(msk->pm.addr_signal); 47 48 pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); 49 50 if (rm_addr) { 51 pr_warn("addr_signal error, rm_addr=%d", rm_addr); 52 return -EINVAL; 53 } 54 55 msk->pm.rm_list_tx = *rm_list; 56 rm_addr |= BIT(MPTCP_RM_ADDR_SIGNAL); 57 WRITE_ONCE(msk->pm.addr_signal, rm_addr); 58 mptcp_pm_nl_addr_send_ack(msk); 59 return 0; 60 } 61 62 int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) 63 { 64 pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); 65 66 spin_lock_bh(&msk->pm.lock); 67 mptcp_pm_nl_rm_subflow_received(msk, rm_list); 68 spin_unlock_bh(&msk->pm.lock); 69 return 0; 70 } 71 72 /* path manager event handlers */ 73 74 void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side) 75 { 76 struct mptcp_pm_data *pm = &msk->pm; 77 78 pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side); 79 80 WRITE_ONCE(pm->server_side, server_side); 81 mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC); 82 } 83 84 bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk) 85 { 86 struct mptcp_pm_data *pm = &msk->pm; 87 unsigned int subflows_max; 88 int ret = 0; 89 90 subflows_max = mptcp_pm_get_subflows_max(msk); 91 92 pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows, 93 subflows_max, READ_ONCE(pm->accept_subflow)); 94 95 /* try to avoid acquiring the lock below */ 96 if (!READ_ONCE(pm->accept_subflow)) 97 return false; 98 99 spin_lock_bh(&pm->lock); 100 if (READ_ONCE(pm->accept_subflow)) { 101 ret = pm->subflows < subflows_max; 102 if (ret && ++pm->subflows == subflows_max) 103 WRITE_ONCE(pm->accept_subflow, false); 104 } 105 spin_unlock_bh(&pm->lock); 106 107 return ret; 108 } 109 110 /* return true if the new status bit is currently cleared, that is, this event 111 * can be server, eventually by an already scheduled work 112 */ 113 static bool mptcp_pm_schedule_work(struct mptcp_sock *msk, 114 enum mptcp_pm_status new_status) 115 { 116 pr_debug("msk=%p status=%x new=%lx", msk, msk->pm.status, 117 BIT(new_status)); 118 if (msk->pm.status & BIT(new_status)) 119 return false; 120 121 msk->pm.status |= BIT(new_status); 122 mptcp_schedule_work((struct sock *)msk); 123 return true; 124 } 125 126 void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp) 127 { 128 struct mptcp_pm_data *pm = &msk->pm; 129 bool announce = false; 130 131 pr_debug("msk=%p", msk); 132 133 spin_lock_bh(&pm->lock); 134 135 /* mptcp_pm_fully_established() can be invoked by multiple 136 * racing paths - accept() and check_fully_established() 137 * be sure to serve this event only once. 138 */ 139 if (READ_ONCE(pm->work_pending) && 140 !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) 141 mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED); 142 143 if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0) 144 announce = true; 145 146 msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); 147 spin_unlock_bh(&pm->lock); 148 149 if (announce) 150 mptcp_event(MPTCP_EVENT_ESTABLISHED, msk, ssk, gfp); 151 } 152 153 void mptcp_pm_connection_closed(struct mptcp_sock *msk) 154 { 155 pr_debug("msk=%p", msk); 156 } 157 158 void mptcp_pm_subflow_established(struct mptcp_sock *msk) 159 { 160 struct mptcp_pm_data *pm = &msk->pm; 161 162 pr_debug("msk=%p", msk); 163 164 if (!READ_ONCE(pm->work_pending)) 165 return; 166 167 spin_lock_bh(&pm->lock); 168 169 if (READ_ONCE(pm->work_pending)) 170 mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); 171 172 spin_unlock_bh(&pm->lock); 173 } 174 175 void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, 176 const struct mptcp_subflow_context *subflow) 177 { 178 struct mptcp_pm_data *pm = &msk->pm; 179 bool update_subflows; 180 181 update_subflows = (ssk->sk_state == TCP_CLOSE) && 182 (subflow->request_join || subflow->mp_join); 183 if (!READ_ONCE(pm->work_pending) && !update_subflows) 184 return; 185 186 spin_lock_bh(&pm->lock); 187 if (update_subflows) 188 pm->subflows--; 189 190 /* Even if this subflow is not really established, tell the PM to try 191 * to pick the next ones, if possible. 192 */ 193 if (mptcp_pm_nl_check_work_pending(msk)) 194 mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); 195 196 spin_unlock_bh(&pm->lock); 197 } 198 199 void mptcp_pm_add_addr_received(struct mptcp_sock *msk, 200 const struct mptcp_addr_info *addr) 201 { 202 struct mptcp_pm_data *pm = &msk->pm; 203 204 pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, 205 READ_ONCE(pm->accept_addr)); 206 207 mptcp_event_addr_announced(msk, addr); 208 209 spin_lock_bh(&pm->lock); 210 211 if (!READ_ONCE(pm->accept_addr)) { 212 mptcp_pm_announce_addr(msk, addr, true); 213 mptcp_pm_add_addr_send_ack(msk); 214 } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) { 215 pm->remote = *addr; 216 } else { 217 __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP); 218 } 219 220 spin_unlock_bh(&pm->lock); 221 } 222 223 void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, 224 const struct mptcp_addr_info *addr) 225 { 226 struct mptcp_pm_data *pm = &msk->pm; 227 228 pr_debug("msk=%p", msk); 229 230 spin_lock_bh(&pm->lock); 231 232 if (mptcp_lookup_anno_list_by_saddr(msk, addr) && READ_ONCE(pm->work_pending)) 233 mptcp_pm_schedule_work(msk, MPTCP_PM_SUBFLOW_ESTABLISHED); 234 235 spin_unlock_bh(&pm->lock); 236 } 237 238 void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk) 239 { 240 if (!mptcp_pm_should_add_signal(msk)) 241 return; 242 243 mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK); 244 } 245 246 void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, 247 const struct mptcp_rm_list *rm_list) 248 { 249 struct mptcp_pm_data *pm = &msk->pm; 250 u8 i; 251 252 pr_debug("msk=%p remote_ids_nr=%d", msk, rm_list->nr); 253 254 for (i = 0; i < rm_list->nr; i++) 255 mptcp_event_addr_removed(msk, rm_list->ids[i]); 256 257 spin_lock_bh(&pm->lock); 258 if (mptcp_pm_schedule_work(msk, MPTCP_PM_RM_ADDR_RECEIVED)) 259 pm->rm_list_rx = *rm_list; 260 else 261 __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_RMADDRDROP); 262 spin_unlock_bh(&pm->lock); 263 } 264 265 void mptcp_pm_mp_prio_received(struct sock *ssk, u8 bkup) 266 { 267 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 268 struct sock *sk = subflow->conn; 269 struct mptcp_sock *msk; 270 271 pr_debug("subflow->backup=%d, bkup=%d\n", subflow->backup, bkup); 272 msk = mptcp_sk(sk); 273 if (subflow->backup != bkup) { 274 subflow->backup = bkup; 275 mptcp_data_lock(sk); 276 if (!sock_owned_by_user(sk)) 277 msk->last_snd = NULL; 278 else 279 __set_bit(MPTCP_RESET_SCHEDULER, &msk->cb_flags); 280 mptcp_data_unlock(sk); 281 } 282 283 mptcp_event(MPTCP_EVENT_SUB_PRIORITY, msk, ssk, GFP_ATOMIC); 284 } 285 286 void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) 287 { 288 pr_debug("fail_seq=%llu", fail_seq); 289 } 290 291 /* path manager helpers */ 292 293 bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb, 294 unsigned int opt_size, unsigned int remaining, 295 struct mptcp_addr_info *addr, bool *echo, 296 bool *drop_other_suboptions) 297 { 298 int ret = false; 299 u8 add_addr; 300 u8 family; 301 bool port; 302 303 spin_lock_bh(&msk->pm.lock); 304 305 /* double check after the lock is acquired */ 306 if (!mptcp_pm_should_add_signal(msk)) 307 goto out_unlock; 308 309 /* always drop every other options for pure ack ADD_ADDR; this is a 310 * plain dup-ack from TCP perspective. The other MPTCP-relevant info, 311 * if any, will be carried by the 'original' TCP ack 312 */ 313 if (skb && skb_is_tcp_pure_ack(skb)) { 314 remaining += opt_size; 315 *drop_other_suboptions = true; 316 } 317 318 *echo = mptcp_pm_should_add_signal_echo(msk); 319 port = !!(*echo ? msk->pm.remote.port : msk->pm.local.port); 320 321 family = *echo ? msk->pm.remote.family : msk->pm.local.family; 322 if (remaining < mptcp_add_addr_len(family, *echo, port)) 323 goto out_unlock; 324 325 if (*echo) { 326 *addr = msk->pm.remote; 327 add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_ECHO); 328 } else { 329 *addr = msk->pm.local; 330 add_addr = msk->pm.addr_signal & ~BIT(MPTCP_ADD_ADDR_SIGNAL); 331 } 332 WRITE_ONCE(msk->pm.addr_signal, add_addr); 333 ret = true; 334 335 out_unlock: 336 spin_unlock_bh(&msk->pm.lock); 337 return ret; 338 } 339 340 bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, 341 struct mptcp_rm_list *rm_list) 342 { 343 int ret = false, len; 344 u8 rm_addr; 345 346 spin_lock_bh(&msk->pm.lock); 347 348 /* double check after the lock is acquired */ 349 if (!mptcp_pm_should_rm_signal(msk)) 350 goto out_unlock; 351 352 rm_addr = msk->pm.addr_signal & ~BIT(MPTCP_RM_ADDR_SIGNAL); 353 len = mptcp_rm_addr_len(&msk->pm.rm_list_tx); 354 if (len < 0) { 355 WRITE_ONCE(msk->pm.addr_signal, rm_addr); 356 goto out_unlock; 357 } 358 if (remaining < len) 359 goto out_unlock; 360 361 *rm_list = msk->pm.rm_list_tx; 362 WRITE_ONCE(msk->pm.addr_signal, rm_addr); 363 ret = true; 364 365 out_unlock: 366 spin_unlock_bh(&msk->pm.lock); 367 return ret; 368 } 369 370 int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) 371 { 372 return mptcp_pm_nl_get_local_id(msk, skc); 373 } 374 375 void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) 376 { 377 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 378 u32 rcv_tstamp = READ_ONCE(tcp_sk(ssk)->rcv_tstamp); 379 380 /* keep track of rtx periods with no progress */ 381 if (!subflow->stale_count) { 382 subflow->stale_rcv_tstamp = rcv_tstamp; 383 subflow->stale_count++; 384 } else if (subflow->stale_rcv_tstamp == rcv_tstamp) { 385 if (subflow->stale_count < U8_MAX) 386 subflow->stale_count++; 387 mptcp_pm_nl_subflow_chk_stale(msk, ssk); 388 } else { 389 subflow->stale_count = 0; 390 mptcp_subflow_set_active(subflow); 391 } 392 } 393 394 void mptcp_pm_data_reset(struct mptcp_sock *msk) 395 { 396 msk->pm.add_addr_signaled = 0; 397 msk->pm.add_addr_accepted = 0; 398 msk->pm.local_addr_used = 0; 399 msk->pm.subflows = 0; 400 msk->pm.rm_list_tx.nr = 0; 401 msk->pm.rm_list_rx.nr = 0; 402 WRITE_ONCE(msk->pm.work_pending, false); 403 WRITE_ONCE(msk->pm.addr_signal, 0); 404 WRITE_ONCE(msk->pm.accept_addr, false); 405 WRITE_ONCE(msk->pm.accept_subflow, false); 406 WRITE_ONCE(msk->pm.remote_deny_join_id0, false); 407 msk->pm.status = 0; 408 bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 409 410 mptcp_pm_nl_data_init(msk); 411 } 412 413 void mptcp_pm_data_init(struct mptcp_sock *msk) 414 { 415 spin_lock_init(&msk->pm.lock); 416 INIT_LIST_HEAD(&msk->pm.anno_list); 417 mptcp_pm_data_reset(msk); 418 } 419 420 void __init mptcp_pm_init(void) 421 { 422 mptcp_pm_nl_init(); 423 } 424