1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2019, Tessares SA. 5 */ 6 7 #ifdef CONFIG_SYSCTL 8 #include <linux/sysctl.h> 9 #endif 10 11 #include <net/net_namespace.h> 12 #include <net/netns/generic.h> 13 14 #include "protocol.h" 15 #include "mib.h" 16 17 #define MPTCP_SYSCTL_PATH "net/mptcp" 18 19 static int mptcp_pernet_id; 20 21 #ifdef CONFIG_SYSCTL 22 static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX; 23 #endif 24 25 struct mptcp_pernet { 26 #ifdef CONFIG_SYSCTL 27 struct ctl_table_header *ctl_table_hdr; 28 #endif 29 30 unsigned int add_addr_timeout; 31 unsigned int blackhole_timeout; 32 unsigned int close_timeout; 33 unsigned int stale_loss_cnt; 34 atomic_t active_disable_times; 35 u8 syn_retrans_before_tcp_fallback; 36 unsigned long active_disable_stamp; 37 u8 mptcp_enabled; 38 u8 checksum_enabled; 39 u8 allow_join_initial_addr_port; 40 u8 pm_type; 41 char scheduler[MPTCP_SCHED_NAME_MAX]; 42 }; 43 44 static struct mptcp_pernet *mptcp_get_pernet(const struct net *net) 45 { 46 return net_generic(net, mptcp_pernet_id); 47 } 48 49 int mptcp_is_enabled(const struct net *net) 50 { 51 return mptcp_get_pernet(net)->mptcp_enabled; 52 } 53 54 unsigned int mptcp_get_add_addr_timeout(const struct net *net) 55 { 56 return mptcp_get_pernet(net)->add_addr_timeout; 57 } 58 59 int mptcp_is_checksum_enabled(const struct net *net) 60 { 61 return mptcp_get_pernet(net)->checksum_enabled; 62 } 63 64 int mptcp_allow_join_id0(const struct net *net) 65 { 66 return mptcp_get_pernet(net)->allow_join_initial_addr_port; 67 } 68 69 unsigned int mptcp_stale_loss_cnt(const struct net *net) 70 { 71 return mptcp_get_pernet(net)->stale_loss_cnt; 72 } 73 74 unsigned int mptcp_close_timeout(const struct sock *sk) 75 { 76 if (sock_flag(sk, SOCK_DEAD)) 77 return TCP_TIMEWAIT_LEN; 78 return mptcp_get_pernet(sock_net(sk))->close_timeout; 79 } 80 81 int mptcp_get_pm_type(const struct net *net) 82 { 83 return mptcp_get_pernet(net)->pm_type; 84 } 85 86 const char *mptcp_get_scheduler(const struct net *net) 87 { 88 return mptcp_get_pernet(net)->scheduler; 89 } 90 91 static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) 92 { 93 pernet->mptcp_enabled = 1; 94 pernet->add_addr_timeout = TCP_RTO_MAX; 95 pernet->blackhole_timeout = 3600; 96 pernet->syn_retrans_before_tcp_fallback = 2; 97 atomic_set(&pernet->active_disable_times, 0); 98 pernet->close_timeout = TCP_TIMEWAIT_LEN; 99 pernet->checksum_enabled = 0; 100 pernet->allow_join_initial_addr_port = 1; 101 pernet->stale_loss_cnt = 4; 102 pernet->pm_type = MPTCP_PM_TYPE_KERNEL; 103 strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler)); 104 } 105 106 #ifdef CONFIG_SYSCTL 107 static int mptcp_set_scheduler(char *scheduler, const char *name) 108 { 109 struct mptcp_sched_ops *sched; 110 int ret = 0; 111 112 rcu_read_lock(); 113 sched = mptcp_sched_find(name); 114 if (sched) 115 strscpy(scheduler, name, MPTCP_SCHED_NAME_MAX); 116 else 117 ret = -ENOENT; 118 rcu_read_unlock(); 119 120 return ret; 121 } 122 123 static int proc_scheduler(const struct ctl_table *ctl, int write, 124 void *buffer, size_t *lenp, loff_t *ppos) 125 { 126 char (*scheduler)[MPTCP_SCHED_NAME_MAX] = ctl->data; 127 char val[MPTCP_SCHED_NAME_MAX]; 128 struct ctl_table tbl = { 129 .data = val, 130 .maxlen = MPTCP_SCHED_NAME_MAX, 131 }; 132 int ret; 133 134 strscpy(val, *scheduler, MPTCP_SCHED_NAME_MAX); 135 136 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 137 if (write && ret == 0) 138 ret = mptcp_set_scheduler(*scheduler, val); 139 140 return ret; 141 } 142 143 static int proc_available_schedulers(const struct ctl_table *ctl, 144 int write, void *buffer, 145 size_t *lenp, loff_t *ppos) 146 { 147 struct ctl_table tbl = { .maxlen = MPTCP_SCHED_BUF_MAX, }; 148 int ret; 149 150 tbl.data = kmalloc(tbl.maxlen, GFP_USER); 151 if (!tbl.data) 152 return -ENOMEM; 153 154 mptcp_get_available_schedulers(tbl.data, MPTCP_SCHED_BUF_MAX); 155 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 156 kfree(tbl.data); 157 158 return ret; 159 } 160 161 static int proc_blackhole_detect_timeout(const struct ctl_table *table, 162 int write, void *buffer, size_t *lenp, 163 loff_t *ppos) 164 { 165 struct mptcp_pernet *pernet = container_of(table->data, 166 struct mptcp_pernet, 167 blackhole_timeout); 168 int ret; 169 170 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 171 if (write && ret == 0) 172 atomic_set(&pernet->active_disable_times, 0); 173 174 return ret; 175 } 176 177 static struct ctl_table mptcp_sysctl_table[] = { 178 { 179 .procname = "enabled", 180 .maxlen = sizeof(u8), 181 .mode = 0644, 182 /* users with CAP_NET_ADMIN or root (not and) can change this 183 * value, same as other sysctl or the 'net' tree. 184 */ 185 .proc_handler = proc_dou8vec_minmax, 186 .extra1 = SYSCTL_ZERO, 187 .extra2 = SYSCTL_ONE 188 }, 189 { 190 .procname = "add_addr_timeout", 191 .maxlen = sizeof(unsigned int), 192 .mode = 0644, 193 .proc_handler = proc_dointvec_jiffies, 194 }, 195 { 196 .procname = "checksum_enabled", 197 .maxlen = sizeof(u8), 198 .mode = 0644, 199 .proc_handler = proc_dou8vec_minmax, 200 .extra1 = SYSCTL_ZERO, 201 .extra2 = SYSCTL_ONE 202 }, 203 { 204 .procname = "allow_join_initial_addr_port", 205 .maxlen = sizeof(u8), 206 .mode = 0644, 207 .proc_handler = proc_dou8vec_minmax, 208 .extra1 = SYSCTL_ZERO, 209 .extra2 = SYSCTL_ONE 210 }, 211 { 212 .procname = "stale_loss_cnt", 213 .maxlen = sizeof(unsigned int), 214 .mode = 0644, 215 .proc_handler = proc_douintvec_minmax, 216 }, 217 { 218 .procname = "pm_type", 219 .maxlen = sizeof(u8), 220 .mode = 0644, 221 .proc_handler = proc_dou8vec_minmax, 222 .extra1 = SYSCTL_ZERO, 223 .extra2 = &mptcp_pm_type_max 224 }, 225 { 226 .procname = "scheduler", 227 .maxlen = MPTCP_SCHED_NAME_MAX, 228 .mode = 0644, 229 .proc_handler = proc_scheduler, 230 }, 231 { 232 .procname = "available_schedulers", 233 .maxlen = MPTCP_SCHED_BUF_MAX, 234 .mode = 0444, 235 .proc_handler = proc_available_schedulers, 236 }, 237 { 238 .procname = "close_timeout", 239 .maxlen = sizeof(unsigned int), 240 .mode = 0644, 241 .proc_handler = proc_dointvec_jiffies, 242 }, 243 { 244 .procname = "blackhole_timeout", 245 .maxlen = sizeof(unsigned int), 246 .mode = 0644, 247 .proc_handler = proc_blackhole_detect_timeout, 248 .extra1 = SYSCTL_ZERO, 249 }, 250 { 251 .procname = "syn_retrans_before_tcp_fallback", 252 .maxlen = sizeof(u8), 253 .mode = 0644, 254 .proc_handler = proc_dou8vec_minmax, 255 }, 256 }; 257 258 static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) 259 { 260 struct ctl_table_header *hdr; 261 struct ctl_table *table; 262 263 table = mptcp_sysctl_table; 264 if (!net_eq(net, &init_net)) { 265 table = kmemdup(table, sizeof(mptcp_sysctl_table), GFP_KERNEL); 266 if (!table) 267 goto err_alloc; 268 } 269 270 table[0].data = &pernet->mptcp_enabled; 271 table[1].data = &pernet->add_addr_timeout; 272 table[2].data = &pernet->checksum_enabled; 273 table[3].data = &pernet->allow_join_initial_addr_port; 274 table[4].data = &pernet->stale_loss_cnt; 275 table[5].data = &pernet->pm_type; 276 table[6].data = &pernet->scheduler; 277 /* table[7] is for available_schedulers which is read-only info */ 278 table[8].data = &pernet->close_timeout; 279 table[9].data = &pernet->blackhole_timeout; 280 table[10].data = &pernet->syn_retrans_before_tcp_fallback; 281 282 hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table, 283 ARRAY_SIZE(mptcp_sysctl_table)); 284 if (!hdr) 285 goto err_reg; 286 287 pernet->ctl_table_hdr = hdr; 288 289 return 0; 290 291 err_reg: 292 if (!net_eq(net, &init_net)) 293 kfree(table); 294 err_alloc: 295 return -ENOMEM; 296 } 297 298 static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) 299 { 300 const struct ctl_table *table = pernet->ctl_table_hdr->ctl_table_arg; 301 302 unregister_net_sysctl_table(pernet->ctl_table_hdr); 303 304 kfree(table); 305 } 306 307 #else 308 309 static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) 310 { 311 return 0; 312 } 313 314 static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {} 315 316 #endif /* CONFIG_SYSCTL */ 317 318 /* The following code block is to deal with middle box issues with MPTCP, 319 * similar to what is done with TFO. 320 * The proposed solution is to disable active MPTCP globally when SYN+MPC are 321 * dropped, while SYN without MPC aren't. In this case, active side MPTCP is 322 * disabled globally for 1hr at first. Then if it happens again, it is disabled 323 * for 2h, then 4h, 8h, ... 324 * The timeout is reset back to 1hr when a successful active MPTCP connection is 325 * fully established. 326 */ 327 328 /* Disable active MPTCP and record current jiffies and active_disable_times */ 329 void mptcp_active_disable(struct sock *sk) 330 { 331 struct net *net = sock_net(sk); 332 struct mptcp_pernet *pernet; 333 334 pernet = mptcp_get_pernet(net); 335 336 if (!READ_ONCE(pernet->blackhole_timeout)) 337 return; 338 339 /* Paired with READ_ONCE() in mptcp_active_should_disable() */ 340 WRITE_ONCE(pernet->active_disable_stamp, jiffies); 341 342 /* Paired with smp_rmb() in mptcp_active_should_disable(). 343 * We want pernet->active_disable_stamp to be updated first. 344 */ 345 smp_mb__before_atomic(); 346 atomic_inc(&pernet->active_disable_times); 347 348 MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE); 349 } 350 351 /* Calculate timeout for MPTCP active disable 352 * Return true if we are still in the active MPTCP disable period 353 * Return false if timeout already expired and we should use active MPTCP 354 */ 355 bool mptcp_active_should_disable(struct sock *ssk) 356 { 357 struct net *net = sock_net(ssk); 358 unsigned int blackhole_timeout; 359 struct mptcp_pernet *pernet; 360 unsigned long timeout; 361 int disable_times; 362 int multiplier; 363 364 pernet = mptcp_get_pernet(net); 365 blackhole_timeout = READ_ONCE(pernet->blackhole_timeout); 366 367 if (!blackhole_timeout) 368 return false; 369 370 disable_times = atomic_read(&pernet->active_disable_times); 371 if (!disable_times) 372 return false; 373 374 /* Paired with smp_mb__before_atomic() in mptcp_active_disable() */ 375 smp_rmb(); 376 377 /* Limit timeout to max: 2^6 * initial timeout */ 378 multiplier = 1 << min(disable_times - 1, 6); 379 380 /* Paired with the WRITE_ONCE() in mptcp_active_disable(). */ 381 timeout = READ_ONCE(pernet->active_disable_stamp) + 382 multiplier * blackhole_timeout * HZ; 383 384 return time_before(jiffies, timeout); 385 } 386 387 /* Enable active MPTCP and reset active_disable_times if needed */ 388 void mptcp_active_enable(struct sock *sk) 389 { 390 struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk)); 391 392 if (atomic_read(&pernet->active_disable_times)) { 393 struct dst_entry *dst = sk_dst_get(sk); 394 395 if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)) 396 atomic_set(&pernet->active_disable_times, 0); 397 } 398 } 399 400 /* Check the number of retransmissions, and fallback to TCP if needed */ 401 void mptcp_active_detect_blackhole(struct sock *ssk, bool expired) 402 { 403 struct mptcp_subflow_context *subflow; 404 405 if (!sk_is_mptcp(ssk)) 406 return; 407 408 subflow = mptcp_subflow_ctx(ssk); 409 410 if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) { 411 struct net *net = sock_net(ssk); 412 u8 timeouts, to_max; 413 414 timeouts = inet_csk(ssk)->icsk_retransmits; 415 to_max = mptcp_get_pernet(net)->syn_retrans_before_tcp_fallback; 416 417 if (timeouts == to_max || (timeouts < to_max && expired)) { 418 MPTCP_INC_STATS(net, MPTCP_MIB_MPCAPABLEACTIVEDROP); 419 subflow->mpc_drop = 1; 420 mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow); 421 } else { 422 subflow->mpc_drop = 0; 423 } 424 } 425 } 426 427 static int __net_init mptcp_net_init(struct net *net) 428 { 429 struct mptcp_pernet *pernet = mptcp_get_pernet(net); 430 431 mptcp_pernet_set_defaults(pernet); 432 433 return mptcp_pernet_new_table(net, pernet); 434 } 435 436 /* Note: the callback will only be called per extra netns */ 437 static void __net_exit mptcp_net_exit(struct net *net) 438 { 439 struct mptcp_pernet *pernet = mptcp_get_pernet(net); 440 441 mptcp_pernet_del_table(pernet); 442 } 443 444 static struct pernet_operations mptcp_pernet_ops = { 445 .init = mptcp_net_init, 446 .exit = mptcp_net_exit, 447 .id = &mptcp_pernet_id, 448 .size = sizeof(struct mptcp_pernet), 449 }; 450 451 void __init mptcp_init(void) 452 { 453 mptcp_join_cookie_init(); 454 mptcp_proto_init(); 455 456 if (register_pernet_subsys(&mptcp_pernet_ops) < 0) 457 panic("Failed to register MPTCP pernet subsystem.\n"); 458 } 459 460 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 461 int __init mptcpv6_init(void) 462 { 463 int err; 464 465 err = mptcp_proto_v6_init(); 466 467 return err; 468 } 469 #endif 470