1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2019, Tessares SA. 5 */ 6 7 #ifdef CONFIG_SYSCTL 8 #include <linux/sysctl.h> 9 #endif 10 11 #include <net/net_namespace.h> 12 #include <net/netns/generic.h> 13 14 #include "protocol.h" 15 #include "mib.h" 16 17 #define MPTCP_SYSCTL_PATH "net/mptcp" 18 19 static int mptcp_pernet_id; 20 21 #ifdef CONFIG_SYSCTL 22 static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX; 23 #endif 24 25 struct mptcp_pernet { 26 #ifdef CONFIG_SYSCTL 27 struct ctl_table_header *ctl_table_hdr; 28 #endif 29 30 unsigned int add_addr_timeout; 31 unsigned int blackhole_timeout; 32 unsigned int close_timeout; 33 unsigned int stale_loss_cnt; 34 atomic_t active_disable_times; 35 unsigned long active_disable_stamp; 36 u8 mptcp_enabled; 37 u8 checksum_enabled; 38 u8 allow_join_initial_addr_port; 39 u8 pm_type; 40 char scheduler[MPTCP_SCHED_NAME_MAX]; 41 }; 42 43 static struct mptcp_pernet *mptcp_get_pernet(const struct net *net) 44 { 45 return net_generic(net, mptcp_pernet_id); 46 } 47 48 int mptcp_is_enabled(const struct net *net) 49 { 50 return mptcp_get_pernet(net)->mptcp_enabled; 51 } 52 53 unsigned int mptcp_get_add_addr_timeout(const struct net *net) 54 { 55 return mptcp_get_pernet(net)->add_addr_timeout; 56 } 57 58 int mptcp_is_checksum_enabled(const struct net *net) 59 { 60 return mptcp_get_pernet(net)->checksum_enabled; 61 } 62 63 int mptcp_allow_join_id0(const struct net *net) 64 { 65 return mptcp_get_pernet(net)->allow_join_initial_addr_port; 66 } 67 68 unsigned int mptcp_stale_loss_cnt(const struct net *net) 69 { 70 return mptcp_get_pernet(net)->stale_loss_cnt; 71 } 72 73 unsigned int mptcp_close_timeout(const struct sock *sk) 74 { 75 if (sock_flag(sk, SOCK_DEAD)) 76 return TCP_TIMEWAIT_LEN; 77 return mptcp_get_pernet(sock_net(sk))->close_timeout; 78 } 79 80 int mptcp_get_pm_type(const struct net *net) 81 { 82 return mptcp_get_pernet(net)->pm_type; 83 } 84 85 const char *mptcp_get_scheduler(const struct net *net) 86 { 87 return mptcp_get_pernet(net)->scheduler; 88 } 89 90 static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) 91 { 92 pernet->mptcp_enabled = 1; 93 pernet->add_addr_timeout = TCP_RTO_MAX; 94 pernet->blackhole_timeout = 3600; 95 atomic_set(&pernet->active_disable_times, 0); 96 pernet->close_timeout = TCP_TIMEWAIT_LEN; 97 pernet->checksum_enabled = 0; 98 pernet->allow_join_initial_addr_port = 1; 99 pernet->stale_loss_cnt = 4; 100 pernet->pm_type = MPTCP_PM_TYPE_KERNEL; 101 strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler)); 102 } 103 104 #ifdef CONFIG_SYSCTL 105 static int mptcp_set_scheduler(const struct net *net, const char *name) 106 { 107 struct mptcp_pernet *pernet = mptcp_get_pernet(net); 108 struct mptcp_sched_ops *sched; 109 int ret = 0; 110 111 rcu_read_lock(); 112 sched = mptcp_sched_find(name); 113 if (sched) 114 strscpy(pernet->scheduler, name, MPTCP_SCHED_NAME_MAX); 115 else 116 ret = -ENOENT; 117 rcu_read_unlock(); 118 119 return ret; 120 } 121 122 static int proc_scheduler(const struct ctl_table *ctl, int write, 123 void *buffer, size_t *lenp, loff_t *ppos) 124 { 125 const struct net *net = current->nsproxy->net_ns; 126 char val[MPTCP_SCHED_NAME_MAX]; 127 struct ctl_table tbl = { 128 .data = val, 129 .maxlen = MPTCP_SCHED_NAME_MAX, 130 }; 131 int ret; 132 133 strscpy(val, mptcp_get_scheduler(net), MPTCP_SCHED_NAME_MAX); 134 135 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 136 if (write && ret == 0) 137 ret = mptcp_set_scheduler(net, val); 138 139 return ret; 140 } 141 142 static int proc_available_schedulers(const struct ctl_table *ctl, 143 int write, void *buffer, 144 size_t *lenp, loff_t *ppos) 145 { 146 struct ctl_table tbl = { .maxlen = MPTCP_SCHED_BUF_MAX, }; 147 int ret; 148 149 tbl.data = kmalloc(tbl.maxlen, GFP_USER); 150 if (!tbl.data) 151 return -ENOMEM; 152 153 mptcp_get_available_schedulers(tbl.data, MPTCP_SCHED_BUF_MAX); 154 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 155 kfree(tbl.data); 156 157 return ret; 158 } 159 160 static int proc_blackhole_detect_timeout(const struct ctl_table *table, 161 int write, void *buffer, size_t *lenp, 162 loff_t *ppos) 163 { 164 struct mptcp_pernet *pernet = mptcp_get_pernet(current->nsproxy->net_ns); 165 int ret; 166 167 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 168 if (write && ret == 0) 169 atomic_set(&pernet->active_disable_times, 0); 170 171 return ret; 172 } 173 174 static struct ctl_table mptcp_sysctl_table[] = { 175 { 176 .procname = "enabled", 177 .maxlen = sizeof(u8), 178 .mode = 0644, 179 /* users with CAP_NET_ADMIN or root (not and) can change this 180 * value, same as other sysctl or the 'net' tree. 181 */ 182 .proc_handler = proc_dou8vec_minmax, 183 .extra1 = SYSCTL_ZERO, 184 .extra2 = SYSCTL_ONE 185 }, 186 { 187 .procname = "add_addr_timeout", 188 .maxlen = sizeof(unsigned int), 189 .mode = 0644, 190 .proc_handler = proc_dointvec_jiffies, 191 }, 192 { 193 .procname = "checksum_enabled", 194 .maxlen = sizeof(u8), 195 .mode = 0644, 196 .proc_handler = proc_dou8vec_minmax, 197 .extra1 = SYSCTL_ZERO, 198 .extra2 = SYSCTL_ONE 199 }, 200 { 201 .procname = "allow_join_initial_addr_port", 202 .maxlen = sizeof(u8), 203 .mode = 0644, 204 .proc_handler = proc_dou8vec_minmax, 205 .extra1 = SYSCTL_ZERO, 206 .extra2 = SYSCTL_ONE 207 }, 208 { 209 .procname = "stale_loss_cnt", 210 .maxlen = sizeof(unsigned int), 211 .mode = 0644, 212 .proc_handler = proc_douintvec_minmax, 213 }, 214 { 215 .procname = "pm_type", 216 .maxlen = sizeof(u8), 217 .mode = 0644, 218 .proc_handler = proc_dou8vec_minmax, 219 .extra1 = SYSCTL_ZERO, 220 .extra2 = &mptcp_pm_type_max 221 }, 222 { 223 .procname = "scheduler", 224 .maxlen = MPTCP_SCHED_NAME_MAX, 225 .mode = 0644, 226 .proc_handler = proc_scheduler, 227 }, 228 { 229 .procname = "available_schedulers", 230 .maxlen = MPTCP_SCHED_BUF_MAX, 231 .mode = 0644, 232 .proc_handler = proc_available_schedulers, 233 }, 234 { 235 .procname = "close_timeout", 236 .maxlen = sizeof(unsigned int), 237 .mode = 0644, 238 .proc_handler = proc_dointvec_jiffies, 239 }, 240 { 241 .procname = "blackhole_timeout", 242 .maxlen = sizeof(unsigned int), 243 .mode = 0644, 244 .proc_handler = proc_blackhole_detect_timeout, 245 .extra1 = SYSCTL_ZERO, 246 }, 247 }; 248 249 static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) 250 { 251 struct ctl_table_header *hdr; 252 struct ctl_table *table; 253 254 table = mptcp_sysctl_table; 255 if (!net_eq(net, &init_net)) { 256 table = kmemdup(table, sizeof(mptcp_sysctl_table), GFP_KERNEL); 257 if (!table) 258 goto err_alloc; 259 } 260 261 table[0].data = &pernet->mptcp_enabled; 262 table[1].data = &pernet->add_addr_timeout; 263 table[2].data = &pernet->checksum_enabled; 264 table[3].data = &pernet->allow_join_initial_addr_port; 265 table[4].data = &pernet->stale_loss_cnt; 266 table[5].data = &pernet->pm_type; 267 table[6].data = &pernet->scheduler; 268 /* table[7] is for available_schedulers which is read-only info */ 269 table[8].data = &pernet->close_timeout; 270 table[9].data = &pernet->blackhole_timeout; 271 272 hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table, 273 ARRAY_SIZE(mptcp_sysctl_table)); 274 if (!hdr) 275 goto err_reg; 276 277 pernet->ctl_table_hdr = hdr; 278 279 return 0; 280 281 err_reg: 282 if (!net_eq(net, &init_net)) 283 kfree(table); 284 err_alloc: 285 return -ENOMEM; 286 } 287 288 static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) 289 { 290 const struct ctl_table *table = pernet->ctl_table_hdr->ctl_table_arg; 291 292 unregister_net_sysctl_table(pernet->ctl_table_hdr); 293 294 kfree(table); 295 } 296 297 #else 298 299 static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) 300 { 301 return 0; 302 } 303 304 static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {} 305 306 #endif /* CONFIG_SYSCTL */ 307 308 /* The following code block is to deal with middle box issues with MPTCP, 309 * similar to what is done with TFO. 310 * The proposed solution is to disable active MPTCP globally when SYN+MPC are 311 * dropped, while SYN without MPC aren't. In this case, active side MPTCP is 312 * disabled globally for 1hr at first. Then if it happens again, it is disabled 313 * for 2h, then 4h, 8h, ... 314 * The timeout is reset back to 1hr when a successful active MPTCP connection is 315 * fully established. 316 */ 317 318 /* Disable active MPTCP and record current jiffies and active_disable_times */ 319 void mptcp_active_disable(struct sock *sk) 320 { 321 struct net *net = sock_net(sk); 322 struct mptcp_pernet *pernet; 323 324 pernet = mptcp_get_pernet(net); 325 326 if (!READ_ONCE(pernet->blackhole_timeout)) 327 return; 328 329 /* Paired with READ_ONCE() in mptcp_active_should_disable() */ 330 WRITE_ONCE(pernet->active_disable_stamp, jiffies); 331 332 /* Paired with smp_rmb() in mptcp_active_should_disable(). 333 * We want pernet->active_disable_stamp to be updated first. 334 */ 335 smp_mb__before_atomic(); 336 atomic_inc(&pernet->active_disable_times); 337 338 MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE); 339 } 340 341 /* Calculate timeout for MPTCP active disable 342 * Return true if we are still in the active MPTCP disable period 343 * Return false if timeout already expired and we should use active MPTCP 344 */ 345 bool mptcp_active_should_disable(struct sock *ssk) 346 { 347 struct net *net = sock_net(ssk); 348 unsigned int blackhole_timeout; 349 struct mptcp_pernet *pernet; 350 unsigned long timeout; 351 int disable_times; 352 int multiplier; 353 354 pernet = mptcp_get_pernet(net); 355 blackhole_timeout = READ_ONCE(pernet->blackhole_timeout); 356 357 if (!blackhole_timeout) 358 return false; 359 360 disable_times = atomic_read(&pernet->active_disable_times); 361 if (!disable_times) 362 return false; 363 364 /* Paired with smp_mb__before_atomic() in mptcp_active_disable() */ 365 smp_rmb(); 366 367 /* Limit timeout to max: 2^6 * initial timeout */ 368 multiplier = 1 << min(disable_times - 1, 6); 369 370 /* Paired with the WRITE_ONCE() in mptcp_active_disable(). */ 371 timeout = READ_ONCE(pernet->active_disable_stamp) + 372 multiplier * blackhole_timeout * HZ; 373 374 return time_before(jiffies, timeout); 375 } 376 377 /* Enable active MPTCP and reset active_disable_times if needed */ 378 void mptcp_active_enable(struct sock *sk) 379 { 380 struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk)); 381 382 if (atomic_read(&pernet->active_disable_times)) { 383 struct dst_entry *dst = sk_dst_get(sk); 384 385 if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)) 386 atomic_set(&pernet->active_disable_times, 0); 387 } 388 } 389 390 /* Check the number of retransmissions, and fallback to TCP if needed */ 391 void mptcp_active_detect_blackhole(struct sock *ssk, bool expired) 392 { 393 struct mptcp_subflow_context *subflow; 394 u32 timeouts; 395 396 if (!sk_is_mptcp(ssk)) 397 return; 398 399 timeouts = inet_csk(ssk)->icsk_retransmits; 400 subflow = mptcp_subflow_ctx(ssk); 401 402 if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) { 403 if (timeouts == 2 || (timeouts < 2 && expired)) { 404 MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP); 405 subflow->mpc_drop = 1; 406 mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow); 407 } else { 408 subflow->mpc_drop = 0; 409 } 410 } 411 } 412 413 static int __net_init mptcp_net_init(struct net *net) 414 { 415 struct mptcp_pernet *pernet = mptcp_get_pernet(net); 416 417 mptcp_pernet_set_defaults(pernet); 418 419 return mptcp_pernet_new_table(net, pernet); 420 } 421 422 /* Note: the callback will only be called per extra netns */ 423 static void __net_exit mptcp_net_exit(struct net *net) 424 { 425 struct mptcp_pernet *pernet = mptcp_get_pernet(net); 426 427 mptcp_pernet_del_table(pernet); 428 } 429 430 static struct pernet_operations mptcp_pernet_ops = { 431 .init = mptcp_net_init, 432 .exit = mptcp_net_exit, 433 .id = &mptcp_pernet_id, 434 .size = sizeof(struct mptcp_pernet), 435 }; 436 437 void __init mptcp_init(void) 438 { 439 mptcp_join_cookie_init(); 440 mptcp_proto_init(); 441 442 if (register_pernet_subsys(&mptcp_pernet_ops) < 0) 443 panic("Failed to register MPTCP pernet subsystem.\n"); 444 } 445 446 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 447 int __init mptcpv6_init(void) 448 { 449 int err; 450 451 err = mptcp_proto_v6_init(); 452 453 return err; 454 } 455 #endif 456