// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2019, Tessares SA.
 */

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "protocol.h"
#include "mib.h"

#define MPTCP_SYSCTL_PATH "net/mptcp"

static int mptcp_pernet_id;

#ifdef CONFIG_SYSCTL
static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX;
#endif

struct mptcp_pernet {
#ifdef CONFIG_SYSCTL
	struct ctl_table_header *ctl_table_hdr;
#endif

	unsigned int add_addr_timeout;
	unsigned int blackhole_timeout;
	unsigned int close_timeout;
	unsigned int stale_loss_cnt;
	atomic_t active_disable_times;
	unsigned long active_disable_stamp;
	u8 mptcp_enabled;
	u8 checksum_enabled;
	u8 allow_join_initial_addr_port;
	u8 pm_type;
	char scheduler[MPTCP_SCHED_NAME_MAX];
};

static struct mptcp_pernet *mptcp_get_pernet(const struct net *net)
{
	return net_generic(net, mptcp_pernet_id);
}

int mptcp_is_enabled(const struct net *net)
{
	return mptcp_get_pernet(net)->mptcp_enabled;
}

unsigned int mptcp_get_add_addr_timeout(const struct net *net)
{
	return mptcp_get_pernet(net)->add_addr_timeout;
}

int mptcp_is_checksum_enabled(const struct net *net)
{
	return mptcp_get_pernet(net)->checksum_enabled;
}

int mptcp_allow_join_id0(const struct net *net)
{
	return mptcp_get_pernet(net)->allow_join_initial_addr_port;
}

unsigned int mptcp_stale_loss_cnt(const struct net *net)
{
	return mptcp_get_pernet(net)->stale_loss_cnt;
}

unsigned int mptcp_close_timeout(const struct sock *sk)
{
	if (sock_flag(sk, SOCK_DEAD))
		return TCP_TIMEWAIT_LEN;
	return mptcp_get_pernet(sock_net(sk))->close_timeout;
}

int mptcp_get_pm_type(const struct net *net)
{
	return mptcp_get_pernet(net)->pm_type;
}

const char *mptcp_get_scheduler(const struct net *net)
{
	return mptcp_get_pernet(net)->scheduler;
}

static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
	pernet->mptcp_enabled = 1;
	pernet->add_addr_timeout = TCP_RTO_MAX;
	pernet->blackhole_timeout = 3600;
	atomic_set(&pernet->active_disable_times, 0);
	pernet->close_timeout = TCP_TIMEWAIT_LEN;
	pernet->checksum_enabled = 0;
	pernet->allow_join_initial_addr_port = 1;
	pernet->stale_loss_cnt = 4;
	pernet->pm_type = MPTCP_PM_TYPE_KERNEL;
	strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler));
}

#ifdef CONFIG_SYSCTL
static int mptcp_set_scheduler(char *scheduler, const char *name)
{
	struct mptcp_sched_ops *sched;
	int ret = 0;

	rcu_read_lock();
	sched = mptcp_sched_find(name);
	if (sched)
		strscpy(scheduler, name, MPTCP_SCHED_NAME_MAX);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

static int proc_scheduler(const struct ctl_table *ctl, int write,
			  void *buffer, size_t *lenp, loff_t *ppos)
{
	char (*scheduler)[MPTCP_SCHED_NAME_MAX] = ctl->data;
	char val[MPTCP_SCHED_NAME_MAX];
	struct ctl_table tbl = {
		.data = val,
		.maxlen = MPTCP_SCHED_NAME_MAX,
	};
	int ret;

	strscpy(val, *scheduler, MPTCP_SCHED_NAME_MAX);

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0)
		ret = mptcp_set_scheduler(*scheduler, val);

	return ret;
}

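/* Format the list of registered MPTCP schedulers for the read-only
 * "available_schedulers" sysctl entry.
 */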
static int proc_available_schedulers(const struct ctl_table *ctl,
				     int write, void *buffer,
				     size_t *lenp, loff_t *ppos)
{
	struct ctl_table tbl = { .maxlen = MPTCP_SCHED_BUF_MAX, };
	int ret;

	tbl.data = kmalloc(tbl.maxlen, GFP_USER);
	if (!tbl.data)
		return -ENOMEM;

	mptcp_get_available_schedulers(tbl.data, MPTCP_SCHED_BUF_MAX);
	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	kfree(tbl.data);

	return ret;
}

static int proc_blackhole_detect_timeout(const struct ctl_table *table,
					 int write, void *buffer, size_t *lenp,
					 loff_t *ppos)
{
	struct mptcp_pernet *pernet = container_of(table->data,
						   struct mptcp_pernet,
						   blackhole_timeout);
	int ret;

	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (write && ret == 0)
		atomic_set(&pernet->active_disable_times, 0);

	return ret;
}

static struct ctl_table mptcp_sysctl_table[] = {
	{
		.procname = "enabled",
		.maxlen = sizeof(u8),
		.mode = 0644,
		/* users with CAP_NET_ADMIN or root (not and) can change this
		 * value, same as other sysctl or the 'net' tree.
		 */
		.proc_handler = proc_dou8vec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE
	},
	{
		.procname = "add_addr_timeout",
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "checksum_enabled",
		.maxlen = sizeof(u8),
		.mode = 0644,
		.proc_handler = proc_dou8vec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE
	},
	{
		.procname = "allow_join_initial_addr_port",
		.maxlen = sizeof(u8),
		.mode = 0644,
		.proc_handler = proc_dou8vec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE
	},
	{
		.procname = "stale_loss_cnt",
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_douintvec_minmax,
	},
	{
		.procname = "pm_type",
		.maxlen = sizeof(u8),
		.mode = 0644,
		.proc_handler = proc_dou8vec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = &mptcp_pm_type_max
	},
	{
		.procname = "scheduler",
		.maxlen = MPTCP_SCHED_NAME_MAX,
		.mode = 0644,
		.proc_handler = proc_scheduler,
	},
	{
		.procname = "available_schedulers",
		.maxlen = MPTCP_SCHED_BUF_MAX,
		.mode = 0444,
		.proc_handler = proc_available_schedulers,
	},
	{
		.procname = "close_timeout",
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "blackhole_timeout",
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_blackhole_detect_timeout,
		.extra1 = SYSCTL_ZERO,
	},
};

static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
{
	struct ctl_table_header *hdr;
	struct ctl_table *table;

	table = mptcp_sysctl_table;
	if (!net_eq(net, &init_net)) {
		table = kmemdup(table, sizeof(mptcp_sysctl_table), GFP_KERNEL);
		if (!table)
			goto err_alloc;
	}

	table[0].data = &pernet->mptcp_enabled;
	table[1].data = &pernet->add_addr_timeout;
	table[2].data = &pernet->checksum_enabled;
	table[3].data = &pernet->allow_join_initial_addr_port;
	table[4].data = &pernet->stale_loss_cnt;
	table[5].data = &pernet->pm_type;
	table[6].data = &pernet->scheduler;
	/* table[7] is for available_schedulers which is read-only info */
	table[8].data = &pernet->close_timeout;
	table[9].data = &pernet->blackhole_timeout;

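	/* Expose the table under "net/mptcp" (MPTCP_SYSCTL_PATH) for this netns */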
	hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
				     ARRAY_SIZE(mptcp_sysctl_table));
	if (!hdr)
		goto err_reg;

	pernet->ctl_table_hdr = hdr;

	return 0;

err_reg:
	if (!net_eq(net, &init_net))
		kfree(table);
err_alloc:
	return -ENOMEM;
}

static void mptcp_pernet_del_table(struct mptcp_pernet *pernet)
{
	const struct ctl_table *table = pernet->ctl_table_hdr->ctl_table_arg;

	unregister_net_sysctl_table(pernet->ctl_table_hdr);

	kfree(table);
}

#else

static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
{
	return 0;
}

static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}

#endif /* CONFIG_SYSCTL */

/* The following code block is to deal with middle box issues with MPTCP,
 * similar to what is done with TFO.
 * The proposed solution is to disable active MPTCP globally when SYN+MPC are
 * dropped, while SYN without MPC aren't. In this case, active side MPTCP is
 * disabled globally for 1hr at first. Then if it happens again, it is disabled
 * for 2h, then 4h, 8h, ...
 * The timeout is reset back to 1hr when a successful active MPTCP connection is
 * fully established.
 */

/* Disable active MPTCP and record current jiffies and active_disable_times */
void mptcp_active_disable(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mptcp_pernet *pernet;

	pernet = mptcp_get_pernet(net);

	if (!READ_ONCE(pernet->blackhole_timeout))
		return;

	/* Paired with READ_ONCE() in mptcp_active_should_disable() */
	WRITE_ONCE(pernet->active_disable_stamp, jiffies);

	/* Paired with smp_rmb() in mptcp_active_should_disable().
	 * We want pernet->active_disable_stamp to be updated first.
	 */
	smp_mb__before_atomic();
	atomic_inc(&pernet->active_disable_times);

	MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE);
}

/* Calculate timeout for MPTCP active disable
 * Return true if we are still in the active MPTCP disable period
 * Return false if timeout already expired and we should use active MPTCP
 */
bool mptcp_active_should_disable(struct sock *ssk)
{
	struct net *net = sock_net(ssk);
	unsigned int blackhole_timeout;
	struct mptcp_pernet *pernet;
	unsigned long timeout;
	int disable_times;
	int multiplier;

	pernet = mptcp_get_pernet(net);
	blackhole_timeout = READ_ONCE(pernet->blackhole_timeout);

	if (!blackhole_timeout)
		return false;

	disable_times = atomic_read(&pernet->active_disable_times);
	if (!disable_times)
		return false;

	/* Paired with smp_mb__before_atomic() in mptcp_active_disable() */
	smp_rmb();

	/* Limit timeout to max: 2^6 * initial timeout */
	multiplier = 1 << min(disable_times - 1, 6);

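	/* Illustrative only: with the default blackhole_timeout of 3600s,
	 * a third consecutive detection gives multiplier = 1 << 2, so active
	 * MPTCP stays disabled for 4h from the last recorded stamp.
	 */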
	/* Paired with the WRITE_ONCE() in mptcp_active_disable(). */
	timeout = READ_ONCE(pernet->active_disable_stamp) +
		  multiplier * blackhole_timeout * HZ;

	return time_before(jiffies, timeout);
}

/* Enable active MPTCP and reset active_disable_times if needed */
void mptcp_active_enable(struct sock *sk)
{
	struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk));

	if (atomic_read(&pernet->active_disable_times)) {
		struct dst_entry *dst = sk_dst_get(sk);

		if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))
			atomic_set(&pernet->active_disable_times, 0);
	}
}

/* Check the number of retransmissions, and fallback to TCP if needed */
void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
{
	struct mptcp_subflow_context *subflow;
	u32 timeouts;

	if (!sk_is_mptcp(ssk))
		return;

	timeouts = inet_csk(ssk)->icsk_retransmits;
	subflow = mptcp_subflow_ctx(ssk);

	if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) {
		if (timeouts == 2 || (timeouts < 2 && expired)) {
			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
			subflow->mpc_drop = 1;
			mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
		} else {
			subflow->mpc_drop = 0;
		}
	}
}

static int __net_init mptcp_net_init(struct net *net)
{
	struct mptcp_pernet *pernet = mptcp_get_pernet(net);

	mptcp_pernet_set_defaults(pernet);

	return mptcp_pernet_new_table(net, pernet);
}

/* Note: the callback will only be called per extra netns */
static void __net_exit mptcp_net_exit(struct net *net)
{
	struct mptcp_pernet *pernet = mptcp_get_pernet(net);

	mptcp_pernet_del_table(pernet);
}

static struct pernet_operations mptcp_pernet_ops = {
	.init = mptcp_net_init,
	.exit = mptcp_net_exit,
	.id = &mptcp_pernet_id,
	.size = sizeof(struct mptcp_pernet),
};

void __init mptcp_init(void)
{
	mptcp_join_cookie_init();
	mptcp_proto_init();

	if (register_pernet_subsys(&mptcp_pernet_ops) < 0)
		panic("Failed to register MPTCP pernet subsystem.\n");
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
int __init mptcpv6_init(void)
{
	int err;

	err = mptcp_proto_v6_init();

	return err;
}
#endif