// SPDX-License-Identifier: GPL-2.0
/* -*- linux-c -*-
 * sysctl_net_core.c: sysctl interface to net core subsystem.
 *
 * Begun April 1, 1996, Mike Shaver.
 * Added /proc/sys/net/core directory entry (empty =) ). [MS]
 */

#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/sched/isolation.h>

#include <net/ip.h>
#include <net/sock.h>
#include <net/net_ratelimit.h>
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
#include <net/hotdata.h>
#include <net/rps.h>

#include "dev.h"

static int int_3600 = 3600;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE;

static int net_msg_warn;        /* Unused, but still a sysctl */

int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);

/* 0 - Keep current behavior:
 *     IPv4: inherit all current settings from init_net
 *     IPv6: reset all settings to default
 * 1 - Both inherit all current settings from init_net
 * 2 - Both reset all settings to default
 * 3 - Both inherit all settings from current netns
 */
int sysctl_devconf_inherit_init_net __read_mostly;
EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);

#if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS)
static void dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
                         struct cpumask *mask)
{
        char kbuf[128];
        int len;

        if (*ppos || !*lenp) {
                *lenp = 0;
                return;
        }

        len = min(sizeof(kbuf) - 1, *lenp);
        len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
        if (!len) {
                *lenp = 0;
                return;
        }

        if (len < *lenp)
                kbuf[len++] = '\n';
        memcpy(buffer, kbuf, len);
        *lenp = len;
        *ppos += len;
}
#endif

#ifdef CONFIG_RPS

static struct cpumask *rps_default_mask_cow_alloc(struct net *net)
{
        struct cpumask *rps_default_mask;

        if (net->core.rps_default_mask)
                return net->core.rps_default_mask;

        rps_default_mask = kzalloc(cpumask_size(), GFP_KERNEL);
        if (!rps_default_mask)
                return NULL;

        /* pairs with READ_ONCE in rx_queue_default_mask() */
        WRITE_ONCE(net->core.rps_default_mask, rps_default_mask);
        return rps_default_mask;
}

static int rps_default_mask_sysctl(struct ctl_table *table, int write,
                                   void *buffer, size_t *lenp, loff_t *ppos)
{
        struct net *net = (struct net *)table->data;
        int err = 0;

        rtnl_lock();
        if (write) {
                struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net);

                err = -ENOMEM;
                if (!rps_default_mask)
                        goto done;

                err = cpumask_parse(buffer, rps_default_mask);
                if (err)
                        goto done;

                err = rps_cpumask_housekeeping(rps_default_mask);
                if (err)
                        goto done;
        } else {
                dump_cpumask(buffer, lenp, ppos,
                             net->core.rps_default_mask ?: cpu_none_mask);
        }

done:
        rtnl_unlock();
        return err;
}
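
/*
 * Illustrative usage of the handler above (hypothetical shell session,
 * not part of this file's interface definition):
 *
 *   # Default new RX queues in this netns to RPS on CPUs 0-3:
 *   echo f > /proc/sys/net/core/rps_default_mask
 *
 * The mask is allocated lazily on first write; until then, reads dump
 * cpu_none_mask (an all-zero bitmap).
 */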

static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
                                void *buffer, size_t *lenp, loff_t *ppos)
{
        unsigned int orig_size, size;
        int ret, i;
        struct ctl_table tmp = {
                .data = &size,
                .maxlen = sizeof(size),
                .mode = table->mode
        };
        struct rps_sock_flow_table *orig_sock_table, *sock_table;
        static DEFINE_MUTEX(sock_flow_mutex);

        mutex_lock(&sock_flow_mutex);

        orig_sock_table = rcu_dereference_protected(
                        net_hotdata.rps_sock_flow_table,
                        lockdep_is_held(&sock_flow_mutex));
        size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;

        ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

        if (write) {
                if (size) {
                        if (size > 1<<29) {
                                /* Enforce limit to prevent overflow */
                                mutex_unlock(&sock_flow_mutex);
                                return -EINVAL;
                        }
                        size = roundup_pow_of_two(size);
                        if (size != orig_size) {
                                sock_table =
                                    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
                                if (!sock_table) {
                                        mutex_unlock(&sock_flow_mutex);
                                        return -ENOMEM;
                                }
                                net_hotdata.rps_cpu_mask =
                                        roundup_pow_of_two(nr_cpu_ids) - 1;
                                sock_table->mask = size - 1;
                        } else
                                sock_table = orig_sock_table;

                        for (i = 0; i < size; i++)
                                sock_table->ents[i] = RPS_NO_CPU;
                } else
                        sock_table = NULL;

                if (sock_table != orig_sock_table) {
                        rcu_assign_pointer(net_hotdata.rps_sock_flow_table,
                                           sock_table);
                        if (sock_table) {
                                static_branch_inc(&rps_needed);
                                static_branch_inc(&rfs_needed);
                        }
                        if (orig_sock_table) {
                                static_branch_dec(&rps_needed);
                                static_branch_dec(&rfs_needed);
                                kvfree_rcu_mightsleep(orig_sock_table);
                        }
                }
        }

        mutex_unlock(&sock_flow_mutex);

        return ret;
}
#endif /* CONFIG_RPS */

#ifdef CONFIG_NET_FLOW_LIMIT
static DEFINE_MUTEX(flow_limit_update_mutex);

static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
                                 void *buffer, size_t *lenp, loff_t *ppos)
{
        struct sd_flow_limit *cur;
        struct softnet_data *sd;
        cpumask_var_t mask;
        int i, len, ret = 0;

        if (!alloc_cpumask_var(&mask, GFP_KERNEL))
                return -ENOMEM;

        if (write) {
                ret = cpumask_parse(buffer, mask);
                if (ret)
                        goto done;

                mutex_lock(&flow_limit_update_mutex);
                len = sizeof(*cur) + netdev_flow_limit_table_len;
                for_each_possible_cpu(i) {
                        sd = &per_cpu(softnet_data, i);
                        cur = rcu_dereference_protected(sd->flow_limit,
                                lockdep_is_held(&flow_limit_update_mutex));
                        if (cur && !cpumask_test_cpu(i, mask)) {
                                RCU_INIT_POINTER(sd->flow_limit, NULL);
                                kfree_rcu_mightsleep(cur);
                        } else if (!cur && cpumask_test_cpu(i, mask)) {
                                cur = kzalloc_node(len, GFP_KERNEL,
                                                   cpu_to_node(i));
                                if (!cur) {
                                        /* not unwinding previous changes */
                                        ret = -ENOMEM;
                                        goto write_unlock;
                                }
                                cur->num_buckets = netdev_flow_limit_table_len;
                                rcu_assign_pointer(sd->flow_limit, cur);
                        }
                }
write_unlock:
                mutex_unlock(&flow_limit_update_mutex);
        } else {
                cpumask_clear(mask);
                rcu_read_lock();
                for_each_possible_cpu(i) {
                        sd = &per_cpu(softnet_data, i);
                        if (rcu_dereference(sd->flow_limit))
                                cpumask_set_cpu(i, mask);
                }
                rcu_read_unlock();

                dump_cpumask(buffer, lenp, ppos, mask);
        }

done:
        free_cpumask_var(mask);
        return ret;
}
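
/*
 * Illustrative usage (hypothetical values): enable flow limits on CPUs
 * 0 and 1 only:
 *
 *   echo 3 > /proc/sys/net/core/flow_limit_cpu_bitmap
 *
 * Setting a CPU's bit allocates a table on that CPU's NUMA node; clearing
 * it frees the old table after an RCU grace period. As noted above, the
 * handler does not unwind earlier CPUs if a later allocation fails.
 */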

static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
                                       void *buffer, size_t *lenp, loff_t *ppos)
{
        unsigned int old, *ptr;
        int ret;

        mutex_lock(&flow_limit_update_mutex);

        ptr = table->data;
        old = *ptr;
        ret = proc_dointvec(table, write, buffer, lenp, ppos);
        if (!ret && write && !is_power_of_2(*ptr)) {
                *ptr = old;
                ret = -EINVAL;
        }

        mutex_unlock(&flow_limit_update_mutex);
        return ret;
}
#endif /* CONFIG_NET_FLOW_LIMIT */

#ifdef CONFIG_NET_SCHED
static int set_default_qdisc(struct ctl_table *table, int write,
                             void *buffer, size_t *lenp, loff_t *ppos)
{
        char id[IFNAMSIZ];
        struct ctl_table tbl = {
                .data = id,
                .maxlen = IFNAMSIZ,
        };
        int ret;

        qdisc_get_default(id, IFNAMSIZ);

        ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
        if (write && ret == 0)
                ret = qdisc_set_default(id);
        return ret;
}
#endif

static int proc_do_dev_weight(struct ctl_table *table, int write,
                              void *buffer, size_t *lenp, loff_t *ppos)
{
        static DEFINE_MUTEX(dev_weight_mutex);
        int ret, weight;

        mutex_lock(&dev_weight_mutex);
        ret = proc_dointvec(table, write, buffer, lenp, ppos);
        if (!ret && write) {
                weight = READ_ONCE(weight_p);
                WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias);
                WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias);
        }
        mutex_unlock(&dev_weight_mutex);

        return ret;
}

static int proc_do_rss_key(struct ctl_table *table, int write,
                           void *buffer, size_t *lenp, loff_t *ppos)
{
        struct ctl_table fake_table;
        char buf[NETDEV_RSS_KEY_LEN * 3];

        snprintf(buf, sizeof(buf), "%*phC", NETDEV_RSS_KEY_LEN, netdev_rss_key);
        fake_table.data = buf;
        fake_table.maxlen = sizeof(buf);
        return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}

#ifdef CONFIG_BPF_JIT
static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
                                           void *buffer, size_t *lenp,
                                           loff_t *ppos)
{
        int ret, jit_enable = *(int *)table->data;
        int min = *(int *)table->extra1;
        int max = *(int *)table->extra2;
        struct ctl_table tmp = *table;

        if (write && !capable(CAP_SYS_ADMIN))
                return -EPERM;

        tmp.data = &jit_enable;
        ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
        if (write && !ret) {
                if (jit_enable < 2 ||
                    (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) {
                        *(int *)table->data = jit_enable;
                        if (jit_enable == 2)
                                pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
                } else {
                        ret = -EPERM;
                }
        }

        if (write && ret && min == max)
                pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n");

        return ret;
}
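
/*
 * Value summary for bpf_jit_enable, as enforced above together with the
 * extra1/extra2 bounds set in net_core_table below:
 *   0 - JIT disabled (not selectable with CONFIG_BPF_JIT_ALWAYS_ON)
 *   1 - JIT enabled
 *   2 - JIT enabled with debug output; additionally gated on
 *       bpf_dump_raw_ok(), and never meant for production use
 */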

# ifdef CONFIG_HAVE_EBPF_JIT
static int
proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
                                    void *buffer, size_t *lenp, loff_t *ppos)
{
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
# endif /* CONFIG_HAVE_EBPF_JIT */

static int
proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
                                     void *buffer, size_t *lenp, loff_t *ppos)
{
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#endif
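
/*
 * Each entry below appears as /proc/sys/net/core/<procname>. For example
 * (illustrative):
 *
 *   echo 4194304 > /proc/sys/net/core/wmem_max
 *
 * Writes below the .extra1 floor (here SOCK_MIN_SNDBUF) are rejected
 * with -EINVAL by proc_dointvec_minmax().
 */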

static struct ctl_table net_core_table[] = {
        {
                .procname = "wmem_max",
                .data = &sysctl_wmem_max,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1 = &min_sndbuf,
        },
        {
                .procname = "rmem_max",
                .data = &sysctl_rmem_max,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1 = &min_rcvbuf,
        },
        {
                .procname = "wmem_default",
                .data = &sysctl_wmem_default,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1 = &min_sndbuf,
        },
        {
                .procname = "rmem_default",
                .data = &sysctl_rmem_default,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1 = &min_rcvbuf,
        },
        {
                .procname = "mem_pcpu_rsv",
                .data = &sysctl_mem_pcpu_rsv,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1 = &min_mem_pcpu_rsv,
        },
        {
                .procname = "dev_weight",
                .data = &weight_p,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_do_dev_weight,
        },
        {
                .procname = "dev_weight_rx_bias",
                .data = &dev_weight_rx_bias,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_do_dev_weight,
        },
        {
                .procname = "dev_weight_tx_bias",
                .data = &dev_weight_tx_bias,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_do_dev_weight,
        },
        {
                .procname = "netdev_max_backlog",
                .data = &net_hotdata.max_backlog,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec
        },
        {
                .procname = "netdev_rss_key",
                .data = &netdev_rss_key,
                .maxlen = sizeof(int),
                .mode = 0444,
                .proc_handler = proc_do_rss_key,
        },
#ifdef CONFIG_BPF_JIT
        {
                .procname = "bpf_jit_enable",
                .data = &bpf_jit_enable,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax_bpf_enable,
# ifdef CONFIG_BPF_JIT_ALWAYS_ON
                .extra1 = SYSCTL_ONE,
                .extra2 = SYSCTL_ONE,
# else
                .extra1 = SYSCTL_ZERO,
                .extra2 = SYSCTL_TWO,
# endif
        },
# ifdef CONFIG_HAVE_EBPF_JIT
        {
                .procname = "bpf_jit_harden",
                .data = &bpf_jit_harden,
                .maxlen = sizeof(int),
                .mode = 0600,
                .proc_handler = proc_dointvec_minmax_bpf_restricted,
                .extra1 = SYSCTL_ZERO,
                .extra2 = SYSCTL_TWO,
        },
        {
                .procname = "bpf_jit_kallsyms",
                .data = &bpf_jit_kallsyms,
                .maxlen = sizeof(int),
                .mode = 0600,
                .proc_handler = proc_dointvec_minmax_bpf_restricted,
                .extra1 = SYSCTL_ZERO,
                .extra2 = SYSCTL_ONE,
        },
# endif
        {
                .procname = "bpf_jit_limit",
                .data = &bpf_jit_limit,
                .maxlen = sizeof(long),
                .mode = 0600,
                .proc_handler = proc_dolongvec_minmax_bpf_restricted,
                .extra1 = SYSCTL_LONG_ONE,
                .extra2 = &bpf_jit_limit_max,
        },
#endif
        {
                .procname = "netdev_tstamp_prequeue",
                .data = &net_hotdata.tstamp_prequeue,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec
        },
        {
                .procname = "message_cost",
                .data = &net_ratelimit_state.interval,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec_jiffies,
        },
        {
                .procname = "message_burst",
                .data = &net_ratelimit_state.burst,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec,
        },
        {
                .procname = "tstamp_allow_data",
                .data = &sysctl_tstamp_allow_data,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1 = SYSCTL_ZERO,
                .extra2 = SYSCTL_ONE
        },
#ifdef CONFIG_RPS
        {
                .procname = "rps_sock_flow_entries",
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = rps_sock_flow_sysctl
        },
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
        {
                .procname = "flow_limit_cpu_bitmap",
                .mode = 0644,
                .proc_handler = flow_limit_cpu_sysctl
        },
        {
                .procname = "flow_limit_table_len",
                .data = &netdev_flow_limit_table_len,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = flow_limit_table_len_sysctl
        },
#endif /* CONFIG_NET_FLOW_LIMIT */
#ifdef CONFIG_NET_RX_BUSY_POLL
        {
                .procname = "busy_poll",
                .data = &sysctl_net_busy_poll,
                .maxlen = sizeof(unsigned int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1 = SYSCTL_ZERO,
        },
        {
                .procname = "busy_read",
                .data = &sysctl_net_busy_read,
                .maxlen = sizeof(unsigned int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1 = SYSCTL_ZERO,
        },
#endif
#ifdef CONFIG_NET_SCHED
        {
                .procname = "default_qdisc",
                .mode = 0644,
                .maxlen = IFNAMSIZ,
                .proc_handler = set_default_qdisc
        },
#endif
        {
                .procname = "netdev_budget",
                .data = &net_hotdata.netdev_budget,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec
        },
        {
                .procname = "warnings",
                .data = &net_msg_warn,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec
        },
        {
                .procname = "max_skb_frags",
                .data = &sysctl_max_skb_frags,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1 = SYSCTL_ONE,
                .extra2 = &max_skb_frags,
        },
        {
                .procname = "netdev_budget_usecs",
                .data = &net_hotdata.netdev_budget_usecs,
                .maxlen = sizeof(unsigned int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1 = SYSCTL_ZERO,
        },
        {
                .procname = "fb_tunnels_only_for_init_net",
                .data = &sysctl_fb_tunnels_only_for_init_net,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1 = SYSCTL_ZERO,
                .extra2 = SYSCTL_TWO,
        },
        {
                .procname = "devconf_inherit_init_net",
                .data = &sysctl_devconf_inherit_init_net,
                .maxlen = sizeof(int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1 = SYSCTL_ZERO,
                .extra2 = SYSCTL_THREE,
        },
        {
                .procname = "high_order_alloc_disable",
                .data = &net_high_order_alloc_disable_key.key,
                .maxlen = sizeof(net_high_order_alloc_disable_key),
                .mode = 0644,
                .proc_handler = proc_do_static_key,
        },
        {
                .procname = "gro_normal_batch",
                .data = &net_hotdata.gro_normal_batch,
                .maxlen = sizeof(unsigned int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1 = SYSCTL_ONE,
        },
        {
                .procname = "netdev_unregister_timeout_secs",
                .data = &netdev_unregister_timeout_secs,
                .maxlen = sizeof(unsigned int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1 = SYSCTL_ONE,
                .extra2 = &int_3600,
        },
        {
                .procname = "skb_defer_max",
                .data = &sysctl_skb_defer_max,
                .maxlen = sizeof(unsigned int),
                .mode = 0644,
                .proc_handler = proc_dointvec_minmax,
                .extra1 = SYSCTL_ZERO,
        },
        { }
};
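
/*
 * Unlike net_core_table, which is registered once for init_net, the
 * entries below are duplicated for every network namespace by
 * sysctl_core_net_init(), which rebases each .data pointer from
 * &init_net to the new netns.
 */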

static struct ctl_table netns_core_table[] = {
#if IS_ENABLED(CONFIG_RPS)
        {
                .procname = "rps_default_mask",
                .data = &init_net,
                .mode = 0644,
                .proc_handler = rps_default_mask_sysctl
        },
#endif
        {
                .procname = "somaxconn",
                .data = &init_net.core.sysctl_somaxconn,
                .maxlen = sizeof(int),
                .mode = 0644,
                .extra1 = SYSCTL_ZERO,
                .proc_handler = proc_dointvec_minmax
        },
        {
                .procname = "optmem_max",
                .data = &init_net.core.sysctl_optmem_max,
                .maxlen = sizeof(int),
                .mode = 0644,
                .extra1 = SYSCTL_ZERO,
                .proc_handler = proc_dointvec_minmax
        },
        {
                .procname = "txrehash",
                .data = &init_net.core.sysctl_txrehash,
                .maxlen = sizeof(u8),
                .mode = 0644,
                .extra1 = SYSCTL_ZERO,
                .extra2 = SYSCTL_ONE,
                .proc_handler = proc_dou8vec_minmax,
        },
        { }
};

static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str)
{
        /* fallback tunnels for initns only */
        if (!strncmp(str, "initns", 6))
                sysctl_fb_tunnels_only_for_init_net = 1;
        /* no fallback tunnels anywhere */
        else if (!strncmp(str, "none", 4))
                sysctl_fb_tunnels_only_for_init_net = 2;

        return 1;
}
__setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);

static __net_init int sysctl_core_net_init(struct net *net)
{
        struct ctl_table *tbl, *tmp;

        tbl = netns_core_table;
        if (!net_eq(net, &init_net)) {
                tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
                if (tbl == NULL)
                        goto err_dup;

                for (tmp = tbl; tmp->procname; tmp++)
                        tmp->data += (char *)net - (char *)&init_net;
        }

        net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl,
                                                      ARRAY_SIZE(netns_core_table));
        if (net->core.sysctl_hdr == NULL)
                goto err_reg;

        return 0;

err_reg:
        if (tbl != netns_core_table)
                kfree(tbl);
err_dup:
        return -ENOMEM;
}

static __net_exit void sysctl_core_net_exit(struct net *net)
{
        const struct ctl_table *tbl;

        tbl = net->core.sysctl_hdr->ctl_table_arg;
        unregister_net_sysctl_table(net->core.sysctl_hdr);
        BUG_ON(tbl == netns_core_table);
#if IS_ENABLED(CONFIG_RPS)
        kfree(net->core.rps_default_mask);
#endif
        kfree(tbl);
}

static __net_initdata struct pernet_operations sysctl_core_ops = {
        .init = sysctl_core_net_init,
        .exit = sysctl_core_net_exit,
};

static __init int sysctl_core_init(void)
{
        register_net_sysctl(&init_net, "net/core", net_core_table);
        return register_pernet_subsys(&sysctl_core_ops);
}

fs_initcall(sysctl_core_init);
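
/*
 * Boot-time alternative to the fb_tunnels_only_for_init_net sysctl,
 * handled by the __setup() hook above (illustrative kernel command-line
 * usage):
 *
 *   fb_tunnels=initns   # fallback tunnel devices created in init_net only
 *   fb_tunnels=none     # no fallback tunnel devices in any netns
 */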