1 // SPDX-License-Identifier: GPL-2.0 2 /* -*- linux-c -*- 3 * sysctl_net_core.c: sysctl interface to net core subsystem. 4 * 5 * Begun April 1, 1996, Mike Shaver. 6 * Added /proc/sys/net/core directory entry (empty =) ). [MS] 7 */ 8 9 #include <linux/filter.h> 10 #include <linux/mm.h> 11 #include <linux/sysctl.h> 12 #include <linux/module.h> 13 #include <linux/socket.h> 14 #include <linux/netdevice.h> 15 #include <linux/ratelimit.h> 16 #include <linux/vmalloc.h> 17 #include <linux/init.h> 18 #include <linux/slab.h> 19 #include <linux/sched/isolation.h> 20 21 #include <net/ip.h> 22 #include <net/sock.h> 23 #include <net/net_ratelimit.h> 24 #include <net/busy_poll.h> 25 #include <net/pkt_sched.h> 26 #include <net/hotdata.h> 27 #include <net/proto_memory.h> 28 #include <net/rps.h> 29 30 #include "dev.h" 31 32 static int int_3600 = 3600; 33 static int min_sndbuf = SOCK_MIN_SNDBUF; 34 static int min_rcvbuf = SOCK_MIN_RCVBUF; 35 static int max_skb_frags = MAX_SKB_FRAGS; 36 static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE; 37 38 static int net_msg_warn; /* Unused, but still a sysctl */ 39 40 int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0; 41 EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net); 42 43 /* 0 - Keep current behavior: 44 * IPv4: inherit all current settings from init_net 45 * IPv6: reset all settings to default 46 * 1 - Both inherit all current settings from init_net 47 * 2 - Both reset all settings to default 48 * 3 - Both inherit all settings from current netns 49 */ 50 int sysctl_devconf_inherit_init_net __read_mostly; 51 EXPORT_SYMBOL(sysctl_devconf_inherit_init_net); 52 53 #if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS) 54 static void dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos, 55 struct cpumask *mask) 56 { 57 char kbuf[128]; 58 int len; 59 60 if (*ppos || !*lenp) { 61 *lenp = 0; 62 return; 63 } 64 65 len = min(sizeof(kbuf) - 1, *lenp); 66 len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask)); 67 if (!len) { 68 *lenp = 0; 69 return; 70 } 71 72 if (len < *lenp) 73 kbuf[len++] = '\n'; 74 memcpy(buffer, kbuf, len); 75 *lenp = len; 76 *ppos += len; 77 } 78 #endif 79 80 #ifdef CONFIG_RPS 81 82 static struct cpumask *rps_default_mask_cow_alloc(struct net *net) 83 { 84 struct cpumask *rps_default_mask; 85 86 if (net->core.rps_default_mask) 87 return net->core.rps_default_mask; 88 89 rps_default_mask = kzalloc(cpumask_size(), GFP_KERNEL); 90 if (!rps_default_mask) 91 return NULL; 92 93 /* pairs with READ_ONCE in rx_queue_default_mask() */ 94 WRITE_ONCE(net->core.rps_default_mask, rps_default_mask); 95 return rps_default_mask; 96 } 97 98 static int rps_default_mask_sysctl(const struct ctl_table *table, int write, 99 void *buffer, size_t *lenp, loff_t *ppos) 100 { 101 struct net *net = (struct net *)table->data; 102 int err = 0; 103 104 rtnl_lock(); 105 if (write) { 106 struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net); 107 108 err = -ENOMEM; 109 if (!rps_default_mask) 110 goto done; 111 112 err = cpumask_parse(buffer, rps_default_mask); 113 if (err) 114 goto done; 115 116 err = rps_cpumask_housekeeping(rps_default_mask); 117 if (err) 118 goto done; 119 } else { 120 dump_cpumask(buffer, lenp, ppos, 121 net->core.rps_default_mask ? : cpu_none_mask); 122 } 123 124 done: 125 rtnl_unlock(); 126 return err; 127 } 128 129 static int rps_sock_flow_sysctl(const struct ctl_table *table, int write, 130 void *buffer, size_t *lenp, loff_t *ppos) 131 { 132 unsigned int orig_size, size; 133 int ret, i; 134 struct ctl_table tmp = { 135 .data = &size, 136 .maxlen = sizeof(size), 137 .mode = table->mode 138 }; 139 struct rps_sock_flow_table *orig_sock_table, *sock_table; 140 static DEFINE_MUTEX(sock_flow_mutex); 141 142 mutex_lock(&sock_flow_mutex); 143 144 orig_sock_table = rcu_dereference_protected( 145 net_hotdata.rps_sock_flow_table, 146 lockdep_is_held(&sock_flow_mutex)); 147 size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; 148 149 ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); 150 151 if (write) { 152 if (size) { 153 if (size > 1<<29) { 154 /* Enforce limit to prevent overflow */ 155 mutex_unlock(&sock_flow_mutex); 156 return -EINVAL; 157 } 158 size = roundup_pow_of_two(size); 159 if (size != orig_size) { 160 sock_table = 161 vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size)); 162 if (!sock_table) { 163 mutex_unlock(&sock_flow_mutex); 164 return -ENOMEM; 165 } 166 net_hotdata.rps_cpu_mask = 167 roundup_pow_of_two(nr_cpu_ids) - 1; 168 sock_table->mask = size - 1; 169 } else 170 sock_table = orig_sock_table; 171 172 for (i = 0; i < size; i++) 173 sock_table->ents[i] = RPS_NO_CPU; 174 } else 175 sock_table = NULL; 176 177 if (sock_table != orig_sock_table) { 178 rcu_assign_pointer(net_hotdata.rps_sock_flow_table, 179 sock_table); 180 if (sock_table) { 181 static_branch_inc(&rps_needed); 182 static_branch_inc(&rfs_needed); 183 } 184 if (orig_sock_table) { 185 static_branch_dec(&rps_needed); 186 static_branch_dec(&rfs_needed); 187 kvfree_rcu_mightsleep(orig_sock_table); 188 } 189 } 190 } 191 192 mutex_unlock(&sock_flow_mutex); 193 194 return ret; 195 } 196 #endif /* CONFIG_RPS */ 197 198 #ifdef CONFIG_NET_FLOW_LIMIT 199 static DEFINE_MUTEX(flow_limit_update_mutex); 200 201 static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write, 202 void *buffer, size_t *lenp, loff_t *ppos) 203 { 204 struct sd_flow_limit *cur; 205 struct softnet_data *sd; 206 cpumask_var_t mask; 207 int i, len, ret = 0; 208 209 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 210 return -ENOMEM; 211 212 if (write) { 213 ret = cpumask_parse(buffer, mask); 214 if (ret) 215 goto done; 216 217 mutex_lock(&flow_limit_update_mutex); 218 len = sizeof(*cur) + netdev_flow_limit_table_len; 219 for_each_possible_cpu(i) { 220 sd = &per_cpu(softnet_data, i); 221 cur = rcu_dereference_protected(sd->flow_limit, 222 lockdep_is_held(&flow_limit_update_mutex)); 223 if (cur && !cpumask_test_cpu(i, mask)) { 224 RCU_INIT_POINTER(sd->flow_limit, NULL); 225 kfree_rcu_mightsleep(cur); 226 } else if (!cur && cpumask_test_cpu(i, mask)) { 227 cur = kzalloc_node(len, GFP_KERNEL, 228 cpu_to_node(i)); 229 if (!cur) { 230 /* not unwinding previous changes */ 231 ret = -ENOMEM; 232 goto write_unlock; 233 } 234 cur->num_buckets = netdev_flow_limit_table_len; 235 rcu_assign_pointer(sd->flow_limit, cur); 236 } 237 } 238 write_unlock: 239 mutex_unlock(&flow_limit_update_mutex); 240 } else { 241 cpumask_clear(mask); 242 rcu_read_lock(); 243 for_each_possible_cpu(i) { 244 sd = &per_cpu(softnet_data, i); 245 if (rcu_dereference(sd->flow_limit)) 246 cpumask_set_cpu(i, mask); 247 } 248 rcu_read_unlock(); 249 250 dump_cpumask(buffer, lenp, ppos, mask); 251 } 252 253 done: 254 free_cpumask_var(mask); 255 return ret; 256 } 257 258 static int flow_limit_table_len_sysctl(const struct ctl_table *table, int write, 259 void *buffer, size_t *lenp, loff_t *ppos) 260 { 261 unsigned int old, *ptr; 262 int ret; 263 264 mutex_lock(&flow_limit_update_mutex); 265 266 ptr = table->data; 267 old = *ptr; 268 ret = proc_dointvec(table, write, buffer, lenp, ppos); 269 if (!ret && write && !is_power_of_2(*ptr)) { 270 *ptr = old; 271 ret = -EINVAL; 272 } 273 274 mutex_unlock(&flow_limit_update_mutex); 275 return ret; 276 } 277 #endif /* CONFIG_NET_FLOW_LIMIT */ 278 279 #ifdef CONFIG_NET_SCHED 280 static int set_default_qdisc(const struct ctl_table *table, int write, 281 void *buffer, size_t *lenp, loff_t *ppos) 282 { 283 char id[IFNAMSIZ]; 284 struct ctl_table tbl = { 285 .data = id, 286 .maxlen = IFNAMSIZ, 287 }; 288 int ret; 289 290 qdisc_get_default(id, IFNAMSIZ); 291 292 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 293 if (write && ret == 0) 294 ret = qdisc_set_default(id); 295 return ret; 296 } 297 #endif 298 299 static int proc_do_dev_weight(const struct ctl_table *table, int write, 300 void *buffer, size_t *lenp, loff_t *ppos) 301 { 302 static DEFINE_MUTEX(dev_weight_mutex); 303 int ret, weight; 304 305 mutex_lock(&dev_weight_mutex); 306 ret = proc_dointvec(table, write, buffer, lenp, ppos); 307 if (!ret && write) { 308 weight = READ_ONCE(weight_p); 309 WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias); 310 WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias); 311 } 312 mutex_unlock(&dev_weight_mutex); 313 314 return ret; 315 } 316 317 static int proc_do_rss_key(const struct ctl_table *table, int write, 318 void *buffer, size_t *lenp, loff_t *ppos) 319 { 320 struct ctl_table fake_table; 321 char buf[NETDEV_RSS_KEY_LEN * 3]; 322 323 snprintf(buf, sizeof(buf), "%*phC", NETDEV_RSS_KEY_LEN, netdev_rss_key); 324 fake_table.data = buf; 325 fake_table.maxlen = sizeof(buf); 326 return proc_dostring(&fake_table, write, buffer, lenp, ppos); 327 } 328 329 #ifdef CONFIG_BPF_JIT 330 static int proc_dointvec_minmax_bpf_enable(const struct ctl_table *table, int write, 331 void *buffer, size_t *lenp, 332 loff_t *ppos) 333 { 334 int ret, jit_enable = *(int *)table->data; 335 int min = *(int *)table->extra1; 336 int max = *(int *)table->extra2; 337 struct ctl_table tmp = *table; 338 339 if (write && !capable(CAP_SYS_ADMIN)) 340 return -EPERM; 341 342 tmp.data = &jit_enable; 343 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 344 if (write && !ret) { 345 if (jit_enable < 2 || 346 (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) { 347 *(int *)table->data = jit_enable; 348 if (jit_enable == 2) 349 pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); 350 } else { 351 ret = -EPERM; 352 } 353 } 354 355 if (write && ret && min == max) 356 pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n"); 357 358 return ret; 359 } 360 361 # ifdef CONFIG_HAVE_EBPF_JIT 362 static int 363 proc_dointvec_minmax_bpf_restricted(const struct ctl_table *table, int write, 364 void *buffer, size_t *lenp, loff_t *ppos) 365 { 366 if (!capable(CAP_SYS_ADMIN)) 367 return -EPERM; 368 369 return proc_dointvec_minmax(table, write, buffer, lenp, ppos); 370 } 371 # endif /* CONFIG_HAVE_EBPF_JIT */ 372 373 static int 374 proc_dolongvec_minmax_bpf_restricted(const struct ctl_table *table, int write, 375 void *buffer, size_t *lenp, loff_t *ppos) 376 { 377 if (!capable(CAP_SYS_ADMIN)) 378 return -EPERM; 379 380 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); 381 } 382 #endif 383 384 static struct ctl_table net_core_table[] = { 385 { 386 .procname = "mem_pcpu_rsv", 387 .data = &net_hotdata.sysctl_mem_pcpu_rsv, 388 .maxlen = sizeof(int), 389 .mode = 0644, 390 .proc_handler = proc_dointvec_minmax, 391 .extra1 = &min_mem_pcpu_rsv, 392 }, 393 { 394 .procname = "dev_weight", 395 .data = &weight_p, 396 .maxlen = sizeof(int), 397 .mode = 0644, 398 .proc_handler = proc_do_dev_weight, 399 }, 400 { 401 .procname = "dev_weight_rx_bias", 402 .data = &dev_weight_rx_bias, 403 .maxlen = sizeof(int), 404 .mode = 0644, 405 .proc_handler = proc_do_dev_weight, 406 }, 407 { 408 .procname = "dev_weight_tx_bias", 409 .data = &dev_weight_tx_bias, 410 .maxlen = sizeof(int), 411 .mode = 0644, 412 .proc_handler = proc_do_dev_weight, 413 }, 414 { 415 .procname = "netdev_max_backlog", 416 .data = &net_hotdata.max_backlog, 417 .maxlen = sizeof(int), 418 .mode = 0644, 419 .proc_handler = proc_dointvec 420 }, 421 { 422 .procname = "netdev_rss_key", 423 .data = &netdev_rss_key, 424 .maxlen = sizeof(int), 425 .mode = 0444, 426 .proc_handler = proc_do_rss_key, 427 }, 428 #ifdef CONFIG_BPF_JIT 429 { 430 .procname = "bpf_jit_enable", 431 .data = &bpf_jit_enable, 432 .maxlen = sizeof(int), 433 .mode = 0644, 434 .proc_handler = proc_dointvec_minmax_bpf_enable, 435 # ifdef CONFIG_BPF_JIT_ALWAYS_ON 436 .extra1 = SYSCTL_ONE, 437 .extra2 = SYSCTL_ONE, 438 # else 439 .extra1 = SYSCTL_ZERO, 440 .extra2 = SYSCTL_TWO, 441 # endif 442 }, 443 # ifdef CONFIG_HAVE_EBPF_JIT 444 { 445 .procname = "bpf_jit_harden", 446 .data = &bpf_jit_harden, 447 .maxlen = sizeof(int), 448 .mode = 0600, 449 .proc_handler = proc_dointvec_minmax_bpf_restricted, 450 .extra1 = SYSCTL_ZERO, 451 .extra2 = SYSCTL_TWO, 452 }, 453 { 454 .procname = "bpf_jit_kallsyms", 455 .data = &bpf_jit_kallsyms, 456 .maxlen = sizeof(int), 457 .mode = 0600, 458 .proc_handler = proc_dointvec_minmax_bpf_restricted, 459 .extra1 = SYSCTL_ZERO, 460 .extra2 = SYSCTL_ONE, 461 }, 462 # endif 463 { 464 .procname = "bpf_jit_limit", 465 .data = &bpf_jit_limit, 466 .maxlen = sizeof(long), 467 .mode = 0600, 468 .proc_handler = proc_dolongvec_minmax_bpf_restricted, 469 .extra1 = SYSCTL_LONG_ONE, 470 .extra2 = &bpf_jit_limit_max, 471 }, 472 #endif 473 { 474 .procname = "netdev_tstamp_prequeue", 475 .data = &net_hotdata.tstamp_prequeue, 476 .maxlen = sizeof(int), 477 .mode = 0644, 478 .proc_handler = proc_dointvec 479 }, 480 { 481 .procname = "message_cost", 482 .data = &net_ratelimit_state.interval, 483 .maxlen = sizeof(int), 484 .mode = 0644, 485 .proc_handler = proc_dointvec_jiffies, 486 }, 487 { 488 .procname = "message_burst", 489 .data = &net_ratelimit_state.burst, 490 .maxlen = sizeof(int), 491 .mode = 0644, 492 .proc_handler = proc_dointvec, 493 }, 494 { 495 .procname = "tstamp_allow_data", 496 .data = &sysctl_tstamp_allow_data, 497 .maxlen = sizeof(int), 498 .mode = 0644, 499 .proc_handler = proc_dointvec_minmax, 500 .extra1 = SYSCTL_ZERO, 501 .extra2 = SYSCTL_ONE 502 }, 503 #ifdef CONFIG_RPS 504 { 505 .procname = "rps_sock_flow_entries", 506 .maxlen = sizeof(int), 507 .mode = 0644, 508 .proc_handler = rps_sock_flow_sysctl 509 }, 510 #endif 511 #ifdef CONFIG_NET_FLOW_LIMIT 512 { 513 .procname = "flow_limit_cpu_bitmap", 514 .mode = 0644, 515 .proc_handler = flow_limit_cpu_sysctl 516 }, 517 { 518 .procname = "flow_limit_table_len", 519 .data = &netdev_flow_limit_table_len, 520 .maxlen = sizeof(int), 521 .mode = 0644, 522 .proc_handler = flow_limit_table_len_sysctl 523 }, 524 #endif /* CONFIG_NET_FLOW_LIMIT */ 525 #ifdef CONFIG_NET_RX_BUSY_POLL 526 { 527 .procname = "busy_poll", 528 .data = &sysctl_net_busy_poll, 529 .maxlen = sizeof(unsigned int), 530 .mode = 0644, 531 .proc_handler = proc_dointvec_minmax, 532 .extra1 = SYSCTL_ZERO, 533 }, 534 { 535 .procname = "busy_read", 536 .data = &sysctl_net_busy_read, 537 .maxlen = sizeof(unsigned int), 538 .mode = 0644, 539 .proc_handler = proc_dointvec_minmax, 540 .extra1 = SYSCTL_ZERO, 541 }, 542 #endif 543 #ifdef CONFIG_NET_SCHED 544 { 545 .procname = "default_qdisc", 546 .mode = 0644, 547 .maxlen = IFNAMSIZ, 548 .proc_handler = set_default_qdisc 549 }, 550 #endif 551 { 552 .procname = "netdev_budget", 553 .data = &net_hotdata.netdev_budget, 554 .maxlen = sizeof(int), 555 .mode = 0644, 556 .proc_handler = proc_dointvec 557 }, 558 { 559 .procname = "warnings", 560 .data = &net_msg_warn, 561 .maxlen = sizeof(int), 562 .mode = 0644, 563 .proc_handler = proc_dointvec 564 }, 565 { 566 .procname = "max_skb_frags", 567 .data = &net_hotdata.sysctl_max_skb_frags, 568 .maxlen = sizeof(int), 569 .mode = 0644, 570 .proc_handler = proc_dointvec_minmax, 571 .extra1 = SYSCTL_ONE, 572 .extra2 = &max_skb_frags, 573 }, 574 { 575 .procname = "netdev_budget_usecs", 576 .data = &net_hotdata.netdev_budget_usecs, 577 .maxlen = sizeof(unsigned int), 578 .mode = 0644, 579 .proc_handler = proc_dointvec_minmax, 580 .extra1 = SYSCTL_ZERO, 581 }, 582 { 583 .procname = "fb_tunnels_only_for_init_net", 584 .data = &sysctl_fb_tunnels_only_for_init_net, 585 .maxlen = sizeof(int), 586 .mode = 0644, 587 .proc_handler = proc_dointvec_minmax, 588 .extra1 = SYSCTL_ZERO, 589 .extra2 = SYSCTL_TWO, 590 }, 591 { 592 .procname = "devconf_inherit_init_net", 593 .data = &sysctl_devconf_inherit_init_net, 594 .maxlen = sizeof(int), 595 .mode = 0644, 596 .proc_handler = proc_dointvec_minmax, 597 .extra1 = SYSCTL_ZERO, 598 .extra2 = SYSCTL_THREE, 599 }, 600 { 601 .procname = "high_order_alloc_disable", 602 .data = &net_high_order_alloc_disable_key.key, 603 .maxlen = sizeof(net_high_order_alloc_disable_key), 604 .mode = 0644, 605 .proc_handler = proc_do_static_key, 606 }, 607 { 608 .procname = "gro_normal_batch", 609 .data = &net_hotdata.gro_normal_batch, 610 .maxlen = sizeof(unsigned int), 611 .mode = 0644, 612 .proc_handler = proc_dointvec_minmax, 613 .extra1 = SYSCTL_ONE, 614 }, 615 { 616 .procname = "netdev_unregister_timeout_secs", 617 .data = &netdev_unregister_timeout_secs, 618 .maxlen = sizeof(unsigned int), 619 .mode = 0644, 620 .proc_handler = proc_dointvec_minmax, 621 .extra1 = SYSCTL_ONE, 622 .extra2 = &int_3600, 623 }, 624 { 625 .procname = "skb_defer_max", 626 .data = &net_hotdata.sysctl_skb_defer_max, 627 .maxlen = sizeof(unsigned int), 628 .mode = 0644, 629 .proc_handler = proc_dointvec_minmax, 630 .extra1 = SYSCTL_ZERO, 631 }, 632 }; 633 634 static struct ctl_table netns_core_table[] = { 635 #if IS_ENABLED(CONFIG_RPS) 636 { 637 .procname = "rps_default_mask", 638 .data = &init_net, 639 .mode = 0644, 640 .proc_handler = rps_default_mask_sysctl 641 }, 642 #endif 643 { 644 .procname = "somaxconn", 645 .data = &init_net.core.sysctl_somaxconn, 646 .maxlen = sizeof(int), 647 .mode = 0644, 648 .extra1 = SYSCTL_ZERO, 649 .proc_handler = proc_dointvec_minmax 650 }, 651 { 652 .procname = "optmem_max", 653 .data = &init_net.core.sysctl_optmem_max, 654 .maxlen = sizeof(int), 655 .mode = 0644, 656 .extra1 = SYSCTL_ZERO, 657 .proc_handler = proc_dointvec_minmax 658 }, 659 { 660 .procname = "txrehash", 661 .data = &init_net.core.sysctl_txrehash, 662 .maxlen = sizeof(u8), 663 .mode = 0644, 664 .extra1 = SYSCTL_ZERO, 665 .extra2 = SYSCTL_ONE, 666 .proc_handler = proc_dou8vec_minmax, 667 }, 668 /* sysctl_core_net_init() will set the values after this 669 * to readonly in network namespaces 670 */ 671 { 672 .procname = "wmem_max", 673 .data = &sysctl_wmem_max, 674 .maxlen = sizeof(int), 675 .mode = 0644, 676 .proc_handler = proc_dointvec_minmax, 677 .extra1 = &min_sndbuf, 678 }, 679 { 680 .procname = "rmem_max", 681 .data = &sysctl_rmem_max, 682 .maxlen = sizeof(int), 683 .mode = 0644, 684 .proc_handler = proc_dointvec_minmax, 685 .extra1 = &min_rcvbuf, 686 }, 687 { 688 .procname = "wmem_default", 689 .data = &sysctl_wmem_default, 690 .maxlen = sizeof(int), 691 .mode = 0644, 692 .proc_handler = proc_dointvec_minmax, 693 .extra1 = &min_sndbuf, 694 }, 695 { 696 .procname = "rmem_default", 697 .data = &sysctl_rmem_default, 698 .maxlen = sizeof(int), 699 .mode = 0644, 700 .proc_handler = proc_dointvec_minmax, 701 .extra1 = &min_rcvbuf, 702 }, 703 }; 704 705 static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str) 706 { 707 /* fallback tunnels for initns only */ 708 if (!strncmp(str, "initns", 6)) 709 sysctl_fb_tunnels_only_for_init_net = 1; 710 /* no fallback tunnels anywhere */ 711 else if (!strncmp(str, "none", 4)) 712 sysctl_fb_tunnels_only_for_init_net = 2; 713 714 return 1; 715 } 716 __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup); 717 718 static __net_init int sysctl_core_net_init(struct net *net) 719 { 720 size_t table_size = ARRAY_SIZE(netns_core_table); 721 struct ctl_table *tbl; 722 723 tbl = netns_core_table; 724 if (!net_eq(net, &init_net)) { 725 int i; 726 tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); 727 if (tbl == NULL) 728 goto err_dup; 729 730 for (i = 0; i < table_size; ++i) { 731 if (tbl[i].data == &sysctl_wmem_max) 732 break; 733 734 tbl[i].data += (char *)net - (char *)&init_net; 735 } 736 for (; i < table_size; ++i) 737 tbl[i].mode &= ~0222; 738 } 739 740 net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size); 741 if (net->core.sysctl_hdr == NULL) 742 goto err_reg; 743 744 return 0; 745 746 err_reg: 747 if (tbl != netns_core_table) 748 kfree(tbl); 749 err_dup: 750 return -ENOMEM; 751 } 752 753 static __net_exit void sysctl_core_net_exit(struct net *net) 754 { 755 const struct ctl_table *tbl; 756 757 tbl = net->core.sysctl_hdr->ctl_table_arg; 758 unregister_net_sysctl_table(net->core.sysctl_hdr); 759 BUG_ON(tbl == netns_core_table); 760 #if IS_ENABLED(CONFIG_RPS) 761 kfree(net->core.rps_default_mask); 762 #endif 763 kfree(tbl); 764 } 765 766 static __net_initdata struct pernet_operations sysctl_core_ops = { 767 .init = sysctl_core_net_init, 768 .exit = sysctl_core_net_exit, 769 }; 770 771 static __init int sysctl_core_init(void) 772 { 773 register_net_sysctl(&init_net, "net/core", net_core_table); 774 return register_pernet_subsys(&sysctl_core_ops); 775 } 776 777 fs_initcall(sysctl_core_init); 778