// SPDX-License-Identifier: GPL-2.0
/* -*- linux-c -*-
 * sysctl_net_core.c: sysctl interface to net core subsystem.
 *
 * Begun April 1, 1996, Mike Shaver.
 * Added /proc/sys/net/core directory entry (empty =) ). [MS]
 */

#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/sched/isolation.h>

#include <net/ip.h>
#include <net/sock.h>
#include <net/net_ratelimit.h>
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
#include <net/hotdata.h>
#include <net/proto_memory.h>
#include <net/rps.h>

#include "dev.h"
#include "net-sysfs.h"

static int int_3600 = 3600;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE;
static int netdev_budget_usecs_min = 2 * USEC_PER_SEC / HZ;

static int net_msg_warn;	/* Unused, but still a sysctl */

int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);

/* 0 - Keep current behavior:
 *     IPv4: inherit all current settings from init_net
 *     IPv6: reset all settings to default
 * 1 - Both inherit all current settings from init_net
 * 2 - Both reset all settings to default
 * 3 - Both inherit all settings from current netns
 */
int sysctl_devconf_inherit_init_net __read_mostly;
EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);

#if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS)
static int dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
			struct cpumask *mask)
{
	char *kbuf;
	int len;

	if (*ppos || !*lenp) {
		*lenp = 0;
		return 0;
	}

	/* CPUs are displayed as a hex bitmap + a comma between each group of 8
	 * nibbles (except the last one, which has a newline instead).
	 * Guesstimate the buffer size at the group granularity level.
	 */
	len = min(DIV_ROUND_UP(nr_cpumask_bits, 32) * (8 + 1), *lenp);
	kbuf = kmalloc(len, GFP_KERNEL);
	if (!kbuf) {
		*lenp = 0;
		return -ENOMEM;
	}

	len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
	if (!len) {
		*lenp = 0;
		goto free_buf;
	}

	/* scnprintf writes a trailing null char not counted in the returned
	 * length, overwrite it with a newline.
	 */
	kbuf[len++] = '\n';
	memcpy(buffer, kbuf, len);
	*lenp = len;
	*ppos += len;

free_buf:
	kfree(kbuf);
	return 0;
}
#endif
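
/* Illustrative output of dump_cpumask() above (editor's sketch; the
 * values are assumed, not taken from a specific machine): with
 * nr_cpumask_bits == 64 and CPUs 0-3 set in @mask, userspace reading
 * the sysctl would see
 *
 *	00000000,0000000f
 *
 * i.e. the "%*pb" hex bitmap with a comma between each group of 8
 * nibbles and the trailing NUL replaced by a newline.
 */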

#ifdef CONFIG_RPS

DEFINE_MUTEX(rps_default_mask_mutex);

static int rps_default_mask_sysctl(const struct ctl_table *table, int write,
				   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = (struct net *)table->data;
	struct cpumask *mask;
	int err = 0;

	mutex_lock(&rps_default_mask_mutex);
	mask = net->core.rps_default_mask;
	if (write) {
		if (!mask) {
			mask = kzalloc(cpumask_size(), GFP_KERNEL);
			net->core.rps_default_mask = mask;
		}
		err = -ENOMEM;
		if (!mask)
			goto done;

		err = cpumask_parse(buffer, mask);
		if (err)
			goto done;

		err = rps_cpumask_housekeeping(mask);
		if (err)
			goto done;
	} else {
		err = dump_cpumask(buffer, lenp, ppos,
				   mask ?: cpu_none_mask);
	}

done:
	mutex_unlock(&rps_default_mask_mutex);
	return err;
}

static int rps_sock_flow_sysctl(const struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int orig_size, size;
	int ret, i;
	struct ctl_table tmp = {
		.data = &size,
		.maxlen = sizeof(size),
		.mode = table->mode
	};
	struct rps_sock_flow_table *orig_sock_table, *sock_table;
	static DEFINE_MUTEX(sock_flow_mutex);

	mutex_lock(&sock_flow_mutex);

	orig_sock_table = rcu_dereference_protected(
					net_hotdata.rps_sock_flow_table,
					lockdep_is_held(&sock_flow_mutex));
	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;

	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

	if (write) {
		if (size) {
			if (size > 1 << 29) {
				/* Enforce limit to prevent overflow */
				mutex_unlock(&sock_flow_mutex);
				return -EINVAL;
			}
			size = roundup_pow_of_two(size);
			if (size != orig_size) {
				sock_table =
				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
				if (!sock_table) {
					mutex_unlock(&sock_flow_mutex);
					return -ENOMEM;
				}
				net_hotdata.rps_cpu_mask =
					roundup_pow_of_two(nr_cpu_ids) - 1;
				sock_table->mask = size - 1;
			} else
				sock_table = orig_sock_table;

			for (i = 0; i < size; i++)
				sock_table->ents[i] = RPS_NO_CPU;
		} else
			sock_table = NULL;

		if (sock_table != orig_sock_table) {
			rcu_assign_pointer(net_hotdata.rps_sock_flow_table,
					   sock_table);
			if (sock_table) {
				static_branch_inc(&rps_needed);
				static_branch_inc(&rfs_needed);
			}
			if (orig_sock_table) {
				static_branch_dec(&rps_needed);
				static_branch_dec(&rfs_needed);
				kvfree_rcu(orig_sock_table, rcu);
			}
		}
	}

	mutex_unlock(&sock_flow_mutex);

	return ret;
}
#endif /* CONFIG_RPS */
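
/* Usage sketch for rps_sock_flow_sysctl() (editor's note; the value is
 * illustrative):
 *
 *	echo 32768 > /proc/sys/net/core/rps_sock_flow_entries
 *
 * rounds the request up to a power of two, allocates a fresh flow
 * table, publishes it with rcu_assign_pointer() and frees any previous
 * table after a grace period via kvfree_rcu(); writing 0 tears an
 * existing table down and drops the rps/rfs static branches.
 */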

#ifdef CONFIG_NET_FLOW_LIMIT
static DEFINE_MUTEX(flow_limit_update_mutex);

static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	struct sd_flow_limit *cur;
	struct softnet_data *sd;
	cpumask_var_t mask;
	int i, len, ret = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	if (write) {
		ret = cpumask_parse(buffer, mask);
		if (ret)
			goto done;

		mutex_lock(&flow_limit_update_mutex);
		len = sizeof(*cur) + netdev_flow_limit_table_len;
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			cur = rcu_dereference_protected(sd->flow_limit,
				     lockdep_is_held(&flow_limit_update_mutex));
			if (cur && !cpumask_test_cpu(i, mask)) {
				RCU_INIT_POINTER(sd->flow_limit, NULL);
				kfree_rcu(cur, rcu);
			} else if (!cur && cpumask_test_cpu(i, mask)) {
				cur = kzalloc_node(len, GFP_KERNEL,
						   cpu_to_node(i));
				if (!cur) {
					/* not unwinding previous changes */
					ret = -ENOMEM;
					goto write_unlock;
				}
				cur->log_buckets = ilog2(netdev_flow_limit_table_len);
				rcu_assign_pointer(sd->flow_limit, cur);
			}
		}
write_unlock:
		mutex_unlock(&flow_limit_update_mutex);
	} else {
		cpumask_clear(mask);
		rcu_read_lock();
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			if (rcu_dereference(sd->flow_limit))
				cpumask_set_cpu(i, mask);
		}
		rcu_read_unlock();

		ret = dump_cpumask(buffer, lenp, ppos, mask);
	}

done:
	free_cpumask_var(mask);
	return ret;
}

static int flow_limit_table_len_sysctl(const struct ctl_table *table, int write,
				       void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int old, *ptr;
	int ret;

	mutex_lock(&flow_limit_update_mutex);

	ptr = table->data;
	old = *ptr;
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write && !is_power_of_2(*ptr)) {
		*ptr = old;
		ret = -EINVAL;
	}

	mutex_unlock(&flow_limit_update_mutex);
	return ret;
}
#endif /* CONFIG_NET_FLOW_LIMIT */

#ifdef CONFIG_NET_SCHED
static int set_default_qdisc(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	char id[IFNAMSIZ];
	struct ctl_table tbl = {
		.data = id,
		.maxlen = IFNAMSIZ,
	};
	int ret;

	qdisc_get_default(id, IFNAMSIZ);

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0)
		ret = qdisc_set_default(id);
	return ret;
}
#endif

static int proc_do_dev_weight(const struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	static DEFINE_MUTEX(dev_weight_mutex);
	int ret, weight;

	mutex_lock(&dev_weight_mutex);
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!ret && write) {
		weight = READ_ONCE(weight_p);
		WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias);
		WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias);
	}
	mutex_unlock(&dev_weight_mutex);

	return ret;
}

static int proc_do_rss_key(const struct ctl_table *table, int write,
			   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table fake_table;
	char buf[NETDEV_RSS_KEY_LEN * 3];

	snprintf(buf, sizeof(buf), "%*phC", NETDEV_RSS_KEY_LEN, netdev_rss_key);
	fake_table.data = buf;
	fake_table.maxlen = sizeof(buf);
	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}

#ifdef CONFIG_BPF_JIT
static int proc_dointvec_minmax_bpf_enable(const struct ctl_table *table, int write,
					   void *buffer, size_t *lenp,
					   loff_t *ppos)
{
	int ret, jit_enable = *(int *)table->data;
	int min = *(int *)table->extra1;
	int max = *(int *)table->extra2;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	tmp.data = &jit_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (jit_enable < 2 ||
		    (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) {
			*(int *)table->data = jit_enable;
			if (jit_enable == 2)
				pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
		} else {
			ret = -EPERM;
		}
	}

	if (write && ret && min == max)
		pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n");

	return ret;
}
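
/* Semantics sketch for the handler above (editor's note; the 0/1/2
 * range comes from the "bpf_jit_enable" table entry below): 0 disables
 * the JIT, 1 enables it, and 2 enables it with debug output, e.g.
 *
 *	echo 2 > /proc/sys/net/core/bpf_jit_enable
 *
 * is additionally gated on bpf_dump_raw_ok() and triggers the pr_warn()
 * above; with CONFIG_BPF_JIT_ALWAYS_ON the value is pinned to 1.
 */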

# ifdef CONFIG_HAVE_EBPF_JIT
static int
proc_dointvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
# endif /* CONFIG_HAVE_EBPF_JIT */

static int
proc_dolongvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
				     void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#endif
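
/* Editor's note: each entry below appears as
 * /proc/sys/net/core/<procname>; e.g. the "netdev_max_backlog" entry is
 * read and written through /proc/sys/net/core/netdev_max_backlog (or
 * "sysctl net.core.netdev_max_backlog"), with .extra1/.extra2 acting as
 * lower/upper bounds for the *_minmax handlers.
 */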

static struct ctl_table net_core_table[] = {
	{
		.procname = "mem_pcpu_rsv",
		.data = &net_hotdata.sysctl_mem_pcpu_rsv,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &min_mem_pcpu_rsv,
	},
	{
		.procname = "dev_weight",
		.data = &weight_p,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_do_dev_weight,
		.extra1 = SYSCTL_ONE,
	},
	{
		.procname = "dev_weight_rx_bias",
		.data = &dev_weight_rx_bias,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_do_dev_weight,
		.extra1 = SYSCTL_ONE,
	},
	{
		.procname = "dev_weight_tx_bias",
		.data = &dev_weight_tx_bias,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_do_dev_weight,
		.extra1 = SYSCTL_ONE,
	},
	{
		.procname = "netdev_max_backlog",
		.data = &net_hotdata.max_backlog,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec
	},
	{
		.procname = "netdev_rss_key",
		.data = &netdev_rss_key,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = proc_do_rss_key,
	},
#ifdef CONFIG_BPF_JIT
	{
		.procname = "bpf_jit_enable",
		.data = &bpf_jit_enable,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax_bpf_enable,
# ifdef CONFIG_BPF_JIT_ALWAYS_ON
		.extra1 = SYSCTL_ONE,
		.extra2 = SYSCTL_ONE,
# else
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_TWO,
# endif
	},
# ifdef CONFIG_HAVE_EBPF_JIT
	{
		.procname = "bpf_jit_harden",
		.data = &bpf_jit_harden,
		.maxlen = sizeof(int),
		.mode = 0600,
		.proc_handler = proc_dointvec_minmax_bpf_restricted,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_TWO,
	},
	{
		.procname = "bpf_jit_kallsyms",
		.data = &bpf_jit_kallsyms,
		.maxlen = sizeof(int),
		.mode = 0600,
		.proc_handler = proc_dointvec_minmax_bpf_restricted,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	},
# endif
	{
		.procname = "bpf_jit_limit",
		.data = &bpf_jit_limit,
		.maxlen = sizeof(long),
		.mode = 0600,
		.proc_handler = proc_dolongvec_minmax_bpf_restricted,
		.extra1 = SYSCTL_LONG_ONE,
		.extra2 = &bpf_jit_limit_max,
	},
#endif
	{
		.procname = "netdev_tstamp_prequeue",
		.data = &net_hotdata.tstamp_prequeue,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec
	},
	{
		.procname = "message_cost",
		.data = &net_ratelimit_state.interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "message_burst",
		.data = &net_ratelimit_state.burst,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
#ifdef CONFIG_RPS
	{
		.procname = "rps_sock_flow_entries",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = rps_sock_flow_sysctl
	},
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
	{
		.procname = "flow_limit_cpu_bitmap",
		.mode = 0644,
		.proc_handler = flow_limit_cpu_sysctl
	},
	{
		.procname = "flow_limit_table_len",
		.data = &netdev_flow_limit_table_len,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = flow_limit_table_len_sysctl
	},
#endif /* CONFIG_NET_FLOW_LIMIT */
#ifdef CONFIG_NET_RX_BUSY_POLL
	{
		.procname = "busy_poll",
		.data = &sysctl_net_busy_poll,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
	},
	{
		.procname = "busy_read",
		.data = &sysctl_net_busy_read,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
	},
#endif
#ifdef CONFIG_NET_SCHED
	{
		.procname = "default_qdisc",
		.mode = 0644,
		.maxlen = IFNAMSIZ,
		.proc_handler = set_default_qdisc
	},
#endif
	{
		.procname = "netdev_budget",
		.data = &net_hotdata.netdev_budget,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec
	},
	{
		.procname = "warnings",
		.data = &net_msg_warn,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec
	},
	{
		.procname = "max_skb_frags",
		.data = &net_hotdata.sysctl_max_skb_frags,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ONE,
		.extra2 = &max_skb_frags,
	},
	{
		.procname = "netdev_budget_usecs",
		.data = &net_hotdata.netdev_budget_usecs,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &netdev_budget_usecs_min,
	},
	{
		.procname = "fb_tunnels_only_for_init_net",
		.data = &sysctl_fb_tunnels_only_for_init_net,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_TWO,
	},
	{
		.procname = "devconf_inherit_init_net",
		.data = &sysctl_devconf_inherit_init_net,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_THREE,
	},
	{
		.procname = "high_order_alloc_disable",
		.data = &net_high_order_alloc_disable_key.key,
		.maxlen = sizeof(net_high_order_alloc_disable_key),
		.mode = 0644,
		.proc_handler = proc_do_static_key,
	},
	{
		.procname = "gro_normal_batch",
		.data = &net_hotdata.gro_normal_batch,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ONE,
	},
	{
		.procname = "netdev_unregister_timeout_secs",
		.data = &netdev_unregister_timeout_secs,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ONE,
		.extra2 = &int_3600,
	},
	{
		.procname = "skb_defer_max",
		.data = &net_hotdata.sysctl_skb_defer_max,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
	},
};
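
/* Editor's note: net_core_table above is registered once, for init_net,
 * by sysctl_core_init() below. netns_core_table is instead duplicated
 * for every other namespace by sysctl_core_net_init(), which rebases
 * each .data pointer into that netns and clears the write bits on the
 * entries from "wmem_max" onward, so those stay writable only in
 * init_net.
 */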
static struct ctl_table netns_core_table[] = {
#if IS_ENABLED(CONFIG_RPS)
	{
		.procname = "rps_default_mask",
		.data = &init_net,
		.mode = 0644,
		.proc_handler = rps_default_mask_sysctl
	},
#endif
	{
		.procname = "somaxconn",
		.data = &init_net.core.sysctl_somaxconn,
		.maxlen = sizeof(int),
		.mode = 0644,
		.extra1 = SYSCTL_ZERO,
		.proc_handler = proc_dointvec_minmax
	},
	{
		.procname = "optmem_max",
		.data = &init_net.core.sysctl_optmem_max,
		.maxlen = sizeof(int),
		.mode = 0644,
		.extra1 = SYSCTL_ZERO,
		.proc_handler = proc_dointvec_minmax
	},
	{
		.procname = "txrehash",
		.data = &init_net.core.sysctl_txrehash,
		.maxlen = sizeof(u8),
		.mode = 0644,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname = "tstamp_allow_data",
		.data = &init_net.core.sysctl_tstamp_allow_data,
		.maxlen = sizeof(u8),
		.mode = 0644,
		.proc_handler = proc_dou8vec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE
	},
	/* sysctl_core_net_init() will set the values after this
	 * to read-only in network namespaces.
	 */
	{
		.procname = "wmem_max",
		.data = &sysctl_wmem_max,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &min_sndbuf,
	},
	{
		.procname = "rmem_max",
		.data = &sysctl_rmem_max,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &min_rcvbuf,
	},
	{
		.procname = "wmem_default",
		.data = &sysctl_wmem_default,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &min_sndbuf,
	},
	{
		.procname = "rmem_default",
		.data = &sysctl_rmem_default,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &min_rcvbuf,
	},
};
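
/* Boot-time alternative to the "fb_tunnels_only_for_init_net" sysctl
 * (editor's note): passing "fb_tunnels=initns" on the kernel command
 * line selects policy 1 and "fb_tunnels=none" selects policy 2, before
 * userspace ever runs.
 */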
static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str)
{
	/* fallback tunnels for initns only */
	if (!strncmp(str, "initns", 6))
		sysctl_fb_tunnels_only_for_init_net = 1;
	/* no fallback tunnels anywhere */
	else if (!strncmp(str, "none", 4))
		sysctl_fb_tunnels_only_for_init_net = 2;

	return 1;
}
__setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);

static __net_init int sysctl_core_net_init(struct net *net)
{
	size_t table_size = ARRAY_SIZE(netns_core_table);
	struct ctl_table *tbl;

	tbl = netns_core_table;
	if (!net_eq(net, &init_net)) {
		int i;

		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
		if (tbl == NULL)
			goto err_dup;

		for (i = 0; i < table_size; ++i) {
			if (tbl[i].data == &sysctl_wmem_max)
				break;

			tbl[i].data += (char *)net - (char *)&init_net;
		}
		for (; i < table_size; ++i)
			tbl[i].mode &= ~0222;
	}

	net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size);
	if (net->core.sysctl_hdr == NULL)
		goto err_reg;

	return 0;

err_reg:
	if (tbl != netns_core_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}

static __net_exit void sysctl_core_net_exit(struct net *net)
{
	const struct ctl_table *tbl;

	tbl = net->core.sysctl_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->core.sysctl_hdr);
	BUG_ON(tbl == netns_core_table);
#if IS_ENABLED(CONFIG_RPS)
	kfree(net->core.rps_default_mask);
#endif
	kfree(tbl);
}

static __net_initdata struct pernet_operations sysctl_core_ops = {
	.init = sysctl_core_net_init,
	.exit = sysctl_core_net_exit,
};

static __init int sysctl_core_init(void)
{
	register_net_sysctl(&init_net, "net/core", net_core_table);
	return register_pernet_subsys(&sysctl_core_ops);
}

fs_initcall(sysctl_core_init);
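
/* Lifecycle sketch (editor's note): sysctl_core_init() runs at
 * fs_initcall time, registers net_core_table under "net/core" for
 * init_net, and then registers sysctl_core_ops, so every network
 * namespace created afterwards gets its own netns_core_table copy via
 * sysctl_core_net_init() and releases it in sysctl_core_net_exit().
 */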