// SPDX-License-Identifier: GPL-2.0
/* -*- linux-c -*-
 * sysctl_net_core.c: sysctl interface to net core subsystem.
 *
 * Begun April 1, 1996, Mike Shaver.
 * Added /proc/sys/net/core directory entry (empty =) ). [MS]
 */

#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/sched/isolation.h>

#include <net/ip.h>
#include <net/sock.h>
#include <net/net_ratelimit.h>
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
#include <net/hotdata.h>
#include <net/proto_memory.h>
#include <net/rps.h>

#include "dev.h"

static int int_3600 = 3600;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE;

static int net_msg_warn;	/* Unused, but still a sysctl */

int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);

/* 0 - Keep current behavior:
 *     IPv4: inherit all current settings from init_net
 *     IPv6: reset all settings to default
 * 1 - Both inherit all current settings from init_net
 * 2 - Both reset all settings to default
 * 3 - Both inherit all settings from current netns
 */
int sysctl_devconf_inherit_init_net __read_mostly;
EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);

#if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS)
static int dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
			struct cpumask *mask)
{
	char *kbuf;
	int len;

	if (*ppos || !*lenp) {
		*lenp = 0;
		return 0;
	}

	/* CPUs are displayed as a hex bitmap + a comma between each group of
	 * 8 nibbles (except the last one, which has a newline instead).
	 * Guesstimate the buffer size at the group granularity level.
	 */
	len = min(DIV_ROUND_UP(nr_cpumask_bits, 32) * (8 + 1), *lenp);
	kbuf = kmalloc(len, GFP_KERNEL);
	if (!kbuf) {
		*lenp = 0;
		return -ENOMEM;
	}

	len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
	if (!len) {
		*lenp = 0;
		goto free_buf;
	}

	/* scnprintf() writes a trailing NUL that is not counted in the
	 * returned length; override it with a newline.
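	 * Since scnprintf() always returns strictly less than the size it
	 * was given, kbuf[len] is guaranteed to be an in-bounds byte.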
	 */
	kbuf[len++] = '\n';
	memcpy(buffer, kbuf, len);
	*lenp = len;
	*ppos += len;

free_buf:
	kfree(kbuf);
	return 0;
}
#endif

#ifdef CONFIG_RPS

static struct cpumask *rps_default_mask_cow_alloc(struct net *net)
{
	struct cpumask *rps_default_mask;

	if (net->core.rps_default_mask)
		return net->core.rps_default_mask;

	rps_default_mask = kzalloc(cpumask_size(), GFP_KERNEL);
	if (!rps_default_mask)
		return NULL;

	/* pairs with READ_ONCE in rx_queue_default_mask() */
	WRITE_ONCE(net->core.rps_default_mask, rps_default_mask);
	return rps_default_mask;
}

static int rps_default_mask_sysctl(const struct ctl_table *table, int write,
				   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = (struct net *)table->data;
	int err = 0;

	rtnl_lock();
	if (write) {
		struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net);

		err = -ENOMEM;
		if (!rps_default_mask)
			goto done;

		err = cpumask_parse(buffer, rps_default_mask);
		if (err)
			goto done;

		err = rps_cpumask_housekeeping(rps_default_mask);
		if (err)
			goto done;
	} else {
		err = dump_cpumask(buffer, lenp, ppos,
				   net->core.rps_default_mask ? : cpu_none_mask);
	}

done:
	rtnl_unlock();
	return err;
}

static int rps_sock_flow_sysctl(const struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int orig_size, size;
	int ret, i;
	struct ctl_table tmp = {
		.data = &size,
		.maxlen = sizeof(size),
		.mode = table->mode
	};
	struct rps_sock_flow_table *orig_sock_table, *sock_table;
	static DEFINE_MUTEX(sock_flow_mutex);

	mutex_lock(&sock_flow_mutex);

	orig_sock_table = rcu_dereference_protected(
					net_hotdata.rps_sock_flow_table,
					lockdep_is_held(&sock_flow_mutex));
	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;

	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

	if (write) {
		if (size) {
			if (size > 1<<29) {
				/* Enforce limit to prevent overflow */
				mutex_unlock(&sock_flow_mutex);
				return -EINVAL;
			}
			size = roundup_pow_of_two(size);
			if (size != orig_size) {
				sock_table =
				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
				if (!sock_table) {
					mutex_unlock(&sock_flow_mutex);
					return -ENOMEM;
				}
				net_hotdata.rps_cpu_mask =
					roundup_pow_of_two(nr_cpu_ids) - 1;
				sock_table->mask = size - 1;
			} else
				sock_table = orig_sock_table;

			for (i = 0; i < size; i++)
				sock_table->ents[i] = RPS_NO_CPU;
		} else
			sock_table = NULL;

		if (sock_table != orig_sock_table) {
			rcu_assign_pointer(net_hotdata.rps_sock_flow_table,
					   sock_table);
			if (sock_table) {
				static_branch_inc(&rps_needed);
				static_branch_inc(&rfs_needed);
			}
			if (orig_sock_table) {
				static_branch_dec(&rps_needed);
				static_branch_dec(&rfs_needed);
				kvfree_rcu_mightsleep(orig_sock_table);
			}
		}
	}

	mutex_unlock(&sock_flow_mutex);

	return ret;
}
#endif /* CONFIG_RPS */

#ifdef CONFIG_NET_FLOW_LIMIT
static DEFINE_MUTEX(flow_limit_update_mutex);

static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	struct sd_flow_limit *cur;
	struct softnet_data *sd;
	cpumask_var_t mask;
	int i, len, ret = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	if (write) {
		ret = cpumask_parse(buffer, mask);
		if (ret)
			goto done;

		mutex_lock(&flow_limit_update_mutex);
		len = sizeof(*cur) + netdev_flow_limit_table_len;
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			cur = rcu_dereference_protected(sd->flow_limit,
				     lockdep_is_held(&flow_limit_update_mutex));
			if (cur && !cpumask_test_cpu(i, mask)) {
				RCU_INIT_POINTER(sd->flow_limit, NULL);
				kfree_rcu_mightsleep(cur);
			} else if (!cur && cpumask_test_cpu(i, mask)) {
				cur = kzalloc_node(len, GFP_KERNEL,
						   cpu_to_node(i));
				if (!cur) {
					/* not unwinding previous changes */
					ret = -ENOMEM;
					goto write_unlock;
				}
				cur->num_buckets = netdev_flow_limit_table_len;
				rcu_assign_pointer(sd->flow_limit, cur);
			}
		}
write_unlock:
		mutex_unlock(&flow_limit_update_mutex);
	} else {
		cpumask_clear(mask);
		rcu_read_lock();
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			if (rcu_dereference(sd->flow_limit))
				cpumask_set_cpu(i, mask);
		}
		rcu_read_unlock();

		ret = dump_cpumask(buffer, lenp, ppos, mask);
	}

done:
	free_cpumask_var(mask);
	return ret;
}

static int flow_limit_table_len_sysctl(const struct ctl_table *table, int write,
				       void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int old, *ptr;
	int ret;

	mutex_lock(&flow_limit_update_mutex);

	ptr = table->data;
	old = *ptr;
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write && !is_power_of_2(*ptr)) {
		*ptr = old;
		ret = -EINVAL;
	}

	mutex_unlock(&flow_limit_update_mutex);
	return ret;
}
#endif /* CONFIG_NET_FLOW_LIMIT */

#ifdef CONFIG_NET_SCHED
static int set_default_qdisc(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	char id[IFNAMSIZ];
	struct ctl_table tbl = {
		.data = id,
		.maxlen = IFNAMSIZ,
	};
	int ret;

	qdisc_get_default(id, IFNAMSIZ);

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0)
		ret = qdisc_set_default(id);
	return ret;
}
#endif

static int proc_do_dev_weight(const struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	static DEFINE_MUTEX(dev_weight_mutex);
	int ret, weight;

	mutex_lock(&dev_weight_mutex);
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write) {
		weight = READ_ONCE(weight_p);
		WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias);
		WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias);
	}
	mutex_unlock(&dev_weight_mutex);

	return ret;
}

static int proc_do_rss_key(const struct ctl_table *table, int write,
			   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table fake_table;
	char buf[NETDEV_RSS_KEY_LEN * 3];

	snprintf(buf, sizeof(buf), "%*phC", NETDEV_RSS_KEY_LEN, netdev_rss_key);
	fake_table.data = buf;
	fake_table.maxlen = sizeof(buf);
	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}

#ifdef CONFIG_BPF_JIT
static int proc_dointvec_minmax_bpf_enable(const struct ctl_table *table, int write,
					   void *buffer, size_t *lenp,
					   loff_t *ppos)
{
	int ret, jit_enable = *(int *)table->data;
	int min = *(int *)table->extra1;
	int max = *(int *)table->extra2;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	tmp.data = &jit_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (jit_enable < 2 ||
		    (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) {
			*(int *)table->data = jit_enable;
			if (jit_enable == 2)
				pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
		} else {
			ret = -EPERM;
		}
	}

	if (write && ret && min == max)
		pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n");

	return ret;
}

# ifdef CONFIG_HAVE_EBPF_JIT
static int
proc_dointvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
# endif /* CONFIG_HAVE_EBPF_JIT */

static int
proc_dolongvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
				     void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#endif

static struct ctl_table net_core_table[] = {
	{
		.procname	= "mem_pcpu_rsv",
		.data		= &net_hotdata.sysctl_mem_pcpu_rsv,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_mem_pcpu_rsv,
	},
	{
		.procname	= "dev_weight",
		.data		= &weight_p,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
	},
	{
		.procname	= "dev_weight_rx_bias",
		.data		= &dev_weight_rx_bias,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
	},
	{
		.procname	= "dev_weight_tx_bias",
		.data		= &dev_weight_tx_bias,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
	},
	{
		.procname	= "netdev_max_backlog",
		.data		= &net_hotdata.max_backlog,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "netdev_rss_key",
		.data		= &netdev_rss_key,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_do_rss_key,
	},
#ifdef CONFIG_BPF_JIT
	{
		.procname	= "bpf_jit_enable",
		.data		= &bpf_jit_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax_bpf_enable,
# ifdef CONFIG_BPF_JIT_ALWAYS_ON
		.extra1		= SYSCTL_ONE,
		.extra2		= SYSCTL_ONE,
# else
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
# endif
	},
# ifdef CONFIG_HAVE_EBPF_JIT
	{
		.procname	= "bpf_jit_harden",
		.data		= &bpf_jit_harden,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "bpf_jit_kallsyms",
		.data		= &bpf_jit_kallsyms,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
# endif
	{
		.procname	= "bpf_jit_limit",
		.data		= &bpf_jit_limit,
		.maxlen		= sizeof(long),
		.mode		= 0600,
		.proc_handler	= proc_dolongvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_LONG_ONE,
		.extra2		= &bpf_jit_limit_max,
	},
#endif
	{
		.procname	= "netdev_tstamp_prequeue",
		.data		= &net_hotdata.tstamp_prequeue,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "message_cost",
		.data		= &net_ratelimit_state.interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "message_burst",
		.data		= &net_ratelimit_state.burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_RPS
	{
		.procname	= "rps_sock_flow_entries",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= rps_sock_flow_sysctl
	},
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
	{
		.procname	= "flow_limit_cpu_bitmap",
		.mode		= 0644,
		.proc_handler	= flow_limit_cpu_sysctl
	},
	{
		.procname	= "flow_limit_table_len",
		.data		= &netdev_flow_limit_table_len,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= flow_limit_table_len_sysctl
	},
#endif /* CONFIG_NET_FLOW_LIMIT */
#ifdef CONFIG_NET_RX_BUSY_POLL
	{
		.procname	= "busy_poll",
		.data		= &sysctl_net_busy_poll,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{
		.procname	= "busy_read",
		.data		= &sysctl_net_busy_read,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
#endif
#ifdef CONFIG_NET_SCHED
	{
		.procname	= "default_qdisc",
		.mode		= 0644,
		.maxlen		= IFNAMSIZ,
		.proc_handler	= set_default_qdisc
	},
#endif
	{
		.procname	= "netdev_budget",
		.data		= &net_hotdata.netdev_budget,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "warnings",
		.data		= &net_msg_warn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "max_skb_frags",
		.data		= &net_hotdata.sysctl_max_skb_frags,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &max_skb_frags,
	},
	{
		.procname	= "netdev_budget_usecs",
		.data		= &net_hotdata.netdev_budget_usecs,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{
		.procname	= "fb_tunnels_only_for_init_net",
		.data		= &sysctl_fb_tunnels_only_for_init_net,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "devconf_inherit_init_net",
		.data		= &sysctl_devconf_inherit_init_net,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_THREE,
	},
	{
		.procname	= "high_order_alloc_disable",
		.data		= &net_high_order_alloc_disable_key.key,
		.maxlen		= sizeof(net_high_order_alloc_disable_key),
		.mode		= 0644,
		.proc_handler	= proc_do_static_key,
	},
	{
		.procname	= "gro_normal_batch",
		.data		= &net_hotdata.gro_normal_batch,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "netdev_unregister_timeout_secs",
		.data		= &netdev_unregister_timeout_secs,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &int_3600,
	},
	{
		.procname	= "skb_defer_max",
		.data		= &net_hotdata.sysctl_skb_defer_max,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
};

static struct ctl_table netns_core_table[] = {
#if IS_ENABLED(CONFIG_RPS)
	{
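		/* .data points at init_net here; sysctl_core_net_init()
		 * below rebases it for each namespace, so the handler's
		 * (struct net *)table->data cast resolves to that netns.
		 */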
.procname = "rps_default_mask", 645 .data = &init_net, 646 .mode = 0644, 647 .proc_handler = rps_default_mask_sysctl 648 }, 649 #endif 650 { 651 .procname = "somaxconn", 652 .data = &init_net.core.sysctl_somaxconn, 653 .maxlen = sizeof(int), 654 .mode = 0644, 655 .extra1 = SYSCTL_ZERO, 656 .proc_handler = proc_dointvec_minmax 657 }, 658 { 659 .procname = "optmem_max", 660 .data = &init_net.core.sysctl_optmem_max, 661 .maxlen = sizeof(int), 662 .mode = 0644, 663 .extra1 = SYSCTL_ZERO, 664 .proc_handler = proc_dointvec_minmax 665 }, 666 { 667 .procname = "txrehash", 668 .data = &init_net.core.sysctl_txrehash, 669 .maxlen = sizeof(u8), 670 .mode = 0644, 671 .extra1 = SYSCTL_ZERO, 672 .extra2 = SYSCTL_ONE, 673 .proc_handler = proc_dou8vec_minmax, 674 }, 675 { 676 .procname = "tstamp_allow_data", 677 .data = &init_net.core.sysctl_tstamp_allow_data, 678 .maxlen = sizeof(u8), 679 .mode = 0644, 680 .proc_handler = proc_dou8vec_minmax, 681 .extra1 = SYSCTL_ZERO, 682 .extra2 = SYSCTL_ONE 683 }, 684 /* sysctl_core_net_init() will set the values after this 685 * to readonly in network namespaces 686 */ 687 { 688 .procname = "wmem_max", 689 .data = &sysctl_wmem_max, 690 .maxlen = sizeof(int), 691 .mode = 0644, 692 .proc_handler = proc_dointvec_minmax, 693 .extra1 = &min_sndbuf, 694 }, 695 { 696 .procname = "rmem_max", 697 .data = &sysctl_rmem_max, 698 .maxlen = sizeof(int), 699 .mode = 0644, 700 .proc_handler = proc_dointvec_minmax, 701 .extra1 = &min_rcvbuf, 702 }, 703 { 704 .procname = "wmem_default", 705 .data = &sysctl_wmem_default, 706 .maxlen = sizeof(int), 707 .mode = 0644, 708 .proc_handler = proc_dointvec_minmax, 709 .extra1 = &min_sndbuf, 710 }, 711 { 712 .procname = "rmem_default", 713 .data = &sysctl_rmem_default, 714 .maxlen = sizeof(int), 715 .mode = 0644, 716 .proc_handler = proc_dointvec_minmax, 717 .extra1 = &min_rcvbuf, 718 }, 719 }; 720 721 static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str) 722 { 723 /* fallback tunnels for initns only */ 724 if (!strncmp(str, "initns", 6)) 725 sysctl_fb_tunnels_only_for_init_net = 1; 726 /* no fallback tunnels anywhere */ 727 else if (!strncmp(str, "none", 4)) 728 sysctl_fb_tunnels_only_for_init_net = 2; 729 730 return 1; 731 } 732 __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup); 733 734 static __net_init int sysctl_core_net_init(struct net *net) 735 { 736 size_t table_size = ARRAY_SIZE(netns_core_table); 737 struct ctl_table *tbl; 738 739 tbl = netns_core_table; 740 if (!net_eq(net, &init_net)) { 741 int i; 742 tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); 743 if (tbl == NULL) 744 goto err_dup; 745 746 for (i = 0; i < table_size; ++i) { 747 if (tbl[i].data == &sysctl_wmem_max) 748 break; 749 750 tbl[i].data += (char *)net - (char *)&init_net; 751 } 752 for (; i < table_size; ++i) 753 tbl[i].mode &= ~0222; 754 } 755 756 net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size); 757 if (net->core.sysctl_hdr == NULL) 758 goto err_reg; 759 760 return 0; 761 762 err_reg: 763 if (tbl != netns_core_table) 764 kfree(tbl); 765 err_dup: 766 return -ENOMEM; 767 } 768 769 static __net_exit void sysctl_core_net_exit(struct net *net) 770 { 771 const struct ctl_table *tbl; 772 773 tbl = net->core.sysctl_hdr->ctl_table_arg; 774 unregister_net_sysctl_table(net->core.sysctl_hdr); 775 BUG_ON(tbl == netns_core_table); 776 #if IS_ENABLED(CONFIG_RPS) 777 kfree(net->core.rps_default_mask); 778 #endif 779 kfree(tbl); 780 } 781 782 static __net_initdata struct pernet_operations sysctl_core_ops = { 783 
	.init = sysctl_core_net_init,
	.exit = sysctl_core_net_exit,
};

static __init int sysctl_core_init(void)
{
	register_net_sysctl(&init_net, "net/core", net_core_table);
	return register_pernet_subsys(&sysctl_core_ops);
}

fs_initcall(sysctl_core_init);
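
/* Illustrative usage from userspace (a sketch, not part of the kernel
 * source; the values below are arbitrary examples):
 *
 *	# size the global RFS socket flow table; rps_sock_flow_sysctl()
 *	# rounds the value up to a power of two and rejects values
 *	# above 1 << 29
 *	echo 32768 > /proc/sys/net/core/rps_sock_flow_entries
 *
 *	# default RPS cpumask inherited by new rx queues in this netns
 *	echo f > /proc/sys/net/core/rps_default_mask
 *
 *	# flow_limit_table_len_sysctl() rejects non-power-of-two lengths
 *	echo 8192 > /proc/sys/net/core/flow_limit_table_len
 */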