1 // SPDX-License-Identifier: GPL-2.0 2 /* -*- linux-c -*- 3 * sysctl_net_core.c: sysctl interface to net core subsystem. 4 * 5 * Begun April 1, 1996, Mike Shaver. 6 * Added /proc/sys/net/core directory entry (empty =) ). [MS] 7 */ 8 9 #include <linux/filter.h> 10 #include <linux/mm.h> 11 #include <linux/sysctl.h> 12 #include <linux/module.h> 13 #include <linux/socket.h> 14 #include <linux/netdevice.h> 15 #include <linux/ratelimit.h> 16 #include <linux/vmalloc.h> 17 #include <linux/init.h> 18 #include <linux/slab.h> 19 #include <linux/sched/isolation.h> 20 21 #include <net/ip.h> 22 #include <net/sock.h> 23 #include <net/net_ratelimit.h> 24 #include <net/busy_poll.h> 25 #include <net/pkt_sched.h> 26 #include <net/hotdata.h> 27 #include <net/proto_memory.h> 28 #include <net/rps.h> 29 30 #include "dev.h" 31 32 static int int_3600 = 3600; 33 static int min_sndbuf = SOCK_MIN_SNDBUF; 34 static int min_rcvbuf = SOCK_MIN_RCVBUF; 35 static int max_skb_frags = MAX_SKB_FRAGS; 36 static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE; 37 38 static int net_msg_warn; /* Unused, but still a sysctl */ 39 40 int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0; 41 EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net); 42 43 /* 0 - Keep current behavior: 44 * IPv4: inherit all current settings from init_net 45 * IPv6: reset all settings to default 46 * 1 - Both inherit all current settings from init_net 47 * 2 - Both reset all settings to default 48 * 3 - Both inherit all settings from current netns 49 */ 50 int sysctl_devconf_inherit_init_net __read_mostly; 51 EXPORT_SYMBOL(sysctl_devconf_inherit_init_net); 52 53 #if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS) 54 static int dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos, 55 struct cpumask *mask) 56 { 57 char *kbuf; 58 int len; 59 60 if (*ppos || !*lenp) { 61 *lenp = 0; 62 return 0; 63 } 64 65 /* CPUs are displayed as a hex bitmap + a comma between each groups of 8 66 * nibbles (except the last one which has a newline instead). 67 * Guesstimate the buffer size at the group granularity level. 68 */ 69 len = min(DIV_ROUND_UP(nr_cpumask_bits, 32) * (8 + 1), *lenp); 70 kbuf = kmalloc(len, GFP_KERNEL); 71 if (!kbuf) { 72 *lenp = 0; 73 return -ENOMEM; 74 } 75 76 len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask)); 77 if (!len) { 78 *lenp = 0; 79 goto free_buf; 80 } 81 82 /* scnprintf writes a trailing null char not counted in the returned 83 * length, override it with a newline. 84 */ 85 kbuf[len++] = '\n'; 86 memcpy(buffer, kbuf, len); 87 *lenp = len; 88 *ppos += len; 89 90 free_buf: 91 kfree(kbuf); 92 return 0; 93 } 94 #endif 95 96 #ifdef CONFIG_RPS 97 98 static struct cpumask *rps_default_mask_cow_alloc(struct net *net) 99 { 100 struct cpumask *rps_default_mask; 101 102 if (net->core.rps_default_mask) 103 return net->core.rps_default_mask; 104 105 rps_default_mask = kzalloc(cpumask_size(), GFP_KERNEL); 106 if (!rps_default_mask) 107 return NULL; 108 109 /* pairs with READ_ONCE in rx_queue_default_mask() */ 110 WRITE_ONCE(net->core.rps_default_mask, rps_default_mask); 111 return rps_default_mask; 112 } 113 114 static int rps_default_mask_sysctl(const struct ctl_table *table, int write, 115 void *buffer, size_t *lenp, loff_t *ppos) 116 { 117 struct net *net = (struct net *)table->data; 118 int err = 0; 119 120 rtnl_lock(); 121 if (write) { 122 struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net); 123 124 err = -ENOMEM; 125 if (!rps_default_mask) 126 goto done; 127 128 err = cpumask_parse(buffer, rps_default_mask); 129 if (err) 130 goto done; 131 132 err = rps_cpumask_housekeeping(rps_default_mask); 133 if (err) 134 goto done; 135 } else { 136 err = dump_cpumask(buffer, lenp, ppos, 137 net->core.rps_default_mask ? : cpu_none_mask); 138 } 139 140 done: 141 rtnl_unlock(); 142 return err; 143 } 144 145 static int rps_sock_flow_sysctl(const struct ctl_table *table, int write, 146 void *buffer, size_t *lenp, loff_t *ppos) 147 { 148 unsigned int orig_size, size; 149 int ret, i; 150 struct ctl_table tmp = { 151 .data = &size, 152 .maxlen = sizeof(size), 153 .mode = table->mode 154 }; 155 struct rps_sock_flow_table *orig_sock_table, *sock_table; 156 static DEFINE_MUTEX(sock_flow_mutex); 157 158 mutex_lock(&sock_flow_mutex); 159 160 orig_sock_table = rcu_dereference_protected( 161 net_hotdata.rps_sock_flow_table, 162 lockdep_is_held(&sock_flow_mutex)); 163 size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; 164 165 ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); 166 167 if (write) { 168 if (size) { 169 if (size > 1<<29) { 170 /* Enforce limit to prevent overflow */ 171 mutex_unlock(&sock_flow_mutex); 172 return -EINVAL; 173 } 174 size = roundup_pow_of_two(size); 175 if (size != orig_size) { 176 sock_table = 177 vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size)); 178 if (!sock_table) { 179 mutex_unlock(&sock_flow_mutex); 180 return -ENOMEM; 181 } 182 net_hotdata.rps_cpu_mask = 183 roundup_pow_of_two(nr_cpu_ids) - 1; 184 sock_table->mask = size - 1; 185 } else 186 sock_table = orig_sock_table; 187 188 for (i = 0; i < size; i++) 189 sock_table->ents[i] = RPS_NO_CPU; 190 } else 191 sock_table = NULL; 192 193 if (sock_table != orig_sock_table) { 194 rcu_assign_pointer(net_hotdata.rps_sock_flow_table, 195 sock_table); 196 if (sock_table) { 197 static_branch_inc(&rps_needed); 198 static_branch_inc(&rfs_needed); 199 } 200 if (orig_sock_table) { 201 static_branch_dec(&rps_needed); 202 static_branch_dec(&rfs_needed); 203 kvfree_rcu_mightsleep(orig_sock_table); 204 } 205 } 206 } 207 208 mutex_unlock(&sock_flow_mutex); 209 210 return ret; 211 } 212 #endif /* CONFIG_RPS */ 213 214 #ifdef CONFIG_NET_FLOW_LIMIT 215 static DEFINE_MUTEX(flow_limit_update_mutex); 216 217 static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write, 218 void *buffer, size_t *lenp, loff_t *ppos) 219 { 220 struct sd_flow_limit *cur; 221 struct softnet_data *sd; 222 cpumask_var_t mask; 223 int i, len, ret = 0; 224 225 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 226 return -ENOMEM; 227 228 if (write) { 229 ret = cpumask_parse(buffer, mask); 230 if (ret) 231 goto done; 232 233 mutex_lock(&flow_limit_update_mutex); 234 len = sizeof(*cur) + netdev_flow_limit_table_len; 235 for_each_possible_cpu(i) { 236 sd = &per_cpu(softnet_data, i); 237 cur = rcu_dereference_protected(sd->flow_limit, 238 lockdep_is_held(&flow_limit_update_mutex)); 239 if (cur && !cpumask_test_cpu(i, mask)) { 240 RCU_INIT_POINTER(sd->flow_limit, NULL); 241 kfree_rcu_mightsleep(cur); 242 } else if (!cur && cpumask_test_cpu(i, mask)) { 243 cur = kzalloc_node(len, GFP_KERNEL, 244 cpu_to_node(i)); 245 if (!cur) { 246 /* not unwinding previous changes */ 247 ret = -ENOMEM; 248 goto write_unlock; 249 } 250 cur->num_buckets = netdev_flow_limit_table_len; 251 rcu_assign_pointer(sd->flow_limit, cur); 252 } 253 } 254 write_unlock: 255 mutex_unlock(&flow_limit_update_mutex); 256 } else { 257 cpumask_clear(mask); 258 rcu_read_lock(); 259 for_each_possible_cpu(i) { 260 sd = &per_cpu(softnet_data, i); 261 if (rcu_dereference(sd->flow_limit)) 262 cpumask_set_cpu(i, mask); 263 } 264 rcu_read_unlock(); 265 266 ret = dump_cpumask(buffer, lenp, ppos, mask); 267 } 268 269 done: 270 free_cpumask_var(mask); 271 return ret; 272 } 273 274 static int flow_limit_table_len_sysctl(const struct ctl_table *table, int write, 275 void *buffer, size_t *lenp, loff_t *ppos) 276 { 277 unsigned int old, *ptr; 278 int ret; 279 280 mutex_lock(&flow_limit_update_mutex); 281 282 ptr = table->data; 283 old = *ptr; 284 ret = proc_dointvec(table, write, buffer, lenp, ppos); 285 if (!ret && write && !is_power_of_2(*ptr)) { 286 *ptr = old; 287 ret = -EINVAL; 288 } 289 290 mutex_unlock(&flow_limit_update_mutex); 291 return ret; 292 } 293 #endif /* CONFIG_NET_FLOW_LIMIT */ 294 295 #ifdef CONFIG_NET_SCHED 296 static int set_default_qdisc(const struct ctl_table *table, int write, 297 void *buffer, size_t *lenp, loff_t *ppos) 298 { 299 char id[IFNAMSIZ]; 300 struct ctl_table tbl = { 301 .data = id, 302 .maxlen = IFNAMSIZ, 303 }; 304 int ret; 305 306 qdisc_get_default(id, IFNAMSIZ); 307 308 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 309 if (write && ret == 0) 310 ret = qdisc_set_default(id); 311 return ret; 312 } 313 #endif 314 315 static int proc_do_dev_weight(const struct ctl_table *table, int write, 316 void *buffer, size_t *lenp, loff_t *ppos) 317 { 318 static DEFINE_MUTEX(dev_weight_mutex); 319 int ret, weight; 320 321 mutex_lock(&dev_weight_mutex); 322 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 323 if (!ret && write) { 324 weight = READ_ONCE(weight_p); 325 WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias); 326 WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias); 327 } 328 mutex_unlock(&dev_weight_mutex); 329 330 return ret; 331 } 332 333 static int proc_do_rss_key(const struct ctl_table *table, int write, 334 void *buffer, size_t *lenp, loff_t *ppos) 335 { 336 struct ctl_table fake_table; 337 char buf[NETDEV_RSS_KEY_LEN * 3]; 338 339 snprintf(buf, sizeof(buf), "%*phC", NETDEV_RSS_KEY_LEN, netdev_rss_key); 340 fake_table.data = buf; 341 fake_table.maxlen = sizeof(buf); 342 return proc_dostring(&fake_table, write, buffer, lenp, ppos); 343 } 344 345 #ifdef CONFIG_BPF_JIT 346 static int proc_dointvec_minmax_bpf_enable(const struct ctl_table *table, int write, 347 void *buffer, size_t *lenp, 348 loff_t *ppos) 349 { 350 int ret, jit_enable = *(int *)table->data; 351 int min = *(int *)table->extra1; 352 int max = *(int *)table->extra2; 353 struct ctl_table tmp = *table; 354 355 if (write && !capable(CAP_SYS_ADMIN)) 356 return -EPERM; 357 358 tmp.data = &jit_enable; 359 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 360 if (write && !ret) { 361 if (jit_enable < 2 || 362 (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) { 363 *(int *)table->data = jit_enable; 364 if (jit_enable == 2) 365 pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); 366 } else { 367 ret = -EPERM; 368 } 369 } 370 371 if (write && ret && min == max) 372 pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n"); 373 374 return ret; 375 } 376 377 # ifdef CONFIG_HAVE_EBPF_JIT 378 static int 379 proc_dointvec_minmax_bpf_restricted(const struct ctl_table *table, int write, 380 void *buffer, size_t *lenp, loff_t *ppos) 381 { 382 if (!capable(CAP_SYS_ADMIN)) 383 return -EPERM; 384 385 return proc_dointvec_minmax(table, write, buffer, lenp, ppos); 386 } 387 # endif /* CONFIG_HAVE_EBPF_JIT */ 388 389 static int 390 proc_dolongvec_minmax_bpf_restricted(const struct ctl_table *table, int write, 391 void *buffer, size_t *lenp, loff_t *ppos) 392 { 393 if (!capable(CAP_SYS_ADMIN)) 394 return -EPERM; 395 396 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); 397 } 398 #endif 399 400 static struct ctl_table net_core_table[] = { 401 { 402 .procname = "mem_pcpu_rsv", 403 .data = &net_hotdata.sysctl_mem_pcpu_rsv, 404 .maxlen = sizeof(int), 405 .mode = 0644, 406 .proc_handler = proc_dointvec_minmax, 407 .extra1 = &min_mem_pcpu_rsv, 408 }, 409 { 410 .procname = "dev_weight", 411 .data = &weight_p, 412 .maxlen = sizeof(int), 413 .mode = 0644, 414 .proc_handler = proc_do_dev_weight, 415 .extra1 = SYSCTL_ONE, 416 }, 417 { 418 .procname = "dev_weight_rx_bias", 419 .data = &dev_weight_rx_bias, 420 .maxlen = sizeof(int), 421 .mode = 0644, 422 .proc_handler = proc_do_dev_weight, 423 .extra1 = SYSCTL_ONE, 424 }, 425 { 426 .procname = "dev_weight_tx_bias", 427 .data = &dev_weight_tx_bias, 428 .maxlen = sizeof(int), 429 .mode = 0644, 430 .proc_handler = proc_do_dev_weight, 431 .extra1 = SYSCTL_ONE, 432 }, 433 { 434 .procname = "netdev_max_backlog", 435 .data = &net_hotdata.max_backlog, 436 .maxlen = sizeof(int), 437 .mode = 0644, 438 .proc_handler = proc_dointvec 439 }, 440 { 441 .procname = "netdev_rss_key", 442 .data = &netdev_rss_key, 443 .maxlen = sizeof(int), 444 .mode = 0444, 445 .proc_handler = proc_do_rss_key, 446 }, 447 #ifdef CONFIG_BPF_JIT 448 { 449 .procname = "bpf_jit_enable", 450 .data = &bpf_jit_enable, 451 .maxlen = sizeof(int), 452 .mode = 0644, 453 .proc_handler = proc_dointvec_minmax_bpf_enable, 454 # ifdef CONFIG_BPF_JIT_ALWAYS_ON 455 .extra1 = SYSCTL_ONE, 456 .extra2 = SYSCTL_ONE, 457 # else 458 .extra1 = SYSCTL_ZERO, 459 .extra2 = SYSCTL_TWO, 460 # endif 461 }, 462 # ifdef CONFIG_HAVE_EBPF_JIT 463 { 464 .procname = "bpf_jit_harden", 465 .data = &bpf_jit_harden, 466 .maxlen = sizeof(int), 467 .mode = 0600, 468 .proc_handler = proc_dointvec_minmax_bpf_restricted, 469 .extra1 = SYSCTL_ZERO, 470 .extra2 = SYSCTL_TWO, 471 }, 472 { 473 .procname = "bpf_jit_kallsyms", 474 .data = &bpf_jit_kallsyms, 475 .maxlen = sizeof(int), 476 .mode = 0600, 477 .proc_handler = proc_dointvec_minmax_bpf_restricted, 478 .extra1 = SYSCTL_ZERO, 479 .extra2 = SYSCTL_ONE, 480 }, 481 # endif 482 { 483 .procname = "bpf_jit_limit", 484 .data = &bpf_jit_limit, 485 .maxlen = sizeof(long), 486 .mode = 0600, 487 .proc_handler = proc_dolongvec_minmax_bpf_restricted, 488 .extra1 = SYSCTL_LONG_ONE, 489 .extra2 = &bpf_jit_limit_max, 490 }, 491 #endif 492 { 493 .procname = "netdev_tstamp_prequeue", 494 .data = &net_hotdata.tstamp_prequeue, 495 .maxlen = sizeof(int), 496 .mode = 0644, 497 .proc_handler = proc_dointvec 498 }, 499 { 500 .procname = "message_cost", 501 .data = &net_ratelimit_state.interval, 502 .maxlen = sizeof(int), 503 .mode = 0644, 504 .proc_handler = proc_dointvec_jiffies, 505 }, 506 { 507 .procname = "message_burst", 508 .data = &net_ratelimit_state.burst, 509 .maxlen = sizeof(int), 510 .mode = 0644, 511 .proc_handler = proc_dointvec, 512 }, 513 #ifdef CONFIG_RPS 514 { 515 .procname = "rps_sock_flow_entries", 516 .maxlen = sizeof(int), 517 .mode = 0644, 518 .proc_handler = rps_sock_flow_sysctl 519 }, 520 #endif 521 #ifdef CONFIG_NET_FLOW_LIMIT 522 { 523 .procname = "flow_limit_cpu_bitmap", 524 .mode = 0644, 525 .proc_handler = flow_limit_cpu_sysctl 526 }, 527 { 528 .procname = "flow_limit_table_len", 529 .data = &netdev_flow_limit_table_len, 530 .maxlen = sizeof(int), 531 .mode = 0644, 532 .proc_handler = flow_limit_table_len_sysctl 533 }, 534 #endif /* CONFIG_NET_FLOW_LIMIT */ 535 #ifdef CONFIG_NET_RX_BUSY_POLL 536 { 537 .procname = "busy_poll", 538 .data = &sysctl_net_busy_poll, 539 .maxlen = sizeof(unsigned int), 540 .mode = 0644, 541 .proc_handler = proc_dointvec_minmax, 542 .extra1 = SYSCTL_ZERO, 543 }, 544 { 545 .procname = "busy_read", 546 .data = &sysctl_net_busy_read, 547 .maxlen = sizeof(unsigned int), 548 .mode = 0644, 549 .proc_handler = proc_dointvec_minmax, 550 .extra1 = SYSCTL_ZERO, 551 }, 552 #endif 553 #ifdef CONFIG_NET_SCHED 554 { 555 .procname = "default_qdisc", 556 .mode = 0644, 557 .maxlen = IFNAMSIZ, 558 .proc_handler = set_default_qdisc 559 }, 560 #endif 561 { 562 .procname = "netdev_budget", 563 .data = &net_hotdata.netdev_budget, 564 .maxlen = sizeof(int), 565 .mode = 0644, 566 .proc_handler = proc_dointvec 567 }, 568 { 569 .procname = "warnings", 570 .data = &net_msg_warn, 571 .maxlen = sizeof(int), 572 .mode = 0644, 573 .proc_handler = proc_dointvec 574 }, 575 { 576 .procname = "max_skb_frags", 577 .data = &net_hotdata.sysctl_max_skb_frags, 578 .maxlen = sizeof(int), 579 .mode = 0644, 580 .proc_handler = proc_dointvec_minmax, 581 .extra1 = SYSCTL_ONE, 582 .extra2 = &max_skb_frags, 583 }, 584 { 585 .procname = "netdev_budget_usecs", 586 .data = &net_hotdata.netdev_budget_usecs, 587 .maxlen = sizeof(unsigned int), 588 .mode = 0644, 589 .proc_handler = proc_dointvec_minmax, 590 .extra1 = SYSCTL_ZERO, 591 }, 592 { 593 .procname = "fb_tunnels_only_for_init_net", 594 .data = &sysctl_fb_tunnels_only_for_init_net, 595 .maxlen = sizeof(int), 596 .mode = 0644, 597 .proc_handler = proc_dointvec_minmax, 598 .extra1 = SYSCTL_ZERO, 599 .extra2 = SYSCTL_TWO, 600 }, 601 { 602 .procname = "devconf_inherit_init_net", 603 .data = &sysctl_devconf_inherit_init_net, 604 .maxlen = sizeof(int), 605 .mode = 0644, 606 .proc_handler = proc_dointvec_minmax, 607 .extra1 = SYSCTL_ZERO, 608 .extra2 = SYSCTL_THREE, 609 }, 610 { 611 .procname = "high_order_alloc_disable", 612 .data = &net_high_order_alloc_disable_key.key, 613 .maxlen = sizeof(net_high_order_alloc_disable_key), 614 .mode = 0644, 615 .proc_handler = proc_do_static_key, 616 }, 617 { 618 .procname = "gro_normal_batch", 619 .data = &net_hotdata.gro_normal_batch, 620 .maxlen = sizeof(unsigned int), 621 .mode = 0644, 622 .proc_handler = proc_dointvec_minmax, 623 .extra1 = SYSCTL_ONE, 624 }, 625 { 626 .procname = "netdev_unregister_timeout_secs", 627 .data = &netdev_unregister_timeout_secs, 628 .maxlen = sizeof(unsigned int), 629 .mode = 0644, 630 .proc_handler = proc_dointvec_minmax, 631 .extra1 = SYSCTL_ONE, 632 .extra2 = &int_3600, 633 }, 634 { 635 .procname = "skb_defer_max", 636 .data = &net_hotdata.sysctl_skb_defer_max, 637 .maxlen = sizeof(unsigned int), 638 .mode = 0644, 639 .proc_handler = proc_dointvec_minmax, 640 .extra1 = SYSCTL_ZERO, 641 }, 642 }; 643 644 static struct ctl_table netns_core_table[] = { 645 #if IS_ENABLED(CONFIG_RPS) 646 { 647 .procname = "rps_default_mask", 648 .data = &init_net, 649 .mode = 0644, 650 .proc_handler = rps_default_mask_sysctl 651 }, 652 #endif 653 { 654 .procname = "somaxconn", 655 .data = &init_net.core.sysctl_somaxconn, 656 .maxlen = sizeof(int), 657 .mode = 0644, 658 .extra1 = SYSCTL_ZERO, 659 .proc_handler = proc_dointvec_minmax 660 }, 661 { 662 .procname = "optmem_max", 663 .data = &init_net.core.sysctl_optmem_max, 664 .maxlen = sizeof(int), 665 .mode = 0644, 666 .extra1 = SYSCTL_ZERO, 667 .proc_handler = proc_dointvec_minmax 668 }, 669 { 670 .procname = "txrehash", 671 .data = &init_net.core.sysctl_txrehash, 672 .maxlen = sizeof(u8), 673 .mode = 0644, 674 .extra1 = SYSCTL_ZERO, 675 .extra2 = SYSCTL_ONE, 676 .proc_handler = proc_dou8vec_minmax, 677 }, 678 { 679 .procname = "tstamp_allow_data", 680 .data = &init_net.core.sysctl_tstamp_allow_data, 681 .maxlen = sizeof(u8), 682 .mode = 0644, 683 .proc_handler = proc_dou8vec_minmax, 684 .extra1 = SYSCTL_ZERO, 685 .extra2 = SYSCTL_ONE 686 }, 687 /* sysctl_core_net_init() will set the values after this 688 * to readonly in network namespaces 689 */ 690 { 691 .procname = "wmem_max", 692 .data = &sysctl_wmem_max, 693 .maxlen = sizeof(int), 694 .mode = 0644, 695 .proc_handler = proc_dointvec_minmax, 696 .extra1 = &min_sndbuf, 697 }, 698 { 699 .procname = "rmem_max", 700 .data = &sysctl_rmem_max, 701 .maxlen = sizeof(int), 702 .mode = 0644, 703 .proc_handler = proc_dointvec_minmax, 704 .extra1 = &min_rcvbuf, 705 }, 706 { 707 .procname = "wmem_default", 708 .data = &sysctl_wmem_default, 709 .maxlen = sizeof(int), 710 .mode = 0644, 711 .proc_handler = proc_dointvec_minmax, 712 .extra1 = &min_sndbuf, 713 }, 714 { 715 .procname = "rmem_default", 716 .data = &sysctl_rmem_default, 717 .maxlen = sizeof(int), 718 .mode = 0644, 719 .proc_handler = proc_dointvec_minmax, 720 .extra1 = &min_rcvbuf, 721 }, 722 }; 723 724 static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str) 725 { 726 /* fallback tunnels for initns only */ 727 if (!strncmp(str, "initns", 6)) 728 sysctl_fb_tunnels_only_for_init_net = 1; 729 /* no fallback tunnels anywhere */ 730 else if (!strncmp(str, "none", 4)) 731 sysctl_fb_tunnels_only_for_init_net = 2; 732 733 return 1; 734 } 735 __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup); 736 737 static __net_init int sysctl_core_net_init(struct net *net) 738 { 739 size_t table_size = ARRAY_SIZE(netns_core_table); 740 struct ctl_table *tbl; 741 742 tbl = netns_core_table; 743 if (!net_eq(net, &init_net)) { 744 int i; 745 tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); 746 if (tbl == NULL) 747 goto err_dup; 748 749 for (i = 0; i < table_size; ++i) { 750 if (tbl[i].data == &sysctl_wmem_max) 751 break; 752 753 tbl[i].data += (char *)net - (char *)&init_net; 754 } 755 for (; i < table_size; ++i) 756 tbl[i].mode &= ~0222; 757 } 758 759 net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size); 760 if (net->core.sysctl_hdr == NULL) 761 goto err_reg; 762 763 return 0; 764 765 err_reg: 766 if (tbl != netns_core_table) 767 kfree(tbl); 768 err_dup: 769 return -ENOMEM; 770 } 771 772 static __net_exit void sysctl_core_net_exit(struct net *net) 773 { 774 const struct ctl_table *tbl; 775 776 tbl = net->core.sysctl_hdr->ctl_table_arg; 777 unregister_net_sysctl_table(net->core.sysctl_hdr); 778 BUG_ON(tbl == netns_core_table); 779 #if IS_ENABLED(CONFIG_RPS) 780 kfree(net->core.rps_default_mask); 781 #endif 782 kfree(tbl); 783 } 784 785 static __net_initdata struct pernet_operations sysctl_core_ops = { 786 .init = sysctl_core_net_init, 787 .exit = sysctl_core_net_exit, 788 }; 789 790 static __init int sysctl_core_init(void) 791 { 792 register_net_sysctl(&init_net, "net/core", net_core_table); 793 return register_pernet_subsys(&sysctl_core_ops); 794 } 795 796 fs_initcall(sysctl_core_init); 797