1 // SPDX-License-Identifier: GPL-2.0 2 /* -*- linux-c -*- 3 * sysctl_net_core.c: sysctl interface to net core subsystem. 4 * 5 * Begun April 1, 1996, Mike Shaver. 6 * Added /proc/sys/net/core directory entry (empty =) ). [MS] 7 */ 8 9 #include <linux/filter.h> 10 #include <linux/mm.h> 11 #include <linux/sysctl.h> 12 #include <linux/module.h> 13 #include <linux/socket.h> 14 #include <linux/netdevice.h> 15 #include <linux/ratelimit.h> 16 #include <linux/vmalloc.h> 17 #include <linux/init.h> 18 #include <linux/slab.h> 19 #include <linux/sched/isolation.h> 20 #include <linux/hex.h> 21 22 #include <net/ip.h> 23 #include <net/sock.h> 24 #include <net/net_ratelimit.h> 25 #include <net/busy_poll.h> 26 #include <net/pkt_sched.h> 27 #include <net/hotdata.h> 28 #include <net/proto_memory.h> 29 #include <net/rps.h> 30 31 #include "dev.h" 32 #include "net-sysfs.h" 33 34 static int int_3600 = 3600; 35 static int min_sndbuf = SOCK_MIN_SNDBUF; 36 static int min_rcvbuf = SOCK_MIN_RCVBUF; 37 static int max_skb_frags = MAX_SKB_FRAGS; 38 static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE; 39 static int netdev_budget_usecs_min = 2 * USEC_PER_SEC / HZ; 40 41 static int net_msg_warn; /* Unused, but still a sysctl */ 42 43 int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0; 44 EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net); 45 46 /* 0 - Keep current behavior: 47 * IPv4: inherit all current settings from init_net 48 * IPv6: reset all settings to default 49 * 1 - Both inherit all current settings from init_net 50 * 2 - Both reset all settings to default 51 * 3 - Both inherit all settings from current netns 52 */ 53 int sysctl_devconf_inherit_init_net __read_mostly; 54 EXPORT_SYMBOL(sysctl_devconf_inherit_init_net); 55 56 #if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS) 57 static int dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos, 58 struct cpumask *mask) 59 { 60 char *kbuf; 61 int len; 62 63 if (*ppos || !*lenp) { 64 *lenp = 0; 65 return 0; 66 } 67 68 /* CPUs are displayed as a hex bitmap + a comma between each groups of 8 69 * nibbles (except the last one which has a newline instead). 70 * Guesstimate the buffer size at the group granularity level. 71 */ 72 len = min(DIV_ROUND_UP(nr_cpumask_bits, 32) * (8 + 1), *lenp); 73 kbuf = kmalloc(len, GFP_KERNEL); 74 if (!kbuf) { 75 *lenp = 0; 76 return -ENOMEM; 77 } 78 79 len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask)); 80 if (!len) { 81 *lenp = 0; 82 goto free_buf; 83 } 84 85 /* scnprintf writes a trailing null char not counted in the returned 86 * length, override it with a newline. 87 */ 88 kbuf[len++] = '\n'; 89 memcpy(buffer, kbuf, len); 90 *lenp = len; 91 *ppos += len; 92 93 free_buf: 94 kfree(kbuf); 95 return 0; 96 } 97 #endif 98 99 #ifdef CONFIG_RPS 100 101 DEFINE_MUTEX(rps_default_mask_mutex); 102 103 static int rps_default_mask_sysctl(const struct ctl_table *table, int write, 104 void *buffer, size_t *lenp, loff_t *ppos) 105 { 106 struct net *net = (struct net *)table->data; 107 struct cpumask *mask; 108 int err = 0; 109 110 mutex_lock(&rps_default_mask_mutex); 111 mask = net->core.rps_default_mask; 112 if (write) { 113 if (!mask) { 114 mask = kzalloc(cpumask_size(), GFP_KERNEL); 115 net->core.rps_default_mask = mask; 116 } 117 err = -ENOMEM; 118 if (!mask) 119 goto done; 120 121 err = cpumask_parse(buffer, mask); 122 if (err) 123 goto done; 124 125 err = rps_cpumask_housekeeping(mask); 126 if (err) 127 goto done; 128 } else { 129 err = dump_cpumask(buffer, lenp, ppos, 130 mask ?: cpu_none_mask); 131 } 132 133 done: 134 mutex_unlock(&rps_default_mask_mutex); 135 return err; 136 } 137 138 static int rps_sock_flow_sysctl(const struct ctl_table *table, int write, 139 void *buffer, size_t *lenp, loff_t *ppos) 140 { 141 unsigned int orig_size, size; 142 int ret, i; 143 struct ctl_table tmp = { 144 .data = &size, 145 .maxlen = sizeof(size), 146 .mode = table->mode 147 }; 148 struct rps_sock_flow_table *orig_sock_table, *sock_table; 149 static DEFINE_MUTEX(sock_flow_mutex); 150 151 mutex_lock(&sock_flow_mutex); 152 153 orig_sock_table = rcu_dereference_protected( 154 net_hotdata.rps_sock_flow_table, 155 lockdep_is_held(&sock_flow_mutex)); 156 size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; 157 158 ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); 159 160 if (write) { 161 if (size) { 162 if (size > 1<<29) { 163 /* Enforce limit to prevent overflow */ 164 mutex_unlock(&sock_flow_mutex); 165 return -EINVAL; 166 } 167 size = roundup_pow_of_two(size); 168 if (size != orig_size) { 169 sock_table = 170 vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size)); 171 if (!sock_table) { 172 mutex_unlock(&sock_flow_mutex); 173 return -ENOMEM; 174 } 175 net_hotdata.rps_cpu_mask = 176 roundup_pow_of_two(nr_cpu_ids) - 1; 177 sock_table->mask = size - 1; 178 } else 179 sock_table = orig_sock_table; 180 181 for (i = 0; i < size; i++) 182 sock_table->ents[i] = RPS_NO_CPU; 183 } else 184 sock_table = NULL; 185 186 if (sock_table != orig_sock_table) { 187 rcu_assign_pointer(net_hotdata.rps_sock_flow_table, 188 sock_table); 189 if (sock_table) { 190 static_branch_inc(&rps_needed); 191 static_branch_inc(&rfs_needed); 192 } 193 if (orig_sock_table) { 194 static_branch_dec(&rps_needed); 195 static_branch_dec(&rfs_needed); 196 kvfree_rcu(orig_sock_table, rcu); 197 } 198 } 199 } 200 201 mutex_unlock(&sock_flow_mutex); 202 203 return ret; 204 } 205 #endif /* CONFIG_RPS */ 206 207 #ifdef CONFIG_NET_FLOW_LIMIT 208 static DEFINE_MUTEX(flow_limit_update_mutex); 209 210 static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write, 211 void *buffer, size_t *lenp, loff_t *ppos) 212 { 213 struct sd_flow_limit *cur; 214 struct softnet_data *sd; 215 cpumask_var_t mask; 216 int i, len, ret = 0; 217 218 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 219 return -ENOMEM; 220 221 if (write) { 222 ret = cpumask_parse(buffer, mask); 223 if (ret) 224 goto done; 225 226 mutex_lock(&flow_limit_update_mutex); 227 len = sizeof(*cur) + netdev_flow_limit_table_len; 228 for_each_possible_cpu(i) { 229 sd = &per_cpu(softnet_data, i); 230 cur = rcu_dereference_protected(sd->flow_limit, 231 lockdep_is_held(&flow_limit_update_mutex)); 232 if (cur && !cpumask_test_cpu(i, mask)) { 233 RCU_INIT_POINTER(sd->flow_limit, NULL); 234 kfree_rcu(cur, rcu); 235 } else if (!cur && cpumask_test_cpu(i, mask)) { 236 cur = kzalloc_node(len, GFP_KERNEL, 237 cpu_to_node(i)); 238 if (!cur) { 239 /* not unwinding previous changes */ 240 ret = -ENOMEM; 241 goto write_unlock; 242 } 243 cur->log_buckets = ilog2(netdev_flow_limit_table_len); 244 rcu_assign_pointer(sd->flow_limit, cur); 245 } 246 } 247 write_unlock: 248 mutex_unlock(&flow_limit_update_mutex); 249 } else { 250 cpumask_clear(mask); 251 rcu_read_lock(); 252 for_each_possible_cpu(i) { 253 sd = &per_cpu(softnet_data, i); 254 if (rcu_dereference(sd->flow_limit)) 255 cpumask_set_cpu(i, mask); 256 } 257 rcu_read_unlock(); 258 259 ret = dump_cpumask(buffer, lenp, ppos, mask); 260 } 261 262 done: 263 free_cpumask_var(mask); 264 return ret; 265 } 266 267 static int flow_limit_table_len_sysctl(const struct ctl_table *table, int write, 268 void *buffer, size_t *lenp, loff_t *ppos) 269 { 270 unsigned int old, *ptr; 271 int ret; 272 273 mutex_lock(&flow_limit_update_mutex); 274 275 ptr = table->data; 276 old = *ptr; 277 ret = proc_dointvec(table, write, buffer, lenp, ppos); 278 if (!ret && write && !is_power_of_2(*ptr)) { 279 *ptr = old; 280 ret = -EINVAL; 281 } 282 283 mutex_unlock(&flow_limit_update_mutex); 284 return ret; 285 } 286 #endif /* CONFIG_NET_FLOW_LIMIT */ 287 288 #ifdef CONFIG_NET_SCHED 289 static int set_default_qdisc(const struct ctl_table *table, int write, 290 void *buffer, size_t *lenp, loff_t *ppos) 291 { 292 char id[IFNAMSIZ]; 293 struct ctl_table tbl = { 294 .data = id, 295 .maxlen = IFNAMSIZ, 296 }; 297 int ret; 298 299 qdisc_get_default(id, IFNAMSIZ); 300 301 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 302 if (write && ret == 0) 303 ret = qdisc_set_default(id); 304 return ret; 305 } 306 #endif 307 308 static int proc_do_dev_weight(const struct ctl_table *table, int write, 309 void *buffer, size_t *lenp, loff_t *ppos) 310 { 311 static DEFINE_MUTEX(dev_weight_mutex); 312 int ret, weight; 313 314 mutex_lock(&dev_weight_mutex); 315 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 316 if (!ret && write) { 317 weight = READ_ONCE(weight_p); 318 WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias); 319 WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias); 320 } 321 mutex_unlock(&dev_weight_mutex); 322 323 return ret; 324 } 325 326 static int proc_do_rss_key(const struct ctl_table *table, int write, 327 void *buffer, size_t *lenp, loff_t *ppos) 328 { 329 char buf[NETDEV_RSS_KEY_LEN * 3]; 330 struct ctl_table fake_table; 331 char *pos = buf; 332 333 for (int i = 0; i < NETDEV_RSS_KEY_LEN; i++) { 334 pos = hex_byte_pack(pos, netdev_rss_key[i]); 335 *pos++ = ':'; 336 } 337 *(--pos) = 0; 338 339 fake_table.data = buf; 340 fake_table.maxlen = sizeof(buf); 341 return proc_dostring(&fake_table, write, buffer, lenp, ppos); 342 } 343 344 #ifdef CONFIG_BPF_JIT 345 static int proc_dointvec_minmax_bpf_enable(const struct ctl_table *table, int write, 346 void *buffer, size_t *lenp, 347 loff_t *ppos) 348 { 349 int ret, jit_enable = *(int *)table->data; 350 int min = *(int *)table->extra1; 351 int max = *(int *)table->extra2; 352 struct ctl_table tmp = *table; 353 354 if (write && !capable(CAP_SYS_ADMIN)) 355 return -EPERM; 356 357 tmp.data = &jit_enable; 358 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 359 if (write && !ret) { 360 if (jit_enable < 2 || 361 (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) { 362 *(int *)table->data = jit_enable; 363 if (jit_enable == 2) 364 pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); 365 } else { 366 ret = -EPERM; 367 } 368 } 369 370 if (write && ret && min == max) 371 pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n"); 372 373 return ret; 374 } 375 376 # ifdef CONFIG_HAVE_EBPF_JIT 377 static int 378 proc_dointvec_minmax_bpf_restricted(const struct ctl_table *table, int write, 379 void *buffer, size_t *lenp, loff_t *ppos) 380 { 381 if (!capable(CAP_SYS_ADMIN)) 382 return -EPERM; 383 384 return proc_dointvec_minmax(table, write, buffer, lenp, ppos); 385 } 386 # endif /* CONFIG_HAVE_EBPF_JIT */ 387 388 static int 389 proc_dolongvec_minmax_bpf_restricted(const struct ctl_table *table, int write, 390 void *buffer, size_t *lenp, loff_t *ppos) 391 { 392 if (!capable(CAP_SYS_ADMIN)) 393 return -EPERM; 394 395 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); 396 } 397 #endif 398 399 static struct ctl_table net_core_table[] = { 400 { 401 .procname = "mem_pcpu_rsv", 402 .data = &net_hotdata.sysctl_mem_pcpu_rsv, 403 .maxlen = sizeof(int), 404 .mode = 0644, 405 .proc_handler = proc_dointvec_minmax, 406 .extra1 = &min_mem_pcpu_rsv, 407 }, 408 { 409 .procname = "dev_weight", 410 .data = &weight_p, 411 .maxlen = sizeof(int), 412 .mode = 0644, 413 .proc_handler = proc_do_dev_weight, 414 .extra1 = SYSCTL_ONE, 415 }, 416 { 417 .procname = "dev_weight_rx_bias", 418 .data = &dev_weight_rx_bias, 419 .maxlen = sizeof(int), 420 .mode = 0644, 421 .proc_handler = proc_do_dev_weight, 422 .extra1 = SYSCTL_ONE, 423 }, 424 { 425 .procname = "dev_weight_tx_bias", 426 .data = &dev_weight_tx_bias, 427 .maxlen = sizeof(int), 428 .mode = 0644, 429 .proc_handler = proc_do_dev_weight, 430 .extra1 = SYSCTL_ONE, 431 }, 432 { 433 .procname = "netdev_max_backlog", 434 .data = &net_hotdata.max_backlog, 435 .maxlen = sizeof(int), 436 .mode = 0644, 437 .proc_handler = proc_dointvec 438 }, 439 { 440 .procname = "qdisc_max_burst", 441 .data = &net_hotdata.qdisc_max_burst, 442 .maxlen = sizeof(int), 443 .mode = 0644, 444 .proc_handler = proc_dointvec 445 }, 446 { 447 .procname = "netdev_rss_key", 448 .data = &netdev_rss_key, 449 .maxlen = sizeof(int), 450 .mode = 0444, 451 .proc_handler = proc_do_rss_key, 452 }, 453 #ifdef CONFIG_BPF_JIT 454 { 455 .procname = "bpf_jit_enable", 456 .data = &bpf_jit_enable, 457 .maxlen = sizeof(int), 458 .mode = 0644, 459 .proc_handler = proc_dointvec_minmax_bpf_enable, 460 # ifdef CONFIG_BPF_JIT_ALWAYS_ON 461 .extra1 = SYSCTL_ONE, 462 .extra2 = SYSCTL_ONE, 463 # else 464 .extra1 = SYSCTL_ZERO, 465 .extra2 = SYSCTL_TWO, 466 # endif 467 }, 468 # ifdef CONFIG_HAVE_EBPF_JIT 469 { 470 .procname = "bpf_jit_harden", 471 .data = &bpf_jit_harden, 472 .maxlen = sizeof(int), 473 .mode = 0600, 474 .proc_handler = proc_dointvec_minmax_bpf_restricted, 475 .extra1 = SYSCTL_ZERO, 476 .extra2 = SYSCTL_TWO, 477 }, 478 { 479 .procname = "bpf_jit_kallsyms", 480 .data = &bpf_jit_kallsyms, 481 .maxlen = sizeof(int), 482 .mode = 0600, 483 .proc_handler = proc_dointvec_minmax_bpf_restricted, 484 .extra1 = SYSCTL_ZERO, 485 .extra2 = SYSCTL_ONE, 486 }, 487 # endif 488 { 489 .procname = "bpf_jit_limit", 490 .data = &bpf_jit_limit, 491 .maxlen = sizeof(long), 492 .mode = 0600, 493 .proc_handler = proc_dolongvec_minmax_bpf_restricted, 494 .extra1 = SYSCTL_LONG_ONE, 495 .extra2 = &bpf_jit_limit_max, 496 }, 497 #endif 498 { 499 .procname = "netdev_tstamp_prequeue", 500 .data = &net_hotdata.tstamp_prequeue, 501 .maxlen = sizeof(int), 502 .mode = 0644, 503 .proc_handler = proc_dointvec 504 }, 505 { 506 .procname = "message_cost", 507 .data = &net_ratelimit_state.interval, 508 .maxlen = sizeof(int), 509 .mode = 0644, 510 .proc_handler = proc_dointvec_jiffies, 511 }, 512 { 513 .procname = "message_burst", 514 .data = &net_ratelimit_state.burst, 515 .maxlen = sizeof(int), 516 .mode = 0644, 517 .proc_handler = proc_dointvec, 518 }, 519 #ifdef CONFIG_RPS 520 { 521 .procname = "rps_sock_flow_entries", 522 .maxlen = sizeof(int), 523 .mode = 0644, 524 .proc_handler = rps_sock_flow_sysctl 525 }, 526 #endif 527 #ifdef CONFIG_NET_FLOW_LIMIT 528 { 529 .procname = "flow_limit_cpu_bitmap", 530 .mode = 0644, 531 .proc_handler = flow_limit_cpu_sysctl 532 }, 533 { 534 .procname = "flow_limit_table_len", 535 .data = &netdev_flow_limit_table_len, 536 .maxlen = sizeof(int), 537 .mode = 0644, 538 .proc_handler = flow_limit_table_len_sysctl 539 }, 540 #endif /* CONFIG_NET_FLOW_LIMIT */ 541 #ifdef CONFIG_NET_RX_BUSY_POLL 542 { 543 .procname = "busy_poll", 544 .data = &sysctl_net_busy_poll, 545 .maxlen = sizeof(unsigned int), 546 .mode = 0644, 547 .proc_handler = proc_dointvec_minmax, 548 .extra1 = SYSCTL_ZERO, 549 }, 550 { 551 .procname = "busy_read", 552 .data = &sysctl_net_busy_read, 553 .maxlen = sizeof(unsigned int), 554 .mode = 0644, 555 .proc_handler = proc_dointvec_minmax, 556 .extra1 = SYSCTL_ZERO, 557 }, 558 #endif 559 #ifdef CONFIG_NET_SCHED 560 { 561 .procname = "default_qdisc", 562 .mode = 0644, 563 .maxlen = IFNAMSIZ, 564 .proc_handler = set_default_qdisc 565 }, 566 #endif 567 { 568 .procname = "netdev_budget", 569 .data = &net_hotdata.netdev_budget, 570 .maxlen = sizeof(int), 571 .mode = 0644, 572 .proc_handler = proc_dointvec 573 }, 574 { 575 .procname = "warnings", 576 .data = &net_msg_warn, 577 .maxlen = sizeof(int), 578 .mode = 0644, 579 .proc_handler = proc_dointvec 580 }, 581 { 582 .procname = "max_skb_frags", 583 .data = &net_hotdata.sysctl_max_skb_frags, 584 .maxlen = sizeof(int), 585 .mode = 0644, 586 .proc_handler = proc_dointvec_minmax, 587 .extra1 = SYSCTL_ONE, 588 .extra2 = &max_skb_frags, 589 }, 590 { 591 .procname = "netdev_budget_usecs", 592 .data = &net_hotdata.netdev_budget_usecs, 593 .maxlen = sizeof(unsigned int), 594 .mode = 0644, 595 .proc_handler = proc_dointvec_minmax, 596 .extra1 = &netdev_budget_usecs_min, 597 }, 598 { 599 .procname = "fb_tunnels_only_for_init_net", 600 .data = &sysctl_fb_tunnels_only_for_init_net, 601 .maxlen = sizeof(int), 602 .mode = 0644, 603 .proc_handler = proc_dointvec_minmax, 604 .extra1 = SYSCTL_ZERO, 605 .extra2 = SYSCTL_TWO, 606 }, 607 { 608 .procname = "devconf_inherit_init_net", 609 .data = &sysctl_devconf_inherit_init_net, 610 .maxlen = sizeof(int), 611 .mode = 0644, 612 .proc_handler = proc_dointvec_minmax, 613 .extra1 = SYSCTL_ZERO, 614 .extra2 = SYSCTL_THREE, 615 }, 616 { 617 .procname = "high_order_alloc_disable", 618 .data = &net_high_order_alloc_disable_key.key, 619 .maxlen = sizeof(net_high_order_alloc_disable_key), 620 .mode = 0644, 621 .proc_handler = proc_do_static_key, 622 }, 623 { 624 .procname = "gro_normal_batch", 625 .data = &net_hotdata.gro_normal_batch, 626 .maxlen = sizeof(unsigned int), 627 .mode = 0644, 628 .proc_handler = proc_dointvec_minmax, 629 .extra1 = SYSCTL_ONE, 630 }, 631 { 632 .procname = "netdev_unregister_timeout_secs", 633 .data = &netdev_unregister_timeout_secs, 634 .maxlen = sizeof(unsigned int), 635 .mode = 0644, 636 .proc_handler = proc_dointvec_minmax, 637 .extra1 = SYSCTL_ONE, 638 .extra2 = &int_3600, 639 }, 640 { 641 .procname = "skb_defer_max", 642 .data = &net_hotdata.sysctl_skb_defer_max, 643 .maxlen = sizeof(unsigned int), 644 .mode = 0644, 645 .proc_handler = proc_dointvec_minmax, 646 .extra1 = SYSCTL_ZERO, 647 }, 648 }; 649 650 static struct ctl_table netns_core_table[] = { 651 #if IS_ENABLED(CONFIG_RPS) 652 { 653 .procname = "rps_default_mask", 654 .data = &init_net, 655 .mode = 0644, 656 .proc_handler = rps_default_mask_sysctl 657 }, 658 #endif 659 { 660 .procname = "somaxconn", 661 .data = &init_net.core.sysctl_somaxconn, 662 .maxlen = sizeof(int), 663 .mode = 0644, 664 .extra1 = SYSCTL_ZERO, 665 .proc_handler = proc_dointvec_minmax 666 }, 667 { 668 .procname = "optmem_max", 669 .data = &init_net.core.sysctl_optmem_max, 670 .maxlen = sizeof(int), 671 .mode = 0644, 672 .extra1 = SYSCTL_ZERO, 673 .proc_handler = proc_dointvec_minmax 674 }, 675 { 676 .procname = "txrehash", 677 .data = &init_net.core.sysctl_txrehash, 678 .maxlen = sizeof(u8), 679 .mode = 0644, 680 .extra1 = SYSCTL_ZERO, 681 .extra2 = SYSCTL_ONE, 682 .proc_handler = proc_dou8vec_minmax, 683 }, 684 { 685 .procname = "txq_reselection_ms", 686 .data = &init_net.core.sysctl_txq_reselection, 687 .maxlen = sizeof(int), 688 .mode = 0644, 689 .proc_handler = proc_dointvec_ms_jiffies, 690 }, 691 { 692 .procname = "tstamp_allow_data", 693 .data = &init_net.core.sysctl_tstamp_allow_data, 694 .maxlen = sizeof(u8), 695 .mode = 0644, 696 .proc_handler = proc_dou8vec_minmax, 697 .extra1 = SYSCTL_ZERO, 698 .extra2 = SYSCTL_ONE 699 }, 700 { 701 .procname = "bypass_prot_mem", 702 .data = &init_net.core.sysctl_bypass_prot_mem, 703 .maxlen = sizeof(u8), 704 .mode = 0644, 705 .proc_handler = proc_dou8vec_minmax, 706 .extra1 = SYSCTL_ZERO, 707 .extra2 = SYSCTL_ONE 708 }, 709 /* sysctl_core_net_init() will set the values after this 710 * to readonly in network namespaces 711 */ 712 { 713 .procname = "wmem_max", 714 .data = &sysctl_wmem_max, 715 .maxlen = sizeof(int), 716 .mode = 0644, 717 .proc_handler = proc_dointvec_minmax, 718 .extra1 = &min_sndbuf, 719 }, 720 { 721 .procname = "rmem_max", 722 .data = &sysctl_rmem_max, 723 .maxlen = sizeof(int), 724 .mode = 0644, 725 .proc_handler = proc_dointvec_minmax, 726 .extra1 = &min_rcvbuf, 727 }, 728 { 729 .procname = "wmem_default", 730 .data = &sysctl_wmem_default, 731 .maxlen = sizeof(int), 732 .mode = 0644, 733 .proc_handler = proc_dointvec_minmax, 734 .extra1 = &min_sndbuf, 735 }, 736 { 737 .procname = "rmem_default", 738 .data = &sysctl_rmem_default, 739 .maxlen = sizeof(int), 740 .mode = 0644, 741 .proc_handler = proc_dointvec_minmax, 742 .extra1 = &min_rcvbuf, 743 }, 744 }; 745 746 static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str) 747 { 748 /* fallback tunnels for initns only */ 749 if (!strncmp(str, "initns", 6)) 750 sysctl_fb_tunnels_only_for_init_net = 1; 751 /* no fallback tunnels anywhere */ 752 else if (!strncmp(str, "none", 4)) 753 sysctl_fb_tunnels_only_for_init_net = 2; 754 755 return 1; 756 } 757 __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup); 758 759 static __net_init int sysctl_core_net_init(struct net *net) 760 { 761 size_t table_size = ARRAY_SIZE(netns_core_table); 762 struct ctl_table *tbl; 763 764 tbl = netns_core_table; 765 if (!net_eq(net, &init_net)) { 766 int i; 767 tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); 768 if (tbl == NULL) 769 goto err_dup; 770 771 for (i = 0; i < table_size; ++i) { 772 if (tbl[i].data == &sysctl_wmem_max) 773 break; 774 775 tbl[i].data += (char *)net - (char *)&init_net; 776 } 777 for (; i < table_size; ++i) 778 tbl[i].mode &= ~0222; 779 } 780 781 net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size); 782 if (net->core.sysctl_hdr == NULL) 783 goto err_reg; 784 785 return 0; 786 787 err_reg: 788 if (tbl != netns_core_table) 789 kfree(tbl); 790 err_dup: 791 return -ENOMEM; 792 } 793 794 static __net_exit void sysctl_core_net_exit(struct net *net) 795 { 796 const struct ctl_table *tbl; 797 798 tbl = net->core.sysctl_hdr->ctl_table_arg; 799 unregister_net_sysctl_table(net->core.sysctl_hdr); 800 BUG_ON(tbl == netns_core_table); 801 #if IS_ENABLED(CONFIG_RPS) 802 kfree(net->core.rps_default_mask); 803 #endif 804 kfree(tbl); 805 } 806 807 static __net_initdata struct pernet_operations sysctl_core_ops = { 808 .init = sysctl_core_net_init, 809 .exit = sysctl_core_net_exit, 810 }; 811 812 static __init int sysctl_core_init(void) 813 { 814 register_net_sysctl(&init_net, "net/core", net_core_table); 815 return register_pernet_subsys(&sysctl_core_ops); 816 } 817 818 fs_initcall(sysctl_core_init); 819