// SPDX-License-Identifier: GPL-2.0
/* -*- linux-c -*-
 * sysctl_net_core.c: sysctl interface to net core subsystem.
 *
 * Begun April 1, 1996, Mike Shaver.
 * Added /proc/sys/net/core directory entry (empty =) ). [MS]
 */

#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/sched/isolation.h>
#include <linux/hex.h>

#include <net/ip.h>
#include <net/sock.h>
#include <net/net_ratelimit.h>
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
#include <net/hotdata.h>
#include <net/proto_memory.h>
#include <net/rps.h>

#include "dev.h"
#include "net-sysfs.h"

/* Bounds handed to proc_dointvec_minmax() via .extra1/.extra2 below. */
static int int_3600 = 3600;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE;
static int netdev_budget_usecs_min = 2 * USEC_PER_SEC / HZ;

static int net_msg_warn;	/* Unused, but still a sysctl */

int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);

/* 0 - Keep current behavior:
 *     IPv4: inherit all current settings from init_net
 *     IPv6: reset all settings to default
 * 1 - Both inherit all current settings from init_net
 * 2 - Both reset all settings to default
 * 3 - Both inherit all settings from current netns
 */
int sysctl_devconf_inherit_init_net __read_mostly;
EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);

#if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS)
/* Format @mask into @buffer as a newline-terminated hex bitmap for a
 * sysctl read.  On success *lenp and *ppos are advanced by the number
 * of bytes written; on a repeated read (*ppos != 0), empty request, or
 * empty formatted result, *lenp is set to 0.  Returns 0 or -ENOMEM.
 */
static int dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
			struct cpumask *mask)
{
	char *kbuf;
	int len;

	if (*ppos || !*lenp) {
		*lenp = 0;
		return 0;
	}

	/* CPUs are displayed as a hex bitmap + a comma between each groups of 8
	 * nibbles (except the last one which has a newline instead).
	 * Guesstimate the buffer size at the group granularity level.
	 */
	len = min(DIV_ROUND_UP(nr_cpumask_bits, 32) * (8 + 1), *lenp);
	kbuf = kmalloc(len, GFP_KERNEL);
	if (!kbuf) {
		*lenp = 0;
		return -ENOMEM;
	}

	len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
	if (!len) {
		*lenp = 0;
		goto free_buf;
	}

	/* scnprintf writes a trailing null char not counted in the returned
	 * length, override it with a newline.
	 */
	kbuf[len++] = '\n';
	memcpy(buffer, kbuf, len);
	*lenp = len;
	*ppos += len;

free_buf:
	kfree(kbuf);
	return 0;
}
#endif

#ifdef CONFIG_RPS

DEFINE_MUTEX(rps_default_mask_mutex);

/* Handler for the per-netns "rps_default_mask" sysctl.  table->data is
 * the struct net; the mask itself lives in net->core.rps_default_mask
 * and is allocated lazily on first write.  Reads of a netns that never
 * set a mask dump cpu_none_mask.
 */
static int rps_default_mask_sysctl(const struct ctl_table *table, int write,
				   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = (struct net *)table->data;
	struct cpumask *mask;
	int err = 0;

	mutex_lock(&rps_default_mask_mutex);
	mask = net->core.rps_default_mask;
	if (write) {
		if (!mask) {
			/* First write: allocate and publish the mask. */
			mask = kzalloc(cpumask_size(), GFP_KERNEL);
			net->core.rps_default_mask = mask;
		}
		err = -ENOMEM;
		if (!mask)
			goto done;

		err = cpumask_parse(buffer, mask);
		if (err)
			goto done;

		err = rps_cpumask_housekeeping(mask);
		if (err)
			goto done;
	} else {
		err = dump_cpumask(buffer, lenp, ppos,
				   mask ?: cpu_none_mask);
	}

done:
	mutex_unlock(&rps_default_mask_mutex);
	return err;
}

/* Handler for "rps_sock_flow_entries".  The global flow table is
 * published through net_hotdata.rps_sock_flow_table as a tagged
 * pointer: the table address ORed with log2(size) in the low bits
 * (see the tag_ptr assignment below; rps_tag_to_table()/
 * rps_tag_to_mask() unpack it).  Resizing allocates a new table,
 * publishes it with smp_store_release() so lockless readers see a
 * consistent pointer+size, flips the rps/rfs static branches, and
 * frees the old table after an RCU grace period.
 */
static int rps_sock_flow_sysctl(const struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	struct rps_sock_flow_table *o_sock_table, *sock_table;
	static DEFINE_MUTEX(sock_flow_mutex);
	rps_tag_ptr o_tag_ptr, tag_ptr;
	unsigned int orig_size, size;
	struct ctl_table tmp = {
		.data = &size,
		.maxlen = sizeof(size),
		.mode = table->mode
	};
	void *tofree = NULL;
	int ret, i;
	u8 log;

	mutex_lock(&sock_flow_mutex);

	o_tag_ptr = tag_ptr = net_hotdata.rps_sock_flow_table;

	/* Recover the current entry count from the tagged pointer. */
	size = o_tag_ptr ? rps_tag_to_mask(o_tag_ptr) + 1 : 0;
	o_sock_table = rps_tag_to_table(o_tag_ptr);
	orig_size = size;

	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

	if (!write)
		goto unlock;

	if (size) {
		if (size > 1<<29) {
			/* Enforce limit to prevent overflow */
			mutex_unlock(&sock_flow_mutex);
			return -EINVAL;
		}
		sock_table = o_sock_table;
		size = roundup_pow_of_two(size);
		if (size != orig_size) {
			sock_table = vmalloc_huge(size * sizeof(*sock_table),
						  GFP_KERNEL);
			if (!sock_table) {
				mutex_unlock(&sock_flow_mutex);
				return -ENOMEM;
			}
			net_hotdata.rps_cpu_mask =
				roundup_pow_of_two(nr_cpu_ids) - 1;
			log = ilog2(size);
			/* Pack table pointer and log2(size) into one word. */
			tag_ptr = (rps_tag_ptr)sock_table | log;
		}

		/* (Re)initialize every entry, even when the size is
		 * unchanged and we keep the existing table.
		 */
		for (i = 0; i < size; i++)
			sock_table[i].ent = RPS_NO_CPU;
	} else {
		sock_table = NULL;
		tag_ptr = 0UL;
	}
	if (tag_ptr != o_tag_ptr) {
		/* Publish before adjusting the static branches so readers
		 * enabled by them always observe the new table.
		 */
		smp_store_release(&net_hotdata.rps_sock_flow_table, tag_ptr);
		if (sock_table) {
			static_branch_inc(&rps_needed);
			static_branch_inc(&rfs_needed);
		}
		if (o_sock_table) {
			static_branch_dec(&rps_needed);
			static_branch_dec(&rfs_needed);
			tofree = o_sock_table;
		}
	}

unlock:
	mutex_unlock(&sock_flow_mutex);

	/* Old table may still be referenced by RCU readers. */
	kvfree_rcu_mightsleep(tofree);
	return ret;
}
#endif /* CONFIG_RPS */

#ifdef CONFIG_NET_FLOW_LIMIT
static DEFINE_MUTEX(flow_limit_update_mutex);

/* Handler for "flow_limit_cpu_bitmap".  On write, allocate a per-cpu
 * sd_flow_limit for every CPU newly present in the mask and RCU-free
 * the table of every CPU removed from it.  On read, report the set of
 * CPUs that currently have a table.
 */
static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	struct sd_flow_limit *cur;
	struct softnet_data *sd;
	cpumask_var_t mask;
	int i, len, ret = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	if (write) {
		ret = cpumask_parse(buffer, mask);
		if (ret)
			goto done;

		mutex_lock(&flow_limit_update_mutex);
		len = sizeof(*cur) + netdev_flow_limit_table_len;
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			cur = rcu_dereference_protected(sd->flow_limit,
				     lockdep_is_held(&flow_limit_update_mutex));
			if (cur && !cpumask_test_cpu(i, mask)) {
				RCU_INIT_POINTER(sd->flow_limit, NULL);
				kfree_rcu(cur, rcu);
			} else if (!cur && cpumask_test_cpu(i, mask)) {
				cur = kzalloc_node(len, GFP_KERNEL,
						   cpu_to_node(i));
				if (!cur) {
					/* not unwinding previous changes */
					ret = -ENOMEM;
					goto write_unlock;
				}
				cur->log_buckets = ilog2(netdev_flow_limit_table_len);
				rcu_assign_pointer(sd->flow_limit, cur);
			}
		}
write_unlock:
		mutex_unlock(&flow_limit_update_mutex);
	} else {
		cpumask_clear(mask);
		rcu_read_lock();
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			if (rcu_dereference(sd->flow_limit))
				cpumask_set_cpu(i, mask);
		}
		rcu_read_unlock();

		ret = dump_cpumask(buffer, lenp, ppos, mask);
	}

done:
	free_cpumask_var(mask);
	return ret;
}

/* Handler for "flow_limit_table_len": accept only power-of-two values,
 * restoring the previous length otherwise.  Serialized against
 * flow_limit_cpu_sysctl(), which reads the length when sizing tables.
 */
static int flow_limit_table_len_sysctl(const struct ctl_table *table, int write,
				       void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int old, *ptr;
	int ret;

	mutex_lock(&flow_limit_update_mutex);

	ptr = table->data;
	old = *ptr;
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write && !is_power_of_2(*ptr)) {
		*ptr = old;
		ret = -EINVAL;
	}

	mutex_unlock(&flow_limit_update_mutex);
	return ret;
}
#endif /* CONFIG_NET_FLOW_LIMIT */

#ifdef CONFIG_NET_SCHED
/* Handler for "default_qdisc": read reports the current default qdisc
 * name, write installs a new one via qdisc_set_default().
 */
static int set_default_qdisc(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	char id[IFNAMSIZ];
	struct ctl_table tbl = {
		.data = id,
		.maxlen = IFNAMSIZ,
	};
	int ret;

	qdisc_get_default(id, IFNAMSIZ);

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0)
		ret = qdisc_set_default(id);
	return ret;
}
#endif

/* Shared handler for "dev_weight", "dev_weight_rx_bias" and
 * "dev_weight_tx_bias": after any successful write, recompute the
 * derived rx/tx weights in net_hotdata from weight_p and the biases.
 */
static int proc_do_dev_weight(const struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	static DEFINE_MUTEX(dev_weight_mutex);
	int ret, weight;

	mutex_lock(&dev_weight_mutex);
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!ret && write) {
		weight = READ_ONCE(weight_p);
		WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias);
		WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias);
	}
	mutex_unlock(&dev_weight_mutex);

	return ret;
}

/* Handler for the read-only "netdev_rss_key": format the key as
 * colon-separated hex bytes ("aa:bb:...") into a stack buffer and hand
 * it to proc_dostring().
 */
static int proc_do_rss_key(const struct ctl_table *table, int write,
			   void *buffer, size_t *lenp, loff_t *ppos)
{
	/* 3 chars per byte: two hex digits plus ':' (last ':' becomes NUL). */
	char buf[NETDEV_RSS_KEY_LEN * 3];
	struct ctl_table fake_table;
	char *pos = buf;

	for (int i = 0; i < NETDEV_RSS_KEY_LEN; i++) {
		pos = hex_byte_pack(pos, netdev_rss_key[i]);
		*pos++ = ':';
	}
	/* Replace the trailing ':' with the string terminator. */
	*(--pos) = 0;

	fake_table.data = buf;
	fake_table.maxlen = sizeof(buf);
	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}

/* Handler for "skb_defer_max": toggle skb_defer_disable_key when the
 * value transitions between zero and non-zero, so the defer path can be
 * skipped entirely when disabled.
 */
static int proc_do_skb_defer_max(const struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	static DEFINE_MUTEX(skb_defer_max_mutex);
	int ret, oval, nval;

	mutex_lock(&skb_defer_max_mutex);

	/* Track the "is disabled" predicate across the write. */
	oval = !net_hotdata.sysctl_skb_defer_max;
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	nval = !net_hotdata.sysctl_skb_defer_max;

	if (nval != oval) {
		if (nval)
			static_branch_enable(&skb_defer_disable_key);
		else
			static_branch_disable(&skb_defer_disable_key);
	}

	mutex_unlock(&skb_defer_max_mutex);
	return ret;
}

#ifdef CONFIG_BPF_JIT
/* Handler for "bpf_jit_enable".  Requires CAP_SYS_ADMIN to write;
 * value 2 (JIT debugging) additionally requires bpf_dump_raw_ok() and
 * emits a loud warning.  With CONFIG_BPF_JIT_ALWAYS_ON the min/max
 * bounds pin the value to 1 (detected below via min == max).
 */
static int proc_dointvec_minmax_bpf_enable(const struct ctl_table *table, int write,
					   void *buffer, size_t *lenp,
					   loff_t *ppos)
{
	int ret, jit_enable = *(int *)table->data;
	int min = *(int *)table->extra1;
	int max = *(int *)table->extra2;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* Parse into a local copy; commit only after validation. */
	tmp.data = &jit_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (jit_enable < 2 ||
		    (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) {
			*(int *)table->data = jit_enable;
			if (jit_enable == 2)
				pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
		} else {
			ret = -EPERM;
		}
	}

	if (write && ret && min == max)
		pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n");

	return ret;
}

# ifdef CONFIG_HAVE_EBPF_JIT
/* proc_dointvec_minmax() gated on CAP_SYS_ADMIN, for the eBPF JIT
 * hardening knobs.
 */
static int
proc_dointvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
# endif /* CONFIG_HAVE_EBPF_JIT */

/* proc_doulongvec_minmax() gated on CAP_SYS_ADMIN, for "bpf_jit_limit". */
static int
proc_dolongvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
				     void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#endif

/* Global /proc/sys/net/core entries (registered once for init_net). */
static struct ctl_table net_core_table[] = {
	{
		.procname	= "mem_pcpu_rsv",
		.data		= &net_hotdata.sysctl_mem_pcpu_rsv,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_mem_pcpu_rsv,
	},
	{
		.procname	= "dev_weight",
		.data		= &weight_p,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "dev_weight_rx_bias",
		.data		= &dev_weight_rx_bias,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "dev_weight_tx_bias",
		.data		= &dev_weight_tx_bias,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "netdev_max_backlog",
		.data		= &net_hotdata.max_backlog,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "qdisc_max_burst",
		.data		= &net_hotdata.qdisc_max_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "netdev_rss_key",
		.data		= &netdev_rss_key,
		.maxlen		= sizeof(int),
		.mode		= 0444,	/* read-only */
		.proc_handler	= proc_do_rss_key,
	},
#ifdef CONFIG_BPF_JIT
	{
		.procname	= "bpf_jit_enable",
		.data		= &bpf_jit_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax_bpf_enable,
# ifdef CONFIG_BPF_JIT_ALWAYS_ON
		.extra1		= SYSCTL_ONE,
		.extra2		= SYSCTL_ONE,
# else
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
# endif
	},
# ifdef CONFIG_HAVE_EBPF_JIT
	{
		.procname	= "bpf_jit_harden",
		.data		= &bpf_jit_harden,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "bpf_jit_kallsyms",
		.data		= &bpf_jit_kallsyms,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
# endif
	{
		.procname	= "bpf_jit_limit",
		.data		= &bpf_jit_limit,
		.maxlen		= sizeof(long),
		.mode		= 0600,
		.proc_handler	= proc_dolongvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_LONG_ONE,
		.extra2		= &bpf_jit_limit_max,
	},
#endif
	{
		.procname	= "netdev_tstamp_prequeue",
		.data		= &net_hotdata.tstamp_prequeue,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "message_cost",
		.data		= &net_ratelimit_state.interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "message_burst",
		.data		= &net_ratelimit_state.burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_RPS
	{
		.procname	= "rps_sock_flow_entries",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= rps_sock_flow_sysctl
	},
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
	{
		.procname	= "flow_limit_cpu_bitmap",
		.mode		= 0644,
		.proc_handler	= flow_limit_cpu_sysctl
	},
	{
		.procname	= "flow_limit_table_len",
		.data		= &netdev_flow_limit_table_len,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= flow_limit_table_len_sysctl
	},
#endif /* CONFIG_NET_FLOW_LIMIT */
#ifdef CONFIG_NET_RX_BUSY_POLL
	{
		.procname	= "busy_poll",
		.data		= &sysctl_net_busy_poll,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{
		.procname	= "busy_read",
		.data		= &sysctl_net_busy_read,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
#endif
#ifdef CONFIG_NET_SCHED
	{
		.procname	= "default_qdisc",
		.mode		= 0644,
		.maxlen		= IFNAMSIZ,
		.proc_handler	= set_default_qdisc
	},
#endif
	{
		.procname	= "netdev_budget",
		.data		= &net_hotdata.netdev_budget,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "warnings",
		.data		= &net_msg_warn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "max_skb_frags",
		.data		= &net_hotdata.sysctl_max_skb_frags,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &max_skb_frags,
	},
	{
		.procname	= "netdev_budget_usecs",
		.data		= &net_hotdata.netdev_budget_usecs,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &netdev_budget_usecs_min,
	},
	{
		.procname	= "fb_tunnels_only_for_init_net",
		.data		= &sysctl_fb_tunnels_only_for_init_net,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "devconf_inherit_init_net",
		.data		= &sysctl_devconf_inherit_init_net,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_THREE,
	},
	{
		.procname	= "high_order_alloc_disable",
		.data		= &net_high_order_alloc_disable_key.key,
		.maxlen		= sizeof(net_high_order_alloc_disable_key),
		.mode		= 0644,
		.proc_handler	= proc_do_static_key,
	},
	{
		.procname	= "gro_normal_batch",
		.data		= &net_hotdata.gro_normal_batch,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "netdev_unregister_timeout_secs",
		.data		= &netdev_unregister_timeout_secs,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &int_3600,
	},
	{
		.procname	= "skb_defer_max",
		.data		= &net_hotdata.sysctl_skb_defer_max,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_do_skb_defer_max,
		.extra1		= SYSCTL_ZERO,
	},
};

/* Per-netns /proc/sys/net/core entries.  For non-init namespaces the
 * .data pointers are rebased in sysctl_core_net_init(); entries from
 * "wmem_max" onward stay global and are made read-only there.
 */
static struct ctl_table netns_core_table[] = {
#if IS_ENABLED(CONFIG_RPS)
	{
		.procname	= "rps_default_mask",
		.data		= &init_net,
		.mode		= 0644,
		.proc_handler	= rps_default_mask_sysctl
	},
#endif
	{
		.procname	= "somaxconn",
		.data		= &init_net.core.sysctl_somaxconn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.extra1		= SYSCTL_ZERO,
		.proc_handler	= proc_dointvec_minmax
	},
	{
		.procname	= "optmem_max",
		.data		= &init_net.core.sysctl_optmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.extra1		= SYSCTL_ZERO,
		.proc_handler	= proc_dointvec_minmax
	},
	{
		.procname	= "txrehash",
		.data		= &init_net.core.sysctl_txrehash,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
		.proc_handler	= proc_dou8vec_minmax,
	},
	{
		.procname	= "txq_reselection_ms",
		.data		= &init_net.core.sysctl_txq_reselection,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "tstamp_allow_data",
		.data		= &init_net.core.sysctl_tstamp_allow_data,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE
	},
	{
		.procname	= "bypass_prot_mem",
		.data		= &init_net.core.sysctl_bypass_prot_mem,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE
	},
	/* sysctl_core_net_init() will set the values after this
	 * to readonly in network namespaces
	 */
	{
		.procname	= "wmem_max",
		.data		= &sysctl_wmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_sndbuf,
	},
	{
		.procname	= "rmem_max",
		.data		= &sysctl_rmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_rcvbuf,
	},
	{
		.procname	= "wmem_default",
		.data		= &sysctl_wmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_sndbuf,
	},
	{
		.procname	= "rmem_default",
		.data		= &sysctl_rmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_rcvbuf,
	},
};

/* Early-boot "fb_tunnels=" command-line override for
 * sysctl_fb_tunnels_only_for_init_net (unknown values are ignored).
 */
static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str)
{
	/* fallback tunnels for initns only */
	if (!strncmp(str, "initns", 6))
		sysctl_fb_tunnels_only_for_init_net = 1;
	/* no fallback tunnels anywhere */
	else if (!strncmp(str, "none", 4))
		sysctl_fb_tunnels_only_for_init_net = 2;

	return 1;
}
__setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);

/* Register net/core sysctls for @net.  Non-init namespaces get a copy
 * of netns_core_table with per-netns .data pointers rebased from
 * init_net to @net; entries from "wmem_max" onward keep their global
 * .data but lose write permission.
 */
static __net_init int sysctl_core_net_init(struct net *net)
{
	size_t table_size = ARRAY_SIZE(netns_core_table);
	struct ctl_table *tbl;

	tbl = netns_core_table;
	if (!net_eq(net, &init_net)) {
		int i;
		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
		if (tbl == NULL)
			goto err_dup;

		for (i = 0; i < table_size; ++i) {
			if (tbl[i].data == &sysctl_wmem_max)
				break;

			/* Rebase the pointer from init_net into @net. */
			tbl[i].data += (char *)net - (char *)&init_net;
		}
		/* Remaining (global) entries become read-only here. */
		for (; i < table_size; ++i)
			tbl[i].mode &= ~0222;
	}

	net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size);
	if (net->core.sysctl_hdr == NULL)
		goto err_reg;

	return 0;

err_reg:
	if (tbl != netns_core_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}

/* Tear down the per-netns sysctls registered above and free the duped
 * table and the lazily-allocated rps_default_mask.
 */
static __net_exit void sysctl_core_net_exit(struct net *net)
{
	const struct ctl_table *tbl;

	tbl = net->core.sysctl_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->core.sysctl_hdr);
	/* init_net is never torn down, so tbl must be a kmemdup() copy. */
	BUG_ON(tbl == netns_core_table);
#if IS_ENABLED(CONFIG_RPS)
	kfree(net->core.rps_default_mask);
#endif
	kfree(tbl);
}

static __net_initdata struct pernet_operations sysctl_core_ops = {
	.init = sysctl_core_net_init,
	.exit = sysctl_core_net_exit,
};

/* Register the global net/core table and the per-netns operations. */
static __init int sysctl_core_init(void)
{
	register_net_sysctl(&init_net, "net/core", net_core_table);
	return register_pernet_subsys(&sysctl_core_ops);
}

fs_initcall(sysctl_core_init);