1 // SPDX-License-Identifier: GPL-2.0 2 /* -*- linux-c -*- 3 * sysctl_net_core.c: sysctl interface to net core subsystem. 4 * 5 * Begun April 1, 1996, Mike Shaver. 6 * Added /proc/sys/net/core directory entry (empty =) ). [MS] 7 */ 8 9 #include <linux/filter.h> 10 #include <linux/mm.h> 11 #include <linux/sysctl.h> 12 #include <linux/module.h> 13 #include <linux/socket.h> 14 #include <linux/netdevice.h> 15 #include <linux/ratelimit.h> 16 #include <linux/vmalloc.h> 17 #include <linux/init.h> 18 #include <linux/slab.h> 19 #include <linux/sched/isolation.h> 20 #include <linux/hex.h> 21 22 #include <net/ip.h> 23 #include <net/sock.h> 24 #include <net/net_ratelimit.h> 25 #include <net/busy_poll.h> 26 #include <net/pkt_sched.h> 27 #include <net/hotdata.h> 28 #include <net/proto_memory.h> 29 #include <net/rps.h> 30 31 #include "dev.h" 32 #include "net-sysfs.h" 33 34 static int int_3600 = 3600; 35 static int min_sndbuf = SOCK_MIN_SNDBUF; 36 static int min_rcvbuf = SOCK_MIN_RCVBUF; 37 static int max_skb_frags = MAX_SKB_FRAGS; 38 static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE; 39 static int netdev_budget_usecs_min = 2 * USEC_PER_SEC / HZ; 40 41 static int net_msg_warn; /* Unused, but still a sysctl */ 42 43 int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0; 44 EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net); 45 46 /* 0 - Keep current behavior: 47 * IPv4: inherit all current settings from init_net 48 * IPv6: reset all settings to default 49 * 1 - Both inherit all current settings from init_net 50 * 2 - Both reset all settings to default 51 * 3 - Both inherit all settings from current netns 52 */ 53 int sysctl_devconf_inherit_init_net __read_mostly; 54 EXPORT_SYMBOL(sysctl_devconf_inherit_init_net); 55 56 #if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS) 57 static int dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos, 58 struct cpumask *mask) 59 { 60 char *kbuf; 61 int len; 62 63 if (*ppos || !*lenp) { 64 *lenp = 0; 65 return 0; 66 } 67 68 /* CPUs are displayed as a hex bitmap + a comma between each groups of 8 69 * nibbles (except the last one which has a newline instead). 70 * Guesstimate the buffer size at the group granularity level. 71 */ 72 len = min(DIV_ROUND_UP(nr_cpumask_bits, 32) * (8 + 1), *lenp); 73 kbuf = kmalloc(len, GFP_KERNEL); 74 if (!kbuf) { 75 *lenp = 0; 76 return -ENOMEM; 77 } 78 79 len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask)); 80 if (!len) { 81 *lenp = 0; 82 goto free_buf; 83 } 84 85 /* scnprintf writes a trailing null char not counted in the returned 86 * length, override it with a newline. 87 */ 88 kbuf[len++] = '\n'; 89 memcpy(buffer, kbuf, len); 90 *lenp = len; 91 *ppos += len; 92 93 free_buf: 94 kfree(kbuf); 95 return 0; 96 } 97 #endif 98 99 #ifdef CONFIG_RPS 100 101 DEFINE_MUTEX(rps_default_mask_mutex); 102 103 static int rps_default_mask_sysctl(const struct ctl_table *table, int write, 104 void *buffer, size_t *lenp, loff_t *ppos) 105 { 106 struct net *net = (struct net *)table->data; 107 struct cpumask *mask; 108 int err = 0; 109 110 mutex_lock(&rps_default_mask_mutex); 111 mask = net->core.rps_default_mask; 112 if (write) { 113 if (!mask) { 114 mask = kzalloc(cpumask_size(), GFP_KERNEL); 115 net->core.rps_default_mask = mask; 116 } 117 err = -ENOMEM; 118 if (!mask) 119 goto done; 120 121 err = cpumask_parse(buffer, mask); 122 if (err) 123 goto done; 124 125 err = rps_cpumask_housekeeping(mask); 126 if (err) 127 goto done; 128 } else { 129 err = dump_cpumask(buffer, lenp, ppos, 130 mask ?: cpu_none_mask); 131 } 132 133 done: 134 mutex_unlock(&rps_default_mask_mutex); 135 return err; 136 } 137 138 static int rps_sock_flow_sysctl(const struct ctl_table *table, int write, 139 void *buffer, size_t *lenp, loff_t *ppos) 140 { 141 struct rps_sock_flow_table *o_sock_table, *sock_table; 142 static DEFINE_MUTEX(sock_flow_mutex); 143 rps_tag_ptr o_tag_ptr, tag_ptr; 144 unsigned int orig_size, size; 145 struct ctl_table tmp = { 146 .data = &size, 147 .maxlen = sizeof(size), 148 .mode = table->mode 149 }; 150 void *tofree = NULL; 151 int ret, i; 152 u8 log; 153 154 mutex_lock(&sock_flow_mutex); 155 156 o_tag_ptr = tag_ptr = net_hotdata.rps_sock_flow_table; 157 158 size = o_tag_ptr ? rps_tag_to_mask(o_tag_ptr) + 1 : 0; 159 o_sock_table = rps_tag_to_table(o_tag_ptr); 160 orig_size = size; 161 162 ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); 163 164 if (!write) 165 goto unlock; 166 167 if (size) { 168 if (size > 1<<29) { 169 /* Enforce limit to prevent overflow */ 170 mutex_unlock(&sock_flow_mutex); 171 return -EINVAL; 172 } 173 sock_table = o_sock_table; 174 size = roundup_pow_of_two(size); 175 if (size != orig_size) { 176 sock_table = vmalloc_huge(size * sizeof(*sock_table), 177 GFP_KERNEL); 178 if (!sock_table) { 179 mutex_unlock(&sock_flow_mutex); 180 return -ENOMEM; 181 } 182 net_hotdata.rps_cpu_mask = 183 roundup_pow_of_two(nr_cpu_ids) - 1; 184 log = ilog2(size); 185 tag_ptr = (rps_tag_ptr)sock_table | log; 186 } 187 188 for (i = 0; i < size; i++) 189 sock_table[i].ent = RPS_NO_CPU; 190 } else { 191 sock_table = NULL; 192 tag_ptr = 0UL; 193 } 194 if (tag_ptr != o_tag_ptr) { 195 smp_store_release(&net_hotdata.rps_sock_flow_table, tag_ptr); 196 if (sock_table) { 197 static_branch_inc(&rps_needed); 198 static_branch_inc(&rfs_needed); 199 } 200 if (o_sock_table) { 201 static_branch_dec(&rps_needed); 202 static_branch_dec(&rfs_needed); 203 tofree = o_sock_table; 204 } 205 } 206 207 unlock: 208 mutex_unlock(&sock_flow_mutex); 209 210 kvfree_rcu_mightsleep(tofree); 211 return ret; 212 } 213 #endif /* CONFIG_RPS */ 214 215 #ifdef CONFIG_NET_FLOW_LIMIT 216 static DEFINE_MUTEX(flow_limit_update_mutex); 217 218 static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write, 219 void *buffer, size_t *lenp, loff_t *ppos) 220 { 221 struct sd_flow_limit *cur; 222 struct softnet_data *sd; 223 cpumask_var_t mask; 224 int i, len, ret = 0; 225 226 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 227 return -ENOMEM; 228 229 if (write) { 230 ret = cpumask_parse(buffer, mask); 231 if (ret) 232 goto done; 233 234 mutex_lock(&flow_limit_update_mutex); 235 len = sizeof(*cur) + netdev_flow_limit_table_len; 236 for_each_possible_cpu(i) { 237 sd = &per_cpu(softnet_data, i); 238 cur = rcu_dereference_protected(sd->flow_limit, 239 lockdep_is_held(&flow_limit_update_mutex)); 240 if (cur && !cpumask_test_cpu(i, mask)) { 241 RCU_INIT_POINTER(sd->flow_limit, NULL); 242 kfree_rcu(cur, rcu); 243 } else if (!cur && cpumask_test_cpu(i, mask)) { 244 cur = kzalloc_node(len, GFP_KERNEL, 245 cpu_to_node(i)); 246 if (!cur) { 247 /* not unwinding previous changes */ 248 ret = -ENOMEM; 249 goto write_unlock; 250 } 251 cur->log_buckets = ilog2(netdev_flow_limit_table_len); 252 rcu_assign_pointer(sd->flow_limit, cur); 253 } 254 } 255 write_unlock: 256 mutex_unlock(&flow_limit_update_mutex); 257 } else { 258 cpumask_clear(mask); 259 rcu_read_lock(); 260 for_each_possible_cpu(i) { 261 sd = &per_cpu(softnet_data, i); 262 if (rcu_dereference(sd->flow_limit)) 263 cpumask_set_cpu(i, mask); 264 } 265 rcu_read_unlock(); 266 267 ret = dump_cpumask(buffer, lenp, ppos, mask); 268 } 269 270 done: 271 free_cpumask_var(mask); 272 return ret; 273 } 274 275 static int flow_limit_table_len_sysctl(const struct ctl_table *table, int write, 276 void *buffer, size_t *lenp, loff_t *ppos) 277 { 278 unsigned int old, *ptr; 279 int ret; 280 281 mutex_lock(&flow_limit_update_mutex); 282 283 ptr = table->data; 284 old = *ptr; 285 ret = proc_dointvec(table, write, buffer, lenp, ppos); 286 if (!ret && write && !is_power_of_2(*ptr)) { 287 *ptr = old; 288 ret = -EINVAL; 289 } 290 291 mutex_unlock(&flow_limit_update_mutex); 292 return ret; 293 } 294 #endif /* CONFIG_NET_FLOW_LIMIT */ 295 296 #ifdef CONFIG_NET_SCHED 297 static int set_default_qdisc(const struct ctl_table *table, int write, 298 void *buffer, size_t *lenp, loff_t *ppos) 299 { 300 char id[IFNAMSIZ]; 301 struct ctl_table tbl = { 302 .data = id, 303 .maxlen = IFNAMSIZ, 304 }; 305 int ret; 306 307 qdisc_get_default(id, IFNAMSIZ); 308 309 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 310 if (write && ret == 0) 311 ret = qdisc_set_default(id); 312 return ret; 313 } 314 #endif 315 316 static int proc_do_dev_weight(const struct ctl_table *table, int write, 317 void *buffer, size_t *lenp, loff_t *ppos) 318 { 319 static DEFINE_MUTEX(dev_weight_mutex); 320 int ret, weight; 321 322 mutex_lock(&dev_weight_mutex); 323 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 324 if (!ret && write) { 325 weight = READ_ONCE(weight_p); 326 WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias); 327 WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias); 328 } 329 mutex_unlock(&dev_weight_mutex); 330 331 return ret; 332 } 333 334 static int proc_do_rss_key(const struct ctl_table *table, int write, 335 void *buffer, size_t *lenp, loff_t *ppos) 336 { 337 char buf[NETDEV_RSS_KEY_LEN * 3]; 338 struct ctl_table fake_table; 339 char *pos = buf; 340 341 for (int i = 0; i < NETDEV_RSS_KEY_LEN; i++) { 342 pos = hex_byte_pack(pos, netdev_rss_key[i]); 343 *pos++ = ':'; 344 } 345 *(--pos) = 0; 346 347 fake_table.data = buf; 348 fake_table.maxlen = sizeof(buf); 349 return proc_dostring(&fake_table, write, buffer, lenp, ppos); 350 } 351 352 #ifdef CONFIG_BPF_JIT 353 static int proc_dointvec_minmax_bpf_enable(const struct ctl_table *table, int write, 354 void *buffer, size_t *lenp, 355 loff_t *ppos) 356 { 357 int ret, jit_enable = *(int *)table->data; 358 int min = *(int *)table->extra1; 359 int max = *(int *)table->extra2; 360 struct ctl_table tmp = *table; 361 362 if (write && !capable(CAP_SYS_ADMIN)) 363 return -EPERM; 364 365 tmp.data = &jit_enable; 366 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 367 if (write && !ret) { 368 if (jit_enable < 2 || 369 (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) { 370 *(int *)table->data = jit_enable; 371 if (jit_enable == 2) 372 pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); 373 } else { 374 ret = -EPERM; 375 } 376 } 377 378 if (write && ret && min == max) 379 pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n"); 380 381 return ret; 382 } 383 384 # ifdef CONFIG_HAVE_EBPF_JIT 385 static int 386 proc_dointvec_minmax_bpf_restricted(const struct ctl_table *table, int write, 387 void *buffer, size_t *lenp, loff_t *ppos) 388 { 389 if (!capable(CAP_SYS_ADMIN)) 390 return -EPERM; 391 392 return proc_dointvec_minmax(table, write, buffer, lenp, ppos); 393 } 394 # endif /* CONFIG_HAVE_EBPF_JIT */ 395 396 static int 397 proc_dolongvec_minmax_bpf_restricted(const struct ctl_table *table, int write, 398 void *buffer, size_t *lenp, loff_t *ppos) 399 { 400 if (!capable(CAP_SYS_ADMIN)) 401 return -EPERM; 402 403 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); 404 } 405 #endif 406 407 static struct ctl_table net_core_table[] = { 408 { 409 .procname = "mem_pcpu_rsv", 410 .data = &net_hotdata.sysctl_mem_pcpu_rsv, 411 .maxlen = sizeof(int), 412 .mode = 0644, 413 .proc_handler = proc_dointvec_minmax, 414 .extra1 = &min_mem_pcpu_rsv, 415 }, 416 { 417 .procname = "dev_weight", 418 .data = &weight_p, 419 .maxlen = sizeof(int), 420 .mode = 0644, 421 .proc_handler = proc_do_dev_weight, 422 .extra1 = SYSCTL_ONE, 423 }, 424 { 425 .procname = "dev_weight_rx_bias", 426 .data = &dev_weight_rx_bias, 427 .maxlen = sizeof(int), 428 .mode = 0644, 429 .proc_handler = proc_do_dev_weight, 430 .extra1 = SYSCTL_ONE, 431 }, 432 { 433 .procname = "dev_weight_tx_bias", 434 .data = &dev_weight_tx_bias, 435 .maxlen = sizeof(int), 436 .mode = 0644, 437 .proc_handler = proc_do_dev_weight, 438 .extra1 = SYSCTL_ONE, 439 }, 440 { 441 .procname = "netdev_max_backlog", 442 .data = &net_hotdata.max_backlog, 443 .maxlen = sizeof(int), 444 .mode = 0644, 445 .proc_handler = proc_dointvec 446 }, 447 { 448 .procname = "qdisc_max_burst", 449 .data = &net_hotdata.qdisc_max_burst, 450 .maxlen = sizeof(int), 451 .mode = 0644, 452 .proc_handler = proc_dointvec 453 }, 454 { 455 .procname = "netdev_rss_key", 456 .data = &netdev_rss_key, 457 .maxlen = sizeof(int), 458 .mode = 0444, 459 .proc_handler = proc_do_rss_key, 460 }, 461 #ifdef CONFIG_BPF_JIT 462 { 463 .procname = "bpf_jit_enable", 464 .data = &bpf_jit_enable, 465 .maxlen = sizeof(int), 466 .mode = 0644, 467 .proc_handler = proc_dointvec_minmax_bpf_enable, 468 # ifdef CONFIG_BPF_JIT_ALWAYS_ON 469 .extra1 = SYSCTL_ONE, 470 .extra2 = SYSCTL_ONE, 471 # else 472 .extra1 = SYSCTL_ZERO, 473 .extra2 = SYSCTL_TWO, 474 # endif 475 }, 476 # ifdef CONFIG_HAVE_EBPF_JIT 477 { 478 .procname = "bpf_jit_harden", 479 .data = &bpf_jit_harden, 480 .maxlen = sizeof(int), 481 .mode = 0600, 482 .proc_handler = proc_dointvec_minmax_bpf_restricted, 483 .extra1 = SYSCTL_ZERO, 484 .extra2 = SYSCTL_TWO, 485 }, 486 { 487 .procname = "bpf_jit_kallsyms", 488 .data = &bpf_jit_kallsyms, 489 .maxlen = sizeof(int), 490 .mode = 0600, 491 .proc_handler = proc_dointvec_minmax_bpf_restricted, 492 .extra1 = SYSCTL_ZERO, 493 .extra2 = SYSCTL_ONE, 494 }, 495 # endif 496 { 497 .procname = "bpf_jit_limit", 498 .data = &bpf_jit_limit, 499 .maxlen = sizeof(long), 500 .mode = 0600, 501 .proc_handler = proc_dolongvec_minmax_bpf_restricted, 502 .extra1 = SYSCTL_LONG_ONE, 503 .extra2 = &bpf_jit_limit_max, 504 }, 505 #endif 506 { 507 .procname = "netdev_tstamp_prequeue", 508 .data = &net_hotdata.tstamp_prequeue, 509 .maxlen = sizeof(int), 510 .mode = 0644, 511 .proc_handler = proc_dointvec 512 }, 513 { 514 .procname = "message_cost", 515 .data = &net_ratelimit_state.interval, 516 .maxlen = sizeof(int), 517 .mode = 0644, 518 .proc_handler = proc_dointvec_jiffies, 519 }, 520 { 521 .procname = "message_burst", 522 .data = &net_ratelimit_state.burst, 523 .maxlen = sizeof(int), 524 .mode = 0644, 525 .proc_handler = proc_dointvec, 526 }, 527 #ifdef CONFIG_RPS 528 { 529 .procname = "rps_sock_flow_entries", 530 .maxlen = sizeof(int), 531 .mode = 0644, 532 .proc_handler = rps_sock_flow_sysctl 533 }, 534 #endif 535 #ifdef CONFIG_NET_FLOW_LIMIT 536 { 537 .procname = "flow_limit_cpu_bitmap", 538 .mode = 0644, 539 .proc_handler = flow_limit_cpu_sysctl 540 }, 541 { 542 .procname = "flow_limit_table_len", 543 .data = &netdev_flow_limit_table_len, 544 .maxlen = sizeof(int), 545 .mode = 0644, 546 .proc_handler = flow_limit_table_len_sysctl 547 }, 548 #endif /* CONFIG_NET_FLOW_LIMIT */ 549 #ifdef CONFIG_NET_RX_BUSY_POLL 550 { 551 .procname = "busy_poll", 552 .data = &sysctl_net_busy_poll, 553 .maxlen = sizeof(unsigned int), 554 .mode = 0644, 555 .proc_handler = proc_dointvec_minmax, 556 .extra1 = SYSCTL_ZERO, 557 }, 558 { 559 .procname = "busy_read", 560 .data = &sysctl_net_busy_read, 561 .maxlen = sizeof(unsigned int), 562 .mode = 0644, 563 .proc_handler = proc_dointvec_minmax, 564 .extra1 = SYSCTL_ZERO, 565 }, 566 #endif 567 #ifdef CONFIG_NET_SCHED 568 { 569 .procname = "default_qdisc", 570 .mode = 0644, 571 .maxlen = IFNAMSIZ, 572 .proc_handler = set_default_qdisc 573 }, 574 #endif 575 { 576 .procname = "netdev_budget", 577 .data = &net_hotdata.netdev_budget, 578 .maxlen = sizeof(int), 579 .mode = 0644, 580 .proc_handler = proc_dointvec 581 }, 582 { 583 .procname = "warnings", 584 .data = &net_msg_warn, 585 .maxlen = sizeof(int), 586 .mode = 0644, 587 .proc_handler = proc_dointvec 588 }, 589 { 590 .procname = "max_skb_frags", 591 .data = &net_hotdata.sysctl_max_skb_frags, 592 .maxlen = sizeof(int), 593 .mode = 0644, 594 .proc_handler = proc_dointvec_minmax, 595 .extra1 = SYSCTL_ONE, 596 .extra2 = &max_skb_frags, 597 }, 598 { 599 .procname = "netdev_budget_usecs", 600 .data = &net_hotdata.netdev_budget_usecs, 601 .maxlen = sizeof(unsigned int), 602 .mode = 0644, 603 .proc_handler = proc_dointvec_minmax, 604 .extra1 = &netdev_budget_usecs_min, 605 }, 606 { 607 .procname = "fb_tunnels_only_for_init_net", 608 .data = &sysctl_fb_tunnels_only_for_init_net, 609 .maxlen = sizeof(int), 610 .mode = 0644, 611 .proc_handler = proc_dointvec_minmax, 612 .extra1 = SYSCTL_ZERO, 613 .extra2 = SYSCTL_TWO, 614 }, 615 { 616 .procname = "devconf_inherit_init_net", 617 .data = &sysctl_devconf_inherit_init_net, 618 .maxlen = sizeof(int), 619 .mode = 0644, 620 .proc_handler = proc_dointvec_minmax, 621 .extra1 = SYSCTL_ZERO, 622 .extra2 = SYSCTL_THREE, 623 }, 624 { 625 .procname = "high_order_alloc_disable", 626 .data = &net_high_order_alloc_disable_key.key, 627 .maxlen = sizeof(net_high_order_alloc_disable_key), 628 .mode = 0644, 629 .proc_handler = proc_do_static_key, 630 }, 631 { 632 .procname = "gro_normal_batch", 633 .data = &net_hotdata.gro_normal_batch, 634 .maxlen = sizeof(unsigned int), 635 .mode = 0644, 636 .proc_handler = proc_dointvec_minmax, 637 .extra1 = SYSCTL_ONE, 638 }, 639 { 640 .procname = "netdev_unregister_timeout_secs", 641 .data = &netdev_unregister_timeout_secs, 642 .maxlen = sizeof(unsigned int), 643 .mode = 0644, 644 .proc_handler = proc_dointvec_minmax, 645 .extra1 = SYSCTL_ONE, 646 .extra2 = &int_3600, 647 }, 648 { 649 .procname = "skb_defer_max", 650 .data = &net_hotdata.sysctl_skb_defer_max, 651 .maxlen = sizeof(unsigned int), 652 .mode = 0644, 653 .proc_handler = proc_dointvec_minmax, 654 .extra1 = SYSCTL_ZERO, 655 }, 656 }; 657 658 static struct ctl_table netns_core_table[] = { 659 #if IS_ENABLED(CONFIG_RPS) 660 { 661 .procname = "rps_default_mask", 662 .data = &init_net, 663 .mode = 0644, 664 .proc_handler = rps_default_mask_sysctl 665 }, 666 #endif 667 { 668 .procname = "somaxconn", 669 .data = &init_net.core.sysctl_somaxconn, 670 .maxlen = sizeof(int), 671 .mode = 0644, 672 .extra1 = SYSCTL_ZERO, 673 .proc_handler = proc_dointvec_minmax 674 }, 675 { 676 .procname = "optmem_max", 677 .data = &init_net.core.sysctl_optmem_max, 678 .maxlen = sizeof(int), 679 .mode = 0644, 680 .extra1 = SYSCTL_ZERO, 681 .proc_handler = proc_dointvec_minmax 682 }, 683 { 684 .procname = "txrehash", 685 .data = &init_net.core.sysctl_txrehash, 686 .maxlen = sizeof(u8), 687 .mode = 0644, 688 .extra1 = SYSCTL_ZERO, 689 .extra2 = SYSCTL_ONE, 690 .proc_handler = proc_dou8vec_minmax, 691 }, 692 { 693 .procname = "txq_reselection_ms", 694 .data = &init_net.core.sysctl_txq_reselection, 695 .maxlen = sizeof(int), 696 .mode = 0644, 697 .proc_handler = proc_dointvec_ms_jiffies, 698 }, 699 { 700 .procname = "tstamp_allow_data", 701 .data = &init_net.core.sysctl_tstamp_allow_data, 702 .maxlen = sizeof(u8), 703 .mode = 0644, 704 .proc_handler = proc_dou8vec_minmax, 705 .extra1 = SYSCTL_ZERO, 706 .extra2 = SYSCTL_ONE 707 }, 708 { 709 .procname = "bypass_prot_mem", 710 .data = &init_net.core.sysctl_bypass_prot_mem, 711 .maxlen = sizeof(u8), 712 .mode = 0644, 713 .proc_handler = proc_dou8vec_minmax, 714 .extra1 = SYSCTL_ZERO, 715 .extra2 = SYSCTL_ONE 716 }, 717 /* sysctl_core_net_init() will set the values after this 718 * to readonly in network namespaces 719 */ 720 { 721 .procname = "wmem_max", 722 .data = &sysctl_wmem_max, 723 .maxlen = sizeof(int), 724 .mode = 0644, 725 .proc_handler = proc_dointvec_minmax, 726 .extra1 = &min_sndbuf, 727 }, 728 { 729 .procname = "rmem_max", 730 .data = &sysctl_rmem_max, 731 .maxlen = sizeof(int), 732 .mode = 0644, 733 .proc_handler = proc_dointvec_minmax, 734 .extra1 = &min_rcvbuf, 735 }, 736 { 737 .procname = "wmem_default", 738 .data = &sysctl_wmem_default, 739 .maxlen = sizeof(int), 740 .mode = 0644, 741 .proc_handler = proc_dointvec_minmax, 742 .extra1 = &min_sndbuf, 743 }, 744 { 745 .procname = "rmem_default", 746 .data = &sysctl_rmem_default, 747 .maxlen = sizeof(int), 748 .mode = 0644, 749 .proc_handler = proc_dointvec_minmax, 750 .extra1 = &min_rcvbuf, 751 }, 752 }; 753 754 static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str) 755 { 756 /* fallback tunnels for initns only */ 757 if (!strncmp(str, "initns", 6)) 758 sysctl_fb_tunnels_only_for_init_net = 1; 759 /* no fallback tunnels anywhere */ 760 else if (!strncmp(str, "none", 4)) 761 sysctl_fb_tunnels_only_for_init_net = 2; 762 763 return 1; 764 } 765 __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup); 766 767 static __net_init int sysctl_core_net_init(struct net *net) 768 { 769 size_t table_size = ARRAY_SIZE(netns_core_table); 770 struct ctl_table *tbl; 771 772 tbl = netns_core_table; 773 if (!net_eq(net, &init_net)) { 774 int i; 775 tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); 776 if (tbl == NULL) 777 goto err_dup; 778 779 for (i = 0; i < table_size; ++i) { 780 if (tbl[i].data == &sysctl_wmem_max) 781 break; 782 783 tbl[i].data += (char *)net - (char *)&init_net; 784 } 785 for (; i < table_size; ++i) 786 tbl[i].mode &= ~0222; 787 } 788 789 net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size); 790 if (net->core.sysctl_hdr == NULL) 791 goto err_reg; 792 793 return 0; 794 795 err_reg: 796 if (tbl != netns_core_table) 797 kfree(tbl); 798 err_dup: 799 return -ENOMEM; 800 } 801 802 static __net_exit void sysctl_core_net_exit(struct net *net) 803 { 804 const struct ctl_table *tbl; 805 806 tbl = net->core.sysctl_hdr->ctl_table_arg; 807 unregister_net_sysctl_table(net->core.sysctl_hdr); 808 BUG_ON(tbl == netns_core_table); 809 #if IS_ENABLED(CONFIG_RPS) 810 kfree(net->core.rps_default_mask); 811 #endif 812 kfree(tbl); 813 } 814 815 static __net_initdata struct pernet_operations sysctl_core_ops = { 816 .init = sysctl_core_net_init, 817 .exit = sysctl_core_net_exit, 818 }; 819 820 static __init int sysctl_core_init(void) 821 { 822 register_net_sysctl(&init_net, "net/core", net_core_table); 823 return register_pernet_subsys(&sysctl_core_ops); 824 } 825 826 fs_initcall(sysctl_core_init); 827