// SPDX-License-Identifier: GPL-2.0
/* -*- linux-c -*-
 * sysctl_net_core.c: sysctl interface to net core subsystem.
 *
 * Begun April 1, 1996, Mike Shaver.
 * Added /proc/sys/net/core directory entry (empty =) ). [MS]
 */

#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/sched/isolation.h>

#include <net/ip.h>
#include <net/sock.h>
#include <net/net_ratelimit.h>
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
#include <net/hotdata.h>
#include <net/proto_memory.h>
#include <net/rps.h>

#include "dev.h"

static int int_3600 = 3600;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE;

static int net_msg_warn;	/* Unused, but still a sysctl */

int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);

/* 0 - Keep current behavior:
 *     IPv4: inherit all current settings from init_net
 *     IPv6: reset all settings to default
 * 1 - Both inherit all current settings from init_net
 * 2 - Both reset all settings to default
 * 3 - Both inherit all settings from current netns
 */
int sysctl_devconf_inherit_init_net __read_mostly;
EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);

#if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS)
static void dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
			 struct cpumask *mask)
{
	char kbuf[128];
	int len;

	if (*ppos || !*lenp) {
		*lenp = 0;
		return;
	}

	len = min(sizeof(kbuf) - 1, *lenp);
	len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
	if (!len) {
		*lenp = 0;
		return;
	}

	if (len < *lenp)
		kbuf[len++] = '\n';
	memcpy(buffer, kbuf, len);
	*lenp = len;
	*ppos += len;
}
#endif

#ifdef CONFIG_RPS

static struct cpumask *rps_default_mask_cow_alloc(struct net *net)
{
	struct cpumask *rps_default_mask;

	if (net->core.rps_default_mask)
		return net->core.rps_default_mask;

	rps_default_mask = kzalloc(cpumask_size(), GFP_KERNEL);
	if (!rps_default_mask)
		return NULL;

	/* pairs with READ_ONCE in rx_queue_default_mask() */
	WRITE_ONCE(net->core.rps_default_mask, rps_default_mask);
	return rps_default_mask;
}

static int rps_default_mask_sysctl(struct ctl_table *table, int write,
				   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = (struct net *)table->data;
	int err = 0;

	rtnl_lock();
	if (write) {
		struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net);

		err = -ENOMEM;
		if (!rps_default_mask)
			goto done;

		err = cpumask_parse(buffer, rps_default_mask);
		if (err)
			goto done;

		err = rps_cpumask_housekeeping(rps_default_mask);
		if (err)
			goto done;
	} else {
		dump_cpumask(buffer, lenp, ppos,
			     net->core.rps_default_mask ? : cpu_none_mask);
	}

done:
	rtnl_unlock();
	return err;
}

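/*
 * Handler for /proc/sys/net/core/rps_sock_flow_entries, the size of the
 * global RFS (accelerated Receive Flow Steering) table.  Writes are
 * rounded up to a power of two so that "mask = size - 1" indexing works,
 * and the previous table is freed through RCU once readers have drained.
 *
 * Example usage (the value is illustrative; Documentation/networking/
 * scaling.rst suggests 32768 for a moderately loaded server):
 *
 *   echo 32768 > /proc/sys/net/core/rps_sock_flow_entries
 */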
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int orig_size, size;
	int ret, i;
	struct ctl_table tmp = {
		.data = &size,
		.maxlen = sizeof(size),
		.mode = table->mode
	};
	struct rps_sock_flow_table *orig_sock_table, *sock_table;
	static DEFINE_MUTEX(sock_flow_mutex);

	mutex_lock(&sock_flow_mutex);

	orig_sock_table = rcu_dereference_protected(
					net_hotdata.rps_sock_flow_table,
					lockdep_is_held(&sock_flow_mutex));
	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;

	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

	if (write) {
		if (size) {
			if (size > 1<<29) {
				/* Enforce limit to prevent overflow */
				mutex_unlock(&sock_flow_mutex);
				return -EINVAL;
			}
			size = roundup_pow_of_two(size);
			if (size != orig_size) {
				sock_table =
				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
				if (!sock_table) {
					mutex_unlock(&sock_flow_mutex);
					return -ENOMEM;
				}
				net_hotdata.rps_cpu_mask =
					roundup_pow_of_two(nr_cpu_ids) - 1;
				sock_table->mask = size - 1;
			} else
				sock_table = orig_sock_table;

			for (i = 0; i < size; i++)
				sock_table->ents[i] = RPS_NO_CPU;
		} else
			sock_table = NULL;

		if (sock_table != orig_sock_table) {
			rcu_assign_pointer(net_hotdata.rps_sock_flow_table,
					   sock_table);
			if (sock_table) {
				static_branch_inc(&rps_needed);
				static_branch_inc(&rfs_needed);
			}
			if (orig_sock_table) {
				static_branch_dec(&rps_needed);
				static_branch_dec(&rfs_needed);
				kvfree_rcu_mightsleep(orig_sock_table);
			}
		}
	}

	mutex_unlock(&sock_flow_mutex);

	return ret;
}
#endif /* CONFIG_RPS */

#ifdef CONFIG_NET_FLOW_LIMIT
static DEFINE_MUTEX(flow_limit_update_mutex);

static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	struct sd_flow_limit *cur;
	struct softnet_data *sd;
	cpumask_var_t mask;
	int i, len, ret = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	if (write) {
		ret = cpumask_parse(buffer, mask);
		if (ret)
			goto done;

		mutex_lock(&flow_limit_update_mutex);
		len = sizeof(*cur) + netdev_flow_limit_table_len;
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			cur = rcu_dereference_protected(sd->flow_limit,
				     lockdep_is_held(&flow_limit_update_mutex));
			if (cur && !cpumask_test_cpu(i, mask)) {
				RCU_INIT_POINTER(sd->flow_limit, NULL);
				kfree_rcu_mightsleep(cur);
			} else if (!cur && cpumask_test_cpu(i, mask)) {
				cur = kzalloc_node(len, GFP_KERNEL,
						   cpu_to_node(i));
				if (!cur) {
					/* not unwinding previous changes */
					ret = -ENOMEM;
					goto write_unlock;
				}
				cur->num_buckets = netdev_flow_limit_table_len;
				rcu_assign_pointer(sd->flow_limit, cur);
			}
		}
write_unlock:
		mutex_unlock(&flow_limit_update_mutex);
	} else {
		cpumask_clear(mask);
		rcu_read_lock();
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			if (rcu_dereference(sd->flow_limit))
				cpumask_set_cpu(i, mask);
		}
		rcu_read_unlock();

		dump_cpumask(buffer, lenp, ppos, mask);
	}

done:
	free_cpumask_var(mask);
	return ret;
}

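/*
 * Handler for /proc/sys/net/core/flow_limit_table_len.  The per-CPU
 * tables allocated above index their buckets as "hash & (num_buckets - 1)",
 * so the length must remain a power of two; any other value is rejected
 * with -EINVAL and the previous length is restored.
 *
 * Example (illustrative; the default is 4096 buckets):
 *
 *   echo 8192 > /proc/sys/net/core/flow_limit_table_len
 */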
static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
				       void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int old, *ptr;
	int ret;

	mutex_lock(&flow_limit_update_mutex);

	ptr = table->data;
	old = *ptr;
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write && !is_power_of_2(*ptr)) {
		*ptr = old;
		ret = -EINVAL;
	}

	mutex_unlock(&flow_limit_update_mutex);
	return ret;
}
#endif /* CONFIG_NET_FLOW_LIMIT */

#ifdef CONFIG_NET_SCHED
static int set_default_qdisc(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	char id[IFNAMSIZ];
	struct ctl_table tbl = {
		.data = id,
		.maxlen = IFNAMSIZ,
	};
	int ret;

	qdisc_get_default(id, IFNAMSIZ);

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0)
		ret = qdisc_set_default(id);
	return ret;
}
#endif

static int proc_do_dev_weight(struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	static DEFINE_MUTEX(dev_weight_mutex);
	int ret, weight;

	mutex_lock(&dev_weight_mutex);
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write) {
		weight = READ_ONCE(weight_p);
		WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias);
		WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias);
	}
	mutex_unlock(&dev_weight_mutex);

	return ret;
}

static int proc_do_rss_key(struct ctl_table *table, int write,
			   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table fake_table;
	char buf[NETDEV_RSS_KEY_LEN * 3];

	snprintf(buf, sizeof(buf), "%*phC", NETDEV_RSS_KEY_LEN, netdev_rss_key);
	fake_table.data = buf;
	fake_table.maxlen = sizeof(buf);
	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}

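/*
 * BPF JIT knobs.  Writes are restricted to CAP_SYS_ADMIN.  bpf_jit_enable
 * accepts 0 (interpreter only), 1 (JIT enabled) and, when raw JIT dumps
 * are permitted for the writer's credentials, 2 (JIT enabled with debug
 * output to the kernel log).  With CONFIG_BPF_JIT_ALWAYS_ON the value is
 * pinned to 1 via matching extra1/extra2 bounds in the table below.
 */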
#ifdef CONFIG_BPF_JIT
static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
					   void *buffer, size_t *lenp,
					   loff_t *ppos)
{
	int ret, jit_enable = *(int *)table->data;
	int min = *(int *)table->extra1;
	int max = *(int *)table->extra2;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	tmp.data = &jit_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (jit_enable < 2 ||
		    (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) {
			*(int *)table->data = jit_enable;
			if (jit_enable == 2)
				pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
		} else {
			ret = -EPERM;
		}
	}

	if (write && ret && min == max)
		pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n");

	return ret;
}

# ifdef CONFIG_HAVE_EBPF_JIT
static int
proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
# endif /* CONFIG_HAVE_EBPF_JIT */

static int
proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
				     void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#endif

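/*
 * Global (init_net-wide) knobs published under /proc/sys/net/core.  Each
 * entry binds a file name to a kernel variable; where set, extra1/extra2
 * are the inclusive lower/upper bounds enforced by the *_minmax handlers.
 *
 * Example (illustrative): raise the maximum socket send buffer to 4 MiB:
 *
 *   sysctl -w net.core.wmem_max=4194304
 */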
static struct ctl_table net_core_table[] = {
	{
		.procname = "wmem_max",
		.data = &sysctl_wmem_max,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &min_sndbuf,
	},
	{
		.procname = "rmem_max",
		.data = &sysctl_rmem_max,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &min_rcvbuf,
	},
	{
		.procname = "wmem_default",
		.data = &sysctl_wmem_default,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &min_sndbuf,
	},
	{
		.procname = "rmem_default",
		.data = &sysctl_rmem_default,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &min_rcvbuf,
	},
	{
		.procname = "mem_pcpu_rsv",
		.data = &net_hotdata.sysctl_mem_pcpu_rsv,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &min_mem_pcpu_rsv,
	},
	{
		.procname = "dev_weight",
		.data = &weight_p,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_do_dev_weight,
	},
	{
		.procname = "dev_weight_rx_bias",
		.data = &dev_weight_rx_bias,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_do_dev_weight,
	},
	{
		.procname = "dev_weight_tx_bias",
		.data = &dev_weight_tx_bias,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_do_dev_weight,
	},
	{
		.procname = "netdev_max_backlog",
		.data = &net_hotdata.max_backlog,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec
	},
	{
		.procname = "netdev_rss_key",
		.data = &netdev_rss_key,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = proc_do_rss_key,
	},
#ifdef CONFIG_BPF_JIT
	{
		.procname = "bpf_jit_enable",
		.data = &bpf_jit_enable,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax_bpf_enable,
# ifdef CONFIG_BPF_JIT_ALWAYS_ON
		.extra1 = SYSCTL_ONE,
		.extra2 = SYSCTL_ONE,
# else
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_TWO,
# endif
	},
# ifdef CONFIG_HAVE_EBPF_JIT
	{
		.procname = "bpf_jit_harden",
		.data = &bpf_jit_harden,
		.maxlen = sizeof(int),
		.mode = 0600,
		.proc_handler = proc_dointvec_minmax_bpf_restricted,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_TWO,
	},
	{
		.procname = "bpf_jit_kallsyms",
		.data = &bpf_jit_kallsyms,
		.maxlen = sizeof(int),
		.mode = 0600,
		.proc_handler = proc_dointvec_minmax_bpf_restricted,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	},
# endif
	{
		.procname = "bpf_jit_limit",
		.data = &bpf_jit_limit,
		.maxlen = sizeof(long),
		.mode = 0600,
		.proc_handler = proc_dolongvec_minmax_bpf_restricted,
		.extra1 = SYSCTL_LONG_ONE,
		.extra2 = &bpf_jit_limit_max,
	},
#endif
	{
		.procname = "netdev_tstamp_prequeue",
		.data = &net_hotdata.tstamp_prequeue,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec
	},
	{
		.procname = "message_cost",
		.data = &net_ratelimit_state.interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "message_burst",
		.data = &net_ratelimit_state.burst,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
	{
		.procname = "tstamp_allow_data",
		.data = &sysctl_tstamp_allow_data,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE
	},
#ifdef CONFIG_RPS
	{
		.procname = "rps_sock_flow_entries",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = rps_sock_flow_sysctl
	},
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
	{
		.procname = "flow_limit_cpu_bitmap",
		.mode = 0644,
		.proc_handler = flow_limit_cpu_sysctl
	},
	{
		.procname = "flow_limit_table_len",
		.data = &netdev_flow_limit_table_len,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = flow_limit_table_len_sysctl
	},
#endif /* CONFIG_NET_FLOW_LIMIT */
#ifdef CONFIG_NET_RX_BUSY_POLL
	{
		.procname = "busy_poll",
		.data = &sysctl_net_busy_poll,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
	},
	{
		.procname = "busy_read",
		.data = &sysctl_net_busy_read,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
	},
#endif
#ifdef CONFIG_NET_SCHED
	{
		.procname = "default_qdisc",
		.mode = 0644,
		.maxlen = IFNAMSIZ,
		.proc_handler = set_default_qdisc
	},
#endif
	{
		.procname = "netdev_budget",
		.data = &net_hotdata.netdev_budget,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec
	},
	{
		.procname = "warnings",
		.data = &net_msg_warn,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec
	},
	{
		.procname = "max_skb_frags",
		.data = &net_hotdata.sysctl_max_skb_frags,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ONE,
		.extra2 = &max_skb_frags,
	},
	{
		.procname = "netdev_budget_usecs",
		.data = &net_hotdata.netdev_budget_usecs,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
	},
	{
		.procname = "fb_tunnels_only_for_init_net",
		.data = &sysctl_fb_tunnels_only_for_init_net,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_TWO,
	},
	{
		.procname = "devconf_inherit_init_net",
		.data = &sysctl_devconf_inherit_init_net,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_THREE,
	},
	{
		.procname = "high_order_alloc_disable",
		.data = &net_high_order_alloc_disable_key.key,
		.maxlen = sizeof(net_high_order_alloc_disable_key),
		.mode = 0644,
		.proc_handler = proc_do_static_key,
	},
	{
		.procname = "gro_normal_batch",
		.data = &net_hotdata.gro_normal_batch,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ONE,
	},
	{
		.procname = "netdev_unregister_timeout_secs",
		.data = &netdev_unregister_timeout_secs,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ONE,
		.extra2 = &int_3600,
	},
	{
		.procname = "skb_defer_max",
		.data = &net_hotdata.sysctl_skb_defer_max,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
	},
};

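/*
 * Per-network-namespace knobs.  The .data pointers below reference
 * init_net; for any other namespace, sysctl_core_net_init() duplicates
 * this table and rebases each .data pointer by the byte offset between
 * the new struct net and init_net, so one table layout serves every
 * namespace.
 */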
static struct ctl_table netns_core_table[] = {
#if IS_ENABLED(CONFIG_RPS)
	{
		.procname = "rps_default_mask",
		.data = &init_net,
		.mode = 0644,
		.proc_handler = rps_default_mask_sysctl
	},
#endif
	{
		.procname = "somaxconn",
		.data = &init_net.core.sysctl_somaxconn,
		.maxlen = sizeof(int),
		.mode = 0644,
		.extra1 = SYSCTL_ZERO,
		.proc_handler = proc_dointvec_minmax
	},
	{
		.procname = "optmem_max",
		.data = &init_net.core.sysctl_optmem_max,
		.maxlen = sizeof(int),
		.mode = 0644,
		.extra1 = SYSCTL_ZERO,
		.proc_handler = proc_dointvec_minmax
	},
	{
		.procname = "txrehash",
		.data = &init_net.core.sysctl_txrehash,
		.maxlen = sizeof(u8),
		.mode = 0644,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
		.proc_handler = proc_dou8vec_minmax,
	},
};

static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str)
{
	/* fallback tunnels for initns only */
	if (!strncmp(str, "initns", 6))
		sysctl_fb_tunnels_only_for_init_net = 1;
	/* no fallback tunnels anywhere */
	else if (!strncmp(str, "none", 4))
		sysctl_fb_tunnels_only_for_init_net = 2;

	return 1;
}
__setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);

static __net_init int sysctl_core_net_init(struct net *net)
{
	size_t table_size = ARRAY_SIZE(netns_core_table);
	struct ctl_table *tbl;

	tbl = netns_core_table;
	if (!net_eq(net, &init_net)) {
		int i;

		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
		if (tbl == NULL)
			goto err_dup;

		for (i = 0; i < table_size; ++i)
			tbl[i].data += (char *)net - (char *)&init_net;
	}

	net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size);
	if (net->core.sysctl_hdr == NULL)
		goto err_reg;

	return 0;

err_reg:
	if (tbl != netns_core_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}

static __net_exit void sysctl_core_net_exit(struct net *net)
{
	const struct ctl_table *tbl;

	tbl = net->core.sysctl_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->core.sysctl_hdr);
	BUG_ON(tbl == netns_core_table);
#if IS_ENABLED(CONFIG_RPS)
	kfree(net->core.rps_default_mask);
#endif
	kfree(tbl);
}

static __net_initdata struct pernet_operations sysctl_core_ops = {
	.init = sysctl_core_net_init,
	.exit = sysctl_core_net_exit,
};

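/*
 * Register the global net.core table for init_net, then the per-netns
 * init/exit hooks.  fs_initcall() orders this after core networking
 * setup but before device initcalls, so the sysctl files exist before
 * most drivers load.
 */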
static __init int sysctl_core_init(void)
{
	register_net_sysctl(&init_net, "net/core", net_core_table);
	return register_pernet_subsys(&sysctl_core_ops);
}

fs_initcall(sysctl_core_init);