1 // SPDX-License-Identifier: GPL-2.0
2 /* -*- linux-c -*-
3 * sysctl_net_core.c: sysctl interface to net core subsystem.
4 *
5 * Begun April 1, 1996, Mike Shaver.
6 * Added /proc/sys/net/core directory entry (empty =) ). [MS]
7 */
8
9 #include <linux/filter.h>
10 #include <linux/mm.h>
11 #include <linux/sysctl.h>
12 #include <linux/module.h>
13 #include <linux/socket.h>
14 #include <linux/netdevice.h>
15 #include <linux/ratelimit.h>
16 #include <linux/vmalloc.h>
17 #include <linux/init.h>
18 #include <linux/slab.h>
19 #include <linux/sched/isolation.h>
20 #include <linux/hex.h>
21
22 #include <net/ip.h>
23 #include <net/sock.h>
24 #include <net/net_ratelimit.h>
25 #include <net/busy_poll.h>
26 #include <net/pkt_sched.h>
27 #include <net/hotdata.h>
28 #include <net/proto_memory.h>
29 #include <net/rps.h>
30
31 #include "dev.h"
32 #include "net-sysfs.h"
33
/* Clamp values referenced via .extra1/.extra2 in the sysctl tables below. */
static int int_3600 = 3600;			/* upper bound for netdev_unregister_timeout_secs */
static int min_sndbuf = SOCK_MIN_SNDBUF;	/* floor for wmem_max / wmem_default */
static int min_rcvbuf = SOCK_MIN_RCVBUF;	/* floor for rmem_max / rmem_default */
static int max_skb_frags = MAX_SKB_FRAGS;	/* ceiling for sysctl_max_skb_frags */
static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE;
/* Lower bound for netdev_budget_usecs: two jiffies worth of microseconds. */
static int netdev_budget_usecs_min = 2 * USEC_PER_SEC / HZ;

static int net_msg_warn;	/* Unused, but still a sysctl */
42
/* Controls where fallback tunnel devices are created; the boot parameter
 * handler below maps "initns" -> 1 and "none" -> 2.
 * 0 - create fallback tunnels in every netns (default)
 * 1 - create fallback tunnels in the initial netns only
 * 2 - do not create fallback tunnels anywhere
 */
int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);

/* 0 - Keep current behavior:
 *     IPv4: inherit all current settings from init_net
 *     IPv6: reset all settings to default
 * 1 - Both inherit all current settings from init_net
 * 2 - Both reset all settings to default
 * 3 - Both inherit all settings from current netns
 */
int sysctl_devconf_inherit_init_net __read_mostly;
EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);
55
56 #if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS)
/* Write @mask into the sysctl read @buffer as a hex CPU bitmap terminated
 * by a newline.  Output is produced only on the first read (*ppos == 0);
 * subsequent reads see EOF.  Returns 0 (possibly with *lenp set to 0 on a
 * formatting failure) or -ENOMEM.
 */
static int dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
			struct cpumask *mask)
{
	char *kbuf;
	int len;

	/* Single-shot output: nothing more after the first read. */
	if (*ppos || !*lenp) {
		*lenp = 0;
		return 0;
	}

	/* CPUs are displayed as a hex bitmap + a comma between each groups of 8
	 * nibbles (except the last one which has a newline instead).
	 * Guesstimate the buffer size at the group granularity level.
	 */
	len = min(DIV_ROUND_UP(nr_cpumask_bits, 32) * (8 + 1), *lenp);
	kbuf = kmalloc(len, GFP_KERNEL);
	if (!kbuf) {
		*lenp = 0;
		return -ENOMEM;
	}

	len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
	if (!len) {
		*lenp = 0;
		goto free_buf;
	}

	/* scnprintf writes a trailing null char not counted in the returned
	 * length, override it with a newline.
	 */
	kbuf[len++] = '\n';
	memcpy(buffer, kbuf, len);
	*lenp = len;
	*ppos += len;

free_buf:
	kfree(kbuf);
	return 0;
}
97 #endif
98
99 #ifdef CONFIG_RPS
100
/* Serializes accesses to the per-netns rps_default_mask. */
DEFINE_MUTEX(rps_default_mask_mutex);

/* Handler for the per-netns "rps_default_mask" sysctl.  table->data holds
 * the owning struct net.  The cpumask is allocated lazily on first write;
 * reads of an unconfigured mask show an empty bitmap.
 */
static int rps_default_mask_sysctl(const struct ctl_table *table, int write,
				   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = (struct net *)table->data;
	struct cpumask *mask;
	int err = 0;

	mutex_lock(&rps_default_mask_mutex);
	mask = net->core.rps_default_mask;
	if (write) {
		/* Lazy allocation; the mask stays installed even if a later
		 * parse fails, but its contents are then unspecified until a
		 * successful write.
		 */
		if (!mask) {
			mask = kzalloc(cpumask_size(), GFP_KERNEL);
			net->core.rps_default_mask = mask;
		}
		err = -ENOMEM;
		if (!mask)
			goto done;

		err = cpumask_parse(buffer, mask);
		if (err)
			goto done;

		err = rps_cpumask_housekeeping(mask);
		if (err)
			goto done;
	} else {
		err = dump_cpumask(buffer, lenp, ppos,
				   mask ?: cpu_none_mask);
	}

done:
	mutex_unlock(&rps_default_mask_mutex);
	return err;
}
137
/* Handler for "rps_sock_flow_entries": resizes (or removes, when 0 is
 * written) the global RFS socket flow table.  The requested size is rounded
 * up to a power of two; the rps_needed/rfs_needed static keys are adjusted
 * to reflect whether a table is installed.
 */
static int rps_sock_flow_sysctl(const struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int orig_size, size;
	int ret, i;
	/* Proxy table so proc_dointvec() operates on the local "size". */
	struct ctl_table tmp = {
		.data = &size,
		.maxlen = sizeof(size),
		.mode = table->mode
	};
	struct rps_sock_flow_table *orig_sock_table, *sock_table;
	static DEFINE_MUTEX(sock_flow_mutex);

	mutex_lock(&sock_flow_mutex);

	orig_sock_table = rcu_dereference_protected(
					net_hotdata.rps_sock_flow_table,
					lockdep_is_held(&sock_flow_mutex));
	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;

	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

	if (write) {
		if (size) {
			if (size > 1<<29) {
				/* Enforce limit to prevent overflow */
				mutex_unlock(&sock_flow_mutex);
				return -EINVAL;
			}
			size = roundup_pow_of_two(size);
			if (size != orig_size) {
				sock_table =
				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
				if (!sock_table) {
					mutex_unlock(&sock_flow_mutex);
					return -ENOMEM;
				}
				net_hotdata.rps_cpu_mask =
					roundup_pow_of_two(nr_cpu_ids) - 1;
				sock_table->mask = size - 1;
			} else
				sock_table = orig_sock_table;

			/* (Re)initialize every entry, even on a reused table. */
			for (i = 0; i < size; i++)
				sock_table->ents[i] = RPS_NO_CPU;
		} else
			sock_table = NULL;

		if (sock_table != orig_sock_table) {
			/* Publish the new table before retiring the old one;
			 * readers may still hold the old pointer until a grace
			 * period elapses, hence kvfree_rcu().
			 */
			rcu_assign_pointer(net_hotdata.rps_sock_flow_table,
					   sock_table);
			if (sock_table) {
				static_branch_inc(&rps_needed);
				static_branch_inc(&rfs_needed);
			}
			if (orig_sock_table) {
				static_branch_dec(&rps_needed);
				static_branch_dec(&rfs_needed);
				kvfree_rcu(orig_sock_table, rcu);
			}
		}
	}

	mutex_unlock(&sock_flow_mutex);

	return ret;
}
205 #endif /* CONFIG_RPS */
206
207 #ifdef CONFIG_NET_FLOW_LIMIT
/* Serializes updates to the per-CPU softnet flow-limit tables. */
static DEFINE_MUTEX(flow_limit_update_mutex);

/* Handler for "flow_limit_cpu_bitmap".  Writing a cpumask allocates a
 * flow-limit table for CPUs newly set in the mask and releases (via RCU)
 * the tables of CPUs that were cleared; reading reports the mask of CPUs
 * that currently have a table installed.
 */
static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	struct sd_flow_limit *cur;
	struct softnet_data *sd;
	cpumask_var_t mask;
	int i, len, ret = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	if (write) {
		ret = cpumask_parse(buffer, mask);
		if (ret)
			goto done;

		mutex_lock(&flow_limit_update_mutex);
		/* Table plus trailing bucket storage in one allocation. */
		len = sizeof(*cur) + netdev_flow_limit_table_len;
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			cur = rcu_dereference_protected(sd->flow_limit,
				     lockdep_is_held(&flow_limit_update_mutex));
			if (cur && !cpumask_test_cpu(i, mask)) {
				/* CPU cleared: retire its table after a grace
				 * period so concurrent readers stay safe.
				 */
				RCU_INIT_POINTER(sd->flow_limit, NULL);
				kfree_rcu(cur, rcu);
			} else if (!cur && cpumask_test_cpu(i, mask)) {
				cur = kzalloc_node(len, GFP_KERNEL,
						   cpu_to_node(i));
				if (!cur) {
					/* not unwinding previous changes */
					ret = -ENOMEM;
					goto write_unlock;
				}
				cur->log_buckets = ilog2(netdev_flow_limit_table_len);
				rcu_assign_pointer(sd->flow_limit, cur);
			}
		}
write_unlock:
		mutex_unlock(&flow_limit_update_mutex);
	} else {
		cpumask_clear(mask);
		rcu_read_lock();
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			if (rcu_dereference(sd->flow_limit))
				cpumask_set_cpu(i, mask);
		}
		rcu_read_unlock();

		ret = dump_cpumask(buffer, lenp, ppos, mask);
	}

done:
	free_cpumask_var(mask);
	return ret;
}
266
/* Handler for "flow_limit_table_len": accepts only power-of-two values,
 * rolling back any invalid write under flow_limit_update_mutex.
 */
static int flow_limit_table_len_sysctl(const struct ctl_table *table, int write,
				       void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int *len_ptr = table->data;
	unsigned int saved;
	int ret;

	mutex_lock(&flow_limit_update_mutex);

	saved = *len_ptr;
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write && !is_power_of_2(*len_ptr)) {
		/* Reject the new value and restore the previous length. */
		*len_ptr = saved;
		ret = -EINVAL;
	}

	mutex_unlock(&flow_limit_update_mutex);
	return ret;
}
286 #endif /* CONFIG_NET_FLOW_LIMIT */
287
288 #ifdef CONFIG_NET_SCHED
/* Handler for "default_qdisc": reads report the current default queueing
 * discipline name; writes install a new one via qdisc_set_default().
 */
static int set_default_qdisc(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	char name[IFNAMSIZ];
	struct ctl_table tmp_table = {
		.data = name,
		.maxlen = IFNAMSIZ,
	};
	int ret;

	/* Seed the buffer with the currently installed default qdisc. */
	qdisc_get_default(name, IFNAMSIZ);

	ret = proc_dostring(&tmp_table, write, buffer, lenp, ppos);
	if (!ret && write)
		ret = qdisc_set_default(name);

	return ret;
}
306 #endif
307
/* Handler shared by "dev_weight" and the rx/tx bias sysctls: after a
 * successful write it recomputes the derived rx/tx budgets from weight_p
 * and the bias values, all under a local mutex so concurrent writers
 * cannot interleave the updates.
 */
static int proc_do_dev_weight(const struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	static DEFINE_MUTEX(dev_weight_mutex);
	int ret;

	mutex_lock(&dev_weight_mutex);
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (write && !ret) {
		int base = READ_ONCE(weight_p);

		WRITE_ONCE(net_hotdata.dev_rx_weight, base * dev_weight_rx_bias);
		WRITE_ONCE(net_hotdata.dev_tx_weight, base * dev_weight_tx_bias);
	}
	mutex_unlock(&dev_weight_mutex);

	return ret;
}
325
/* Handler for the read-only "netdev_rss_key" sysctl: formats the key as
 * colon-separated hex bytes ("ab:cd:...") into a stack buffer and hands
 * that to proc_dostring() via a temporary table.
 */
static int proc_do_rss_key(const struct ctl_table *table, int write,
			   void *buffer, size_t *lenp, loff_t *ppos)
{
	char hexdump[NETDEV_RSS_KEY_LEN * 3];
	struct ctl_table fake_table;
	char *p = hexdump;
	int i;

	for (i = 0; i < NETDEV_RSS_KEY_LEN; i++) {
		p = hex_byte_pack(p, netdev_rss_key[i]);
		*p++ = ':';
	}
	/* Turn the trailing ':' into the NUL terminator. */
	*(p - 1) = '\0';

	/* Only .data/.maxlen are consumed by proc_dostring(). */
	fake_table.data = hexdump;
	fake_table.maxlen = sizeof(hexdump);
	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}
343
344 #ifdef CONFIG_BPF_JIT
/* Handler for "bpf_jit_enable".  Writes require CAP_SYS_ADMIN; the debug
 * value 2 is additionally gated on bpf_dump_raw_ok() for the writer's
 * credentials.  With CONFIG_BPF_JIT_ALWAYS_ON the table pins min == max == 1.
 */
static int proc_dointvec_minmax_bpf_enable(const struct ctl_table *table, int write,
					   void *buffer, size_t *lenp,
					   loff_t *ppos)
{
	/* Work on a local copy so a rejected write leaves the real value intact. */
	int ret, jit_enable = *(int *)table->data;
	int min = *(int *)table->extra1;
	int max = *(int *)table->extra2;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	tmp.data = &jit_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (jit_enable < 2 ||
		    (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) {
			/* Accepted: commit the new value. */
			*(int *)table->data = jit_enable;
			if (jit_enable == 2)
				pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
		} else {
			ret = -EPERM;
		}
	}

	/* min == max implies CONFIG_BPF_JIT_ALWAYS_ON (extra1 == extra2 == 1). */
	if (write && ret && min == max)
		pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n");

	return ret;
}
375
376 # ifdef CONFIG_HAVE_EBPF_JIT
377 static int
proc_dointvec_minmax_bpf_restricted(const struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)378 proc_dointvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
379 void *buffer, size_t *lenp, loff_t *ppos)
380 {
381 if (!capable(CAP_SYS_ADMIN))
382 return -EPERM;
383
384 return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
385 }
386 # endif /* CONFIG_HAVE_EBPF_JIT */
387
388 static int
proc_dolongvec_minmax_bpf_restricted(const struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)389 proc_dolongvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
390 void *buffer, size_t *lenp, loff_t *ppos)
391 {
392 if (!capable(CAP_SYS_ADMIN))
393 return -EPERM;
394
395 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
396 }
397 #endif
398
/* Global (not per-netns) net.core.* sysctls, registered once for init_net. */
static struct ctl_table net_core_table[] = {
	{
		.procname	= "mem_pcpu_rsv",
		.data		= &net_hotdata.sysctl_mem_pcpu_rsv,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_mem_pcpu_rsv,
	},
	{
		.procname	= "dev_weight",
		.data		= &weight_p,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "dev_weight_rx_bias",
		.data		= &dev_weight_rx_bias,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "dev_weight_tx_bias",
		.data		= &dev_weight_tx_bias,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "netdev_max_backlog",
		.data		= &net_hotdata.max_backlog,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "qdisc_max_burst",
		.data		= &net_hotdata.qdisc_max_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		/* Read-only; maxlen is unused because proc_do_rss_key formats
		 * into its own stack buffer.
		 */
		.procname	= "netdev_rss_key",
		.data		= &netdev_rss_key,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_do_rss_key,
	},
#ifdef CONFIG_BPF_JIT
	{
		.procname	= "bpf_jit_enable",
		.data		= &bpf_jit_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax_bpf_enable,
# ifdef CONFIG_BPF_JIT_ALWAYS_ON
		/* min == max == 1: the JIT cannot be disabled. */
		.extra1		= SYSCTL_ONE,
		.extra2		= SYSCTL_ONE,
# else
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
# endif
	},
# ifdef CONFIG_HAVE_EBPF_JIT
	{
		.procname	= "bpf_jit_harden",
		.data		= &bpf_jit_harden,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "bpf_jit_kallsyms",
		.data		= &bpf_jit_kallsyms,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
# endif
	{
		.procname	= "bpf_jit_limit",
		.data		= &bpf_jit_limit,
		.maxlen		= sizeof(long),
		.mode		= 0600,
		.proc_handler	= proc_dolongvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_LONG_ONE,
		.extra2		= &bpf_jit_limit_max,
	},
#endif
	{
		.procname	= "netdev_tstamp_prequeue",
		.data		= &net_hotdata.tstamp_prequeue,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "message_cost",
		.data		= &net_ratelimit_state.interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "message_burst",
		.data		= &net_ratelimit_state.burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_RPS
	{
		/* .data unused: the handler operates on the global table. */
		.procname	= "rps_sock_flow_entries",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= rps_sock_flow_sysctl
	},
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
	{
		.procname	= "flow_limit_cpu_bitmap",
		.mode		= 0644,
		.proc_handler	= flow_limit_cpu_sysctl
	},
	{
		.procname	= "flow_limit_table_len",
		.data		= &netdev_flow_limit_table_len,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= flow_limit_table_len_sysctl
	},
#endif /* CONFIG_NET_FLOW_LIMIT */
#ifdef CONFIG_NET_RX_BUSY_POLL
	{
		.procname	= "busy_poll",
		.data		= &sysctl_net_busy_poll,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{
		.procname	= "busy_read",
		.data		= &sysctl_net_busy_read,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
#endif
#ifdef CONFIG_NET_SCHED
	{
		.procname	= "default_qdisc",
		.mode		= 0644,
		.maxlen		= IFNAMSIZ,
		.proc_handler	= set_default_qdisc
	},
#endif
	{
		.procname	= "netdev_budget",
		.data		= &net_hotdata.netdev_budget,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		/* Backing variable is unused; kept for ABI compatibility. */
		.procname	= "warnings",
		.data		= &net_msg_warn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "max_skb_frags",
		.data		= &net_hotdata.sysctl_max_skb_frags,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &max_skb_frags,
	},
	{
		.procname	= "netdev_budget_usecs",
		.data		= &net_hotdata.netdev_budget_usecs,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &netdev_budget_usecs_min,
	},
	{
		.procname	= "fb_tunnels_only_for_init_net",
		.data		= &sysctl_fb_tunnels_only_for_init_net,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "devconf_inherit_init_net",
		.data		= &sysctl_devconf_inherit_init_net,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_THREE,
	},
	{
		.procname	= "high_order_alloc_disable",
		.data		= &net_high_order_alloc_disable_key.key,
		.maxlen		= sizeof(net_high_order_alloc_disable_key),
		.mode		= 0644,
		.proc_handler	= proc_do_static_key,
	},
	{
		.procname	= "gro_normal_batch",
		.data		= &net_hotdata.gro_normal_batch,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "netdev_unregister_timeout_secs",
		.data		= &netdev_unregister_timeout_secs,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &int_3600,
	},
	{
		.procname	= "skb_defer_max",
		.data		= &net_hotdata.sysctl_skb_defer_max,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
};
649
/* Per-netns net.core.* sysctls.  Ordering matters: sysctl_core_net_init()
 * rebases the .data pointers of every entry before "wmem_max" into the new
 * netns and makes the remaining (global) entries read-only there.
 */
static struct ctl_table netns_core_table[] = {
#if IS_ENABLED(CONFIG_RPS)
	{
		/* .data is the struct net itself; rebased like the others. */
		.procname	= "rps_default_mask",
		.data		= &init_net,
		.mode		= 0644,
		.proc_handler	= rps_default_mask_sysctl
	},
#endif
	{
		.procname	= "somaxconn",
		.data		= &init_net.core.sysctl_somaxconn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.extra1		= SYSCTL_ZERO,
		.proc_handler	= proc_dointvec_minmax
	},
	{
		.procname	= "optmem_max",
		.data		= &init_net.core.sysctl_optmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.extra1		= SYSCTL_ZERO,
		.proc_handler	= proc_dointvec_minmax
	},
	{
		.procname	= "txrehash",
		.data		= &init_net.core.sysctl_txrehash,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
		.proc_handler	= proc_dou8vec_minmax,
	},
	{
		.procname	= "txq_reselection_ms",
		.data		= &init_net.core.sysctl_txq_reselection,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "tstamp_allow_data",
		.data		= &init_net.core.sysctl_tstamp_allow_data,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE
	},
	{
		.procname	= "bypass_prot_mem",
		.data		= &init_net.core.sysctl_bypass_prot_mem,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.proc_handler	= proc_dou8vec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE
	},
	/* sysctl_core_net_init() will set the values after this
	 * to readonly in network namespaces
	 */
	{
		.procname	= "wmem_max",
		.data		= &sysctl_wmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_sndbuf,
	},
	{
		.procname	= "rmem_max",
		.data		= &sysctl_rmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_rcvbuf,
	},
	{
		.procname	= "wmem_default",
		.data		= &sysctl_wmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_sndbuf,
	},
	{
		.procname	= "rmem_default",
		.data		= &sysctl_rmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_rcvbuf,
	},
};
745
fb_tunnels_only_for_init_net_sysctl_setup(char * str)746 static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str)
747 {
748 /* fallback tunnels for initns only */
749 if (!strncmp(str, "initns", 6))
750 sysctl_fb_tunnels_only_for_init_net = 1;
751 /* no fallback tunnels anywhere */
752 else if (!strncmp(str, "none", 4))
753 sysctl_fb_tunnels_only_for_init_net = 2;
754
755 return 1;
756 }
757 __setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);
758
/* Per-netns setup: register "net/core".  init_net uses the static template
 * directly; other namespaces get a private copy with per-netns entries
 * rebased to their own struct net and global entries made read-only.
 */
static __net_init int sysctl_core_net_init(struct net *net)
{
	size_t table_size = ARRAY_SIZE(netns_core_table);
	struct ctl_table *tbl;

	tbl = netns_core_table;
	if (!net_eq(net, &init_net)) {
		int i;
		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
		if (tbl == NULL)
			goto err_dup;

		/* Entries before "wmem_max" hold per-netns data: shift their
		 * pointers by the offset between this netns and init_net.
		 */
		for (i = 0; i < table_size; ++i) {
			if (tbl[i].data == &sysctl_wmem_max)
				break;

			tbl[i].data += (char *)net - (char *)&init_net;
		}
		/* Remaining entries reference globals: strip write bits. */
		for (; i < table_size; ++i)
			tbl[i].mode &= ~0222;
	}

	net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size);
	if (net->core.sysctl_hdr == NULL)
		goto err_reg;

	return 0;

err_reg:
	/* Only free the duplicate, never the static template. */
	if (tbl != netns_core_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}
793
/* Per-netns teardown: unregister "net/core" and free the duplicated table
 * plus the netns' RPS default mask (if any).
 */
static __net_exit void sysctl_core_net_exit(struct net *net)
{
	const struct ctl_table *tbl;

	tbl = net->core.sysctl_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->core.sysctl_hdr);
	/* The static template must never reach kfree() below. */
	BUG_ON(tbl == netns_core_table);
#if IS_ENABLED(CONFIG_RPS)
	kfree(net->core.rps_default_mask);
#endif
	kfree(tbl);
}
806
/* Hooks the per-netns table registration into netns creation/destruction. */
static __net_initdata struct pernet_operations sysctl_core_ops = {
	.init = sysctl_core_net_init,
	.exit = sysctl_core_net_exit,
};

/* Module init: register the global table (init_net only) and the pernet
 * operations for the namespaced entries.
 */
static __init int sysctl_core_init(void)
{
	register_net_sysctl(&init_net, "net/core", net_core_table);
	return register_pernet_subsys(&sysctl_core_ops);
}

fs_initcall(sysctl_core_init);
819