1 /* -*- linux-c -*- 2 * sysctl_net_core.c: sysctl interface to net core subsystem. 3 * 4 * Begun April 1, 1996, Mike Shaver. 5 * Added /proc/sys/net/core directory entry (empty =) ). [MS] 6 */ 7 8 #include <linux/mm.h> 9 #include <linux/sysctl.h> 10 #include <linux/module.h> 11 #include <linux/socket.h> 12 #include <linux/netdevice.h> 13 #include <linux/ratelimit.h> 14 #include <linux/vmalloc.h> 15 #include <linux/init.h> 16 #include <linux/slab.h> 17 #include <linux/kmemleak.h> 18 19 #include <net/ip.h> 20 #include <net/sock.h> 21 #include <net/net_ratelimit.h> 22 #include <net/busy_poll.h> 23 24 static int one = 1; 25 26 #ifdef CONFIG_RPS 27 static int rps_sock_flow_sysctl(struct ctl_table *table, int write, 28 void __user *buffer, size_t *lenp, loff_t *ppos) 29 { 30 unsigned int orig_size, size; 31 int ret, i; 32 struct ctl_table tmp = { 33 .data = &size, 34 .maxlen = sizeof(size), 35 .mode = table->mode 36 }; 37 struct rps_sock_flow_table *orig_sock_table, *sock_table; 38 static DEFINE_MUTEX(sock_flow_mutex); 39 40 mutex_lock(&sock_flow_mutex); 41 42 orig_sock_table = rcu_dereference_protected(rps_sock_flow_table, 43 lockdep_is_held(&sock_flow_mutex)); 44 size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; 45 46 ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); 47 48 if (write) { 49 if (size) { 50 if (size > 1<<30) { 51 /* Enforce limit to prevent overflow */ 52 mutex_unlock(&sock_flow_mutex); 53 return -EINVAL; 54 } 55 size = roundup_pow_of_two(size); 56 if (size != orig_size) { 57 sock_table = 58 vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size)); 59 if (!sock_table) { 60 mutex_unlock(&sock_flow_mutex); 61 return -ENOMEM; 62 } 63 64 sock_table->mask = size - 1; 65 } else 66 sock_table = orig_sock_table; 67 68 for (i = 0; i < size; i++) 69 sock_table->ents[i] = RPS_NO_CPU; 70 } else 71 sock_table = NULL; 72 73 if (sock_table != orig_sock_table) { 74 rcu_assign_pointer(rps_sock_flow_table, sock_table); 75 if (sock_table) 76 static_key_slow_inc(&rps_needed); 77 if (orig_sock_table) { 78 static_key_slow_dec(&rps_needed); 79 synchronize_rcu(); 80 vfree(orig_sock_table); 81 } 82 } 83 } 84 85 mutex_unlock(&sock_flow_mutex); 86 87 return ret; 88 } 89 #endif /* CONFIG_RPS */ 90 91 #ifdef CONFIG_NET_FLOW_LIMIT 92 static DEFINE_MUTEX(flow_limit_update_mutex); 93 94 static int flow_limit_cpu_sysctl(struct ctl_table *table, int write, 95 void __user *buffer, size_t *lenp, 96 loff_t *ppos) 97 { 98 struct sd_flow_limit *cur; 99 struct softnet_data *sd; 100 cpumask_var_t mask; 101 int i, len, ret = 0; 102 103 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 104 return -ENOMEM; 105 106 if (write) { 107 ret = cpumask_parse_user(buffer, *lenp, mask); 108 if (ret) 109 goto done; 110 111 mutex_lock(&flow_limit_update_mutex); 112 len = sizeof(*cur) + netdev_flow_limit_table_len; 113 for_each_possible_cpu(i) { 114 sd = &per_cpu(softnet_data, i); 115 cur = rcu_dereference_protected(sd->flow_limit, 116 lockdep_is_held(&flow_limit_update_mutex)); 117 if (cur && !cpumask_test_cpu(i, mask)) { 118 RCU_INIT_POINTER(sd->flow_limit, NULL); 119 synchronize_rcu(); 120 kfree(cur); 121 } else if (!cur && cpumask_test_cpu(i, mask)) { 122 cur = kzalloc(len, GFP_KERNEL); 123 if (!cur) { 124 /* not unwinding previous changes */ 125 ret = -ENOMEM; 126 goto write_unlock; 127 } 128 cur->num_buckets = netdev_flow_limit_table_len; 129 rcu_assign_pointer(sd->flow_limit, cur); 130 } 131 } 132 write_unlock: 133 mutex_unlock(&flow_limit_update_mutex); 134 } else { 135 char kbuf[128]; 136 137 if (*ppos || !*lenp) { 138 *lenp = 0; 139 goto done; 140 } 141 142 cpumask_clear(mask); 143 rcu_read_lock(); 144 for_each_possible_cpu(i) { 145 sd = &per_cpu(softnet_data, i); 146 if (rcu_dereference(sd->flow_limit)) 147 cpumask_set_cpu(i, mask); 148 } 149 rcu_read_unlock(); 150 151 len = min(sizeof(kbuf) - 1, *lenp); 152 len = cpumask_scnprintf(kbuf, len, mask); 153 if (!len) { 154 *lenp = 0; 155 goto done; 156 } 157 if (len < *lenp) 158 kbuf[len++] = '\n'; 159 if (copy_to_user(buffer, kbuf, len)) { 160 ret = -EFAULT; 161 goto done; 162 } 163 *lenp = len; 164 *ppos += len; 165 } 166 167 done: 168 free_cpumask_var(mask); 169 return ret; 170 } 171 172 static int flow_limit_table_len_sysctl(struct ctl_table *table, int write, 173 void __user *buffer, size_t *lenp, 174 loff_t *ppos) 175 { 176 unsigned int old, *ptr; 177 int ret; 178 179 mutex_lock(&flow_limit_update_mutex); 180 181 ptr = table->data; 182 old = *ptr; 183 ret = proc_dointvec(table, write, buffer, lenp, ppos); 184 if (!ret && write && !is_power_of_2(*ptr)) { 185 *ptr = old; 186 ret = -EINVAL; 187 } 188 189 mutex_unlock(&flow_limit_update_mutex); 190 return ret; 191 } 192 #endif /* CONFIG_NET_FLOW_LIMIT */ 193 194 static struct ctl_table net_core_table[] = { 195 #ifdef CONFIG_NET 196 { 197 .procname = "wmem_max", 198 .data = &sysctl_wmem_max, 199 .maxlen = sizeof(int), 200 .mode = 0644, 201 .proc_handler = proc_dointvec_minmax, 202 .extra1 = &one, 203 }, 204 { 205 .procname = "rmem_max", 206 .data = &sysctl_rmem_max, 207 .maxlen = sizeof(int), 208 .mode = 0644, 209 .proc_handler = proc_dointvec_minmax, 210 .extra1 = &one, 211 }, 212 { 213 .procname = "wmem_default", 214 .data = &sysctl_wmem_default, 215 .maxlen = sizeof(int), 216 .mode = 0644, 217 .proc_handler = proc_dointvec_minmax, 218 .extra1 = &one, 219 }, 220 { 221 .procname = "rmem_default", 222 .data = &sysctl_rmem_default, 223 .maxlen = sizeof(int), 224 .mode = 0644, 225 .proc_handler = proc_dointvec_minmax, 226 .extra1 = &one, 227 }, 228 { 229 .procname = "dev_weight", 230 .data = &weight_p, 231 .maxlen = sizeof(int), 232 .mode = 0644, 233 .proc_handler = proc_dointvec 234 }, 235 { 236 .procname = "netdev_max_backlog", 237 .data = &netdev_max_backlog, 238 .maxlen = sizeof(int), 239 .mode = 0644, 240 .proc_handler = proc_dointvec 241 }, 242 #ifdef CONFIG_BPF_JIT 243 { 244 .procname = "bpf_jit_enable", 245 .data = &bpf_jit_enable, 246 .maxlen = sizeof(int), 247 .mode = 0644, 248 .proc_handler = proc_dointvec 249 }, 250 #endif 251 { 252 .procname = "netdev_tstamp_prequeue", 253 .data = &netdev_tstamp_prequeue, 254 .maxlen = sizeof(int), 255 .mode = 0644, 256 .proc_handler = proc_dointvec 257 }, 258 { 259 .procname = "message_cost", 260 .data = &net_ratelimit_state.interval, 261 .maxlen = sizeof(int), 262 .mode = 0644, 263 .proc_handler = proc_dointvec_jiffies, 264 }, 265 { 266 .procname = "message_burst", 267 .data = &net_ratelimit_state.burst, 268 .maxlen = sizeof(int), 269 .mode = 0644, 270 .proc_handler = proc_dointvec, 271 }, 272 { 273 .procname = "optmem_max", 274 .data = &sysctl_optmem_max, 275 .maxlen = sizeof(int), 276 .mode = 0644, 277 .proc_handler = proc_dointvec 278 }, 279 #ifdef CONFIG_RPS 280 { 281 .procname = "rps_sock_flow_entries", 282 .maxlen = sizeof(int), 283 .mode = 0644, 284 .proc_handler = rps_sock_flow_sysctl 285 }, 286 #endif 287 #ifdef CONFIG_NET_FLOW_LIMIT 288 { 289 .procname = "flow_limit_cpu_bitmap", 290 .mode = 0644, 291 .proc_handler = flow_limit_cpu_sysctl 292 }, 293 { 294 .procname = "flow_limit_table_len", 295 .data = &netdev_flow_limit_table_len, 296 .maxlen = sizeof(int), 297 .mode = 0644, 298 .proc_handler = flow_limit_table_len_sysctl 299 }, 300 #endif /* CONFIG_NET_FLOW_LIMIT */ 301 #ifdef CONFIG_NET_LL_RX_POLL 302 { 303 .procname = "busy_poll", 304 .data = &sysctl_net_busy_poll, 305 .maxlen = sizeof(unsigned int), 306 .mode = 0644, 307 .proc_handler = proc_dointvec 308 }, 309 { 310 .procname = "busy_read", 311 .data = &sysctl_net_busy_read, 312 .maxlen = sizeof(unsigned int), 313 .mode = 0644, 314 .proc_handler = proc_dointvec 315 }, 316 # 317 #endif 318 #endif /* CONFIG_NET */ 319 { 320 .procname = "netdev_budget", 321 .data = &netdev_budget, 322 .maxlen = sizeof(int), 323 .mode = 0644, 324 .proc_handler = proc_dointvec 325 }, 326 { 327 .procname = "warnings", 328 .data = &net_msg_warn, 329 .maxlen = sizeof(int), 330 .mode = 0644, 331 .proc_handler = proc_dointvec 332 }, 333 { } 334 }; 335 336 static struct ctl_table netns_core_table[] = { 337 { 338 .procname = "somaxconn", 339 .data = &init_net.core.sysctl_somaxconn, 340 .maxlen = sizeof(int), 341 .mode = 0644, 342 .proc_handler = proc_dointvec 343 }, 344 { } 345 }; 346 347 static __net_init int sysctl_core_net_init(struct net *net) 348 { 349 struct ctl_table *tbl; 350 351 net->core.sysctl_somaxconn = SOMAXCONN; 352 353 tbl = netns_core_table; 354 if (!net_eq(net, &init_net)) { 355 tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); 356 if (tbl == NULL) 357 goto err_dup; 358 359 tbl[0].data = &net->core.sysctl_somaxconn; 360 361 /* Don't export any sysctls to unprivileged users */ 362 if (net->user_ns != &init_user_ns) { 363 tbl[0].procname = NULL; 364 } 365 } 366 367 net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl); 368 if (net->core.sysctl_hdr == NULL) 369 goto err_reg; 370 371 return 0; 372 373 err_reg: 374 if (tbl != netns_core_table) 375 kfree(tbl); 376 err_dup: 377 return -ENOMEM; 378 } 379 380 static __net_exit void sysctl_core_net_exit(struct net *net) 381 { 382 struct ctl_table *tbl; 383 384 tbl = net->core.sysctl_hdr->ctl_table_arg; 385 unregister_net_sysctl_table(net->core.sysctl_hdr); 386 BUG_ON(tbl == netns_core_table); 387 kfree(tbl); 388 } 389 390 static __net_initdata struct pernet_operations sysctl_core_ops = { 391 .init = sysctl_core_net_init, 392 .exit = sysctl_core_net_exit, 393 }; 394 395 static __init int sysctl_core_init(void) 396 { 397 register_net_sysctl(&init_net, "net/core", net_core_table); 398 return register_pernet_subsys(&sysctl_core_ops); 399 } 400 401 fs_initcall(sysctl_core_init); 402