xref: /linux/kernel/sysctl.c (revision ac6a0cf6716bb46813d0161024c66c2af66e53d1)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/security.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/kmemcheck.h>
31 #include <linux/smp_lock.h>
32 #include <linux/fs.h>
33 #include <linux/init.h>
34 #include <linux/kernel.h>
35 #include <linux/kobject.h>
36 #include <linux/net.h>
37 #include <linux/sysrq.h>
38 #include <linux/highuid.h>
39 #include <linux/writeback.h>
40 #include <linux/hugetlb.h>
41 #include <linux/initrd.h>
42 #include <linux/key.h>
43 #include <linux/times.h>
44 #include <linux/limits.h>
45 #include <linux/dcache.h>
46 #include <linux/syscalls.h>
47 #include <linux/vmstat.h>
48 #include <linux/nfs_fs.h>
49 #include <linux/acpi.h>
50 #include <linux/reboot.h>
51 #include <linux/ftrace.h>
52 #include <linux/security.h>
53 #include <linux/slow-work.h>
54 #include <linux/perf_counter.h>
55 
56 #include <asm/uaccess.h>
57 #include <asm/processor.h>
58 
59 #ifdef CONFIG_X86
60 #include <asm/nmi.h>
61 #include <asm/stacktrace.h>
62 #include <asm/io.h>
63 #endif
64 
65 static int deprecated_sysctl_warning(struct __sysctl_args *args);
66 
67 #if defined(CONFIG_SYSCTL)
68 
/*
 * Variables that are not declared in any header file; they are
 * declared here so the sysctl tables below can take their addresses.
 */
extern int C_A_D;
extern int print_fatal_signals;
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern int sysctl_panic_on_oom;
extern int sysctl_oom_kill_allocating_task;
extern int sysctl_oom_dump_tasks;
extern int max_threads;
extern int core_uses_pid;
extern int suid_dumpable;
extern char core_pattern[];
extern int pid_max;
extern int min_free_kbytes;
extern int pid_max_min;
extern int pid_max_max;
extern int sysctl_drop_caches;
extern int percpu_pagelist_fraction;
extern int compat_log;
extern int latencytop_enabled;
extern int sysctl_nr_open_min;
extern int sysctl_nr_open_max;
#ifndef CONFIG_MMU
extern int sysctl_nr_trim_pages;
#endif /* !CONFIG_MMU */
#ifdef CONFIG_RCU_TORTURE_TEST
extern int rcutorture_runnable;
#endif /* CONFIG_RCU_TORTURE_TEST */
95 
96 /* Constants used for minimum and  maximum */
97 #ifdef CONFIG_DETECT_SOFTLOCKUP
98 static int sixty = 60;
99 static int neg_one = -1;
100 #endif
101 
102 static int zero;
103 static int __maybe_unused one = 1;
104 static int __maybe_unused two = 2;
105 static unsigned long one_ul = 1;
106 static int one_hundred = 100;
107 
108 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
109 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
110 
111 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
112 static int maxolduid = 65535;
113 static int minolduid;
114 static int min_percpu_pagelist_fract = 8;
115 
116 static int ngroups_max = NGROUPS_MAX;
117 
/*
 * Configuration- and architecture-specific variables referenced by the
 * sysctl tables; each one is only declared when its option is enabled.
 */
#ifdef CONFIG_MODULES
extern char modprobe_path[];
extern int modules_disabled;
#endif /* CONFIG_MODULES */

#ifdef CONFIG_CHR_DEV_SG
extern int sg_big_buff;
#endif /* CONFIG_CHR_DEV_SG */

#ifdef CONFIG_SPARC
#include <asm/system.h>
#endif /* CONFIG_SPARC */

#ifdef CONFIG_SPARC64
extern int sysctl_tsb_ratio;
#endif /* CONFIG_SPARC64 */

#ifdef __hppa__
extern int pwrsw_enabled;
extern int unaligned_enabled;
#endif /* __hppa__ */

#ifdef CONFIG_S390
#ifdef CONFIG_MATHEMU
extern int sysctl_ieee_emulation_warnings;
#endif /* CONFIG_MATHEMU */
extern int sysctl_userprocess_debug;
extern int spin_retry;
#endif /* CONFIG_S390 */

#ifdef CONFIG_BSD_PROCESS_ACCT
extern int acct_parm[];
#endif /* CONFIG_BSD_PROCESS_ACCT */

#ifdef CONFIG_IA64
extern int no_unaligned_warning;
extern int unaligned_dump_stack;
#endif /* CONFIG_IA64 */

#ifdef CONFIG_RT_MUTEXES
extern int max_lock_depth;
#endif /* CONFIG_RT_MUTEXES */
159 
#ifdef CONFIG_PROC_SYSCTL
/*
 * Forward declarations of the file-local handlers used by the
 * "cad_pid" and "tainted" entries of kern_table.
 */
static int proc_do_cad_pid(struct ctl_table *table, int write,
			   struct file *filp, void __user *buffer,
			   size_t *lenp, loff_t *ppos);
static int proc_taint(struct ctl_table *table, int write,
		      struct file *filp, void __user *buffer,
		      size_t *lenp, loff_t *ppos);
#endif /* CONFIG_PROC_SYSCTL */
166 
167 static struct ctl_table root_table[];
168 static struct ctl_table_root sysctl_table_root;
169 static struct ctl_table_header root_table_header = {
170 	.count = 1,
171 	.ctl_table = root_table,
172 	.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
173 	.root = &sysctl_table_root,
174 	.set = &sysctl_table_root.default_set,
175 };
176 static struct ctl_table_root sysctl_table_root = {
177 	.root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
178 	.default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
179 };
180 
181 static struct ctl_table kern_table[];
182 static struct ctl_table vm_table[];
183 static struct ctl_table fs_table[];
184 static struct ctl_table debug_table[];
185 static struct ctl_table dev_table[];
186 extern struct ctl_table random_table[];
187 #ifdef CONFIG_INOTIFY_USER
188 extern struct ctl_table inotify_table[];
189 #endif
190 #ifdef CONFIG_EPOLL
191 extern struct ctl_table epoll_table[];
192 #endif
193 
194 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
195 int sysctl_legacy_va_layout;
196 #endif
197 
198 extern int prove_locking;
199 extern int lock_stat;
200 
201 /* The default sysctl tables: */
202 
203 static struct ctl_table root_table[] = {
204 	{
205 		.ctl_name	= CTL_KERN,
206 		.procname	= "kernel",
207 		.mode		= 0555,
208 		.child		= kern_table,
209 	},
210 	{
211 		.ctl_name	= CTL_VM,
212 		.procname	= "vm",
213 		.mode		= 0555,
214 		.child		= vm_table,
215 	},
216 	{
217 		.ctl_name	= CTL_FS,
218 		.procname	= "fs",
219 		.mode		= 0555,
220 		.child		= fs_table,
221 	},
222 	{
223 		.ctl_name	= CTL_DEBUG,
224 		.procname	= "debug",
225 		.mode		= 0555,
226 		.child		= debug_table,
227 	},
228 	{
229 		.ctl_name	= CTL_DEV,
230 		.procname	= "dev",
231 		.mode		= 0555,
232 		.child		= dev_table,
233 	},
234 /*
235  * NOTE: do not add new entries to this table unless you have read
236  * Documentation/sysctl/ctl_unnumbered.txt
237  */
238 	{ .ctl_name = 0 }
239 };
240 
#ifdef CONFIG_SCHED_DEBUG
/*
 * Limits referenced through .extra1/.extra2 by the scheduler entries at
 * the top of kern_table.
 */
static int min_sched_granularity_ns = 100000;		/* 100 usecs */
static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */

static int min_wakeup_granularity_ns;			/* 0 usecs */
static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
#endif /* CONFIG_SCHED_DEBUG */
247 
248 static struct ctl_table kern_table[] = {
249 #ifdef CONFIG_SCHED_DEBUG
250 	{
251 		.ctl_name	= CTL_UNNUMBERED,
252 		.procname	= "sched_min_granularity_ns",
253 		.data		= &sysctl_sched_min_granularity,
254 		.maxlen		= sizeof(unsigned int),
255 		.mode		= 0644,
256 		.proc_handler	= &sched_nr_latency_handler,
257 		.strategy	= &sysctl_intvec,
258 		.extra1		= &min_sched_granularity_ns,
259 		.extra2		= &max_sched_granularity_ns,
260 	},
261 	{
262 		.ctl_name	= CTL_UNNUMBERED,
263 		.procname	= "sched_latency_ns",
264 		.data		= &sysctl_sched_latency,
265 		.maxlen		= sizeof(unsigned int),
266 		.mode		= 0644,
267 		.proc_handler	= &sched_nr_latency_handler,
268 		.strategy	= &sysctl_intvec,
269 		.extra1		= &min_sched_granularity_ns,
270 		.extra2		= &max_sched_granularity_ns,
271 	},
272 	{
273 		.ctl_name	= CTL_UNNUMBERED,
274 		.procname	= "sched_wakeup_granularity_ns",
275 		.data		= &sysctl_sched_wakeup_granularity,
276 		.maxlen		= sizeof(unsigned int),
277 		.mode		= 0644,
278 		.proc_handler	= &proc_dointvec_minmax,
279 		.strategy	= &sysctl_intvec,
280 		.extra1		= &min_wakeup_granularity_ns,
281 		.extra2		= &max_wakeup_granularity_ns,
282 	},
283 	{
284 		.ctl_name	= CTL_UNNUMBERED,
285 		.procname	= "sched_shares_ratelimit",
286 		.data		= &sysctl_sched_shares_ratelimit,
287 		.maxlen		= sizeof(unsigned int),
288 		.mode		= 0644,
289 		.proc_handler	= &proc_dointvec,
290 	},
291 	{
292 		.ctl_name	= CTL_UNNUMBERED,
293 		.procname	= "sched_shares_thresh",
294 		.data		= &sysctl_sched_shares_thresh,
295 		.maxlen		= sizeof(unsigned int),
296 		.mode		= 0644,
297 		.proc_handler	= &proc_dointvec_minmax,
298 		.strategy	= &sysctl_intvec,
299 		.extra1		= &zero,
300 	},
301 	{
302 		.ctl_name	= CTL_UNNUMBERED,
303 		.procname	= "sched_child_runs_first",
304 		.data		= &sysctl_sched_child_runs_first,
305 		.maxlen		= sizeof(unsigned int),
306 		.mode		= 0644,
307 		.proc_handler	= &proc_dointvec,
308 	},
309 	{
310 		.ctl_name	= CTL_UNNUMBERED,
311 		.procname	= "sched_features",
312 		.data		= &sysctl_sched_features,
313 		.maxlen		= sizeof(unsigned int),
314 		.mode		= 0644,
315 		.proc_handler	= &proc_dointvec,
316 	},
317 	{
318 		.ctl_name	= CTL_UNNUMBERED,
319 		.procname	= "sched_migration_cost",
320 		.data		= &sysctl_sched_migration_cost,
321 		.maxlen		= sizeof(unsigned int),
322 		.mode		= 0644,
323 		.proc_handler	= &proc_dointvec,
324 	},
325 	{
326 		.ctl_name	= CTL_UNNUMBERED,
327 		.procname	= "sched_nr_migrate",
328 		.data		= &sysctl_sched_nr_migrate,
329 		.maxlen		= sizeof(unsigned int),
330 		.mode		= 0644,
331 		.proc_handler	= &proc_dointvec,
332 	},
333 	{
334 		.ctl_name	= CTL_UNNUMBERED,
335 		.procname	= "timer_migration",
336 		.data		= &sysctl_timer_migration,
337 		.maxlen		= sizeof(unsigned int),
338 		.mode		= 0644,
339 		.proc_handler	= &proc_dointvec_minmax,
340 		.strategy	= &sysctl_intvec,
341 		.extra1		= &zero,
342 		.extra2		= &one,
343 	},
344 #endif
345 	{
346 		.ctl_name	= CTL_UNNUMBERED,
347 		.procname	= "sched_rt_period_us",
348 		.data		= &sysctl_sched_rt_period,
349 		.maxlen		= sizeof(unsigned int),
350 		.mode		= 0644,
351 		.proc_handler	= &sched_rt_handler,
352 	},
353 	{
354 		.ctl_name	= CTL_UNNUMBERED,
355 		.procname	= "sched_rt_runtime_us",
356 		.data		= &sysctl_sched_rt_runtime,
357 		.maxlen		= sizeof(int),
358 		.mode		= 0644,
359 		.proc_handler	= &sched_rt_handler,
360 	},
361 	{
362 		.ctl_name	= CTL_UNNUMBERED,
363 		.procname	= "sched_compat_yield",
364 		.data		= &sysctl_sched_compat_yield,
365 		.maxlen		= sizeof(unsigned int),
366 		.mode		= 0644,
367 		.proc_handler	= &proc_dointvec,
368 	},
369 #ifdef CONFIG_PROVE_LOCKING
370 	{
371 		.ctl_name	= CTL_UNNUMBERED,
372 		.procname	= "prove_locking",
373 		.data		= &prove_locking,
374 		.maxlen		= sizeof(int),
375 		.mode		= 0644,
376 		.proc_handler	= &proc_dointvec,
377 	},
378 #endif
379 #ifdef CONFIG_LOCK_STAT
380 	{
381 		.ctl_name	= CTL_UNNUMBERED,
382 		.procname	= "lock_stat",
383 		.data		= &lock_stat,
384 		.maxlen		= sizeof(int),
385 		.mode		= 0644,
386 		.proc_handler	= &proc_dointvec,
387 	},
388 #endif
389 	{
390 		.ctl_name	= KERN_PANIC,
391 		.procname	= "panic",
392 		.data		= &panic_timeout,
393 		.maxlen		= sizeof(int),
394 		.mode		= 0644,
395 		.proc_handler	= &proc_dointvec,
396 	},
397 	{
398 		.ctl_name	= KERN_CORE_USES_PID,
399 		.procname	= "core_uses_pid",
400 		.data		= &core_uses_pid,
401 		.maxlen		= sizeof(int),
402 		.mode		= 0644,
403 		.proc_handler	= &proc_dointvec,
404 	},
405 	{
406 		.ctl_name	= KERN_CORE_PATTERN,
407 		.procname	= "core_pattern",
408 		.data		= core_pattern,
409 		.maxlen		= CORENAME_MAX_SIZE,
410 		.mode		= 0644,
411 		.proc_handler	= &proc_dostring,
412 		.strategy	= &sysctl_string,
413 	},
414 #ifdef CONFIG_PROC_SYSCTL
415 	{
416 		.procname	= "tainted",
417 		.maxlen 	= sizeof(long),
418 		.mode		= 0644,
419 		.proc_handler	= &proc_taint,
420 	},
421 #endif
422 #ifdef CONFIG_LATENCYTOP
423 	{
424 		.procname	= "latencytop",
425 		.data		= &latencytop_enabled,
426 		.maxlen		= sizeof(int),
427 		.mode		= 0644,
428 		.proc_handler	= &proc_dointvec,
429 	},
430 #endif
431 #ifdef CONFIG_BLK_DEV_INITRD
432 	{
433 		.ctl_name	= KERN_REALROOTDEV,
434 		.procname	= "real-root-dev",
435 		.data		= &real_root_dev,
436 		.maxlen		= sizeof(int),
437 		.mode		= 0644,
438 		.proc_handler	= &proc_dointvec,
439 	},
440 #endif
441 	{
442 		.ctl_name	= CTL_UNNUMBERED,
443 		.procname	= "print-fatal-signals",
444 		.data		= &print_fatal_signals,
445 		.maxlen		= sizeof(int),
446 		.mode		= 0644,
447 		.proc_handler	= &proc_dointvec,
448 	},
449 #ifdef CONFIG_SPARC
450 	{
451 		.ctl_name	= KERN_SPARC_REBOOT,
452 		.procname	= "reboot-cmd",
453 		.data		= reboot_command,
454 		.maxlen		= 256,
455 		.mode		= 0644,
456 		.proc_handler	= &proc_dostring,
457 		.strategy	= &sysctl_string,
458 	},
459 	{
460 		.ctl_name	= KERN_SPARC_STOP_A,
461 		.procname	= "stop-a",
462 		.data		= &stop_a_enabled,
463 		.maxlen		= sizeof (int),
464 		.mode		= 0644,
465 		.proc_handler	= &proc_dointvec,
466 	},
467 	{
468 		.ctl_name	= KERN_SPARC_SCONS_PWROFF,
469 		.procname	= "scons-poweroff",
470 		.data		= &scons_pwroff,
471 		.maxlen		= sizeof (int),
472 		.mode		= 0644,
473 		.proc_handler	= &proc_dointvec,
474 	},
475 #endif
476 #ifdef CONFIG_SPARC64
477 	{
478 		.ctl_name	= CTL_UNNUMBERED,
479 		.procname	= "tsb-ratio",
480 		.data		= &sysctl_tsb_ratio,
481 		.maxlen		= sizeof (int),
482 		.mode		= 0644,
483 		.proc_handler	= &proc_dointvec,
484 	},
485 #endif
486 #ifdef __hppa__
487 	{
488 		.ctl_name	= KERN_HPPA_PWRSW,
489 		.procname	= "soft-power",
490 		.data		= &pwrsw_enabled,
491 		.maxlen		= sizeof (int),
492 	 	.mode		= 0644,
493 		.proc_handler	= &proc_dointvec,
494 	},
495 	{
496 		.ctl_name	= KERN_HPPA_UNALIGNED,
497 		.procname	= "unaligned-trap",
498 		.data		= &unaligned_enabled,
499 		.maxlen		= sizeof (int),
500 		.mode		= 0644,
501 		.proc_handler	= &proc_dointvec,
502 	},
503 #endif
504 	{
505 		.ctl_name	= KERN_CTLALTDEL,
506 		.procname	= "ctrl-alt-del",
507 		.data		= &C_A_D,
508 		.maxlen		= sizeof(int),
509 		.mode		= 0644,
510 		.proc_handler	= &proc_dointvec,
511 	},
512 #ifdef CONFIG_FUNCTION_TRACER
513 	{
514 		.ctl_name	= CTL_UNNUMBERED,
515 		.procname	= "ftrace_enabled",
516 		.data		= &ftrace_enabled,
517 		.maxlen		= sizeof(int),
518 		.mode		= 0644,
519 		.proc_handler	= &ftrace_enable_sysctl,
520 	},
521 #endif
522 #ifdef CONFIG_STACK_TRACER
523 	{
524 		.ctl_name	= CTL_UNNUMBERED,
525 		.procname	= "stack_tracer_enabled",
526 		.data		= &stack_tracer_enabled,
527 		.maxlen		= sizeof(int),
528 		.mode		= 0644,
529 		.proc_handler	= &stack_trace_sysctl,
530 	},
531 #endif
532 #ifdef CONFIG_TRACING
533 	{
534 		.ctl_name	= CTL_UNNUMBERED,
535 		.procname	= "ftrace_dump_on_oops",
536 		.data		= &ftrace_dump_on_oops,
537 		.maxlen		= sizeof(int),
538 		.mode		= 0644,
539 		.proc_handler	= &proc_dointvec,
540 	},
541 #endif
542 #ifdef CONFIG_MODULES
543 	{
544 		.ctl_name	= KERN_MODPROBE,
545 		.procname	= "modprobe",
546 		.data		= &modprobe_path,
547 		.maxlen		= KMOD_PATH_LEN,
548 		.mode		= 0644,
549 		.proc_handler	= &proc_dostring,
550 		.strategy	= &sysctl_string,
551 	},
552 	{
553 		.ctl_name	= CTL_UNNUMBERED,
554 		.procname	= "modules_disabled",
555 		.data		= &modules_disabled,
556 		.maxlen		= sizeof(int),
557 		.mode		= 0644,
558 		/* only handle a transition from default "0" to "1" */
559 		.proc_handler	= &proc_dointvec_minmax,
560 		.extra1		= &one,
561 		.extra2		= &one,
562 	},
563 #endif
564 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
565 	{
566 		.ctl_name	= KERN_HOTPLUG,
567 		.procname	= "hotplug",
568 		.data		= &uevent_helper,
569 		.maxlen		= UEVENT_HELPER_PATH_LEN,
570 		.mode		= 0644,
571 		.proc_handler	= &proc_dostring,
572 		.strategy	= &sysctl_string,
573 	},
574 #endif
575 #ifdef CONFIG_CHR_DEV_SG
576 	{
577 		.ctl_name	= KERN_SG_BIG_BUFF,
578 		.procname	= "sg-big-buff",
579 		.data		= &sg_big_buff,
580 		.maxlen		= sizeof (int),
581 		.mode		= 0444,
582 		.proc_handler	= &proc_dointvec,
583 	},
584 #endif
585 #ifdef CONFIG_BSD_PROCESS_ACCT
586 	{
587 		.ctl_name	= KERN_ACCT,
588 		.procname	= "acct",
589 		.data		= &acct_parm,
590 		.maxlen		= 3*sizeof(int),
591 		.mode		= 0644,
592 		.proc_handler	= &proc_dointvec,
593 	},
594 #endif
595 #ifdef CONFIG_MAGIC_SYSRQ
596 	{
597 		.ctl_name	= KERN_SYSRQ,
598 		.procname	= "sysrq",
599 		.data		= &__sysrq_enabled,
600 		.maxlen		= sizeof (int),
601 		.mode		= 0644,
602 		.proc_handler	= &proc_dointvec,
603 	},
604 #endif
605 #ifdef CONFIG_PROC_SYSCTL
606 	{
607 		.procname	= "cad_pid",
608 		.data		= NULL,
609 		.maxlen		= sizeof (int),
610 		.mode		= 0600,
611 		.proc_handler	= &proc_do_cad_pid,
612 	},
613 #endif
614 	{
615 		.ctl_name	= KERN_MAX_THREADS,
616 		.procname	= "threads-max",
617 		.data		= &max_threads,
618 		.maxlen		= sizeof(int),
619 		.mode		= 0644,
620 		.proc_handler	= &proc_dointvec,
621 	},
622 	{
623 		.ctl_name	= KERN_RANDOM,
624 		.procname	= "random",
625 		.mode		= 0555,
626 		.child		= random_table,
627 	},
628 	{
629 		.ctl_name	= KERN_OVERFLOWUID,
630 		.procname	= "overflowuid",
631 		.data		= &overflowuid,
632 		.maxlen		= sizeof(int),
633 		.mode		= 0644,
634 		.proc_handler	= &proc_dointvec_minmax,
635 		.strategy	= &sysctl_intvec,
636 		.extra1		= &minolduid,
637 		.extra2		= &maxolduid,
638 	},
639 	{
640 		.ctl_name	= KERN_OVERFLOWGID,
641 		.procname	= "overflowgid",
642 		.data		= &overflowgid,
643 		.maxlen		= sizeof(int),
644 		.mode		= 0644,
645 		.proc_handler	= &proc_dointvec_minmax,
646 		.strategy	= &sysctl_intvec,
647 		.extra1		= &minolduid,
648 		.extra2		= &maxolduid,
649 	},
650 #ifdef CONFIG_S390
651 #ifdef CONFIG_MATHEMU
652 	{
653 		.ctl_name	= KERN_IEEE_EMULATION_WARNINGS,
654 		.procname	= "ieee_emulation_warnings",
655 		.data		= &sysctl_ieee_emulation_warnings,
656 		.maxlen		= sizeof(int),
657 		.mode		= 0644,
658 		.proc_handler	= &proc_dointvec,
659 	},
660 #endif
661 	{
662 		.ctl_name	= KERN_S390_USER_DEBUG_LOGGING,
663 		.procname	= "userprocess_debug",
664 		.data		= &sysctl_userprocess_debug,
665 		.maxlen		= sizeof(int),
666 		.mode		= 0644,
667 		.proc_handler	= &proc_dointvec,
668 	},
669 #endif
670 	{
671 		.ctl_name	= KERN_PIDMAX,
672 		.procname	= "pid_max",
673 		.data		= &pid_max,
674 		.maxlen		= sizeof (int),
675 		.mode		= 0644,
676 		.proc_handler	= &proc_dointvec_minmax,
677 		.strategy	= sysctl_intvec,
678 		.extra1		= &pid_max_min,
679 		.extra2		= &pid_max_max,
680 	},
681 	{
682 		.ctl_name	= KERN_PANIC_ON_OOPS,
683 		.procname	= "panic_on_oops",
684 		.data		= &panic_on_oops,
685 		.maxlen		= sizeof(int),
686 		.mode		= 0644,
687 		.proc_handler	= &proc_dointvec,
688 	},
689 #if defined CONFIG_PRINTK
690 	{
691 		.ctl_name	= KERN_PRINTK,
692 		.procname	= "printk",
693 		.data		= &console_loglevel,
694 		.maxlen		= 4*sizeof(int),
695 		.mode		= 0644,
696 		.proc_handler	= &proc_dointvec,
697 	},
698 	{
699 		.ctl_name	= KERN_PRINTK_RATELIMIT,
700 		.procname	= "printk_ratelimit",
701 		.data		= &printk_ratelimit_state.interval,
702 		.maxlen		= sizeof(int),
703 		.mode		= 0644,
704 		.proc_handler	= &proc_dointvec_jiffies,
705 		.strategy	= &sysctl_jiffies,
706 	},
707 	{
708 		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
709 		.procname	= "printk_ratelimit_burst",
710 		.data		= &printk_ratelimit_state.burst,
711 		.maxlen		= sizeof(int),
712 		.mode		= 0644,
713 		.proc_handler	= &proc_dointvec,
714 	},
715 #endif
716 	{
717 		.ctl_name	= KERN_NGROUPS_MAX,
718 		.procname	= "ngroups_max",
719 		.data		= &ngroups_max,
720 		.maxlen		= sizeof (int),
721 		.mode		= 0444,
722 		.proc_handler	= &proc_dointvec,
723 	},
724 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
725 	{
726 		.ctl_name       = KERN_UNKNOWN_NMI_PANIC,
727 		.procname       = "unknown_nmi_panic",
728 		.data           = &unknown_nmi_panic,
729 		.maxlen         = sizeof (int),
730 		.mode           = 0644,
731 		.proc_handler   = &proc_dointvec,
732 	},
733 	{
734 		.procname       = "nmi_watchdog",
735 		.data           = &nmi_watchdog_enabled,
736 		.maxlen         = sizeof (int),
737 		.mode           = 0644,
738 		.proc_handler   = &proc_nmi_enabled,
739 	},
740 #endif
741 #if defined(CONFIG_X86)
742 	{
743 		.ctl_name	= KERN_PANIC_ON_NMI,
744 		.procname	= "panic_on_unrecovered_nmi",
745 		.data		= &panic_on_unrecovered_nmi,
746 		.maxlen		= sizeof(int),
747 		.mode		= 0644,
748 		.proc_handler	= &proc_dointvec,
749 	},
750 	{
751 		.ctl_name	= CTL_UNNUMBERED,
752 		.procname	= "panic_on_io_nmi",
753 		.data		= &panic_on_io_nmi,
754 		.maxlen		= sizeof(int),
755 		.mode		= 0644,
756 		.proc_handler	= &proc_dointvec,
757 	},
758 	{
759 		.ctl_name	= KERN_BOOTLOADER_TYPE,
760 		.procname	= "bootloader_type",
761 		.data		= &bootloader_type,
762 		.maxlen		= sizeof (int),
763 		.mode		= 0444,
764 		.proc_handler	= &proc_dointvec,
765 	},
766 	{
767 		.ctl_name	= CTL_UNNUMBERED,
768 		.procname	= "bootloader_version",
769 		.data		= &bootloader_version,
770 		.maxlen		= sizeof (int),
771 		.mode		= 0444,
772 		.proc_handler	= &proc_dointvec,
773 	},
774 	{
775 		.ctl_name	= CTL_UNNUMBERED,
776 		.procname	= "kstack_depth_to_print",
777 		.data		= &kstack_depth_to_print,
778 		.maxlen		= sizeof(int),
779 		.mode		= 0644,
780 		.proc_handler	= &proc_dointvec,
781 	},
782 	{
783 		.ctl_name	= CTL_UNNUMBERED,
784 		.procname	= "io_delay_type",
785 		.data		= &io_delay_type,
786 		.maxlen		= sizeof(int),
787 		.mode		= 0644,
788 		.proc_handler	= &proc_dointvec,
789 	},
790 #endif
791 #if defined(CONFIG_MMU)
792 	{
793 		.ctl_name	= KERN_RANDOMIZE,
794 		.procname	= "randomize_va_space",
795 		.data		= &randomize_va_space,
796 		.maxlen		= sizeof(int),
797 		.mode		= 0644,
798 		.proc_handler	= &proc_dointvec,
799 	},
800 #endif
801 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
802 	{
803 		.ctl_name	= KERN_SPIN_RETRY,
804 		.procname	= "spin_retry",
805 		.data		= &spin_retry,
806 		.maxlen		= sizeof (int),
807 		.mode		= 0644,
808 		.proc_handler	= &proc_dointvec,
809 	},
810 #endif
811 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
812 	{
813 		.procname	= "acpi_video_flags",
814 		.data		= &acpi_realmode_flags,
815 		.maxlen		= sizeof (unsigned long),
816 		.mode		= 0644,
817 		.proc_handler	= &proc_doulongvec_minmax,
818 	},
819 #endif
820 #ifdef CONFIG_IA64
821 	{
822 		.ctl_name	= KERN_IA64_UNALIGNED,
823 		.procname	= "ignore-unaligned-usertrap",
824 		.data		= &no_unaligned_warning,
825 		.maxlen		= sizeof (int),
826 	 	.mode		= 0644,
827 		.proc_handler	= &proc_dointvec,
828 	},
829 	{
830 		.ctl_name	= CTL_UNNUMBERED,
831 		.procname	= "unaligned-dump-stack",
832 		.data		= &unaligned_dump_stack,
833 		.maxlen		= sizeof (int),
834 		.mode		= 0644,
835 		.proc_handler	= &proc_dointvec,
836 	},
837 #endif
838 #ifdef CONFIG_DETECT_SOFTLOCKUP
839 	{
840 		.ctl_name	= CTL_UNNUMBERED,
841 		.procname	= "softlockup_panic",
842 		.data		= &softlockup_panic,
843 		.maxlen		= sizeof(int),
844 		.mode		= 0644,
845 		.proc_handler	= &proc_dointvec_minmax,
846 		.strategy	= &sysctl_intvec,
847 		.extra1		= &zero,
848 		.extra2		= &one,
849 	},
850 	{
851 		.ctl_name	= CTL_UNNUMBERED,
852 		.procname	= "softlockup_thresh",
853 		.data		= &softlockup_thresh,
854 		.maxlen		= sizeof(int),
855 		.mode		= 0644,
856 		.proc_handler	= &proc_dosoftlockup_thresh,
857 		.strategy	= &sysctl_intvec,
858 		.extra1		= &neg_one,
859 		.extra2		= &sixty,
860 	},
861 #endif
862 #ifdef CONFIG_DETECT_HUNG_TASK
863 	{
864 		.ctl_name	= CTL_UNNUMBERED,
865 		.procname	= "hung_task_panic",
866 		.data		= &sysctl_hung_task_panic,
867 		.maxlen		= sizeof(int),
868 		.mode		= 0644,
869 		.proc_handler	= &proc_dointvec_minmax,
870 		.strategy	= &sysctl_intvec,
871 		.extra1		= &zero,
872 		.extra2		= &one,
873 	},
874 	{
875 		.ctl_name	= CTL_UNNUMBERED,
876 		.procname	= "hung_task_check_count",
877 		.data		= &sysctl_hung_task_check_count,
878 		.maxlen		= sizeof(unsigned long),
879 		.mode		= 0644,
880 		.proc_handler	= &proc_doulongvec_minmax,
881 		.strategy	= &sysctl_intvec,
882 	},
883 	{
884 		.ctl_name	= CTL_UNNUMBERED,
885 		.procname	= "hung_task_timeout_secs",
886 		.data		= &sysctl_hung_task_timeout_secs,
887 		.maxlen		= sizeof(unsigned long),
888 		.mode		= 0644,
889 		.proc_handler	= &proc_dohung_task_timeout_secs,
890 		.strategy	= &sysctl_intvec,
891 	},
892 	{
893 		.ctl_name	= CTL_UNNUMBERED,
894 		.procname	= "hung_task_warnings",
895 		.data		= &sysctl_hung_task_warnings,
896 		.maxlen		= sizeof(unsigned long),
897 		.mode		= 0644,
898 		.proc_handler	= &proc_doulongvec_minmax,
899 		.strategy	= &sysctl_intvec,
900 	},
901 #endif
902 #ifdef CONFIG_COMPAT
903 	{
904 		.ctl_name	= KERN_COMPAT_LOG,
905 		.procname	= "compat-log",
906 		.data		= &compat_log,
907 		.maxlen		= sizeof (int),
908 	 	.mode		= 0644,
909 		.proc_handler	= &proc_dointvec,
910 	},
911 #endif
912 #ifdef CONFIG_RT_MUTEXES
913 	{
914 		.ctl_name	= KERN_MAX_LOCK_DEPTH,
915 		.procname	= "max_lock_depth",
916 		.data		= &max_lock_depth,
917 		.maxlen		= sizeof(int),
918 		.mode		= 0644,
919 		.proc_handler	= &proc_dointvec,
920 	},
921 #endif
922 	{
923 		.ctl_name	= CTL_UNNUMBERED,
924 		.procname	= "poweroff_cmd",
925 		.data		= &poweroff_cmd,
926 		.maxlen		= POWEROFF_CMD_PATH_LEN,
927 		.mode		= 0644,
928 		.proc_handler	= &proc_dostring,
929 		.strategy	= &sysctl_string,
930 	},
931 #ifdef CONFIG_KEYS
932 	{
933 		.ctl_name	= CTL_UNNUMBERED,
934 		.procname	= "keys",
935 		.mode		= 0555,
936 		.child		= key_sysctls,
937 	},
938 #endif
939 #ifdef CONFIG_RCU_TORTURE_TEST
940 	{
941 		.ctl_name       = CTL_UNNUMBERED,
942 		.procname       = "rcutorture_runnable",
943 		.data           = &rcutorture_runnable,
944 		.maxlen         = sizeof(int),
945 		.mode           = 0644,
946 		.proc_handler   = &proc_dointvec,
947 	},
948 #endif
949 #ifdef CONFIG_SLOW_WORK
950 	{
951 		.ctl_name	= CTL_UNNUMBERED,
952 		.procname	= "slow-work",
953 		.mode		= 0555,
954 		.child		= slow_work_sysctls,
955 	},
956 #endif
957 #ifdef CONFIG_PERF_COUNTERS
958 	{
959 		.ctl_name	= CTL_UNNUMBERED,
960 		.procname	= "perf_counter_paranoid",
961 		.data		= &sysctl_perf_counter_paranoid,
962 		.maxlen		= sizeof(sysctl_perf_counter_paranoid),
963 		.mode		= 0644,
964 		.proc_handler	= &proc_dointvec,
965 	},
966 	{
967 		.ctl_name	= CTL_UNNUMBERED,
968 		.procname	= "perf_counter_mlock_kb",
969 		.data		= &sysctl_perf_counter_mlock,
970 		.maxlen		= sizeof(sysctl_perf_counter_mlock),
971 		.mode		= 0644,
972 		.proc_handler	= &proc_dointvec,
973 	},
974 	{
975 		.ctl_name	= CTL_UNNUMBERED,
976 		.procname	= "perf_counter_max_sample_rate",
977 		.data		= &sysctl_perf_counter_sample_rate,
978 		.maxlen		= sizeof(sysctl_perf_counter_sample_rate),
979 		.mode		= 0644,
980 		.proc_handler	= &proc_dointvec,
981 	},
982 #endif
983 #ifdef CONFIG_KMEMCHECK
984 	{
985 		.ctl_name	= CTL_UNNUMBERED,
986 		.procname	= "kmemcheck",
987 		.data		= &kmemcheck_enabled,
988 		.maxlen		= sizeof(int),
989 		.mode		= 0644,
990 		.proc_handler	= &proc_dointvec,
991 	},
992 #endif
993 
994 /*
995  * NOTE: do not add new entries to this table unless you have read
996  * Documentation/sysctl/ctl_unnumbered.txt
997  */
998 	{ .ctl_name = 0 }
999 };
1000 
1001 static struct ctl_table vm_table[] = {
1002 	{
1003 		.ctl_name	= VM_OVERCOMMIT_MEMORY,
1004 		.procname	= "overcommit_memory",
1005 		.data		= &sysctl_overcommit_memory,
1006 		.maxlen		= sizeof(sysctl_overcommit_memory),
1007 		.mode		= 0644,
1008 		.proc_handler	= &proc_dointvec,
1009 	},
1010 	{
1011 		.ctl_name	= VM_PANIC_ON_OOM,
1012 		.procname	= "panic_on_oom",
1013 		.data		= &sysctl_panic_on_oom,
1014 		.maxlen		= sizeof(sysctl_panic_on_oom),
1015 		.mode		= 0644,
1016 		.proc_handler	= &proc_dointvec,
1017 	},
1018 	{
1019 		.ctl_name	= CTL_UNNUMBERED,
1020 		.procname	= "oom_kill_allocating_task",
1021 		.data		= &sysctl_oom_kill_allocating_task,
1022 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
1023 		.mode		= 0644,
1024 		.proc_handler	= &proc_dointvec,
1025 	},
1026 	{
1027 		.ctl_name	= CTL_UNNUMBERED,
1028 		.procname	= "oom_dump_tasks",
1029 		.data		= &sysctl_oom_dump_tasks,
1030 		.maxlen		= sizeof(sysctl_oom_dump_tasks),
1031 		.mode		= 0644,
1032 		.proc_handler	= &proc_dointvec,
1033 	},
1034 	{
1035 		.ctl_name	= VM_OVERCOMMIT_RATIO,
1036 		.procname	= "overcommit_ratio",
1037 		.data		= &sysctl_overcommit_ratio,
1038 		.maxlen		= sizeof(sysctl_overcommit_ratio),
1039 		.mode		= 0644,
1040 		.proc_handler	= &proc_dointvec,
1041 	},
1042 	{
1043 		.ctl_name	= VM_PAGE_CLUSTER,
1044 		.procname	= "page-cluster",
1045 		.data		= &page_cluster,
1046 		.maxlen		= sizeof(int),
1047 		.mode		= 0644,
1048 		.proc_handler	= &proc_dointvec,
1049 	},
1050 	{
1051 		.ctl_name	= VM_DIRTY_BACKGROUND,
1052 		.procname	= "dirty_background_ratio",
1053 		.data		= &dirty_background_ratio,
1054 		.maxlen		= sizeof(dirty_background_ratio),
1055 		.mode		= 0644,
1056 		.proc_handler	= &dirty_background_ratio_handler,
1057 		.strategy	= &sysctl_intvec,
1058 		.extra1		= &zero,
1059 		.extra2		= &one_hundred,
1060 	},
1061 	{
1062 		.ctl_name	= CTL_UNNUMBERED,
1063 		.procname	= "dirty_background_bytes",
1064 		.data		= &dirty_background_bytes,
1065 		.maxlen		= sizeof(dirty_background_bytes),
1066 		.mode		= 0644,
1067 		.proc_handler	= &dirty_background_bytes_handler,
1068 		.strategy	= &sysctl_intvec,
1069 		.extra1		= &one_ul,
1070 	},
1071 	{
1072 		.ctl_name	= VM_DIRTY_RATIO,
1073 		.procname	= "dirty_ratio",
1074 		.data		= &vm_dirty_ratio,
1075 		.maxlen		= sizeof(vm_dirty_ratio),
1076 		.mode		= 0644,
1077 		.proc_handler	= &dirty_ratio_handler,
1078 		.strategy	= &sysctl_intvec,
1079 		.extra1		= &zero,
1080 		.extra2		= &one_hundred,
1081 	},
1082 	{
1083 		.ctl_name	= CTL_UNNUMBERED,
1084 		.procname	= "dirty_bytes",
1085 		.data		= &vm_dirty_bytes,
1086 		.maxlen		= sizeof(vm_dirty_bytes),
1087 		.mode		= 0644,
1088 		.proc_handler	= &dirty_bytes_handler,
1089 		.strategy	= &sysctl_intvec,
1090 		.extra1		= &dirty_bytes_min,
1091 	},
1092 	{
1093 		.procname	= "dirty_writeback_centisecs",
1094 		.data		= &dirty_writeback_interval,
1095 		.maxlen		= sizeof(dirty_writeback_interval),
1096 		.mode		= 0644,
1097 		.proc_handler	= &dirty_writeback_centisecs_handler,
1098 	},
1099 	{
1100 		.procname	= "dirty_expire_centisecs",
1101 		.data		= &dirty_expire_interval,
1102 		.maxlen		= sizeof(dirty_expire_interval),
1103 		.mode		= 0644,
1104 		.proc_handler	= &proc_dointvec,
1105 	},
1106 	{
1107 		.ctl_name	= VM_NR_PDFLUSH_THREADS,
1108 		.procname	= "nr_pdflush_threads",
1109 		.data		= &nr_pdflush_threads,
1110 		.maxlen		= sizeof nr_pdflush_threads,
1111 		.mode		= 0444 /* read-only*/,
1112 		.proc_handler	= &proc_dointvec,
1113 	},
1114 	{
1115 		.ctl_name	= VM_SWAPPINESS,
1116 		.procname	= "swappiness",
1117 		.data		= &vm_swappiness,
1118 		.maxlen		= sizeof(vm_swappiness),
1119 		.mode		= 0644,
1120 		.proc_handler	= &proc_dointvec_minmax,
1121 		.strategy	= &sysctl_intvec,
1122 		.extra1		= &zero,
1123 		.extra2		= &one_hundred,
1124 	},
1125 #ifdef CONFIG_HUGETLB_PAGE
1126 	 {
1127 		.procname	= "nr_hugepages",
1128 		.data		= NULL,
1129 		.maxlen		= sizeof(unsigned long),
1130 		.mode		= 0644,
1131 		.proc_handler	= &hugetlb_sysctl_handler,
1132 		.extra1		= (void *)&hugetlb_zero,
1133 		.extra2		= (void *)&hugetlb_infinity,
1134 	 },
1135 	 {
1136 		.ctl_name	= VM_HUGETLB_GROUP,
1137 		.procname	= "hugetlb_shm_group",
1138 		.data		= &sysctl_hugetlb_shm_group,
1139 		.maxlen		= sizeof(gid_t),
1140 		.mode		= 0644,
1141 		.proc_handler	= &proc_dointvec,
1142 	 },
1143 	 {
1144 		.ctl_name	= CTL_UNNUMBERED,
1145 		.procname	= "hugepages_treat_as_movable",
1146 		.data		= &hugepages_treat_as_movable,
1147 		.maxlen		= sizeof(int),
1148 		.mode		= 0644,
1149 		.proc_handler	= &hugetlb_treat_movable_handler,
1150 	},
1151 	{
1152 		.ctl_name	= CTL_UNNUMBERED,
1153 		.procname	= "nr_overcommit_hugepages",
1154 		.data		= NULL,
1155 		.maxlen		= sizeof(unsigned long),
1156 		.mode		= 0644,
1157 		.proc_handler	= &hugetlb_overcommit_handler,
1158 		.extra1		= (void *)&hugetlb_zero,
1159 		.extra2		= (void *)&hugetlb_infinity,
1160 	},
1161 #endif
1162 	{
1163 		.ctl_name	= VM_LOWMEM_RESERVE_RATIO,
1164 		.procname	= "lowmem_reserve_ratio",
1165 		.data		= &sysctl_lowmem_reserve_ratio,
1166 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
1167 		.mode		= 0644,
1168 		.proc_handler	= &lowmem_reserve_ratio_sysctl_handler,
1169 		.strategy	= &sysctl_intvec,
1170 	},
1171 	{
1172 		.ctl_name	= VM_DROP_PAGECACHE,
1173 		.procname	= "drop_caches",
1174 		.data		= &sysctl_drop_caches,
1175 		.maxlen		= sizeof(int),
1176 		.mode		= 0644,
1177 		.proc_handler	= drop_caches_sysctl_handler,
1178 		.strategy	= &sysctl_intvec,
1179 	},
1180 	{
1181 		.ctl_name	= VM_MIN_FREE_KBYTES,
1182 		.procname	= "min_free_kbytes",
1183 		.data		= &min_free_kbytes,
1184 		.maxlen		= sizeof(min_free_kbytes),
1185 		.mode		= 0644,
1186 		.proc_handler	= &min_free_kbytes_sysctl_handler,
1187 		.strategy	= &sysctl_intvec,
1188 		.extra1		= &zero,
1189 	},
1190 	{
1191 		.ctl_name	= VM_PERCPU_PAGELIST_FRACTION,
1192 		.procname	= "percpu_pagelist_fraction",
1193 		.data		= &percpu_pagelist_fraction,
1194 		.maxlen		= sizeof(percpu_pagelist_fraction),
1195 		.mode		= 0644,
1196 		.proc_handler	= &percpu_pagelist_fraction_sysctl_handler,
1197 		.strategy	= &sysctl_intvec,
1198 		.extra1		= &min_percpu_pagelist_fract,
1199 	},
1200 #ifdef CONFIG_MMU
1201 	{
1202 		.ctl_name	= VM_MAX_MAP_COUNT,
1203 		.procname	= "max_map_count",
1204 		.data		= &sysctl_max_map_count,
1205 		.maxlen		= sizeof(sysctl_max_map_count),
1206 		.mode		= 0644,
1207 		.proc_handler	= &proc_dointvec
1208 	},
1209 #else
1210 	{
1211 		.ctl_name	= CTL_UNNUMBERED,
1212 		.procname	= "nr_trim_pages",
1213 		.data		= &sysctl_nr_trim_pages,
1214 		.maxlen		= sizeof(sysctl_nr_trim_pages),
1215 		.mode		= 0644,
1216 		.proc_handler	= &proc_dointvec_minmax,
1217 		.strategy	= &sysctl_intvec,
1218 		.extra1		= &zero,
1219 	},
1220 #endif
1221 	{
1222 		.ctl_name	= VM_LAPTOP_MODE,
1223 		.procname	= "laptop_mode",
1224 		.data		= &laptop_mode,
1225 		.maxlen		= sizeof(laptop_mode),
1226 		.mode		= 0644,
1227 		.proc_handler	= &proc_dointvec_jiffies,
1228 		.strategy	= &sysctl_jiffies,
1229 	},
1230 	{
1231 		.ctl_name	= VM_BLOCK_DUMP,
1232 		.procname	= "block_dump",
1233 		.data		= &block_dump,
1234 		.maxlen		= sizeof(block_dump),
1235 		.mode		= 0644,
1236 		.proc_handler	= &proc_dointvec,
1237 		.strategy	= &sysctl_intvec,
1238 		.extra1		= &zero,
1239 	},
1240 	{
1241 		.ctl_name	= VM_VFS_CACHE_PRESSURE,
1242 		.procname	= "vfs_cache_pressure",
1243 		.data		= &sysctl_vfs_cache_pressure,
1244 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
1245 		.mode		= 0644,
1246 		.proc_handler	= &proc_dointvec,
1247 		.strategy	= &sysctl_intvec,
1248 		.extra1		= &zero,
1249 	},
1250 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1251 	{
1252 		.ctl_name	= VM_LEGACY_VA_LAYOUT,
1253 		.procname	= "legacy_va_layout",
1254 		.data		= &sysctl_legacy_va_layout,
1255 		.maxlen		= sizeof(sysctl_legacy_va_layout),
1256 		.mode		= 0644,
1257 		.proc_handler	= &proc_dointvec,
1258 		.strategy	= &sysctl_intvec,
1259 		.extra1		= &zero,
1260 	},
1261 #endif
1262 #ifdef CONFIG_NUMA
1263 	{
1264 		.ctl_name	= VM_ZONE_RECLAIM_MODE,
1265 		.procname	= "zone_reclaim_mode",
1266 		.data		= &zone_reclaim_mode,
1267 		.maxlen		= sizeof(zone_reclaim_mode),
1268 		.mode		= 0644,
1269 		.proc_handler	= &proc_dointvec,
1270 		.strategy	= &sysctl_intvec,
1271 		.extra1		= &zero,
1272 	},
1273 	{
1274 		.ctl_name	= VM_MIN_UNMAPPED,
1275 		.procname	= "min_unmapped_ratio",
1276 		.data		= &sysctl_min_unmapped_ratio,
1277 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
1278 		.mode		= 0644,
1279 		.proc_handler	= &sysctl_min_unmapped_ratio_sysctl_handler,
1280 		.strategy	= &sysctl_intvec,
1281 		.extra1		= &zero,
1282 		.extra2		= &one_hundred,
1283 	},
1284 	{
1285 		.ctl_name	= VM_MIN_SLAB,
1286 		.procname	= "min_slab_ratio",
1287 		.data		= &sysctl_min_slab_ratio,
1288 		.maxlen		= sizeof(sysctl_min_slab_ratio),
1289 		.mode		= 0644,
1290 		.proc_handler	= &sysctl_min_slab_ratio_sysctl_handler,
1291 		.strategy	= &sysctl_intvec,
1292 		.extra1		= &zero,
1293 		.extra2		= &one_hundred,
1294 	},
1295 #endif
1296 #ifdef CONFIG_SMP
1297 	{
1298 		.ctl_name	= CTL_UNNUMBERED,
1299 		.procname	= "stat_interval",
1300 		.data		= &sysctl_stat_interval,
1301 		.maxlen		= sizeof(sysctl_stat_interval),
1302 		.mode		= 0644,
1303 		.proc_handler	= &proc_dointvec_jiffies,
1304 		.strategy	= &sysctl_jiffies,
1305 	},
1306 #endif
1307 	{
1308 		.ctl_name	= CTL_UNNUMBERED,
1309 		.procname	= "mmap_min_addr",
1310 		.data		= &dac_mmap_min_addr,
1311 		.maxlen		= sizeof(unsigned long),
1312 		.mode		= 0644,
1313 		.proc_handler	= &mmap_min_addr_handler,
1314 	},
1315 #ifdef CONFIG_NUMA
1316 	{
1317 		.ctl_name	= CTL_UNNUMBERED,
1318 		.procname	= "numa_zonelist_order",
1319 		.data		= &numa_zonelist_order,
1320 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1321 		.mode		= 0644,
1322 		.proc_handler	= &numa_zonelist_order_handler,
1323 		.strategy	= &sysctl_string,
1324 	},
1325 #endif
1326 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1327    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1328 	{
1329 		.ctl_name	= VM_VDSO_ENABLED,
1330 		.procname	= "vdso_enabled",
1331 		.data		= &vdso_enabled,
1332 		.maxlen		= sizeof(vdso_enabled),
1333 		.mode		= 0644,
1334 		.proc_handler	= &proc_dointvec,
1335 		.strategy	= &sysctl_intvec,
1336 		.extra1		= &zero,
1337 	},
1338 #endif
1339 #ifdef CONFIG_HIGHMEM
1340 	{
1341 		.ctl_name	= CTL_UNNUMBERED,
1342 		.procname	= "highmem_is_dirtyable",
1343 		.data		= &vm_highmem_is_dirtyable,
1344 		.maxlen		= sizeof(vm_highmem_is_dirtyable),
1345 		.mode		= 0644,
1346 		.proc_handler	= &proc_dointvec_minmax,
1347 		.strategy	= &sysctl_intvec,
1348 		.extra1		= &zero,
1349 		.extra2		= &one,
1350 	},
1351 #endif
1352 	{
1353 		.ctl_name	= CTL_UNNUMBERED,
1354 		.procname	= "scan_unevictable_pages",
1355 		.data		= &scan_unevictable_pages,
1356 		.maxlen		= sizeof(scan_unevictable_pages),
1357 		.mode		= 0644,
1358 		.proc_handler	= &scan_unevictable_handler,
1359 	},
1360 /*
1361  * NOTE: do not add new entries to this table unless you have read
1362  * Documentation/sysctl/ctl_unnumbered.txt
1363  */
1364 	{ .ctl_name = 0 }
1365 };
1366 
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Empty table: it only contains the terminating entry.  fs_table uses it
 * as the child of its "binfmt_misc" directory entry.
 */
static struct ctl_table binfmt_misc_table[] = {
	{ }	/* terminator: all fields zero */
};
#endif
1372 
1373 static struct ctl_table fs_table[] = {
1374 	{
1375 		.ctl_name	= FS_NRINODE,
1376 		.procname	= "inode-nr",
1377 		.data		= &inodes_stat,
1378 		.maxlen		= 2*sizeof(int),
1379 		.mode		= 0444,
1380 		.proc_handler	= &proc_dointvec,
1381 	},
1382 	{
1383 		.ctl_name	= FS_STATINODE,
1384 		.procname	= "inode-state",
1385 		.data		= &inodes_stat,
1386 		.maxlen		= 7*sizeof(int),
1387 		.mode		= 0444,
1388 		.proc_handler	= &proc_dointvec,
1389 	},
1390 	{
1391 		.procname	= "file-nr",
1392 		.data		= &files_stat,
1393 		.maxlen		= 3*sizeof(int),
1394 		.mode		= 0444,
1395 		.proc_handler	= &proc_nr_files,
1396 	},
1397 	{
1398 		.ctl_name	= FS_MAXFILE,
1399 		.procname	= "file-max",
1400 		.data		= &files_stat.max_files,
1401 		.maxlen		= sizeof(int),
1402 		.mode		= 0644,
1403 		.proc_handler	= &proc_dointvec,
1404 	},
1405 	{
1406 		.ctl_name	= CTL_UNNUMBERED,
1407 		.procname	= "nr_open",
1408 		.data		= &sysctl_nr_open,
1409 		.maxlen		= sizeof(int),
1410 		.mode		= 0644,
1411 		.proc_handler	= &proc_dointvec_minmax,
1412 		.extra1		= &sysctl_nr_open_min,
1413 		.extra2		= &sysctl_nr_open_max,
1414 	},
1415 	{
1416 		.ctl_name	= FS_DENTRY,
1417 		.procname	= "dentry-state",
1418 		.data		= &dentry_stat,
1419 		.maxlen		= 6*sizeof(int),
1420 		.mode		= 0444,
1421 		.proc_handler	= &proc_dointvec,
1422 	},
1423 	{
1424 		.ctl_name	= FS_OVERFLOWUID,
1425 		.procname	= "overflowuid",
1426 		.data		= &fs_overflowuid,
1427 		.maxlen		= sizeof(int),
1428 		.mode		= 0644,
1429 		.proc_handler	= &proc_dointvec_minmax,
1430 		.strategy	= &sysctl_intvec,
1431 		.extra1		= &minolduid,
1432 		.extra2		= &maxolduid,
1433 	},
1434 	{
1435 		.ctl_name	= FS_OVERFLOWGID,
1436 		.procname	= "overflowgid",
1437 		.data		= &fs_overflowgid,
1438 		.maxlen		= sizeof(int),
1439 		.mode		= 0644,
1440 		.proc_handler	= &proc_dointvec_minmax,
1441 		.strategy	= &sysctl_intvec,
1442 		.extra1		= &minolduid,
1443 		.extra2		= &maxolduid,
1444 	},
1445 #ifdef CONFIG_FILE_LOCKING
1446 	{
1447 		.ctl_name	= FS_LEASES,
1448 		.procname	= "leases-enable",
1449 		.data		= &leases_enable,
1450 		.maxlen		= sizeof(int),
1451 		.mode		= 0644,
1452 		.proc_handler	= &proc_dointvec,
1453 	},
1454 #endif
1455 #ifdef CONFIG_DNOTIFY
1456 	{
1457 		.ctl_name	= FS_DIR_NOTIFY,
1458 		.procname	= "dir-notify-enable",
1459 		.data		= &dir_notify_enable,
1460 		.maxlen		= sizeof(int),
1461 		.mode		= 0644,
1462 		.proc_handler	= &proc_dointvec,
1463 	},
1464 #endif
1465 #ifdef CONFIG_MMU
1466 #ifdef CONFIG_FILE_LOCKING
1467 	{
1468 		.ctl_name	= FS_LEASE_TIME,
1469 		.procname	= "lease-break-time",
1470 		.data		= &lease_break_time,
1471 		.maxlen		= sizeof(int),
1472 		.mode		= 0644,
1473 		.proc_handler	= &proc_dointvec,
1474 	},
1475 #endif
1476 #ifdef CONFIG_AIO
1477 	{
1478 		.procname	= "aio-nr",
1479 		.data		= &aio_nr,
1480 		.maxlen		= sizeof(aio_nr),
1481 		.mode		= 0444,
1482 		.proc_handler	= &proc_doulongvec_minmax,
1483 	},
1484 	{
1485 		.procname	= "aio-max-nr",
1486 		.data		= &aio_max_nr,
1487 		.maxlen		= sizeof(aio_max_nr),
1488 		.mode		= 0644,
1489 		.proc_handler	= &proc_doulongvec_minmax,
1490 	},
1491 #endif /* CONFIG_AIO */
1492 #ifdef CONFIG_INOTIFY_USER
1493 	{
1494 		.ctl_name	= FS_INOTIFY,
1495 		.procname	= "inotify",
1496 		.mode		= 0555,
1497 		.child		= inotify_table,
1498 	},
1499 #endif
1500 #ifdef CONFIG_EPOLL
1501 	{
1502 		.procname	= "epoll",
1503 		.mode		= 0555,
1504 		.child		= epoll_table,
1505 	},
1506 #endif
1507 #endif
1508 	{
1509 		.ctl_name	= KERN_SETUID_DUMPABLE,
1510 		.procname	= "suid_dumpable",
1511 		.data		= &suid_dumpable,
1512 		.maxlen		= sizeof(int),
1513 		.mode		= 0644,
1514 		.proc_handler	= &proc_dointvec_minmax,
1515 		.strategy	= &sysctl_intvec,
1516 		.extra1		= &zero,
1517 		.extra2		= &two,
1518 	},
1519 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1520 	{
1521 		.ctl_name	= CTL_UNNUMBERED,
1522 		.procname	= "binfmt_misc",
1523 		.mode		= 0555,
1524 		.child		= binfmt_misc_table,
1525 	},
1526 #endif
1527 /*
1528  * NOTE: do not add new entries to this table unless you have read
1529  * Documentation/sysctl/ctl_unnumbered.txt
1530  */
1531 	{ .ctl_name = 0 }
1532 };
1533 
1534 static struct ctl_table debug_table[] = {
1535 #if defined(CONFIG_X86) || defined(CONFIG_PPC)
1536 	{
1537 		.ctl_name	= CTL_UNNUMBERED,
1538 		.procname	= "exception-trace",
1539 		.data		= &show_unhandled_signals,
1540 		.maxlen		= sizeof(int),
1541 		.mode		= 0644,
1542 		.proc_handler	= proc_dointvec
1543 	},
1544 #endif
1545 	{ .ctl_name = 0 }
1546 };
1547 
1548 static struct ctl_table dev_table[] = {
1549 	{ .ctl_name = 0 }
1550 };
1551 
1552 static DEFINE_SPINLOCK(sysctl_lock);
1553 
1554 /* called under sysctl_lock */
1555 static int use_table(struct ctl_table_header *p)
1556 {
1557 	if (unlikely(p->unregistering))
1558 		return 0;
1559 	p->used++;
1560 	return 1;
1561 }
1562 
1563 /* called under sysctl_lock */
1564 static void unuse_table(struct ctl_table_header *p)
1565 {
1566 	if (!--p->used)
1567 		if (unlikely(p->unregistering))
1568 			complete(p->unregistering);
1569 }
1570 
1571 /* called under sysctl_lock, will reacquire if has to wait */
1572 static void start_unregistering(struct ctl_table_header *p)
1573 {
1574 	/*
1575 	 * if p->used is 0, nobody will ever touch that entry again;
1576 	 * we'll eliminate all paths to it before dropping sysctl_lock
1577 	 */
1578 	if (unlikely(p->used)) {
1579 		struct completion wait;
1580 		init_completion(&wait);
1581 		p->unregistering = &wait;
1582 		spin_unlock(&sysctl_lock);
1583 		wait_for_completion(&wait);
1584 		spin_lock(&sysctl_lock);
1585 	} else {
1586 		/* anything non-NULL; we'll never dereference it */
1587 		p->unregistering = ERR_PTR(-EINVAL);
1588 	}
1589 	/*
1590 	 * do not remove from the list until nobody holds it; walking the
1591 	 * list in do_sysctl() relies on that.
1592 	 */
1593 	list_del_init(&p->ctl_entry);
1594 }
1595 
1596 void sysctl_head_get(struct ctl_table_header *head)
1597 {
1598 	spin_lock(&sysctl_lock);
1599 	head->count++;
1600 	spin_unlock(&sysctl_lock);
1601 }
1602 
1603 void sysctl_head_put(struct ctl_table_header *head)
1604 {
1605 	spin_lock(&sysctl_lock);
1606 	if (!--head->count)
1607 		kfree(head);
1608 	spin_unlock(&sysctl_lock);
1609 }
1610 
1611 struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1612 {
1613 	if (!head)
1614 		BUG();
1615 	spin_lock(&sysctl_lock);
1616 	if (!use_table(head))
1617 		head = ERR_PTR(-ENOENT);
1618 	spin_unlock(&sysctl_lock);
1619 	return head;
1620 }
1621 
1622 void sysctl_head_finish(struct ctl_table_header *head)
1623 {
1624 	if (!head)
1625 		return;
1626 	spin_lock(&sysctl_lock);
1627 	unuse_table(head);
1628 	spin_unlock(&sysctl_lock);
1629 }
1630 
1631 static struct ctl_table_set *
1632 lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1633 {
1634 	struct ctl_table_set *set = &root->default_set;
1635 	if (root->lookup)
1636 		set = root->lookup(root, namespaces);
1637 	return set;
1638 }
1639 
1640 static struct list_head *
1641 lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1642 {
1643 	struct ctl_table_set *set = lookup_header_set(root, namespaces);
1644 	return &set->list;
1645 }
1646 
1647 struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
1648 					    struct ctl_table_header *prev)
1649 {
1650 	struct ctl_table_root *root;
1651 	struct list_head *header_list;
1652 	struct ctl_table_header *head;
1653 	struct list_head *tmp;
1654 
1655 	spin_lock(&sysctl_lock);
1656 	if (prev) {
1657 		head = prev;
1658 		tmp = &prev->ctl_entry;
1659 		unuse_table(prev);
1660 		goto next;
1661 	}
1662 	tmp = &root_table_header.ctl_entry;
1663 	for (;;) {
1664 		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1665 
1666 		if (!use_table(head))
1667 			goto next;
1668 		spin_unlock(&sysctl_lock);
1669 		return head;
1670 	next:
1671 		root = head->root;
1672 		tmp = tmp->next;
1673 		header_list = lookup_header_list(root, namespaces);
1674 		if (tmp != header_list)
1675 			continue;
1676 
1677 		do {
1678 			root = list_entry(root->root_list.next,
1679 					struct ctl_table_root, root_list);
1680 			if (root == &sysctl_table_root)
1681 				goto out;
1682 			header_list = lookup_header_list(root, namespaces);
1683 		} while (list_empty(header_list));
1684 		tmp = header_list->next;
1685 	}
1686 out:
1687 	spin_unlock(&sysctl_lock);
1688 	return NULL;
1689 }
1690 
1691 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1692 {
1693 	return __sysctl_head_next(current->nsproxy, prev);
1694 }
1695 
1696 void register_sysctl_root(struct ctl_table_root *root)
1697 {
1698 	spin_lock(&sysctl_lock);
1699 	list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1700 	spin_unlock(&sysctl_lock);
1701 }
1702 
1703 #ifdef CONFIG_SYSCTL_SYSCALL
1704 /* Perform the actual read/write of a sysctl table entry. */
1705 static int do_sysctl_strategy(struct ctl_table_root *root,
1706 			struct ctl_table *table,
1707 			void __user *oldval, size_t __user *oldlenp,
1708 			void __user *newval, size_t newlen)
1709 {
1710 	int op = 0, rc;
1711 
1712 	if (oldval)
1713 		op |= MAY_READ;
1714 	if (newval)
1715 		op |= MAY_WRITE;
1716 	if (sysctl_perm(root, table, op))
1717 		return -EPERM;
1718 
1719 	if (table->strategy) {
1720 		rc = table->strategy(table, oldval, oldlenp, newval, newlen);
1721 		if (rc < 0)
1722 			return rc;
1723 		if (rc > 0)
1724 			return 0;
1725 	}
1726 
1727 	/* If there is no strategy routine, or if the strategy returns
1728 	 * zero, proceed with automatic r/w */
1729 	if (table->data && table->maxlen) {
1730 		rc = sysctl_data(table, oldval, oldlenp, newval, newlen);
1731 		if (rc < 0)
1732 			return rc;
1733 	}
1734 	return 0;
1735 }
1736 
1737 static int parse_table(int __user *name, int nlen,
1738 		       void __user *oldval, size_t __user *oldlenp,
1739 		       void __user *newval, size_t newlen,
1740 		       struct ctl_table_root *root,
1741 		       struct ctl_table *table)
1742 {
1743 	int n;
1744 repeat:
1745 	if (!nlen)
1746 		return -ENOTDIR;
1747 	if (get_user(n, name))
1748 		return -EFAULT;
1749 	for ( ; table->ctl_name || table->procname; table++) {
1750 		if (!table->ctl_name)
1751 			continue;
1752 		if (n == table->ctl_name) {
1753 			int error;
1754 			if (table->child) {
1755 				if (sysctl_perm(root, table, MAY_EXEC))
1756 					return -EPERM;
1757 				name++;
1758 				nlen--;
1759 				table = table->child;
1760 				goto repeat;
1761 			}
1762 			error = do_sysctl_strategy(root, table,
1763 						   oldval, oldlenp,
1764 						   newval, newlen);
1765 			return error;
1766 		}
1767 	}
1768 	return -ENOTDIR;
1769 }
1770 
1771 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1772 	       void __user *newval, size_t newlen)
1773 {
1774 	struct ctl_table_header *head;
1775 	int error = -ENOTDIR;
1776 
1777 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
1778 		return -ENOTDIR;
1779 	if (oldval) {
1780 		int old_len;
1781 		if (!oldlenp || get_user(old_len, oldlenp))
1782 			return -EFAULT;
1783 	}
1784 
1785 	for (head = sysctl_head_next(NULL); head;
1786 			head = sysctl_head_next(head)) {
1787 		error = parse_table(name, nlen, oldval, oldlenp,
1788 					newval, newlen,
1789 					head->root, head->ctl_table);
1790 		if (error != -ENOTDIR) {
1791 			sysctl_head_finish(head);
1792 			break;
1793 		}
1794 	}
1795 	return error;
1796 }
1797 
1798 SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
1799 {
1800 	struct __sysctl_args tmp;
1801 	int error;
1802 
1803 	if (copy_from_user(&tmp, args, sizeof(tmp)))
1804 		return -EFAULT;
1805 
1806 	error = deprecated_sysctl_warning(&tmp);
1807 	if (error)
1808 		goto out;
1809 
1810 	lock_kernel();
1811 	error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1812 			  tmp.newval, tmp.newlen);
1813 	unlock_kernel();
1814 out:
1815 	return error;
1816 }
1817 #endif /* CONFIG_SYSCTL_SYSCALL */
1818 
1819 /*
1820  * sysctl_perm does NOT grant the superuser all rights automatically, because
1821  * some sysctl variables are readonly even to root.
1822  */
1823 
1824 static int test_perm(int mode, int op)
1825 {
1826 	if (!current_euid())
1827 		mode >>= 6;
1828 	else if (in_egroup_p(0))
1829 		mode >>= 3;
1830 	if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1831 		return 0;
1832 	return -EACCES;
1833 }
1834 
1835 int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1836 {
1837 	int error;
1838 	int mode;
1839 
1840 	error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
1841 	if (error)
1842 		return error;
1843 
1844 	if (root->permissions)
1845 		mode = root->permissions(root, current->nsproxy, table);
1846 	else
1847 		mode = table->mode;
1848 
1849 	return test_perm(mode, op);
1850 }
1851 
1852 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1853 {
1854 	for (; table->ctl_name || table->procname; table++) {
1855 		table->parent = parent;
1856 		if (table->child)
1857 			sysctl_set_parent(table, table->child);
1858 	}
1859 }
1860 
1861 static __init int sysctl_init(void)
1862 {
1863 	sysctl_set_parent(NULL, root_table);
1864 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1865 	{
1866 		int err;
1867 		err = sysctl_check_table(current->nsproxy, root_table);
1868 	}
1869 #endif
1870 	return 0;
1871 }
1872 
1873 core_initcall(sysctl_init);
1874 
1875 static struct ctl_table *is_branch_in(struct ctl_table *branch,
1876 				      struct ctl_table *table)
1877 {
1878 	struct ctl_table *p;
1879 	const char *s = branch->procname;
1880 
1881 	/* branch should have named subdirectory as its first element */
1882 	if (!s || !branch->child)
1883 		return NULL;
1884 
1885 	/* ... and nothing else */
1886 	if (branch[1].procname || branch[1].ctl_name)
1887 		return NULL;
1888 
1889 	/* table should contain subdirectory with the same name */
1890 	for (p = table; p->procname || p->ctl_name; p++) {
1891 		if (!p->child)
1892 			continue;
1893 		if (p->procname && strcmp(p->procname, s) == 0)
1894 			return p;
1895 	}
1896 	return NULL;
1897 }
1898 
1899 /* see if attaching q to p would be an improvement */
1900 static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1901 {
1902 	struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1903 	struct ctl_table *next;
1904 	int is_better = 0;
1905 	int not_in_parent = !p->attached_by;
1906 
1907 	while ((next = is_branch_in(by, to)) != NULL) {
1908 		if (by == q->attached_by)
1909 			is_better = 1;
1910 		if (to == p->attached_by)
1911 			not_in_parent = 1;
1912 		by = by->child;
1913 		to = next->child;
1914 	}
1915 
1916 	if (is_better && not_in_parent) {
1917 		q->attached_by = by;
1918 		q->attached_to = to;
1919 		q->parent = p;
1920 	}
1921 }
1922 
1923 /**
1924  * __register_sysctl_paths - register a sysctl hierarchy
1925  * @root: List of sysctl headers to register on
1926  * @namespaces: Data to compute which lists of sysctl entries are visible
1927  * @path: The path to the directory the sysctl table is in.
1928  * @table: the top-level table structure
1929  *
1930  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1931  * array. A completely 0 filled entry terminates the table.
1932  *
1933  * The members of the &struct ctl_table structure are used as follows:
1934  *
1935  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1936  *            must be unique within that level of sysctl
1937  *
1938  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1939  *            enter a sysctl file
1940  *
1941  * data - a pointer to data for use by proc_handler
1942  *
1943  * maxlen - the maximum size in bytes of the data
1944  *
1945  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1946  *
1947  * child - a pointer to the child sysctl table if this entry is a directory, or
1948  *         %NULL.
1949  *
1950  * proc_handler - the text handler routine (described below)
1951  *
1952  * strategy - the strategy routine (described below)
1953  *
1954  * de - for internal use by the sysctl routines
1955  *
1956  * extra1, extra2 - extra pointers usable by the proc handler routines
1957  *
1958  * Leaf nodes in the sysctl tree will be represented by a single file
1959  * under /proc; non-leaf nodes will be represented by directories.
1960  *
1961  * sysctl(2) can automatically manage read and write requests through
1962  * the sysctl table.  The data and maxlen fields of the ctl_table
1963  * struct enable minimal validation of the values being written to be
1964  * performed, and the mode field allows minimal authentication.
1965  *
1966  * More sophisticated management can be enabled by the provision of a
1967  * strategy routine with the table entry.  This will be called before
1968  * any automatic read or write of the data is performed.
1969  *
1970  * The strategy routine may return
1971  *
1972  * < 0 - Error occurred (error is passed to user process)
1973  *
1974  * 0   - OK - proceed with automatic read or write.
1975  *
1976  * > 0 - OK - read or write has been done by the strategy routine, so
1977  *       return immediately.
1978  *
1979  * There must be a proc_handler routine for any terminal nodes
1980  * mirrored under /proc/sys (non-terminals are handled by a built-in
1981  * directory handler).  Several default handlers are available to
1982  * cover common cases -
1983  *
1984  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1985  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1986  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1987  *
1988  * It is the handler's job to read the input buffer from user memory
1989  * and process it. The handler should return 0 on success.
1990  *
1991  * This routine returns %NULL on a failure to register, and a pointer
1992  * to the table header on success.
1993  */
1994 struct ctl_table_header *__register_sysctl_paths(
1995 	struct ctl_table_root *root,
1996 	struct nsproxy *namespaces,
1997 	const struct ctl_path *path, struct ctl_table *table)
1998 {
1999 	struct ctl_table_header *header;
2000 	struct ctl_table *new, **prevp;
2001 	unsigned int n, npath;
2002 	struct ctl_table_set *set;
2003 
2004 	/* Count the path components */
2005 	for (npath = 0; path[npath].ctl_name || path[npath].procname; ++npath)
2006 		;
2007 
2008 	/*
2009 	 * For each path component, allocate a 2-element ctl_table array.
2010 	 * The first array element will be filled with the sysctl entry
2011 	 * for this, the second will be the sentinel (ctl_name == 0).
2012 	 *
2013 	 * We allocate everything in one go so that we don't have to
2014 	 * worry about freeing additional memory in unregister_sysctl_table.
2015 	 */
2016 	header = kzalloc(sizeof(struct ctl_table_header) +
2017 			 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
2018 	if (!header)
2019 		return NULL;
2020 
2021 	new = (struct ctl_table *) (header + 1);
2022 
2023 	/* Now connect the dots */
2024 	prevp = &header->ctl_table;
2025 	for (n = 0; n < npath; ++n, ++path) {
2026 		/* Copy the procname */
2027 		new->procname = path->procname;
2028 		new->ctl_name = path->ctl_name;
2029 		new->mode     = 0555;
2030 
2031 		*prevp = new;
2032 		prevp = &new->child;
2033 
2034 		new += 2;
2035 	}
2036 	*prevp = table;
2037 	header->ctl_table_arg = table;
2038 
2039 	INIT_LIST_HEAD(&header->ctl_entry);
2040 	header->used = 0;
2041 	header->unregistering = NULL;
2042 	header->root = root;
2043 	sysctl_set_parent(NULL, header->ctl_table);
2044 	header->count = 1;
2045 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
2046 	if (sysctl_check_table(namespaces, header->ctl_table)) {
2047 		kfree(header);
2048 		return NULL;
2049 	}
2050 #endif
2051 	spin_lock(&sysctl_lock);
2052 	header->set = lookup_header_set(root, namespaces);
2053 	header->attached_by = header->ctl_table;
2054 	header->attached_to = root_table;
2055 	header->parent = &root_table_header;
2056 	for (set = header->set; set; set = set->parent) {
2057 		struct ctl_table_header *p;
2058 		list_for_each_entry(p, &set->list, ctl_entry) {
2059 			if (p->unregistering)
2060 				continue;
2061 			try_attach(p, header);
2062 		}
2063 	}
2064 	header->parent->count++;
2065 	list_add_tail(&header->ctl_entry, &header->set->list);
2066 	spin_unlock(&sysctl_lock);
2067 
2068 	return header;
2069 }
2070 
2071 /**
2072  * register_sysctl_table_path - register a sysctl table hierarchy
2073  * @path: The path to the directory the sysctl table is in.
2074  * @table: the top-level table structure
2075  *
2076  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
2077  * array. A completely 0 filled entry terminates the table.
2078  *
2079  * See __register_sysctl_paths for more details.
2080  */
2081 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
2082 						struct ctl_table *table)
2083 {
2084 	return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
2085 					path, table);
2086 }
2087 
2088 /**
2089  * register_sysctl_table - register a sysctl table hierarchy
2090  * @table: the top-level table structure
2091  *
2092  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
2093  * array. A completely 0 filled entry terminates the table.
2094  *
2095  * See register_sysctl_paths for more details.
2096  */
2097 struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
2098 {
2099 	static const struct ctl_path null_path[] = { {} };
2100 
2101 	return register_sysctl_paths(null_path, table);
2102 }
2103 
2104 /**
2105  * unregister_sysctl_table - unregister a sysctl table hierarchy
2106  * @header: the header returned from register_sysctl_table
2107  *
2108  * Unregisters the sysctl table and all children. proc entries may not
2109  * actually be removed until they are no longer used by anyone.
2110  */
2111 void unregister_sysctl_table(struct ctl_table_header * header)
2112 {
2113 	might_sleep();
2114 
2115 	if (header == NULL)
2116 		return;
2117 
2118 	spin_lock(&sysctl_lock);
2119 	start_unregistering(header);
2120 	if (!--header->parent->count) {
2121 		WARN_ON(1);
2122 		kfree(header->parent);
2123 	}
2124 	if (!--header->count)
2125 		kfree(header);
2126 	spin_unlock(&sysctl_lock);
2127 }
2128 
2129 int sysctl_is_seen(struct ctl_table_header *p)
2130 {
2131 	struct ctl_table_set *set = p->set;
2132 	int res;
2133 	spin_lock(&sysctl_lock);
2134 	if (p->unregistering)
2135 		res = 0;
2136 	else if (!set->is_seen)
2137 		res = 1;
2138 	else
2139 		res = set->is_seen(set);
2140 	spin_unlock(&sysctl_lock);
2141 	return res;
2142 }
2143 
2144 void setup_sysctl_set(struct ctl_table_set *p,
2145 	struct ctl_table_set *parent,
2146 	int (*is_seen)(struct ctl_table_set *))
2147 {
2148 	INIT_LIST_HEAD(&p->list);
2149 	p->parent = parent ? parent : &sysctl_table_root.default_set;
2150 	p->is_seen = is_seen;
2151 }
2152 
2153 #else /* !CONFIG_SYSCTL */
2154 struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
2155 {
2156 	return NULL;
2157 }
2158 
2159 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
2160 						    struct ctl_table *table)
2161 {
2162 	return NULL;
2163 }
2164 
/* !CONFIG_SYSCTL stub: nothing to unregister. */
void unregister_sysctl_table(struct ctl_table_header *table)
{
}
2168 
/* !CONFIG_SYSCTL stub: the set is left untouched. */
void setup_sysctl_set(struct ctl_table_set *p,
		      struct ctl_table_set *parent,
		      int (*is_seen)(struct ctl_table_set *))
{
}
2174 
/* !CONFIG_SYSCTL stub: no reference counting is done. */
void sysctl_head_put(struct ctl_table_header *head)
{
}
2178 
2179 #endif /* CONFIG_SYSCTL */
2180 
2181 /*
2182  * /proc/sys support
2183  */
2184 
2185 #ifdef CONFIG_PROC_SYSCTL
2186 
2187 static int _proc_do_string(void* data, int maxlen, int write,
2188 			   struct file *filp, void __user *buffer,
2189 			   size_t *lenp, loff_t *ppos)
2190 {
2191 	size_t len;
2192 	char __user *p;
2193 	char c;
2194 
2195 	if (!data || !maxlen || !*lenp) {
2196 		*lenp = 0;
2197 		return 0;
2198 	}
2199 
2200 	if (write) {
2201 		len = 0;
2202 		p = buffer;
2203 		while (len < *lenp) {
2204 			if (get_user(c, p++))
2205 				return -EFAULT;
2206 			if (c == 0 || c == '\n')
2207 				break;
2208 			len++;
2209 		}
2210 		if (len >= maxlen)
2211 			len = maxlen-1;
2212 		if(copy_from_user(data, buffer, len))
2213 			return -EFAULT;
2214 		((char *) data)[len] = 0;
2215 		*ppos += *lenp;
2216 	} else {
2217 		len = strlen(data);
2218 		if (len > maxlen)
2219 			len = maxlen;
2220 
2221 		if (*ppos > len) {
2222 			*lenp = 0;
2223 			return 0;
2224 		}
2225 
2226 		data += *ppos;
2227 		len  -= *ppos;
2228 
2229 		if (len > *lenp)
2230 			len = *lenp;
2231 		if (len)
2232 			if(copy_to_user(buffer, data, len))
2233 				return -EFAULT;
2234 		if (len < *lenp) {
2235 			if(put_user('\n', ((char __user *) buffer) + len))
2236 				return -EFAULT;
2237 			len++;
2238 		}
2239 		*lenp = len;
2240 		*ppos += len;
2241 	}
2242 	return 0;
2243 }
2244 
2245 /**
2246  * proc_dostring - read a string sysctl
2247  * @table: the sysctl table
2248  * @write: %TRUE if this is a write to the sysctl file
2249  * @filp: the file structure
2250  * @buffer: the user buffer
2251  * @lenp: the size of the user buffer
2252  * @ppos: file position
2253  *
2254  * Reads/writes a string from/to the user buffer. If the kernel
2255  * buffer provided is not large enough to hold the string, the
2256  * string is truncated. The copied string is %NULL-terminated.
2257  * If the string is being read by the user process, it is copied
2258  * and a newline '\n' is added. It is truncated if the buffer is
2259  * not large enough.
2260  *
2261  * Returns 0 on success.
2262  */
2263 int proc_dostring(struct ctl_table *table, int write, struct file *filp,
2264 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2265 {
2266 	return _proc_do_string(table->data, table->maxlen, write, filp,
2267 			       buffer, lenp, ppos);
2268 }
2269 
2270 
/*
 * Default converter between an int and its sign/magnitude form.
 *
 * @negp:  sign flag; read on write, set (non-zero means negative) on read
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the int being stored to (write) or loaded from (read)
 * @write: non-zero to build *@valp from sign + magnitude
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening to unsigned: "-val" overflows
			 * for INT_MIN and, where unsigned long is wider than
			 * int, used to produce a sign-extended 20-digit
			 * magnitude instead of 2147483648.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2289 
2290 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2291 		  int write, struct file *filp, void __user *buffer,
2292 		  size_t *lenp, loff_t *ppos,
2293 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
2294 			      int write, void *data),
2295 		  void *data)
2296 {
2297 #define TMPBUFLEN 21
2298 	int *i, vleft, first = 1, neg;
2299 	unsigned long lval;
2300 	size_t left, len;
2301 
2302 	char buf[TMPBUFLEN], *p;
2303 	char __user *s = buffer;
2304 
2305 	if (!tbl_data || !table->maxlen || !*lenp ||
2306 	    (*ppos && !write)) {
2307 		*lenp = 0;
2308 		return 0;
2309 	}
2310 
2311 	i = (int *) tbl_data;
2312 	vleft = table->maxlen / sizeof(*i);
2313 	left = *lenp;
2314 
2315 	if (!conv)
2316 		conv = do_proc_dointvec_conv;
2317 
2318 	for (; left && vleft--; i++, first=0) {
2319 		if (write) {
2320 			while (left) {
2321 				char c;
2322 				if (get_user(c, s))
2323 					return -EFAULT;
2324 				if (!isspace(c))
2325 					break;
2326 				left--;
2327 				s++;
2328 			}
2329 			if (!left)
2330 				break;
2331 			neg = 0;
2332 			len = left;
2333 			if (len > sizeof(buf) - 1)
2334 				len = sizeof(buf) - 1;
2335 			if (copy_from_user(buf, s, len))
2336 				return -EFAULT;
2337 			buf[len] = 0;
2338 			p = buf;
2339 			if (*p == '-' && left > 1) {
2340 				neg = 1;
2341 				p++;
2342 			}
2343 			if (*p < '0' || *p > '9')
2344 				break;
2345 
2346 			lval = simple_strtoul(p, &p, 0);
2347 
2348 			len = p-buf;
2349 			if ((len < left) && *p && !isspace(*p))
2350 				break;
2351 			s += len;
2352 			left -= len;
2353 
2354 			if (conv(&neg, &lval, i, 1, data))
2355 				break;
2356 		} else {
2357 			p = buf;
2358 			if (!first)
2359 				*p++ = '\t';
2360 
2361 			if (conv(&neg, &lval, i, 0, data))
2362 				break;
2363 
2364 			sprintf(p, "%s%lu", neg ? "-" : "", lval);
2365 			len = strlen(buf);
2366 			if (len > left)
2367 				len = left;
2368 			if(copy_to_user(s, buf, len))
2369 				return -EFAULT;
2370 			left -= len;
2371 			s += len;
2372 		}
2373 	}
2374 
2375 	if (!write && !first && left) {
2376 		if(put_user('\n', s))
2377 			return -EFAULT;
2378 		left--, s++;
2379 	}
2380 	if (write) {
2381 		while (left) {
2382 			char c;
2383 			if (get_user(c, s++))
2384 				return -EFAULT;
2385 			if (!isspace(c))
2386 				break;
2387 			left--;
2388 		}
2389 	}
2390 	if (write && first)
2391 		return -EINVAL;
2392 	*lenp -= left;
2393 	*ppos += *lenp;
2394 	return 0;
2395 #undef TMPBUFLEN
2396 }
2397 
2398 static int do_proc_dointvec(struct ctl_table *table, int write, struct file *filp,
2399 		  void __user *buffer, size_t *lenp, loff_t *ppos,
2400 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
2401 			      int write, void *data),
2402 		  void *data)
2403 {
2404 	return __do_proc_dointvec(table->data, table, write, filp,
2405 			buffer, lenp, ppos, conv, data);
2406 }
2407 
2408 /**
2409  * proc_dointvec - read a vector of integers
2410  * @table: the sysctl table
2411  * @write: %TRUE if this is a write to the sysctl file
2412  * @filp: the file structure
2413  * @buffer: the user buffer
2414  * @lenp: the size of the user buffer
2415  * @ppos: file position
2416  *
2417  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2418  * values from/to the user buffer, treated as an ASCII string.
2419  *
2420  * Returns 0 on success.
2421  */
2422 int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
2423 		     void __user *buffer, size_t *lenp, loff_t *ppos)
2424 {
2425     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2426 		    	    NULL,NULL);
2427 }
2428 
2429 /*
2430  * Taint values can only be increased
2431  * This means we can safely use a temporary.
2432  */
2433 static int proc_taint(struct ctl_table *table, int write, struct file *filp,
2434 			       void __user *buffer, size_t *lenp, loff_t *ppos)
2435 {
2436 	struct ctl_table t;
2437 	unsigned long tmptaint = get_taint();
2438 	int err;
2439 
2440 	if (write && !capable(CAP_SYS_ADMIN))
2441 		return -EPERM;
2442 
2443 	t = *table;
2444 	t.data = &tmptaint;
2445 	err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos);
2446 	if (err < 0)
2447 		return err;
2448 
2449 	if (write) {
2450 		/*
2451 		 * Poor man's atomic or. Not worth adding a primitive
2452 		 * to everyone's atomic.h for this
2453 		 */
2454 		int i;
2455 		for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2456 			if ((tmptaint >> i) & 1)
2457 				add_taint(i);
2458 		}
2459 	}
2460 
2461 	return err;
2462 }
2463 
/* Optional bounds for do_proc_dointvec_minmax_conv(); NULL means "no bound". */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Converter between an int and its sign/magnitude form that range-checks
 * written values.
 *
 * @negp:  sign flag; read on write, set (non-zero means negative) on read
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the int being stored to (write) or loaded from (read)
 * @write: non-zero to build *@valp from sign + magnitude
 * @data:  struct do_proc_dointvec_minmax_conv_param with the bounds
 *
 * Returns 0 on success, or -EINVAL (leaving *@valp untouched) when a
 * written value is below *min or above *max.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening to unsigned: "-val" overflows
			 * for INT_MIN and, where unsigned long is wider than
			 * int, used to produce a sign-extended 20-digit
			 * magnitude instead of 2147483648.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2492 
2493 /**
2494  * proc_dointvec_minmax - read a vector of integers with min/max values
2495  * @table: the sysctl table
2496  * @write: %TRUE if this is a write to the sysctl file
2497  * @filp: the file structure
2498  * @buffer: the user buffer
2499  * @lenp: the size of the user buffer
2500  * @ppos: file position
2501  *
2502  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2503  * values from/to the user buffer, treated as an ASCII string.
2504  *
2505  * This routine will ensure the values are within the range specified by
2506  * table->extra1 (min) and table->extra2 (max).
2507  *
2508  * Returns 0 on success.
2509  */
2510 int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
2511 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2512 {
2513 	struct do_proc_dointvec_minmax_conv_param param = {
2514 		.min = (int *) table->extra1,
2515 		.max = (int *) table->extra2,
2516 	};
2517 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2518 				do_proc_dointvec_minmax_conv, &param);
2519 }
2520 
2521 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2522 				     struct file *filp,
2523 				     void __user *buffer,
2524 				     size_t *lenp, loff_t *ppos,
2525 				     unsigned long convmul,
2526 				     unsigned long convdiv)
2527 {
2528 #define TMPBUFLEN 21
2529 	unsigned long *i, *min, *max, val;
2530 	int vleft, first=1, neg;
2531 	size_t len, left;
2532 	char buf[TMPBUFLEN], *p;
2533 	char __user *s = buffer;
2534 
2535 	if (!data || !table->maxlen || !*lenp ||
2536 	    (*ppos && !write)) {
2537 		*lenp = 0;
2538 		return 0;
2539 	}
2540 
2541 	i = (unsigned long *) data;
2542 	min = (unsigned long *) table->extra1;
2543 	max = (unsigned long *) table->extra2;
2544 	vleft = table->maxlen / sizeof(unsigned long);
2545 	left = *lenp;
2546 
2547 	for (; left && vleft--; i++, min++, max++, first=0) {
2548 		if (write) {
2549 			while (left) {
2550 				char c;
2551 				if (get_user(c, s))
2552 					return -EFAULT;
2553 				if (!isspace(c))
2554 					break;
2555 				left--;
2556 				s++;
2557 			}
2558 			if (!left)
2559 				break;
2560 			neg = 0;
2561 			len = left;
2562 			if (len > TMPBUFLEN-1)
2563 				len = TMPBUFLEN-1;
2564 			if (copy_from_user(buf, s, len))
2565 				return -EFAULT;
2566 			buf[len] = 0;
2567 			p = buf;
2568 			if (*p == '-' && left > 1) {
2569 				neg = 1;
2570 				p++;
2571 			}
2572 			if (*p < '0' || *p > '9')
2573 				break;
2574 			val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2575 			len = p-buf;
2576 			if ((len < left) && *p && !isspace(*p))
2577 				break;
2578 			if (neg)
2579 				val = -val;
2580 			s += len;
2581 			left -= len;
2582 
2583 			if(neg)
2584 				continue;
2585 			if ((min && val < *min) || (max && val > *max))
2586 				continue;
2587 			*i = val;
2588 		} else {
2589 			p = buf;
2590 			if (!first)
2591 				*p++ = '\t';
2592 			sprintf(p, "%lu", convdiv * (*i) / convmul);
2593 			len = strlen(buf);
2594 			if (len > left)
2595 				len = left;
2596 			if(copy_to_user(s, buf, len))
2597 				return -EFAULT;
2598 			left -= len;
2599 			s += len;
2600 		}
2601 	}
2602 
2603 	if (!write && !first && left) {
2604 		if(put_user('\n', s))
2605 			return -EFAULT;
2606 		left--, s++;
2607 	}
2608 	if (write) {
2609 		while (left) {
2610 			char c;
2611 			if (get_user(c, s++))
2612 				return -EFAULT;
2613 			if (!isspace(c))
2614 				break;
2615 			left--;
2616 		}
2617 	}
2618 	if (write && first)
2619 		return -EINVAL;
2620 	*lenp -= left;
2621 	*ppos += *lenp;
2622 	return 0;
2623 #undef TMPBUFLEN
2624 }
2625 
2626 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2627 				     struct file *filp,
2628 				     void __user *buffer,
2629 				     size_t *lenp, loff_t *ppos,
2630 				     unsigned long convmul,
2631 				     unsigned long convdiv)
2632 {
2633 	return __do_proc_doulongvec_minmax(table->data, table, write,
2634 			filp, buffer, lenp, ppos, convmul, convdiv);
2635 }
2636 
2637 /**
2638  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2639  * @table: the sysctl table
2640  * @write: %TRUE if this is a write to the sysctl file
2641  * @filp: the file structure
2642  * @buffer: the user buffer
2643  * @lenp: the size of the user buffer
2644  * @ppos: file position
2645  *
2646  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2647  * values from/to the user buffer, treated as an ASCII string.
2648  *
2649  * This routine will ensure the values are within the range specified by
2650  * table->extra1 (min) and table->extra2 (max).
2651  *
2652  * Returns 0 on success.
2653  */
2654 int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
2655 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2656 {
2657     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2658 }
2659 
2660 /**
2661  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2662  * @table: the sysctl table
2663  * @write: %TRUE if this is a write to the sysctl file
2664  * @filp: the file structure
2665  * @buffer: the user buffer
2666  * @lenp: the size of the user buffer
2667  * @ppos: file position
2668  *
2669  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2670  * values from/to the user buffer, treated as an ASCII string. The values
2671  * are treated as milliseconds, and converted to jiffies when they are stored.
2672  *
2673  * This routine will ensure the values are within the range specified by
2674  * table->extra1 (min) and table->extra2 (max).
2675  *
2676  * Returns 0 on success.
2677  */
2678 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2679 				      struct file *filp,
2680 				      void __user *buffer,
2681 				      size_t *lenp, loff_t *ppos)
2682 {
2683     return do_proc_doulongvec_minmax(table, write, filp, buffer,
2684 				     lenp, ppos, HZ, 1000l);
2685 }
2686 
2687 
2688 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2689 					 int *valp,
2690 					 int write, void *data)
2691 {
2692 	if (write) {
2693 		if (*lvalp > LONG_MAX / HZ)
2694 			return 1;
2695 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2696 	} else {
2697 		int val = *valp;
2698 		unsigned long lval;
2699 		if (val < 0) {
2700 			*negp = -1;
2701 			lval = (unsigned long)-val;
2702 		} else {
2703 			*negp = 0;
2704 			lval = (unsigned long)val;
2705 		}
2706 		*lvalp = lval / HZ;
2707 	}
2708 	return 0;
2709 }
2710 
2711 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2712 						int *valp,
2713 						int write, void *data)
2714 {
2715 	if (write) {
2716 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2717 			return 1;
2718 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2719 	} else {
2720 		int val = *valp;
2721 		unsigned long lval;
2722 		if (val < 0) {
2723 			*negp = -1;
2724 			lval = (unsigned long)-val;
2725 		} else {
2726 			*negp = 0;
2727 			lval = (unsigned long)val;
2728 		}
2729 		*lvalp = jiffies_to_clock_t(lval);
2730 	}
2731 	return 0;
2732 }
2733 
/*
 * Converter between an int holding jiffies and a sign/magnitude value in
 * milliseconds, using msecs_to_jiffies() and jiffies_to_msecs().
 *
 * @negp:  sign flag; read on write, set (non-zero means negative) on read
 * @lvalp: magnitude in milliseconds; read on write, set on read
 * @valp:  the jiffies value being stored to (write) or loaded from (read)
 * @write: non-zero to build *@valp from sign + magnitude
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/* Negate after widening: "-val" overflows for INT_MIN. */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2754 
2755 /**
2756  * proc_dointvec_jiffies - read a vector of integers as seconds
2757  * @table: the sysctl table
2758  * @write: %TRUE if this is a write to the sysctl file
2759  * @filp: the file structure
2760  * @buffer: the user buffer
2761  * @lenp: the size of the user buffer
2762  * @ppos: file position
2763  *
2764  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2765  * values from/to the user buffer, treated as an ASCII string.
2766  * The values read are assumed to be in seconds, and are converted into
2767  * jiffies.
2768  *
2769  * Returns 0 on success.
2770  */
2771 int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
2772 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2773 {
2774     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2775 		    	    do_proc_dointvec_jiffies_conv,NULL);
2776 }
2777 
2778 /**
2779  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2780  * @table: the sysctl table
2781  * @write: %TRUE if this is a write to the sysctl file
2782  * @filp: the file structure
2783  * @buffer: the user buffer
2784  * @lenp: the size of the user buffer
2785  * @ppos: pointer to the file position
2786  *
2787  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2788  * values from/to the user buffer, treated as an ASCII string.
2789  * The values read are assumed to be in 1/USER_HZ seconds, and
2790  * are converted into jiffies.
2791  *
2792  * Returns 0 on success.
2793  */
2794 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
2795 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2796 {
2797     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2798 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2799 }
2800 
2801 /**
2802  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2803  * @table: the sysctl table
2804  * @write: %TRUE if this is a write to the sysctl file
2805  * @filp: the file structure
2806  * @buffer: the user buffer
2807  * @lenp: the size of the user buffer
2808  * @ppos: file position
2809  * @ppos: the current position in the file
2810  *
2811  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2812  * values from/to the user buffer, treated as an ASCII string.
2813  * The values read are assumed to be in 1/1000 seconds, and
2814  * are converted into jiffies.
2815  *
2816  * Returns 0 on success.
2817  */
2818 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
2819 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2820 {
2821 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2822 				do_proc_dointvec_ms_jiffies_conv, NULL);
2823 }
2824 
2825 static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
2826 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2827 {
2828 	struct pid *new_pid;
2829 	pid_t tmp;
2830 	int r;
2831 
2832 	tmp = pid_vnr(cad_pid);
2833 
2834 	r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2835 			       lenp, ppos, NULL, NULL);
2836 	if (r || !write)
2837 		return r;
2838 
2839 	new_pid = find_get_pid(tmp);
2840 	if (!new_pid)
2841 		return -ESRCH;
2842 
2843 	put_pid(xchg(&cad_pid, new_pid));
2844 	return 0;
2845 }
2846 
2847 #else /* CONFIG_PROC_FS */
2848 
2849 int proc_dostring(struct ctl_table *table, int write, struct file *filp,
2850 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2851 {
2852 	return -ENOSYS;
2853 }
2854 
2855 int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
2856 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2857 {
2858 	return -ENOSYS;
2859 }
2860 
2861 int proc_dointvec_minmax(struct ctl_table *table, int write, struct file *filp,
2862 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2863 {
2864 	return -ENOSYS;
2865 }
2866 
2867 int proc_dointvec_jiffies(struct ctl_table *table, int write, struct file *filp,
2868 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2869 {
2870 	return -ENOSYS;
2871 }
2872 
2873 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, struct file *filp,
2874 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2875 {
2876 	return -ENOSYS;
2877 }
2878 
2879 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, struct file *filp,
2880 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2881 {
2882 	return -ENOSYS;
2883 }
2884 
2885 int proc_doulongvec_minmax(struct ctl_table *table, int write, struct file *filp,
2886 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2887 {
2888 	return -ENOSYS;
2889 }
2890 
2891 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2892 				      struct file *filp,
2893 				      void __user *buffer,
2894 				      size_t *lenp, loff_t *ppos)
2895 {
2896     return -ENOSYS;
2897 }
2898 
2899 
2900 #endif /* CONFIG_PROC_FS */
2901 
2902 
2903 #ifdef CONFIG_SYSCTL_SYSCALL
2904 /*
2905  * General sysctl support routines
2906  */
2907 
2908 /* The generic sysctl data routine (used if no strategy routine supplied) */
2909 int sysctl_data(struct ctl_table *table,
2910 		void __user *oldval, size_t __user *oldlenp,
2911 		void __user *newval, size_t newlen)
2912 {
2913 	size_t len;
2914 
2915 	/* Get out of I don't have a variable */
2916 	if (!table->data || !table->maxlen)
2917 		return -ENOTDIR;
2918 
2919 	if (oldval && oldlenp) {
2920 		if (get_user(len, oldlenp))
2921 			return -EFAULT;
2922 		if (len) {
2923 			if (len > table->maxlen)
2924 				len = table->maxlen;
2925 			if (copy_to_user(oldval, table->data, len))
2926 				return -EFAULT;
2927 			if (put_user(len, oldlenp))
2928 				return -EFAULT;
2929 		}
2930 	}
2931 
2932 	if (newval && newlen) {
2933 		if (newlen > table->maxlen)
2934 			newlen = table->maxlen;
2935 
2936 		if (copy_from_user(table->data, newval, newlen))
2937 			return -EFAULT;
2938 	}
2939 	return 1;
2940 }
2941 
2942 /* The generic string strategy routine: */
2943 int sysctl_string(struct ctl_table *table,
2944 		  void __user *oldval, size_t __user *oldlenp,
2945 		  void __user *newval, size_t newlen)
2946 {
2947 	if (!table->data || !table->maxlen)
2948 		return -ENOTDIR;
2949 
2950 	if (oldval && oldlenp) {
2951 		size_t bufsize;
2952 		if (get_user(bufsize, oldlenp))
2953 			return -EFAULT;
2954 		if (bufsize) {
2955 			size_t len = strlen(table->data), copied;
2956 
2957 			/* This shouldn't trigger for a well-formed sysctl */
2958 			if (len > table->maxlen)
2959 				len = table->maxlen;
2960 
2961 			/* Copy up to a max of bufsize-1 bytes of the string */
2962 			copied = (len >= bufsize) ? bufsize - 1 : len;
2963 
2964 			if (copy_to_user(oldval, table->data, copied) ||
2965 			    put_user(0, (char __user *)(oldval + copied)))
2966 				return -EFAULT;
2967 			if (put_user(len, oldlenp))
2968 				return -EFAULT;
2969 		}
2970 	}
2971 	if (newval && newlen) {
2972 		size_t len = newlen;
2973 		if (len > table->maxlen)
2974 			len = table->maxlen;
2975 		if(copy_from_user(table->data, newval, len))
2976 			return -EFAULT;
2977 		if (len == table->maxlen)
2978 			len--;
2979 		((char *) table->data)[len] = 0;
2980 	}
2981 	return 1;
2982 }
2983 
2984 /*
2985  * This function makes sure that all of the integers in the vector
2986  * are between the minimum and maximum values given in the arrays
2987  * table->extra1 and table->extra2, respectively.
2988  */
2989 int sysctl_intvec(struct ctl_table *table,
2990 		void __user *oldval, size_t __user *oldlenp,
2991 		void __user *newval, size_t newlen)
2992 {
2993 
2994 	if (newval && newlen) {
2995 		int __user *vec = (int __user *) newval;
2996 		int *min = (int *) table->extra1;
2997 		int *max = (int *) table->extra2;
2998 		size_t length;
2999 		int i;
3000 
3001 		if (newlen % sizeof(int) != 0)
3002 			return -EINVAL;
3003 
3004 		if (!table->extra1 && !table->extra2)
3005 			return 0;
3006 
3007 		if (newlen > table->maxlen)
3008 			newlen = table->maxlen;
3009 		length = newlen / sizeof(int);
3010 
3011 		for (i = 0; i < length; i++) {
3012 			int value;
3013 			if (get_user(value, vec + i))
3014 				return -EFAULT;
3015 			if (min && value < min[i])
3016 				return -EINVAL;
3017 			if (max && value > max[i])
3018 				return -EINVAL;
3019 		}
3020 	}
3021 	return 0;
3022 }
3023 
3024 /* Strategy function to convert jiffies to seconds */
3025 int sysctl_jiffies(struct ctl_table *table,
3026 		void __user *oldval, size_t __user *oldlenp,
3027 		void __user *newval, size_t newlen)
3028 {
3029 	if (oldval && oldlenp) {
3030 		size_t olen;
3031 
3032 		if (get_user(olen, oldlenp))
3033 			return -EFAULT;
3034 		if (olen) {
3035 			int val;
3036 
3037 			if (olen < sizeof(int))
3038 				return -EINVAL;
3039 
3040 			val = *(int *)(table->data) / HZ;
3041 			if (put_user(val, (int __user *)oldval))
3042 				return -EFAULT;
3043 			if (put_user(sizeof(int), oldlenp))
3044 				return -EFAULT;
3045 		}
3046 	}
3047 	if (newval && newlen) {
3048 		int new;
3049 		if (newlen != sizeof(int))
3050 			return -EINVAL;
3051 		if (get_user(new, (int __user *)newval))
3052 			return -EFAULT;
3053 		*(int *)(table->data) = new*HZ;
3054 	}
3055 	return 1;
3056 }
3057 
3058 /* Strategy function to convert jiffies to seconds */
3059 int sysctl_ms_jiffies(struct ctl_table *table,
3060 		void __user *oldval, size_t __user *oldlenp,
3061 		void __user *newval, size_t newlen)
3062 {
3063 	if (oldval && oldlenp) {
3064 		size_t olen;
3065 
3066 		if (get_user(olen, oldlenp))
3067 			return -EFAULT;
3068 		if (olen) {
3069 			int val;
3070 
3071 			if (olen < sizeof(int))
3072 				return -EINVAL;
3073 
3074 			val = jiffies_to_msecs(*(int *)(table->data));
3075 			if (put_user(val, (int __user *)oldval))
3076 				return -EFAULT;
3077 			if (put_user(sizeof(int), oldlenp))
3078 				return -EFAULT;
3079 		}
3080 	}
3081 	if (newval && newlen) {
3082 		int new;
3083 		if (newlen != sizeof(int))
3084 			return -EINVAL;
3085 		if (get_user(new, (int __user *)newval))
3086 			return -EFAULT;
3087 		*(int *)(table->data) = msecs_to_jiffies(new);
3088 	}
3089 	return 1;
3090 }
3091 
3092 
3093 
3094 #else /* CONFIG_SYSCTL_SYSCALL */
3095 
3096 
3097 SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
3098 {
3099 	struct __sysctl_args tmp;
3100 	int error;
3101 
3102 	if (copy_from_user(&tmp, args, sizeof(tmp)))
3103 		return -EFAULT;
3104 
3105 	error = deprecated_sysctl_warning(&tmp);
3106 
3107 	/* If no error reading the parameters then just -ENOSYS ... */
3108 	if (!error)
3109 		error = -ENOSYS;
3110 
3111 	return error;
3112 }
3113 
3114 int sysctl_data(struct ctl_table *table,
3115 		  void __user *oldval, size_t __user *oldlenp,
3116 		  void __user *newval, size_t newlen)
3117 {
3118 	return -ENOSYS;
3119 }
3120 
3121 int sysctl_string(struct ctl_table *table,
3122 		  void __user *oldval, size_t __user *oldlenp,
3123 		  void __user *newval, size_t newlen)
3124 {
3125 	return -ENOSYS;
3126 }
3127 
3128 int sysctl_intvec(struct ctl_table *table,
3129 		void __user *oldval, size_t __user *oldlenp,
3130 		void __user *newval, size_t newlen)
3131 {
3132 	return -ENOSYS;
3133 }
3134 
3135 int sysctl_jiffies(struct ctl_table *table,
3136 		void __user *oldval, size_t __user *oldlenp,
3137 		void __user *newval, size_t newlen)
3138 {
3139 	return -ENOSYS;
3140 }
3141 
3142 int sysctl_ms_jiffies(struct ctl_table *table,
3143 		void __user *oldval, size_t __user *oldlenp,
3144 		void __user *newval, size_t newlen)
3145 {
3146 	return -ENOSYS;
3147 }
3148 
3149 #endif /* CONFIG_SYSCTL_SYSCALL */
3150 
3151 static int deprecated_sysctl_warning(struct __sysctl_args *args)
3152 {
3153 	static int msg_count;
3154 	int name[CTL_MAXNAME];
3155 	int i;
3156 
3157 	/* Check args->nlen. */
3158 	if (args->nlen < 0 || args->nlen > CTL_MAXNAME)
3159 		return -ENOTDIR;
3160 
3161 	/* Read in the sysctl name for better debug message logging */
3162 	for (i = 0; i < args->nlen; i++)
3163 		if (get_user(name[i], args->name + i))
3164 			return -EFAULT;
3165 
3166 	/* Ignore accesses to kernel.version */
3167 	if ((args->nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
3168 		return 0;
3169 
3170 	if (msg_count < 5) {
3171 		msg_count++;
3172 		printk(KERN_INFO
3173 			"warning: process `%s' used the deprecated sysctl "
3174 			"system call with ", current->comm);
3175 		for (i = 0; i < args->nlen; i++)
3176 			printk("%d.", name[i]);
3177 		printk("\n");
3178 	}
3179 	return 0;
3180 }
3181 
/*
 * The exports are collected here because each of these symbols is
 * defined twice (once per configuration branch); repeating the export
 * after every definition would make no sense.
 */

/* Table registration */
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(register_sysctl_paths);
EXPORT_SYMBOL(unregister_sysctl_table);

/* /proc/sys handlers */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* sysctl system call strategy routines */
EXPORT_SYMBOL(sysctl_data);
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
3202