xref: /linux/kernel/sysctl.c (revision 04c71976500352d02f60616d2b960267d8c5fe24)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/capability.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/smp_lock.h>
31 #include <linux/fs.h>
32 #include <linux/init.h>
33 #include <linux/kernel.h>
34 #include <linux/kobject.h>
35 #include <linux/net.h>
36 #include <linux/sysrq.h>
37 #include <linux/highuid.h>
38 #include <linux/writeback.h>
39 #include <linux/hugetlb.h>
40 #include <linux/security.h>
41 #include <linux/initrd.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/nfs_fs.h>
47 #include <linux/acpi.h>
48 #include <linux/reboot.h>
49 
50 #include <asm/uaccess.h>
51 #include <asm/processor.h>
52 
53 #ifdef CONFIG_X86
54 #include <asm/nmi.h>
55 #include <asm/stacktrace.h>
56 #endif
57 
58 #if defined(CONFIG_SYSCTL)
59 
/*
 * Variables that live elsewhere in the kernel and are not declared by any
 * header; they are declared here so the tables in this file can refer to them.
 */
extern char core_pattern[];
extern int C_A_D;
extern int audit_argv_kb;
extern int compat_log;
extern int core_uses_pid;
extern int maps_protect;
extern int max_threads;
extern int min_free_kbytes;
extern int percpu_pagelist_fraction;
extern int pid_max;
extern int pid_max_max;
extern int pid_max_min;
extern int print_fatal_signals;
extern int printk_ratelimit_burst;
extern int printk_ratelimit_jiffies;
extern int suid_dumpable;
extern int sysctl_drop_caches;
extern int sysctl_oom_kill_allocating_task;
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern int sysctl_panic_on_oom;
extern int sysctl_stat_interval;
82 
/*
 * Limits referenced through .extra1/.extra2 in the tables below.  The tables
 * store only a pointer to each bound, so every value needs an object of its
 * own.
 */
static int zero;
static int one_hundred = 100;

#ifdef CONFIG_DETECT_SOFTLOCKUP
static int one = 1;
static int sixty = 60;
#endif

#ifdef CONFIG_MMU
static int two = 2;
#endif

/* Range used with proc_dointvec_minmax for the [fs_]overflow UID and GID. */
static int minolduid;
static int maxolduid = 65535;

/* Lower bound for the percpu_pagelist_fraction entry. */
static int min_percpu_pagelist_fract = 8;

/* Storage behind the read-only ngroups_max entry. */
static int ngroups_max = NGROUPS_MAX;
102 
/* Externals that exist only for particular options or architectures. */
#if defined(CONFIG_KMOD)
extern char modprobe_path[];
#endif

#if defined(CONFIG_CHR_DEV_SG)
extern int sg_big_buff;
#endif

#if defined(__sparc__)
extern char reboot_command[];
extern int stop_a_enabled;
extern int scons_pwroff;
#endif

#if defined(__hppa__)
extern int pwrsw_enabled;
extern int unaligned_enabled;
#endif

#if defined(CONFIG_S390)
extern int sysctl_userprocess_debug;
extern int spin_retry;
#if defined(CONFIG_MATHEMU)
extern int sysctl_ieee_emulation_warnings;
#endif
#endif

/* Declared unconditionally; only referenced from the hz_timer entry. */
extern int sysctl_hz_timer;

#if defined(CONFIG_BSD_PROCESS_ACCT)
extern int acct_parm[];
#endif

#if defined(CONFIG_IA64)
extern int no_unaligned_warning;
#endif

#if defined(CONFIG_RT_MUTEXES)
extern int max_lock_depth;
#endif
142 
#ifdef CONFIG_SYSCTL_SYSCALL
/* Declared ahead of use; being static, it is defined in this file. */
static int parse_table(int __user *, int,
		       void __user *, size_t __user *,
		       void __user *, size_t,
		       ctl_table *);
#endif

#ifdef CONFIG_PROC_SYSCTL
/* Handlers that kern_table names before their definitions are reached. */
static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
			       void __user *buffer, size_t *lenp,
			       loff_t *ppos);
static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
			   void __user *buffer, size_t *lenp, loff_t *ppos);
#endif
155 
156 static ctl_table root_table[];
157 static struct ctl_table_header root_table_header =
158 	{ root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
159 
160 static ctl_table kern_table[];
161 static ctl_table vm_table[];
162 static ctl_table fs_table[];
163 static ctl_table debug_table[];
164 static ctl_table dev_table[];
165 extern ctl_table random_table[];
166 #ifdef CONFIG_UNIX98_PTYS
167 extern ctl_table pty_table[];
168 #endif
169 #ifdef CONFIG_INOTIFY_USER
170 extern ctl_table inotify_table[];
171 #endif
172 
173 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
174 int sysctl_legacy_va_layout;
175 #endif
176 
177 extern int prove_locking;
178 extern int lock_stat;
179 
180 /* The default sysctl tables: */
181 
182 static ctl_table root_table[] = {
183 	{
184 		.ctl_name	= CTL_KERN,
185 		.procname	= "kernel",
186 		.mode		= 0555,
187 		.child		= kern_table,
188 	},
189 	{
190 		.ctl_name	= CTL_VM,
191 		.procname	= "vm",
192 		.mode		= 0555,
193 		.child		= vm_table,
194 	},
195 #ifdef CONFIG_NET
196 	{
197 		.ctl_name	= CTL_NET,
198 		.procname	= "net",
199 		.mode		= 0555,
200 		.child		= net_table,
201 	},
202 #endif
203 	{
204 		.ctl_name	= CTL_FS,
205 		.procname	= "fs",
206 		.mode		= 0555,
207 		.child		= fs_table,
208 	},
209 	{
210 		.ctl_name	= CTL_DEBUG,
211 		.procname	= "debug",
212 		.mode		= 0555,
213 		.child		= debug_table,
214 	},
215 	{
216 		.ctl_name	= CTL_DEV,
217 		.procname	= "dev",
218 		.mode		= 0555,
219 		.child		= dev_table,
220 	},
221 /*
222  * NOTE: do not add new entries to this table unless you have read
223  * Documentation/sysctl/ctl_unnumbered.txt
224  */
225 	{ .ctl_name = 0 }
226 };
227 
#ifdef CONFIG_SCHED_DEBUG
/*
 * Bounds for the scheduler granularity entries of kern_table.
 *
 * They reach proc_dointvec_minmax() and sysctl_intvec() through
 * .extra1/.extra2, and both helpers dereference those pointers as int.
 * The bounds therefore have to be int objects: with unsigned long storage
 * a 64-bit kernel would look at only half of each value (the all-zero half
 * on big-endian machines), turning the range into [0, 0].  Every value
 * below fits in an int.
 */
static int min_sched_granularity_ns = 100000;		/* 100 usecs */
static int max_sched_granularity_ns = 1000000000;	/* 1 second */
static int min_wakeup_granularity_ns;			/* 0 usecs */
static int max_wakeup_granularity_ns = 1000000000;	/* 1 second */
#endif
234 
235 static ctl_table kern_table[] = {
236 #ifdef CONFIG_SCHED_DEBUG
237 	{
238 		.ctl_name	= CTL_UNNUMBERED,
239 		.procname	= "sched_nr_latency",
240 		.data		= &sysctl_sched_nr_latency,
241 		.maxlen		= sizeof(unsigned int),
242 		.mode		= 0644,
243 		.proc_handler	= &proc_dointvec,
244 	},
245 	{
246 		.ctl_name	= CTL_UNNUMBERED,
247 		.procname	= "sched_latency_ns",
248 		.data		= &sysctl_sched_latency,
249 		.maxlen		= sizeof(unsigned int),
250 		.mode		= 0644,
251 		.proc_handler	= &proc_dointvec_minmax,
252 		.strategy	= &sysctl_intvec,
253 		.extra1		= &min_sched_granularity_ns,
254 		.extra2		= &max_sched_granularity_ns,
255 	},
256 	{
257 		.ctl_name	= CTL_UNNUMBERED,
258 		.procname	= "sched_wakeup_granularity_ns",
259 		.data		= &sysctl_sched_wakeup_granularity,
260 		.maxlen		= sizeof(unsigned int),
261 		.mode		= 0644,
262 		.proc_handler	= &proc_dointvec_minmax,
263 		.strategy	= &sysctl_intvec,
264 		.extra1		= &min_wakeup_granularity_ns,
265 		.extra2		= &max_wakeup_granularity_ns,
266 	},
267 	{
268 		.ctl_name	= CTL_UNNUMBERED,
269 		.procname	= "sched_batch_wakeup_granularity_ns",
270 		.data		= &sysctl_sched_batch_wakeup_granularity,
271 		.maxlen		= sizeof(unsigned int),
272 		.mode		= 0644,
273 		.proc_handler	= &proc_dointvec_minmax,
274 		.strategy	= &sysctl_intvec,
275 		.extra1		= &min_wakeup_granularity_ns,
276 		.extra2		= &max_wakeup_granularity_ns,
277 	},
278 	{
279 		.ctl_name	= CTL_UNNUMBERED,
280 		.procname	= "sched_child_runs_first",
281 		.data		= &sysctl_sched_child_runs_first,
282 		.maxlen		= sizeof(unsigned int),
283 		.mode		= 0644,
284 		.proc_handler	= &proc_dointvec,
285 	},
286 	{
287 		.ctl_name	= CTL_UNNUMBERED,
288 		.procname	= "sched_features",
289 		.data		= &sysctl_sched_features,
290 		.maxlen		= sizeof(unsigned int),
291 		.mode		= 0644,
292 		.proc_handler	= &proc_dointvec,
293 	},
294 	{
295 		.ctl_name	= CTL_UNNUMBERED,
296 		.procname	= "sched_migration_cost",
297 		.data		= &sysctl_sched_migration_cost,
298 		.maxlen		= sizeof(unsigned int),
299 		.mode		= 0644,
300 		.proc_handler	= &proc_dointvec,
301 	},
302 #endif
303 	{
304 		.ctl_name	= CTL_UNNUMBERED,
305 		.procname	= "sched_compat_yield",
306 		.data		= &sysctl_sched_compat_yield,
307 		.maxlen		= sizeof(unsigned int),
308 		.mode		= 0644,
309 		.proc_handler	= &proc_dointvec,
310 	},
311 #ifdef CONFIG_PROVE_LOCKING
312 	{
313 		.ctl_name	= CTL_UNNUMBERED,
314 		.procname	= "prove_locking",
315 		.data		= &prove_locking,
316 		.maxlen		= sizeof(int),
317 		.mode		= 0644,
318 		.proc_handler	= &proc_dointvec,
319 	},
320 #endif
321 #ifdef CONFIG_LOCK_STAT
322 	{
323 		.ctl_name	= CTL_UNNUMBERED,
324 		.procname	= "lock_stat",
325 		.data		= &lock_stat,
326 		.maxlen		= sizeof(int),
327 		.mode		= 0644,
328 		.proc_handler	= &proc_dointvec,
329 	},
330 #endif
331 	{
332 		.ctl_name	= KERN_PANIC,
333 		.procname	= "panic",
334 		.data		= &panic_timeout,
335 		.maxlen		= sizeof(int),
336 		.mode		= 0644,
337 		.proc_handler	= &proc_dointvec,
338 	},
339 	{
340 		.ctl_name	= KERN_CORE_USES_PID,
341 		.procname	= "core_uses_pid",
342 		.data		= &core_uses_pid,
343 		.maxlen		= sizeof(int),
344 		.mode		= 0644,
345 		.proc_handler	= &proc_dointvec,
346 	},
347 #ifdef CONFIG_AUDITSYSCALL
348 	{
349 		.ctl_name	= CTL_UNNUMBERED,
350 		.procname	= "audit_argv_kb",
351 		.data		= &audit_argv_kb,
352 		.maxlen		= sizeof(int),
353 		.mode		= 0644,
354 		.proc_handler	= &proc_dointvec,
355 	},
356 #endif
357 	{
358 		.ctl_name	= KERN_CORE_PATTERN,
359 		.procname	= "core_pattern",
360 		.data		= core_pattern,
361 		.maxlen		= CORENAME_MAX_SIZE,
362 		.mode		= 0644,
363 		.proc_handler	= &proc_dostring,
364 		.strategy	= &sysctl_string,
365 	},
366 #ifdef CONFIG_PROC_SYSCTL
367 	{
368 		.ctl_name	= KERN_TAINTED,
369 		.procname	= "tainted",
370 		.data		= &tainted,
371 		.maxlen		= sizeof(int),
372 		.mode		= 0644,
373 		.proc_handler	= &proc_dointvec_taint,
374 	},
375 #endif
376 	{
377 		.ctl_name	= KERN_CAP_BSET,
378 		.procname	= "cap-bound",
379 		.data		= &cap_bset,
380 		.maxlen		= sizeof(kernel_cap_t),
381 		.mode		= 0600,
382 		.proc_handler	= &proc_dointvec_bset,
383 	},
384 #ifdef CONFIG_BLK_DEV_INITRD
385 	{
386 		.ctl_name	= KERN_REALROOTDEV,
387 		.procname	= "real-root-dev",
388 		.data		= &real_root_dev,
389 		.maxlen		= sizeof(int),
390 		.mode		= 0644,
391 		.proc_handler	= &proc_dointvec,
392 	},
393 #endif
394 	{
395 		.ctl_name	= CTL_UNNUMBERED,
396 		.procname	= "print-fatal-signals",
397 		.data		= &print_fatal_signals,
398 		.maxlen		= sizeof(int),
399 		.mode		= 0644,
400 		.proc_handler	= &proc_dointvec,
401 	},
402 #ifdef __sparc__
403 	{
404 		.ctl_name	= KERN_SPARC_REBOOT,
405 		.procname	= "reboot-cmd",
406 		.data		= reboot_command,
407 		.maxlen		= 256,
408 		.mode		= 0644,
409 		.proc_handler	= &proc_dostring,
410 		.strategy	= &sysctl_string,
411 	},
412 	{
413 		.ctl_name	= KERN_SPARC_STOP_A,
414 		.procname	= "stop-a",
415 		.data		= &stop_a_enabled,
416 		.maxlen		= sizeof (int),
417 		.mode		= 0644,
418 		.proc_handler	= &proc_dointvec,
419 	},
420 	{
421 		.ctl_name	= KERN_SPARC_SCONS_PWROFF,
422 		.procname	= "scons-poweroff",
423 		.data		= &scons_pwroff,
424 		.maxlen		= sizeof (int),
425 		.mode		= 0644,
426 		.proc_handler	= &proc_dointvec,
427 	},
428 #endif
429 #ifdef __hppa__
430 	{
431 		.ctl_name	= KERN_HPPA_PWRSW,
432 		.procname	= "soft-power",
433 		.data		= &pwrsw_enabled,
434 		.maxlen		= sizeof (int),
435 	 	.mode		= 0644,
436 		.proc_handler	= &proc_dointvec,
437 	},
438 	{
439 		.ctl_name	= KERN_HPPA_UNALIGNED,
440 		.procname	= "unaligned-trap",
441 		.data		= &unaligned_enabled,
442 		.maxlen		= sizeof (int),
443 		.mode		= 0644,
444 		.proc_handler	= &proc_dointvec,
445 	},
446 #endif
447 	{
448 		.ctl_name	= KERN_CTLALTDEL,
449 		.procname	= "ctrl-alt-del",
450 		.data		= &C_A_D,
451 		.maxlen		= sizeof(int),
452 		.mode		= 0644,
453 		.proc_handler	= &proc_dointvec,
454 	},
455 	{
456 		.ctl_name	= KERN_PRINTK,
457 		.procname	= "printk",
458 		.data		= &console_loglevel,
459 		.maxlen		= 4*sizeof(int),
460 		.mode		= 0644,
461 		.proc_handler	= &proc_dointvec,
462 	},
463 #ifdef CONFIG_KMOD
464 	{
465 		.ctl_name	= KERN_MODPROBE,
466 		.procname	= "modprobe",
467 		.data		= &modprobe_path,
468 		.maxlen		= KMOD_PATH_LEN,
469 		.mode		= 0644,
470 		.proc_handler	= &proc_dostring,
471 		.strategy	= &sysctl_string,
472 	},
473 #endif
474 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
475 	{
476 		.ctl_name	= KERN_HOTPLUG,
477 		.procname	= "hotplug",
478 		.data		= &uevent_helper,
479 		.maxlen		= UEVENT_HELPER_PATH_LEN,
480 		.mode		= 0644,
481 		.proc_handler	= &proc_dostring,
482 		.strategy	= &sysctl_string,
483 	},
484 #endif
485 #ifdef CONFIG_CHR_DEV_SG
486 	{
487 		.ctl_name	= KERN_SG_BIG_BUFF,
488 		.procname	= "sg-big-buff",
489 		.data		= &sg_big_buff,
490 		.maxlen		= sizeof (int),
491 		.mode		= 0444,
492 		.proc_handler	= &proc_dointvec,
493 	},
494 #endif
495 #ifdef CONFIG_BSD_PROCESS_ACCT
496 	{
497 		.ctl_name	= KERN_ACCT,
498 		.procname	= "acct",
499 		.data		= &acct_parm,
500 		.maxlen		= 3*sizeof(int),
501 		.mode		= 0644,
502 		.proc_handler	= &proc_dointvec,
503 	},
504 #endif
505 #ifdef CONFIG_MAGIC_SYSRQ
506 	{
507 		.ctl_name	= KERN_SYSRQ,
508 		.procname	= "sysrq",
509 		.data		= &__sysrq_enabled,
510 		.maxlen		= sizeof (int),
511 		.mode		= 0644,
512 		.proc_handler	= &proc_dointvec,
513 	},
514 #endif
515 #ifdef CONFIG_PROC_SYSCTL
516 	{
517 		.ctl_name	= KERN_CADPID,
518 		.procname	= "cad_pid",
519 		.data		= NULL,
520 		.maxlen		= sizeof (int),
521 		.mode		= 0600,
522 		.proc_handler	= &proc_do_cad_pid,
523 	},
524 #endif
525 	{
526 		.ctl_name	= KERN_MAX_THREADS,
527 		.procname	= "threads-max",
528 		.data		= &max_threads,
529 		.maxlen		= sizeof(int),
530 		.mode		= 0644,
531 		.proc_handler	= &proc_dointvec,
532 	},
533 	{
534 		.ctl_name	= KERN_RANDOM,
535 		.procname	= "random",
536 		.mode		= 0555,
537 		.child		= random_table,
538 	},
539 #ifdef CONFIG_UNIX98_PTYS
540 	{
541 		.ctl_name	= KERN_PTY,
542 		.procname	= "pty",
543 		.mode		= 0555,
544 		.child		= pty_table,
545 	},
546 #endif
547 	{
548 		.ctl_name	= KERN_OVERFLOWUID,
549 		.procname	= "overflowuid",
550 		.data		= &overflowuid,
551 		.maxlen		= sizeof(int),
552 		.mode		= 0644,
553 		.proc_handler	= &proc_dointvec_minmax,
554 		.strategy	= &sysctl_intvec,
555 		.extra1		= &minolduid,
556 		.extra2		= &maxolduid,
557 	},
558 	{
559 		.ctl_name	= KERN_OVERFLOWGID,
560 		.procname	= "overflowgid",
561 		.data		= &overflowgid,
562 		.maxlen		= sizeof(int),
563 		.mode		= 0644,
564 		.proc_handler	= &proc_dointvec_minmax,
565 		.strategy	= &sysctl_intvec,
566 		.extra1		= &minolduid,
567 		.extra2		= &maxolduid,
568 	},
569 #ifdef CONFIG_S390
570 #ifdef CONFIG_MATHEMU
571 	{
572 		.ctl_name	= KERN_IEEE_EMULATION_WARNINGS,
573 		.procname	= "ieee_emulation_warnings",
574 		.data		= &sysctl_ieee_emulation_warnings,
575 		.maxlen		= sizeof(int),
576 		.mode		= 0644,
577 		.proc_handler	= &proc_dointvec,
578 	},
579 #endif
580 #ifdef CONFIG_NO_IDLE_HZ
581 	{
582 		.ctl_name       = KERN_HZ_TIMER,
583 		.procname       = "hz_timer",
584 		.data           = &sysctl_hz_timer,
585 		.maxlen         = sizeof(int),
586 		.mode           = 0644,
587 		.proc_handler   = &proc_dointvec,
588 	},
589 #endif
590 	{
591 		.ctl_name	= KERN_S390_USER_DEBUG_LOGGING,
592 		.procname	= "userprocess_debug",
593 		.data		= &sysctl_userprocess_debug,
594 		.maxlen		= sizeof(int),
595 		.mode		= 0644,
596 		.proc_handler	= &proc_dointvec,
597 	},
598 #endif
599 	{
600 		.ctl_name	= KERN_PIDMAX,
601 		.procname	= "pid_max",
602 		.data		= &pid_max,
603 		.maxlen		= sizeof (int),
604 		.mode		= 0644,
605 		.proc_handler	= &proc_dointvec_minmax,
606 		.strategy	= sysctl_intvec,
607 		.extra1		= &pid_max_min,
608 		.extra2		= &pid_max_max,
609 	},
610 	{
611 		.ctl_name	= KERN_PANIC_ON_OOPS,
612 		.procname	= "panic_on_oops",
613 		.data		= &panic_on_oops,
614 		.maxlen		= sizeof(int),
615 		.mode		= 0644,
616 		.proc_handler	= &proc_dointvec,
617 	},
618 	{
619 		.ctl_name	= KERN_PRINTK_RATELIMIT,
620 		.procname	= "printk_ratelimit",
621 		.data		= &printk_ratelimit_jiffies,
622 		.maxlen		= sizeof(int),
623 		.mode		= 0644,
624 		.proc_handler	= &proc_dointvec_jiffies,
625 		.strategy	= &sysctl_jiffies,
626 	},
627 	{
628 		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
629 		.procname	= "printk_ratelimit_burst",
630 		.data		= &printk_ratelimit_burst,
631 		.maxlen		= sizeof(int),
632 		.mode		= 0644,
633 		.proc_handler	= &proc_dointvec,
634 	},
635 	{
636 		.ctl_name	= KERN_NGROUPS_MAX,
637 		.procname	= "ngroups_max",
638 		.data		= &ngroups_max,
639 		.maxlen		= sizeof (int),
640 		.mode		= 0444,
641 		.proc_handler	= &proc_dointvec,
642 	},
643 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
644 	{
645 		.ctl_name       = KERN_UNKNOWN_NMI_PANIC,
646 		.procname       = "unknown_nmi_panic",
647 		.data           = &unknown_nmi_panic,
648 		.maxlen         = sizeof (int),
649 		.mode           = 0644,
650 		.proc_handler   = &proc_dointvec,
651 	},
652 	{
653 		.ctl_name       = KERN_NMI_WATCHDOG,
654 		.procname       = "nmi_watchdog",
655 		.data           = &nmi_watchdog_enabled,
656 		.maxlen         = sizeof (int),
657 		.mode           = 0644,
658 		.proc_handler   = &proc_nmi_enabled,
659 	},
660 #endif
661 #if defined(CONFIG_X86)
662 	{
663 		.ctl_name	= KERN_PANIC_ON_NMI,
664 		.procname	= "panic_on_unrecovered_nmi",
665 		.data		= &panic_on_unrecovered_nmi,
666 		.maxlen		= sizeof(int),
667 		.mode		= 0644,
668 		.proc_handler	= &proc_dointvec,
669 	},
670 	{
671 		.ctl_name	= KERN_BOOTLOADER_TYPE,
672 		.procname	= "bootloader_type",
673 		.data		= &bootloader_type,
674 		.maxlen		= sizeof (int),
675 		.mode		= 0444,
676 		.proc_handler	= &proc_dointvec,
677 	},
678 	{
679 		.ctl_name	= CTL_UNNUMBERED,
680 		.procname	= "kstack_depth_to_print",
681 		.data		= &kstack_depth_to_print,
682 		.maxlen		= sizeof(int),
683 		.mode		= 0644,
684 		.proc_handler	= &proc_dointvec,
685 	},
686 #endif
687 #if defined(CONFIG_MMU)
688 	{
689 		.ctl_name	= KERN_RANDOMIZE,
690 		.procname	= "randomize_va_space",
691 		.data		= &randomize_va_space,
692 		.maxlen		= sizeof(int),
693 		.mode		= 0644,
694 		.proc_handler	= &proc_dointvec,
695 	},
696 #endif
697 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
698 	{
699 		.ctl_name	= KERN_SPIN_RETRY,
700 		.procname	= "spin_retry",
701 		.data		= &spin_retry,
702 		.maxlen		= sizeof (int),
703 		.mode		= 0644,
704 		.proc_handler	= &proc_dointvec,
705 	},
706 #endif
707 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
708 	{
709 		.ctl_name	= KERN_ACPI_VIDEO_FLAGS,
710 		.procname	= "acpi_video_flags",
711 		.data		= &acpi_realmode_flags,
712 		.maxlen		= sizeof (unsigned long),
713 		.mode		= 0644,
714 		.proc_handler	= &proc_doulongvec_minmax,
715 	},
716 #endif
717 #ifdef CONFIG_IA64
718 	{
719 		.ctl_name	= KERN_IA64_UNALIGNED,
720 		.procname	= "ignore-unaligned-usertrap",
721 		.data		= &no_unaligned_warning,
722 		.maxlen		= sizeof (int),
723 	 	.mode		= 0644,
724 		.proc_handler	= &proc_dointvec,
725 	},
726 #endif
727 #ifdef CONFIG_DETECT_SOFTLOCKUP
728 	{
729 		.ctl_name	= CTL_UNNUMBERED,
730 		.procname	= "softlockup_thresh",
731 		.data		= &softlockup_thresh,
732 		.maxlen		= sizeof(int),
733 		.mode		= 0644,
734 		.proc_handler	= &proc_dointvec_minmax,
735 		.strategy	= &sysctl_intvec,
736 		.extra1		= &one,
737 		.extra2		= &sixty,
738 	},
739 #endif
740 #ifdef CONFIG_COMPAT
741 	{
742 		.ctl_name	= KERN_COMPAT_LOG,
743 		.procname	= "compat-log",
744 		.data		= &compat_log,
745 		.maxlen		= sizeof (int),
746 	 	.mode		= 0644,
747 		.proc_handler	= &proc_dointvec,
748 	},
749 #endif
750 #ifdef CONFIG_RT_MUTEXES
751 	{
752 		.ctl_name	= KERN_MAX_LOCK_DEPTH,
753 		.procname	= "max_lock_depth",
754 		.data		= &max_lock_depth,
755 		.maxlen		= sizeof(int),
756 		.mode		= 0644,
757 		.proc_handler	= &proc_dointvec,
758 	},
759 #endif
760 #ifdef CONFIG_PROC_FS
761 	{
762 		.ctl_name       = CTL_UNNUMBERED,
763 		.procname       = "maps_protect",
764 		.data           = &maps_protect,
765 		.maxlen         = sizeof(int),
766 		.mode           = 0644,
767 		.proc_handler   = &proc_dointvec,
768 	},
769 #endif
770 	{
771 		.ctl_name	= CTL_UNNUMBERED,
772 		.procname	= "poweroff_cmd",
773 		.data		= &poweroff_cmd,
774 		.maxlen		= POWEROFF_CMD_PATH_LEN,
775 		.mode		= 0644,
776 		.proc_handler	= &proc_dostring,
777 		.strategy	= &sysctl_string,
778 	},
779 /*
780  * NOTE: do not add new entries to this table unless you have read
781  * Documentation/sysctl/ctl_unnumbered.txt
782  */
783 	{ .ctl_name = 0 }
784 };
785 
786 static ctl_table vm_table[] = {
787 	{
788 		.ctl_name	= VM_OVERCOMMIT_MEMORY,
789 		.procname	= "overcommit_memory",
790 		.data		= &sysctl_overcommit_memory,
791 		.maxlen		= sizeof(sysctl_overcommit_memory),
792 		.mode		= 0644,
793 		.proc_handler	= &proc_dointvec,
794 	},
795 	{
796 		.ctl_name	= VM_PANIC_ON_OOM,
797 		.procname	= "panic_on_oom",
798 		.data		= &sysctl_panic_on_oom,
799 		.maxlen		= sizeof(sysctl_panic_on_oom),
800 		.mode		= 0644,
801 		.proc_handler	= &proc_dointvec,
802 	},
803 	{
804 		.ctl_name	= CTL_UNNUMBERED,
805 		.procname	= "oom_kill_allocating_task",
806 		.data		= &sysctl_oom_kill_allocating_task,
807 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
808 		.mode		= 0644,
809 		.proc_handler	= &proc_dointvec,
810 	},
811 	{
812 		.ctl_name	= VM_OVERCOMMIT_RATIO,
813 		.procname	= "overcommit_ratio",
814 		.data		= &sysctl_overcommit_ratio,
815 		.maxlen		= sizeof(sysctl_overcommit_ratio),
816 		.mode		= 0644,
817 		.proc_handler	= &proc_dointvec,
818 	},
819 	{
820 		.ctl_name	= VM_PAGE_CLUSTER,
821 		.procname	= "page-cluster",
822 		.data		= &page_cluster,
823 		.maxlen		= sizeof(int),
824 		.mode		= 0644,
825 		.proc_handler	= &proc_dointvec,
826 	},
827 	{
828 		.ctl_name	= VM_DIRTY_BACKGROUND,
829 		.procname	= "dirty_background_ratio",
830 		.data		= &dirty_background_ratio,
831 		.maxlen		= sizeof(dirty_background_ratio),
832 		.mode		= 0644,
833 		.proc_handler	= &proc_dointvec_minmax,
834 		.strategy	= &sysctl_intvec,
835 		.extra1		= &zero,
836 		.extra2		= &one_hundred,
837 	},
838 	{
839 		.ctl_name	= VM_DIRTY_RATIO,
840 		.procname	= "dirty_ratio",
841 		.data		= &vm_dirty_ratio,
842 		.maxlen		= sizeof(vm_dirty_ratio),
843 		.mode		= 0644,
844 		.proc_handler	= &dirty_ratio_handler,
845 		.strategy	= &sysctl_intvec,
846 		.extra1		= &zero,
847 		.extra2		= &one_hundred,
848 	},
849 	{
850 		.ctl_name	= VM_DIRTY_WB_CS,
851 		.procname	= "dirty_writeback_centisecs",
852 		.data		= &dirty_writeback_interval,
853 		.maxlen		= sizeof(dirty_writeback_interval),
854 		.mode		= 0644,
855 		.proc_handler	= &dirty_writeback_centisecs_handler,
856 	},
857 	{
858 		.ctl_name	= VM_DIRTY_EXPIRE_CS,
859 		.procname	= "dirty_expire_centisecs",
860 		.data		= &dirty_expire_interval,
861 		.maxlen		= sizeof(dirty_expire_interval),
862 		.mode		= 0644,
863 		.proc_handler	= &proc_dointvec_userhz_jiffies,
864 	},
865 	{
866 		.ctl_name	= VM_NR_PDFLUSH_THREADS,
867 		.procname	= "nr_pdflush_threads",
868 		.data		= &nr_pdflush_threads,
869 		.maxlen		= sizeof nr_pdflush_threads,
870 		.mode		= 0444 /* read-only*/,
871 		.proc_handler	= &proc_dointvec,
872 	},
873 	{
874 		.ctl_name	= VM_SWAPPINESS,
875 		.procname	= "swappiness",
876 		.data		= &vm_swappiness,
877 		.maxlen		= sizeof(vm_swappiness),
878 		.mode		= 0644,
879 		.proc_handler	= &proc_dointvec_minmax,
880 		.strategy	= &sysctl_intvec,
881 		.extra1		= &zero,
882 		.extra2		= &one_hundred,
883 	},
884 #ifdef CONFIG_HUGETLB_PAGE
885 	 {
886 		.ctl_name	= VM_HUGETLB_PAGES,
887 		.procname	= "nr_hugepages",
888 		.data		= &max_huge_pages,
889 		.maxlen		= sizeof(unsigned long),
890 		.mode		= 0644,
891 		.proc_handler	= &hugetlb_sysctl_handler,
892 		.extra1		= (void *)&hugetlb_zero,
893 		.extra2		= (void *)&hugetlb_infinity,
894 	 },
895 	 {
896 		.ctl_name	= VM_HUGETLB_GROUP,
897 		.procname	= "hugetlb_shm_group",
898 		.data		= &sysctl_hugetlb_shm_group,
899 		.maxlen		= sizeof(gid_t),
900 		.mode		= 0644,
901 		.proc_handler	= &proc_dointvec,
902 	 },
903 	 {
904 		.ctl_name	= CTL_UNNUMBERED,
905 		.procname	= "hugepages_treat_as_movable",
906 		.data		= &hugepages_treat_as_movable,
907 		.maxlen		= sizeof(int),
908 		.mode		= 0644,
909 		.proc_handler	= &hugetlb_treat_movable_handler,
910 	},
911 	{
912 		.ctl_name	= CTL_UNNUMBERED,
913 		.procname	= "hugetlb_dynamic_pool",
914 		.data		= &hugetlb_dynamic_pool,
915 		.maxlen		= sizeof(hugetlb_dynamic_pool),
916 		.mode		= 0644,
917 		.proc_handler	= &proc_dointvec,
918 	},
919 #endif
920 	{
921 		.ctl_name	= VM_LOWMEM_RESERVE_RATIO,
922 		.procname	= "lowmem_reserve_ratio",
923 		.data		= &sysctl_lowmem_reserve_ratio,
924 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
925 		.mode		= 0644,
926 		.proc_handler	= &lowmem_reserve_ratio_sysctl_handler,
927 		.strategy	= &sysctl_intvec,
928 	},
929 	{
930 		.ctl_name	= VM_DROP_PAGECACHE,
931 		.procname	= "drop_caches",
932 		.data		= &sysctl_drop_caches,
933 		.maxlen		= sizeof(int),
934 		.mode		= 0644,
935 		.proc_handler	= drop_caches_sysctl_handler,
936 		.strategy	= &sysctl_intvec,
937 	},
938 	{
939 		.ctl_name	= VM_MIN_FREE_KBYTES,
940 		.procname	= "min_free_kbytes",
941 		.data		= &min_free_kbytes,
942 		.maxlen		= sizeof(min_free_kbytes),
943 		.mode		= 0644,
944 		.proc_handler	= &min_free_kbytes_sysctl_handler,
945 		.strategy	= &sysctl_intvec,
946 		.extra1		= &zero,
947 	},
948 	{
949 		.ctl_name	= VM_PERCPU_PAGELIST_FRACTION,
950 		.procname	= "percpu_pagelist_fraction",
951 		.data		= &percpu_pagelist_fraction,
952 		.maxlen		= sizeof(percpu_pagelist_fraction),
953 		.mode		= 0644,
954 		.proc_handler	= &percpu_pagelist_fraction_sysctl_handler,
955 		.strategy	= &sysctl_intvec,
956 		.extra1		= &min_percpu_pagelist_fract,
957 	},
958 #ifdef CONFIG_MMU
959 	{
960 		.ctl_name	= VM_MAX_MAP_COUNT,
961 		.procname	= "max_map_count",
962 		.data		= &sysctl_max_map_count,
963 		.maxlen		= sizeof(sysctl_max_map_count),
964 		.mode		= 0644,
965 		.proc_handler	= &proc_dointvec
966 	},
967 #endif
968 	{
969 		.ctl_name	= VM_LAPTOP_MODE,
970 		.procname	= "laptop_mode",
971 		.data		= &laptop_mode,
972 		.maxlen		= sizeof(laptop_mode),
973 		.mode		= 0644,
974 		.proc_handler	= &proc_dointvec_jiffies,
975 		.strategy	= &sysctl_jiffies,
976 	},
977 	{
978 		.ctl_name	= VM_BLOCK_DUMP,
979 		.procname	= "block_dump",
980 		.data		= &block_dump,
981 		.maxlen		= sizeof(block_dump),
982 		.mode		= 0644,
983 		.proc_handler	= &proc_dointvec,
984 		.strategy	= &sysctl_intvec,
985 		.extra1		= &zero,
986 	},
987 	{
988 		.ctl_name	= VM_VFS_CACHE_PRESSURE,
989 		.procname	= "vfs_cache_pressure",
990 		.data		= &sysctl_vfs_cache_pressure,
991 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
992 		.mode		= 0644,
993 		.proc_handler	= &proc_dointvec,
994 		.strategy	= &sysctl_intvec,
995 		.extra1		= &zero,
996 	},
997 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
998 	{
999 		.ctl_name	= VM_LEGACY_VA_LAYOUT,
1000 		.procname	= "legacy_va_layout",
1001 		.data		= &sysctl_legacy_va_layout,
1002 		.maxlen		= sizeof(sysctl_legacy_va_layout),
1003 		.mode		= 0644,
1004 		.proc_handler	= &proc_dointvec,
1005 		.strategy	= &sysctl_intvec,
1006 		.extra1		= &zero,
1007 	},
1008 #endif
1009 #ifdef CONFIG_NUMA
1010 	{
1011 		.ctl_name	= VM_ZONE_RECLAIM_MODE,
1012 		.procname	= "zone_reclaim_mode",
1013 		.data		= &zone_reclaim_mode,
1014 		.maxlen		= sizeof(zone_reclaim_mode),
1015 		.mode		= 0644,
1016 		.proc_handler	= &proc_dointvec,
1017 		.strategy	= &sysctl_intvec,
1018 		.extra1		= &zero,
1019 	},
1020 	{
1021 		.ctl_name	= VM_MIN_UNMAPPED,
1022 		.procname	= "min_unmapped_ratio",
1023 		.data		= &sysctl_min_unmapped_ratio,
1024 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
1025 		.mode		= 0644,
1026 		.proc_handler	= &sysctl_min_unmapped_ratio_sysctl_handler,
1027 		.strategy	= &sysctl_intvec,
1028 		.extra1		= &zero,
1029 		.extra2		= &one_hundred,
1030 	},
1031 	{
1032 		.ctl_name	= VM_MIN_SLAB,
1033 		.procname	= "min_slab_ratio",
1034 		.data		= &sysctl_min_slab_ratio,
1035 		.maxlen		= sizeof(sysctl_min_slab_ratio),
1036 		.mode		= 0644,
1037 		.proc_handler	= &sysctl_min_slab_ratio_sysctl_handler,
1038 		.strategy	= &sysctl_intvec,
1039 		.extra1		= &zero,
1040 		.extra2		= &one_hundred,
1041 	},
1042 #endif
1043 #ifdef CONFIG_SMP
1044 	{
1045 		.ctl_name	= CTL_UNNUMBERED,
1046 		.procname	= "stat_interval",
1047 		.data		= &sysctl_stat_interval,
1048 		.maxlen		= sizeof(sysctl_stat_interval),
1049 		.mode		= 0644,
1050 		.proc_handler	= &proc_dointvec_jiffies,
1051 		.strategy	= &sysctl_jiffies,
1052 	},
1053 #endif
1054 #ifdef CONFIG_SECURITY
1055 	{
1056 		.ctl_name	= CTL_UNNUMBERED,
1057 		.procname	= "mmap_min_addr",
1058 		.data		= &mmap_min_addr,
1059 		.maxlen         = sizeof(unsigned long),
1060 		.mode		= 0644,
1061 		.proc_handler	= &proc_doulongvec_minmax,
1062 	},
1063 #endif
1064 #ifdef CONFIG_NUMA
1065 	{
1066 		.ctl_name	= CTL_UNNUMBERED,
1067 		.procname	= "numa_zonelist_order",
1068 		.data		= &numa_zonelist_order,
1069 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1070 		.mode		= 0644,
1071 		.proc_handler	= &numa_zonelist_order_handler,
1072 		.strategy	= &sysctl_string,
1073 	},
1074 #endif
1075 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1076    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1077 	{
1078 		.ctl_name	= VM_VDSO_ENABLED,
1079 		.procname	= "vdso_enabled",
1080 		.data		= &vdso_enabled,
1081 		.maxlen		= sizeof(vdso_enabled),
1082 		.mode		= 0644,
1083 		.proc_handler	= &proc_dointvec,
1084 		.strategy	= &sysctl_intvec,
1085 		.extra1		= &zero,
1086 	},
1087 #endif
1088 /*
1089  * NOTE: do not add new entries to this table unless you have read
1090  * Documentation/sysctl/ctl_unnumbered.txt
1091  */
1092 	{ .ctl_name = 0 }
1093 };
1094 
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/* Holds nothing but the terminating entry. */
static ctl_table binfmt_misc_table[] = {
	{
		.ctl_name = 0,
	}
};
#endif
1100 
1101 static ctl_table fs_table[] = {
1102 	{
1103 		.ctl_name	= FS_NRINODE,
1104 		.procname	= "inode-nr",
1105 		.data		= &inodes_stat,
1106 		.maxlen		= 2*sizeof(int),
1107 		.mode		= 0444,
1108 		.proc_handler	= &proc_dointvec,
1109 	},
1110 	{
1111 		.ctl_name	= FS_STATINODE,
1112 		.procname	= "inode-state",
1113 		.data		= &inodes_stat,
1114 		.maxlen		= 7*sizeof(int),
1115 		.mode		= 0444,
1116 		.proc_handler	= &proc_dointvec,
1117 	},
1118 	{
1119 		.ctl_name	= FS_NRFILE,
1120 		.procname	= "file-nr",
1121 		.data		= &files_stat,
1122 		.maxlen		= 3*sizeof(int),
1123 		.mode		= 0444,
1124 		.proc_handler	= &proc_nr_files,
1125 	},
1126 	{
1127 		.ctl_name	= FS_MAXFILE,
1128 		.procname	= "file-max",
1129 		.data		= &files_stat.max_files,
1130 		.maxlen		= sizeof(int),
1131 		.mode		= 0644,
1132 		.proc_handler	= &proc_dointvec,
1133 	},
1134 	{
1135 		.ctl_name	= FS_DENTRY,
1136 		.procname	= "dentry-state",
1137 		.data		= &dentry_stat,
1138 		.maxlen		= 6*sizeof(int),
1139 		.mode		= 0444,
1140 		.proc_handler	= &proc_dointvec,
1141 	},
1142 	{
1143 		.ctl_name	= FS_OVERFLOWUID,
1144 		.procname	= "overflowuid",
1145 		.data		= &fs_overflowuid,
1146 		.maxlen		= sizeof(int),
1147 		.mode		= 0644,
1148 		.proc_handler	= &proc_dointvec_minmax,
1149 		.strategy	= &sysctl_intvec,
1150 		.extra1		= &minolduid,
1151 		.extra2		= &maxolduid,
1152 	},
1153 	{
1154 		.ctl_name	= FS_OVERFLOWGID,
1155 		.procname	= "overflowgid",
1156 		.data		= &fs_overflowgid,
1157 		.maxlen		= sizeof(int),
1158 		.mode		= 0644,
1159 		.proc_handler	= &proc_dointvec_minmax,
1160 		.strategy	= &sysctl_intvec,
1161 		.extra1		= &minolduid,
1162 		.extra2		= &maxolduid,
1163 	},
1164 	{
1165 		.ctl_name	= FS_LEASES,
1166 		.procname	= "leases-enable",
1167 		.data		= &leases_enable,
1168 		.maxlen		= sizeof(int),
1169 		.mode		= 0644,
1170 		.proc_handler	= &proc_dointvec,
1171 	},
1172 #ifdef CONFIG_DNOTIFY
1173 	{
1174 		.ctl_name	= FS_DIR_NOTIFY,
1175 		.procname	= "dir-notify-enable",
1176 		.data		= &dir_notify_enable,
1177 		.maxlen		= sizeof(int),
1178 		.mode		= 0644,
1179 		.proc_handler	= &proc_dointvec,
1180 	},
1181 #endif
1182 #ifdef CONFIG_MMU
1183 	{
1184 		.ctl_name	= FS_LEASE_TIME,
1185 		.procname	= "lease-break-time",
1186 		.data		= &lease_break_time,
1187 		.maxlen		= sizeof(int),
1188 		.mode		= 0644,
1189 		.proc_handler	= &proc_dointvec_minmax,
1190 		.strategy	= &sysctl_intvec,
1191 		.extra1		= &zero,
1192 		.extra2		= &two,
1193 	},
1194 	{
1195 		.ctl_name	= FS_AIO_NR,
1196 		.procname	= "aio-nr",
1197 		.data		= &aio_nr,
1198 		.maxlen		= sizeof(aio_nr),
1199 		.mode		= 0444,
1200 		.proc_handler	= &proc_doulongvec_minmax,
1201 	},
1202 	{
1203 		.ctl_name	= FS_AIO_MAX_NR,
1204 		.procname	= "aio-max-nr",
1205 		.data		= &aio_max_nr,
1206 		.maxlen		= sizeof(aio_max_nr),
1207 		.mode		= 0644,
1208 		.proc_handler	= &proc_doulongvec_minmax,
1209 	},
1210 #ifdef CONFIG_INOTIFY_USER
1211 	{
1212 		.ctl_name	= FS_INOTIFY,
1213 		.procname	= "inotify",
1214 		.mode		= 0555,
1215 		.child		= inotify_table,
1216 	},
1217 #endif
1218 #endif
1219 	{
1220 		.ctl_name	= KERN_SETUID_DUMPABLE,
1221 		.procname	= "suid_dumpable",
1222 		.data		= &suid_dumpable,
1223 		.maxlen		= sizeof(int),
1224 		.mode		= 0644,
1225 		.proc_handler	= &proc_dointvec,
1226 	},
1227 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1228 	{
1229 		.ctl_name	= CTL_UNNUMBERED,
1230 		.procname	= "binfmt_misc",
1231 		.mode		= 0555,
1232 		.child		= binfmt_misc_table,
1233 	},
1234 #endif
1235 /*
1236  * NOTE: do not add new entries to this table unless you have read
1237  * Documentation/sysctl/ctl_unnumbered.txt
1238  */
1239 	{ .ctl_name = 0 }
1240 };
1241 
1242 static ctl_table debug_table[] = {
1243 #if defined(CONFIG_X86) || defined(CONFIG_PPC)
1244 	{
1245 		.ctl_name	= CTL_UNNUMBERED,
1246 		.procname	= "exception-trace",
1247 		.data		= &show_unhandled_signals,
1248 		.maxlen		= sizeof(int),
1249 		.mode		= 0644,
1250 		.proc_handler	= proc_dointvec
1251 	},
1252 #endif
1253 	{ .ctl_name = 0 }
1254 };
1255 
1256 static ctl_table dev_table[] = {
1257 	{ .ctl_name = 0 }
1258 };
1259 
1260 static DEFINE_SPINLOCK(sysctl_lock);
1261 
1262 /* called under sysctl_lock */
1263 static int use_table(struct ctl_table_header *p)
1264 {
1265 	if (unlikely(p->unregistering))
1266 		return 0;
1267 	p->used++;
1268 	return 1;
1269 }
1270 
1271 /* called under sysctl_lock */
1272 static void unuse_table(struct ctl_table_header *p)
1273 {
1274 	if (!--p->used)
1275 		if (unlikely(p->unregistering))
1276 			complete(p->unregistering);
1277 }
1278 
1279 /* called under sysctl_lock, will reacquire if has to wait */
1280 static void start_unregistering(struct ctl_table_header *p)
1281 {
1282 	/*
1283 	 * if p->used is 0, nobody will ever touch that entry again;
1284 	 * we'll eliminate all paths to it before dropping sysctl_lock
1285 	 */
1286 	if (unlikely(p->used)) {
1287 		struct completion wait;
1288 		init_completion(&wait);
1289 		p->unregistering = &wait;
1290 		spin_unlock(&sysctl_lock);
1291 		wait_for_completion(&wait);
1292 		spin_lock(&sysctl_lock);
1293 	}
1294 	/*
1295 	 * do not remove from the list until nobody holds it; walking the
1296 	 * list in do_sysctl() relies on that.
1297 	 */
1298 	list_del_init(&p->ctl_entry);
1299 }
1300 
1301 void sysctl_head_finish(struct ctl_table_header *head)
1302 {
1303 	if (!head)
1304 		return;
1305 	spin_lock(&sysctl_lock);
1306 	unuse_table(head);
1307 	spin_unlock(&sysctl_lock);
1308 }
1309 
1310 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1311 {
1312 	struct ctl_table_header *head;
1313 	struct list_head *tmp;
1314 	spin_lock(&sysctl_lock);
1315 	if (prev) {
1316 		tmp = &prev->ctl_entry;
1317 		unuse_table(prev);
1318 		goto next;
1319 	}
1320 	tmp = &root_table_header.ctl_entry;
1321 	for (;;) {
1322 		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1323 
1324 		if (!use_table(head))
1325 			goto next;
1326 		spin_unlock(&sysctl_lock);
1327 		return head;
1328 	next:
1329 		tmp = tmp->next;
1330 		if (tmp == &root_table_header.ctl_entry)
1331 			break;
1332 	}
1333 	spin_unlock(&sysctl_lock);
1334 	return NULL;
1335 }
1336 
1337 #ifdef CONFIG_SYSCTL_SYSCALL
1338 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1339 	       void __user *newval, size_t newlen)
1340 {
1341 	struct ctl_table_header *head;
1342 	int error = -ENOTDIR;
1343 
1344 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
1345 		return -ENOTDIR;
1346 	if (oldval) {
1347 		int old_len;
1348 		if (!oldlenp || get_user(old_len, oldlenp))
1349 			return -EFAULT;
1350 	}
1351 
1352 	for (head = sysctl_head_next(NULL); head;
1353 			head = sysctl_head_next(head)) {
1354 		error = parse_table(name, nlen, oldval, oldlenp,
1355 					newval, newlen, head->ctl_table);
1356 		if (error != -ENOTDIR) {
1357 			sysctl_head_finish(head);
1358 			break;
1359 		}
1360 	}
1361 	return error;
1362 }
1363 
1364 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1365 {
1366 	struct __sysctl_args tmp;
1367 	int error;
1368 
1369 	if (copy_from_user(&tmp, args, sizeof(tmp)))
1370 		return -EFAULT;
1371 
1372 	lock_kernel();
1373 	error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1374 			  tmp.newval, tmp.newlen);
1375 	unlock_kernel();
1376 	return error;
1377 }
1378 #endif /* CONFIG_SYSCTL_SYSCALL */
1379 
1380 /*
1381  * sysctl_perm does NOT grant the superuser all rights automatically, because
1382  * some sysctl variables are readonly even to root.
1383  */
1384 
1385 static int test_perm(int mode, int op)
1386 {
1387 	if (!current->euid)
1388 		mode >>= 6;
1389 	else if (in_egroup_p(0))
1390 		mode >>= 3;
1391 	if ((mode & op & 0007) == op)
1392 		return 0;
1393 	return -EACCES;
1394 }
1395 
1396 int sysctl_perm(ctl_table *table, int op)
1397 {
1398 	int error;
1399 	error = security_sysctl(table, op);
1400 	if (error)
1401 		return error;
1402 	return test_perm(table->mode, op);
1403 }
1404 
1405 #ifdef CONFIG_SYSCTL_SYSCALL
1406 static int parse_table(int __user *name, int nlen,
1407 		       void __user *oldval, size_t __user *oldlenp,
1408 		       void __user *newval, size_t newlen,
1409 		       ctl_table *table)
1410 {
1411 	int n;
1412 repeat:
1413 	if (!nlen)
1414 		return -ENOTDIR;
1415 	if (get_user(n, name))
1416 		return -EFAULT;
1417 	for ( ; table->ctl_name || table->procname; table++) {
1418 		if (!table->ctl_name)
1419 			continue;
1420 		if (n == table->ctl_name) {
1421 			int error;
1422 			if (table->child) {
1423 				if (sysctl_perm(table, 001))
1424 					return -EPERM;
1425 				name++;
1426 				nlen--;
1427 				table = table->child;
1428 				goto repeat;
1429 			}
1430 			error = do_sysctl_strategy(table, name, nlen,
1431 						   oldval, oldlenp,
1432 						   newval, newlen);
1433 			return error;
1434 		}
1435 	}
1436 	return -ENOTDIR;
1437 }
1438 
1439 /* Perform the actual read/write of a sysctl table entry. */
1440 int do_sysctl_strategy (ctl_table *table,
1441 			int __user *name, int nlen,
1442 			void __user *oldval, size_t __user *oldlenp,
1443 			void __user *newval, size_t newlen)
1444 {
1445 	int op = 0, rc;
1446 	size_t len;
1447 
1448 	if (oldval)
1449 		op |= 004;
1450 	if (newval)
1451 		op |= 002;
1452 	if (sysctl_perm(table, op))
1453 		return -EPERM;
1454 
1455 	if (table->strategy) {
1456 		rc = table->strategy(table, name, nlen, oldval, oldlenp,
1457 				     newval, newlen);
1458 		if (rc < 0)
1459 			return rc;
1460 		if (rc > 0)
1461 			return 0;
1462 	}
1463 
1464 	/* If there is no strategy routine, or if the strategy returns
1465 	 * zero, proceed with automatic r/w */
1466 	if (table->data && table->maxlen) {
1467 		if (oldval && oldlenp) {
1468 			if (get_user(len, oldlenp))
1469 				return -EFAULT;
1470 			if (len) {
1471 				if (len > table->maxlen)
1472 					len = table->maxlen;
1473 				if(copy_to_user(oldval, table->data, len))
1474 					return -EFAULT;
1475 				if(put_user(len, oldlenp))
1476 					return -EFAULT;
1477 			}
1478 		}
1479 		if (newval && newlen) {
1480 			len = newlen;
1481 			if (len > table->maxlen)
1482 				len = table->maxlen;
1483 			if(copy_from_user(table->data, newval, len))
1484 				return -EFAULT;
1485 		}
1486 	}
1487 	return 0;
1488 }
1489 #endif /* CONFIG_SYSCTL_SYSCALL */
1490 
1491 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1492 {
1493 	for (; table->ctl_name || table->procname; table++) {
1494 		table->parent = parent;
1495 		if (table->child)
1496 			sysctl_set_parent(table, table->child);
1497 	}
1498 }
1499 
1500 static __init int sysctl_init(void)
1501 {
1502 	sysctl_set_parent(NULL, root_table);
1503 	return 0;
1504 }
1505 
1506 core_initcall(sysctl_init);
1507 
1508 /**
1509  * register_sysctl_table - register a sysctl hierarchy
1510  * @table: the top-level table structure
1511  *
1512  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. An entry with a ctl_name of 0 and a %NULL procname terminates the
 * table.
1514  *
1515  * The members of the &ctl_table structure are used as follows:
1516  *
1517  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1518  *            must be unique within that level of sysctl
1519  *
1520  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1521  *            enter a sysctl file
1522  *
1523  * data - a pointer to data for use by proc_handler
1524  *
1525  * maxlen - the maximum size in bytes of the data
1526  *
1527  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1528  *
1529  * child - a pointer to the child sysctl table if this entry is a directory, or
1530  *         %NULL.
1531  *
1532  * proc_handler - the text handler routine (described below)
1533  *
1534  * strategy - the strategy routine (described below)
1535  *
1536  * de - for internal use by the sysctl routines
1537  *
1538  * extra1, extra2 - extra pointers usable by the proc handler routines
1539  *
1540  * Leaf nodes in the sysctl tree will be represented by a single file
1541  * under /proc; non-leaf nodes will be represented by directories.
1542  *
1543  * sysctl(2) can automatically manage read and write requests through
1544  * the sysctl table.  The data and maxlen fields of the ctl_table
1545  * struct enable minimal validation of the values being written to be
1546  * performed, and the mode field allows minimal authentication.
1547  *
1548  * More sophisticated management can be enabled by the provision of a
1549  * strategy routine with the table entry.  This will be called before
1550  * any automatic read or write of the data is performed.
1551  *
1552  * The strategy routine may return
1553  *
1554  * < 0 - Error occurred (error is passed to user process)
1555  *
1556  * 0   - OK - proceed with automatic read or write.
1557  *
1558  * > 0 - OK - read or write has been done by the strategy routine, so
1559  *       return immediately.
1560  *
1561  * There must be a proc_handler routine for any terminal nodes
1562  * mirrored under /proc/sys (non-terminals are handled by a built-in
1563  * directory handler).  Several default handlers are available to
1564  * cover common cases -
1565  *
1566  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1567  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1568  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1569  *
1570  * It is the handler's job to read the input buffer from user memory
1571  * and process it. The handler should return 0 on success.
1572  *
1573  * This routine returns %NULL on a failure to register, and a pointer
1574  * to the table header on success.
1575  */
1576 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1577 {
1578 	struct ctl_table_header *tmp;
1579 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1580 	if (!tmp)
1581 		return NULL;
1582 	tmp->ctl_table = table;
1583 	INIT_LIST_HEAD(&tmp->ctl_entry);
1584 	tmp->used = 0;
1585 	tmp->unregistering = NULL;
1586 	sysctl_set_parent(NULL, table);
1587 	spin_lock(&sysctl_lock);
1588 	list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1589 	spin_unlock(&sysctl_lock);
1590 	return tmp;
1591 }
1592 
1593 /**
1594  * unregister_sysctl_table - unregister a sysctl table hierarchy
1595  * @header: the header returned from register_sysctl_table
1596  *
1597  * Unregisters the sysctl table and all children. proc entries may not
1598  * actually be removed until they are no longer used by anyone.
1599  */
1600 void unregister_sysctl_table(struct ctl_table_header * header)
1601 {
1602 	might_sleep();
1603 	spin_lock(&sysctl_lock);
1604 	start_unregistering(header);
1605 	spin_unlock(&sysctl_lock);
1606 	kfree(header);
1607 }
1608 
1609 #else /* !CONFIG_SYSCTL */
1610 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1611 {
1612 	return NULL;
1613 }
1614 
/* Without CONFIG_SYSCTL there is nothing to undo. */
void unregister_sysctl_table(struct ctl_table_header * table)
{
}
1618 
1619 #endif /* CONFIG_SYSCTL */
1620 
1621 /*
1622  * /proc/sys support
1623  */
1624 
1625 #ifdef CONFIG_PROC_SYSCTL
1626 
1627 static int _proc_do_string(void* data, int maxlen, int write,
1628 			   struct file *filp, void __user *buffer,
1629 			   size_t *lenp, loff_t *ppos)
1630 {
1631 	size_t len;
1632 	char __user *p;
1633 	char c;
1634 
1635 	if (!data || !maxlen || !*lenp) {
1636 		*lenp = 0;
1637 		return 0;
1638 	}
1639 
1640 	if (write) {
1641 		len = 0;
1642 		p = buffer;
1643 		while (len < *lenp) {
1644 			if (get_user(c, p++))
1645 				return -EFAULT;
1646 			if (c == 0 || c == '\n')
1647 				break;
1648 			len++;
1649 		}
1650 		if (len >= maxlen)
1651 			len = maxlen-1;
1652 		if(copy_from_user(data, buffer, len))
1653 			return -EFAULT;
1654 		((char *) data)[len] = 0;
1655 		*ppos += *lenp;
1656 	} else {
1657 		len = strlen(data);
1658 		if (len > maxlen)
1659 			len = maxlen;
1660 
1661 		if (*ppos > len) {
1662 			*lenp = 0;
1663 			return 0;
1664 		}
1665 
1666 		data += *ppos;
1667 		len  -= *ppos;
1668 
1669 		if (len > *lenp)
1670 			len = *lenp;
1671 		if (len)
1672 			if(copy_to_user(buffer, data, len))
1673 				return -EFAULT;
1674 		if (len < *lenp) {
1675 			if(put_user('\n', ((char __user *) buffer) + len))
1676 				return -EFAULT;
1677 			len++;
1678 		}
1679 		*lenp = len;
1680 		*ppos += len;
1681 	}
1682 	return 0;
1683 }
1684 
1685 /**
1686  * proc_dostring - read a string sysctl
1687  * @table: the sysctl table
1688  * @write: %TRUE if this is a write to the sysctl file
1689  * @filp: the file structure
1690  * @buffer: the user buffer
1691  * @lenp: the size of the user buffer
1692  * @ppos: file position
1693  *
1694  * Reads/writes a string from/to the user buffer. If the kernel
1695  * buffer provided is not large enough to hold the string, the
 * string is truncated. The copied string is NUL-terminated.
1697  * If the string is being read by the user process, it is copied
1698  * and a newline '\n' is added. It is truncated if the buffer is
1699  * not large enough.
1700  *
1701  * Returns 0 on success.
1702  */
1703 int proc_dostring(ctl_table *table, int write, struct file *filp,
1704 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1705 {
1706 	return _proc_do_string(table->data, table->maxlen, write, filp,
1707 			       buffer, lenp, ppos);
1708 }
1709 
1710 
/*
 * Default conversion between an int variable and the sign/magnitude pair
 * used by the text parser and formatter.
 *
 * @negp:  sign flag; read on write, set to -1 or 0 on read
 * @lvalp: magnitude; read on write, filled in on read
 * @valp:  the kernel variable
 * @write: non-zero to store *negp / *lvalp into *valp, zero for the reverse
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening: -val overflows int for
			 * INT_MIN, while the unsigned negation is well
			 * defined and yields the right magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1729 
1730 static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
1731 		  int write, struct file *filp, void __user *buffer,
1732 		  size_t *lenp, loff_t *ppos,
1733 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1734 			      int write, void *data),
1735 		  void *data)
1736 {
1737 #define TMPBUFLEN 21
1738 	int *i, vleft, first=1, neg, val;
1739 	unsigned long lval;
1740 	size_t left, len;
1741 
1742 	char buf[TMPBUFLEN], *p;
1743 	char __user *s = buffer;
1744 
1745 	if (!tbl_data || !table->maxlen || !*lenp ||
1746 	    (*ppos && !write)) {
1747 		*lenp = 0;
1748 		return 0;
1749 	}
1750 
1751 	i = (int *) tbl_data;
1752 	vleft = table->maxlen / sizeof(*i);
1753 	left = *lenp;
1754 
1755 	if (!conv)
1756 		conv = do_proc_dointvec_conv;
1757 
1758 	for (; left && vleft--; i++, first=0) {
1759 		if (write) {
1760 			while (left) {
1761 				char c;
1762 				if (get_user(c, s))
1763 					return -EFAULT;
1764 				if (!isspace(c))
1765 					break;
1766 				left--;
1767 				s++;
1768 			}
1769 			if (!left)
1770 				break;
1771 			neg = 0;
1772 			len = left;
1773 			if (len > sizeof(buf) - 1)
1774 				len = sizeof(buf) - 1;
1775 			if (copy_from_user(buf, s, len))
1776 				return -EFAULT;
1777 			buf[len] = 0;
1778 			p = buf;
1779 			if (*p == '-' && left > 1) {
1780 				neg = 1;
1781 				p++;
1782 			}
1783 			if (*p < '0' || *p > '9')
1784 				break;
1785 
1786 			lval = simple_strtoul(p, &p, 0);
1787 
1788 			len = p-buf;
1789 			if ((len < left) && *p && !isspace(*p))
1790 				break;
1791 			if (neg)
1792 				val = -val;
1793 			s += len;
1794 			left -= len;
1795 
1796 			if (conv(&neg, &lval, i, 1, data))
1797 				break;
1798 		} else {
1799 			p = buf;
1800 			if (!first)
1801 				*p++ = '\t';
1802 
1803 			if (conv(&neg, &lval, i, 0, data))
1804 				break;
1805 
1806 			sprintf(p, "%s%lu", neg ? "-" : "", lval);
1807 			len = strlen(buf);
1808 			if (len > left)
1809 				len = left;
1810 			if(copy_to_user(s, buf, len))
1811 				return -EFAULT;
1812 			left -= len;
1813 			s += len;
1814 		}
1815 	}
1816 
1817 	if (!write && !first && left) {
1818 		if(put_user('\n', s))
1819 			return -EFAULT;
1820 		left--, s++;
1821 	}
1822 	if (write) {
1823 		while (left) {
1824 			char c;
1825 			if (get_user(c, s++))
1826 				return -EFAULT;
1827 			if (!isspace(c))
1828 				break;
1829 			left--;
1830 		}
1831 	}
1832 	if (write && first)
1833 		return -EINVAL;
1834 	*lenp -= left;
1835 	*ppos += *lenp;
1836 	return 0;
1837 #undef TMPBUFLEN
1838 }
1839 
1840 static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1841 		  void __user *buffer, size_t *lenp, loff_t *ppos,
1842 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1843 			      int write, void *data),
1844 		  void *data)
1845 {
1846 	return __do_proc_dointvec(table->data, table, write, filp,
1847 			buffer, lenp, ppos, conv, data);
1848 }
1849 
1850 /**
1851  * proc_dointvec - read a vector of integers
1852  * @table: the sysctl table
1853  * @write: %TRUE if this is a write to the sysctl file
1854  * @filp: the file structure
1855  * @buffer: the user buffer
1856  * @lenp: the size of the user buffer
1857  * @ppos: file position
1858  *
1859  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1860  * values from/to the user buffer, treated as an ASCII string.
1861  *
1862  * Returns 0 on success.
1863  */
1864 int proc_dointvec(ctl_table *table, int write, struct file *filp,
1865 		     void __user *buffer, size_t *lenp, loff_t *ppos)
1866 {
1867     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1868 		    	    NULL,NULL);
1869 }
1870 
/* How a written value is combined with the current one. */
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2

/*
 * Conversion callback that merges a written value into *valp according
 * to the OP_* code that @data points to: plain assignment, bitwise AND,
 * or bitwise OR.  The read direction matches the default conversion.
 *
 * Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:	*valp = val; break;
		case OP_AND:	*valp &= val; break;
		case OP_OR:	*valp |= val; break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening: -val overflows int for
			 * INT_MIN, the unsigned negation does not.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1899 
1900 /*
1901  *	init may raise the set.
1902  */
1903 
1904 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1905 			void __user *buffer, size_t *lenp, loff_t *ppos)
1906 {
1907 	int op;
1908 
1909 	if (write && !capable(CAP_SYS_MODULE)) {
1910 		return -EPERM;
1911 	}
1912 
1913 	op = is_init(current) ? OP_SET : OP_AND;
1914 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1915 				do_proc_dointvec_bset_conv,&op);
1916 }
1917 
1918 /*
1919  *	Taint values can only be increased
1920  */
1921 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
1922 			       void __user *buffer, size_t *lenp, loff_t *ppos)
1923 {
1924 	int op;
1925 
1926 	if (write && !capable(CAP_SYS_ADMIN))
1927 		return -EPERM;
1928 
1929 	op = OP_OR;
1930 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1931 				do_proc_dointvec_bset_conv,&op);
1932 }
1933 
/* Optional bounds for do_proc_dointvec_minmax_conv; NULL means unbounded. */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback that rejects written values outside the bounds in
 * the do_proc_dointvec_minmax_conv_param that @data points to.
 *
 * Returns 0 on success and -EINVAL, leaving *valp untouched, when a
 * written value is out of range.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening: -val overflows int for
			 * INT_MIN, the unsigned negation does not.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1962 
1963 /**
1964  * proc_dointvec_minmax - read a vector of integers with min/max values
1965  * @table: the sysctl table
1966  * @write: %TRUE if this is a write to the sysctl file
1967  * @filp: the file structure
1968  * @buffer: the user buffer
1969  * @lenp: the size of the user buffer
1970  * @ppos: file position
1971  *
1972  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1973  * values from/to the user buffer, treated as an ASCII string.
1974  *
1975  * This routine will ensure the values are within the range specified by
1976  * table->extra1 (min) and table->extra2 (max).
1977  *
1978  * Returns 0 on success.
1979  */
1980 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
1981 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1982 {
1983 	struct do_proc_dointvec_minmax_conv_param param = {
1984 		.min = (int *) table->extra1,
1985 		.max = (int *) table->extra2,
1986 	};
1987 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
1988 				do_proc_dointvec_minmax_conv, &param);
1989 }
1990 
1991 static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
1992 				     struct file *filp,
1993 				     void __user *buffer,
1994 				     size_t *lenp, loff_t *ppos,
1995 				     unsigned long convmul,
1996 				     unsigned long convdiv)
1997 {
1998 #define TMPBUFLEN 21
1999 	unsigned long *i, *min, *max, val;
2000 	int vleft, first=1, neg;
2001 	size_t len, left;
2002 	char buf[TMPBUFLEN], *p;
2003 	char __user *s = buffer;
2004 
2005 	if (!data || !table->maxlen || !*lenp ||
2006 	    (*ppos && !write)) {
2007 		*lenp = 0;
2008 		return 0;
2009 	}
2010 
2011 	i = (unsigned long *) data;
2012 	min = (unsigned long *) table->extra1;
2013 	max = (unsigned long *) table->extra2;
2014 	vleft = table->maxlen / sizeof(unsigned long);
2015 	left = *lenp;
2016 
2017 	for (; left && vleft--; i++, min++, max++, first=0) {
2018 		if (write) {
2019 			while (left) {
2020 				char c;
2021 				if (get_user(c, s))
2022 					return -EFAULT;
2023 				if (!isspace(c))
2024 					break;
2025 				left--;
2026 				s++;
2027 			}
2028 			if (!left)
2029 				break;
2030 			neg = 0;
2031 			len = left;
2032 			if (len > TMPBUFLEN-1)
2033 				len = TMPBUFLEN-1;
2034 			if (copy_from_user(buf, s, len))
2035 				return -EFAULT;
2036 			buf[len] = 0;
2037 			p = buf;
2038 			if (*p == '-' && left > 1) {
2039 				neg = 1;
2040 				p++;
2041 			}
2042 			if (*p < '0' || *p > '9')
2043 				break;
2044 			val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2045 			len = p-buf;
2046 			if ((len < left) && *p && !isspace(*p))
2047 				break;
2048 			if (neg)
2049 				val = -val;
2050 			s += len;
2051 			left -= len;
2052 
2053 			if(neg)
2054 				continue;
2055 			if ((min && val < *min) || (max && val > *max))
2056 				continue;
2057 			*i = val;
2058 		} else {
2059 			p = buf;
2060 			if (!first)
2061 				*p++ = '\t';
2062 			sprintf(p, "%lu", convdiv * (*i) / convmul);
2063 			len = strlen(buf);
2064 			if (len > left)
2065 				len = left;
2066 			if(copy_to_user(s, buf, len))
2067 				return -EFAULT;
2068 			left -= len;
2069 			s += len;
2070 		}
2071 	}
2072 
2073 	if (!write && !first && left) {
2074 		if(put_user('\n', s))
2075 			return -EFAULT;
2076 		left--, s++;
2077 	}
2078 	if (write) {
2079 		while (left) {
2080 			char c;
2081 			if (get_user(c, s++))
2082 				return -EFAULT;
2083 			if (!isspace(c))
2084 				break;
2085 			left--;
2086 		}
2087 	}
2088 	if (write && first)
2089 		return -EINVAL;
2090 	*lenp -= left;
2091 	*ppos += *lenp;
2092 	return 0;
2093 #undef TMPBUFLEN
2094 }
2095 
2096 static int do_proc_doulongvec_minmax(ctl_table *table, int write,
2097 				     struct file *filp,
2098 				     void __user *buffer,
2099 				     size_t *lenp, loff_t *ppos,
2100 				     unsigned long convmul,
2101 				     unsigned long convdiv)
2102 {
2103 	return __do_proc_doulongvec_minmax(table->data, table, write,
2104 			filp, buffer, lenp, ppos, convmul, convdiv);
2105 }
2106 
2107 /**
2108  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2109  * @table: the sysctl table
2110  * @write: %TRUE if this is a write to the sysctl file
2111  * @filp: the file structure
2112  * @buffer: the user buffer
2113  * @lenp: the size of the user buffer
2114  * @ppos: file position
2115  *
2116  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2117  * values from/to the user buffer, treated as an ASCII string.
2118  *
2119  * This routine will ensure the values are within the range specified by
2120  * table->extra1 (min) and table->extra2 (max).
2121  *
2122  * Returns 0 on success.
2123  */
2124 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2125 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2126 {
2127     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2128 }
2129 
2130 /**
2131  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2132  * @table: the sysctl table
2133  * @write: %TRUE if this is a write to the sysctl file
2134  * @filp: the file structure
2135  * @buffer: the user buffer
2136  * @lenp: the size of the user buffer
2137  * @ppos: file position
2138  *
2139  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2140  * values from/to the user buffer, treated as an ASCII string. The values
2141  * are treated as milliseconds, and converted to jiffies when they are stored.
2142  *
2143  * This routine will ensure the values are within the range specified by
2144  * table->extra1 (min) and table->extra2 (max).
2145  *
2146  * Returns 0 on success.
2147  */
2148 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2149 				      struct file *filp,
2150 				      void __user *buffer,
2151 				      size_t *lenp, loff_t *ppos)
2152 {
2153     return do_proc_doulongvec_minmax(table, write, filp, buffer,
2154 				     lenp, ppos, HZ, 1000l);
2155 }
2156 
2157 
2158 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2159 					 int *valp,
2160 					 int write, void *data)
2161 {
2162 	if (write) {
2163 		if (*lvalp > LONG_MAX / HZ)
2164 			return 1;
2165 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2166 	} else {
2167 		int val = *valp;
2168 		unsigned long lval;
2169 		if (val < 0) {
2170 			*negp = -1;
2171 			lval = (unsigned long)-val;
2172 		} else {
2173 			*negp = 0;
2174 			lval = (unsigned long)val;
2175 		}
2176 		*lvalp = lval / HZ;
2177 	}
2178 	return 0;
2179 }
2180 
2181 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2182 						int *valp,
2183 						int write, void *data)
2184 {
2185 	if (write) {
2186 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2187 			return 1;
2188 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2189 	} else {
2190 		int val = *valp;
2191 		unsigned long lval;
2192 		if (val < 0) {
2193 			*negp = -1;
2194 			lval = (unsigned long)-val;
2195 		} else {
2196 			*negp = 0;
2197 			lval = (unsigned long)val;
2198 		}
2199 		*lvalp = jiffies_to_clock_t(lval);
2200 	}
2201 	return 0;
2202 }
2203 
/*
 * Conversion callback for values presented in milliseconds: writes go
 * through msecs_to_jiffies(), reads through jiffies_to_msecs().
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening: -val overflows int for
			 * INT_MIN, the unsigned negation does not.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2224 
2225 /**
2226  * proc_dointvec_jiffies - read a vector of integers as seconds
2227  * @table: the sysctl table
2228  * @write: %TRUE if this is a write to the sysctl file
2229  * @filp: the file structure
2230  * @buffer: the user buffer
2231  * @lenp: the size of the user buffer
2232  * @ppos: file position
2233  *
2234  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2235  * values from/to the user buffer, treated as an ASCII string.
2236  * The values read are assumed to be in seconds, and are converted into
2237  * jiffies.
2238  *
2239  * Returns 0 on success.
2240  */
2241 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2242 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2243 {
2244     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2245 		    	    do_proc_dointvec_jiffies_conv,NULL);
2246 }
2247 
2248 /**
2249  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2250  * @table: the sysctl table
2251  * @write: %TRUE if this is a write to the sysctl file
2252  * @filp: the file structure
2253  * @buffer: the user buffer
2254  * @lenp: the size of the user buffer
2255  * @ppos: pointer to the file position
2256  *
2257  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2258  * values from/to the user buffer, treated as an ASCII string.
2259  * The values read are assumed to be in 1/USER_HZ seconds, and
2260  * are converted into jiffies.
2261  *
2262  * Returns 0 on success.
2263  */
2264 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2265 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2266 {
2267     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2268 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2269 }
2270 
2271 /**
2272  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2273  * @table: the sysctl table
2274  * @write: %TRUE if this is a write to the sysctl file
2275  * @filp: the file structure
2276  * @buffer: the user buffer
2277  * @lenp: the size of the user buffer
2278  * @ppos: file position
2279  * @ppos: the current position in the file
2280  *
2281  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2282  * values from/to the user buffer, treated as an ASCII string.
2283  * The values read are assumed to be in 1/1000 seconds, and
2284  * are converted into jiffies.
2285  *
2286  * Returns 0 on success.
2287  */
2288 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2289 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2290 {
2291 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2292 				do_proc_dointvec_ms_jiffies_conv, NULL);
2293 }
2294 
2295 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
2296 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2297 {
2298 	struct pid *new_pid;
2299 	pid_t tmp;
2300 	int r;
2301 
2302 	tmp = pid_nr(cad_pid);
2303 
2304 	r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2305 			       lenp, ppos, NULL, NULL);
2306 	if (r || !write)
2307 		return r;
2308 
2309 	new_pid = find_get_pid(tmp);
2310 	if (!new_pid)
2311 		return -ESRCH;
2312 
2313 	put_pid(xchg(&cad_pid, new_pid));
2314 	return 0;
2315 }
2316 
2317 #else /* CONFIG_PROC_FS */
2318 
2319 int proc_dostring(ctl_table *table, int write, struct file *filp,
2320 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2321 {
2322 	return -ENOSYS;
2323 }
2324 
2325 int proc_dointvec(ctl_table *table, int write, struct file *filp,
2326 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2327 {
2328 	return -ENOSYS;
2329 }
2330 
2331 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2332 			void __user *buffer, size_t *lenp, loff_t *ppos)
2333 {
2334 	return -ENOSYS;
2335 }
2336 
2337 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2338 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2339 {
2340 	return -ENOSYS;
2341 }
2342 
2343 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2344 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2345 {
2346 	return -ENOSYS;
2347 }
2348 
2349 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2350 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2351 {
2352 	return -ENOSYS;
2353 }
2354 
2355 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2356 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2357 {
2358 	return -ENOSYS;
2359 }
2360 
2361 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2362 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2363 {
2364 	return -ENOSYS;
2365 }
2366 
2367 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2368 				      struct file *filp,
2369 				      void __user *buffer,
2370 				      size_t *lenp, loff_t *ppos)
2371 {
2372     return -ENOSYS;
2373 }
2374 
2375 
2376 #endif /* CONFIG_PROC_FS */
2377 
2378 
2379 #ifdef CONFIG_SYSCTL_SYSCALL
2380 /*
2381  * General sysctl support routines
2382  */
2383 
2384 /* The generic string strategy routine: */
2385 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2386 		  void __user *oldval, size_t __user *oldlenp,
2387 		  void __user *newval, size_t newlen)
2388 {
2389 	if (!table->data || !table->maxlen)
2390 		return -ENOTDIR;
2391 
2392 	if (oldval && oldlenp) {
2393 		size_t bufsize;
2394 		if (get_user(bufsize, oldlenp))
2395 			return -EFAULT;
2396 		if (bufsize) {
2397 			size_t len = strlen(table->data), copied;
2398 
2399 			/* This shouldn't trigger for a well-formed sysctl */
2400 			if (len > table->maxlen)
2401 				len = table->maxlen;
2402 
2403 			/* Copy up to a max of bufsize-1 bytes of the string */
2404 			copied = (len >= bufsize) ? bufsize - 1 : len;
2405 
2406 			if (copy_to_user(oldval, table->data, copied) ||
2407 			    put_user(0, (char __user *)(oldval + copied)))
2408 				return -EFAULT;
2409 			if (put_user(len, oldlenp))
2410 				return -EFAULT;
2411 		}
2412 	}
2413 	if (newval && newlen) {
2414 		size_t len = newlen;
2415 		if (len > table->maxlen)
2416 			len = table->maxlen;
2417 		if(copy_from_user(table->data, newval, len))
2418 			return -EFAULT;
2419 		if (len == table->maxlen)
2420 			len--;
2421 		((char *) table->data)[len] = 0;
2422 	}
2423 	return 1;
2424 }
2425 
2426 /*
2427  * This function makes sure that all of the integers in the vector
2428  * are between the minimum and maximum values given in the arrays
2429  * table->extra1 and table->extra2, respectively.
2430  */
2431 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2432 		void __user *oldval, size_t __user *oldlenp,
2433 		void __user *newval, size_t newlen)
2434 {
2435 
2436 	if (newval && newlen) {
2437 		int __user *vec = (int __user *) newval;
2438 		int *min = (int *) table->extra1;
2439 		int *max = (int *) table->extra2;
2440 		size_t length;
2441 		int i;
2442 
2443 		if (newlen % sizeof(int) != 0)
2444 			return -EINVAL;
2445 
2446 		if (!table->extra1 && !table->extra2)
2447 			return 0;
2448 
2449 		if (newlen > table->maxlen)
2450 			newlen = table->maxlen;
2451 		length = newlen / sizeof(int);
2452 
2453 		for (i = 0; i < length; i++) {
2454 			int value;
2455 			if (get_user(value, vec + i))
2456 				return -EFAULT;
2457 			if (min && value < min[i])
2458 				return -EINVAL;
2459 			if (max && value > max[i])
2460 				return -EINVAL;
2461 		}
2462 	}
2463 	return 0;
2464 }
2465 
2466 /* Strategy function to convert jiffies to seconds */
2467 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2468 		void __user *oldval, size_t __user *oldlenp,
2469 		void __user *newval, size_t newlen)
2470 {
2471 	if (oldval && oldlenp) {
2472 		size_t olen;
2473 
2474 		if (get_user(olen, oldlenp))
2475 			return -EFAULT;
2476 		if (olen) {
2477 			int val;
2478 
2479 			if (olen < sizeof(int))
2480 				return -EINVAL;
2481 
2482 			val = *(int *)(table->data) / HZ;
2483 			if (put_user(val, (int __user *)oldval))
2484 				return -EFAULT;
2485 			if (put_user(sizeof(int), oldlenp))
2486 				return -EFAULT;
2487 		}
2488 	}
2489 	if (newval && newlen) {
2490 		int new;
2491 		if (newlen != sizeof(int))
2492 			return -EINVAL;
2493 		if (get_user(new, (int __user *)newval))
2494 			return -EFAULT;
2495 		*(int *)(table->data) = new*HZ;
2496 	}
2497 	return 1;
2498 }
2499 
2500 /* Strategy function to convert jiffies to seconds */
2501 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2502 		void __user *oldval, size_t __user *oldlenp,
2503 		void __user *newval, size_t newlen)
2504 {
2505 	if (oldval && oldlenp) {
2506 		size_t olen;
2507 
2508 		if (get_user(olen, oldlenp))
2509 			return -EFAULT;
2510 		if (olen) {
2511 			int val;
2512 
2513 			if (olen < sizeof(int))
2514 				return -EINVAL;
2515 
2516 			val = jiffies_to_msecs(*(int *)(table->data));
2517 			if (put_user(val, (int __user *)oldval))
2518 				return -EFAULT;
2519 			if (put_user(sizeof(int), oldlenp))
2520 				return -EFAULT;
2521 		}
2522 	}
2523 	if (newval && newlen) {
2524 		int new;
2525 		if (newlen != sizeof(int))
2526 			return -EINVAL;
2527 		if (get_user(new, (int __user *)newval))
2528 			return -EFAULT;
2529 		*(int *)(table->data) = msecs_to_jiffies(new);
2530 	}
2531 	return 1;
2532 }
2533 
2534 
2535 
2536 #else /* CONFIG_SYSCTL_SYSCALL */
2537 
2538 
2539 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2540 {
2541 	static int msg_count;
2542 	struct __sysctl_args tmp;
2543 	int name[CTL_MAXNAME];
2544 	int i;
2545 
2546 	/* Read in the sysctl name for better debug message logging */
2547 	if (copy_from_user(&tmp, args, sizeof(tmp)))
2548 		return -EFAULT;
2549 	if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME)
2550 		return -ENOTDIR;
2551 	for (i = 0; i < tmp.nlen; i++)
2552 		if (get_user(name[i], tmp.name + i))
2553 			return -EFAULT;
2554 
2555 	/* Ignore accesses to kernel.version */
2556 	if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
2557 		goto out;
2558 
2559 	if (msg_count < 5) {
2560 		msg_count++;
2561 		printk(KERN_INFO
2562 			"warning: process `%s' used the removed sysctl "
2563 			"system call with ", current->comm);
2564 		for (i = 0; i < tmp.nlen; i++)
2565 			printk("%d.", name[i]);
2566 		printk("\n");
2567 	}
2568 out:
2569 	return -ENOSYS;
2570 }
2571 
2572 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2573 		  void __user *oldval, size_t __user *oldlenp,
2574 		  void __user *newval, size_t newlen)
2575 {
2576 	return -ENOSYS;
2577 }
2578 
2579 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2580 		void __user *oldval, size_t __user *oldlenp,
2581 		void __user *newval, size_t newlen)
2582 {
2583 	return -ENOSYS;
2584 }
2585 
2586 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2587 		void __user *oldval, size_t __user *oldlenp,
2588 		void __user *newval, size_t newlen)
2589 {
2590 	return -ENOSYS;
2591 }
2592 
2593 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2594 		void __user *oldval, size_t __user *oldlenp,
2595 		void __user *newval, size_t newlen)
2596 {
2597 	return -ENOSYS;
2598 }
2599 
2600 #endif /* CONFIG_SYSCTL_SYSCALL */
2601 
/*
 * The exports are gathered here in one place as a deliberate exception to
 * placing each one next to its definition: many of these functions are
 * defined twice above (once in each branch of a config conditional), so
 * the per-definition form would have to be repeated as well.
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(unregister_sysctl_table);
2620