xref: /linux/kernel/sysctl.c (revision 42fda66387daa53538ae13a2c858396aaf037158)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/capability.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/smp_lock.h>
31 #include <linux/fs.h>
32 #include <linux/init.h>
33 #include <linux/kernel.h>
34 #include <linux/kobject.h>
35 #include <linux/net.h>
36 #include <linux/sysrq.h>
37 #include <linux/highuid.h>
38 #include <linux/writeback.h>
39 #include <linux/hugetlb.h>
40 #include <linux/security.h>
41 #include <linux/initrd.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/nfs_fs.h>
47 #include <linux/acpi.h>
48 #include <linux/reboot.h>
49 
50 #include <asm/uaccess.h>
51 #include <asm/processor.h>
52 
53 #ifdef CONFIG_X86
54 #include <asm/nmi.h>
55 #include <asm/stacktrace.h>
56 #endif
57 
58 #if defined(CONFIG_SYSCTL)
59 
60 /* External variables not in a header file. */
61 extern int C_A_D;
62 extern int print_fatal_signals;
63 extern int sysctl_overcommit_memory;
64 extern int sysctl_overcommit_ratio;
65 extern int sysctl_panic_on_oom;
66 extern int max_threads;
67 extern int core_uses_pid;
68 extern int suid_dumpable;
69 extern char core_pattern[];
70 extern int pid_max;
71 extern int min_free_kbytes;
72 extern int printk_ratelimit_jiffies;
73 extern int printk_ratelimit_burst;
74 extern int pid_max_min, pid_max_max;
75 extern int sysctl_drop_caches;
76 extern int percpu_pagelist_fraction;
77 extern int compat_log;
78 extern int maps_protect;
79 extern int sysctl_stat_interval;
80 extern int audit_argv_kb;
81 
82 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
83 static int maxolduid = 65535;	/* upper bound (.extra2) of the overflowuid/overflowgid entries */
84 static int minolduid;		/* lower bound (.extra1) of the same entries; no initializer, so it starts at 0 */
/* Lower bound (.extra1) handed to the percpu_pagelist_fraction entry of vm_table. */
85 static int min_percpu_pagelist_fract = 8;
86 
/* Storage behind the read-only (mode 0444) "ngroups_max" entry of kern_table. */
87 static int ngroups_max = NGROUPS_MAX;
88 
89 #ifdef CONFIG_KMOD
90 extern char modprobe_path[];
91 #endif
92 #ifdef CONFIG_CHR_DEV_SG
93 extern int sg_big_buff;
94 #endif
95 
96 #ifdef __sparc__
97 extern char reboot_command [];
98 extern int stop_a_enabled;
99 extern int scons_pwroff;
100 #endif
101 
102 #ifdef __hppa__
103 extern int pwrsw_enabled;
104 extern int unaligned_enabled;
105 #endif
106 
107 #ifdef CONFIG_S390
108 #ifdef CONFIG_MATHEMU
109 extern int sysctl_ieee_emulation_warnings;
110 #endif
111 extern int sysctl_userprocess_debug;
112 extern int spin_retry;
113 #endif
114 
115 extern int sysctl_hz_timer;
116 
117 #ifdef CONFIG_BSD_PROCESS_ACCT
118 extern int acct_parm[];
119 #endif
120 
121 #ifdef CONFIG_IA64
122 extern int no_unaligned_warning;
123 #endif
124 
125 #ifdef CONFIG_RT_MUTEXES
126 extern int max_lock_depth;
127 #endif
128 
129 #ifdef CONFIG_SYSCTL_SYSCALL
130 static int parse_table(int __user *, int, void __user *, size_t __user *,
131 		void __user *, size_t, ctl_table *);
132 #endif
133 
134 
135 #ifdef CONFIG_PROC_SYSCTL
136 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
137 		  void __user *buffer, size_t *lenp, loff_t *ppos);
138 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
139 			       void __user *buffer, size_t *lenp, loff_t *ppos);
140 #endif
141 
142 static ctl_table root_table[];
/*
 * Header object for the built-in root_table. The first initializer is the
 * table itself; the second sets up the header's ctl_entry list head with
 * LIST_HEAD_INIT.
 * NOTE(review): the initializer is positional, so it relies on the member
 * order of struct ctl_table_header, which is declared outside this file.
 */
143 static struct ctl_table_header root_table_header =
144 	{ root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
145 
146 static ctl_table kern_table[];
147 static ctl_table vm_table[];
148 static ctl_table fs_table[];
149 static ctl_table debug_table[];
150 static ctl_table dev_table[];
151 extern ctl_table random_table[];
152 #ifdef CONFIG_UNIX98_PTYS
153 extern ctl_table pty_table[];
154 #endif
155 #ifdef CONFIG_INOTIFY_USER
156 extern ctl_table inotify_table[];
157 #endif
158 
/*
 * Storage for the "legacy_va_layout" entry of vm_table. Both this definition
 * and that entry exist only when HAVE_ARCH_PICK_MMAP_LAYOUT is defined.
 */
159 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
160 int sysctl_legacy_va_layout;
161 #endif
162 
163 extern int prove_locking;
164 extern int lock_stat;
165 
166 /* The default sysctl tables: */
167 
/*
 * root_table - the top-level sysctl directories.
 *
 * Every entry here has mode 0555 and a .child pointer to the table holding
 * that subtree: "kernel", "vm", "net" (only with CONFIG_NET), "fs", "debug"
 * and "dev". The trailing { .ctl_name = 0 } entry is the end-of-table marker;
 * every table in this file ends with one.
 */
168 static ctl_table root_table[] = {
169 	{
170 		.ctl_name	= CTL_KERN,
171 		.procname	= "kernel",
172 		.mode		= 0555,
173 		.child		= kern_table,
174 	},
175 	{
176 		.ctl_name	= CTL_VM,
177 		.procname	= "vm",
178 		.mode		= 0555,
179 		.child		= vm_table,
180 	},
181 #ifdef CONFIG_NET
	/* NOTE(review): net_table is not declared in this file; it must come from an included header. */
182 	{
183 		.ctl_name	= CTL_NET,
184 		.procname	= "net",
185 		.mode		= 0555,
186 		.child		= net_table,
187 	},
188 #endif
189 	{
190 		.ctl_name	= CTL_FS,
191 		.procname	= "fs",
192 		.mode		= 0555,
193 		.child		= fs_table,
194 	},
195 	{
196 		.ctl_name	= CTL_DEBUG,
197 		.procname	= "debug",
198 		.mode		= 0555,
199 		.child		= debug_table,
200 	},
201 	{
202 		.ctl_name	= CTL_DEV,
203 		.procname	= "dev",
204 		.mode		= 0555,
205 		.child		= dev_table,
206 	},
207 /*
208  * NOTE: do not add new entries to this table unless you have read
209  * Documentation/sysctl/ctl_unnumbered.txt
210  */
211 	{ .ctl_name = 0 }
212 };
213 
214 #ifdef CONFIG_SCHED_DEBUG
/*
 * Bounds for the sched_latency_ns and sched_*wakeup_granularity_ns entries
 * of kern_table, which pass them as .extra1/.extra2 to proc_dointvec_minmax
 * and sysctl_intvec.
 *
 * They are plain int on purpose: those handlers compare against int limits
 * and the guarded .data objects are unsigned-int sized. Declaring the bounds
 * as unsigned long gave them the wrong width on 64-bit builds, and on
 * big-endian the half that was read did not hold the intended value.
 * All four values fit in a 32-bit int.
 */
static int min_sched_granularity_ns = 100000;		/* 100 usecs */
static int max_sched_granularity_ns = 1000000000;	/* 1 second */
static int min_wakeup_granularity_ns;			/* 0 usecs */
static int max_wakeup_granularity_ns = 1000000000;	/* 1 second */
219 #endif
220 
/*
 * kern_table - entries below the "kernel" directory of root_table.
 *
 * Most entries expose a single int through proc_dointvec. String entries use
 * proc_dostring with sysctl_string as the strategy, and range-checked entries
 * use proc_dointvec_minmax with .extra1/.extra2 pointing at the limits.
 * Entries inside an #ifdef are present only for that configuration or
 * architecture. "random" and "pty" are sub-directories (.child tables
 * defined elsewhere).
 */
221 static ctl_table kern_table[] = {
222 #ifdef CONFIG_SCHED_DEBUG
	/* Scheduler tunables that are compiled in only with CONFIG_SCHED_DEBUG. */
223 	{
224 		.ctl_name	= CTL_UNNUMBERED,
225 		.procname	= "sched_nr_latency",
226 		.data		= &sysctl_sched_nr_latency,
227 		.maxlen		= sizeof(unsigned int),
228 		.mode		= 0644,
229 		.proc_handler	= &proc_dointvec,
230 	},
231 	{
232 		.ctl_name	= CTL_UNNUMBERED,
233 		.procname	= "sched_latency_ns",
234 		.data		= &sysctl_sched_latency,
235 		.maxlen		= sizeof(unsigned int),
236 		.mode		= 0644,
237 		.proc_handler	= &proc_dointvec_minmax,
238 		.strategy	= &sysctl_intvec,
239 		.extra1		= &min_sched_granularity_ns,
240 		.extra2		= &max_sched_granularity_ns,
241 	},
242 	{
243 		.ctl_name	= CTL_UNNUMBERED,
244 		.procname	= "sched_wakeup_granularity_ns",
245 		.data		= &sysctl_sched_wakeup_granularity,
246 		.maxlen		= sizeof(unsigned int),
247 		.mode		= 0644,
248 		.proc_handler	= &proc_dointvec_minmax,
249 		.strategy	= &sysctl_intvec,
250 		.extra1		= &min_wakeup_granularity_ns,
251 		.extra2		= &max_wakeup_granularity_ns,
252 	},
253 	{
254 		.ctl_name	= CTL_UNNUMBERED,
255 		.procname	= "sched_batch_wakeup_granularity_ns",
256 		.data		= &sysctl_sched_batch_wakeup_granularity,
257 		.maxlen		= sizeof(unsigned int),
258 		.mode		= 0644,
259 		.proc_handler	= &proc_dointvec_minmax,
260 		.strategy	= &sysctl_intvec,
261 		.extra1		= &min_wakeup_granularity_ns,
262 		.extra2		= &max_wakeup_granularity_ns,
263 	},
264 	{
265 		.ctl_name	= CTL_UNNUMBERED,
266 		.procname	= "sched_child_runs_first",
267 		.data		= &sysctl_sched_child_runs_first,
268 		.maxlen		= sizeof(unsigned int),
269 		.mode		= 0644,
270 		.proc_handler	= &proc_dointvec,
271 	},
272 	{
273 		.ctl_name	= CTL_UNNUMBERED,
274 		.procname	= "sched_features",
275 		.data		= &sysctl_sched_features,
276 		.maxlen		= sizeof(unsigned int),
277 		.mode		= 0644,
278 		.proc_handler	= &proc_dointvec,
279 	},
280 	{
281 		.ctl_name	= CTL_UNNUMBERED,
282 		.procname	= "sched_migration_cost",
283 		.data		= &sysctl_sched_migration_cost,
284 		.maxlen		= sizeof(unsigned int),
285 		.mode		= 0644,
286 		.proc_handler	= &proc_dointvec,
287 	},
288 #endif
	/* sched_compat_yield sits outside the CONFIG_SCHED_DEBUG block, so it is always present. */
289 	{
290 		.ctl_name	= CTL_UNNUMBERED,
291 		.procname	= "sched_compat_yield",
292 		.data		= &sysctl_sched_compat_yield,
293 		.maxlen		= sizeof(unsigned int),
294 		.mode		= 0644,
295 		.proc_handler	= &proc_dointvec,
296 	},
297 #ifdef CONFIG_PROVE_LOCKING
298 	{
299 		.ctl_name	= CTL_UNNUMBERED,
300 		.procname	= "prove_locking",
301 		.data		= &prove_locking,
302 		.maxlen		= sizeof(int),
303 		.mode		= 0644,
304 		.proc_handler	= &proc_dointvec,
305 	},
306 #endif
307 #ifdef CONFIG_LOCK_STAT
308 	{
309 		.ctl_name	= CTL_UNNUMBERED,
310 		.procname	= "lock_stat",
311 		.data		= &lock_stat,
312 		.maxlen		= sizeof(int),
313 		.mode		= 0644,
314 		.proc_handler	= &proc_dointvec,
315 	},
316 #endif
317 	{
318 		.ctl_name	= KERN_PANIC,
319 		.procname	= "panic",
320 		.data		= &panic_timeout,
321 		.maxlen		= sizeof(int),
322 		.mode		= 0644,
323 		.proc_handler	= &proc_dointvec,
324 	},
325 	{
326 		.ctl_name	= KERN_CORE_USES_PID,
327 		.procname	= "core_uses_pid",
328 		.data		= &core_uses_pid,
329 		.maxlen		= sizeof(int),
330 		.mode		= 0644,
331 		.proc_handler	= &proc_dointvec,
332 	},
333 #ifdef CONFIG_AUDITSYSCALL
334 	{
335 		.ctl_name	= CTL_UNNUMBERED,
336 		.procname	= "audit_argv_kb",
337 		.data		= &audit_argv_kb,
338 		.maxlen		= sizeof(int),
339 		.mode		= 0644,
340 		.proc_handler	= &proc_dointvec,
341 	},
342 #endif
	/* String entry: .data is the core_pattern array itself and .maxlen its size limit. */
343 	{
344 		.ctl_name	= KERN_CORE_PATTERN,
345 		.procname	= "core_pattern",
346 		.data		= core_pattern,
347 		.maxlen		= CORENAME_MAX_SIZE,
348 		.mode		= 0644,
349 		.proc_handler	= &proc_dostring,
350 		.strategy	= &sysctl_string,
351 	},
352 #ifdef CONFIG_PROC_SYSCTL
	/* Uses proc_dointvec_taint, which is declared in this file under CONFIG_PROC_SYSCTL. */
353 	{
354 		.ctl_name	= KERN_TAINTED,
355 		.procname	= "tainted",
356 		.data		= &tainted,
357 		.maxlen		= sizeof(int),
358 		.mode		= 0644,
359 		.proc_handler	= &proc_dointvec_taint,
360 	},
361 #endif
	/* Mode 0600: owner-only access; handled by the dedicated proc_dointvec_bset. */
362 	{
363 		.ctl_name	= KERN_CAP_BSET,
364 		.procname	= "cap-bound",
365 		.data		= &cap_bset,
366 		.maxlen		= sizeof(kernel_cap_t),
367 		.mode		= 0600,
368 		.proc_handler	= &proc_dointvec_bset,
369 	},
370 #ifdef CONFIG_BLK_DEV_INITRD
371 	{
372 		.ctl_name	= KERN_REALROOTDEV,
373 		.procname	= "real-root-dev",
374 		.data		= &real_root_dev,
375 		.maxlen		= sizeof(int),
376 		.mode		= 0644,
377 		.proc_handler	= &proc_dointvec,
378 	},
379 #endif
380 	{
381 		.ctl_name	= CTL_UNNUMBERED,
382 		.procname	= "print-fatal-signals",
383 		.data		= &print_fatal_signals,
384 		.maxlen		= sizeof(int),
385 		.mode		= 0644,
386 		.proc_handler	= &proc_dointvec,
387 	},
388 #ifdef __sparc__
389 	{
390 		.ctl_name	= KERN_SPARC_REBOOT,
391 		.procname	= "reboot-cmd",
392 		.data		= reboot_command,
393 		.maxlen		= 256,
394 		.mode		= 0644,
395 		.proc_handler	= &proc_dostring,
396 		.strategy	= &sysctl_string,
397 	},
398 	{
399 		.ctl_name	= KERN_SPARC_STOP_A,
400 		.procname	= "stop-a",
401 		.data		= &stop_a_enabled,
402 		.maxlen		= sizeof (int),
403 		.mode		= 0644,
404 		.proc_handler	= &proc_dointvec,
405 	},
406 	{
407 		.ctl_name	= KERN_SPARC_SCONS_PWROFF,
408 		.procname	= "scons-poweroff",
409 		.data		= &scons_pwroff,
410 		.maxlen		= sizeof (int),
411 		.mode		= 0644,
412 		.proc_handler	= &proc_dointvec,
413 	},
414 #endif
415 #ifdef __hppa__
416 	{
417 		.ctl_name	= KERN_HPPA_PWRSW,
418 		.procname	= "soft-power",
419 		.data		= &pwrsw_enabled,
420 		.maxlen		= sizeof (int),
421 	 	.mode		= 0644,
422 		.proc_handler	= &proc_dointvec,
423 	},
424 	{
425 		.ctl_name	= KERN_HPPA_UNALIGNED,
426 		.procname	= "unaligned-trap",
427 		.data		= &unaligned_enabled,
428 		.maxlen		= sizeof (int),
429 		.mode		= 0644,
430 		.proc_handler	= &proc_dointvec,
431 	},
432 #endif
433 	{
434 		.ctl_name	= KERN_CTLALTDEL,
435 		.procname	= "ctrl-alt-del",
436 		.data		= &C_A_D,
437 		.maxlen		= sizeof(int),
438 		.mode		= 0644,
439 		.proc_handler	= &proc_dointvec,
440 	},
	/* .maxlen covers four ints beginning at console_loglevel. */
441 	{
442 		.ctl_name	= KERN_PRINTK,
443 		.procname	= "printk",
444 		.data		= &console_loglevel,
445 		.maxlen		= 4*sizeof(int),
446 		.mode		= 0644,
447 		.proc_handler	= &proc_dointvec,
448 	},
449 #ifdef CONFIG_KMOD
450 	{
451 		.ctl_name	= KERN_MODPROBE,
452 		.procname	= "modprobe",
453 		.data		= &modprobe_path,
454 		.maxlen		= KMOD_PATH_LEN,
455 		.mode		= 0644,
456 		.proc_handler	= &proc_dostring,
457 		.strategy	= &sysctl_string,
458 	},
459 #endif
460 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
461 	{
462 		.ctl_name	= KERN_HOTPLUG,
463 		.procname	= "hotplug",
464 		.data		= &uevent_helper,
465 		.maxlen		= UEVENT_HELPER_PATH_LEN,
466 		.mode		= 0644,
467 		.proc_handler	= &proc_dostring,
468 		.strategy	= &sysctl_string,
469 	},
470 #endif
471 #ifdef CONFIG_CHR_DEV_SG
	/* Read-only (mode 0444). */
472 	{
473 		.ctl_name	= KERN_SG_BIG_BUFF,
474 		.procname	= "sg-big-buff",
475 		.data		= &sg_big_buff,
476 		.maxlen		= sizeof (int),
477 		.mode		= 0444,
478 		.proc_handler	= &proc_dointvec,
479 	},
480 #endif
481 #ifdef CONFIG_BSD_PROCESS_ACCT
	/* .maxlen covers three ints of the acct_parm array. */
482 	{
483 		.ctl_name	= KERN_ACCT,
484 		.procname	= "acct",
485 		.data		= &acct_parm,
486 		.maxlen		= 3*sizeof(int),
487 		.mode		= 0644,
488 		.proc_handler	= &proc_dointvec,
489 	},
490 #endif
491 #ifdef CONFIG_MAGIC_SYSRQ
492 	{
493 		.ctl_name	= KERN_SYSRQ,
494 		.procname	= "sysrq",
495 		.data		= &__sysrq_enabled,
496 		.maxlen		= sizeof (int),
497 		.mode		= 0644,
498 		.proc_handler	= &proc_do_cad_pid == 0 ? 0 : 0,
499 	},
500 #endif
501 #ifdef CONFIG_PROC_SYSCTL
502 	{
503 		.ctl_name	= KERN_CADPID,
504 		.procname	= "cad_pid",
505 		.data		= NULL,
506 		.maxlen		= sizeof (int),
507 		.mode		= 0600,
508 		.proc_handler	= &proc_do_cad_pid,
509 	},
510 #endif
511 	{
512 		.ctl_name	= KERN_MAX_THREADS,
513 		.procname	= "threads-max",
514 		.data		= &max_threads,
515 		.maxlen		= sizeof(int),
516 		.mode		= 0644,
517 		.proc_handler	= &proc_dointvec,
518 	},
519 	{
520 		.ctl_name	= KERN_RANDOM,
521 		.procname	= "random",
522 		.mode		= 0555,
523 		.child		= random_table,
524 	},
525 #ifdef CONFIG_UNIX98_PTYS
526 	{
527 		.ctl_name	= KERN_PTY,
528 		.procname	= "pty",
529 		.mode		= 0555,
530 		.child		= pty_table,
531 	},
532 #endif
533 	{
534 		.ctl_name	= KERN_OVERFLOWUID,
535 		.procname	= "overflowuid",
536 		.data		= &overflowuid,
537 		.maxlen		= sizeof(int),
538 		.mode		= 0644,
539 		.proc_handler	= &proc_dointvec_minmax,
540 		.strategy	= &sysctl_intvec,
541 		.extra1		= &minolduid,
542 		.extra2		= &maxolduid,
543 	},
544 	{
545 		.ctl_name	= KERN_OVERFLOWGID,
546 		.procname	= "overflowgid",
547 		.data		= &overflowgid,
548 		.maxlen		= sizeof(int),
549 		.mode		= 0644,
550 		.proc_handler	= &proc_dointvec_minmax,
551 		.strategy	= &sysctl_intvec,
552 		.extra1		= &minolduid,
553 		.extra2		= &maxolduid,
554 	},
555 #ifdef CONFIG_S390
556 #ifdef CONFIG_MATHEMU
557 	{
558 		.ctl_name	= KERN_IEEE_EMULATION_WARNINGS,
559 		.procname	= "ieee_emulation_warnings",
560 		.data		= &sysctl_ieee_emulation_warnings,
561 		.maxlen		= sizeof(int),
562 		.mode		= 0644,
563 		.proc_handler	= &proc_dointvec,
564 	},
565 #endif
566 #ifdef CONFIG_NO_IDLE_HZ
567 	{
568 		.ctl_name       = KERN_HZ_TIMER,
569 		.procname       = "hz_timer",
570 		.data           = &sysctl_hz_timer,
571 		.maxlen         = sizeof(int),
572 		.mode           = 0644,
573 		.proc_handler   = &proc_dointvec,
574 	},
575 #endif
576 	{
577 		.ctl_name	= KERN_S390_USER_DEBUG_LOGGING,
578 		.procname	= "userprocess_debug",
579 		.data		= &sysctl_userprocess_debug,
580 		.maxlen		= sizeof(int),
581 		.mode		= 0644,
582 		.proc_handler	= &proc_dointvec,
583 	},
584 #endif
	/* Range-checked against pid_max_min and pid_max_max. */
585 	{
586 		.ctl_name	= KERN_PIDMAX,
587 		.procname	= "pid_max",
588 		.data		= &pid_max,
589 		.maxlen		= sizeof (int),
590 		.mode		= 0644,
591 		.proc_handler	= &proc_dointvec_minmax,
592 		.strategy	= sysctl_intvec,
593 		.extra1		= &pid_max_min,
594 		.extra2		= &pid_max_max,
595 	},
596 	{
597 		.ctl_name	= KERN_PANIC_ON_OOPS,
598 		.procname	= "panic_on_oops",
599 		.data		= &panic_on_oops,
600 		.maxlen		= sizeof(int),
601 		.mode		= 0644,
602 		.proc_handler	= &proc_dointvec,
603 	},
604 	{
605 		.ctl_name	= KERN_PRINTK_RATELIMIT,
606 		.procname	= "printk_ratelimit",
607 		.data		= &printk_ratelimit_jiffies,
608 		.maxlen		= sizeof(int),
609 		.mode		= 0644,
610 		.proc_handler	= &proc_dointvec_jiffies,
611 		.strategy	= &sysctl_jiffies,
612 	},
613 	{
614 		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
615 		.procname	= "printk_ratelimit_burst",
616 		.data		= &printk_ratelimit_burst,
617 		.maxlen		= sizeof(int),
618 		.mode		= 0644,
619 		.proc_handler	= &proc_dointvec,
620 	},
	/* Read-only (mode 0444) view of the file-local ngroups_max variable. */
621 	{
622 		.ctl_name	= KERN_NGROUPS_MAX,
623 		.procname	= "ngroups_max",
624 		.data		= &ngroups_max,
625 		.maxlen		= sizeof (int),
626 		.mode		= 0444,
627 		.proc_handler	= &proc_dointvec,
628 	},
629 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
630 	{
631 		.ctl_name       = KERN_UNKNOWN_NMI_PANIC,
632 		.procname       = "unknown_nmi_panic",
633 		.data           = &unknown_nmi_panic,
634 		.maxlen         = sizeof (int),
635 		.mode           = 0644,
636 		.proc_handler   = &proc_dointvec,
637 	},
638 	{
639 		.ctl_name       = KERN_NMI_WATCHDOG,
640 		.procname       = "nmi_watchdog",
641 		.data           = &nmi_watchdog_enabled,
642 		.maxlen         = sizeof (int),
643 		.mode           = 0644,
644 		.proc_handler   = &proc_nmi_enabled,
645 	},
646 #endif
647 #if defined(CONFIG_X86)
648 	{
649 		.ctl_name	= KERN_PANIC_ON_NMI,
650 		.procname	= "panic_on_unrecovered_nmi",
651 		.data		= &panic_on_unrecovered_nmi,
652 		.maxlen		= sizeof(int),
653 		.mode		= 0644,
654 		.proc_handler	= &proc_dointvec,
655 	},
656 	{
657 		.ctl_name	= KERN_BOOTLOADER_TYPE,
658 		.procname	= "bootloader_type",
659 		.data		= &bootloader_type,
660 		.maxlen		= sizeof (int),
661 		.mode		= 0444,
662 		.proc_handler	= &proc_dointvec,
663 	},
664 	{
665 		.ctl_name	= CTL_UNNUMBERED,
666 		.procname	= "kstack_depth_to_print",
667 		.data		= &kstack_depth_to_print,
668 		.maxlen		= sizeof(int),
669 		.mode		= 0644,
670 		.proc_handler	= &proc_dointvec,
671 	},
672 #endif
673 #if defined(CONFIG_MMU)
674 	{
675 		.ctl_name	= KERN_RANDOMIZE,
676 		.procname	= "randomize_va_space",
677 		.data		= &randomize_va_space,
678 		.maxlen		= sizeof(int),
679 		.mode		= 0644,
680 		.proc_handler	= &proc_dointvec,
681 	},
682 #endif
683 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
684 	{
685 		.ctl_name	= KERN_SPIN_RETRY,
686 		.procname	= "spin_retry",
687 		.data		= &spin_retry,
688 		.maxlen		= sizeof (int),
689 		.mode		= 0644,
690 		.proc_handler	= &proc_dointvec,
691 	},
692 #endif
693 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
	/* unsigned long value, hence proc_doulongvec_minmax rather than an int handler. */
694 	{
695 		.ctl_name	= KERN_ACPI_VIDEO_FLAGS,
696 		.procname	= "acpi_video_flags",
697 		.data		= &acpi_realmode_flags,
698 		.maxlen		= sizeof (unsigned long),
699 		.mode		= 0644,
700 		.proc_handler	= &proc_doulongvec_minmax,
701 	},
702 #endif
703 #ifdef CONFIG_IA64
704 	{
705 		.ctl_name	= KERN_IA64_UNALIGNED,
706 		.procname	= "ignore-unaligned-usertrap",
707 		.data		= &no_unaligned_warning,
708 		.maxlen		= sizeof (int),
709 	 	.mode		= 0644,
710 		.proc_handler	= &proc_dointvec,
711 	},
712 #endif
713 #ifdef CONFIG_COMPAT
714 	{
715 		.ctl_name	= KERN_COMPAT_LOG,
716 		.procname	= "compat-log",
717 		.data		= &compat_log,
718 		.maxlen		= sizeof (int),
719 	 	.mode		= 0644,
720 		.proc_handler	= &proc_dointvec,
721 	},
722 #endif
723 #ifdef CONFIG_RT_MUTEXES
724 	{
725 		.ctl_name	= KERN_MAX_LOCK_DEPTH,
726 		.procname	= "max_lock_depth",
727 		.data		= &max_lock_depth,
728 		.maxlen		= sizeof(int),
729 		.mode		= 0644,
730 		.proc_handler	= &proc_dointvec,
731 	},
732 #endif
733 #ifdef CONFIG_PROC_FS
734 	{
735 		.ctl_name       = CTL_UNNUMBERED,
736 		.procname       = "maps_protect",
737 		.data           = &maps_protect,
738 		.maxlen         = sizeof(int),
739 		.mode           = 0644,
740 		.proc_handler   = &proc_dointvec,
741 	},
742 #endif
743 	{
744 		.ctl_name	= CTL_UNNUMBERED,
745 		.procname	= "poweroff_cmd",
746 		.data		= &poweroff_cmd,
747 		.maxlen		= POWEROFF_CMD_PATH_LEN,
748 		.mode		= 0644,
749 		.proc_handler	= &proc_dostring,
750 		.strategy	= &sysctl_string,
751 	},
752 /*
753  * NOTE: do not add new entries to this table unless you have read
754  * Documentation/sysctl/ctl_unnumbered.txt
755  */
756 	{ .ctl_name = 0 }
757 };
758 
759 /* Constants for minimum and maximum testing in vm_table.
760    We use these as one-element integer vectors. */
761 static int zero;		/* lower bound (.extra1) in vm_table and for fs_table's lease-break-time */
762 static int two = 2;		/* upper bound (.extra2) of fs_table's lease-break-time entry */
763 static int one_hundred = 100;	/* upper bound (.extra2) of vm_table entries such as dirty_ratio and swappiness */
764 
765 
/*
 * vm_table - entries below the "vm" directory of root_table.
 *
 * Plain integers use proc_dointvec. Entries limited to a range use
 * proc_dointvec_minmax (or a dedicated *_sysctl_handler) together with
 * .extra1/.extra2 pointing at the file-local zero / one_hundred constants.
 * Entries inside an #ifdef exist only for that configuration.
 *
 * NOTE(review): several entries (block_dump, vfs_cache_pressure,
 * legacy_va_layout, zone_reclaim_mode, vdso_enabled) set .extra1 = &zero but
 * use plain proc_dointvec as the handler. Confirm whether the lower bound
 * is actually enforced on the /proc path for those.
 */
766 static ctl_table vm_table[] = {
767 	{
768 		.ctl_name	= VM_OVERCOMMIT_MEMORY,
769 		.procname	= "overcommit_memory",
770 		.data		= &sysctl_overcommit_memory,
771 		.maxlen		= sizeof(sysctl_overcommit_memory),
772 		.mode		= 0644,
773 		.proc_handler	= &proc_dointvec,
774 	},
775 	{
776 		.ctl_name	= VM_PANIC_ON_OOM,
777 		.procname	= "panic_on_oom",
778 		.data		= &sysctl_panic_on_oom,
779 		.maxlen		= sizeof(sysctl_panic_on_oom),
780 		.mode		= 0644,
781 		.proc_handler	= &proc_dointvec,
782 	},
783 	{
784 		.ctl_name	= VM_OVERCOMMIT_RATIO,
785 		.procname	= "overcommit_ratio",
786 		.data		= &sysctl_overcommit_ratio,
787 		.maxlen		= sizeof(sysctl_overcommit_ratio),
788 		.mode		= 0644,
789 		.proc_handler	= &proc_dointvec,
790 	},
791 	{
792 		.ctl_name	= VM_PAGE_CLUSTER,
793 		.procname	= "page-cluster",
794 		.data		= &page_cluster,
795 		.maxlen		= sizeof(int),
796 		.mode		= 0644,
797 		.proc_handler	= &proc_dointvec,
798 	},
	/* The two dirty ratios are range-checked against zero..one_hundred. */
799 	{
800 		.ctl_name	= VM_DIRTY_BACKGROUND,
801 		.procname	= "dirty_background_ratio",
802 		.data		= &dirty_background_ratio,
803 		.maxlen		= sizeof(dirty_background_ratio),
804 		.mode		= 0644,
805 		.proc_handler	= &proc_dointvec_minmax,
806 		.strategy	= &sysctl_intvec,
807 		.extra1		= &zero,
808 		.extra2		= &one_hundred,
809 	},
810 	{
811 		.ctl_name	= VM_DIRTY_RATIO,
812 		.procname	= "dirty_ratio",
813 		.data		= &vm_dirty_ratio,
814 		.maxlen		= sizeof(vm_dirty_ratio),
815 		.mode		= 0644,
816 		.proc_handler	= &proc_dointvec_minmax,
817 		.strategy	= &sysctl_intvec,
818 		.extra1		= &zero,
819 		.extra2		= &one_hundred,
820 	},
821 	{
822 		.ctl_name	= VM_DIRTY_WB_CS,
823 		.procname	= "dirty_writeback_centisecs",
824 		.data		= &dirty_writeback_interval,
825 		.maxlen		= sizeof(dirty_writeback_interval),
826 		.mode		= 0644,
827 		.proc_handler	= &dirty_writeback_centisecs_handler,
828 	},
829 	{
830 		.ctl_name	= VM_DIRTY_EXPIRE_CS,
831 		.procname	= "dirty_expire_centisecs",
832 		.data		= &dirty_expire_interval,
833 		.maxlen		= sizeof(dirty_expire_interval),
834 		.mode		= 0644,
835 		.proc_handler	= &proc_dointvec_userhz_jiffies,
836 	},
837 	{
838 		.ctl_name	= VM_NR_PDFLUSH_THREADS,
839 		.procname	= "nr_pdflush_threads",
840 		.data		= &nr_pdflush_threads,
841 		.maxlen		= sizeof nr_pdflush_threads,
842 		.mode		= 0444 /* read-only*/,
843 		.proc_handler	= &proc_dointvec,
844 	},
845 	{
846 		.ctl_name	= VM_SWAPPINESS,
847 		.procname	= "swappiness",
848 		.data		= &vm_swappiness,
849 		.maxlen		= sizeof(vm_swappiness),
850 		.mode		= 0644,
851 		.proc_handler	= &proc_dointvec_minmax,
852 		.strategy	= &sysctl_intvec,
853 		.extra1		= &zero,
854 		.extra2		= &one_hundred,
855 	},
856 #ifdef CONFIG_HUGETLB_PAGE
	/* Huge page controls; nr_hugepages is unsigned long with its own handler and bounds. */
857 	 {
858 		.ctl_name	= VM_HUGETLB_PAGES,
859 		.procname	= "nr_hugepages",
860 		.data		= &max_huge_pages,
861 		.maxlen		= sizeof(unsigned long),
862 		.mode		= 0644,
863 		.proc_handler	= &hugetlb_sysctl_handler,
864 		.extra1		= (void *)&hugetlb_zero,
865 		.extra2		= (void *)&hugetlb_infinity,
866 	 },
867 	 {
868 		.ctl_name	= VM_HUGETLB_GROUP,
869 		.procname	= "hugetlb_shm_group",
870 		.data		= &sysctl_hugetlb_shm_group,
871 		.maxlen		= sizeof(gid_t),
872 		.mode		= 0644,
873 		.proc_handler	= &proc_dointvec,
874 	 },
875 	 {
876 		.ctl_name	= CTL_UNNUMBERED,
877 		.procname	= "hugepages_treat_as_movable",
878 		.data		= &hugepages_treat_as_movable,
879 		.maxlen		= sizeof(int),
880 		.mode		= 0644,
881 		.proc_handler	= &hugetlb_treat_movable_handler,
882 	},
883 	{
884 		.ctl_name	= CTL_UNNUMBERED,
885 		.procname	= "hugetlb_dynamic_pool",
886 		.data		= &hugetlb_dynamic_pool,
887 		.maxlen		= sizeof(hugetlb_dynamic_pool),
888 		.mode		= 0644,
889 		.proc_handler	= &proc_dointvec,
890 	},
891 #endif
892 	{
893 		.ctl_name	= VM_LOWMEM_RESERVE_RATIO,
894 		.procname	= "lowmem_reserve_ratio",
895 		.data		= &sysctl_lowmem_reserve_ratio,
896 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
897 		.mode		= 0644,
898 		.proc_handler	= &lowmem_reserve_ratio_sysctl_handler,
899 		.strategy	= &sysctl_intvec,
900 	},
901 	{
902 		.ctl_name	= VM_DROP_PAGECACHE,
903 		.procname	= "drop_caches",
904 		.data		= &sysctl_drop_caches,
905 		.maxlen		= sizeof(int),
906 		.mode		= 0644,
907 		.proc_handler	= drop_caches_sysctl_handler,
908 		.strategy	= &sysctl_intvec,
909 	},
	/* Only a lower bound (.extra1) is supplied for the next two entries. */
910 	{
911 		.ctl_name	= VM_MIN_FREE_KBYTES,
912 		.procname	= "min_free_kbytes",
913 		.data		= &min_free_kbytes,
914 		.maxlen		= sizeof(min_free_kbytes),
915 		.mode		= 0644,
916 		.proc_handler	= &min_free_kbytes_sysctl_handler,
917 		.strategy	= &sysctl_intvec,
918 		.extra1		= &zero,
919 	},
920 	{
921 		.ctl_name	= VM_PERCPU_PAGELIST_FRACTION,
922 		.procname	= "percpu_pagelist_fraction",
923 		.data		= &percpu_pagelist_fraction,
924 		.maxlen		= sizeof(percpu_pagelist_fraction),
925 		.mode		= 0644,
926 		.proc_handler	= &percpu_pagelist_fraction_sysctl_handler,
927 		.strategy	= &sysctl_intvec,
928 		.extra1		= &min_percpu_pagelist_fract,
929 	},
930 #ifdef CONFIG_MMU
931 	{
932 		.ctl_name	= VM_MAX_MAP_COUNT,
933 		.procname	= "max_map_count",
934 		.data		= &sysctl_max_map_count,
935 		.maxlen		= sizeof(sysctl_max_map_count),
936 		.mode		= 0644,
937 		.proc_handler	= &proc_dointvec
938 	},
939 #endif
940 	{
941 		.ctl_name	= VM_LAPTOP_MODE,
942 		.procname	= "laptop_mode",
943 		.data		= &laptop_mode,
944 		.maxlen		= sizeof(laptop_mode),
945 		.mode		= 0644,
946 		.proc_handler	= &proc_dointvec_jiffies,
947 		.strategy	= &sysctl_jiffies,
948 	},
949 	{
950 		.ctl_name	= VM_BLOCK_DUMP,
951 		.procname	= "block_dump",
952 		.data		= &block_dump,
953 		.maxlen		= sizeof(block_dump),
954 		.mode		= 0644,
955 		.proc_handler	= &proc_dointvec,
956 		.strategy	= &sysctl_intvec,
957 		.extra1		= &zero,
958 	},
959 	{
960 		.ctl_name	= VM_VFS_CACHE_PRESSURE,
961 		.procname	= "vfs_cache_pressure",
962 		.data		= &sysctl_vfs_cache_pressure,
963 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
964 		.mode		= 0644,
965 		.proc_handler	= &proc_dointvec,
966 		.strategy	= &sysctl_intvec,
967 		.extra1		= &zero,
968 	},
969 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
970 	{
971 		.ctl_name	= VM_LEGACY_VA_LAYOUT,
972 		.procname	= "legacy_va_layout",
973 		.data		= &sysctl_legacy_va_layout,
974 		.maxlen		= sizeof(sysctl_legacy_va_layout),
975 		.mode		= 0644,
976 		.proc_handler	= &proc_dointvec,
977 		.strategy	= &sysctl_intvec,
978 		.extra1		= &zero,
979 	},
980 #endif
981 #ifdef CONFIG_NUMA
982 	{
983 		.ctl_name	= VM_ZONE_RECLAIM_MODE,
984 		.procname	= "zone_reclaim_mode",
985 		.data		= &zone_reclaim_mode,
986 		.maxlen		= sizeof(zone_reclaim_mode),
987 		.mode		= 0644,
988 		.proc_handler	= &proc_dointvec,
989 		.strategy	= &sysctl_intvec,
990 		.extra1		= &zero,
991 	},
992 	{
993 		.ctl_name	= VM_MIN_UNMAPPED,
994 		.procname	= "min_unmapped_ratio",
995 		.data		= &sysctl_min_unmapped_ratio,
996 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
997 		.mode		= 0644,
998 		.proc_handler	= &sysctl_min_unmapped_ratio_sysctl_handler,
999 		.strategy	= &sysctl_intvec,
1000 		.extra1		= &zero,
1001 		.extra2		= &one_hundred,
1002 	},
1003 	{
1004 		.ctl_name	= VM_MIN_SLAB,
1005 		.procname	= "min_slab_ratio",
1006 		.data		= &sysctl_min_slab_ratio,
1007 		.maxlen		= sizeof(sysctl_min_slab_ratio),
1008 		.mode		= 0644,
1009 		.proc_handler	= &sysctl_min_slab_ratio_sysctl_handler,
1010 		.strategy	= &sysctl_intvec,
1011 		.extra1		= &zero,
1012 		.extra2		= &one_hundred,
1013 	},
1014 #endif
1015 #ifdef CONFIG_SMP
1016 	{
1017 		.ctl_name	= CTL_UNNUMBERED,
1018 		.procname	= "stat_interval",
1019 		.data		= &sysctl_stat_interval,
1020 		.maxlen		= sizeof(sysctl_stat_interval),
1021 		.mode		= 0644,
1022 		.proc_handler	= &proc_dointvec_jiffies,
1023 		.strategy	= &sysctl_jiffies,
1024 	},
1025 #endif
1026 #ifdef CONFIG_SECURITY
1027 	{
1028 		.ctl_name	= CTL_UNNUMBERED,
1029 		.procname	= "mmap_min_addr",
1030 		.data		= &mmap_min_addr,
1031 		.maxlen         = sizeof(unsigned long),
1032 		.mode		= 0644,
1033 		.proc_handler	= &proc_doulongvec_minmax,
1034 	},
1035 #endif
1036 #ifdef CONFIG_NUMA
	/* String entry with its own handler; .maxlen is NUMA_ZONELIST_ORDER_LEN. */
1037 	{
1038 		.ctl_name	= CTL_UNNUMBERED,
1039 		.procname	= "numa_zonelist_order",
1040 		.data		= &numa_zonelist_order,
1041 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1042 		.mode		= 0644,
1043 		.proc_handler	= &numa_zonelist_order_handler,
1044 		.strategy	= &sysctl_string,
1045 	},
1046 #endif
1047 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1048    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1049 	{
1050 		.ctl_name	= VM_VDSO_ENABLED,
1051 		.procname	= "vdso_enabled",
1052 		.data		= &vdso_enabled,
1053 		.maxlen		= sizeof(vdso_enabled),
1054 		.mode		= 0644,
1055 		.proc_handler	= &proc_dointvec,
1056 		.strategy	= &sysctl_intvec,
1057 		.extra1		= &zero,
1058 	},
1059 #endif
1060 /*
1061  * NOTE: do not add new entries to this table unless you have read
1062  * Documentation/sysctl/ctl_unnumbered.txt
1063  */
1064 	{ .ctl_name = 0 }
1065 };
1066 
/*
 * binfmt_misc_table - an empty table (end marker only) used as the .child of
 * the "binfmt_misc" directory entry in fs_table. It is built only when
 * binfmt_misc is configured, either built in or as a module.
 * NOTE(review): presumably the empty directory exists to serve as a mount
 * point; nothing in this file confirms that.
 */
1067 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1068 static ctl_table binfmt_misc_table[] = {
1069 	{ .ctl_name = 0 }
1070 };
1071 #endif
1072 
/*
 * fs_table - entries below the "fs" directory of root_table.
 *
 * The statistics entries (inode-nr, inode-state, file-nr, dentry-state,
 * aio-nr) are read-only (mode 0444); the rest are writable tunables.
 * "inotify" and "binfmt_misc" are sub-directories (.child tables).
 */
1073 static ctl_table fs_table[] = {
	/* inode-nr and inode-state both read from inodes_stat; they differ only in length (2 vs 7 ints). */
1074 	{
1075 		.ctl_name	= FS_NRINODE,
1076 		.procname	= "inode-nr",
1077 		.data		= &inodes_stat,
1078 		.maxlen		= 2*sizeof(int),
1079 		.mode		= 0444,
1080 		.proc_handler	= &proc_dointvec,
1081 	},
1082 	{
1083 		.ctl_name	= FS_STATINODE,
1084 		.procname	= "inode-state",
1085 		.data		= &inodes_stat,
1086 		.maxlen		= 7*sizeof(int),
1087 		.mode		= 0444,
1088 		.proc_handler	= &proc_dointvec,
1089 	},
	/* file-nr covers three ints of files_stat through the dedicated proc_nr_files handler. */
1090 	{
1091 		.ctl_name	= FS_NRFILE,
1092 		.procname	= "file-nr",
1093 		.data		= &files_stat,
1094 		.maxlen		= 3*sizeof(int),
1095 		.mode		= 0444,
1096 		.proc_handler	= &proc_nr_files,
1097 	},
1098 	{
1099 		.ctl_name	= FS_MAXFILE,
1100 		.procname	= "file-max",
1101 		.data		= &files_stat.max_files,
1102 		.maxlen		= sizeof(int),
1103 		.mode		= 0644,
1104 		.proc_handler	= &proc_dointvec,
1105 	},
1106 	{
1107 		.ctl_name	= FS_DENTRY,
1108 		.procname	= "dentry-state",
1109 		.data		= &dentry_stat,
1110 		.maxlen		= 6*sizeof(int),
1111 		.mode		= 0444,
1112 		.proc_handler	= &proc_dointvec,
1113 	},
	/* Same minolduid..maxolduid range check as the kernel.overflowuid/overflowgid entries. */
1114 	{
1115 		.ctl_name	= FS_OVERFLOWUID,
1116 		.procname	= "overflowuid",
1117 		.data		= &fs_overflowuid,
1118 		.maxlen		= sizeof(int),
1119 		.mode		= 0644,
1120 		.proc_handler	= &proc_dointvec_minmax,
1121 		.strategy	= &sysctl_intvec,
1122 		.extra1		= &minolduid,
1123 		.extra2		= &maxolduid,
1124 	},
1125 	{
1126 		.ctl_name	= FS_OVERFLOWGID,
1127 		.procname	= "overflowgid",
1128 		.data		= &fs_overflowgid,
1129 		.maxlen		= sizeof(int),
1130 		.mode		= 0644,
1131 		.proc_handler	= &proc_dointvec_minmax,
1132 		.strategy	= &sysctl_intvec,
1133 		.extra1		= &minolduid,
1134 		.extra2		= &maxolduid,
1135 	},
1136 	{
1137 		.ctl_name	= FS_LEASES,
1138 		.procname	= "leases-enable",
1139 		.data		= &leases_enable,
1140 		.maxlen		= sizeof(int),
1141 		.mode		= 0644,
1142 		.proc_handler	= &proc_dointvec,
1143 	},
1144 #ifdef CONFIG_DNOTIFY
1145 	{
1146 		.ctl_name	= FS_DIR_NOTIFY,
1147 		.procname	= "dir-notify-enable",
1148 		.data		= &dir_notify_enable,
1149 		.maxlen		= sizeof(int),
1150 		.mode		= 0644,
1151 		.proc_handler	= &proc_dointvec,
1152 	},
1153 #endif
1154 #ifdef CONFIG_MMU
	/* lease-break-time is range-checked against zero..two. */
1155 	{
1156 		.ctl_name	= FS_LEASE_TIME,
1157 		.procname	= "lease-break-time",
1158 		.data		= &lease_break_time,
1159 		.maxlen		= sizeof(int),
1160 		.mode		= 0644,
1161 		.proc_handler	= &proc_dointvec_minmax,
1162 		.strategy	= &sysctl_intvec,
1163 		.extra1		= &zero,
1164 		.extra2		= &two,
1165 	},
1166 	{
1167 		.ctl_name	= FS_AIO_NR,
1168 		.procname	= "aio-nr",
1169 		.data		= &aio_nr,
1170 		.maxlen		= sizeof(aio_nr),
1171 		.mode		= 0444,
1172 		.proc_handler	= &proc_doulongvec_minmax,
1173 	},
1174 	{
1175 		.ctl_name	= FS_AIO_MAX_NR,
1176 		.procname	= "aio-max-nr",
1177 		.data		= &aio_max_nr,
1178 		.maxlen		= sizeof(aio_max_nr),
1179 		.mode		= 0644,
1180 		.proc_handler	= &proc_doulongvec_minmax,
1181 	},
1182 #ifdef CONFIG_INOTIFY_USER
	/* Nested inside the CONFIG_MMU block, so it needs both options. */
1183 	{
1184 		.ctl_name	= FS_INOTIFY,
1185 		.procname	= "inotify",
1186 		.mode		= 0555,
1187 		.child		= inotify_table,
1188 	},
1189 #endif
1190 #endif
	/* Lives under "fs" but carries a KERN_-prefixed id (KERN_SETUID_DUMPABLE). */
1191 	{
1192 		.ctl_name	= KERN_SETUID_DUMPABLE,
1193 		.procname	= "suid_dumpable",
1194 		.data		= &suid_dumpable,
1195 		.maxlen		= sizeof(int),
1196 		.mode		= 0644,
1197 		.proc_handler	= &proc_dointvec,
1198 	},
1199 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1200 	{
1201 		.ctl_name	= CTL_UNNUMBERED,
1202 		.procname	= "binfmt_misc",
1203 		.mode		= 0555,
1204 		.child		= binfmt_misc_table,
1205 	},
1206 #endif
1207 /*
1208  * NOTE: do not add new entries to this table unless you have read
1209  * Documentation/sysctl/ctl_unnumbered.txt
1210  */
1211 	{ .ctl_name = 0 }
1212 };
1213 
1214 static ctl_table debug_table[] = {
1215 #if defined(CONFIG_X86) || defined(CONFIG_PPC)
1216 	{
1217 		.ctl_name	= CTL_UNNUMBERED,
1218 		.procname	= "exception-trace",
1219 		.data		= &show_unhandled_signals,
1220 		.maxlen		= sizeof(int),
1221 		.mode		= 0644,
1222 		.proc_handler	= proc_dointvec
1223 	},
1224 #endif
1225 	{ .ctl_name = 0 }
1226 };
1227 
1228 static ctl_table dev_table[] = {
1229 	{ .ctl_name = 0 }
1230 };
1231 
1232 static DEFINE_SPINLOCK(sysctl_lock);
1233 
1234 /* called under sysctl_lock */
1235 static int use_table(struct ctl_table_header *p)
1236 {
1237 	if (unlikely(p->unregistering))
1238 		return 0;
1239 	p->used++;
1240 	return 1;
1241 }
1242 
1243 /* called under sysctl_lock */
1244 static void unuse_table(struct ctl_table_header *p)
1245 {
1246 	if (!--p->used)
1247 		if (unlikely(p->unregistering))
1248 			complete(p->unregistering);
1249 }
1250 
1251 /* called under sysctl_lock, will reacquire if has to wait */
1252 static void start_unregistering(struct ctl_table_header *p)
1253 {
1254 	/*
1255 	 * if p->used is 0, nobody will ever touch that entry again;
1256 	 * we'll eliminate all paths to it before dropping sysctl_lock
1257 	 */
1258 	if (unlikely(p->used)) {
1259 		struct completion wait;
1260 		init_completion(&wait);
1261 		p->unregistering = &wait;
1262 		spin_unlock(&sysctl_lock);
1263 		wait_for_completion(&wait);
1264 		spin_lock(&sysctl_lock);
1265 	}
1266 	/*
1267 	 * do not remove from the list until nobody holds it; walking the
1268 	 * list in do_sysctl() relies on that.
1269 	 */
1270 	list_del_init(&p->ctl_entry);
1271 }
1272 
1273 void sysctl_head_finish(struct ctl_table_header *head)
1274 {
1275 	if (!head)
1276 		return;
1277 	spin_lock(&sysctl_lock);
1278 	unuse_table(head);
1279 	spin_unlock(&sysctl_lock);
1280 }
1281 
1282 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1283 {
1284 	struct ctl_table_header *head;
1285 	struct list_head *tmp;
1286 	spin_lock(&sysctl_lock);
1287 	if (prev) {
1288 		tmp = &prev->ctl_entry;
1289 		unuse_table(prev);
1290 		goto next;
1291 	}
1292 	tmp = &root_table_header.ctl_entry;
1293 	for (;;) {
1294 		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1295 
1296 		if (!use_table(head))
1297 			goto next;
1298 		spin_unlock(&sysctl_lock);
1299 		return head;
1300 	next:
1301 		tmp = tmp->next;
1302 		if (tmp == &root_table_header.ctl_entry)
1303 			break;
1304 	}
1305 	spin_unlock(&sysctl_lock);
1306 	return NULL;
1307 }
1308 
1309 #ifdef CONFIG_SYSCTL_SYSCALL
1310 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1311 	       void __user *newval, size_t newlen)
1312 {
1313 	struct ctl_table_header *head;
1314 	int error = -ENOTDIR;
1315 
1316 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
1317 		return -ENOTDIR;
1318 	if (oldval) {
1319 		int old_len;
1320 		if (!oldlenp || get_user(old_len, oldlenp))
1321 			return -EFAULT;
1322 	}
1323 
1324 	for (head = sysctl_head_next(NULL); head;
1325 			head = sysctl_head_next(head)) {
1326 		error = parse_table(name, nlen, oldval, oldlenp,
1327 					newval, newlen, head->ctl_table);
1328 		if (error != -ENOTDIR) {
1329 			sysctl_head_finish(head);
1330 			break;
1331 		}
1332 	}
1333 	return error;
1334 }
1335 
1336 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1337 {
1338 	struct __sysctl_args tmp;
1339 	int error;
1340 
1341 	if (copy_from_user(&tmp, args, sizeof(tmp)))
1342 		return -EFAULT;
1343 
1344 	lock_kernel();
1345 	error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1346 			  tmp.newval, tmp.newlen);
1347 	unlock_kernel();
1348 	return error;
1349 }
1350 #endif /* CONFIG_SYSCTL_SYSCALL */
1351 
1352 /*
1353  * sysctl_perm does NOT grant the superuser all rights automatically, because
1354  * some sysctl variables are readonly even to root.
1355  */
1356 
1357 static int test_perm(int mode, int op)
1358 {
1359 	if (!current->euid)
1360 		mode >>= 6;
1361 	else if (in_egroup_p(0))
1362 		mode >>= 3;
1363 	if ((mode & op & 0007) == op)
1364 		return 0;
1365 	return -EACCES;
1366 }
1367 
1368 int sysctl_perm(ctl_table *table, int op)
1369 {
1370 	int error;
1371 	error = security_sysctl(table, op);
1372 	if (error)
1373 		return error;
1374 	return test_perm(table->mode, op);
1375 }
1376 
1377 #ifdef CONFIG_SYSCTL_SYSCALL
1378 static int parse_table(int __user *name, int nlen,
1379 		       void __user *oldval, size_t __user *oldlenp,
1380 		       void __user *newval, size_t newlen,
1381 		       ctl_table *table)
1382 {
1383 	int n;
1384 repeat:
1385 	if (!nlen)
1386 		return -ENOTDIR;
1387 	if (get_user(n, name))
1388 		return -EFAULT;
1389 	for ( ; table->ctl_name || table->procname; table++) {
1390 		if (!table->ctl_name)
1391 			continue;
1392 		if (n == table->ctl_name) {
1393 			int error;
1394 			if (table->child) {
1395 				if (sysctl_perm(table, 001))
1396 					return -EPERM;
1397 				name++;
1398 				nlen--;
1399 				table = table->child;
1400 				goto repeat;
1401 			}
1402 			error = do_sysctl_strategy(table, name, nlen,
1403 						   oldval, oldlenp,
1404 						   newval, newlen);
1405 			return error;
1406 		}
1407 	}
1408 	return -ENOTDIR;
1409 }
1410 
1411 /* Perform the actual read/write of a sysctl table entry. */
1412 int do_sysctl_strategy (ctl_table *table,
1413 			int __user *name, int nlen,
1414 			void __user *oldval, size_t __user *oldlenp,
1415 			void __user *newval, size_t newlen)
1416 {
1417 	int op = 0, rc;
1418 	size_t len;
1419 
1420 	if (oldval)
1421 		op |= 004;
1422 	if (newval)
1423 		op |= 002;
1424 	if (sysctl_perm(table, op))
1425 		return -EPERM;
1426 
1427 	if (table->strategy) {
1428 		rc = table->strategy(table, name, nlen, oldval, oldlenp,
1429 				     newval, newlen);
1430 		if (rc < 0)
1431 			return rc;
1432 		if (rc > 0)
1433 			return 0;
1434 	}
1435 
1436 	/* If there is no strategy routine, or if the strategy returns
1437 	 * zero, proceed with automatic r/w */
1438 	if (table->data && table->maxlen) {
1439 		if (oldval && oldlenp) {
1440 			if (get_user(len, oldlenp))
1441 				return -EFAULT;
1442 			if (len) {
1443 				if (len > table->maxlen)
1444 					len = table->maxlen;
1445 				if(copy_to_user(oldval, table->data, len))
1446 					return -EFAULT;
1447 				if(put_user(len, oldlenp))
1448 					return -EFAULT;
1449 			}
1450 		}
1451 		if (newval && newlen) {
1452 			len = newlen;
1453 			if (len > table->maxlen)
1454 				len = table->maxlen;
1455 			if(copy_from_user(table->data, newval, len))
1456 				return -EFAULT;
1457 		}
1458 	}
1459 	return 0;
1460 }
1461 #endif /* CONFIG_SYSCTL_SYSCALL */
1462 
1463 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1464 {
1465 	for (; table->ctl_name || table->procname; table++) {
1466 		table->parent = parent;
1467 		if (table->child)
1468 			sysctl_set_parent(table, table->child);
1469 	}
1470 }
1471 
1472 static __init int sysctl_init(void)
1473 {
1474 	sysctl_set_parent(NULL, root_table);
1475 	return 0;
1476 }
1477 
1478 core_initcall(sysctl_init);
1479 
1480 /**
1481  * register_sysctl_table - register a sysctl hierarchy
1482  * @table: the top-level table structure
1483  *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. An entry with a ctl_name of 0 and a %NULL procname terminates the
 * table.
1486  *
1487  * The members of the &ctl_table structure are used as follows:
1488  *
1489  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1490  *            must be unique within that level of sysctl
1491  *
1492  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1493  *            enter a sysctl file
1494  *
1495  * data - a pointer to data for use by proc_handler
1496  *
1497  * maxlen - the maximum size in bytes of the data
1498  *
1499  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1500  *
1501  * child - a pointer to the child sysctl table if this entry is a directory, or
1502  *         %NULL.
1503  *
1504  * proc_handler - the text handler routine (described below)
1505  *
1506  * strategy - the strategy routine (described below)
1507  *
1508  * de - for internal use by the sysctl routines
1509  *
1510  * extra1, extra2 - extra pointers usable by the proc handler routines
1511  *
1512  * Leaf nodes in the sysctl tree will be represented by a single file
1513  * under /proc; non-leaf nodes will be represented by directories.
1514  *
1515  * sysctl(2) can automatically manage read and write requests through
1516  * the sysctl table.  The data and maxlen fields of the ctl_table
1517  * struct enable minimal validation of the values being written to be
1518  * performed, and the mode field allows minimal authentication.
1519  *
1520  * More sophisticated management can be enabled by the provision of a
1521  * strategy routine with the table entry.  This will be called before
1522  * any automatic read or write of the data is performed.
1523  *
1524  * The strategy routine may return
1525  *
1526  * < 0 - Error occurred (error is passed to user process)
1527  *
1528  * 0   - OK - proceed with automatic read or write.
1529  *
1530  * > 0 - OK - read or write has been done by the strategy routine, so
1531  *       return immediately.
1532  *
1533  * There must be a proc_handler routine for any terminal nodes
1534  * mirrored under /proc/sys (non-terminals are handled by a built-in
1535  * directory handler).  Several default handlers are available to
1536  * cover common cases -
1537  *
1538  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1539  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1540  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1541  *
1542  * It is the handler's job to read the input buffer from user memory
1543  * and process it. The handler should return 0 on success.
1544  *
1545  * This routine returns %NULL on a failure to register, and a pointer
1546  * to the table header on success.
1547  */
1548 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1549 {
1550 	struct ctl_table_header *tmp;
1551 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1552 	if (!tmp)
1553 		return NULL;
1554 	tmp->ctl_table = table;
1555 	INIT_LIST_HEAD(&tmp->ctl_entry);
1556 	tmp->used = 0;
1557 	tmp->unregistering = NULL;
1558 	sysctl_set_parent(NULL, table);
1559 	spin_lock(&sysctl_lock);
1560 	list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1561 	spin_unlock(&sysctl_lock);
1562 	return tmp;
1563 }
1564 
1565 /**
1566  * unregister_sysctl_table - unregister a sysctl table hierarchy
1567  * @header: the header returned from register_sysctl_table
1568  *
1569  * Unregisters the sysctl table and all children. proc entries may not
1570  * actually be removed until they are no longer used by anyone.
1571  */
1572 void unregister_sysctl_table(struct ctl_table_header * header)
1573 {
1574 	might_sleep();
1575 	spin_lock(&sysctl_lock);
1576 	start_unregistering(header);
1577 	spin_unlock(&sysctl_lock);
1578 	kfree(header);
1579 }
1580 
1581 #else /* !CONFIG_SYSCTL */
1582 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1583 {
1584 	return NULL;
1585 }
1586 
/* Stub used when sysctl support is compiled out: nothing to undo. */
void unregister_sysctl_table(struct ctl_table_header * table)
{
}
1590 
1591 #endif /* CONFIG_SYSCTL */
1592 
1593 /*
1594  * /proc/sys support
1595  */
1596 
1597 #ifdef CONFIG_PROC_SYSCTL
1598 
1599 static int _proc_do_string(void* data, int maxlen, int write,
1600 			   struct file *filp, void __user *buffer,
1601 			   size_t *lenp, loff_t *ppos)
1602 {
1603 	size_t len;
1604 	char __user *p;
1605 	char c;
1606 
1607 	if (!data || !maxlen || !*lenp) {
1608 		*lenp = 0;
1609 		return 0;
1610 	}
1611 
1612 	if (write) {
1613 		len = 0;
1614 		p = buffer;
1615 		while (len < *lenp) {
1616 			if (get_user(c, p++))
1617 				return -EFAULT;
1618 			if (c == 0 || c == '\n')
1619 				break;
1620 			len++;
1621 		}
1622 		if (len >= maxlen)
1623 			len = maxlen-1;
1624 		if(copy_from_user(data, buffer, len))
1625 			return -EFAULT;
1626 		((char *) data)[len] = 0;
1627 		*ppos += *lenp;
1628 	} else {
1629 		len = strlen(data);
1630 		if (len > maxlen)
1631 			len = maxlen;
1632 
1633 		if (*ppos > len) {
1634 			*lenp = 0;
1635 			return 0;
1636 		}
1637 
1638 		data += *ppos;
1639 		len  -= *ppos;
1640 
1641 		if (len > *lenp)
1642 			len = *lenp;
1643 		if (len)
1644 			if(copy_to_user(buffer, data, len))
1645 				return -EFAULT;
1646 		if (len < *lenp) {
1647 			if(put_user('\n', ((char __user *) buffer) + len))
1648 				return -EFAULT;
1649 			len++;
1650 		}
1651 		*lenp = len;
1652 		*ppos += len;
1653 	}
1654 	return 0;
1655 }
1656 
1657 /**
1658  * proc_dostring - read a string sysctl
1659  * @table: the sysctl table
1660  * @write: %TRUE if this is a write to the sysctl file
1661  * @filp: the file structure
1662  * @buffer: the user buffer
1663  * @lenp: the size of the user buffer
1664  * @ppos: file position
1665  *
1666  * Reads/writes a string from/to the user buffer. If the kernel
1667  * buffer provided is not large enough to hold the string, the
1668  * string is truncated. The copied string is %NULL-terminated.
1669  * If the string is being read by the user process, it is copied
1670  * and a newline '\n' is added. It is truncated if the buffer is
1671  * not large enough.
1672  *
1673  * Returns 0 on success.
1674  */
1675 int proc_dostring(ctl_table *table, int write, struct file *filp,
1676 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1677 {
1678 	return _proc_do_string(table->data, table->maxlen, write, filp,
1679 			       buffer, lenp, ppos);
1680 }
1681 
1682 
/*
 * Default conversion between the parsed (sign, magnitude) pair used by
 * __do_proc_dointvec() and the stored int.
 *
 * @negp:  sign flag; read on write, set to -1 or 0 on read
 * @lvalp: magnitude; read on write, set on read
 * @valp:  the stored integer
 * @write: non-zero to store into *valp, zero to load from it
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows for
			 * INT_MIN and, once widened, produces a 20-digit
			 * magnitude on 64-bit machines.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1701 
1702 static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
1703 		  int write, struct file *filp, void __user *buffer,
1704 		  size_t *lenp, loff_t *ppos,
1705 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1706 			      int write, void *data),
1707 		  void *data)
1708 {
1709 #define TMPBUFLEN 21
1710 	int *i, vleft, first=1, neg, val;
1711 	unsigned long lval;
1712 	size_t left, len;
1713 
1714 	char buf[TMPBUFLEN], *p;
1715 	char __user *s = buffer;
1716 
1717 	if (!tbl_data || !table->maxlen || !*lenp ||
1718 	    (*ppos && !write)) {
1719 		*lenp = 0;
1720 		return 0;
1721 	}
1722 
1723 	i = (int *) tbl_data;
1724 	vleft = table->maxlen / sizeof(*i);
1725 	left = *lenp;
1726 
1727 	if (!conv)
1728 		conv = do_proc_dointvec_conv;
1729 
1730 	for (; left && vleft--; i++, first=0) {
1731 		if (write) {
1732 			while (left) {
1733 				char c;
1734 				if (get_user(c, s))
1735 					return -EFAULT;
1736 				if (!isspace(c))
1737 					break;
1738 				left--;
1739 				s++;
1740 			}
1741 			if (!left)
1742 				break;
1743 			neg = 0;
1744 			len = left;
1745 			if (len > sizeof(buf) - 1)
1746 				len = sizeof(buf) - 1;
1747 			if (copy_from_user(buf, s, len))
1748 				return -EFAULT;
1749 			buf[len] = 0;
1750 			p = buf;
1751 			if (*p == '-' && left > 1) {
1752 				neg = 1;
1753 				p++;
1754 			}
1755 			if (*p < '0' || *p > '9')
1756 				break;
1757 
1758 			lval = simple_strtoul(p, &p, 0);
1759 
1760 			len = p-buf;
1761 			if ((len < left) && *p && !isspace(*p))
1762 				break;
1763 			if (neg)
1764 				val = -val;
1765 			s += len;
1766 			left -= len;
1767 
1768 			if (conv(&neg, &lval, i, 1, data))
1769 				break;
1770 		} else {
1771 			p = buf;
1772 			if (!first)
1773 				*p++ = '\t';
1774 
1775 			if (conv(&neg, &lval, i, 0, data))
1776 				break;
1777 
1778 			sprintf(p, "%s%lu", neg ? "-" : "", lval);
1779 			len = strlen(buf);
1780 			if (len > left)
1781 				len = left;
1782 			if(copy_to_user(s, buf, len))
1783 				return -EFAULT;
1784 			left -= len;
1785 			s += len;
1786 		}
1787 	}
1788 
1789 	if (!write && !first && left) {
1790 		if(put_user('\n', s))
1791 			return -EFAULT;
1792 		left--, s++;
1793 	}
1794 	if (write) {
1795 		while (left) {
1796 			char c;
1797 			if (get_user(c, s++))
1798 				return -EFAULT;
1799 			if (!isspace(c))
1800 				break;
1801 			left--;
1802 		}
1803 	}
1804 	if (write && first)
1805 		return -EINVAL;
1806 	*lenp -= left;
1807 	*ppos += *lenp;
1808 	return 0;
1809 #undef TMPBUFLEN
1810 }
1811 
1812 static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1813 		  void __user *buffer, size_t *lenp, loff_t *ppos,
1814 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1815 			      int write, void *data),
1816 		  void *data)
1817 {
1818 	return __do_proc_dointvec(table->data, table, write, filp,
1819 			buffer, lenp, ppos, conv, data);
1820 }
1821 
1822 /**
1823  * proc_dointvec - read a vector of integers
1824  * @table: the sysctl table
1825  * @write: %TRUE if this is a write to the sysctl file
1826  * @filp: the file structure
1827  * @buffer: the user buffer
1828  * @lenp: the size of the user buffer
1829  * @ppos: file position
1830  *
1831  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1832  * values from/to the user buffer, treated as an ASCII string.
1833  *
1834  * Returns 0 on success.
1835  */
1836 int proc_dointvec(ctl_table *table, int write, struct file *filp,
1837 		     void __user *buffer, size_t *lenp, loff_t *ppos)
1838 {
1839     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1840 		    	    NULL,NULL);
1841 }
1842 
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2

/*
 * Conversion callback that combines the written value with the stored
 * one instead of always overwriting it.
 *
 * @data points at an int holding OP_SET (assign), OP_AND (mask) or
 * OP_OR (set bits).  Any other operation leaves *valp untouched.
 * The read direction reports the sign in *negp (-1 or 0) and the
 * magnitude in *lvalp.
 *
 * Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:	*valp = val; break;
		case OP_AND:	*valp &= val; break;
		case OP_OR:	*valp |= val; break;
		default:	break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/* Unsigned negation: "-val" overflows for INT_MIN. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1871 
1872 /*
1873  *	init may raise the set.
1874  */
1875 
1876 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1877 			void __user *buffer, size_t *lenp, loff_t *ppos)
1878 {
1879 	int op;
1880 
1881 	if (write && !capable(CAP_SYS_MODULE)) {
1882 		return -EPERM;
1883 	}
1884 
1885 	op = is_init(current) ? OP_SET : OP_AND;
1886 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1887 				do_proc_dointvec_bset_conv,&op);
1888 }
1889 
1890 /*
1891  *	Taint values can only be increased
1892  */
1893 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
1894 			       void __user *buffer, size_t *lenp, loff_t *ppos)
1895 {
1896 	int op;
1897 
1898 	if (write && !capable(CAP_SYS_ADMIN))
1899 		return -EPERM;
1900 
1901 	op = OP_OR;
1902 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1903 				do_proc_dointvec_bset_conv,&op);
1904 }
1905 
/* Optional bounds for do_proc_dointvec_minmax_conv(); NULL means "no limit". */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback that range-checks written values.
 *
 * @data points at a struct do_proc_dointvec_minmax_conv_param.  A write
 * whose value is below *min or above *max (for whichever bound is
 * present) is rejected with -EINVAL and *valp is left unchanged.
 * The read direction reports the sign in *negp (-1 or 0) and the
 * magnitude in *lvalp.
 *
 * Returns 0 on success.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/* Unsigned negation: "-val" overflows for INT_MIN. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1934 
1935 /**
1936  * proc_dointvec_minmax - read a vector of integers with min/max values
1937  * @table: the sysctl table
1938  * @write: %TRUE if this is a write to the sysctl file
1939  * @filp: the file structure
1940  * @buffer: the user buffer
1941  * @lenp: the size of the user buffer
1942  * @ppos: file position
1943  *
1944  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1945  * values from/to the user buffer, treated as an ASCII string.
1946  *
1947  * This routine will ensure the values are within the range specified by
1948  * table->extra1 (min) and table->extra2 (max).
1949  *
1950  * Returns 0 on success.
1951  */
1952 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
1953 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1954 {
1955 	struct do_proc_dointvec_minmax_conv_param param = {
1956 		.min = (int *) table->extra1,
1957 		.max = (int *) table->extra2,
1958 	};
1959 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
1960 				do_proc_dointvec_minmax_conv, &param);
1961 }
1962 
1963 static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
1964 				     struct file *filp,
1965 				     void __user *buffer,
1966 				     size_t *lenp, loff_t *ppos,
1967 				     unsigned long convmul,
1968 				     unsigned long convdiv)
1969 {
1970 #define TMPBUFLEN 21
1971 	unsigned long *i, *min, *max, val;
1972 	int vleft, first=1, neg;
1973 	size_t len, left;
1974 	char buf[TMPBUFLEN], *p;
1975 	char __user *s = buffer;
1976 
1977 	if (!data || !table->maxlen || !*lenp ||
1978 	    (*ppos && !write)) {
1979 		*lenp = 0;
1980 		return 0;
1981 	}
1982 
1983 	i = (unsigned long *) data;
1984 	min = (unsigned long *) table->extra1;
1985 	max = (unsigned long *) table->extra2;
1986 	vleft = table->maxlen / sizeof(unsigned long);
1987 	left = *lenp;
1988 
1989 	for (; left && vleft--; i++, min++, max++, first=0) {
1990 		if (write) {
1991 			while (left) {
1992 				char c;
1993 				if (get_user(c, s))
1994 					return -EFAULT;
1995 				if (!isspace(c))
1996 					break;
1997 				left--;
1998 				s++;
1999 			}
2000 			if (!left)
2001 				break;
2002 			neg = 0;
2003 			len = left;
2004 			if (len > TMPBUFLEN-1)
2005 				len = TMPBUFLEN-1;
2006 			if (copy_from_user(buf, s, len))
2007 				return -EFAULT;
2008 			buf[len] = 0;
2009 			p = buf;
2010 			if (*p == '-' && left > 1) {
2011 				neg = 1;
2012 				p++;
2013 			}
2014 			if (*p < '0' || *p > '9')
2015 				break;
2016 			val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2017 			len = p-buf;
2018 			if ((len < left) && *p && !isspace(*p))
2019 				break;
2020 			if (neg)
2021 				val = -val;
2022 			s += len;
2023 			left -= len;
2024 
2025 			if(neg)
2026 				continue;
2027 			if ((min && val < *min) || (max && val > *max))
2028 				continue;
2029 			*i = val;
2030 		} else {
2031 			p = buf;
2032 			if (!first)
2033 				*p++ = '\t';
2034 			sprintf(p, "%lu", convdiv * (*i) / convmul);
2035 			len = strlen(buf);
2036 			if (len > left)
2037 				len = left;
2038 			if(copy_to_user(s, buf, len))
2039 				return -EFAULT;
2040 			left -= len;
2041 			s += len;
2042 		}
2043 	}
2044 
2045 	if (!write && !first && left) {
2046 		if(put_user('\n', s))
2047 			return -EFAULT;
2048 		left--, s++;
2049 	}
2050 	if (write) {
2051 		while (left) {
2052 			char c;
2053 			if (get_user(c, s++))
2054 				return -EFAULT;
2055 			if (!isspace(c))
2056 				break;
2057 			left--;
2058 		}
2059 	}
2060 	if (write && first)
2061 		return -EINVAL;
2062 	*lenp -= left;
2063 	*ppos += *lenp;
2064 	return 0;
2065 #undef TMPBUFLEN
2066 }
2067 
2068 static int do_proc_doulongvec_minmax(ctl_table *table, int write,
2069 				     struct file *filp,
2070 				     void __user *buffer,
2071 				     size_t *lenp, loff_t *ppos,
2072 				     unsigned long convmul,
2073 				     unsigned long convdiv)
2074 {
2075 	return __do_proc_doulongvec_minmax(table->data, table, write,
2076 			filp, buffer, lenp, ppos, convmul, convdiv);
2077 }
2078 
2079 /**
2080  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2081  * @table: the sysctl table
2082  * @write: %TRUE if this is a write to the sysctl file
2083  * @filp: the file structure
2084  * @buffer: the user buffer
2085  * @lenp: the size of the user buffer
2086  * @ppos: file position
2087  *
2088  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2089  * values from/to the user buffer, treated as an ASCII string.
2090  *
2091  * This routine will ensure the values are within the range specified by
2092  * table->extra1 (min) and table->extra2 (max).
2093  *
2094  * Returns 0 on success.
2095  */
2096 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2097 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2098 {
2099     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2100 }
2101 
2102 /**
2103  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2104  * @table: the sysctl table
2105  * @write: %TRUE if this is a write to the sysctl file
2106  * @filp: the file structure
2107  * @buffer: the user buffer
2108  * @lenp: the size of the user buffer
2109  * @ppos: file position
2110  *
2111  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2112  * values from/to the user buffer, treated as an ASCII string. The values
2113  * are treated as milliseconds, and converted to jiffies when they are stored.
2114  *
2115  * This routine will ensure the values are within the range specified by
2116  * table->extra1 (min) and table->extra2 (max).
2117  *
2118  * Returns 0 on success.
2119  */
2120 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2121 				      struct file *filp,
2122 				      void __user *buffer,
2123 				      size_t *lenp, loff_t *ppos)
2124 {
2125     return do_proc_doulongvec_minmax(table, write, filp, buffer,
2126 				     lenp, ppos, HZ, 1000l);
2127 }
2128 
2129 
2130 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2131 					 int *valp,
2132 					 int write, void *data)
2133 {
2134 	if (write) {
2135 		if (*lvalp > LONG_MAX / HZ)
2136 			return 1;
2137 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2138 	} else {
2139 		int val = *valp;
2140 		unsigned long lval;
2141 		if (val < 0) {
2142 			*negp = -1;
2143 			lval = (unsigned long)-val;
2144 		} else {
2145 			*negp = 0;
2146 			lval = (unsigned long)val;
2147 		}
2148 		*lvalp = lval / HZ;
2149 	}
2150 	return 0;
2151 }
2152 
2153 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2154 						int *valp,
2155 						int write, void *data)
2156 {
2157 	if (write) {
2158 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2159 			return 1;
2160 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2161 	} else {
2162 		int val = *valp;
2163 		unsigned long lval;
2164 		if (val < 0) {
2165 			*negp = -1;
2166 			lval = (unsigned long)-val;
2167 		} else {
2168 			*negp = 0;
2169 			lval = (unsigned long)val;
2170 		}
2171 		*lvalp = jiffies_to_clock_t(lval);
2172 	}
2173 	return 0;
2174 }
2175 
/*
 * Conversion callback that maps written values through
 * msecs_to_jiffies() and read values through jiffies_to_msecs().
 *
 * Read: reports the sign in *negp (-1 or 0) and the converted magnitude
 * in *lvalp.
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/* Unsigned negation: "-val" overflows for INT_MIN. */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2196 
2197 /**
2198  * proc_dointvec_jiffies - read a vector of integers as seconds
2199  * @table: the sysctl table
2200  * @write: %TRUE if this is a write to the sysctl file
2201  * @filp: the file structure
2202  * @buffer: the user buffer
2203  * @lenp: the size of the user buffer
2204  * @ppos: file position
2205  *
2206  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2207  * values from/to the user buffer, treated as an ASCII string.
2208  * The values read are assumed to be in seconds, and are converted into
2209  * jiffies.
2210  *
2211  * Returns 0 on success.
2212  */
2213 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2214 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2215 {
2216     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2217 		    	    do_proc_dointvec_jiffies_conv,NULL);
2218 }
2219 
2220 /**
2221  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2222  * @table: the sysctl table
2223  * @write: %TRUE if this is a write to the sysctl file
2224  * @filp: the file structure
2225  * @buffer: the user buffer
2226  * @lenp: the size of the user buffer
2227  * @ppos: pointer to the file position
2228  *
2229  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2230  * values from/to the user buffer, treated as an ASCII string.
2231  * The values read are assumed to be in 1/USER_HZ seconds, and
2232  * are converted into jiffies.
2233  *
2234  * Returns 0 on success.
2235  */
2236 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2237 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2238 {
2239     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2240 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2241 }
2242 
2243 /**
2244  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2245  * @table: the sysctl table
2246  * @write: %TRUE if this is a write to the sysctl file
2247  * @filp: the file structure
2248  * @buffer: the user buffer
2249  * @lenp: the size of the user buffer
 * @ppos: the current position in the file
2252  *
2253  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2254  * values from/to the user buffer, treated as an ASCII string.
2255  * The values read are assumed to be in 1/1000 seconds, and
2256  * are converted into jiffies.
2257  *
2258  * Returns 0 on success.
2259  */
2260 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2261 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2262 {
2263 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2264 				do_proc_dointvec_ms_jiffies_conv, NULL);
2265 }
2266 
2267 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
2268 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2269 {
2270 	struct pid *new_pid;
2271 	pid_t tmp;
2272 	int r;
2273 
2274 	tmp = pid_nr(cad_pid);
2275 
2276 	r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2277 			       lenp, ppos, NULL, NULL);
2278 	if (r || !write)
2279 		return r;
2280 
2281 	new_pid = find_get_pid(tmp);
2282 	if (!new_pid)
2283 		return -ESRCH;
2284 
2285 	put_pid(xchg(&cad_pid, new_pid));
2286 	return 0;
2287 }
2288 
2289 #else /* CONFIG_PROC_FS */
2290 
2291 int proc_dostring(ctl_table *table, int write, struct file *filp,
2292 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2293 {
2294 	return -ENOSYS;
2295 }
2296 
2297 int proc_dointvec(ctl_table *table, int write, struct file *filp,
2298 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2299 {
2300 	return -ENOSYS;
2301 }
2302 
2303 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2304 			void __user *buffer, size_t *lenp, loff_t *ppos)
2305 {
2306 	return -ENOSYS;
2307 }
2308 
2309 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2310 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2311 {
2312 	return -ENOSYS;
2313 }
2314 
2315 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2316 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2317 {
2318 	return -ENOSYS;
2319 }
2320 
2321 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2322 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2323 {
2324 	return -ENOSYS;
2325 }
2326 
2327 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2328 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2329 {
2330 	return -ENOSYS;
2331 }
2332 
2333 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2334 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2335 {
2336 	return -ENOSYS;
2337 }
2338 
2339 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2340 				      struct file *filp,
2341 				      void __user *buffer,
2342 				      size_t *lenp, loff_t *ppos)
2343 {
2344     return -ENOSYS;
2345 }
2346 
2347 
2348 #endif /* CONFIG_PROC_FS */
2349 
2350 
2351 #ifdef CONFIG_SYSCTL_SYSCALL
2352 /*
2353  * General sysctl support routines
2354  */
2355 
2356 /* The generic string strategy routine: */
2357 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2358 		  void __user *oldval, size_t __user *oldlenp,
2359 		  void __user *newval, size_t newlen)
2360 {
2361 	if (!table->data || !table->maxlen)
2362 		return -ENOTDIR;
2363 
2364 	if (oldval && oldlenp) {
2365 		size_t bufsize;
2366 		if (get_user(bufsize, oldlenp))
2367 			return -EFAULT;
2368 		if (bufsize) {
2369 			size_t len = strlen(table->data), copied;
2370 
2371 			/* This shouldn't trigger for a well-formed sysctl */
2372 			if (len > table->maxlen)
2373 				len = table->maxlen;
2374 
2375 			/* Copy up to a max of bufsize-1 bytes of the string */
2376 			copied = (len >= bufsize) ? bufsize - 1 : len;
2377 
2378 			if (copy_to_user(oldval, table->data, copied) ||
2379 			    put_user(0, (char __user *)(oldval + copied)))
2380 				return -EFAULT;
2381 			if (put_user(len, oldlenp))
2382 				return -EFAULT;
2383 		}
2384 	}
2385 	if (newval && newlen) {
2386 		size_t len = newlen;
2387 		if (len > table->maxlen)
2388 			len = table->maxlen;
2389 		if(copy_from_user(table->data, newval, len))
2390 			return -EFAULT;
2391 		if (len == table->maxlen)
2392 			len--;
2393 		((char *) table->data)[len] = 0;
2394 	}
2395 	return 1;
2396 }
2397 
2398 /*
2399  * This function makes sure that all of the integers in the vector
2400  * are between the minimum and maximum values given in the arrays
2401  * table->extra1 and table->extra2, respectively.
2402  */
2403 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2404 		void __user *oldval, size_t __user *oldlenp,
2405 		void __user *newval, size_t newlen)
2406 {
2407 
2408 	if (newval && newlen) {
2409 		int __user *vec = (int __user *) newval;
2410 		int *min = (int *) table->extra1;
2411 		int *max = (int *) table->extra2;
2412 		size_t length;
2413 		int i;
2414 
2415 		if (newlen % sizeof(int) != 0)
2416 			return -EINVAL;
2417 
2418 		if (!table->extra1 && !table->extra2)
2419 			return 0;
2420 
2421 		if (newlen > table->maxlen)
2422 			newlen = table->maxlen;
2423 		length = newlen / sizeof(int);
2424 
2425 		for (i = 0; i < length; i++) {
2426 			int value;
2427 			if (get_user(value, vec + i))
2428 				return -EFAULT;
2429 			if (min && value < min[i])
2430 				return -EINVAL;
2431 			if (max && value > max[i])
2432 				return -EINVAL;
2433 		}
2434 	}
2435 	return 0;
2436 }
2437 
2438 /* Strategy function to convert jiffies to seconds */
2439 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2440 		void __user *oldval, size_t __user *oldlenp,
2441 		void __user *newval, size_t newlen)
2442 {
2443 	if (oldval && oldlenp) {
2444 		size_t olen;
2445 
2446 		if (get_user(olen, oldlenp))
2447 			return -EFAULT;
2448 		if (olen) {
2449 			int val;
2450 
2451 			if (olen < sizeof(int))
2452 				return -EINVAL;
2453 
2454 			val = *(int *)(table->data) / HZ;
2455 			if (put_user(val, (int __user *)oldval))
2456 				return -EFAULT;
2457 			if (put_user(sizeof(int), oldlenp))
2458 				return -EFAULT;
2459 		}
2460 	}
2461 	if (newval && newlen) {
2462 		int new;
2463 		if (newlen != sizeof(int))
2464 			return -EINVAL;
2465 		if (get_user(new, (int __user *)newval))
2466 			return -EFAULT;
2467 		*(int *)(table->data) = new*HZ;
2468 	}
2469 	return 1;
2470 }
2471 
2472 /* Strategy function to convert jiffies to seconds */
2473 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2474 		void __user *oldval, size_t __user *oldlenp,
2475 		void __user *newval, size_t newlen)
2476 {
2477 	if (oldval && oldlenp) {
2478 		size_t olen;
2479 
2480 		if (get_user(olen, oldlenp))
2481 			return -EFAULT;
2482 		if (olen) {
2483 			int val;
2484 
2485 			if (olen < sizeof(int))
2486 				return -EINVAL;
2487 
2488 			val = jiffies_to_msecs(*(int *)(table->data));
2489 			if (put_user(val, (int __user *)oldval))
2490 				return -EFAULT;
2491 			if (put_user(sizeof(int), oldlenp))
2492 				return -EFAULT;
2493 		}
2494 	}
2495 	if (newval && newlen) {
2496 		int new;
2497 		if (newlen != sizeof(int))
2498 			return -EINVAL;
2499 		if (get_user(new, (int __user *)newval))
2500 			return -EFAULT;
2501 		*(int *)(table->data) = msecs_to_jiffies(new);
2502 	}
2503 	return 1;
2504 }
2505 
2506 
2507 
2508 #else /* CONFIG_SYSCTL_SYSCALL */
2509 
2510 
2511 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2512 {
2513 	static int msg_count;
2514 	struct __sysctl_args tmp;
2515 	int name[CTL_MAXNAME];
2516 	int i;
2517 
2518 	/* Read in the sysctl name for better debug message logging */
2519 	if (copy_from_user(&tmp, args, sizeof(tmp)))
2520 		return -EFAULT;
2521 	if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME)
2522 		return -ENOTDIR;
2523 	for (i = 0; i < tmp.nlen; i++)
2524 		if (get_user(name[i], tmp.name + i))
2525 			return -EFAULT;
2526 
2527 	/* Ignore accesses to kernel.version */
2528 	if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
2529 		goto out;
2530 
2531 	if (msg_count < 5) {
2532 		msg_count++;
2533 		printk(KERN_INFO
2534 			"warning: process `%s' used the removed sysctl "
2535 			"system call with ", current->comm);
2536 		for (i = 0; i < tmp.nlen; i++)
2537 			printk("%d.", name[i]);
2538 		printk("\n");
2539 	}
2540 out:
2541 	return -ENOSYS;
2542 }
2543 
2544 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2545 		  void __user *oldval, size_t __user *oldlenp,
2546 		  void __user *newval, size_t newlen)
2547 {
2548 	return -ENOSYS;
2549 }
2550 
2551 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2552 		void __user *oldval, size_t __user *oldlenp,
2553 		void __user *newval, size_t newlen)
2554 {
2555 	return -ENOSYS;
2556 }
2557 
2558 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2559 		void __user *oldval, size_t __user *oldlenp,
2560 		void __user *newval, size_t newlen)
2561 {
2562 	return -ENOSYS;
2563 }
2564 
2565 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2566 		void __user *oldval, size_t __user *oldlenp,
2567 		void __user *newval, size_t newlen)
2568 {
2569 	return -ENOSYS;
2570 }
2571 
2572 #endif /* CONFIG_SYSCTL_SYSCALL */
2573 
/*
 * The exports are collected here, in alphabetical order, instead of
 * following each definition: most of these symbols are defined twice
 * (once per configuration branch), so this is a granted exception :-)
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(unregister_sysctl_table);
2592