xref: /linux/kernel/sysctl.c (revision a17627ef8833ac30622a7b39b7be390e1b174405)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/capability.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/capability.h>
31 #include <linux/smp_lock.h>
32 #include <linux/init.h>
33 #include <linux/kernel.h>
34 #include <linux/kobject.h>
35 #include <linux/net.h>
36 #include <linux/sysrq.h>
37 #include <linux/highuid.h>
38 #include <linux/writeback.h>
39 #include <linux/hugetlb.h>
40 #include <linux/security.h>
41 #include <linux/initrd.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/nfs_fs.h>
47 #include <linux/acpi.h>
48 
49 #include <asm/uaccess.h>
50 #include <asm/processor.h>
51 
52 extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
53                      void __user *buffer, size_t *lenp, loff_t *ppos);
54 
55 #ifdef CONFIG_X86
56 #include <asm/nmi.h>
57 #include <asm/stacktrace.h>
58 #endif
59 
60 #if defined(CONFIG_SYSCTL)
61 
62 /* External variables not in a header file. */
63 extern int C_A_D;
64 extern int sysctl_overcommit_memory;
65 extern int sysctl_overcommit_ratio;
66 extern int sysctl_panic_on_oom;
67 extern int max_threads;
68 extern int core_uses_pid;
69 extern int suid_dumpable;
70 extern char core_pattern[];
71 extern int pid_max;
72 extern int min_free_kbytes;
73 extern int printk_ratelimit_jiffies;
74 extern int printk_ratelimit_burst;
75 extern int pid_max_min, pid_max_max;
76 extern int sysctl_drop_caches;
77 extern int percpu_pagelist_fraction;
78 extern int compat_log;
79 extern int maps_protect;
80 extern int sysctl_stat_interval;
81 
82 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
83 static int maxolduid = 65535;
84 static int minolduid;
85 static int min_percpu_pagelist_fract = 8;
86 
87 static int ngroups_max = NGROUPS_MAX;
88 
89 #ifdef CONFIG_KMOD
90 extern char modprobe_path[];
91 #endif
92 #ifdef CONFIG_CHR_DEV_SG
93 extern int sg_big_buff;
94 #endif
95 
96 #ifdef __sparc__
97 extern char reboot_command [];
98 extern int stop_a_enabled;
99 extern int scons_pwroff;
100 #endif
101 
102 #ifdef __hppa__
103 extern int pwrsw_enabled;
104 extern int unaligned_enabled;
105 #endif
106 
107 #ifdef CONFIG_S390
108 #ifdef CONFIG_MATHEMU
109 extern int sysctl_ieee_emulation_warnings;
110 #endif
111 extern int sysctl_userprocess_debug;
112 extern int spin_retry;
113 #endif
114 
115 extern int sysctl_hz_timer;
116 
117 #ifdef CONFIG_BSD_PROCESS_ACCT
118 extern int acct_parm[];
119 #endif
120 
121 #ifdef CONFIG_IA64
122 extern int no_unaligned_warning;
123 #endif
124 
125 #ifdef CONFIG_RT_MUTEXES
126 extern int max_lock_depth;
127 #endif
128 
129 #ifdef CONFIG_SYSCTL_SYSCALL
130 static int parse_table(int __user *, int, void __user *, size_t __user *,
131 		void __user *, size_t, ctl_table *);
132 #endif
133 
134 
135 #ifdef CONFIG_PROC_SYSCTL
136 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
137 		  void __user *buffer, size_t *lenp, loff_t *ppos);
138 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
139 			       void __user *buffer, size_t *lenp, loff_t *ppos);
140 #endif
141 
142 static ctl_table root_table[];
143 static struct ctl_table_header root_table_header =
144 	{ root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
145 
146 static ctl_table kern_table[];
147 static ctl_table vm_table[];
148 static ctl_table fs_table[];
149 static ctl_table debug_table[];
150 static ctl_table dev_table[];
151 extern ctl_table random_table[];
152 #ifdef CONFIG_UNIX98_PTYS
153 extern ctl_table pty_table[];
154 #endif
155 #ifdef CONFIG_INOTIFY_USER
156 extern ctl_table inotify_table[];
157 #endif
158 
159 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
160 int sysctl_legacy_va_layout;
161 #endif
162 
163 
164 /* The default sysctl tables: */
165 
166 static ctl_table root_table[] = {
167 	{
168 		.ctl_name	= CTL_KERN,
169 		.procname	= "kernel",
170 		.mode		= 0555,
171 		.child		= kern_table,
172 	},
173 	{
174 		.ctl_name	= CTL_VM,
175 		.procname	= "vm",
176 		.mode		= 0555,
177 		.child		= vm_table,
178 	},
179 #ifdef CONFIG_NET
180 	{
181 		.ctl_name	= CTL_NET,
182 		.procname	= "net",
183 		.mode		= 0555,
184 		.child		= net_table,
185 	},
186 #endif
187 	{
188 		.ctl_name	= CTL_FS,
189 		.procname	= "fs",
190 		.mode		= 0555,
191 		.child		= fs_table,
192 	},
193 	{
194 		.ctl_name	= CTL_DEBUG,
195 		.procname	= "debug",
196 		.mode		= 0555,
197 		.child		= debug_table,
198 	},
199 	{
200 		.ctl_name	= CTL_DEV,
201 		.procname	= "dev",
202 		.mode		= 0555,
203 		.child		= dev_table,
204 	},
205 
206 	{ .ctl_name = 0 }
207 };
208 
209 static ctl_table kern_table[] = {
210 	{
211 		.ctl_name	= KERN_PANIC,
212 		.procname	= "panic",
213 		.data		= &panic_timeout,
214 		.maxlen		= sizeof(int),
215 		.mode		= 0644,
216 		.proc_handler	= &proc_dointvec,
217 	},
218 	{
219 		.ctl_name	= KERN_CORE_USES_PID,
220 		.procname	= "core_uses_pid",
221 		.data		= &core_uses_pid,
222 		.maxlen		= sizeof(int),
223 		.mode		= 0644,
224 		.proc_handler	= &proc_dointvec,
225 	},
226 	{
227 		.ctl_name	= KERN_CORE_PATTERN,
228 		.procname	= "core_pattern",
229 		.data		= core_pattern,
230 		.maxlen		= CORENAME_MAX_SIZE,
231 		.mode		= 0644,
232 		.proc_handler	= &proc_dostring,
233 		.strategy	= &sysctl_string,
234 	},
235 #ifdef CONFIG_PROC_SYSCTL
236 	{
237 		.ctl_name	= KERN_TAINTED,
238 		.procname	= "tainted",
239 		.data		= &tainted,
240 		.maxlen		= sizeof(int),
241 		.mode		= 0644,
242 		.proc_handler	= &proc_dointvec_taint,
243 	},
244 #endif
245 	{
246 		.ctl_name	= KERN_CAP_BSET,
247 		.procname	= "cap-bound",
248 		.data		= &cap_bset,
249 		.maxlen		= sizeof(kernel_cap_t),
250 		.mode		= 0600,
251 		.proc_handler	= &proc_dointvec_bset,
252 	},
253 #ifdef CONFIG_BLK_DEV_INITRD
254 	{
255 		.ctl_name	= KERN_REALROOTDEV,
256 		.procname	= "real-root-dev",
257 		.data		= &real_root_dev,
258 		.maxlen		= sizeof(int),
259 		.mode		= 0644,
260 		.proc_handler	= &proc_dointvec,
261 	},
262 #endif
263 #ifdef __sparc__
264 	{
265 		.ctl_name	= KERN_SPARC_REBOOT,
266 		.procname	= "reboot-cmd",
267 		.data		= reboot_command,
268 		.maxlen		= 256,
269 		.mode		= 0644,
270 		.proc_handler	= &proc_dostring,
271 		.strategy	= &sysctl_string,
272 	},
273 	{
274 		.ctl_name	= KERN_SPARC_STOP_A,
275 		.procname	= "stop-a",
276 		.data		= &stop_a_enabled,
277 		.maxlen		= sizeof (int),
278 		.mode		= 0644,
279 		.proc_handler	= &proc_dointvec,
280 	},
281 	{
282 		.ctl_name	= KERN_SPARC_SCONS_PWROFF,
283 		.procname	= "scons-poweroff",
284 		.data		= &scons_pwroff,
285 		.maxlen		= sizeof (int),
286 		.mode		= 0644,
287 		.proc_handler	= &proc_dointvec,
288 	},
289 #endif
290 #ifdef __hppa__
291 	{
292 		.ctl_name	= KERN_HPPA_PWRSW,
293 		.procname	= "soft-power",
294 		.data		= &pwrsw_enabled,
295 		.maxlen		= sizeof (int),
296 	 	.mode		= 0644,
297 		.proc_handler	= &proc_dointvec,
298 	},
299 	{
300 		.ctl_name	= KERN_HPPA_UNALIGNED,
301 		.procname	= "unaligned-trap",
302 		.data		= &unaligned_enabled,
303 		.maxlen		= sizeof (int),
304 		.mode		= 0644,
305 		.proc_handler	= &proc_dointvec,
306 	},
307 #endif
308 	{
309 		.ctl_name	= KERN_CTLALTDEL,
310 		.procname	= "ctrl-alt-del",
311 		.data		= &C_A_D,
312 		.maxlen		= sizeof(int),
313 		.mode		= 0644,
314 		.proc_handler	= &proc_dointvec,
315 	},
316 	{
317 		.ctl_name	= KERN_PRINTK,
318 		.procname	= "printk",
319 		.data		= &console_loglevel,
320 		.maxlen		= 4*sizeof(int),
321 		.mode		= 0644,
322 		.proc_handler	= &proc_dointvec,
323 	},
324 #ifdef CONFIG_KMOD
325 	{
326 		.ctl_name	= KERN_MODPROBE,
327 		.procname	= "modprobe",
328 		.data		= &modprobe_path,
329 		.maxlen		= KMOD_PATH_LEN,
330 		.mode		= 0644,
331 		.proc_handler	= &proc_dostring,
332 		.strategy	= &sysctl_string,
333 	},
334 #endif
335 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
336 	{
337 		.ctl_name	= KERN_HOTPLUG,
338 		.procname	= "hotplug",
339 		.data		= &uevent_helper,
340 		.maxlen		= UEVENT_HELPER_PATH_LEN,
341 		.mode		= 0644,
342 		.proc_handler	= &proc_dostring,
343 		.strategy	= &sysctl_string,
344 	},
345 #endif
346 #ifdef CONFIG_CHR_DEV_SG
347 	{
348 		.ctl_name	= KERN_SG_BIG_BUFF,
349 		.procname	= "sg-big-buff",
350 		.data		= &sg_big_buff,
351 		.maxlen		= sizeof (int),
352 		.mode		= 0444,
353 		.proc_handler	= &proc_dointvec,
354 	},
355 #endif
356 #ifdef CONFIG_BSD_PROCESS_ACCT
357 	{
358 		.ctl_name	= KERN_ACCT,
359 		.procname	= "acct",
360 		.data		= &acct_parm,
361 		.maxlen		= 3*sizeof(int),
362 		.mode		= 0644,
363 		.proc_handler	= &proc_dointvec,
364 	},
365 #endif
366 #ifdef CONFIG_MAGIC_SYSRQ
367 	{
368 		.ctl_name	= KERN_SYSRQ,
369 		.procname	= "sysrq",
370 		.data		= &__sysrq_enabled,
371 		.maxlen		= sizeof (int),
372 		.mode		= 0644,
373 		.proc_handler	= &proc_dointvec,
374 	},
375 #endif
376 #ifdef CONFIG_PROC_SYSCTL
377 	{
378 		.ctl_name	= KERN_CADPID,
379 		.procname	= "cad_pid",
380 		.data		= NULL,
381 		.maxlen		= sizeof (int),
382 		.mode		= 0600,
383 		.proc_handler	= &proc_do_cad_pid,
384 	},
385 #endif
386 	{
387 		.ctl_name	= KERN_MAX_THREADS,
388 		.procname	= "threads-max",
389 		.data		= &max_threads,
390 		.maxlen		= sizeof(int),
391 		.mode		= 0644,
392 		.proc_handler	= &proc_dointvec,
393 	},
394 	{
395 		.ctl_name	= KERN_RANDOM,
396 		.procname	= "random",
397 		.mode		= 0555,
398 		.child		= random_table,
399 	},
400 #ifdef CONFIG_UNIX98_PTYS
401 	{
402 		.ctl_name	= KERN_PTY,
403 		.procname	= "pty",
404 		.mode		= 0555,
405 		.child		= pty_table,
406 	},
407 #endif
408 	{
409 		.ctl_name	= KERN_OVERFLOWUID,
410 		.procname	= "overflowuid",
411 		.data		= &overflowuid,
412 		.maxlen		= sizeof(int),
413 		.mode		= 0644,
414 		.proc_handler	= &proc_dointvec_minmax,
415 		.strategy	= &sysctl_intvec,
416 		.extra1		= &minolduid,
417 		.extra2		= &maxolduid,
418 	},
419 	{
420 		.ctl_name	= KERN_OVERFLOWGID,
421 		.procname	= "overflowgid",
422 		.data		= &overflowgid,
423 		.maxlen		= sizeof(int),
424 		.mode		= 0644,
425 		.proc_handler	= &proc_dointvec_minmax,
426 		.strategy	= &sysctl_intvec,
427 		.extra1		= &minolduid,
428 		.extra2		= &maxolduid,
429 	},
430 #ifdef CONFIG_S390
431 #ifdef CONFIG_MATHEMU
432 	{
433 		.ctl_name	= KERN_IEEE_EMULATION_WARNINGS,
434 		.procname	= "ieee_emulation_warnings",
435 		.data		= &sysctl_ieee_emulation_warnings,
436 		.maxlen		= sizeof(int),
437 		.mode		= 0644,
438 		.proc_handler	= &proc_dointvec,
439 	},
440 #endif
441 #ifdef CONFIG_NO_IDLE_HZ
442 	{
443 		.ctl_name       = KERN_HZ_TIMER,
444 		.procname       = "hz_timer",
445 		.data           = &sysctl_hz_timer,
446 		.maxlen         = sizeof(int),
447 		.mode           = 0644,
448 		.proc_handler   = &proc_dointvec,
449 	},
450 #endif
451 	{
452 		.ctl_name	= KERN_S390_USER_DEBUG_LOGGING,
453 		.procname	= "userprocess_debug",
454 		.data		= &sysctl_userprocess_debug,
455 		.maxlen		= sizeof(int),
456 		.mode		= 0644,
457 		.proc_handler	= &proc_dointvec,
458 	},
459 #endif
460 	{
461 		.ctl_name	= KERN_PIDMAX,
462 		.procname	= "pid_max",
463 		.data		= &pid_max,
464 		.maxlen		= sizeof (int),
465 		.mode		= 0644,
466 		.proc_handler	= &proc_dointvec_minmax,
467 		.strategy	= sysctl_intvec,
468 		.extra1		= &pid_max_min,
469 		.extra2		= &pid_max_max,
470 	},
471 	{
472 		.ctl_name	= KERN_PANIC_ON_OOPS,
473 		.procname	= "panic_on_oops",
474 		.data		= &panic_on_oops,
475 		.maxlen		= sizeof(int),
476 		.mode		= 0644,
477 		.proc_handler	= &proc_dointvec,
478 	},
479 	{
480 		.ctl_name	= KERN_PRINTK_RATELIMIT,
481 		.procname	= "printk_ratelimit",
482 		.data		= &printk_ratelimit_jiffies,
483 		.maxlen		= sizeof(int),
484 		.mode		= 0644,
485 		.proc_handler	= &proc_dointvec_jiffies,
486 		.strategy	= &sysctl_jiffies,
487 	},
488 	{
489 		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
490 		.procname	= "printk_ratelimit_burst",
491 		.data		= &printk_ratelimit_burst,
492 		.maxlen		= sizeof(int),
493 		.mode		= 0644,
494 		.proc_handler	= &proc_dointvec,
495 	},
496 	{
497 		.ctl_name	= KERN_NGROUPS_MAX,
498 		.procname	= "ngroups_max",
499 		.data		= &ngroups_max,
500 		.maxlen		= sizeof (int),
501 		.mode		= 0444,
502 		.proc_handler	= &proc_dointvec,
503 	},
504 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
505 	{
506 		.ctl_name       = KERN_UNKNOWN_NMI_PANIC,
507 		.procname       = "unknown_nmi_panic",
508 		.data           = &unknown_nmi_panic,
509 		.maxlen         = sizeof (int),
510 		.mode           = 0644,
511 		.proc_handler   = &proc_dointvec,
512 	},
513 	{
514 		.ctl_name       = KERN_NMI_WATCHDOG,
515 		.procname       = "nmi_watchdog",
516 		.data           = &nmi_watchdog_enabled,
517 		.maxlen         = sizeof (int),
518 		.mode           = 0644,
519 		.proc_handler   = &proc_nmi_enabled,
520 	},
521 #endif
522 #if defined(CONFIG_X86)
523 	{
524 		.ctl_name	= KERN_PANIC_ON_NMI,
525 		.procname	= "panic_on_unrecovered_nmi",
526 		.data		= &panic_on_unrecovered_nmi,
527 		.maxlen		= sizeof(int),
528 		.mode		= 0644,
529 		.proc_handler	= &proc_dointvec,
530 	},
531 	{
532 		.ctl_name	= KERN_BOOTLOADER_TYPE,
533 		.procname	= "bootloader_type",
534 		.data		= &bootloader_type,
535 		.maxlen		= sizeof (int),
536 		.mode		= 0444,
537 		.proc_handler	= &proc_dointvec,
538 	},
539 	{
540 		.ctl_name	= CTL_UNNUMBERED,
541 		.procname	= "kstack_depth_to_print",
542 		.data		= &kstack_depth_to_print,
543 		.maxlen		= sizeof(int),
544 		.mode		= 0644,
545 		.proc_handler	= &proc_dointvec,
546 	},
547 #endif
548 #if defined(CONFIG_MMU)
549 	{
550 		.ctl_name	= KERN_RANDOMIZE,
551 		.procname	= "randomize_va_space",
552 		.data		= &randomize_va_space,
553 		.maxlen		= sizeof(int),
554 		.mode		= 0644,
555 		.proc_handler	= &proc_dointvec,
556 	},
557 #endif
558 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
559 	{
560 		.ctl_name	= KERN_SPIN_RETRY,
561 		.procname	= "spin_retry",
562 		.data		= &spin_retry,
563 		.maxlen		= sizeof (int),
564 		.mode		= 0644,
565 		.proc_handler	= &proc_dointvec,
566 	},
567 #endif
568 #ifdef CONFIG_ACPI_SLEEP
569 	{
570 		.ctl_name	= KERN_ACPI_VIDEO_FLAGS,
571 		.procname	= "acpi_video_flags",
572 		.data		= &acpi_video_flags,
573 		.maxlen		= sizeof (unsigned long),
574 		.mode		= 0644,
575 		.proc_handler	= &proc_doulongvec_minmax,
576 	},
577 #endif
578 #ifdef CONFIG_IA64
579 	{
580 		.ctl_name	= KERN_IA64_UNALIGNED,
581 		.procname	= "ignore-unaligned-usertrap",
582 		.data		= &no_unaligned_warning,
583 		.maxlen		= sizeof (int),
584 	 	.mode		= 0644,
585 		.proc_handler	= &proc_dointvec,
586 	},
587 #endif
588 #ifdef CONFIG_COMPAT
589 	{
590 		.ctl_name	= KERN_COMPAT_LOG,
591 		.procname	= "compat-log",
592 		.data		= &compat_log,
593 		.maxlen		= sizeof (int),
594 	 	.mode		= 0644,
595 		.proc_handler	= &proc_dointvec,
596 	},
597 #endif
598 #ifdef CONFIG_RT_MUTEXES
599 	{
600 		.ctl_name	= KERN_MAX_LOCK_DEPTH,
601 		.procname	= "max_lock_depth",
602 		.data		= &max_lock_depth,
603 		.maxlen		= sizeof(int),
604 		.mode		= 0644,
605 		.proc_handler	= &proc_dointvec,
606 	},
607 #endif
608 #ifdef CONFIG_PROC_FS
609 	{
610 		.ctl_name       = CTL_UNNUMBERED,
611 		.procname       = "maps_protect",
612 		.data           = &maps_protect,
613 		.maxlen         = sizeof(int),
614 		.mode           = 0644,
615 		.proc_handler   = &proc_dointvec,
616 	},
617 #endif
618 
619 	{ .ctl_name = 0 }
620 };
621 
622 /* Constants for minimum and maximum testing in vm_table.
623    We use these as one-element integer vectors. */
624 static int zero;
625 static int one_hundred = 100;
626 
627 
628 static ctl_table vm_table[] = {
629 	{
630 		.ctl_name	= VM_OVERCOMMIT_MEMORY,
631 		.procname	= "overcommit_memory",
632 		.data		= &sysctl_overcommit_memory,
633 		.maxlen		= sizeof(sysctl_overcommit_memory),
634 		.mode		= 0644,
635 		.proc_handler	= &proc_dointvec,
636 	},
637 	{
638 		.ctl_name	= VM_PANIC_ON_OOM,
639 		.procname	= "panic_on_oom",
640 		.data		= &sysctl_panic_on_oom,
641 		.maxlen		= sizeof(sysctl_panic_on_oom),
642 		.mode		= 0644,
643 		.proc_handler	= &proc_dointvec,
644 	},
645 	{
646 		.ctl_name	= VM_OVERCOMMIT_RATIO,
647 		.procname	= "overcommit_ratio",
648 		.data		= &sysctl_overcommit_ratio,
649 		.maxlen		= sizeof(sysctl_overcommit_ratio),
650 		.mode		= 0644,
651 		.proc_handler	= &proc_dointvec,
652 	},
653 	{
654 		.ctl_name	= VM_PAGE_CLUSTER,
655 		.procname	= "page-cluster",
656 		.data		= &page_cluster,
657 		.maxlen		= sizeof(int),
658 		.mode		= 0644,
659 		.proc_handler	= &proc_dointvec,
660 	},
661 	{
662 		.ctl_name	= VM_DIRTY_BACKGROUND,
663 		.procname	= "dirty_background_ratio",
664 		.data		= &dirty_background_ratio,
665 		.maxlen		= sizeof(dirty_background_ratio),
666 		.mode		= 0644,
667 		.proc_handler	= &proc_dointvec_minmax,
668 		.strategy	= &sysctl_intvec,
669 		.extra1		= &zero,
670 		.extra2		= &one_hundred,
671 	},
672 	{
673 		.ctl_name	= VM_DIRTY_RATIO,
674 		.procname	= "dirty_ratio",
675 		.data		= &vm_dirty_ratio,
676 		.maxlen		= sizeof(vm_dirty_ratio),
677 		.mode		= 0644,
678 		.proc_handler	= &proc_dointvec_minmax,
679 		.strategy	= &sysctl_intvec,
680 		.extra1		= &zero,
681 		.extra2		= &one_hundred,
682 	},
683 	{
684 		.ctl_name	= VM_DIRTY_WB_CS,
685 		.procname	= "dirty_writeback_centisecs",
686 		.data		= &dirty_writeback_interval,
687 		.maxlen		= sizeof(dirty_writeback_interval),
688 		.mode		= 0644,
689 		.proc_handler	= &dirty_writeback_centisecs_handler,
690 	},
691 	{
692 		.ctl_name	= VM_DIRTY_EXPIRE_CS,
693 		.procname	= "dirty_expire_centisecs",
694 		.data		= &dirty_expire_interval,
695 		.maxlen		= sizeof(dirty_expire_interval),
696 		.mode		= 0644,
697 		.proc_handler	= &proc_dointvec_userhz_jiffies,
698 	},
699 	{
700 		.ctl_name	= VM_NR_PDFLUSH_THREADS,
701 		.procname	= "nr_pdflush_threads",
702 		.data		= &nr_pdflush_threads,
703 		.maxlen		= sizeof nr_pdflush_threads,
704 		.mode		= 0444 /* read-only*/,
705 		.proc_handler	= &proc_dointvec,
706 	},
707 	{
708 		.ctl_name	= VM_SWAPPINESS,
709 		.procname	= "swappiness",
710 		.data		= &vm_swappiness,
711 		.maxlen		= sizeof(vm_swappiness),
712 		.mode		= 0644,
713 		.proc_handler	= &proc_dointvec_minmax,
714 		.strategy	= &sysctl_intvec,
715 		.extra1		= &zero,
716 		.extra2		= &one_hundred,
717 	},
718 #ifdef CONFIG_HUGETLB_PAGE
719 	 {
720 		.ctl_name	= VM_HUGETLB_PAGES,
721 		.procname	= "nr_hugepages",
722 		.data		= &max_huge_pages,
723 		.maxlen		= sizeof(unsigned long),
724 		.mode		= 0644,
725 		.proc_handler	= &hugetlb_sysctl_handler,
726 		.extra1		= (void *)&hugetlb_zero,
727 		.extra2		= (void *)&hugetlb_infinity,
728 	 },
729 	 {
730 		.ctl_name	= VM_HUGETLB_GROUP,
731 		.procname	= "hugetlb_shm_group",
732 		.data		= &sysctl_hugetlb_shm_group,
733 		.maxlen		= sizeof(gid_t),
734 		.mode		= 0644,
735 		.proc_handler	= &proc_dointvec,
736 	 },
737 #endif
738 	{
739 		.ctl_name	= VM_LOWMEM_RESERVE_RATIO,
740 		.procname	= "lowmem_reserve_ratio",
741 		.data		= &sysctl_lowmem_reserve_ratio,
742 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
743 		.mode		= 0644,
744 		.proc_handler	= &lowmem_reserve_ratio_sysctl_handler,
745 		.strategy	= &sysctl_intvec,
746 	},
747 	{
748 		.ctl_name	= VM_DROP_PAGECACHE,
749 		.procname	= "drop_caches",
750 		.data		= &sysctl_drop_caches,
751 		.maxlen		= sizeof(int),
752 		.mode		= 0644,
753 		.proc_handler	= drop_caches_sysctl_handler,
754 		.strategy	= &sysctl_intvec,
755 	},
756 	{
757 		.ctl_name	= VM_MIN_FREE_KBYTES,
758 		.procname	= "min_free_kbytes",
759 		.data		= &min_free_kbytes,
760 		.maxlen		= sizeof(min_free_kbytes),
761 		.mode		= 0644,
762 		.proc_handler	= &min_free_kbytes_sysctl_handler,
763 		.strategy	= &sysctl_intvec,
764 		.extra1		= &zero,
765 	},
766 	{
767 		.ctl_name	= VM_PERCPU_PAGELIST_FRACTION,
768 		.procname	= "percpu_pagelist_fraction",
769 		.data		= &percpu_pagelist_fraction,
770 		.maxlen		= sizeof(percpu_pagelist_fraction),
771 		.mode		= 0644,
772 		.proc_handler	= &percpu_pagelist_fraction_sysctl_handler,
773 		.strategy	= &sysctl_intvec,
774 		.extra1		= &min_percpu_pagelist_fract,
775 	},
776 #ifdef CONFIG_MMU
777 	{
778 		.ctl_name	= VM_MAX_MAP_COUNT,
779 		.procname	= "max_map_count",
780 		.data		= &sysctl_max_map_count,
781 		.maxlen		= sizeof(sysctl_max_map_count),
782 		.mode		= 0644,
783 		.proc_handler	= &proc_dointvec
784 	},
785 #endif
786 	{
787 		.ctl_name	= VM_LAPTOP_MODE,
788 		.procname	= "laptop_mode",
789 		.data		= &laptop_mode,
790 		.maxlen		= sizeof(laptop_mode),
791 		.mode		= 0644,
792 		.proc_handler	= &proc_dointvec_jiffies,
793 		.strategy	= &sysctl_jiffies,
794 	},
795 	{
796 		.ctl_name	= VM_BLOCK_DUMP,
797 		.procname	= "block_dump",
798 		.data		= &block_dump,
799 		.maxlen		= sizeof(block_dump),
800 		.mode		= 0644,
801 		.proc_handler	= &proc_dointvec,
802 		.strategy	= &sysctl_intvec,
803 		.extra1		= &zero,
804 	},
805 	{
806 		.ctl_name	= VM_VFS_CACHE_PRESSURE,
807 		.procname	= "vfs_cache_pressure",
808 		.data		= &sysctl_vfs_cache_pressure,
809 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
810 		.mode		= 0644,
811 		.proc_handler	= &proc_dointvec,
812 		.strategy	= &sysctl_intvec,
813 		.extra1		= &zero,
814 	},
815 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
816 	{
817 		.ctl_name	= VM_LEGACY_VA_LAYOUT,
818 		.procname	= "legacy_va_layout",
819 		.data		= &sysctl_legacy_va_layout,
820 		.maxlen		= sizeof(sysctl_legacy_va_layout),
821 		.mode		= 0644,
822 		.proc_handler	= &proc_dointvec,
823 		.strategy	= &sysctl_intvec,
824 		.extra1		= &zero,
825 	},
826 #endif
827 #ifdef CONFIG_NUMA
828 	{
829 		.ctl_name	= VM_ZONE_RECLAIM_MODE,
830 		.procname	= "zone_reclaim_mode",
831 		.data		= &zone_reclaim_mode,
832 		.maxlen		= sizeof(zone_reclaim_mode),
833 		.mode		= 0644,
834 		.proc_handler	= &proc_dointvec,
835 		.strategy	= &sysctl_intvec,
836 		.extra1		= &zero,
837 	},
838 	{
839 		.ctl_name	= VM_MIN_UNMAPPED,
840 		.procname	= "min_unmapped_ratio",
841 		.data		= &sysctl_min_unmapped_ratio,
842 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
843 		.mode		= 0644,
844 		.proc_handler	= &sysctl_min_unmapped_ratio_sysctl_handler,
845 		.strategy	= &sysctl_intvec,
846 		.extra1		= &zero,
847 		.extra2		= &one_hundred,
848 	},
849 	{
850 		.ctl_name	= VM_MIN_SLAB,
851 		.procname	= "min_slab_ratio",
852 		.data		= &sysctl_min_slab_ratio,
853 		.maxlen		= sizeof(sysctl_min_slab_ratio),
854 		.mode		= 0644,
855 		.proc_handler	= &sysctl_min_slab_ratio_sysctl_handler,
856 		.strategy	= &sysctl_intvec,
857 		.extra1		= &zero,
858 		.extra2		= &one_hundred,
859 	},
860 #endif
861 #ifdef CONFIG_SMP
862 	{
863 		.ctl_name	= CTL_UNNUMBERED,
864 		.procname	= "stat_interval",
865 		.data		= &sysctl_stat_interval,
866 		.maxlen		= sizeof(sysctl_stat_interval),
867 		.mode		= 0644,
868 		.proc_handler	= &proc_dointvec_jiffies,
869 		.strategy	= &sysctl_jiffies,
870 	},
871 #endif
872 #if defined(CONFIG_X86_32) || \
873    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
874 	{
875 		.ctl_name	= VM_VDSO_ENABLED,
876 		.procname	= "vdso_enabled",
877 		.data		= &vdso_enabled,
878 		.maxlen		= sizeof(vdso_enabled),
879 		.mode		= 0644,
880 		.proc_handler	= &proc_dointvec,
881 		.strategy	= &sysctl_intvec,
882 		.extra1		= &zero,
883 	},
884 #endif
885 	{ .ctl_name = 0 }
886 };
887 
888 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
889 static ctl_table binfmt_misc_table[] = {
890 	{ .ctl_name = 0 }
891 };
892 #endif
893 
894 static ctl_table fs_table[] = {
895 	{
896 		.ctl_name	= FS_NRINODE,
897 		.procname	= "inode-nr",
898 		.data		= &inodes_stat,
899 		.maxlen		= 2*sizeof(int),
900 		.mode		= 0444,
901 		.proc_handler	= &proc_dointvec,
902 	},
903 	{
904 		.ctl_name	= FS_STATINODE,
905 		.procname	= "inode-state",
906 		.data		= &inodes_stat,
907 		.maxlen		= 7*sizeof(int),
908 		.mode		= 0444,
909 		.proc_handler	= &proc_dointvec,
910 	},
911 	{
912 		.ctl_name	= FS_NRFILE,
913 		.procname	= "file-nr",
914 		.data		= &files_stat,
915 		.maxlen		= 3*sizeof(int),
916 		.mode		= 0444,
917 		.proc_handler	= &proc_nr_files,
918 	},
919 	{
920 		.ctl_name	= FS_MAXFILE,
921 		.procname	= "file-max",
922 		.data		= &files_stat.max_files,
923 		.maxlen		= sizeof(int),
924 		.mode		= 0644,
925 		.proc_handler	= &proc_dointvec,
926 	},
927 	{
928 		.ctl_name	= FS_DENTRY,
929 		.procname	= "dentry-state",
930 		.data		= &dentry_stat,
931 		.maxlen		= 6*sizeof(int),
932 		.mode		= 0444,
933 		.proc_handler	= &proc_dointvec,
934 	},
935 	{
936 		.ctl_name	= FS_OVERFLOWUID,
937 		.procname	= "overflowuid",
938 		.data		= &fs_overflowuid,
939 		.maxlen		= sizeof(int),
940 		.mode		= 0644,
941 		.proc_handler	= &proc_dointvec_minmax,
942 		.strategy	= &sysctl_intvec,
943 		.extra1		= &minolduid,
944 		.extra2		= &maxolduid,
945 	},
946 	{
947 		.ctl_name	= FS_OVERFLOWGID,
948 		.procname	= "overflowgid",
949 		.data		= &fs_overflowgid,
950 		.maxlen		= sizeof(int),
951 		.mode		= 0644,
952 		.proc_handler	= &proc_dointvec_minmax,
953 		.strategy	= &sysctl_intvec,
954 		.extra1		= &minolduid,
955 		.extra2		= &maxolduid,
956 	},
957 	{
958 		.ctl_name	= FS_LEASES,
959 		.procname	= "leases-enable",
960 		.data		= &leases_enable,
961 		.maxlen		= sizeof(int),
962 		.mode		= 0644,
963 		.proc_handler	= &proc_dointvec,
964 	},
965 #ifdef CONFIG_DNOTIFY
966 	{
967 		.ctl_name	= FS_DIR_NOTIFY,
968 		.procname	= "dir-notify-enable",
969 		.data		= &dir_notify_enable,
970 		.maxlen		= sizeof(int),
971 		.mode		= 0644,
972 		.proc_handler	= &proc_dointvec,
973 	},
974 #endif
975 #ifdef CONFIG_MMU
976 	{
977 		.ctl_name	= FS_LEASE_TIME,
978 		.procname	= "lease-break-time",
979 		.data		= &lease_break_time,
980 		.maxlen		= sizeof(int),
981 		.mode		= 0644,
982 		.proc_handler	= &proc_dointvec,
983 	},
984 	{
985 		.ctl_name	= FS_AIO_NR,
986 		.procname	= "aio-nr",
987 		.data		= &aio_nr,
988 		.maxlen		= sizeof(aio_nr),
989 		.mode		= 0444,
990 		.proc_handler	= &proc_doulongvec_minmax,
991 	},
992 	{
993 		.ctl_name	= FS_AIO_MAX_NR,
994 		.procname	= "aio-max-nr",
995 		.data		= &aio_max_nr,
996 		.maxlen		= sizeof(aio_max_nr),
997 		.mode		= 0644,
998 		.proc_handler	= &proc_doulongvec_minmax,
999 	},
1000 #ifdef CONFIG_INOTIFY_USER
1001 	{
1002 		.ctl_name	= FS_INOTIFY,
1003 		.procname	= "inotify",
1004 		.mode		= 0555,
1005 		.child		= inotify_table,
1006 	},
1007 #endif
1008 #endif
1009 	{
1010 		.ctl_name	= KERN_SETUID_DUMPABLE,
1011 		.procname	= "suid_dumpable",
1012 		.data		= &suid_dumpable,
1013 		.maxlen		= sizeof(int),
1014 		.mode		= 0644,
1015 		.proc_handler	= &proc_dointvec,
1016 	},
1017 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1018 	{
1019 		.ctl_name	= CTL_UNNUMBERED,
1020 		.procname	= "binfmt_misc",
1021 		.mode		= 0555,
1022 		.child		= binfmt_misc_table,
1023 	},
1024 #endif
1025 	{ .ctl_name = 0 }
1026 };
1027 
1028 static ctl_table debug_table[] = {
1029 	{ .ctl_name = 0 }
1030 };
1031 
1032 static ctl_table dev_table[] = {
1033 	{ .ctl_name = 0 }
1034 };
1035 
1036 static DEFINE_SPINLOCK(sysctl_lock);
1037 
1038 /* called under sysctl_lock */
1039 static int use_table(struct ctl_table_header *p)
1040 {
1041 	if (unlikely(p->unregistering))
1042 		return 0;
1043 	p->used++;
1044 	return 1;
1045 }
1046 
1047 /* called under sysctl_lock */
1048 static void unuse_table(struct ctl_table_header *p)
1049 {
1050 	if (!--p->used)
1051 		if (unlikely(p->unregistering))
1052 			complete(p->unregistering);
1053 }
1054 
1055 /* called under sysctl_lock, will reacquire if has to wait */
1056 static void start_unregistering(struct ctl_table_header *p)
1057 {
1058 	/*
1059 	 * if p->used is 0, nobody will ever touch that entry again;
1060 	 * we'll eliminate all paths to it before dropping sysctl_lock
1061 	 */
1062 	if (unlikely(p->used)) {
1063 		struct completion wait;
1064 		init_completion(&wait);
1065 		p->unregistering = &wait;
1066 		spin_unlock(&sysctl_lock);
1067 		wait_for_completion(&wait);
1068 		spin_lock(&sysctl_lock);
1069 	}
1070 	/*
1071 	 * do not remove from the list until nobody holds it; walking the
1072 	 * list in do_sysctl() relies on that.
1073 	 */
1074 	list_del_init(&p->ctl_entry);
1075 }
1076 
1077 void sysctl_head_finish(struct ctl_table_header *head)
1078 {
1079 	if (!head)
1080 		return;
1081 	spin_lock(&sysctl_lock);
1082 	unuse_table(head);
1083 	spin_unlock(&sysctl_lock);
1084 }
1085 
1086 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1087 {
1088 	struct ctl_table_header *head;
1089 	struct list_head *tmp;
1090 	spin_lock(&sysctl_lock);
1091 	if (prev) {
1092 		tmp = &prev->ctl_entry;
1093 		unuse_table(prev);
1094 		goto next;
1095 	}
1096 	tmp = &root_table_header.ctl_entry;
1097 	for (;;) {
1098 		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1099 
1100 		if (!use_table(head))
1101 			goto next;
1102 		spin_unlock(&sysctl_lock);
1103 		return head;
1104 	next:
1105 		tmp = tmp->next;
1106 		if (tmp == &root_table_header.ctl_entry)
1107 			break;
1108 	}
1109 	spin_unlock(&sysctl_lock);
1110 	return NULL;
1111 }
1112 
1113 #ifdef CONFIG_SYSCTL_SYSCALL
1114 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1115 	       void __user *newval, size_t newlen)
1116 {
1117 	struct ctl_table_header *head;
1118 	int error = -ENOTDIR;
1119 
1120 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
1121 		return -ENOTDIR;
1122 	if (oldval) {
1123 		int old_len;
1124 		if (!oldlenp || get_user(old_len, oldlenp))
1125 			return -EFAULT;
1126 	}
1127 
1128 	for (head = sysctl_head_next(NULL); head;
1129 			head = sysctl_head_next(head)) {
1130 		error = parse_table(name, nlen, oldval, oldlenp,
1131 					newval, newlen, head->ctl_table);
1132 		if (error != -ENOTDIR) {
1133 			sysctl_head_finish(head);
1134 			break;
1135 		}
1136 	}
1137 	return error;
1138 }
1139 
1140 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1141 {
1142 	struct __sysctl_args tmp;
1143 	int error;
1144 
1145 	if (copy_from_user(&tmp, args, sizeof(tmp)))
1146 		return -EFAULT;
1147 
1148 	lock_kernel();
1149 	error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1150 			  tmp.newval, tmp.newlen);
1151 	unlock_kernel();
1152 	return error;
1153 }
1154 #endif /* CONFIG_SYSCTL_SYSCALL */
1155 
1156 /*
1157  * sysctl_perm does NOT grant the superuser all rights automatically, because
1158  * some sysctl variables are readonly even to root.
1159  */
1160 
1161 static int test_perm(int mode, int op)
1162 {
1163 	if (!current->euid)
1164 		mode >>= 6;
1165 	else if (in_egroup_p(0))
1166 		mode >>= 3;
1167 	if ((mode & op & 0007) == op)
1168 		return 0;
1169 	return -EACCES;
1170 }
1171 
1172 int sysctl_perm(ctl_table *table, int op)
1173 {
1174 	int error;
1175 	error = security_sysctl(table, op);
1176 	if (error)
1177 		return error;
1178 	return test_perm(table->mode, op);
1179 }
1180 
1181 #ifdef CONFIG_SYSCTL_SYSCALL
1182 static int parse_table(int __user *name, int nlen,
1183 		       void __user *oldval, size_t __user *oldlenp,
1184 		       void __user *newval, size_t newlen,
1185 		       ctl_table *table)
1186 {
1187 	int n;
1188 repeat:
1189 	if (!nlen)
1190 		return -ENOTDIR;
1191 	if (get_user(n, name))
1192 		return -EFAULT;
1193 	for ( ; table->ctl_name || table->procname; table++) {
1194 		if (!table->ctl_name)
1195 			continue;
1196 		if (n == table->ctl_name) {
1197 			int error;
1198 			if (table->child) {
1199 				if (sysctl_perm(table, 001))
1200 					return -EPERM;
1201 				name++;
1202 				nlen--;
1203 				table = table->child;
1204 				goto repeat;
1205 			}
1206 			error = do_sysctl_strategy(table, name, nlen,
1207 						   oldval, oldlenp,
1208 						   newval, newlen);
1209 			return error;
1210 		}
1211 	}
1212 	return -ENOTDIR;
1213 }
1214 
1215 /* Perform the actual read/write of a sysctl table entry. */
1216 int do_sysctl_strategy (ctl_table *table,
1217 			int __user *name, int nlen,
1218 			void __user *oldval, size_t __user *oldlenp,
1219 			void __user *newval, size_t newlen)
1220 {
1221 	int op = 0, rc;
1222 	size_t len;
1223 
1224 	if (oldval)
1225 		op |= 004;
1226 	if (newval)
1227 		op |= 002;
1228 	if (sysctl_perm(table, op))
1229 		return -EPERM;
1230 
1231 	if (table->strategy) {
1232 		rc = table->strategy(table, name, nlen, oldval, oldlenp,
1233 				     newval, newlen);
1234 		if (rc < 0)
1235 			return rc;
1236 		if (rc > 0)
1237 			return 0;
1238 	}
1239 
1240 	/* If there is no strategy routine, or if the strategy returns
1241 	 * zero, proceed with automatic r/w */
1242 	if (table->data && table->maxlen) {
1243 		if (oldval && oldlenp) {
1244 			if (get_user(len, oldlenp))
1245 				return -EFAULT;
1246 			if (len) {
1247 				if (len > table->maxlen)
1248 					len = table->maxlen;
1249 				if(copy_to_user(oldval, table->data, len))
1250 					return -EFAULT;
1251 				if(put_user(len, oldlenp))
1252 					return -EFAULT;
1253 			}
1254 		}
1255 		if (newval && newlen) {
1256 			len = newlen;
1257 			if (len > table->maxlen)
1258 				len = table->maxlen;
1259 			if(copy_from_user(table->data, newval, len))
1260 				return -EFAULT;
1261 		}
1262 	}
1263 	return 0;
1264 }
1265 #endif /* CONFIG_SYSCTL_SYSCALL */
1266 
1267 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1268 {
1269 	for (; table->ctl_name || table->procname; table++) {
1270 		table->parent = parent;
1271 		if (table->child)
1272 			sysctl_set_parent(table, table->child);
1273 	}
1274 }
1275 
1276 static __init int sysctl_init(void)
1277 {
1278 	sysctl_set_parent(NULL, root_table);
1279 	return 0;
1280 }
1281 
1282 core_initcall(sysctl_init);
1283 
1284 /**
1285  * register_sysctl_table - register a sysctl hierarchy
1286  * @table: the top-level table structure
1287  *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. An entry with a ctl_name of 0 and a %NULL procname terminates the
 * table.
1290  *
1291  * The members of the &ctl_table structure are used as follows:
1292  *
1293  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1294  *            must be unique within that level of sysctl
1295  *
1296  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1297  *            enter a sysctl file
1298  *
1299  * data - a pointer to data for use by proc_handler
1300  *
1301  * maxlen - the maximum size in bytes of the data
1302  *
1303  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1304  *
1305  * child - a pointer to the child sysctl table if this entry is a directory, or
1306  *         %NULL.
1307  *
1308  * proc_handler - the text handler routine (described below)
1309  *
1310  * strategy - the strategy routine (described below)
1311  *
1312  * de - for internal use by the sysctl routines
1313  *
1314  * extra1, extra2 - extra pointers usable by the proc handler routines
1315  *
1316  * Leaf nodes in the sysctl tree will be represented by a single file
1317  * under /proc; non-leaf nodes will be represented by directories.
1318  *
1319  * sysctl(2) can automatically manage read and write requests through
1320  * the sysctl table.  The data and maxlen fields of the ctl_table
1321  * struct enable minimal validation of the values being written to be
1322  * performed, and the mode field allows minimal authentication.
1323  *
1324  * More sophisticated management can be enabled by the provision of a
1325  * strategy routine with the table entry.  This will be called before
1326  * any automatic read or write of the data is performed.
1327  *
1328  * The strategy routine may return
1329  *
1330  * < 0 - Error occurred (error is passed to user process)
1331  *
1332  * 0   - OK - proceed with automatic read or write.
1333  *
1334  * > 0 - OK - read or write has been done by the strategy routine, so
1335  *       return immediately.
1336  *
1337  * There must be a proc_handler routine for any terminal nodes
1338  * mirrored under /proc/sys (non-terminals are handled by a built-in
1339  * directory handler).  Several default handlers are available to
1340  * cover common cases -
1341  *
1342  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1343  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1344  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1345  *
1346  * It is the handler's job to read the input buffer from user memory
1347  * and process it. The handler should return 0 on success.
1348  *
1349  * This routine returns %NULL on a failure to register, and a pointer
1350  * to the table header on success.
1351  */
1352 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1353 {
1354 	struct ctl_table_header *tmp;
1355 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1356 	if (!tmp)
1357 		return NULL;
1358 	tmp->ctl_table = table;
1359 	INIT_LIST_HEAD(&tmp->ctl_entry);
1360 	tmp->used = 0;
1361 	tmp->unregistering = NULL;
1362 	sysctl_set_parent(NULL, table);
1363 	spin_lock(&sysctl_lock);
1364 	list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1365 	spin_unlock(&sysctl_lock);
1366 	return tmp;
1367 }
1368 
1369 /**
1370  * unregister_sysctl_table - unregister a sysctl table hierarchy
1371  * @header: the header returned from register_sysctl_table
1372  *
1373  * Unregisters the sysctl table and all children. proc entries may not
1374  * actually be removed until they are no longer used by anyone.
1375  */
1376 void unregister_sysctl_table(struct ctl_table_header * header)
1377 {
1378 	might_sleep();
1379 	spin_lock(&sysctl_lock);
1380 	start_unregistering(header);
1381 	spin_unlock(&sysctl_lock);
1382 	kfree(header);
1383 }
1384 
1385 #else /* !CONFIG_SYSCTL */
1386 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1387 {
1388 	return NULL;
1389 }
1390 
/* Stub for the !CONFIG_SYSCTL branch: there is nothing to unregister. */
void unregister_sysctl_table(struct ctl_table_header * table)
{
}
1394 
1395 #endif /* CONFIG_SYSCTL */
1396 
1397 /*
1398  * /proc/sys support
1399  */
1400 
1401 #ifdef CONFIG_PROC_SYSCTL
1402 
1403 static int _proc_do_string(void* data, int maxlen, int write,
1404 			   struct file *filp, void __user *buffer,
1405 			   size_t *lenp, loff_t *ppos)
1406 {
1407 	size_t len;
1408 	char __user *p;
1409 	char c;
1410 
1411 	if (!data || !maxlen || !*lenp) {
1412 		*lenp = 0;
1413 		return 0;
1414 	}
1415 
1416 	if (write) {
1417 		len = 0;
1418 		p = buffer;
1419 		while (len < *lenp) {
1420 			if (get_user(c, p++))
1421 				return -EFAULT;
1422 			if (c == 0 || c == '\n')
1423 				break;
1424 			len++;
1425 		}
1426 		if (len >= maxlen)
1427 			len = maxlen-1;
1428 		if(copy_from_user(data, buffer, len))
1429 			return -EFAULT;
1430 		((char *) data)[len] = 0;
1431 		*ppos += *lenp;
1432 	} else {
1433 		len = strlen(data);
1434 		if (len > maxlen)
1435 			len = maxlen;
1436 
1437 		if (*ppos > len) {
1438 			*lenp = 0;
1439 			return 0;
1440 		}
1441 
1442 		data += *ppos;
1443 		len  -= *ppos;
1444 
1445 		if (len > *lenp)
1446 			len = *lenp;
1447 		if (len)
1448 			if(copy_to_user(buffer, data, len))
1449 				return -EFAULT;
1450 		if (len < *lenp) {
1451 			if(put_user('\n', ((char __user *) buffer) + len))
1452 				return -EFAULT;
1453 			len++;
1454 		}
1455 		*lenp = len;
1456 		*ppos += len;
1457 	}
1458 	return 0;
1459 }
1460 
1461 /**
1462  * proc_dostring - read a string sysctl
1463  * @table: the sysctl table
1464  * @write: %TRUE if this is a write to the sysctl file
1465  * @filp: the file structure
1466  * @buffer: the user buffer
1467  * @lenp: the size of the user buffer
1468  * @ppos: file position
1469  *
1470  * Reads/writes a string from/to the user buffer. If the kernel
1471  * buffer provided is not large enough to hold the string, the
1472  * string is truncated. The copied string is %NULL-terminated.
1473  * If the string is being read by the user process, it is copied
1474  * and a newline '\n' is added. It is truncated if the buffer is
1475  * not large enough.
1476  *
1477  * Returns 0 on success.
1478  */
1479 int proc_dostring(ctl_table *table, int write, struct file *filp,
1480 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1481 {
1482 	return _proc_do_string(table->data, table->maxlen, write, filp,
1483 			       buffer, lenp, ppos);
1484 }
1485 
1486 
/*
 * Default conversion between the parsed (sign, magnitude) pair and the
 * stored int.
 *
 * write != 0: store *lvalp, negated when *negp is set, into *valp.
 * write == 0: split *valp into *negp (-1 for negative, else 0) and its
 *             magnitude in *lvalp.
 *
 * @data is unused.  Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and yields a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1505 
1506 static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
1507 		  int write, struct file *filp, void __user *buffer,
1508 		  size_t *lenp, loff_t *ppos,
1509 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1510 			      int write, void *data),
1511 		  void *data)
1512 {
1513 #define TMPBUFLEN 21
1514 	int *i, vleft, first=1, neg, val;
1515 	unsigned long lval;
1516 	size_t left, len;
1517 
1518 	char buf[TMPBUFLEN], *p;
1519 	char __user *s = buffer;
1520 
1521 	if (!tbl_data || !table->maxlen || !*lenp ||
1522 	    (*ppos && !write)) {
1523 		*lenp = 0;
1524 		return 0;
1525 	}
1526 
1527 	i = (int *) tbl_data;
1528 	vleft = table->maxlen / sizeof(*i);
1529 	left = *lenp;
1530 
1531 	if (!conv)
1532 		conv = do_proc_dointvec_conv;
1533 
1534 	for (; left && vleft--; i++, first=0) {
1535 		if (write) {
1536 			while (left) {
1537 				char c;
1538 				if (get_user(c, s))
1539 					return -EFAULT;
1540 				if (!isspace(c))
1541 					break;
1542 				left--;
1543 				s++;
1544 			}
1545 			if (!left)
1546 				break;
1547 			neg = 0;
1548 			len = left;
1549 			if (len > sizeof(buf) - 1)
1550 				len = sizeof(buf) - 1;
1551 			if (copy_from_user(buf, s, len))
1552 				return -EFAULT;
1553 			buf[len] = 0;
1554 			p = buf;
1555 			if (*p == '-' && left > 1) {
1556 				neg = 1;
1557 				p++;
1558 			}
1559 			if (*p < '0' || *p > '9')
1560 				break;
1561 
1562 			lval = simple_strtoul(p, &p, 0);
1563 
1564 			len = p-buf;
1565 			if ((len < left) && *p && !isspace(*p))
1566 				break;
1567 			if (neg)
1568 				val = -val;
1569 			s += len;
1570 			left -= len;
1571 
1572 			if (conv(&neg, &lval, i, 1, data))
1573 				break;
1574 		} else {
1575 			p = buf;
1576 			if (!first)
1577 				*p++ = '\t';
1578 
1579 			if (conv(&neg, &lval, i, 0, data))
1580 				break;
1581 
1582 			sprintf(p, "%s%lu", neg ? "-" : "", lval);
1583 			len = strlen(buf);
1584 			if (len > left)
1585 				len = left;
1586 			if(copy_to_user(s, buf, len))
1587 				return -EFAULT;
1588 			left -= len;
1589 			s += len;
1590 		}
1591 	}
1592 
1593 	if (!write && !first && left) {
1594 		if(put_user('\n', s))
1595 			return -EFAULT;
1596 		left--, s++;
1597 	}
1598 	if (write) {
1599 		while (left) {
1600 			char c;
1601 			if (get_user(c, s++))
1602 				return -EFAULT;
1603 			if (!isspace(c))
1604 				break;
1605 			left--;
1606 		}
1607 	}
1608 	if (write && first)
1609 		return -EINVAL;
1610 	*lenp -= left;
1611 	*ppos += *lenp;
1612 	return 0;
1613 #undef TMPBUFLEN
1614 }
1615 
1616 static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1617 		  void __user *buffer, size_t *lenp, loff_t *ppos,
1618 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1619 			      int write, void *data),
1620 		  void *data)
1621 {
1622 	return __do_proc_dointvec(table->data, table, write, filp,
1623 			buffer, lenp, ppos, conv, data);
1624 }
1625 
1626 /**
1627  * proc_dointvec - read a vector of integers
1628  * @table: the sysctl table
1629  * @write: %TRUE if this is a write to the sysctl file
1630  * @filp: the file structure
1631  * @buffer: the user buffer
1632  * @lenp: the size of the user buffer
1633  * @ppos: file position
1634  *
1635  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1636  * values from/to the user buffer, treated as an ASCII string.
1637  *
1638  * Returns 0 on success.
1639  */
1640 int proc_dointvec(ctl_table *table, int write, struct file *filp,
1641 		     void __user *buffer, size_t *lenp, loff_t *ppos)
1642 {
1643     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1644 		    	    NULL,NULL);
1645 }
1646 
/* How a written value is combined with the stored one. */
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2

/*
 * Conversion callback for bit-set style sysctls.  @data points to an int
 * holding one of OP_SET / OP_AND / OP_OR.
 *
 * write != 0: combine the parsed value with *valp according to the op; an
 *             unrecognised op leaves *valp untouched.
 * write == 0: split *valp into *negp (-1 for negative, else 0) and its
 *             magnitude in *lvalp.
 *
 * Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:	*valp = val; break;
		case OP_AND:	*valp &= val; break;
		case OP_OR:	*valp |= val; break;
		default:	break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and yields a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1675 
1676 /*
1677  *	init may raise the set.
1678  */
1679 
1680 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1681 			void __user *buffer, size_t *lenp, loff_t *ppos)
1682 {
1683 	int op;
1684 
1685 	if (write && !capable(CAP_SYS_MODULE)) {
1686 		return -EPERM;
1687 	}
1688 
1689 	op = is_init(current) ? OP_SET : OP_AND;
1690 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1691 				do_proc_dointvec_bset_conv,&op);
1692 }
1693 
1694 /*
1695  *	Taint values can only be increased
1696  */
1697 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
1698 			       void __user *buffer, size_t *lenp, loff_t *ppos)
1699 {
1700 	int op;
1701 
1702 	if (write && !capable(CAP_SYS_ADMIN))
1703 		return -EPERM;
1704 
1705 	op = OP_OR;
1706 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1707 				do_proc_dointvec_bset_conv,&op);
1708 }
1709 
/* Optional bounds for do_proc_dointvec_minmax_conv(); NULL means "no limit". */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback that range-checks written values.  @data points to a
 * struct do_proc_dointvec_minmax_conv_param.
 *
 * write != 0: returns -EINVAL (leaving *valp untouched) if the parsed value
 *             lies outside [*min, *max]; otherwise stores it in *valp.
 * write == 0: split *valp into *negp (-1 for negative, else 0) and its
 *             magnitude in *lvalp.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and yields a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1738 
1739 /**
1740  * proc_dointvec_minmax - read a vector of integers with min/max values
1741  * @table: the sysctl table
1742  * @write: %TRUE if this is a write to the sysctl file
1743  * @filp: the file structure
1744  * @buffer: the user buffer
1745  * @lenp: the size of the user buffer
1746  * @ppos: file position
1747  *
1748  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1749  * values from/to the user buffer, treated as an ASCII string.
1750  *
1751  * This routine will ensure the values are within the range specified by
1752  * table->extra1 (min) and table->extra2 (max).
1753  *
1754  * Returns 0 on success.
1755  */
1756 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
1757 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1758 {
1759 	struct do_proc_dointvec_minmax_conv_param param = {
1760 		.min = (int *) table->extra1,
1761 		.max = (int *) table->extra2,
1762 	};
1763 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
1764 				do_proc_dointvec_minmax_conv, &param);
1765 }
1766 
1767 static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
1768 				     struct file *filp,
1769 				     void __user *buffer,
1770 				     size_t *lenp, loff_t *ppos,
1771 				     unsigned long convmul,
1772 				     unsigned long convdiv)
1773 {
1774 #define TMPBUFLEN 21
1775 	unsigned long *i, *min, *max, val;
1776 	int vleft, first=1, neg;
1777 	size_t len, left;
1778 	char buf[TMPBUFLEN], *p;
1779 	char __user *s = buffer;
1780 
1781 	if (!data || !table->maxlen || !*lenp ||
1782 	    (*ppos && !write)) {
1783 		*lenp = 0;
1784 		return 0;
1785 	}
1786 
1787 	i = (unsigned long *) data;
1788 	min = (unsigned long *) table->extra1;
1789 	max = (unsigned long *) table->extra2;
1790 	vleft = table->maxlen / sizeof(unsigned long);
1791 	left = *lenp;
1792 
1793 	for (; left && vleft--; i++, min++, max++, first=0) {
1794 		if (write) {
1795 			while (left) {
1796 				char c;
1797 				if (get_user(c, s))
1798 					return -EFAULT;
1799 				if (!isspace(c))
1800 					break;
1801 				left--;
1802 				s++;
1803 			}
1804 			if (!left)
1805 				break;
1806 			neg = 0;
1807 			len = left;
1808 			if (len > TMPBUFLEN-1)
1809 				len = TMPBUFLEN-1;
1810 			if (copy_from_user(buf, s, len))
1811 				return -EFAULT;
1812 			buf[len] = 0;
1813 			p = buf;
1814 			if (*p == '-' && left > 1) {
1815 				neg = 1;
1816 				p++;
1817 			}
1818 			if (*p < '0' || *p > '9')
1819 				break;
1820 			val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
1821 			len = p-buf;
1822 			if ((len < left) && *p && !isspace(*p))
1823 				break;
1824 			if (neg)
1825 				val = -val;
1826 			s += len;
1827 			left -= len;
1828 
1829 			if(neg)
1830 				continue;
1831 			if ((min && val < *min) || (max && val > *max))
1832 				continue;
1833 			*i = val;
1834 		} else {
1835 			p = buf;
1836 			if (!first)
1837 				*p++ = '\t';
1838 			sprintf(p, "%lu", convdiv * (*i) / convmul);
1839 			len = strlen(buf);
1840 			if (len > left)
1841 				len = left;
1842 			if(copy_to_user(s, buf, len))
1843 				return -EFAULT;
1844 			left -= len;
1845 			s += len;
1846 		}
1847 	}
1848 
1849 	if (!write && !first && left) {
1850 		if(put_user('\n', s))
1851 			return -EFAULT;
1852 		left--, s++;
1853 	}
1854 	if (write) {
1855 		while (left) {
1856 			char c;
1857 			if (get_user(c, s++))
1858 				return -EFAULT;
1859 			if (!isspace(c))
1860 				break;
1861 			left--;
1862 		}
1863 	}
1864 	if (write && first)
1865 		return -EINVAL;
1866 	*lenp -= left;
1867 	*ppos += *lenp;
1868 	return 0;
1869 #undef TMPBUFLEN
1870 }
1871 
1872 static int do_proc_doulongvec_minmax(ctl_table *table, int write,
1873 				     struct file *filp,
1874 				     void __user *buffer,
1875 				     size_t *lenp, loff_t *ppos,
1876 				     unsigned long convmul,
1877 				     unsigned long convdiv)
1878 {
1879 	return __do_proc_doulongvec_minmax(table->data, table, write,
1880 			filp, buffer, lenp, ppos, convmul, convdiv);
1881 }
1882 
1883 /**
1884  * proc_doulongvec_minmax - read a vector of long integers with min/max values
1885  * @table: the sysctl table
1886  * @write: %TRUE if this is a write to the sysctl file
1887  * @filp: the file structure
1888  * @buffer: the user buffer
1889  * @lenp: the size of the user buffer
1890  * @ppos: file position
1891  *
1892  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1893  * values from/to the user buffer, treated as an ASCII string.
1894  *
1895  * This routine will ensure the values are within the range specified by
1896  * table->extra1 (min) and table->extra2 (max).
1897  *
1898  * Returns 0 on success.
1899  */
1900 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
1901 			   void __user *buffer, size_t *lenp, loff_t *ppos)
1902 {
1903     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
1904 }
1905 
1906 /**
1907  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
1908  * @table: the sysctl table
1909  * @write: %TRUE if this is a write to the sysctl file
1910  * @filp: the file structure
1911  * @buffer: the user buffer
1912  * @lenp: the size of the user buffer
1913  * @ppos: file position
1914  *
1915  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
1916  * values from/to the user buffer, treated as an ASCII string. The values
1917  * are treated as milliseconds, and converted to jiffies when they are stored.
1918  *
1919  * This routine will ensure the values are within the range specified by
1920  * table->extra1 (min) and table->extra2 (max).
1921  *
1922  * Returns 0 on success.
1923  */
1924 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
1925 				      struct file *filp,
1926 				      void __user *buffer,
1927 				      size_t *lenp, loff_t *ppos)
1928 {
1929     return do_proc_doulongvec_minmax(table, write, filp, buffer,
1930 				     lenp, ppos, HZ, 1000l);
1931 }
1932 
1933 
1934 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
1935 					 int *valp,
1936 					 int write, void *data)
1937 {
1938 	if (write) {
1939 		if (*lvalp > LONG_MAX / HZ)
1940 			return 1;
1941 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
1942 	} else {
1943 		int val = *valp;
1944 		unsigned long lval;
1945 		if (val < 0) {
1946 			*negp = -1;
1947 			lval = (unsigned long)-val;
1948 		} else {
1949 			*negp = 0;
1950 			lval = (unsigned long)val;
1951 		}
1952 		*lvalp = lval / HZ;
1953 	}
1954 	return 0;
1955 }
1956 
1957 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
1958 						int *valp,
1959 						int write, void *data)
1960 {
1961 	if (write) {
1962 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
1963 			return 1;
1964 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
1965 	} else {
1966 		int val = *valp;
1967 		unsigned long lval;
1968 		if (val < 0) {
1969 			*negp = -1;
1970 			lval = (unsigned long)-val;
1971 		} else {
1972 			*negp = 0;
1973 			lval = (unsigned long)val;
1974 		}
1975 		*lvalp = jiffies_to_clock_t(lval);
1976 	}
1977 	return 0;
1978 }
1979 
/*
 * Conversion callback between milliseconds (user side) and jiffies (stored
 * int).
 *
 * write != 0: stores the msecs_to_jiffies() conversion of the (signed) input.
 * write == 0: split *valp into *negp (-1 for negative, else 0) and the
 *             jiffies_to_msecs() conversion of its magnitude in *lvalp.
 *
 * @data is unused.  Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and yields a bogus magnitude.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2000 
2001 /**
2002  * proc_dointvec_jiffies - read a vector of integers as seconds
2003  * @table: the sysctl table
2004  * @write: %TRUE if this is a write to the sysctl file
2005  * @filp: the file structure
2006  * @buffer: the user buffer
2007  * @lenp: the size of the user buffer
2008  * @ppos: file position
2009  *
2010  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2011  * values from/to the user buffer, treated as an ASCII string.
2012  * The values read are assumed to be in seconds, and are converted into
2013  * jiffies.
2014  *
2015  * Returns 0 on success.
2016  */
2017 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2018 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2019 {
2020     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2021 		    	    do_proc_dointvec_jiffies_conv,NULL);
2022 }
2023 
2024 /**
2025  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2026  * @table: the sysctl table
2027  * @write: %TRUE if this is a write to the sysctl file
2028  * @filp: the file structure
2029  * @buffer: the user buffer
2030  * @lenp: the size of the user buffer
2031  * @ppos: pointer to the file position
2032  *
2033  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2034  * values from/to the user buffer, treated as an ASCII string.
2035  * The values read are assumed to be in 1/USER_HZ seconds, and
2036  * are converted into jiffies.
2037  *
2038  * Returns 0 on success.
2039  */
2040 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2041 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2042 {
2043     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2044 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2045 }
2046 
2047 /**
2048  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2049  * @table: the sysctl table
2050  * @write: %TRUE if this is a write to the sysctl file
2051  * @filp: the file structure
2052  * @buffer: the user buffer
2053  * @lenp: the size of the user buffer
 * @ppos: the current position in the file
2056  *
2057  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2058  * values from/to the user buffer, treated as an ASCII string.
2059  * The values read are assumed to be in 1/1000 seconds, and
2060  * are converted into jiffies.
2061  *
2062  * Returns 0 on success.
2063  */
2064 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2065 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2066 {
2067 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2068 				do_proc_dointvec_ms_jiffies_conv, NULL);
2069 }
2070 
2071 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
2072 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2073 {
2074 	struct pid *new_pid;
2075 	pid_t tmp;
2076 	int r;
2077 
2078 	tmp = pid_nr(cad_pid);
2079 
2080 	r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2081 			       lenp, ppos, NULL, NULL);
2082 	if (r || !write)
2083 		return r;
2084 
2085 	new_pid = find_get_pid(tmp);
2086 	if (!new_pid)
2087 		return -ESRCH;
2088 
2089 	put_pid(xchg(&cad_pid, new_pid));
2090 	return 0;
2091 }
2092 
#else /* !CONFIG_PROC_SYSCTL */
2094 
2095 int proc_dostring(ctl_table *table, int write, struct file *filp,
2096 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2097 {
2098 	return -ENOSYS;
2099 }
2100 
2101 int proc_dointvec(ctl_table *table, int write, struct file *filp,
2102 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2103 {
2104 	return -ENOSYS;
2105 }
2106 
2107 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2108 			void __user *buffer, size_t *lenp, loff_t *ppos)
2109 {
2110 	return -ENOSYS;
2111 }
2112 
2113 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2114 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2115 {
2116 	return -ENOSYS;
2117 }
2118 
2119 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2120 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2121 {
2122 	return -ENOSYS;
2123 }
2124 
2125 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2126 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2127 {
2128 	return -ENOSYS;
2129 }
2130 
2131 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2132 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2133 {
2134 	return -ENOSYS;
2135 }
2136 
2137 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2138 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2139 {
2140 	return -ENOSYS;
2141 }
2142 
2143 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2144 				      struct file *filp,
2145 				      void __user *buffer,
2146 				      size_t *lenp, loff_t *ppos)
2147 {
2148     return -ENOSYS;
2149 }
2150 
2151 
#endif /* CONFIG_PROC_SYSCTL */
2153 
2154 
2155 #ifdef CONFIG_SYSCTL_SYSCALL
2156 /*
2157  * General sysctl support routines
2158  */
2159 
2160 /* The generic string strategy routine: */
2161 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2162 		  void __user *oldval, size_t __user *oldlenp,
2163 		  void __user *newval, size_t newlen)
2164 {
2165 	if (!table->data || !table->maxlen)
2166 		return -ENOTDIR;
2167 
2168 	if (oldval && oldlenp) {
2169 		size_t bufsize;
2170 		if (get_user(bufsize, oldlenp))
2171 			return -EFAULT;
2172 		if (bufsize) {
2173 			size_t len = strlen(table->data), copied;
2174 
2175 			/* This shouldn't trigger for a well-formed sysctl */
2176 			if (len > table->maxlen)
2177 				len = table->maxlen;
2178 
2179 			/* Copy up to a max of bufsize-1 bytes of the string */
2180 			copied = (len >= bufsize) ? bufsize - 1 : len;
2181 
2182 			if (copy_to_user(oldval, table->data, copied) ||
2183 			    put_user(0, (char __user *)(oldval + copied)))
2184 				return -EFAULT;
2185 			if (put_user(len, oldlenp))
2186 				return -EFAULT;
2187 		}
2188 	}
2189 	if (newval && newlen) {
2190 		size_t len = newlen;
2191 		if (len > table->maxlen)
2192 			len = table->maxlen;
2193 		if(copy_from_user(table->data, newval, len))
2194 			return -EFAULT;
2195 		if (len == table->maxlen)
2196 			len--;
2197 		((char *) table->data)[len] = 0;
2198 	}
2199 	return 1;
2200 }
2201 
2202 /*
2203  * This function makes sure that all of the integers in the vector
2204  * are between the minimum and maximum values given in the arrays
2205  * table->extra1 and table->extra2, respectively.
2206  */
2207 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2208 		void __user *oldval, size_t __user *oldlenp,
2209 		void __user *newval, size_t newlen)
2210 {
2211 
2212 	if (newval && newlen) {
2213 		int __user *vec = (int __user *) newval;
2214 		int *min = (int *) table->extra1;
2215 		int *max = (int *) table->extra2;
2216 		size_t length;
2217 		int i;
2218 
2219 		if (newlen % sizeof(int) != 0)
2220 			return -EINVAL;
2221 
2222 		if (!table->extra1 && !table->extra2)
2223 			return 0;
2224 
2225 		if (newlen > table->maxlen)
2226 			newlen = table->maxlen;
2227 		length = newlen / sizeof(int);
2228 
2229 		for (i = 0; i < length; i++) {
2230 			int value;
2231 			if (get_user(value, vec + i))
2232 				return -EFAULT;
2233 			if (min && value < min[i])
2234 				return -EINVAL;
2235 			if (max && value > max[i])
2236 				return -EINVAL;
2237 		}
2238 	}
2239 	return 0;
2240 }
2241 
2242 /* Strategy function to convert jiffies to seconds */
2243 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2244 		void __user *oldval, size_t __user *oldlenp,
2245 		void __user *newval, size_t newlen)
2246 {
2247 	if (oldval && oldlenp) {
2248 		size_t olen;
2249 
2250 		if (get_user(olen, oldlenp))
2251 			return -EFAULT;
2252 		if (olen) {
2253 			int val;
2254 
2255 			if (olen < sizeof(int))
2256 				return -EINVAL;
2257 
2258 			val = *(int *)(table->data) / HZ;
2259 			if (put_user(val, (int __user *)oldval))
2260 				return -EFAULT;
2261 			if (put_user(sizeof(int), oldlenp))
2262 				return -EFAULT;
2263 		}
2264 	}
2265 	if (newval && newlen) {
2266 		int new;
2267 		if (newlen != sizeof(int))
2268 			return -EINVAL;
2269 		if (get_user(new, (int __user *)newval))
2270 			return -EFAULT;
2271 		*(int *)(table->data) = new*HZ;
2272 	}
2273 	return 1;
2274 }
2275 
2276 /* Strategy function to convert jiffies to seconds */
2277 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2278 		void __user *oldval, size_t __user *oldlenp,
2279 		void __user *newval, size_t newlen)
2280 {
2281 	if (oldval && oldlenp) {
2282 		size_t olen;
2283 
2284 		if (get_user(olen, oldlenp))
2285 			return -EFAULT;
2286 		if (olen) {
2287 			int val;
2288 
2289 			if (olen < sizeof(int))
2290 				return -EINVAL;
2291 
2292 			val = jiffies_to_msecs(*(int *)(table->data));
2293 			if (put_user(val, (int __user *)oldval))
2294 				return -EFAULT;
2295 			if (put_user(sizeof(int), oldlenp))
2296 				return -EFAULT;
2297 		}
2298 	}
2299 	if (newval && newlen) {
2300 		int new;
2301 		if (newlen != sizeof(int))
2302 			return -EINVAL;
2303 		if (get_user(new, (int __user *)newval))
2304 			return -EFAULT;
2305 		*(int *)(table->data) = msecs_to_jiffies(new);
2306 	}
2307 	return 1;
2308 }
2309 
2310 
2311 
2312 #else /* CONFIG_SYSCTL_SYSCALL */
2313 
2314 
2315 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2316 {
2317 	static int msg_count;
2318 	struct __sysctl_args tmp;
2319 	int name[CTL_MAXNAME];
2320 	int i;
2321 
2322 	/* Read in the sysctl name for better debug message logging */
2323 	if (copy_from_user(&tmp, args, sizeof(tmp)))
2324 		return -EFAULT;
2325 	if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME)
2326 		return -ENOTDIR;
2327 	for (i = 0; i < tmp.nlen; i++)
2328 		if (get_user(name[i], tmp.name + i))
2329 			return -EFAULT;
2330 
2331 	/* Ignore accesses to kernel.version */
2332 	if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
2333 		goto out;
2334 
2335 	if (msg_count < 5) {
2336 		msg_count++;
2337 		printk(KERN_INFO
2338 			"warning: process `%s' used the removed sysctl "
2339 			"system call with ", current->comm);
2340 		for (i = 0; i < tmp.nlen; i++)
2341 			printk("%d.", name[i]);
2342 		printk("\n");
2343 	}
2344 out:
2345 	return -ENOSYS;
2346 }
2347 
2348 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2349 		  void __user *oldval, size_t __user *oldlenp,
2350 		  void __user *newval, size_t newlen)
2351 {
2352 	return -ENOSYS;
2353 }
2354 
2355 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2356 		void __user *oldval, size_t __user *oldlenp,
2357 		void __user *newval, size_t newlen)
2358 {
2359 	return -ENOSYS;
2360 }
2361 
2362 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2363 		void __user *oldval, size_t __user *oldlenp,
2364 		void __user *newval, size_t newlen)
2365 {
2366 	return -ENOSYS;
2367 }
2368 
2369 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2370 		void __user *oldval, size_t __user *oldlenp,
2371 		void __user *newval, size_t newlen)
2372 {
2373 	return -ENOSYS;
2374 }
2375 
2376 #endif /* CONFIG_SYSCTL_SYSCALL */
2377 
/*
 * Symbol exports are collected here rather than placed next to each
 * definition: several of these symbols are defined twice (once per
 * branch of a preprocessor conditional), and a single list avoids
 * repeating every EXPORT_SYMBOL() in both branches.
 */

/* proc_do* handlers */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* sysctl_* strategy routines */
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);

/* table registration */
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(unregister_sysctl_table);
2396