xref: /linux/kernel/sysctl.c (revision d8327c784b51b57dac2c26cfad87dce0d68dfd98)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/config.h>
22 #include <linux/module.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/proc_fs.h>
28 #include <linux/capability.h>
29 #include <linux/ctype.h>
30 #include <linux/utsname.h>
31 #include <linux/capability.h>
32 #include <linux/smp_lock.h>
33 #include <linux/init.h>
34 #include <linux/kernel.h>
35 #include <linux/kobject.h>
36 #include <linux/net.h>
37 #include <linux/sysrq.h>
38 #include <linux/highuid.h>
39 #include <linux/writeback.h>
40 #include <linux/hugetlb.h>
41 #include <linux/security.h>
42 #include <linux/initrd.h>
43 #include <linux/times.h>
44 #include <linux/limits.h>
45 #include <linux/dcache.h>
46 #include <linux/syscalls.h>
47 #include <linux/nfs_fs.h>
48 #include <linux/acpi.h>
49 
50 #include <asm/uaccess.h>
51 #include <asm/processor.h>
52 
/*
 * Handler referenced by the "file-nr" entry of fs_table in this file.
 * It is only declared here; the definition lives elsewhere.
 */
extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
                     void __user *buffer, size_t *lenp, loff_t *ppos);
55 
56 #if defined(CONFIG_SYSCTL)
57 
/*
 * External variables not in a header file.
 * Each of these is exported through one of the sysctl tables in this file
 * (as .data, .extra1 or .extra2 of an entry).
 */
extern int C_A_D;
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern int max_threads;
extern int sysrq_enabled;
extern int core_uses_pid;
extern int suid_dumpable;
extern char core_pattern[];
extern int cad_pid;
extern int pid_max;
extern int min_free_kbytes;
extern int printk_ratelimit_jiffies;
extern int printk_ratelimit_burst;
extern int pid_max_min, pid_max_max;
extern int sysctl_drop_caches;
extern int percpu_pagelist_fraction;

#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
/* Defined here (not extern); backs the "unknown_nmi_panic" kern_table entry. */
int unknown_nmi_panic;
extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
				  void __user *, size_t *, loff_t *);
#endif

/*
 * Lower/upper bounds handed to proc_dointvec_minmax (through .extra1 and
 * .extra2) by the overflowuid/overflowgid entries of kern_table and fs_table.
 */
static int maxolduid = 65535;
static int minolduid;
/* Lower bound (.extra1) of the "percpu_pagelist_fraction" vm_table entry. */
static int min_percpu_pagelist_fract = 8;

/* Backing store for the read-only "ngroups_max" kern_table entry. */
static int ngroups_max = NGROUPS_MAX;

#ifdef CONFIG_KMOD
extern char modprobe_path[];
#endif
#ifdef CONFIG_CHR_DEV_SG
extern int sg_big_buff;
#endif
#ifdef CONFIG_SYSVIPC
extern size_t shm_ctlmax;
extern size_t shm_ctlall;
extern int shm_ctlmni;
extern int msg_ctlmax;
extern int msg_ctlmnb;
extern int msg_ctlmni;
extern int sem_ctls[];
#endif

#ifdef __sparc__
extern char reboot_command [];
extern int stop_a_enabled;
extern int scons_pwroff;
#endif

#ifdef __hppa__
extern int pwrsw_enabled;
extern int unaligned_enabled;
#endif

#ifdef CONFIG_S390
#ifdef CONFIG_MATHEMU
extern int sysctl_ieee_emulation_warnings;
#endif
extern int sysctl_userprocess_debug;
extern int spin_retry;
#endif

/* Declared unconditionally; only used by kern_table under CONFIG_NO_IDLE_HZ. */
extern int sysctl_hz_timer;

#ifdef CONFIG_BSD_PROCESS_ACCT
extern int acct_parm[];
#endif

#ifdef CONFIG_IA64
extern int no_unaligned_warning;
#endif
133 
/* Forward declarations of helpers defined later in this file. */
static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
		       ctl_table *, void **);
static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
		  void __user *buffer, size_t *lenp, loff_t *ppos);

/*
 * Header for the built-in root table. Its ctl_entry list node is initialised
 * to point at itself, and do_sysctl() starts its walk of the header list
 * from this node.
 */
static ctl_table root_table[];
static struct ctl_table_header root_table_header =
	{ root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };

/* Tables defined in this file (children of root_table). */
static ctl_table kern_table[];
static ctl_table vm_table[];
static ctl_table proc_table[];
static ctl_table fs_table[];
static ctl_table debug_table[];
static ctl_table dev_table[];
/* Tables defined outside this file and linked in as children below. */
extern ctl_table random_table[];
#ifdef CONFIG_UNIX98_PTYS
extern ctl_table pty_table[];
#endif
#ifdef CONFIG_INOTIFY
extern ctl_table inotify_table[];
#endif

#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
/* Backs the "legacy_va_layout" vm_table entry. */
int sysctl_legacy_va_layout;
#endif
160 
/* /proc declarations: */

#ifdef CONFIG_PROC_FS

/* File callbacks defined later in this file. */
static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *);
static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *);
static int proc_opensys(struct inode *, struct file *);

/* open/read/write operations wired to the three callbacks declared above. */
struct file_operations proc_sys_file_operations = {
	.open		= proc_opensys,
	.read		= proc_readsys,
	.write		= proc_writesys,
};

/* Directory entry passed to register_proc_table() by sysctl_init(). */
extern struct proc_dir_entry *proc_sys_root;

static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *);
static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
#endif
180 
/* The default sysctl tables: */

/*
 * Top-level table: one directory-style entry (mode 0555, .child set, no
 * .data) per subsystem. The entry with .ctl_name == 0 terminates the table;
 * parse_table() stops scanning when it reaches it.
 */
static ctl_table root_table[] = {
	{
		.ctl_name	= CTL_KERN,
		.procname	= "kernel",
		.mode		= 0555,
		.child		= kern_table,
	},
	{
		.ctl_name	= CTL_VM,
		.procname	= "vm",
		.mode		= 0555,
		.child		= vm_table,
	},
#ifdef CONFIG_NET
	/* net_table is not declared in this file — presumably provided by an included header; verify. */
	{
		.ctl_name	= CTL_NET,
		.procname	= "net",
		.mode		= 0555,
		.child		= net_table,
	},
#endif
	{
		.ctl_name	= CTL_PROC,
		.procname	= "proc",
		.mode		= 0555,
		.child		= proc_table,
	},
	{
		.ctl_name	= CTL_FS,
		.procname	= "fs",
		.mode		= 0555,
		.child		= fs_table,
	},
	{
		.ctl_name	= CTL_DEBUG,
		.procname	= "debug",
		.mode		= 0555,
		.child		= debug_table,
	},
	{
		.ctl_name	= CTL_DEV,
		.procname	= "dev",
		.mode		= 0555,
		.child		= dev_table,
	},

	{ .ctl_name = 0 }
};
231 
232 static ctl_table kern_table[] = {
233 	{
234 		.ctl_name	= KERN_OSTYPE,
235 		.procname	= "ostype",
236 		.data		= system_utsname.sysname,
237 		.maxlen		= sizeof(system_utsname.sysname),
238 		.mode		= 0444,
239 		.proc_handler	= &proc_doutsstring,
240 		.strategy	= &sysctl_string,
241 	},
242 	{
243 		.ctl_name	= KERN_OSRELEASE,
244 		.procname	= "osrelease",
245 		.data		= system_utsname.release,
246 		.maxlen		= sizeof(system_utsname.release),
247 		.mode		= 0444,
248 		.proc_handler	= &proc_doutsstring,
249 		.strategy	= &sysctl_string,
250 	},
251 	{
252 		.ctl_name	= KERN_VERSION,
253 		.procname	= "version",
254 		.data		= system_utsname.version,
255 		.maxlen		= sizeof(system_utsname.version),
256 		.mode		= 0444,
257 		.proc_handler	= &proc_doutsstring,
258 		.strategy	= &sysctl_string,
259 	},
260 	{
261 		.ctl_name	= KERN_NODENAME,
262 		.procname	= "hostname",
263 		.data		= system_utsname.nodename,
264 		.maxlen		= sizeof(system_utsname.nodename),
265 		.mode		= 0644,
266 		.proc_handler	= &proc_doutsstring,
267 		.strategy	= &sysctl_string,
268 	},
269 	{
270 		.ctl_name	= KERN_DOMAINNAME,
271 		.procname	= "domainname",
272 		.data		= system_utsname.domainname,
273 		.maxlen		= sizeof(system_utsname.domainname),
274 		.mode		= 0644,
275 		.proc_handler	= &proc_doutsstring,
276 		.strategy	= &sysctl_string,
277 	},
278 	{
279 		.ctl_name	= KERN_PANIC,
280 		.procname	= "panic",
281 		.data		= &panic_timeout,
282 		.maxlen		= sizeof(int),
283 		.mode		= 0644,
284 		.proc_handler	= &proc_dointvec,
285 	},
286 	{
287 		.ctl_name	= KERN_CORE_USES_PID,
288 		.procname	= "core_uses_pid",
289 		.data		= &core_uses_pid,
290 		.maxlen		= sizeof(int),
291 		.mode		= 0644,
292 		.proc_handler	= &proc_dointvec,
293 	},
294 	{
295 		.ctl_name	= KERN_CORE_PATTERN,
296 		.procname	= "core_pattern",
297 		.data		= core_pattern,
298 		.maxlen		= 64,
299 		.mode		= 0644,
300 		.proc_handler	= &proc_dostring,
301 		.strategy	= &sysctl_string,
302 	},
303 	{
304 		.ctl_name	= KERN_TAINTED,
305 		.procname	= "tainted",
306 		.data		= &tainted,
307 		.maxlen		= sizeof(int),
308 		.mode		= 0444,
309 		.proc_handler	= &proc_dointvec,
310 	},
311 	{
312 		.ctl_name	= KERN_CAP_BSET,
313 		.procname	= "cap-bound",
314 		.data		= &cap_bset,
315 		.maxlen		= sizeof(kernel_cap_t),
316 		.mode		= 0600,
317 		.proc_handler	= &proc_dointvec_bset,
318 	},
319 #ifdef CONFIG_BLK_DEV_INITRD
320 	{
321 		.ctl_name	= KERN_REALROOTDEV,
322 		.procname	= "real-root-dev",
323 		.data		= &real_root_dev,
324 		.maxlen		= sizeof(int),
325 		.mode		= 0644,
326 		.proc_handler	= &proc_dointvec,
327 	},
328 #endif
329 #ifdef __sparc__
330 	{
331 		.ctl_name	= KERN_SPARC_REBOOT,
332 		.procname	= "reboot-cmd",
333 		.data		= reboot_command,
334 		.maxlen		= 256,
335 		.mode		= 0644,
336 		.proc_handler	= &proc_dostring,
337 		.strategy	= &sysctl_string,
338 	},
339 	{
340 		.ctl_name	= KERN_SPARC_STOP_A,
341 		.procname	= "stop-a",
342 		.data		= &stop_a_enabled,
343 		.maxlen		= sizeof (int),
344 		.mode		= 0644,
345 		.proc_handler	= &proc_dointvec,
346 	},
347 	{
348 		.ctl_name	= KERN_SPARC_SCONS_PWROFF,
349 		.procname	= "scons-poweroff",
350 		.data		= &scons_pwroff,
351 		.maxlen		= sizeof (int),
352 		.mode		= 0644,
353 		.proc_handler	= &proc_dointvec,
354 	},
355 #endif
356 #ifdef __hppa__
357 	{
358 		.ctl_name	= KERN_HPPA_PWRSW,
359 		.procname	= "soft-power",
360 		.data		= &pwrsw_enabled,
361 		.maxlen		= sizeof (int),
362 	 	.mode		= 0644,
363 		.proc_handler	= &proc_dointvec,
364 	},
365 	{
366 		.ctl_name	= KERN_HPPA_UNALIGNED,
367 		.procname	= "unaligned-trap",
368 		.data		= &unaligned_enabled,
369 		.maxlen		= sizeof (int),
370 		.mode		= 0644,
371 		.proc_handler	= &proc_dointvec,
372 	},
373 #endif
374 	{
375 		.ctl_name	= KERN_CTLALTDEL,
376 		.procname	= "ctrl-alt-del",
377 		.data		= &C_A_D,
378 		.maxlen		= sizeof(int),
379 		.mode		= 0644,
380 		.proc_handler	= &proc_dointvec,
381 	},
382 	{
383 		.ctl_name	= KERN_PRINTK,
384 		.procname	= "printk",
385 		.data		= &console_loglevel,
386 		.maxlen		= 4*sizeof(int),
387 		.mode		= 0644,
388 		.proc_handler	= &proc_dointvec,
389 	},
390 #ifdef CONFIG_KMOD
391 	{
392 		.ctl_name	= KERN_MODPROBE,
393 		.procname	= "modprobe",
394 		.data		= &modprobe_path,
395 		.maxlen		= KMOD_PATH_LEN,
396 		.mode		= 0644,
397 		.proc_handler	= &proc_dostring,
398 		.strategy	= &sysctl_string,
399 	},
400 #endif
401 #ifdef CONFIG_HOTPLUG
402 	{
403 		.ctl_name	= KERN_HOTPLUG,
404 		.procname	= "hotplug",
405 		.data		= &uevent_helper,
406 		.maxlen		= UEVENT_HELPER_PATH_LEN,
407 		.mode		= 0644,
408 		.proc_handler	= &proc_dostring,
409 		.strategy	= &sysctl_string,
410 	},
411 #endif
412 #ifdef CONFIG_CHR_DEV_SG
413 	{
414 		.ctl_name	= KERN_SG_BIG_BUFF,
415 		.procname	= "sg-big-buff",
416 		.data		= &sg_big_buff,
417 		.maxlen		= sizeof (int),
418 		.mode		= 0444,
419 		.proc_handler	= &proc_dointvec,
420 	},
421 #endif
422 #ifdef CONFIG_BSD_PROCESS_ACCT
423 	{
424 		.ctl_name	= KERN_ACCT,
425 		.procname	= "acct",
426 		.data		= &acct_parm,
427 		.maxlen		= 3*sizeof(int),
428 		.mode		= 0644,
429 		.proc_handler	= &proc_dointvec,
430 	},
431 #endif
432 #ifdef CONFIG_SYSVIPC
433 	{
434 		.ctl_name	= KERN_SHMMAX,
435 		.procname	= "shmmax",
436 		.data		= &shm_ctlmax,
437 		.maxlen		= sizeof (size_t),
438 		.mode		= 0644,
439 		.proc_handler	= &proc_doulongvec_minmax,
440 	},
441 	{
442 		.ctl_name	= KERN_SHMALL,
443 		.procname	= "shmall",
444 		.data		= &shm_ctlall,
445 		.maxlen		= sizeof (size_t),
446 		.mode		= 0644,
447 		.proc_handler	= &proc_doulongvec_minmax,
448 	},
449 	{
450 		.ctl_name	= KERN_SHMMNI,
451 		.procname	= "shmmni",
452 		.data		= &shm_ctlmni,
453 		.maxlen		= sizeof (int),
454 		.mode		= 0644,
455 		.proc_handler	= &proc_dointvec,
456 	},
457 	{
458 		.ctl_name	= KERN_MSGMAX,
459 		.procname	= "msgmax",
460 		.data		= &msg_ctlmax,
461 		.maxlen		= sizeof (int),
462 		.mode		= 0644,
463 		.proc_handler	= &proc_dointvec,
464 	},
465 	{
466 		.ctl_name	= KERN_MSGMNI,
467 		.procname	= "msgmni",
468 		.data		= &msg_ctlmni,
469 		.maxlen		= sizeof (int),
470 		.mode		= 0644,
471 		.proc_handler	= &proc_dointvec,
472 	},
473 	{
474 		.ctl_name	= KERN_MSGMNB,
475 		.procname	=  "msgmnb",
476 		.data		= &msg_ctlmnb,
477 		.maxlen		= sizeof (int),
478 		.mode		= 0644,
479 		.proc_handler	= &proc_dointvec,
480 	},
481 	{
482 		.ctl_name	= KERN_SEM,
483 		.procname	= "sem",
484 		.data		= &sem_ctls,
485 		.maxlen		= 4*sizeof (int),
486 		.mode		= 0644,
487 		.proc_handler	= &proc_dointvec,
488 	},
489 #endif
490 #ifdef CONFIG_MAGIC_SYSRQ
491 	{
492 		.ctl_name	= KERN_SYSRQ,
493 		.procname	= "sysrq",
494 		.data		= &sysrq_enabled,
495 		.maxlen		= sizeof (int),
496 		.mode		= 0644,
497 		.proc_handler	= &proc_dointvec,
498 	},
499 #endif
500 	{
501 		.ctl_name	= KERN_CADPID,
502 		.procname	= "cad_pid",
503 		.data		= &cad_pid,
504 		.maxlen		= sizeof (int),
505 		.mode		= 0600,
506 		.proc_handler	= &proc_dointvec,
507 	},
508 	{
509 		.ctl_name	= KERN_MAX_THREADS,
510 		.procname	= "threads-max",
511 		.data		= &max_threads,
512 		.maxlen		= sizeof(int),
513 		.mode		= 0644,
514 		.proc_handler	= &proc_dointvec,
515 	},
516 	{
517 		.ctl_name	= KERN_RANDOM,
518 		.procname	= "random",
519 		.mode		= 0555,
520 		.child		= random_table,
521 	},
522 #ifdef CONFIG_UNIX98_PTYS
523 	{
524 		.ctl_name	= KERN_PTY,
525 		.procname	= "pty",
526 		.mode		= 0555,
527 		.child		= pty_table,
528 	},
529 #endif
530 	{
531 		.ctl_name	= KERN_OVERFLOWUID,
532 		.procname	= "overflowuid",
533 		.data		= &overflowuid,
534 		.maxlen		= sizeof(int),
535 		.mode		= 0644,
536 		.proc_handler	= &proc_dointvec_minmax,
537 		.strategy	= &sysctl_intvec,
538 		.extra1		= &minolduid,
539 		.extra2		= &maxolduid,
540 	},
541 	{
542 		.ctl_name	= KERN_OVERFLOWGID,
543 		.procname	= "overflowgid",
544 		.data		= &overflowgid,
545 		.maxlen		= sizeof(int),
546 		.mode		= 0644,
547 		.proc_handler	= &proc_dointvec_minmax,
548 		.strategy	= &sysctl_intvec,
549 		.extra1		= &minolduid,
550 		.extra2		= &maxolduid,
551 	},
552 #ifdef CONFIG_S390
553 #ifdef CONFIG_MATHEMU
554 	{
555 		.ctl_name	= KERN_IEEE_EMULATION_WARNINGS,
556 		.procname	= "ieee_emulation_warnings",
557 		.data		= &sysctl_ieee_emulation_warnings,
558 		.maxlen		= sizeof(int),
559 		.mode		= 0644,
560 		.proc_handler	= &proc_dointvec,
561 	},
562 #endif
563 #ifdef CONFIG_NO_IDLE_HZ
564 	{
565 		.ctl_name       = KERN_HZ_TIMER,
566 		.procname       = "hz_timer",
567 		.data           = &sysctl_hz_timer,
568 		.maxlen         = sizeof(int),
569 		.mode           = 0644,
570 		.proc_handler   = &proc_dointvec,
571 	},
572 #endif
573 	{
574 		.ctl_name	= KERN_S390_USER_DEBUG_LOGGING,
575 		.procname	= "userprocess_debug",
576 		.data		= &sysctl_userprocess_debug,
577 		.maxlen		= sizeof(int),
578 		.mode		= 0644,
579 		.proc_handler	= &proc_dointvec,
580 	},
581 #endif
582 	{
583 		.ctl_name	= KERN_PIDMAX,
584 		.procname	= "pid_max",
585 		.data		= &pid_max,
586 		.maxlen		= sizeof (int),
587 		.mode		= 0644,
588 		.proc_handler	= &proc_dointvec_minmax,
589 		.strategy	= sysctl_intvec,
590 		.extra1		= &pid_max_min,
591 		.extra2		= &pid_max_max,
592 	},
593 	{
594 		.ctl_name	= KERN_PANIC_ON_OOPS,
595 		.procname	= "panic_on_oops",
596 		.data		= &panic_on_oops,
597 		.maxlen		= sizeof(int),
598 		.mode		= 0644,
599 		.proc_handler	= &proc_dointvec,
600 	},
601 	{
602 		.ctl_name	= KERN_PRINTK_RATELIMIT,
603 		.procname	= "printk_ratelimit",
604 		.data		= &printk_ratelimit_jiffies,
605 		.maxlen		= sizeof(int),
606 		.mode		= 0644,
607 		.proc_handler	= &proc_dointvec_jiffies,
608 		.strategy	= &sysctl_jiffies,
609 	},
610 	{
611 		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
612 		.procname	= "printk_ratelimit_burst",
613 		.data		= &printk_ratelimit_burst,
614 		.maxlen		= sizeof(int),
615 		.mode		= 0644,
616 		.proc_handler	= &proc_dointvec,
617 	},
618 	{
619 		.ctl_name	= KERN_NGROUPS_MAX,
620 		.procname	= "ngroups_max",
621 		.data		= &ngroups_max,
622 		.maxlen		= sizeof (int),
623 		.mode		= 0444,
624 		.proc_handler	= &proc_dointvec,
625 	},
626 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
627 	{
628 		.ctl_name       = KERN_UNKNOWN_NMI_PANIC,
629 		.procname       = "unknown_nmi_panic",
630 		.data           = &unknown_nmi_panic,
631 		.maxlen         = sizeof (int),
632 		.mode           = 0644,
633 		.proc_handler   = &proc_unknown_nmi_panic,
634 	},
635 #endif
636 #if defined(CONFIG_X86)
637 	{
638 		.ctl_name	= KERN_BOOTLOADER_TYPE,
639 		.procname	= "bootloader_type",
640 		.data		= &bootloader_type,
641 		.maxlen		= sizeof (int),
642 		.mode		= 0444,
643 		.proc_handler	= &proc_dointvec,
644 	},
645 #endif
646 #if defined(CONFIG_MMU)
647 	{
648 		.ctl_name	= KERN_RANDOMIZE,
649 		.procname	= "randomize_va_space",
650 		.data		= &randomize_va_space,
651 		.maxlen		= sizeof(int),
652 		.mode		= 0644,
653 		.proc_handler	= &proc_dointvec,
654 	},
655 #endif
656 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
657 	{
658 		.ctl_name	= KERN_SPIN_RETRY,
659 		.procname	= "spin_retry",
660 		.data		= &spin_retry,
661 		.maxlen		= sizeof (int),
662 		.mode		= 0644,
663 		.proc_handler	= &proc_dointvec,
664 	},
665 #endif
666 #ifdef CONFIG_ACPI_SLEEP
667 	{
668 		.ctl_name	= KERN_ACPI_VIDEO_FLAGS,
669 		.procname	= "acpi_video_flags",
670 		.data		= &acpi_video_flags,
671 		.maxlen		= sizeof (unsigned long),
672 		.mode		= 0644,
673 		.proc_handler	= &proc_doulongvec_minmax,
674 	},
675 #endif
676 #ifdef CONFIG_IA64
677 	{
678 		.ctl_name	= KERN_IA64_UNALIGNED,
679 		.procname	= "ignore-unaligned-usertrap",
680 		.data		= &no_unaligned_warning,
681 		.maxlen		= sizeof (int),
682 	 	.mode		= 0644,
683 		.proc_handler	= &proc_dointvec,
684 	},
685 #endif
686 	{ .ctl_name = 0 }
687 };
688 
/*
 * Constants for minimum and maximum testing in vm_table.
 * We use these as one-element integer vectors: entries point .extra1 at
 * `zero` and, for percentage-style knobs, .extra2 at `one_hundred`.
 */
static int zero;
static int one_hundred = 100;
693 
694 
695 static ctl_table vm_table[] = {
696 	{
697 		.ctl_name	= VM_OVERCOMMIT_MEMORY,
698 		.procname	= "overcommit_memory",
699 		.data		= &sysctl_overcommit_memory,
700 		.maxlen		= sizeof(sysctl_overcommit_memory),
701 		.mode		= 0644,
702 		.proc_handler	= &proc_dointvec,
703 	},
704 	{
705 		.ctl_name	= VM_OVERCOMMIT_RATIO,
706 		.procname	= "overcommit_ratio",
707 		.data		= &sysctl_overcommit_ratio,
708 		.maxlen		= sizeof(sysctl_overcommit_ratio),
709 		.mode		= 0644,
710 		.proc_handler	= &proc_dointvec,
711 	},
712 	{
713 		.ctl_name	= VM_PAGE_CLUSTER,
714 		.procname	= "page-cluster",
715 		.data		= &page_cluster,
716 		.maxlen		= sizeof(int),
717 		.mode		= 0644,
718 		.proc_handler	= &proc_dointvec,
719 	},
720 	{
721 		.ctl_name	= VM_DIRTY_BACKGROUND,
722 		.procname	= "dirty_background_ratio",
723 		.data		= &dirty_background_ratio,
724 		.maxlen		= sizeof(dirty_background_ratio),
725 		.mode		= 0644,
726 		.proc_handler	= &proc_dointvec_minmax,
727 		.strategy	= &sysctl_intvec,
728 		.extra1		= &zero,
729 		.extra2		= &one_hundred,
730 	},
731 	{
732 		.ctl_name	= VM_DIRTY_RATIO,
733 		.procname	= "dirty_ratio",
734 		.data		= &vm_dirty_ratio,
735 		.maxlen		= sizeof(vm_dirty_ratio),
736 		.mode		= 0644,
737 		.proc_handler	= &proc_dointvec_minmax,
738 		.strategy	= &sysctl_intvec,
739 		.extra1		= &zero,
740 		.extra2		= &one_hundred,
741 	},
742 	{
743 		.ctl_name	= VM_DIRTY_WB_CS,
744 		.procname	= "dirty_writeback_centisecs",
745 		.data		= &dirty_writeback_centisecs,
746 		.maxlen		= sizeof(dirty_writeback_centisecs),
747 		.mode		= 0644,
748 		.proc_handler	= &dirty_writeback_centisecs_handler,
749 	},
750 	{
751 		.ctl_name	= VM_DIRTY_EXPIRE_CS,
752 		.procname	= "dirty_expire_centisecs",
753 		.data		= &dirty_expire_centisecs,
754 		.maxlen		= sizeof(dirty_expire_centisecs),
755 		.mode		= 0644,
756 		.proc_handler	= &proc_dointvec,
757 	},
758 	{
759 		.ctl_name	= VM_NR_PDFLUSH_THREADS,
760 		.procname	= "nr_pdflush_threads",
761 		.data		= &nr_pdflush_threads,
762 		.maxlen		= sizeof nr_pdflush_threads,
763 		.mode		= 0444 /* read-only*/,
764 		.proc_handler	= &proc_dointvec,
765 	},
766 	{
767 		.ctl_name	= VM_SWAPPINESS,
768 		.procname	= "swappiness",
769 		.data		= &vm_swappiness,
770 		.maxlen		= sizeof(vm_swappiness),
771 		.mode		= 0644,
772 		.proc_handler	= &proc_dointvec_minmax,
773 		.strategy	= &sysctl_intvec,
774 		.extra1		= &zero,
775 		.extra2		= &one_hundred,
776 	},
777 #ifdef CONFIG_HUGETLB_PAGE
778 	 {
779 		.ctl_name	= VM_HUGETLB_PAGES,
780 		.procname	= "nr_hugepages",
781 		.data		= &max_huge_pages,
782 		.maxlen		= sizeof(unsigned long),
783 		.mode		= 0644,
784 		.proc_handler	= &hugetlb_sysctl_handler,
785 		.extra1		= (void *)&hugetlb_zero,
786 		.extra2		= (void *)&hugetlb_infinity,
787 	 },
788 	 {
789 		.ctl_name	= VM_HUGETLB_GROUP,
790 		.procname	= "hugetlb_shm_group",
791 		.data		= &sysctl_hugetlb_shm_group,
792 		.maxlen		= sizeof(gid_t),
793 		.mode		= 0644,
794 		.proc_handler	= &proc_dointvec,
795 	 },
796 #endif
797 	{
798 		.ctl_name	= VM_LOWMEM_RESERVE_RATIO,
799 		.procname	= "lowmem_reserve_ratio",
800 		.data		= &sysctl_lowmem_reserve_ratio,
801 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
802 		.mode		= 0644,
803 		.proc_handler	= &lowmem_reserve_ratio_sysctl_handler,
804 		.strategy	= &sysctl_intvec,
805 	},
806 	{
807 		.ctl_name	= VM_DROP_PAGECACHE,
808 		.procname	= "drop_caches",
809 		.data		= &sysctl_drop_caches,
810 		.maxlen		= sizeof(int),
811 		.mode		= 0644,
812 		.proc_handler	= drop_caches_sysctl_handler,
813 		.strategy	= &sysctl_intvec,
814 	},
815 	{
816 		.ctl_name	= VM_MIN_FREE_KBYTES,
817 		.procname	= "min_free_kbytes",
818 		.data		= &min_free_kbytes,
819 		.maxlen		= sizeof(min_free_kbytes),
820 		.mode		= 0644,
821 		.proc_handler	= &min_free_kbytes_sysctl_handler,
822 		.strategy	= &sysctl_intvec,
823 		.extra1		= &zero,
824 	},
825 	{
826 		.ctl_name	= VM_PERCPU_PAGELIST_FRACTION,
827 		.procname	= "percpu_pagelist_fraction",
828 		.data		= &percpu_pagelist_fraction,
829 		.maxlen		= sizeof(percpu_pagelist_fraction),
830 		.mode		= 0644,
831 		.proc_handler	= &percpu_pagelist_fraction_sysctl_handler,
832 		.strategy	= &sysctl_intvec,
833 		.extra1		= &min_percpu_pagelist_fract,
834 	},
835 #ifdef CONFIG_MMU
836 	{
837 		.ctl_name	= VM_MAX_MAP_COUNT,
838 		.procname	= "max_map_count",
839 		.data		= &sysctl_max_map_count,
840 		.maxlen		= sizeof(sysctl_max_map_count),
841 		.mode		= 0644,
842 		.proc_handler	= &proc_dointvec
843 	},
844 #endif
845 	{
846 		.ctl_name	= VM_LAPTOP_MODE,
847 		.procname	= "laptop_mode",
848 		.data		= &laptop_mode,
849 		.maxlen		= sizeof(laptop_mode),
850 		.mode		= 0644,
851 		.proc_handler	= &proc_dointvec,
852 		.strategy	= &sysctl_intvec,
853 		.extra1		= &zero,
854 	},
855 	{
856 		.ctl_name	= VM_BLOCK_DUMP,
857 		.procname	= "block_dump",
858 		.data		= &block_dump,
859 		.maxlen		= sizeof(block_dump),
860 		.mode		= 0644,
861 		.proc_handler	= &proc_dointvec,
862 		.strategy	= &sysctl_intvec,
863 		.extra1		= &zero,
864 	},
865 	{
866 		.ctl_name	= VM_VFS_CACHE_PRESSURE,
867 		.procname	= "vfs_cache_pressure",
868 		.data		= &sysctl_vfs_cache_pressure,
869 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
870 		.mode		= 0644,
871 		.proc_handler	= &proc_dointvec,
872 		.strategy	= &sysctl_intvec,
873 		.extra1		= &zero,
874 	},
875 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
876 	{
877 		.ctl_name	= VM_LEGACY_VA_LAYOUT,
878 		.procname	= "legacy_va_layout",
879 		.data		= &sysctl_legacy_va_layout,
880 		.maxlen		= sizeof(sysctl_legacy_va_layout),
881 		.mode		= 0644,
882 		.proc_handler	= &proc_dointvec,
883 		.strategy	= &sysctl_intvec,
884 		.extra1		= &zero,
885 	},
886 #endif
887 #ifdef CONFIG_SWAP
888 	{
889 		.ctl_name	= VM_SWAP_TOKEN_TIMEOUT,
890 		.procname	= "swap_token_timeout",
891 		.data		= &swap_token_default_timeout,
892 		.maxlen		= sizeof(swap_token_default_timeout),
893 		.mode		= 0644,
894 		.proc_handler	= &proc_dointvec_jiffies,
895 		.strategy	= &sysctl_jiffies,
896 	},
897 #endif
898 #ifdef CONFIG_NUMA
899 	{
900 		.ctl_name	= VM_ZONE_RECLAIM_MODE,
901 		.procname	= "zone_reclaim_mode",
902 		.data		= &zone_reclaim_mode,
903 		.maxlen		= sizeof(zone_reclaim_mode),
904 		.mode		= 0644,
905 		.proc_handler	= &proc_dointvec,
906 		.strategy	= &sysctl_intvec,
907 		.extra1		= &zero,
908 	},
909 	{
910 		.ctl_name	= VM_ZONE_RECLAIM_INTERVAL,
911 		.procname	= "zone_reclaim_interval",
912 		.data		= &zone_reclaim_interval,
913 		.maxlen		= sizeof(zone_reclaim_interval),
914 		.mode		= 0644,
915 		.proc_handler	= &proc_dointvec_jiffies,
916 		.strategy	= &sysctl_jiffies,
917 	},
918 #endif
919 	{ .ctl_name = 0 }
920 };
921 
/* Child table of the "proc" directory in root_table; holds only the terminating entry. */
static ctl_table proc_table[] = {
	{ .ctl_name = 0 }
};
925 
926 static ctl_table fs_table[] = {
927 	{
928 		.ctl_name	= FS_NRINODE,
929 		.procname	= "inode-nr",
930 		.data		= &inodes_stat,
931 		.maxlen		= 2*sizeof(int),
932 		.mode		= 0444,
933 		.proc_handler	= &proc_dointvec,
934 	},
935 	{
936 		.ctl_name	= FS_STATINODE,
937 		.procname	= "inode-state",
938 		.data		= &inodes_stat,
939 		.maxlen		= 7*sizeof(int),
940 		.mode		= 0444,
941 		.proc_handler	= &proc_dointvec,
942 	},
943 	{
944 		.ctl_name	= FS_NRFILE,
945 		.procname	= "file-nr",
946 		.data		= &files_stat,
947 		.maxlen		= 3*sizeof(int),
948 		.mode		= 0444,
949 		.proc_handler	= &proc_nr_files,
950 	},
951 	{
952 		.ctl_name	= FS_MAXFILE,
953 		.procname	= "file-max",
954 		.data		= &files_stat.max_files,
955 		.maxlen		= sizeof(int),
956 		.mode		= 0644,
957 		.proc_handler	= &proc_dointvec,
958 	},
959 	{
960 		.ctl_name	= FS_DENTRY,
961 		.procname	= "dentry-state",
962 		.data		= &dentry_stat,
963 		.maxlen		= 6*sizeof(int),
964 		.mode		= 0444,
965 		.proc_handler	= &proc_dointvec,
966 	},
967 	{
968 		.ctl_name	= FS_OVERFLOWUID,
969 		.procname	= "overflowuid",
970 		.data		= &fs_overflowuid,
971 		.maxlen		= sizeof(int),
972 		.mode		= 0644,
973 		.proc_handler	= &proc_dointvec_minmax,
974 		.strategy	= &sysctl_intvec,
975 		.extra1		= &minolduid,
976 		.extra2		= &maxolduid,
977 	},
978 	{
979 		.ctl_name	= FS_OVERFLOWGID,
980 		.procname	= "overflowgid",
981 		.data		= &fs_overflowgid,
982 		.maxlen		= sizeof(int),
983 		.mode		= 0644,
984 		.proc_handler	= &proc_dointvec_minmax,
985 		.strategy	= &sysctl_intvec,
986 		.extra1		= &minolduid,
987 		.extra2		= &maxolduid,
988 	},
989 	{
990 		.ctl_name	= FS_LEASES,
991 		.procname	= "leases-enable",
992 		.data		= &leases_enable,
993 		.maxlen		= sizeof(int),
994 		.mode		= 0644,
995 		.proc_handler	= &proc_dointvec,
996 	},
997 #ifdef CONFIG_DNOTIFY
998 	{
999 		.ctl_name	= FS_DIR_NOTIFY,
1000 		.procname	= "dir-notify-enable",
1001 		.data		= &dir_notify_enable,
1002 		.maxlen		= sizeof(int),
1003 		.mode		= 0644,
1004 		.proc_handler	= &proc_dointvec,
1005 	},
1006 #endif
1007 #ifdef CONFIG_MMU
1008 	{
1009 		.ctl_name	= FS_LEASE_TIME,
1010 		.procname	= "lease-break-time",
1011 		.data		= &lease_break_time,
1012 		.maxlen		= sizeof(int),
1013 		.mode		= 0644,
1014 		.proc_handler	= &proc_dointvec,
1015 	},
1016 	{
1017 		.ctl_name	= FS_AIO_NR,
1018 		.procname	= "aio-nr",
1019 		.data		= &aio_nr,
1020 		.maxlen		= sizeof(aio_nr),
1021 		.mode		= 0444,
1022 		.proc_handler	= &proc_doulongvec_minmax,
1023 	},
1024 	{
1025 		.ctl_name	= FS_AIO_MAX_NR,
1026 		.procname	= "aio-max-nr",
1027 		.data		= &aio_max_nr,
1028 		.maxlen		= sizeof(aio_max_nr),
1029 		.mode		= 0644,
1030 		.proc_handler	= &proc_doulongvec_minmax,
1031 	},
1032 #ifdef CONFIG_INOTIFY
1033 	{
1034 		.ctl_name	= FS_INOTIFY,
1035 		.procname	= "inotify",
1036 		.mode		= 0555,
1037 		.child		= inotify_table,
1038 	},
1039 #endif
1040 #endif
1041 	{
1042 		.ctl_name	= KERN_SETUID_DUMPABLE,
1043 		.procname	= "suid_dumpable",
1044 		.data		= &suid_dumpable,
1045 		.maxlen		= sizeof(int),
1046 		.mode		= 0644,
1047 		.proc_handler	= &proc_dointvec,
1048 	},
1049 	{ .ctl_name = 0 }
1050 };
1051 
/* Child table of the "debug" directory in root_table; holds only the terminating entry. */
static ctl_table debug_table[] = {
	{ .ctl_name = 0 }
};
1055 
/* Child table of the "dev" directory in root_table; holds only the terminating entry. */
static ctl_table dev_table[] = {
	{ .ctl_name = 0 }
};
1059 
/* Defined outside this file; called from sysctl_init() when CONFIG_PROC_FS is set. */
extern void init_irq_proc (void);

/*
 * Spinlock held around the walk of the table-header list in do_sysctl() and
 * required by use_table(), unuse_table() and start_unregistering().
 */
static DEFINE_SPINLOCK(sysctl_lock);
1063 
1064 /* called under sysctl_lock */
1065 static int use_table(struct ctl_table_header *p)
1066 {
1067 	if (unlikely(p->unregistering))
1068 		return 0;
1069 	p->used++;
1070 	return 1;
1071 }
1072 
1073 /* called under sysctl_lock */
1074 static void unuse_table(struct ctl_table_header *p)
1075 {
1076 	if (!--p->used)
1077 		if (unlikely(p->unregistering))
1078 			complete(p->unregistering);
1079 }
1080 
1081 /* called under sysctl_lock, will reacquire if has to wait */
1082 static void start_unregistering(struct ctl_table_header *p)
1083 {
1084 	/*
1085 	 * if p->used is 0, nobody will ever touch that entry again;
1086 	 * we'll eliminate all paths to it before dropping sysctl_lock
1087 	 */
1088 	if (unlikely(p->used)) {
1089 		struct completion wait;
1090 		init_completion(&wait);
1091 		p->unregistering = &wait;
1092 		spin_unlock(&sysctl_lock);
1093 		wait_for_completion(&wait);
1094 		spin_lock(&sysctl_lock);
1095 	}
1096 	/*
1097 	 * do not remove from the list until nobody holds it; walking the
1098 	 * list in do_sysctl() relies on that.
1099 	 */
1100 	list_del_init(&p->ctl_entry);
1101 }
1102 
/*
 * Initialisation hook. With CONFIG_PROC_FS enabled it registers root_table
 * under proc_sys_root (passing root_table_header as the third argument) and
 * then calls init_irq_proc(); without CONFIG_PROC_FS it does nothing.
 */
void __init sysctl_init(void)
{
#ifdef CONFIG_PROC_FS
	register_proc_table(root_table, proc_sys_root, &root_table_header);
	init_irq_proc();
#endif
}
1110 
1111 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1112 	       void __user *newval, size_t newlen)
1113 {
1114 	struct list_head *tmp;
1115 	int error = -ENOTDIR;
1116 
1117 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
1118 		return -ENOTDIR;
1119 	if (oldval) {
1120 		int old_len;
1121 		if (!oldlenp || get_user(old_len, oldlenp))
1122 			return -EFAULT;
1123 	}
1124 	spin_lock(&sysctl_lock);
1125 	tmp = &root_table_header.ctl_entry;
1126 	do {
1127 		struct ctl_table_header *head =
1128 			list_entry(tmp, struct ctl_table_header, ctl_entry);
1129 		void *context = NULL;
1130 
1131 		if (!use_table(head))
1132 			continue;
1133 
1134 		spin_unlock(&sysctl_lock);
1135 
1136 		error = parse_table(name, nlen, oldval, oldlenp,
1137 					newval, newlen, head->ctl_table,
1138 					&context);
1139 		kfree(context);
1140 
1141 		spin_lock(&sysctl_lock);
1142 		unuse_table(head);
1143 		if (error != -ENOTDIR)
1144 			break;
1145 	} while ((tmp = tmp->next) != &root_table_header.ctl_entry);
1146 	spin_unlock(&sysctl_lock);
1147 	return error;
1148 }
1149 
1150 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1151 {
1152 	struct __sysctl_args tmp;
1153 	int error;
1154 
1155 	if (copy_from_user(&tmp, args, sizeof(tmp)))
1156 		return -EFAULT;
1157 
1158 	lock_kernel();
1159 	error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1160 			  tmp.newval, tmp.newlen);
1161 	unlock_kernel();
1162 	return error;
1163 }
1164 
1165 /*
1166  * ctl_perm does NOT grant the superuser all rights automatically, because
1167  * some sysctl variables are readonly even to root.
1168  */
1169 
1170 static int test_perm(int mode, int op)
1171 {
1172 	if (!current->euid)
1173 		mode >>= 6;
1174 	else if (in_egroup_p(0))
1175 		mode >>= 3;
1176 	if ((mode & op & 0007) == op)
1177 		return 0;
1178 	return -EACCES;
1179 }
1180 
1181 static inline int ctl_perm(ctl_table *table, int op)
1182 {
1183 	int error;
1184 	error = security_sysctl(table, op);
1185 	if (error)
1186 		return error;
1187 	return test_perm(table->mode, op);
1188 }
1189 
1190 static int parse_table(int __user *name, int nlen,
1191 		       void __user *oldval, size_t __user *oldlenp,
1192 		       void __user *newval, size_t newlen,
1193 		       ctl_table *table, void **context)
1194 {
1195 	int n;
1196 repeat:
1197 	if (!nlen)
1198 		return -ENOTDIR;
1199 	if (get_user(n, name))
1200 		return -EFAULT;
1201 	for ( ; table->ctl_name; table++) {
1202 		if (n == table->ctl_name || table->ctl_name == CTL_ANY) {
1203 			int error;
1204 			if (table->child) {
1205 				if (ctl_perm(table, 001))
1206 					return -EPERM;
1207 				if (table->strategy) {
1208 					error = table->strategy(
1209 						table, name, nlen,
1210 						oldval, oldlenp,
1211 						newval, newlen, context);
1212 					if (error)
1213 						return error;
1214 				}
1215 				name++;
1216 				nlen--;
1217 				table = table->child;
1218 				goto repeat;
1219 			}
1220 			error = do_sysctl_strategy(table, name, nlen,
1221 						   oldval, oldlenp,
1222 						   newval, newlen, context);
1223 			return error;
1224 		}
1225 	}
1226 	return -ENOTDIR;
1227 }
1228 
1229 /* Perform the actual read/write of a sysctl table entry. */
1230 int do_sysctl_strategy (ctl_table *table,
1231 			int __user *name, int nlen,
1232 			void __user *oldval, size_t __user *oldlenp,
1233 			void __user *newval, size_t newlen, void **context)
1234 {
1235 	int op = 0, rc;
1236 	size_t len;
1237 
1238 	if (oldval)
1239 		op |= 004;
1240 	if (newval)
1241 		op |= 002;
1242 	if (ctl_perm(table, op))
1243 		return -EPERM;
1244 
1245 	if (table->strategy) {
1246 		rc = table->strategy(table, name, nlen, oldval, oldlenp,
1247 				     newval, newlen, context);
1248 		if (rc < 0)
1249 			return rc;
1250 		if (rc > 0)
1251 			return 0;
1252 	}
1253 
1254 	/* If there is no strategy routine, or if the strategy returns
1255 	 * zero, proceed with automatic r/w */
1256 	if (table->data && table->maxlen) {
1257 		if (oldval && oldlenp) {
1258 			if (get_user(len, oldlenp))
1259 				return -EFAULT;
1260 			if (len) {
1261 				if (len > table->maxlen)
1262 					len = table->maxlen;
1263 				if(copy_to_user(oldval, table->data, len))
1264 					return -EFAULT;
1265 				if(put_user(len, oldlenp))
1266 					return -EFAULT;
1267 			}
1268 		}
1269 		if (newval && newlen) {
1270 			len = newlen;
1271 			if (len > table->maxlen)
1272 				len = table->maxlen;
1273 			if(copy_from_user(table->data, newval, len))
1274 				return -EFAULT;
1275 		}
1276 	}
1277 	return 0;
1278 }
1279 
1280 /**
1281  * register_sysctl_table - register a sysctl hierarchy
1282  * @table: the top-level table structure
1283  * @insert_at_head: whether the entry should be inserted in front or at the end
1284  *
1285  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1286  * array. An entry with a ctl_name of 0 terminates the table.
1287  *
1288  * The members of the &ctl_table structure are used as follows:
1289  *
1290  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1291  *            must be unique within that level of sysctl
1292  *
1293  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1294  *            enter a sysctl file
1295  *
1296  * data - a pointer to data for use by proc_handler
1297  *
1298  * maxlen - the maximum size in bytes of the data
1299  *
1300  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1301  *
1302  * child - a pointer to the child sysctl table if this entry is a directory, or
1303  *         %NULL.
1304  *
1305  * proc_handler - the text handler routine (described below)
1306  *
1307  * strategy - the strategy routine (described below)
1308  *
1309  * de - for internal use by the sysctl routines
1310  *
1311  * extra1, extra2 - extra pointers usable by the proc handler routines
1312  *
1313  * Leaf nodes in the sysctl tree will be represented by a single file
1314  * under /proc; non-leaf nodes will be represented by directories.
1315  *
1316  * sysctl(2) can automatically manage read and write requests through
1317  * the sysctl table.  The data and maxlen fields of the ctl_table
1318  * struct enable minimal validation of the values being written to be
1319  * performed, and the mode field allows minimal authentication.
1320  *
1321  * More sophisticated management can be enabled by the provision of a
1322  * strategy routine with the table entry.  This will be called before
1323  * any automatic read or write of the data is performed.
1324  *
1325  * The strategy routine may return
1326  *
1327  * < 0 - Error occurred (error is passed to user process)
1328  *
1329  * 0   - OK - proceed with automatic read or write.
1330  *
1331  * > 0 - OK - read or write has been done by the strategy routine, so
1332  *       return immediately.
1333  *
1334  * There must be a proc_handler routine for any terminal nodes
1335  * mirrored under /proc/sys (non-terminals are handled by a built-in
1336  * directory handler).  Several default handlers are available to
1337  * cover common cases -
1338  *
1339  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1340  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1341  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1342  *
1343  * It is the handler's job to read the input buffer from user memory
1344  * and process it. The handler should return 0 on success.
1345  *
1346  * This routine returns %NULL on a failure to register, and a pointer
1347  * to the table header on success.
1348  */
1349 struct ctl_table_header *register_sysctl_table(ctl_table * table,
1350 					       int insert_at_head)
1351 {
1352 	struct ctl_table_header *tmp;
1353 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1354 	if (!tmp)
1355 		return NULL;
1356 	tmp->ctl_table = table;
1357 	INIT_LIST_HEAD(&tmp->ctl_entry);
1358 	tmp->used = 0;
1359 	tmp->unregistering = NULL;
1360 	spin_lock(&sysctl_lock);
1361 	if (insert_at_head)
1362 		list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
1363 	else
1364 		list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1365 	spin_unlock(&sysctl_lock);
1366 #ifdef CONFIG_PROC_FS
1367 	register_proc_table(table, proc_sys_root, tmp);
1368 #endif
1369 	return tmp;
1370 }
1371 
1372 /**
1373  * unregister_sysctl_table - unregister a sysctl table hierarchy
1374  * @header: the header returned from register_sysctl_table
1375  *
1376  * Unregisters the sysctl table and all children. proc entries may not
1377  * actually be removed until they are no longer used by anyone.
1378  */
1379 void unregister_sysctl_table(struct ctl_table_header * header)
1380 {
1381 	might_sleep();
1382 	spin_lock(&sysctl_lock);
1383 	start_unregistering(header);
1384 #ifdef CONFIG_PROC_FS
1385 	unregister_proc_table(header->ctl_table, proc_sys_root);
1386 #endif
1387 	spin_unlock(&sysctl_lock);
1388 	kfree(header);
1389 }
1390 
1391 /*
1392  * /proc/sys support
1393  */
1394 
1395 #ifdef CONFIG_PROC_FS
1396 
1397 /* Scan the sysctl entries in table and add them all into /proc */
1398 static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set)
1399 {
1400 	struct proc_dir_entry *de;
1401 	int len;
1402 	mode_t mode;
1403 
1404 	for (; table->ctl_name; table++) {
1405 		/* Can't do anything without a proc name. */
1406 		if (!table->procname)
1407 			continue;
1408 		/* Maybe we can't do anything with it... */
1409 		if (!table->proc_handler && !table->child) {
1410 			printk(KERN_WARNING "SYSCTL: Can't register %s\n",
1411 				table->procname);
1412 			continue;
1413 		}
1414 
1415 		len = strlen(table->procname);
1416 		mode = table->mode;
1417 
1418 		de = NULL;
1419 		if (table->proc_handler)
1420 			mode |= S_IFREG;
1421 		else {
1422 			mode |= S_IFDIR;
1423 			for (de = root->subdir; de; de = de->next) {
1424 				if (proc_match(len, table->procname, de))
1425 					break;
1426 			}
1427 			/* If the subdir exists already, de is non-NULL */
1428 		}
1429 
1430 		if (!de) {
1431 			de = create_proc_entry(table->procname, mode, root);
1432 			if (!de)
1433 				continue;
1434 			de->set = set;
1435 			de->data = (void *) table;
1436 			if (table->proc_handler)
1437 				de->proc_fops = &proc_sys_file_operations;
1438 		}
1439 		table->de = de;
1440 		if (de->mode & S_IFDIR)
1441 			register_proc_table(table->child, de, set);
1442 	}
1443 }
1444 
1445 /*
1446  * Unregister a /proc sysctl table and any subdirectories.
1447  */
1448 static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root)
1449 {
1450 	struct proc_dir_entry *de;
1451 	for (; table->ctl_name; table++) {
1452 		if (!(de = table->de))
1453 			continue;
1454 		if (de->mode & S_IFDIR) {
1455 			if (!table->child) {
1456 				printk (KERN_ALERT "Help - malformed sysctl tree on free\n");
1457 				continue;
1458 			}
1459 			unregister_proc_table(table->child, de);
1460 
1461 			/* Don't unregister directories which still have entries.. */
1462 			if (de->subdir)
1463 				continue;
1464 		}
1465 
1466 		/*
1467 		 * In any case, mark the entry as goner; we'll keep it
1468 		 * around if it's busy, but we'll know to do nothing with
1469 		 * its fields.  We are under sysctl_lock here.
1470 		 */
1471 		de->data = NULL;
1472 
1473 		/* Don't unregister proc entries that are still being used.. */
1474 		if (atomic_read(&de->count))
1475 			continue;
1476 
1477 		table->de = NULL;
1478 		remove_proc_entry(table->procname, root);
1479 	}
1480 }
1481 
1482 static ssize_t do_rw_proc(int write, struct file * file, char __user * buf,
1483 			  size_t count, loff_t *ppos)
1484 {
1485 	int op;
1486 	struct proc_dir_entry *de = PDE(file->f_dentry->d_inode);
1487 	struct ctl_table *table;
1488 	size_t res;
1489 	ssize_t error = -ENOTDIR;
1490 
1491 	spin_lock(&sysctl_lock);
1492 	if (de && de->data && use_table(de->set)) {
1493 		/*
1494 		 * at that point we know that sysctl was not unregistered
1495 		 * and won't be until we finish
1496 		 */
1497 		spin_unlock(&sysctl_lock);
1498 		table = (struct ctl_table *) de->data;
1499 		if (!table || !table->proc_handler)
1500 			goto out;
1501 		error = -EPERM;
1502 		op = (write ? 002 : 004);
1503 		if (ctl_perm(table, op))
1504 			goto out;
1505 
1506 		/* careful: calling conventions are nasty here */
1507 		res = count;
1508 		error = (*table->proc_handler)(table, write, file,
1509 						buf, &res, ppos);
1510 		if (!error)
1511 			error = res;
1512 	out:
1513 		spin_lock(&sysctl_lock);
1514 		unuse_table(de->set);
1515 	}
1516 	spin_unlock(&sysctl_lock);
1517 	return error;
1518 }
1519 
1520 static int proc_opensys(struct inode *inode, struct file *file)
1521 {
1522 	if (file->f_mode & FMODE_WRITE) {
1523 		/*
1524 		 * sysctl entries that are not writable,
1525 		 * are _NOT_ writable, capabilities or not.
1526 		 */
1527 		if (!(inode->i_mode & S_IWUSR))
1528 			return -EPERM;
1529 	}
1530 
1531 	return 0;
1532 }
1533 
1534 static ssize_t proc_readsys(struct file * file, char __user * buf,
1535 			    size_t count, loff_t *ppos)
1536 {
1537 	return do_rw_proc(0, file, buf, count, ppos);
1538 }
1539 
1540 static ssize_t proc_writesys(struct file * file, const char __user * buf,
1541 			     size_t count, loff_t *ppos)
1542 {
1543 	return do_rw_proc(1, file, (char __user *) buf, count, ppos);
1544 }
1545 
1546 /**
1547  * proc_dostring - read a string sysctl
1548  * @table: the sysctl table
1549  * @write: %TRUE if this is a write to the sysctl file
1550  * @filp: the file structure
1551  * @buffer: the user buffer
1552  * @lenp: the size of the user buffer
1553  * @ppos: file position
1554  *
1555  * Reads/writes a string from/to the user buffer. If the kernel
1556  * buffer provided is not large enough to hold the string, the
1557  * string is truncated. The copied string is %NULL-terminated.
1558  * If the string is being read by the user process, it is copied
1559  * and a newline '\n' is added. It is truncated if the buffer is
1560  * not large enough.
1561  *
1562  * Returns 0 on success.
1563  */
1564 int proc_dostring(ctl_table *table, int write, struct file *filp,
1565 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1566 {
1567 	size_t len;
1568 	char __user *p;
1569 	char c;
1570 
1571 	if (!table->data || !table->maxlen || !*lenp ||
1572 	    (*ppos && !write)) {
1573 		*lenp = 0;
1574 		return 0;
1575 	}
1576 
1577 	if (write) {
1578 		len = 0;
1579 		p = buffer;
1580 		while (len < *lenp) {
1581 			if (get_user(c, p++))
1582 				return -EFAULT;
1583 			if (c == 0 || c == '\n')
1584 				break;
1585 			len++;
1586 		}
1587 		if (len >= table->maxlen)
1588 			len = table->maxlen-1;
1589 		if(copy_from_user(table->data, buffer, len))
1590 			return -EFAULT;
1591 		((char *) table->data)[len] = 0;
1592 		*ppos += *lenp;
1593 	} else {
1594 		len = strlen(table->data);
1595 		if (len > table->maxlen)
1596 			len = table->maxlen;
1597 		if (len > *lenp)
1598 			len = *lenp;
1599 		if (len)
1600 			if(copy_to_user(buffer, table->data, len))
1601 				return -EFAULT;
1602 		if (len < *lenp) {
1603 			if(put_user('\n', ((char __user *) buffer) + len))
1604 				return -EFAULT;
1605 			len++;
1606 		}
1607 		*lenp = len;
1608 		*ppos += len;
1609 	}
1610 	return 0;
1611 }
1612 
1613 /*
1614  *	Special case of dostring for the UTS structure. This has locks
1615  *	to observe. Should this be in kernel/sys.c ????
1616  */
1617 
1618 static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
1619 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1620 {
1621 	int r;
1622 
1623 	if (!write) {
1624 		down_read(&uts_sem);
1625 		r=proc_dostring(table,0,filp,buffer,lenp, ppos);
1626 		up_read(&uts_sem);
1627 	} else {
1628 		down_write(&uts_sem);
1629 		r=proc_dostring(table,1,filp,buffer,lenp, ppos);
1630 		up_write(&uts_sem);
1631 	}
1632 	return r;
1633 }
1634 
/*
 * Default conversion between a parsed (sign, magnitude) pair and an int.
 *
 * write: store *lvalp into *valp, negated when *negp is non-zero.
 * read:  split *valp into *negp (-1 if negative, else 0) and its
 *        magnitude in *lvalp.
 *
 * @data is unused.  Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		unsigned long magnitude = *lvalp;

		if (*negp)
			magnitude = -magnitude;
		*valp = magnitude;
		return 0;
	}

	{
		int cur = *valp;

		*negp = (cur < 0) ? -1 : 0;
		*lvalp = (unsigned long)((cur < 0) ? -cur : cur);
	}
	return 0;
}
1653 
1654 static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1655 		  void __user *buffer, size_t *lenp, loff_t *ppos,
1656 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1657 			      int write, void *data),
1658 		  void *data)
1659 {
1660 #define TMPBUFLEN 21
1661 	int *i, vleft, first=1, neg, val;
1662 	unsigned long lval;
1663 	size_t left, len;
1664 
1665 	char buf[TMPBUFLEN], *p;
1666 	char __user *s = buffer;
1667 
1668 	if (!table->data || !table->maxlen || !*lenp ||
1669 	    (*ppos && !write)) {
1670 		*lenp = 0;
1671 		return 0;
1672 	}
1673 
1674 	i = (int *) table->data;
1675 	vleft = table->maxlen / sizeof(*i);
1676 	left = *lenp;
1677 
1678 	if (!conv)
1679 		conv = do_proc_dointvec_conv;
1680 
1681 	for (; left && vleft--; i++, first=0) {
1682 		if (write) {
1683 			while (left) {
1684 				char c;
1685 				if (get_user(c, s))
1686 					return -EFAULT;
1687 				if (!isspace(c))
1688 					break;
1689 				left--;
1690 				s++;
1691 			}
1692 			if (!left)
1693 				break;
1694 			neg = 0;
1695 			len = left;
1696 			if (len > sizeof(buf) - 1)
1697 				len = sizeof(buf) - 1;
1698 			if (copy_from_user(buf, s, len))
1699 				return -EFAULT;
1700 			buf[len] = 0;
1701 			p = buf;
1702 			if (*p == '-' && left > 1) {
1703 				neg = 1;
1704 				left--, p++;
1705 			}
1706 			if (*p < '0' || *p > '9')
1707 				break;
1708 
1709 			lval = simple_strtoul(p, &p, 0);
1710 
1711 			len = p-buf;
1712 			if ((len < left) && *p && !isspace(*p))
1713 				break;
1714 			if (neg)
1715 				val = -val;
1716 			s += len;
1717 			left -= len;
1718 
1719 			if (conv(&neg, &lval, i, 1, data))
1720 				break;
1721 		} else {
1722 			p = buf;
1723 			if (!first)
1724 				*p++ = '\t';
1725 
1726 			if (conv(&neg, &lval, i, 0, data))
1727 				break;
1728 
1729 			sprintf(p, "%s%lu", neg ? "-" : "", lval);
1730 			len = strlen(buf);
1731 			if (len > left)
1732 				len = left;
1733 			if(copy_to_user(s, buf, len))
1734 				return -EFAULT;
1735 			left -= len;
1736 			s += len;
1737 		}
1738 	}
1739 
1740 	if (!write && !first && left) {
1741 		if(put_user('\n', s))
1742 			return -EFAULT;
1743 		left--, s++;
1744 	}
1745 	if (write) {
1746 		while (left) {
1747 			char c;
1748 			if (get_user(c, s++))
1749 				return -EFAULT;
1750 			if (!isspace(c))
1751 				break;
1752 			left--;
1753 		}
1754 	}
1755 	if (write && first)
1756 		return -EINVAL;
1757 	*lenp -= left;
1758 	*ppos += *lenp;
1759 	return 0;
1760 #undef TMPBUFLEN
1761 }
1762 
1763 /**
1764  * proc_dointvec - read a vector of integers
1765  * @table: the sysctl table
1766  * @write: %TRUE if this is a write to the sysctl file
1767  * @filp: the file structure
1768  * @buffer: the user buffer
1769  * @lenp: the size of the user buffer
1770  * @ppos: file position
1771  *
1772  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1773  * values from/to the user buffer, treated as an ASCII string.
1774  *
1775  * Returns 0 on success.
1776  */
1777 int proc_dointvec(ctl_table *table, int write, struct file *filp,
1778 		     void __user *buffer, size_t *lenp, loff_t *ppos)
1779 {
1780     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1781 		    	    NULL,NULL);
1782 }
1783 
/* How a written value is combined with the stored one. */
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2
#define OP_MAX	3
#define OP_MIN	4

/*
 * Conversion routine that merges a written value into *valp according to
 * the operation code pointed to by @data (one of the OP_* values above).
 * An unknown code leaves *valp untouched.  Reads behave like the default
 * conversion: sign in *negp (-1 or 0), magnitude in *lvalp.
 * Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	int val;

	if (!write) {
		val = *valp;
		*negp = (val < 0) ? -1 : 0;
		*lvalp = (unsigned long)((val < 0) ? -val : val);
		return 0;
	}

	val = *negp ? -*lvalp : *lvalp;

	if (op == OP_SET) {
		*valp = val;
	} else if (op == OP_AND) {
		*valp &= val;
	} else if (op == OP_OR) {
		*valp |= val;
	} else if (op == OP_MAX) {
		if (*valp < val)
			*valp = val;
	} else if (op == OP_MIN) {
		if (*valp > val)
			*valp = val;
	}
	return 0;
}
1820 
1821 /*
1822  *	init may raise the set.
1823  */
1824 
1825 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1826 			void __user *buffer, size_t *lenp, loff_t *ppos)
1827 {
1828 	int op;
1829 
1830 	if (!capable(CAP_SYS_MODULE)) {
1831 		return -EPERM;
1832 	}
1833 
1834 	op = (current->pid == 1) ? OP_SET : OP_AND;
1835 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1836 				do_proc_dointvec_bset_conv,&op);
1837 }
1838 
/* Optional bounds for do_proc_dointvec_minmax_conv(); NULL means no bound. */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion routine with range checking.  @data points to a
 * struct do_proc_dointvec_minmax_conv_param.
 *
 * write: the signed value is stored in *valp only if it lies within the
 *        given bounds; otherwise -EINVAL is returned and *valp is left
 *        unchanged.
 * read:  sign goes to *negp (-1 or 0), magnitude to *lvalp.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *bounds = data;
	int val;

	if (!write) {
		val = *valp;
		*negp = (val < 0) ? -1 : 0;
		*lvalp = (unsigned long)((val < 0) ? -val : val);
		return 0;
	}

	val = *negp ? -*lvalp : *lvalp;
	if (bounds->min && *bounds->min > val)
		return -EINVAL;
	if (bounds->max && *bounds->max < val)
		return -EINVAL;
	*valp = val;
	return 0;
}
1867 
1868 /**
1869  * proc_dointvec_minmax - read a vector of integers with min/max values
1870  * @table: the sysctl table
1871  * @write: %TRUE if this is a write to the sysctl file
1872  * @filp: the file structure
1873  * @buffer: the user buffer
1874  * @lenp: the size of the user buffer
1875  * @ppos: file position
1876  *
1877  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1878  * values from/to the user buffer, treated as an ASCII string.
1879  *
1880  * This routine will ensure the values are within the range specified by
1881  * table->extra1 (min) and table->extra2 (max).
1882  *
1883  * Returns 0 on success.
1884  */
1885 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
1886 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1887 {
1888 	struct do_proc_dointvec_minmax_conv_param param = {
1889 		.min = (int *) table->extra1,
1890 		.max = (int *) table->extra2,
1891 	};
1892 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
1893 				do_proc_dointvec_minmax_conv, &param);
1894 }
1895 
1896 static int do_proc_doulongvec_minmax(ctl_table *table, int write,
1897 				     struct file *filp,
1898 				     void __user *buffer,
1899 				     size_t *lenp, loff_t *ppos,
1900 				     unsigned long convmul,
1901 				     unsigned long convdiv)
1902 {
1903 #define TMPBUFLEN 21
1904 	unsigned long *i, *min, *max, val;
1905 	int vleft, first=1, neg;
1906 	size_t len, left;
1907 	char buf[TMPBUFLEN], *p;
1908 	char __user *s = buffer;
1909 
1910 	if (!table->data || !table->maxlen || !*lenp ||
1911 	    (*ppos && !write)) {
1912 		*lenp = 0;
1913 		return 0;
1914 	}
1915 
1916 	i = (unsigned long *) table->data;
1917 	min = (unsigned long *) table->extra1;
1918 	max = (unsigned long *) table->extra2;
1919 	vleft = table->maxlen / sizeof(unsigned long);
1920 	left = *lenp;
1921 
1922 	for (; left && vleft--; i++, min++, max++, first=0) {
1923 		if (write) {
1924 			while (left) {
1925 				char c;
1926 				if (get_user(c, s))
1927 					return -EFAULT;
1928 				if (!isspace(c))
1929 					break;
1930 				left--;
1931 				s++;
1932 			}
1933 			if (!left)
1934 				break;
1935 			neg = 0;
1936 			len = left;
1937 			if (len > TMPBUFLEN-1)
1938 				len = TMPBUFLEN-1;
1939 			if (copy_from_user(buf, s, len))
1940 				return -EFAULT;
1941 			buf[len] = 0;
1942 			p = buf;
1943 			if (*p == '-' && left > 1) {
1944 				neg = 1;
1945 				left--, p++;
1946 			}
1947 			if (*p < '0' || *p > '9')
1948 				break;
1949 			val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
1950 			len = p-buf;
1951 			if ((len < left) && *p && !isspace(*p))
1952 				break;
1953 			if (neg)
1954 				val = -val;
1955 			s += len;
1956 			left -= len;
1957 
1958 			if(neg)
1959 				continue;
1960 			if ((min && val < *min) || (max && val > *max))
1961 				continue;
1962 			*i = val;
1963 		} else {
1964 			p = buf;
1965 			if (!first)
1966 				*p++ = '\t';
1967 			sprintf(p, "%lu", convdiv * (*i) / convmul);
1968 			len = strlen(buf);
1969 			if (len > left)
1970 				len = left;
1971 			if(copy_to_user(s, buf, len))
1972 				return -EFAULT;
1973 			left -= len;
1974 			s += len;
1975 		}
1976 	}
1977 
1978 	if (!write && !first && left) {
1979 		if(put_user('\n', s))
1980 			return -EFAULT;
1981 		left--, s++;
1982 	}
1983 	if (write) {
1984 		while (left) {
1985 			char c;
1986 			if (get_user(c, s++))
1987 				return -EFAULT;
1988 			if (!isspace(c))
1989 				break;
1990 			left--;
1991 		}
1992 	}
1993 	if (write && first)
1994 		return -EINVAL;
1995 	*lenp -= left;
1996 	*ppos += *lenp;
1997 	return 0;
1998 #undef TMPBUFLEN
1999 }
2000 
2001 /**
2002  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2003  * @table: the sysctl table
2004  * @write: %TRUE if this is a write to the sysctl file
2005  * @filp: the file structure
2006  * @buffer: the user buffer
2007  * @lenp: the size of the user buffer
2008  * @ppos: file position
2009  *
2010  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2011  * values from/to the user buffer, treated as an ASCII string.
2012  *
2013  * This routine will ensure the values are within the range specified by
2014  * table->extra1 (min) and table->extra2 (max).
2015  *
2016  * Returns 0 on success.
2017  */
2018 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2019 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2020 {
2021     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2022 }
2023 
2024 /**
2025  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2026  * @table: the sysctl table
2027  * @write: %TRUE if this is a write to the sysctl file
2028  * @filp: the file structure
2029  * @buffer: the user buffer
2030  * @lenp: the size of the user buffer
2031  * @ppos: file position
2032  *
2033  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2034  * values from/to the user buffer, treated as an ASCII string. The values
2035  * are treated as milliseconds, and converted to jiffies when they are stored.
2036  *
2037  * This routine will ensure the values are within the range specified by
2038  * table->extra1 (min) and table->extra2 (max).
2039  *
2040  * Returns 0 on success.
2041  */
2042 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2043 				      struct file *filp,
2044 				      void __user *buffer,
2045 				      size_t *lenp, loff_t *ppos)
2046 {
2047     return do_proc_doulongvec_minmax(table, write, filp, buffer,
2048 				     lenp, ppos, HZ, 1000l);
2049 }
2050 
2051 
2052 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2053 					 int *valp,
2054 					 int write, void *data)
2055 {
2056 	if (write) {
2057 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2058 	} else {
2059 		int val = *valp;
2060 		unsigned long lval;
2061 		if (val < 0) {
2062 			*negp = -1;
2063 			lval = (unsigned long)-val;
2064 		} else {
2065 			*negp = 0;
2066 			lval = (unsigned long)val;
2067 		}
2068 		*lvalp = lval / HZ;
2069 	}
2070 	return 0;
2071 }
2072 
/*
 * Conversion routine for values expressed in clock_t units and stored in
 * jiffies.
 *
 * write: the signed input is passed through clock_t_to_jiffies().
 * read:  *negp gets the sign (-1 or 0), *lvalp the magnitude passed
 *        through jiffies_to_clock_t().
 *
 * @data is unused.  Always returns 0.
 */
static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
						int *valp,
						int write, void *data)
{
	if (write) {
		unsigned long ticks = *lvalp;

		if (*negp)
			ticks = -ticks;
		*valp = clock_t_to_jiffies(ticks);
	} else {
		int cur = *valp;
		unsigned long magnitude;

		*negp = (cur < 0) ? -1 : 0;
		magnitude = (unsigned long)((cur < 0) ? -cur : cur);
		*lvalp = jiffies_to_clock_t(magnitude);
	}
	return 0;
}
2093 
/*
 * Conversion routine for values expressed in milliseconds and stored in
 * jiffies.
 *
 * write: the signed input is passed through msecs_to_jiffies().
 * read:  *negp gets the sign (-1 or 0), *lvalp the magnitude passed
 *        through jiffies_to_msecs().
 *
 * @data is unused.  Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		unsigned long msecs = *lvalp;

		if (*negp)
			msecs = -msecs;
		*valp = msecs_to_jiffies(msecs);
	} else {
		int cur = *valp;
		unsigned long magnitude;

		*negp = (cur < 0) ? -1 : 0;
		magnitude = (unsigned long)((cur < 0) ? -cur : cur);
		*lvalp = jiffies_to_msecs(magnitude);
	}
	return 0;
}
2114 
2115 /**
2116  * proc_dointvec_jiffies - read a vector of integers as seconds
2117  * @table: the sysctl table
2118  * @write: %TRUE if this is a write to the sysctl file
2119  * @filp: the file structure
2120  * @buffer: the user buffer
2121  * @lenp: the size of the user buffer
2122  * @ppos: file position
2123  *
2124  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2125  * values from/to the user buffer, treated as an ASCII string.
2126  * The values read are assumed to be in seconds, and are converted into
2127  * jiffies.
2128  *
2129  * Returns 0 on success.
2130  */
2131 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2132 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2133 {
2134     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2135 		    	    do_proc_dointvec_jiffies_conv,NULL);
2136 }
2137 
2138 /**
2139  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2140  * @table: the sysctl table
2141  * @write: %TRUE if this is a write to the sysctl file
2142  * @filp: the file structure
2143  * @buffer: the user buffer
2144  * @lenp: the size of the user buffer
2145  * @ppos: pointer to the file position
2146  *
2147  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2148  * values from/to the user buffer, treated as an ASCII string.
2149  * The values read are assumed to be in 1/USER_HZ seconds, and
2150  * are converted into jiffies.
2151  *
2152  * Returns 0 on success.
2153  */
2154 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2155 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2156 {
2157     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2158 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2159 }
2160 
2161 /**
2162  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2163  * @table: the sysctl table
2164  * @write: %TRUE if this is a write to the sysctl file
2165  * @filp: the file structure
2166  * @buffer: the user buffer
2167  * @lenp: the size of the user buffer
2168  * @ppos: file position
2169  * @ppos: the current position in the file
2170  *
2171  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2172  * values from/to the user buffer, treated as an ASCII string.
2173  * The values read are assumed to be in 1/1000 seconds, and
2174  * are converted into jiffies.
2175  *
2176  * Returns 0 on success.
2177  */
2178 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2179 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2180 {
2181 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2182 				do_proc_dointvec_ms_jiffies_conv, NULL);
2183 }
2184 
2185 #else /* CONFIG_PROC_FS */
2186 
2187 int proc_dostring(ctl_table *table, int write, struct file *filp,
2188 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2189 {
2190 	return -ENOSYS;
2191 }
2192 
2193 static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
2194 			    void __user *buffer, size_t *lenp, loff_t *ppos)
2195 {
2196 	return -ENOSYS;
2197 }
2198 
2199 int proc_dointvec(ctl_table *table, int write, struct file *filp,
2200 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2201 {
2202 	return -ENOSYS;
2203 }
2204 
2205 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2206 			void __user *buffer, size_t *lenp, loff_t *ppos)
2207 {
2208 	return -ENOSYS;
2209 }
2210 
2211 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2212 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2213 {
2214 	return -ENOSYS;
2215 }
2216 
2217 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2218 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2219 {
2220 	return -ENOSYS;
2221 }
2222 
2223 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2224 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2225 {
2226 	return -ENOSYS;
2227 }
2228 
2229 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2230 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2231 {
2232 	return -ENOSYS;
2233 }
2234 
2235 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2236 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2237 {
2238 	return -ENOSYS;
2239 }
2240 
2241 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2242 				      struct file *filp,
2243 				      void __user *buffer,
2244 				      size_t *lenp, loff_t *ppos)
2245 {
2246     return -ENOSYS;
2247 }
2248 
2249 
2250 #endif /* CONFIG_PROC_FS */
2251 
2252 
2253 /*
2254  * General sysctl support routines
2255  */
2256 
2257 /* The generic string strategy routine: */
2258 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2259 		  void __user *oldval, size_t __user *oldlenp,
2260 		  void __user *newval, size_t newlen, void **context)
2261 {
2262 	if (!table->data || !table->maxlen)
2263 		return -ENOTDIR;
2264 
2265 	if (oldval && oldlenp) {
2266 		size_t bufsize;
2267 		if (get_user(bufsize, oldlenp))
2268 			return -EFAULT;
2269 		if (bufsize) {
2270 			size_t len = strlen(table->data), copied;
2271 
2272 			/* This shouldn't trigger for a well-formed sysctl */
2273 			if (len > table->maxlen)
2274 				len = table->maxlen;
2275 
2276 			/* Copy up to a max of bufsize-1 bytes of the string */
2277 			copied = (len >= bufsize) ? bufsize - 1 : len;
2278 
2279 			if (copy_to_user(oldval, table->data, copied) ||
2280 			    put_user(0, (char __user *)(oldval + copied)))
2281 				return -EFAULT;
2282 			if (put_user(len, oldlenp))
2283 				return -EFAULT;
2284 		}
2285 	}
2286 	if (newval && newlen) {
2287 		size_t len = newlen;
2288 		if (len > table->maxlen)
2289 			len = table->maxlen;
2290 		if(copy_from_user(table->data, newval, len))
2291 			return -EFAULT;
2292 		if (len == table->maxlen)
2293 			len--;
2294 		((char *) table->data)[len] = 0;
2295 	}
2296 	return 1;
2297 }
2298 
2299 /*
2300  * This function makes sure that all of the integers in the vector
2301  * are between the minimum and maximum values given in the arrays
2302  * table->extra1 and table->extra2, respectively.
2303  */
2304 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2305 		void __user *oldval, size_t __user *oldlenp,
2306 		void __user *newval, size_t newlen, void **context)
2307 {
2308 
2309 	if (newval && newlen) {
2310 		int __user *vec = (int __user *) newval;
2311 		int *min = (int *) table->extra1;
2312 		int *max = (int *) table->extra2;
2313 		size_t length;
2314 		int i;
2315 
2316 		if (newlen % sizeof(int) != 0)
2317 			return -EINVAL;
2318 
2319 		if (!table->extra1 && !table->extra2)
2320 			return 0;
2321 
2322 		if (newlen > table->maxlen)
2323 			newlen = table->maxlen;
2324 		length = newlen / sizeof(int);
2325 
2326 		for (i = 0; i < length; i++) {
2327 			int value;
2328 			if (get_user(value, vec + i))
2329 				return -EFAULT;
2330 			if (min && value < min[i])
2331 				return -EINVAL;
2332 			if (max && value > max[i])
2333 				return -EINVAL;
2334 		}
2335 	}
2336 	return 0;
2337 }
2338 
2339 /* Strategy function to convert jiffies to seconds */
2340 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2341 		void __user *oldval, size_t __user *oldlenp,
2342 		void __user *newval, size_t newlen, void **context)
2343 {
2344 	if (oldval) {
2345 		size_t olen;
2346 		if (oldlenp) {
2347 			if (get_user(olen, oldlenp))
2348 				return -EFAULT;
2349 			if (olen!=sizeof(int))
2350 				return -EINVAL;
2351 		}
2352 		if (put_user(*(int *)(table->data)/HZ, (int __user *)oldval) ||
2353 		    (oldlenp && put_user(sizeof(int),oldlenp)))
2354 			return -EFAULT;
2355 	}
2356 	if (newval && newlen) {
2357 		int new;
2358 		if (newlen != sizeof(int))
2359 			return -EINVAL;
2360 		if (get_user(new, (int __user *)newval))
2361 			return -EFAULT;
2362 		*(int *)(table->data) = new*HZ;
2363 	}
2364 	return 1;
2365 }
2366 
2367 /* Strategy function to convert jiffies to seconds */
2368 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2369 		void __user *oldval, size_t __user *oldlenp,
2370 		void __user *newval, size_t newlen, void **context)
2371 {
2372 	if (oldval) {
2373 		size_t olen;
2374 		if (oldlenp) {
2375 			if (get_user(olen, oldlenp))
2376 				return -EFAULT;
2377 			if (olen!=sizeof(int))
2378 				return -EINVAL;
2379 		}
2380 		if (put_user(jiffies_to_msecs(*(int *)(table->data)), (int __user *)oldval) ||
2381 		    (oldlenp && put_user(sizeof(int),oldlenp)))
2382 			return -EFAULT;
2383 	}
2384 	if (newval && newlen) {
2385 		int new;
2386 		if (newlen != sizeof(int))
2387 			return -EINVAL;
2388 		if (get_user(new, (int __user *)newval))
2389 			return -EFAULT;
2390 		*(int *)(table->data) = msecs_to_jiffies(new);
2391 	}
2392 	return 1;
2393 }
2394 
2395 #else /* CONFIG_SYSCTL */
2396 
2397 
2398 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2399 {
2400 	return -ENOSYS;
2401 }
2402 
2403 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2404 		  void __user *oldval, size_t __user *oldlenp,
2405 		  void __user *newval, size_t newlen, void **context)
2406 {
2407 	return -ENOSYS;
2408 }
2409 
2410 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2411 		void __user *oldval, size_t __user *oldlenp,
2412 		void __user *newval, size_t newlen, void **context)
2413 {
2414 	return -ENOSYS;
2415 }
2416 
2417 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2418 		void __user *oldval, size_t __user *oldlenp,
2419 		void __user *newval, size_t newlen, void **context)
2420 {
2421 	return -ENOSYS;
2422 }
2423 
2424 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2425 		void __user *oldval, size_t __user *oldlenp,
2426 		void __user *newval, size_t newlen, void **context)
2427 {
2428 	return -ENOSYS;
2429 }
2430 
2431 int proc_dostring(ctl_table *table, int write, struct file *filp,
2432 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2433 {
2434 	return -ENOSYS;
2435 }
2436 
2437 int proc_dointvec(ctl_table *table, int write, struct file *filp,
2438 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2439 {
2440 	return -ENOSYS;
2441 }
2442 
2443 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2444 			void __user *buffer, size_t *lenp, loff_t *ppos)
2445 {
2446 	return -ENOSYS;
2447 }
2448 
2449 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2450 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2451 {
2452 	return -ENOSYS;
2453 }
2454 
2455 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2456 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2457 {
2458 	return -ENOSYS;
2459 }
2460 
2461 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2462 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2463 {
2464 	return -ENOSYS;
2465 }
2466 
2467 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2468 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2469 {
2470 	return -ENOSYS;
2471 }
2472 
2473 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2474 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2475 {
2476 	return -ENOSYS;
2477 }
2478 
2479 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2480 				      struct file *filp,
2481 				      void __user *buffer,
2482 				      size_t *lenp, loff_t *ppos)
2483 {
2484     return -ENOSYS;
2485 }
2486 
2487 struct ctl_table_header * register_sysctl_table(ctl_table * table,
2488 						int insert_at_head)
2489 {
2490 	return NULL;
2491 }
2492 
/*
 * unregister_sysctl_table - stub from the "#else" side of the
 * CONFIG_SYSCTL conditional.  Deliberately empty: the header argument
 * is ignored and nothing is done.
 */
void unregister_sysctl_table(struct ctl_table_header *table)
{
}
2496 
2497 #endif /* CONFIG_SYSCTL */
2498 
/*
 * All exports are collected in one place rather than following each
 * definition: the exported functions exist both as a real
 * implementation and as a stub in a conditional branch, so a single
 * list here avoids writing every EXPORT_SYMBOL() twice.
 */

/* Table registration. */
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(unregister_sysctl_table);

/* proc handlers. */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* Strategy routines. */
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
2517