xref: /linux/kernel/sysctl.c (revision 5e8d780d745c1619aba81fe7166c5a4b5cad2b84)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/config.h>
22 #include <linux/module.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/proc_fs.h>
28 #include <linux/capability.h>
29 #include <linux/ctype.h>
30 #include <linux/utsname.h>
31 #include <linux/capability.h>
32 #include <linux/smp_lock.h>
33 #include <linux/init.h>
34 #include <linux/kernel.h>
35 #include <linux/kobject.h>
36 #include <linux/net.h>
37 #include <linux/sysrq.h>
38 #include <linux/highuid.h>
39 #include <linux/writeback.h>
40 #include <linux/hugetlb.h>
41 #include <linux/security.h>
42 #include <linux/initrd.h>
43 #include <linux/times.h>
44 #include <linux/limits.h>
45 #include <linux/dcache.h>
46 #include <linux/syscalls.h>
47 #include <linux/nfs_fs.h>
48 #include <linux/acpi.h>
49 
50 #include <asm/uaccess.h>
51 #include <asm/processor.h>
52 
53 extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
54                      void __user *buffer, size_t *lenp, loff_t *ppos);
55 
56 #if defined(CONFIG_SYSCTL)
57 
58 /* External variables not in a header file. */
59 extern int C_A_D;
60 extern int sysctl_overcommit_memory;
61 extern int sysctl_overcommit_ratio;
62 extern int sysctl_panic_on_oom;
63 extern int max_threads;
64 extern int sysrq_enabled;
65 extern int core_uses_pid;
66 extern int suid_dumpable;
67 extern char core_pattern[];
68 extern int cad_pid;
69 extern int pid_max;
70 extern int min_free_kbytes;
71 extern int printk_ratelimit_jiffies;
72 extern int printk_ratelimit_burst;
73 extern int pid_max_min, pid_max_max;
74 extern int sysctl_drop_caches;
75 extern int percpu_pagelist_fraction;
76 extern int compat_log;
77 
78 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
79 int unknown_nmi_panic;
80 extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
81 				  void __user *, size_t *, loff_t *);
82 #endif
83 
84 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
85 static int maxolduid = 65535;
86 static int minolduid;
87 static int min_percpu_pagelist_fract = 8;
88 
89 static int ngroups_max = NGROUPS_MAX;
90 
91 #ifdef CONFIG_KMOD
92 extern char modprobe_path[];
93 #endif
94 #ifdef CONFIG_CHR_DEV_SG
95 extern int sg_big_buff;
96 #endif
97 #ifdef CONFIG_SYSVIPC
98 extern size_t shm_ctlmax;
99 extern size_t shm_ctlall;
100 extern int shm_ctlmni;
101 extern int msg_ctlmax;
102 extern int msg_ctlmnb;
103 extern int msg_ctlmni;
104 extern int sem_ctls[];
105 #endif
106 
107 #ifdef __sparc__
108 extern char reboot_command [];
109 extern int stop_a_enabled;
110 extern int scons_pwroff;
111 #endif
112 
113 #ifdef __hppa__
114 extern int pwrsw_enabled;
115 extern int unaligned_enabled;
116 #endif
117 
118 #ifdef CONFIG_S390
119 #ifdef CONFIG_MATHEMU
120 extern int sysctl_ieee_emulation_warnings;
121 #endif
122 extern int sysctl_userprocess_debug;
123 extern int spin_retry;
124 #endif
125 
126 extern int sysctl_hz_timer;
127 
128 #ifdef CONFIG_BSD_PROCESS_ACCT
129 extern int acct_parm[];
130 #endif
131 
132 #ifdef CONFIG_IA64
133 extern int no_unaligned_warning;
134 #endif
135 
136 #ifdef CONFIG_RT_MUTEXES
137 extern int max_lock_depth;
138 #endif
139 
140 static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
141 		       ctl_table *, void **);
142 static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
143 		  void __user *buffer, size_t *lenp, loff_t *ppos);
144 
145 static ctl_table root_table[];
146 static struct ctl_table_header root_table_header =
147 	{ root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
148 
149 static ctl_table kern_table[];
150 static ctl_table vm_table[];
151 static ctl_table fs_table[];
152 static ctl_table debug_table[];
153 static ctl_table dev_table[];
154 extern ctl_table random_table[];
155 #ifdef CONFIG_UNIX98_PTYS
156 extern ctl_table pty_table[];
157 #endif
158 #ifdef CONFIG_INOTIFY_USER
159 extern ctl_table inotify_table[];
160 #endif
161 
162 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
163 int sysctl_legacy_va_layout;
164 #endif
165 
166 /* /proc declarations: */
167 
168 #ifdef CONFIG_PROC_FS
169 
170 static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *);
171 static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *);
172 static int proc_opensys(struct inode *, struct file *);
173 
174 struct file_operations proc_sys_file_operations = {
175 	.open		= proc_opensys,
176 	.read		= proc_readsys,
177 	.write		= proc_writesys,
178 };
179 
180 extern struct proc_dir_entry *proc_sys_root;
181 
182 static void register_proc_table(ctl_table *, struct proc_dir_entry *, void *);
183 static void unregister_proc_table(ctl_table *, struct proc_dir_entry *);
184 #endif
185 
186 /* The default sysctl tables: */
187 
188 static ctl_table root_table[] = {
189 	{
190 		.ctl_name	= CTL_KERN,
191 		.procname	= "kernel",
192 		.mode		= 0555,
193 		.child		= kern_table,
194 	},
195 	{
196 		.ctl_name	= CTL_VM,
197 		.procname	= "vm",
198 		.mode		= 0555,
199 		.child		= vm_table,
200 	},
201 #ifdef CONFIG_NET
202 	{
203 		.ctl_name	= CTL_NET,
204 		.procname	= "net",
205 		.mode		= 0555,
206 		.child		= net_table,
207 	},
208 #endif
209 	{
210 		.ctl_name	= CTL_FS,
211 		.procname	= "fs",
212 		.mode		= 0555,
213 		.child		= fs_table,
214 	},
215 	{
216 		.ctl_name	= CTL_DEBUG,
217 		.procname	= "debug",
218 		.mode		= 0555,
219 		.child		= debug_table,
220 	},
221 	{
222 		.ctl_name	= CTL_DEV,
223 		.procname	= "dev",
224 		.mode		= 0555,
225 		.child		= dev_table,
226 	},
227 
228 	{ .ctl_name = 0 }
229 };
230 
231 static ctl_table kern_table[] = {
232 	{
233 		.ctl_name	= KERN_OSTYPE,
234 		.procname	= "ostype",
235 		.data		= system_utsname.sysname,
236 		.maxlen		= sizeof(system_utsname.sysname),
237 		.mode		= 0444,
238 		.proc_handler	= &proc_doutsstring,
239 		.strategy	= &sysctl_string,
240 	},
241 	{
242 		.ctl_name	= KERN_OSRELEASE,
243 		.procname	= "osrelease",
244 		.data		= system_utsname.release,
245 		.maxlen		= sizeof(system_utsname.release),
246 		.mode		= 0444,
247 		.proc_handler	= &proc_doutsstring,
248 		.strategy	= &sysctl_string,
249 	},
250 	{
251 		.ctl_name	= KERN_VERSION,
252 		.procname	= "version",
253 		.data		= system_utsname.version,
254 		.maxlen		= sizeof(system_utsname.version),
255 		.mode		= 0444,
256 		.proc_handler	= &proc_doutsstring,
257 		.strategy	= &sysctl_string,
258 	},
259 	{
260 		.ctl_name	= KERN_NODENAME,
261 		.procname	= "hostname",
262 		.data		= system_utsname.nodename,
263 		.maxlen		= sizeof(system_utsname.nodename),
264 		.mode		= 0644,
265 		.proc_handler	= &proc_doutsstring,
266 		.strategy	= &sysctl_string,
267 	},
268 	{
269 		.ctl_name	= KERN_DOMAINNAME,
270 		.procname	= "domainname",
271 		.data		= system_utsname.domainname,
272 		.maxlen		= sizeof(system_utsname.domainname),
273 		.mode		= 0644,
274 		.proc_handler	= &proc_doutsstring,
275 		.strategy	= &sysctl_string,
276 	},
277 	{
278 		.ctl_name	= KERN_PANIC,
279 		.procname	= "panic",
280 		.data		= &panic_timeout,
281 		.maxlen		= sizeof(int),
282 		.mode		= 0644,
283 		.proc_handler	= &proc_dointvec,
284 	},
285 	{
286 		.ctl_name	= KERN_CORE_USES_PID,
287 		.procname	= "core_uses_pid",
288 		.data		= &core_uses_pid,
289 		.maxlen		= sizeof(int),
290 		.mode		= 0644,
291 		.proc_handler	= &proc_dointvec,
292 	},
293 	{
294 		.ctl_name	= KERN_CORE_PATTERN,
295 		.procname	= "core_pattern",
296 		.data		= core_pattern,
297 		.maxlen		= 64,
298 		.mode		= 0644,
299 		.proc_handler	= &proc_dostring,
300 		.strategy	= &sysctl_string,
301 	},
302 	{
303 		.ctl_name	= KERN_TAINTED,
304 		.procname	= "tainted",
305 		.data		= &tainted,
306 		.maxlen		= sizeof(int),
307 		.mode		= 0444,
308 		.proc_handler	= &proc_dointvec,
309 	},
310 	{
311 		.ctl_name	= KERN_CAP_BSET,
312 		.procname	= "cap-bound",
313 		.data		= &cap_bset,
314 		.maxlen		= sizeof(kernel_cap_t),
315 		.mode		= 0600,
316 		.proc_handler	= &proc_dointvec_bset,
317 	},
318 #ifdef CONFIG_BLK_DEV_INITRD
319 	{
320 		.ctl_name	= KERN_REALROOTDEV,
321 		.procname	= "real-root-dev",
322 		.data		= &real_root_dev,
323 		.maxlen		= sizeof(int),
324 		.mode		= 0644,
325 		.proc_handler	= &proc_dointvec,
326 	},
327 #endif
328 #ifdef __sparc__
329 	{
330 		.ctl_name	= KERN_SPARC_REBOOT,
331 		.procname	= "reboot-cmd",
332 		.data		= reboot_command,
333 		.maxlen		= 256,
334 		.mode		= 0644,
335 		.proc_handler	= &proc_dostring,
336 		.strategy	= &sysctl_string,
337 	},
338 	{
339 		.ctl_name	= KERN_SPARC_STOP_A,
340 		.procname	= "stop-a",
341 		.data		= &stop_a_enabled,
342 		.maxlen		= sizeof (int),
343 		.mode		= 0644,
344 		.proc_handler	= &proc_dointvec,
345 	},
346 	{
347 		.ctl_name	= KERN_SPARC_SCONS_PWROFF,
348 		.procname	= "scons-poweroff",
349 		.data		= &scons_pwroff,
350 		.maxlen		= sizeof (int),
351 		.mode		= 0644,
352 		.proc_handler	= &proc_dointvec,
353 	},
354 #endif
355 #ifdef __hppa__
356 	{
357 		.ctl_name	= KERN_HPPA_PWRSW,
358 		.procname	= "soft-power",
359 		.data		= &pwrsw_enabled,
360 		.maxlen		= sizeof (int),
361 	 	.mode		= 0644,
362 		.proc_handler	= &proc_dointvec,
363 	},
364 	{
365 		.ctl_name	= KERN_HPPA_UNALIGNED,
366 		.procname	= "unaligned-trap",
367 		.data		= &unaligned_enabled,
368 		.maxlen		= sizeof (int),
369 		.mode		= 0644,
370 		.proc_handler	= &proc_dointvec,
371 	},
372 #endif
373 	{
374 		.ctl_name	= KERN_CTLALTDEL,
375 		.procname	= "ctrl-alt-del",
376 		.data		= &C_A_D,
377 		.maxlen		= sizeof(int),
378 		.mode		= 0644,
379 		.proc_handler	= &proc_dointvec,
380 	},
381 	{
382 		.ctl_name	= KERN_PRINTK,
383 		.procname	= "printk",
384 		.data		= &console_loglevel,
385 		.maxlen		= 4*sizeof(int),
386 		.mode		= 0644,
387 		.proc_handler	= &proc_dointvec,
388 	},
389 #ifdef CONFIG_KMOD
390 	{
391 		.ctl_name	= KERN_MODPROBE,
392 		.procname	= "modprobe",
393 		.data		= &modprobe_path,
394 		.maxlen		= KMOD_PATH_LEN,
395 		.mode		= 0644,
396 		.proc_handler	= &proc_dostring,
397 		.strategy	= &sysctl_string,
398 	},
399 #endif
400 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
401 	{
402 		.ctl_name	= KERN_HOTPLUG,
403 		.procname	= "hotplug",
404 		.data		= &uevent_helper,
405 		.maxlen		= UEVENT_HELPER_PATH_LEN,
406 		.mode		= 0644,
407 		.proc_handler	= &proc_dostring,
408 		.strategy	= &sysctl_string,
409 	},
410 #endif
411 #ifdef CONFIG_CHR_DEV_SG
412 	{
413 		.ctl_name	= KERN_SG_BIG_BUFF,
414 		.procname	= "sg-big-buff",
415 		.data		= &sg_big_buff,
416 		.maxlen		= sizeof (int),
417 		.mode		= 0444,
418 		.proc_handler	= &proc_dointvec,
419 	},
420 #endif
421 #ifdef CONFIG_BSD_PROCESS_ACCT
422 	{
423 		.ctl_name	= KERN_ACCT,
424 		.procname	= "acct",
425 		.data		= &acct_parm,
426 		.maxlen		= 3*sizeof(int),
427 		.mode		= 0644,
428 		.proc_handler	= &proc_dointvec,
429 	},
430 #endif
431 #ifdef CONFIG_SYSVIPC
432 	{
433 		.ctl_name	= KERN_SHMMAX,
434 		.procname	= "shmmax",
435 		.data		= &shm_ctlmax,
436 		.maxlen		= sizeof (size_t),
437 		.mode		= 0644,
438 		.proc_handler	= &proc_doulongvec_minmax,
439 	},
440 	{
441 		.ctl_name	= KERN_SHMALL,
442 		.procname	= "shmall",
443 		.data		= &shm_ctlall,
444 		.maxlen		= sizeof (size_t),
445 		.mode		= 0644,
446 		.proc_handler	= &proc_doulongvec_minmax,
447 	},
448 	{
449 		.ctl_name	= KERN_SHMMNI,
450 		.procname	= "shmmni",
451 		.data		= &shm_ctlmni,
452 		.maxlen		= sizeof (int),
453 		.mode		= 0644,
454 		.proc_handler	= &proc_dointvec,
455 	},
456 	{
457 		.ctl_name	= KERN_MSGMAX,
458 		.procname	= "msgmax",
459 		.data		= &msg_ctlmax,
460 		.maxlen		= sizeof (int),
461 		.mode		= 0644,
462 		.proc_handler	= &proc_dointvec,
463 	},
464 	{
465 		.ctl_name	= KERN_MSGMNI,
466 		.procname	= "msgmni",
467 		.data		= &msg_ctlmni,
468 		.maxlen		= sizeof (int),
469 		.mode		= 0644,
470 		.proc_handler	= &proc_dointvec,
471 	},
472 	{
473 		.ctl_name	= KERN_MSGMNB,
474 		.procname	=  "msgmnb",
475 		.data		= &msg_ctlmnb,
476 		.maxlen		= sizeof (int),
477 		.mode		= 0644,
478 		.proc_handler	= &proc_dointvec,
479 	},
480 	{
481 		.ctl_name	= KERN_SEM,
482 		.procname	= "sem",
483 		.data		= &sem_ctls,
484 		.maxlen		= 4*sizeof (int),
485 		.mode		= 0644,
486 		.proc_handler	= &proc_dointvec,
487 	},
488 #endif
489 #ifdef CONFIG_MAGIC_SYSRQ
490 	{
491 		.ctl_name	= KERN_SYSRQ,
492 		.procname	= "sysrq",
493 		.data		= &sysrq_enabled,
494 		.maxlen		= sizeof (int),
495 		.mode		= 0644,
496 		.proc_handler	= &proc_dointvec,
497 	},
498 #endif
499 	{
500 		.ctl_name	= KERN_CADPID,
501 		.procname	= "cad_pid",
502 		.data		= &cad_pid,
503 		.maxlen		= sizeof (int),
504 		.mode		= 0600,
505 		.proc_handler	= &proc_dointvec,
506 	},
507 	{
508 		.ctl_name	= KERN_MAX_THREADS,
509 		.procname	= "threads-max",
510 		.data		= &max_threads,
511 		.maxlen		= sizeof(int),
512 		.mode		= 0644,
513 		.proc_handler	= &proc_dointvec,
514 	},
515 	{
516 		.ctl_name	= KERN_RANDOM,
517 		.procname	= "random",
518 		.mode		= 0555,
519 		.child		= random_table,
520 	},
521 #ifdef CONFIG_UNIX98_PTYS
522 	{
523 		.ctl_name	= KERN_PTY,
524 		.procname	= "pty",
525 		.mode		= 0555,
526 		.child		= pty_table,
527 	},
528 #endif
529 	{
530 		.ctl_name	= KERN_OVERFLOWUID,
531 		.procname	= "overflowuid",
532 		.data		= &overflowuid,
533 		.maxlen		= sizeof(int),
534 		.mode		= 0644,
535 		.proc_handler	= &proc_dointvec_minmax,
536 		.strategy	= &sysctl_intvec,
537 		.extra1		= &minolduid,
538 		.extra2		= &maxolduid,
539 	},
540 	{
541 		.ctl_name	= KERN_OVERFLOWGID,
542 		.procname	= "overflowgid",
543 		.data		= &overflowgid,
544 		.maxlen		= sizeof(int),
545 		.mode		= 0644,
546 		.proc_handler	= &proc_dointvec_minmax,
547 		.strategy	= &sysctl_intvec,
548 		.extra1		= &minolduid,
549 		.extra2		= &maxolduid,
550 	},
551 #ifdef CONFIG_S390
552 #ifdef CONFIG_MATHEMU
553 	{
554 		.ctl_name	= KERN_IEEE_EMULATION_WARNINGS,
555 		.procname	= "ieee_emulation_warnings",
556 		.data		= &sysctl_ieee_emulation_warnings,
557 		.maxlen		= sizeof(int),
558 		.mode		= 0644,
559 		.proc_handler	= &proc_dointvec,
560 	},
561 #endif
562 #ifdef CONFIG_NO_IDLE_HZ
563 	{
564 		.ctl_name       = KERN_HZ_TIMER,
565 		.procname       = "hz_timer",
566 		.data           = &sysctl_hz_timer,
567 		.maxlen         = sizeof(int),
568 		.mode           = 0644,
569 		.proc_handler   = &proc_dointvec,
570 	},
571 #endif
572 	{
573 		.ctl_name	= KERN_S390_USER_DEBUG_LOGGING,
574 		.procname	= "userprocess_debug",
575 		.data		= &sysctl_userprocess_debug,
576 		.maxlen		= sizeof(int),
577 		.mode		= 0644,
578 		.proc_handler	= &proc_dointvec,
579 	},
580 #endif
581 	{
582 		.ctl_name	= KERN_PIDMAX,
583 		.procname	= "pid_max",
584 		.data		= &pid_max,
585 		.maxlen		= sizeof (int),
586 		.mode		= 0644,
587 		.proc_handler	= &proc_dointvec_minmax,
588 		.strategy	= sysctl_intvec,
589 		.extra1		= &pid_max_min,
590 		.extra2		= &pid_max_max,
591 	},
592 	{
593 		.ctl_name	= KERN_PANIC_ON_OOPS,
594 		.procname	= "panic_on_oops",
595 		.data		= &panic_on_oops,
596 		.maxlen		= sizeof(int),
597 		.mode		= 0644,
598 		.proc_handler	= &proc_dointvec,
599 	},
600 	{
601 		.ctl_name	= KERN_PRINTK_RATELIMIT,
602 		.procname	= "printk_ratelimit",
603 		.data		= &printk_ratelimit_jiffies,
604 		.maxlen		= sizeof(int),
605 		.mode		= 0644,
606 		.proc_handler	= &proc_dointvec_jiffies,
607 		.strategy	= &sysctl_jiffies,
608 	},
609 	{
610 		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
611 		.procname	= "printk_ratelimit_burst",
612 		.data		= &printk_ratelimit_burst,
613 		.maxlen		= sizeof(int),
614 		.mode		= 0644,
615 		.proc_handler	= &proc_dointvec,
616 	},
617 	{
618 		.ctl_name	= KERN_NGROUPS_MAX,
619 		.procname	= "ngroups_max",
620 		.data		= &ngroups_max,
621 		.maxlen		= sizeof (int),
622 		.mode		= 0444,
623 		.proc_handler	= &proc_dointvec,
624 	},
625 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
626 	{
627 		.ctl_name       = KERN_UNKNOWN_NMI_PANIC,
628 		.procname       = "unknown_nmi_panic",
629 		.data           = &unknown_nmi_panic,
630 		.maxlen         = sizeof (int),
631 		.mode           = 0644,
632 		.proc_handler   = &proc_unknown_nmi_panic,
633 	},
634 #endif
635 #if defined(CONFIG_X86)
636 	{
637 		.ctl_name	= KERN_BOOTLOADER_TYPE,
638 		.procname	= "bootloader_type",
639 		.data		= &bootloader_type,
640 		.maxlen		= sizeof (int),
641 		.mode		= 0444,
642 		.proc_handler	= &proc_dointvec,
643 	},
644 #endif
645 #if defined(CONFIG_MMU)
646 	{
647 		.ctl_name	= KERN_RANDOMIZE,
648 		.procname	= "randomize_va_space",
649 		.data		= &randomize_va_space,
650 		.maxlen		= sizeof(int),
651 		.mode		= 0644,
652 		.proc_handler	= &proc_dointvec,
653 	},
654 #endif
655 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
656 	{
657 		.ctl_name	= KERN_SPIN_RETRY,
658 		.procname	= "spin_retry",
659 		.data		= &spin_retry,
660 		.maxlen		= sizeof (int),
661 		.mode		= 0644,
662 		.proc_handler	= &proc_dointvec,
663 	},
664 #endif
665 #ifdef CONFIG_ACPI_SLEEP
666 	{
667 		.ctl_name	= KERN_ACPI_VIDEO_FLAGS,
668 		.procname	= "acpi_video_flags",
669 		.data		= &acpi_video_flags,
670 		.maxlen		= sizeof (unsigned long),
671 		.mode		= 0644,
672 		.proc_handler	= &proc_doulongvec_minmax,
673 	},
674 #endif
675 #ifdef CONFIG_IA64
676 	{
677 		.ctl_name	= KERN_IA64_UNALIGNED,
678 		.procname	= "ignore-unaligned-usertrap",
679 		.data		= &no_unaligned_warning,
680 		.maxlen		= sizeof (int),
681 	 	.mode		= 0644,
682 		.proc_handler	= &proc_dointvec,
683 	},
684 #endif
685 #ifdef CONFIG_COMPAT
686 	{
687 		.ctl_name	= KERN_COMPAT_LOG,
688 		.procname	= "compat-log",
689 		.data		= &compat_log,
690 		.maxlen		= sizeof (int),
691 	 	.mode		= 0644,
692 		.proc_handler	= &proc_dointvec,
693 	},
694 #endif
695 #ifdef CONFIG_RT_MUTEXES
696 	{
697 		.ctl_name	= KERN_MAX_LOCK_DEPTH,
698 		.procname	= "max_lock_depth",
699 		.data		= &max_lock_depth,
700 		.maxlen		= sizeof(int),
701 		.mode		= 0644,
702 		.proc_handler	= &proc_dointvec,
703 	},
704 #endif
705 
706 	{ .ctl_name = 0 }
707 };
708 
709 /* Constants for minimum and maximum testing in vm_table.
710    We use these as one-element integer vectors. */
711 static int zero;
712 static int one_hundred = 100;
713 
714 
715 static ctl_table vm_table[] = {
716 	{
717 		.ctl_name	= VM_OVERCOMMIT_MEMORY,
718 		.procname	= "overcommit_memory",
719 		.data		= &sysctl_overcommit_memory,
720 		.maxlen		= sizeof(sysctl_overcommit_memory),
721 		.mode		= 0644,
722 		.proc_handler	= &proc_dointvec,
723 	},
724 	{
725 		.ctl_name	= VM_PANIC_ON_OOM,
726 		.procname	= "panic_on_oom",
727 		.data		= &sysctl_panic_on_oom,
728 		.maxlen		= sizeof(sysctl_panic_on_oom),
729 		.mode		= 0644,
730 		.proc_handler	= &proc_dointvec,
731 	},
732 	{
733 		.ctl_name	= VM_OVERCOMMIT_RATIO,
734 		.procname	= "overcommit_ratio",
735 		.data		= &sysctl_overcommit_ratio,
736 		.maxlen		= sizeof(sysctl_overcommit_ratio),
737 		.mode		= 0644,
738 		.proc_handler	= &proc_dointvec,
739 	},
740 	{
741 		.ctl_name	= VM_PAGE_CLUSTER,
742 		.procname	= "page-cluster",
743 		.data		= &page_cluster,
744 		.maxlen		= sizeof(int),
745 		.mode		= 0644,
746 		.proc_handler	= &proc_dointvec,
747 	},
748 	{
749 		.ctl_name	= VM_DIRTY_BACKGROUND,
750 		.procname	= "dirty_background_ratio",
751 		.data		= &dirty_background_ratio,
752 		.maxlen		= sizeof(dirty_background_ratio),
753 		.mode		= 0644,
754 		.proc_handler	= &proc_dointvec_minmax,
755 		.strategy	= &sysctl_intvec,
756 		.extra1		= &zero,
757 		.extra2		= &one_hundred,
758 	},
759 	{
760 		.ctl_name	= VM_DIRTY_RATIO,
761 		.procname	= "dirty_ratio",
762 		.data		= &vm_dirty_ratio,
763 		.maxlen		= sizeof(vm_dirty_ratio),
764 		.mode		= 0644,
765 		.proc_handler	= &proc_dointvec_minmax,
766 		.strategy	= &sysctl_intvec,
767 		.extra1		= &zero,
768 		.extra2		= &one_hundred,
769 	},
770 	{
771 		.ctl_name	= VM_DIRTY_WB_CS,
772 		.procname	= "dirty_writeback_centisecs",
773 		.data		= &dirty_writeback_interval,
774 		.maxlen		= sizeof(dirty_writeback_interval),
775 		.mode		= 0644,
776 		.proc_handler	= &dirty_writeback_centisecs_handler,
777 	},
778 	{
779 		.ctl_name	= VM_DIRTY_EXPIRE_CS,
780 		.procname	= "dirty_expire_centisecs",
781 		.data		= &dirty_expire_interval,
782 		.maxlen		= sizeof(dirty_expire_interval),
783 		.mode		= 0644,
784 		.proc_handler	= &proc_dointvec_userhz_jiffies,
785 	},
786 	{
787 		.ctl_name	= VM_NR_PDFLUSH_THREADS,
788 		.procname	= "nr_pdflush_threads",
789 		.data		= &nr_pdflush_threads,
790 		.maxlen		= sizeof nr_pdflush_threads,
791 		.mode		= 0444 /* read-only*/,
792 		.proc_handler	= &proc_dointvec,
793 	},
794 	{
795 		.ctl_name	= VM_SWAPPINESS,
796 		.procname	= "swappiness",
797 		.data		= &vm_swappiness,
798 		.maxlen		= sizeof(vm_swappiness),
799 		.mode		= 0644,
800 		.proc_handler	= &proc_dointvec_minmax,
801 		.strategy	= &sysctl_intvec,
802 		.extra1		= &zero,
803 		.extra2		= &one_hundred,
804 	},
805 #ifdef CONFIG_HUGETLB_PAGE
806 	 {
807 		.ctl_name	= VM_HUGETLB_PAGES,
808 		.procname	= "nr_hugepages",
809 		.data		= &max_huge_pages,
810 		.maxlen		= sizeof(unsigned long),
811 		.mode		= 0644,
812 		.proc_handler	= &hugetlb_sysctl_handler,
813 		.extra1		= (void *)&hugetlb_zero,
814 		.extra2		= (void *)&hugetlb_infinity,
815 	 },
816 	 {
817 		.ctl_name	= VM_HUGETLB_GROUP,
818 		.procname	= "hugetlb_shm_group",
819 		.data		= &sysctl_hugetlb_shm_group,
820 		.maxlen		= sizeof(gid_t),
821 		.mode		= 0644,
822 		.proc_handler	= &proc_dointvec,
823 	 },
824 #endif
825 	{
826 		.ctl_name	= VM_LOWMEM_RESERVE_RATIO,
827 		.procname	= "lowmem_reserve_ratio",
828 		.data		= &sysctl_lowmem_reserve_ratio,
829 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
830 		.mode		= 0644,
831 		.proc_handler	= &lowmem_reserve_ratio_sysctl_handler,
832 		.strategy	= &sysctl_intvec,
833 	},
834 	{
835 		.ctl_name	= VM_DROP_PAGECACHE,
836 		.procname	= "drop_caches",
837 		.data		= &sysctl_drop_caches,
838 		.maxlen		= sizeof(int),
839 		.mode		= 0644,
840 		.proc_handler	= drop_caches_sysctl_handler,
841 		.strategy	= &sysctl_intvec,
842 	},
843 	{
844 		.ctl_name	= VM_MIN_FREE_KBYTES,
845 		.procname	= "min_free_kbytes",
846 		.data		= &min_free_kbytes,
847 		.maxlen		= sizeof(min_free_kbytes),
848 		.mode		= 0644,
849 		.proc_handler	= &min_free_kbytes_sysctl_handler,
850 		.strategy	= &sysctl_intvec,
851 		.extra1		= &zero,
852 	},
853 	{
854 		.ctl_name	= VM_PERCPU_PAGELIST_FRACTION,
855 		.procname	= "percpu_pagelist_fraction",
856 		.data		= &percpu_pagelist_fraction,
857 		.maxlen		= sizeof(percpu_pagelist_fraction),
858 		.mode		= 0644,
859 		.proc_handler	= &percpu_pagelist_fraction_sysctl_handler,
860 		.strategy	= &sysctl_intvec,
861 		.extra1		= &min_percpu_pagelist_fract,
862 	},
863 #ifdef CONFIG_MMU
864 	{
865 		.ctl_name	= VM_MAX_MAP_COUNT,
866 		.procname	= "max_map_count",
867 		.data		= &sysctl_max_map_count,
868 		.maxlen		= sizeof(sysctl_max_map_count),
869 		.mode		= 0644,
870 		.proc_handler	= &proc_dointvec
871 	},
872 #endif
873 	{
874 		.ctl_name	= VM_LAPTOP_MODE,
875 		.procname	= "laptop_mode",
876 		.data		= &laptop_mode,
877 		.maxlen		= sizeof(laptop_mode),
878 		.mode		= 0644,
879 		.proc_handler	= &proc_dointvec_jiffies,
880 		.strategy	= &sysctl_jiffies,
881 	},
882 	{
883 		.ctl_name	= VM_BLOCK_DUMP,
884 		.procname	= "block_dump",
885 		.data		= &block_dump,
886 		.maxlen		= sizeof(block_dump),
887 		.mode		= 0644,
888 		.proc_handler	= &proc_dointvec,
889 		.strategy	= &sysctl_intvec,
890 		.extra1		= &zero,
891 	},
892 	{
893 		.ctl_name	= VM_VFS_CACHE_PRESSURE,
894 		.procname	= "vfs_cache_pressure",
895 		.data		= &sysctl_vfs_cache_pressure,
896 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
897 		.mode		= 0644,
898 		.proc_handler	= &proc_dointvec,
899 		.strategy	= &sysctl_intvec,
900 		.extra1		= &zero,
901 	},
902 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
903 	{
904 		.ctl_name	= VM_LEGACY_VA_LAYOUT,
905 		.procname	= "legacy_va_layout",
906 		.data		= &sysctl_legacy_va_layout,
907 		.maxlen		= sizeof(sysctl_legacy_va_layout),
908 		.mode		= 0644,
909 		.proc_handler	= &proc_dointvec,
910 		.strategy	= &sysctl_intvec,
911 		.extra1		= &zero,
912 	},
913 #endif
914 #ifdef CONFIG_SWAP
915 	{
916 		.ctl_name	= VM_SWAP_TOKEN_TIMEOUT,
917 		.procname	= "swap_token_timeout",
918 		.data		= &swap_token_default_timeout,
919 		.maxlen		= sizeof(swap_token_default_timeout),
920 		.mode		= 0644,
921 		.proc_handler	= &proc_dointvec_jiffies,
922 		.strategy	= &sysctl_jiffies,
923 	},
924 #endif
925 #ifdef CONFIG_NUMA
926 	{
927 		.ctl_name	= VM_ZONE_RECLAIM_MODE,
928 		.procname	= "zone_reclaim_mode",
929 		.data		= &zone_reclaim_mode,
930 		.maxlen		= sizeof(zone_reclaim_mode),
931 		.mode		= 0644,
932 		.proc_handler	= &proc_dointvec,
933 		.strategy	= &sysctl_intvec,
934 		.extra1		= &zero,
935 	},
936 #endif
937 #ifdef CONFIG_X86_32
938 	{
939 		.ctl_name	= VM_VDSO_ENABLED,
940 		.procname	= "vdso_enabled",
941 		.data		= &vdso_enabled,
942 		.maxlen		= sizeof(vdso_enabled),
943 		.mode		= 0644,
944 		.proc_handler	= &proc_dointvec,
945 		.strategy	= &sysctl_intvec,
946 		.extra1		= &zero,
947 	},
948 #endif
949 	{ .ctl_name = 0 }
950 };
951 
952 static ctl_table fs_table[] = {
953 	{
954 		.ctl_name	= FS_NRINODE,
955 		.procname	= "inode-nr",
956 		.data		= &inodes_stat,
957 		.maxlen		= 2*sizeof(int),
958 		.mode		= 0444,
959 		.proc_handler	= &proc_dointvec,
960 	},
961 	{
962 		.ctl_name	= FS_STATINODE,
963 		.procname	= "inode-state",
964 		.data		= &inodes_stat,
965 		.maxlen		= 7*sizeof(int),
966 		.mode		= 0444,
967 		.proc_handler	= &proc_dointvec,
968 	},
969 	{
970 		.ctl_name	= FS_NRFILE,
971 		.procname	= "file-nr",
972 		.data		= &files_stat,
973 		.maxlen		= 3*sizeof(int),
974 		.mode		= 0444,
975 		.proc_handler	= &proc_nr_files,
976 	},
977 	{
978 		.ctl_name	= FS_MAXFILE,
979 		.procname	= "file-max",
980 		.data		= &files_stat.max_files,
981 		.maxlen		= sizeof(int),
982 		.mode		= 0644,
983 		.proc_handler	= &proc_dointvec,
984 	},
985 	{
986 		.ctl_name	= FS_DENTRY,
987 		.procname	= "dentry-state",
988 		.data		= &dentry_stat,
989 		.maxlen		= 6*sizeof(int),
990 		.mode		= 0444,
991 		.proc_handler	= &proc_dointvec,
992 	},
993 	{
994 		.ctl_name	= FS_OVERFLOWUID,
995 		.procname	= "overflowuid",
996 		.data		= &fs_overflowuid,
997 		.maxlen		= sizeof(int),
998 		.mode		= 0644,
999 		.proc_handler	= &proc_dointvec_minmax,
1000 		.strategy	= &sysctl_intvec,
1001 		.extra1		= &minolduid,
1002 		.extra2		= &maxolduid,
1003 	},
1004 	{
1005 		.ctl_name	= FS_OVERFLOWGID,
1006 		.procname	= "overflowgid",
1007 		.data		= &fs_overflowgid,
1008 		.maxlen		= sizeof(int),
1009 		.mode		= 0644,
1010 		.proc_handler	= &proc_dointvec_minmax,
1011 		.strategy	= &sysctl_intvec,
1012 		.extra1		= &minolduid,
1013 		.extra2		= &maxolduid,
1014 	},
1015 	{
1016 		.ctl_name	= FS_LEASES,
1017 		.procname	= "leases-enable",
1018 		.data		= &leases_enable,
1019 		.maxlen		= sizeof(int),
1020 		.mode		= 0644,
1021 		.proc_handler	= &proc_dointvec,
1022 	},
1023 #ifdef CONFIG_DNOTIFY
1024 	{
1025 		.ctl_name	= FS_DIR_NOTIFY,
1026 		.procname	= "dir-notify-enable",
1027 		.data		= &dir_notify_enable,
1028 		.maxlen		= sizeof(int),
1029 		.mode		= 0644,
1030 		.proc_handler	= &proc_dointvec,
1031 	},
1032 #endif
1033 #ifdef CONFIG_MMU
1034 	{
1035 		.ctl_name	= FS_LEASE_TIME,
1036 		.procname	= "lease-break-time",
1037 		.data		= &lease_break_time,
1038 		.maxlen		= sizeof(int),
1039 		.mode		= 0644,
1040 		.proc_handler	= &proc_dointvec,
1041 	},
1042 	{
1043 		.ctl_name	= FS_AIO_NR,
1044 		.procname	= "aio-nr",
1045 		.data		= &aio_nr,
1046 		.maxlen		= sizeof(aio_nr),
1047 		.mode		= 0444,
1048 		.proc_handler	= &proc_doulongvec_minmax,
1049 	},
1050 	{
1051 		.ctl_name	= FS_AIO_MAX_NR,
1052 		.procname	= "aio-max-nr",
1053 		.data		= &aio_max_nr,
1054 		.maxlen		= sizeof(aio_max_nr),
1055 		.mode		= 0644,
1056 		.proc_handler	= &proc_doulongvec_minmax,
1057 	},
1058 #ifdef CONFIG_INOTIFY_USER
1059 	{
1060 		.ctl_name	= FS_INOTIFY,
1061 		.procname	= "inotify",
1062 		.mode		= 0555,
1063 		.child		= inotify_table,
1064 	},
1065 #endif
1066 #endif
1067 	{
1068 		.ctl_name	= KERN_SETUID_DUMPABLE,
1069 		.procname	= "suid_dumpable",
1070 		.data		= &suid_dumpable,
1071 		.maxlen		= sizeof(int),
1072 		.mode		= 0644,
1073 		.proc_handler	= &proc_dointvec,
1074 	},
1075 	{ .ctl_name = 0 }
1076 };
1077 
1078 static ctl_table debug_table[] = {
1079 	{ .ctl_name = 0 }
1080 };
1081 
1082 static ctl_table dev_table[] = {
1083 	{ .ctl_name = 0 }
1084 };
1085 
1086 extern void init_irq_proc (void);
1087 
1088 static DEFINE_SPINLOCK(sysctl_lock);
1089 
1090 /* called under sysctl_lock */
1091 static int use_table(struct ctl_table_header *p)
1092 {
1093 	if (unlikely(p->unregistering))
1094 		return 0;
1095 	p->used++;
1096 	return 1;
1097 }
1098 
1099 /* called under sysctl_lock */
1100 static void unuse_table(struct ctl_table_header *p)
1101 {
1102 	if (!--p->used)
1103 		if (unlikely(p->unregistering))
1104 			complete(p->unregistering);
1105 }
1106 
1107 /* called under sysctl_lock, will reacquire if has to wait */
1108 static void start_unregistering(struct ctl_table_header *p)
1109 {
1110 	/*
1111 	 * if p->used is 0, nobody will ever touch that entry again;
1112 	 * we'll eliminate all paths to it before dropping sysctl_lock
1113 	 */
1114 	if (unlikely(p->used)) {
1115 		struct completion wait;
1116 		init_completion(&wait);
1117 		p->unregistering = &wait;
1118 		spin_unlock(&sysctl_lock);
1119 		wait_for_completion(&wait);
1120 		spin_lock(&sysctl_lock);
1121 	}
1122 	/*
1123 	 * do not remove from the list until nobody holds it; walking the
1124 	 * list in do_sysctl() relies on that.
1125 	 */
1126 	list_del_init(&p->ctl_entry);
1127 }
1128 
1129 void __init sysctl_init(void)
1130 {
1131 #ifdef CONFIG_PROC_FS
1132 	register_proc_table(root_table, proc_sys_root, &root_table_header);
1133 	init_irq_proc();
1134 #endif
1135 }
1136 
1137 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1138 	       void __user *newval, size_t newlen)
1139 {
1140 	struct list_head *tmp;
1141 	int error = -ENOTDIR;
1142 
1143 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
1144 		return -ENOTDIR;
1145 	if (oldval) {
1146 		int old_len;
1147 		if (!oldlenp || get_user(old_len, oldlenp))
1148 			return -EFAULT;
1149 	}
1150 	spin_lock(&sysctl_lock);
1151 	tmp = &root_table_header.ctl_entry;
1152 	do {
1153 		struct ctl_table_header *head =
1154 			list_entry(tmp, struct ctl_table_header, ctl_entry);
1155 		void *context = NULL;
1156 
1157 		if (!use_table(head))
1158 			continue;
1159 
1160 		spin_unlock(&sysctl_lock);
1161 
1162 		error = parse_table(name, nlen, oldval, oldlenp,
1163 					newval, newlen, head->ctl_table,
1164 					&context);
1165 		kfree(context);
1166 
1167 		spin_lock(&sysctl_lock);
1168 		unuse_table(head);
1169 		if (error != -ENOTDIR)
1170 			break;
1171 	} while ((tmp = tmp->next) != &root_table_header.ctl_entry);
1172 	spin_unlock(&sysctl_lock);
1173 	return error;
1174 }
1175 
1176 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1177 {
1178 	struct __sysctl_args tmp;
1179 	int error;
1180 
1181 	if (copy_from_user(&tmp, args, sizeof(tmp)))
1182 		return -EFAULT;
1183 
1184 	lock_kernel();
1185 	error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1186 			  tmp.newval, tmp.newlen);
1187 	unlock_kernel();
1188 	return error;
1189 }
1190 
1191 /*
1192  * ctl_perm does NOT grant the superuser all rights automatically, because
1193  * some sysctl variables are readonly even to root.
1194  */
1195 
1196 static int test_perm(int mode, int op)
1197 {
1198 	if (!current->euid)
1199 		mode >>= 6;
1200 	else if (in_egroup_p(0))
1201 		mode >>= 3;
1202 	if ((mode & op & 0007) == op)
1203 		return 0;
1204 	return -EACCES;
1205 }
1206 
1207 static inline int ctl_perm(ctl_table *table, int op)
1208 {
1209 	int error;
1210 	error = security_sysctl(table, op);
1211 	if (error)
1212 		return error;
1213 	return test_perm(table->mode, op);
1214 }
1215 
1216 static int parse_table(int __user *name, int nlen,
1217 		       void __user *oldval, size_t __user *oldlenp,
1218 		       void __user *newval, size_t newlen,
1219 		       ctl_table *table, void **context)
1220 {
1221 	int n;
1222 repeat:
1223 	if (!nlen)
1224 		return -ENOTDIR;
1225 	if (get_user(n, name))
1226 		return -EFAULT;
1227 	for ( ; table->ctl_name; table++) {
1228 		if (n == table->ctl_name || table->ctl_name == CTL_ANY) {
1229 			int error;
1230 			if (table->child) {
1231 				if (ctl_perm(table, 001))
1232 					return -EPERM;
1233 				if (table->strategy) {
1234 					error = table->strategy(
1235 						table, name, nlen,
1236 						oldval, oldlenp,
1237 						newval, newlen, context);
1238 					if (error)
1239 						return error;
1240 				}
1241 				name++;
1242 				nlen--;
1243 				table = table->child;
1244 				goto repeat;
1245 			}
1246 			error = do_sysctl_strategy(table, name, nlen,
1247 						   oldval, oldlenp,
1248 						   newval, newlen, context);
1249 			return error;
1250 		}
1251 	}
1252 	return -ENOTDIR;
1253 }
1254 
1255 /* Perform the actual read/write of a sysctl table entry. */
1256 int do_sysctl_strategy (ctl_table *table,
1257 			int __user *name, int nlen,
1258 			void __user *oldval, size_t __user *oldlenp,
1259 			void __user *newval, size_t newlen, void **context)
1260 {
1261 	int op = 0, rc;
1262 	size_t len;
1263 
1264 	if (oldval)
1265 		op |= 004;
1266 	if (newval)
1267 		op |= 002;
1268 	if (ctl_perm(table, op))
1269 		return -EPERM;
1270 
1271 	if (table->strategy) {
1272 		rc = table->strategy(table, name, nlen, oldval, oldlenp,
1273 				     newval, newlen, context);
1274 		if (rc < 0)
1275 			return rc;
1276 		if (rc > 0)
1277 			return 0;
1278 	}
1279 
1280 	/* If there is no strategy routine, or if the strategy returns
1281 	 * zero, proceed with automatic r/w */
1282 	if (table->data && table->maxlen) {
1283 		if (oldval && oldlenp) {
1284 			if (get_user(len, oldlenp))
1285 				return -EFAULT;
1286 			if (len) {
1287 				if (len > table->maxlen)
1288 					len = table->maxlen;
1289 				if(copy_to_user(oldval, table->data, len))
1290 					return -EFAULT;
1291 				if(put_user(len, oldlenp))
1292 					return -EFAULT;
1293 			}
1294 		}
1295 		if (newval && newlen) {
1296 			len = newlen;
1297 			if (len > table->maxlen)
1298 				len = table->maxlen;
1299 			if(copy_from_user(table->data, newval, len))
1300 				return -EFAULT;
1301 		}
1302 	}
1303 	return 0;
1304 }
1305 
1306 /**
1307  * register_sysctl_table - register a sysctl hierarchy
1308  * @table: the top-level table structure
1309  * @insert_at_head: whether the entry should be inserted in front or at the end
1310  *
1311  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1312  * array. An entry with a ctl_name of 0 terminates the table.
1313  *
1314  * The members of the &ctl_table structure are used as follows:
1315  *
1316  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1317  *            must be unique within that level of sysctl
1318  *
1319  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1320  *            enter a sysctl file
1321  *
1322  * data - a pointer to data for use by proc_handler
1323  *
1324  * maxlen - the maximum size in bytes of the data
1325  *
1326  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1327  *
1328  * child - a pointer to the child sysctl table if this entry is a directory, or
1329  *         %NULL.
1330  *
1331  * proc_handler - the text handler routine (described below)
1332  *
1333  * strategy - the strategy routine (described below)
1334  *
1335  * de - for internal use by the sysctl routines
1336  *
1337  * extra1, extra2 - extra pointers usable by the proc handler routines
1338  *
1339  * Leaf nodes in the sysctl tree will be represented by a single file
1340  * under /proc; non-leaf nodes will be represented by directories.
1341  *
1342  * sysctl(2) can automatically manage read and write requests through
1343  * the sysctl table.  The data and maxlen fields of the ctl_table
1344  * struct enable minimal validation of the values being written to be
1345  * performed, and the mode field allows minimal authentication.
1346  *
1347  * More sophisticated management can be enabled by the provision of a
1348  * strategy routine with the table entry.  This will be called before
1349  * any automatic read or write of the data is performed.
1350  *
1351  * The strategy routine may return
1352  *
1353  * < 0 - Error occurred (error is passed to user process)
1354  *
1355  * 0   - OK - proceed with automatic read or write.
1356  *
1357  * > 0 - OK - read or write has been done by the strategy routine, so
1358  *       return immediately.
1359  *
1360  * There must be a proc_handler routine for any terminal nodes
1361  * mirrored under /proc/sys (non-terminals are handled by a built-in
1362  * directory handler).  Several default handlers are available to
1363  * cover common cases -
1364  *
1365  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1366  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1367  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1368  *
1369  * It is the handler's job to read the input buffer from user memory
1370  * and process it. The handler should return 0 on success.
1371  *
1372  * This routine returns %NULL on a failure to register, and a pointer
1373  * to the table header on success.
1374  */
1375 struct ctl_table_header *register_sysctl_table(ctl_table * table,
1376 					       int insert_at_head)
1377 {
1378 	struct ctl_table_header *tmp;
1379 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1380 	if (!tmp)
1381 		return NULL;
1382 	tmp->ctl_table = table;
1383 	INIT_LIST_HEAD(&tmp->ctl_entry);
1384 	tmp->used = 0;
1385 	tmp->unregistering = NULL;
1386 	spin_lock(&sysctl_lock);
1387 	if (insert_at_head)
1388 		list_add(&tmp->ctl_entry, &root_table_header.ctl_entry);
1389 	else
1390 		list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1391 	spin_unlock(&sysctl_lock);
1392 #ifdef CONFIG_PROC_FS
1393 	register_proc_table(table, proc_sys_root, tmp);
1394 #endif
1395 	return tmp;
1396 }
1397 
/**
 * unregister_sysctl_table - unregister a sysctl table hierarchy
 * @header: the header returned from register_sysctl_table
 *
 * Unregisters the sysctl table and all children. proc entries may not
 * actually be removed until they are no longer used by anyone.
 *
 * May sleep while waiting for current users of the table to finish.
 * @header is freed before returning and must not be used afterwards.
 */
void unregister_sysctl_table(struct ctl_table_header * header)
{
	might_sleep();
	spin_lock(&sysctl_lock);
	/* waits for users (dropping the lock meanwhile) and unlinks header */
	start_unregistering(header);
#ifdef CONFIG_PROC_FS
	/* tear down the /proc mirror while still holding sysctl_lock */
	unregister_proc_table(header->ctl_table, proc_sys_root);
#endif
	spin_unlock(&sysctl_lock);
	kfree(header);
}
1416 
1417 /*
1418  * /proc/sys support
1419  */
1420 
1421 #ifdef CONFIG_PROC_FS
1422 
1423 /* Scan the sysctl entries in table and add them all into /proc */
1424 static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set)
1425 {
1426 	struct proc_dir_entry *de;
1427 	int len;
1428 	mode_t mode;
1429 
1430 	for (; table->ctl_name; table++) {
1431 		/* Can't do anything without a proc name. */
1432 		if (!table->procname)
1433 			continue;
1434 		/* Maybe we can't do anything with it... */
1435 		if (!table->proc_handler && !table->child) {
1436 			printk(KERN_WARNING "SYSCTL: Can't register %s\n",
1437 				table->procname);
1438 			continue;
1439 		}
1440 
1441 		len = strlen(table->procname);
1442 		mode = table->mode;
1443 
1444 		de = NULL;
1445 		if (table->proc_handler)
1446 			mode |= S_IFREG;
1447 		else {
1448 			mode |= S_IFDIR;
1449 			for (de = root->subdir; de; de = de->next) {
1450 				if (proc_match(len, table->procname, de))
1451 					break;
1452 			}
1453 			/* If the subdir exists already, de is non-NULL */
1454 		}
1455 
1456 		if (!de) {
1457 			de = create_proc_entry(table->procname, mode, root);
1458 			if (!de)
1459 				continue;
1460 			de->set = set;
1461 			de->data = (void *) table;
1462 			if (table->proc_handler)
1463 				de->proc_fops = &proc_sys_file_operations;
1464 		}
1465 		table->de = de;
1466 		if (de->mode & S_IFDIR)
1467 			register_proc_table(table->child, de, set);
1468 	}
1469 }
1470 
/*
 * Unregister a /proc sysctl table and any subdirectories.
 *
 * Walks @table and removes the /proc entry recorded in each table->de
 * from @root.  Directories are emptied recursively first and are kept
 * if anything is still left inside them; entries whose use count is
 * non-zero are kept as well, but with their data pointer cleared.
 */
static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root)
{
	struct proc_dir_entry *de;
	for (; table->ctl_name; table++) {
		/* never mirrored into /proc (or already removed) */
		if (!(de = table->de))
			continue;
		if (de->mode & S_IFDIR) {
			if (!table->child) {
				printk (KERN_ALERT "Help - malformed sysctl tree on free\n");
				continue;
			}
			/* depth first: clear out the children before the dir */
			unregister_proc_table(table->child, de);

			/* Don't unregister directories which still have entries.. */
			if (de->subdir)
				continue;
		}

		/*
		 * In any case, mark the entry as goner; we'll keep it
		 * around if it's busy, but we'll know to do nothing with
		 * its fields.  We are under sysctl_lock here.
		 */
		de->data = NULL;

		/* Don't unregister proc entries that are still being used.. */
		if (atomic_read(&de->count))
			continue;

		/* unused: forget the entry and really remove it */
		table->de = NULL;
		remove_proc_entry(table->procname, root);
	}
}
1507 
1508 static ssize_t do_rw_proc(int write, struct file * file, char __user * buf,
1509 			  size_t count, loff_t *ppos)
1510 {
1511 	int op;
1512 	struct proc_dir_entry *de = PDE(file->f_dentry->d_inode);
1513 	struct ctl_table *table;
1514 	size_t res;
1515 	ssize_t error = -ENOTDIR;
1516 
1517 	spin_lock(&sysctl_lock);
1518 	if (de && de->data && use_table(de->set)) {
1519 		/*
1520 		 * at that point we know that sysctl was not unregistered
1521 		 * and won't be until we finish
1522 		 */
1523 		spin_unlock(&sysctl_lock);
1524 		table = (struct ctl_table *) de->data;
1525 		if (!table || !table->proc_handler)
1526 			goto out;
1527 		error = -EPERM;
1528 		op = (write ? 002 : 004);
1529 		if (ctl_perm(table, op))
1530 			goto out;
1531 
1532 		/* careful: calling conventions are nasty here */
1533 		res = count;
1534 		error = (*table->proc_handler)(table, write, file,
1535 						buf, &res, ppos);
1536 		if (!error)
1537 			error = res;
1538 	out:
1539 		spin_lock(&sysctl_lock);
1540 		unuse_table(de->set);
1541 	}
1542 	spin_unlock(&sysctl_lock);
1543 	return error;
1544 }
1545 
1546 static int proc_opensys(struct inode *inode, struct file *file)
1547 {
1548 	if (file->f_mode & FMODE_WRITE) {
1549 		/*
1550 		 * sysctl entries that are not writable,
1551 		 * are _NOT_ writable, capabilities or not.
1552 		 */
1553 		if (!(inode->i_mode & S_IWUSR))
1554 			return -EPERM;
1555 	}
1556 
1557 	return 0;
1558 }
1559 
/* read() on a /proc/sys file: do_rw_proc() with write == 0. */
static ssize_t proc_readsys(struct file * file, char __user * buf,
			    size_t count, loff_t *ppos)
{
	return do_rw_proc(0, file, buf, count, ppos);
}
1565 
/*
 * write() on a /proc/sys file: do_rw_proc() with write == 1.  The const
 * is cast away only because do_rw_proc() shares one buffer parameter
 * between the read and the write direction.
 */
static ssize_t proc_writesys(struct file * file, const char __user * buf,
			     size_t count, loff_t *ppos)
{
	return do_rw_proc(1, file, (char __user *) buf, count, ppos);
}
1571 
1572 /**
1573  * proc_dostring - read a string sysctl
1574  * @table: the sysctl table
1575  * @write: %TRUE if this is a write to the sysctl file
1576  * @filp: the file structure
1577  * @buffer: the user buffer
1578  * @lenp: the size of the user buffer
1579  * @ppos: file position
1580  *
1581  * Reads/writes a string from/to the user buffer. If the kernel
1582  * buffer provided is not large enough to hold the string, the
1583  * string is truncated. The copied string is %NULL-terminated.
1584  * If the string is being read by the user process, it is copied
1585  * and a newline '\n' is added. It is truncated if the buffer is
1586  * not large enough.
1587  *
1588  * Returns 0 on success.
1589  */
1590 int proc_dostring(ctl_table *table, int write, struct file *filp,
1591 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1592 {
1593 	size_t len;
1594 	char __user *p;
1595 	char c;
1596 
1597 	if (!table->data || !table->maxlen || !*lenp ||
1598 	    (*ppos && !write)) {
1599 		*lenp = 0;
1600 		return 0;
1601 	}
1602 
1603 	if (write) {
1604 		len = 0;
1605 		p = buffer;
1606 		while (len < *lenp) {
1607 			if (get_user(c, p++))
1608 				return -EFAULT;
1609 			if (c == 0 || c == '\n')
1610 				break;
1611 			len++;
1612 		}
1613 		if (len >= table->maxlen)
1614 			len = table->maxlen-1;
1615 		if(copy_from_user(table->data, buffer, len))
1616 			return -EFAULT;
1617 		((char *) table->data)[len] = 0;
1618 		*ppos += *lenp;
1619 	} else {
1620 		len = strlen(table->data);
1621 		if (len > table->maxlen)
1622 			len = table->maxlen;
1623 		if (len > *lenp)
1624 			len = *lenp;
1625 		if (len)
1626 			if(copy_to_user(buffer, table->data, len))
1627 				return -EFAULT;
1628 		if (len < *lenp) {
1629 			if(put_user('\n', ((char __user *) buffer) + len))
1630 				return -EFAULT;
1631 			len++;
1632 		}
1633 		*lenp = len;
1634 		*ppos += len;
1635 	}
1636 	return 0;
1637 }
1638 
1639 /*
1640  *	Special case of dostring for the UTS structure. This has locks
1641  *	to observe. Should this be in kernel/sys.c ????
1642  */
1643 
1644 static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
1645 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1646 {
1647 	int r;
1648 
1649 	if (!write) {
1650 		down_read(&uts_sem);
1651 		r=proc_dostring(table,0,filp,buffer,lenp, ppos);
1652 		up_read(&uts_sem);
1653 	} else {
1654 		down_write(&uts_sem);
1655 		r=proc_dostring(table,1,filp,buffer,lenp, ppos);
1656 		up_write(&uts_sem);
1657 	}
1658 	return r;
1659 }
1660 
/*
 * Default conversion between an int and the sign/magnitude pair used by
 * do_proc_dointvec().
 *
 * @negp:  sign flag (non-zero means negative)
 * @lvalp: magnitude
 * @valp:  the int being read or written
 * @write: non-zero: build *@valp from *@negp / *@lvalp;
 *         zero: split *@valp into *@negp / *@lvalp
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening to unsigned: "-val" would
			 * overflow for INT_MIN and yield a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1679 
1680 static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1681 		  void __user *buffer, size_t *lenp, loff_t *ppos,
1682 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1683 			      int write, void *data),
1684 		  void *data)
1685 {
1686 #define TMPBUFLEN 21
1687 	int *i, vleft, first=1, neg, val;
1688 	unsigned long lval;
1689 	size_t left, len;
1690 
1691 	char buf[TMPBUFLEN], *p;
1692 	char __user *s = buffer;
1693 
1694 	if (!table->data || !table->maxlen || !*lenp ||
1695 	    (*ppos && !write)) {
1696 		*lenp = 0;
1697 		return 0;
1698 	}
1699 
1700 	i = (int *) table->data;
1701 	vleft = table->maxlen / sizeof(*i);
1702 	left = *lenp;
1703 
1704 	if (!conv)
1705 		conv = do_proc_dointvec_conv;
1706 
1707 	for (; left && vleft--; i++, first=0) {
1708 		if (write) {
1709 			while (left) {
1710 				char c;
1711 				if (get_user(c, s))
1712 					return -EFAULT;
1713 				if (!isspace(c))
1714 					break;
1715 				left--;
1716 				s++;
1717 			}
1718 			if (!left)
1719 				break;
1720 			neg = 0;
1721 			len = left;
1722 			if (len > sizeof(buf) - 1)
1723 				len = sizeof(buf) - 1;
1724 			if (copy_from_user(buf, s, len))
1725 				return -EFAULT;
1726 			buf[len] = 0;
1727 			p = buf;
1728 			if (*p == '-' && left > 1) {
1729 				neg = 1;
1730 				left--, p++;
1731 			}
1732 			if (*p < '0' || *p > '9')
1733 				break;
1734 
1735 			lval = simple_strtoul(p, &p, 0);
1736 
1737 			len = p-buf;
1738 			if ((len < left) && *p && !isspace(*p))
1739 				break;
1740 			if (neg)
1741 				val = -val;
1742 			s += len;
1743 			left -= len;
1744 
1745 			if (conv(&neg, &lval, i, 1, data))
1746 				break;
1747 		} else {
1748 			p = buf;
1749 			if (!first)
1750 				*p++ = '\t';
1751 
1752 			if (conv(&neg, &lval, i, 0, data))
1753 				break;
1754 
1755 			sprintf(p, "%s%lu", neg ? "-" : "", lval);
1756 			len = strlen(buf);
1757 			if (len > left)
1758 				len = left;
1759 			if(copy_to_user(s, buf, len))
1760 				return -EFAULT;
1761 			left -= len;
1762 			s += len;
1763 		}
1764 	}
1765 
1766 	if (!write && !first && left) {
1767 		if(put_user('\n', s))
1768 			return -EFAULT;
1769 		left--, s++;
1770 	}
1771 	if (write) {
1772 		while (left) {
1773 			char c;
1774 			if (get_user(c, s++))
1775 				return -EFAULT;
1776 			if (!isspace(c))
1777 				break;
1778 			left--;
1779 		}
1780 	}
1781 	if (write && first)
1782 		return -EINVAL;
1783 	*lenp -= left;
1784 	*ppos += *lenp;
1785 	return 0;
1786 #undef TMPBUFLEN
1787 }
1788 
/**
 * proc_dointvec - read a vector of integers
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 * values from/to the user buffer, treated as an ASCII string.
 *
 * No conversion callback is passed on, so do_proc_dointvec() applies
 * its default plain int conversion.
 *
 * Returns 0 on success.
 */
int proc_dointvec(ctl_table *table, int write, struct file *filp,
		     void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
		    	    NULL,NULL);
}
1809 
/* How a written value is combined with the stored one. */
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2
#define OP_MAX	3
#define OP_MIN	4

/*
 * Conversion callback for do_proc_dointvec() that merges a written
 * value into *@valp instead of always overwriting it.
 *
 * @negp:  sign flag (non-zero means negative)
 * @lvalp: magnitude
 * @valp:  the stored int
 * @write: non-zero: combine the user value with *@valp as selected by
 *         the operation; zero: split *@valp into sign and magnitude
 * @data:  points to an int holding one of the OP_* codes
 *
 * An unknown operation code leaves *@valp untouched.  Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:	*valp = val; break;
		case OP_AND:	*valp &= val; break;
		case OP_OR:	*valp |= val; break;
		case OP_MAX:	if(*valp < val)
					*valp = val;
				break;
		case OP_MIN:	if(*valp > val)
					*valp = val;
				break;
		default:
				break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening to unsigned: "-val" would
			 * overflow for INT_MIN and yield a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1846 
1847 /*
1848  *	init may raise the set.
1849  */
1850 
1851 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1852 			void __user *buffer, size_t *lenp, loff_t *ppos)
1853 {
1854 	int op;
1855 
1856 	if (!capable(CAP_SYS_MODULE)) {
1857 		return -EPERM;
1858 	}
1859 
1860 	op = (current->pid == 1) ? OP_SET : OP_AND;
1861 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1862 				do_proc_dointvec_bset_conv,&op);
1863 }
1864 
/* Optional bounds for do_proc_dointvec_minmax_conv(); NULL = no bound. */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback for do_proc_dointvec() that range-checks writes.
 *
 * @negp:  sign flag (non-zero means negative)
 * @lvalp: magnitude
 * @valp:  the stored int
 * @write: non-zero: store the user value in *@valp if it lies within
 *         the bounds; zero: split *@valp into sign and magnitude
 * @data:  points to a struct do_proc_dointvec_minmax_conv_param
 *
 * Returns 0 on success, -EINVAL if a written value is out of range (in
 * which case *@valp is left unchanged).
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening to unsigned: "-val" would
			 * overflow for INT_MIN and yield a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1893 
/**
 * proc_dointvec_minmax - read a vector of integers with min/max values
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 * values from/to the user buffer, treated as an ASCII string.
 *
 * This routine will ensure the values are within the range specified by
 * table->extra1 (min) and table->extra2 (max).  Either pointer may be
 * %NULL to leave that side unbounded; the same pair of bounds is applied
 * to every element of the vector.
 *
 * Returns 0 on success.
 */
int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	/* hand the bounds to the conversion callback through its cookie */
	struct do_proc_dointvec_minmax_conv_param param = {
		.min = (int *) table->extra1,
		.max = (int *) table->extra2,
	};
	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
				do_proc_dointvec_minmax_conv, &param);
}
1921 
1922 static int do_proc_doulongvec_minmax(ctl_table *table, int write,
1923 				     struct file *filp,
1924 				     void __user *buffer,
1925 				     size_t *lenp, loff_t *ppos,
1926 				     unsigned long convmul,
1927 				     unsigned long convdiv)
1928 {
1929 #define TMPBUFLEN 21
1930 	unsigned long *i, *min, *max, val;
1931 	int vleft, first=1, neg;
1932 	size_t len, left;
1933 	char buf[TMPBUFLEN], *p;
1934 	char __user *s = buffer;
1935 
1936 	if (!table->data || !table->maxlen || !*lenp ||
1937 	    (*ppos && !write)) {
1938 		*lenp = 0;
1939 		return 0;
1940 	}
1941 
1942 	i = (unsigned long *) table->data;
1943 	min = (unsigned long *) table->extra1;
1944 	max = (unsigned long *) table->extra2;
1945 	vleft = table->maxlen / sizeof(unsigned long);
1946 	left = *lenp;
1947 
1948 	for (; left && vleft--; i++, min++, max++, first=0) {
1949 		if (write) {
1950 			while (left) {
1951 				char c;
1952 				if (get_user(c, s))
1953 					return -EFAULT;
1954 				if (!isspace(c))
1955 					break;
1956 				left--;
1957 				s++;
1958 			}
1959 			if (!left)
1960 				break;
1961 			neg = 0;
1962 			len = left;
1963 			if (len > TMPBUFLEN-1)
1964 				len = TMPBUFLEN-1;
1965 			if (copy_from_user(buf, s, len))
1966 				return -EFAULT;
1967 			buf[len] = 0;
1968 			p = buf;
1969 			if (*p == '-' && left > 1) {
1970 				neg = 1;
1971 				left--, p++;
1972 			}
1973 			if (*p < '0' || *p > '9')
1974 				break;
1975 			val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
1976 			len = p-buf;
1977 			if ((len < left) && *p && !isspace(*p))
1978 				break;
1979 			if (neg)
1980 				val = -val;
1981 			s += len;
1982 			left -= len;
1983 
1984 			if(neg)
1985 				continue;
1986 			if ((min && val < *min) || (max && val > *max))
1987 				continue;
1988 			*i = val;
1989 		} else {
1990 			p = buf;
1991 			if (!first)
1992 				*p++ = '\t';
1993 			sprintf(p, "%lu", convdiv * (*i) / convmul);
1994 			len = strlen(buf);
1995 			if (len > left)
1996 				len = left;
1997 			if(copy_to_user(s, buf, len))
1998 				return -EFAULT;
1999 			left -= len;
2000 			s += len;
2001 		}
2002 	}
2003 
2004 	if (!write && !first && left) {
2005 		if(put_user('\n', s))
2006 			return -EFAULT;
2007 		left--, s++;
2008 	}
2009 	if (write) {
2010 		while (left) {
2011 			char c;
2012 			if (get_user(c, s++))
2013 				return -EFAULT;
2014 			if (!isspace(c))
2015 				break;
2016 			left--;
2017 		}
2018 	}
2019 	if (write && first)
2020 		return -EINVAL;
2021 	*lenp -= left;
2022 	*ppos += *lenp;
2023 	return 0;
2024 #undef TMPBUFLEN
2025 }
2026 
/**
 * proc_doulongvec_minmax - read a vector of long integers with min/max values
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
 * values from/to the user buffer, treated as an ASCII string.
 *
 * This routine will ensure the values are within the range specified by
 * table->extra1 (min) and table->extra2 (max).
 *
 * Values are stored unscaled (conversion factors 1 and 1).
 *
 * Returns 0 on success.
 */
int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
}
2049 
/**
 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
 * values from/to the user buffer, treated as an ASCII string. The values
 * are treated as milliseconds, and converted to jiffies when they are stored.
 *
 * This routine will ensure the values are within the range specified by
 * table->extra1 (min) and table->extra2 (max).
 *
 * Returns 0 on success.
 */
int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
				      struct file *filp,
				      void __user *buffer,
				      size_t *lenp, loff_t *ppos)
{
    /* scale by HZ/1000: milliseconds from the user, jiffies in the table */
    return do_proc_doulongvec_minmax(table, write, filp, buffer,
				     lenp, ppos, HZ, 1000l);
}
2076 
2077 
2078 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2079 					 int *valp,
2080 					 int write, void *data)
2081 {
2082 	if (write) {
2083 		if (*lvalp > LONG_MAX / HZ)
2084 			return 1;
2085 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2086 	} else {
2087 		int val = *valp;
2088 		unsigned long lval;
2089 		if (val < 0) {
2090 			*negp = -1;
2091 			lval = (unsigned long)-val;
2092 		} else {
2093 			*negp = 0;
2094 			lval = (unsigned long)val;
2095 		}
2096 		*lvalp = lval / HZ;
2097 	}
2098 	return 0;
2099 }
2100 
2101 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2102 						int *valp,
2103 						int write, void *data)
2104 {
2105 	if (write) {
2106 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2107 			return 1;
2108 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2109 	} else {
2110 		int val = *valp;
2111 		unsigned long lval;
2112 		if (val < 0) {
2113 			*negp = -1;
2114 			lval = (unsigned long)-val;
2115 		} else {
2116 			*negp = 0;
2117 			lval = (unsigned long)val;
2118 		}
2119 		*lvalp = jiffies_to_clock_t(lval);
2120 	}
2121 	return 0;
2122 }
2123 
/*
 * Conversion callback for do_proc_dointvec(): user space sees
 * milliseconds, the stored int holds jiffies.
 *
 * @negp:  sign flag (non-zero means negative)
 * @lvalp: magnitude in milliseconds
 * @valp:  the stored value in jiffies
 * @write: non-zero: milliseconds to jiffies via msecs_to_jiffies();
 *         zero: jiffies to milliseconds via jiffies_to_msecs()
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate after widening to unsigned: "-val" would
			 * overflow for INT_MIN and yield a bogus magnitude.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2144 
/**
 * proc_dointvec_jiffies - read a vector of integers as seconds
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: file position
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 * values from/to the user buffer, treated as an ASCII string.
 * The values read are assumed to be in seconds, and are converted into
 * jiffies.
 *
 * Returns 0 on success.
 */
int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
			  void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
		    	    do_proc_dointvec_jiffies_conv,NULL);
}
2167 
/**
 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: pointer to the file position
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 * values from/to the user buffer, treated as an ASCII string.
 * The values read are assumed to be in 1/USER_HZ seconds, and
 * are converted into jiffies.
 *
 * Returns 0 on success.
 */
int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
				 void __user *buffer, size_t *lenp, loff_t *ppos)
{
    return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
}
2190 
/**
 * proc_dointvec_ms_jiffies - read a vector of integers as milliseconds
 * @table: the sysctl table
 * @write: %TRUE if this is a write to the sysctl file
 * @filp: the file structure
 * @buffer: the user buffer
 * @lenp: the size of the user buffer
 * @ppos: the current position in the file
 *
 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
 * values from/to the user buffer, treated as an ASCII string.
 * The values read are assumed to be in 1/1000 seconds, and
 * are converted into jiffies.
 *
 * Returns 0 on success.
 */
int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
			     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
				do_proc_dointvec_ms_jiffies_conv, NULL);
}
2214 
2215 #else /* CONFIG_PROC_FS */
2216 
/* Stub for kernels built without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dostring(ctl_table *table, int write, struct file *filp,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2222 
/* Stub for kernels built without CONFIG_PROC_FS: always fails with -ENOSYS. */
static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
			    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2228 
/* Stub for kernels built without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec(ctl_table *table, int write, struct file *filp,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2234 
/* Stub for kernels built without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2240 
/* Stub for kernels built without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2246 
/* Stub for kernels built without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2252 
/* Stub for kernels built without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2258 
/* Stub for kernels built without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
			     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2264 
2265 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2266 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2267 {
2268 	return -ENOSYS;
2269 }
2270 
2271 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2272 				      struct file *filp,
2273 				      void __user *buffer,
2274 				      size_t *lenp, loff_t *ppos)
2275 {
2276     return -ENOSYS;
2277 }
2278 
2279 
2280 #endif /* CONFIG_PROC_FS */
2281 
2282 
2283 /*
2284  * General sysctl support routines
2285  */
2286 
2287 /* The generic string strategy routine: */
2288 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2289 		  void __user *oldval, size_t __user *oldlenp,
2290 		  void __user *newval, size_t newlen, void **context)
2291 {
2292 	if (!table->data || !table->maxlen)
2293 		return -ENOTDIR;
2294 
2295 	if (oldval && oldlenp) {
2296 		size_t bufsize;
2297 		if (get_user(bufsize, oldlenp))
2298 			return -EFAULT;
2299 		if (bufsize) {
2300 			size_t len = strlen(table->data), copied;
2301 
2302 			/* This shouldn't trigger for a well-formed sysctl */
2303 			if (len > table->maxlen)
2304 				len = table->maxlen;
2305 
2306 			/* Copy up to a max of bufsize-1 bytes of the string */
2307 			copied = (len >= bufsize) ? bufsize - 1 : len;
2308 
2309 			if (copy_to_user(oldval, table->data, copied) ||
2310 			    put_user(0, (char __user *)(oldval + copied)))
2311 				return -EFAULT;
2312 			if (put_user(len, oldlenp))
2313 				return -EFAULT;
2314 		}
2315 	}
2316 	if (newval && newlen) {
2317 		size_t len = newlen;
2318 		if (len > table->maxlen)
2319 			len = table->maxlen;
2320 		if(copy_from_user(table->data, newval, len))
2321 			return -EFAULT;
2322 		if (len == table->maxlen)
2323 			len--;
2324 		((char *) table->data)[len] = 0;
2325 	}
2326 	return 1;
2327 }
2328 
2329 /*
2330  * This function makes sure that all of the integers in the vector
2331  * are between the minimum and maximum values given in the arrays
2332  * table->extra1 and table->extra2, respectively.
2333  */
2334 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2335 		void __user *oldval, size_t __user *oldlenp,
2336 		void __user *newval, size_t newlen, void **context)
2337 {
2338 
2339 	if (newval && newlen) {
2340 		int __user *vec = (int __user *) newval;
2341 		int *min = (int *) table->extra1;
2342 		int *max = (int *) table->extra2;
2343 		size_t length;
2344 		int i;
2345 
2346 		if (newlen % sizeof(int) != 0)
2347 			return -EINVAL;
2348 
2349 		if (!table->extra1 && !table->extra2)
2350 			return 0;
2351 
2352 		if (newlen > table->maxlen)
2353 			newlen = table->maxlen;
2354 		length = newlen / sizeof(int);
2355 
2356 		for (i = 0; i < length; i++) {
2357 			int value;
2358 			if (get_user(value, vec + i))
2359 				return -EFAULT;
2360 			if (min && value < min[i])
2361 				return -EINVAL;
2362 			if (max && value > max[i])
2363 				return -EINVAL;
2364 		}
2365 	}
2366 	return 0;
2367 }
2368 
2369 /* Strategy function to convert jiffies to seconds */
2370 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2371 		void __user *oldval, size_t __user *oldlenp,
2372 		void __user *newval, size_t newlen, void **context)
2373 {
2374 	if (oldval) {
2375 		size_t olen;
2376 		if (oldlenp) {
2377 			if (get_user(olen, oldlenp))
2378 				return -EFAULT;
2379 			if (olen!=sizeof(int))
2380 				return -EINVAL;
2381 		}
2382 		if (put_user(*(int *)(table->data)/HZ, (int __user *)oldval) ||
2383 		    (oldlenp && put_user(sizeof(int),oldlenp)))
2384 			return -EFAULT;
2385 	}
2386 	if (newval && newlen) {
2387 		int new;
2388 		if (newlen != sizeof(int))
2389 			return -EINVAL;
2390 		if (get_user(new, (int __user *)newval))
2391 			return -EFAULT;
2392 		*(int *)(table->data) = new*HZ;
2393 	}
2394 	return 1;
2395 }
2396 
2397 /* Strategy function to convert jiffies to seconds */
2398 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2399 		void __user *oldval, size_t __user *oldlenp,
2400 		void __user *newval, size_t newlen, void **context)
2401 {
2402 	if (oldval) {
2403 		size_t olen;
2404 		if (oldlenp) {
2405 			if (get_user(olen, oldlenp))
2406 				return -EFAULT;
2407 			if (olen!=sizeof(int))
2408 				return -EINVAL;
2409 		}
2410 		if (put_user(jiffies_to_msecs(*(int *)(table->data)), (int __user *)oldval) ||
2411 		    (oldlenp && put_user(sizeof(int),oldlenp)))
2412 			return -EFAULT;
2413 	}
2414 	if (newval && newlen) {
2415 		int new;
2416 		if (newlen != sizeof(int))
2417 			return -EINVAL;
2418 		if (get_user(new, (int __user *)newval))
2419 			return -EFAULT;
2420 		*(int *)(table->data) = msecs_to_jiffies(new);
2421 	}
2422 	return 1;
2423 }
2424 
2425 #else /* CONFIG_SYSCTL */
2426 
2427 
2428 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2429 {
2430 	return -ENOSYS;
2431 }
2432 
2433 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2434 		  void __user *oldval, size_t __user *oldlenp,
2435 		  void __user *newval, size_t newlen, void **context)
2436 {
2437 	return -ENOSYS;
2438 }
2439 
2440 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2441 		void __user *oldval, size_t __user *oldlenp,
2442 		void __user *newval, size_t newlen, void **context)
2443 {
2444 	return -ENOSYS;
2445 }
2446 
2447 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2448 		void __user *oldval, size_t __user *oldlenp,
2449 		void __user *newval, size_t newlen, void **context)
2450 {
2451 	return -ENOSYS;
2452 }
2453 
2454 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2455 		void __user *oldval, size_t __user *oldlenp,
2456 		void __user *newval, size_t newlen, void **context)
2457 {
2458 	return -ENOSYS;
2459 }
2460 
2461 int proc_dostring(ctl_table *table, int write, struct file *filp,
2462 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2463 {
2464 	return -ENOSYS;
2465 }
2466 
2467 int proc_dointvec(ctl_table *table, int write, struct file *filp,
2468 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2469 {
2470 	return -ENOSYS;
2471 }
2472 
2473 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2474 			void __user *buffer, size_t *lenp, loff_t *ppos)
2475 {
2476 	return -ENOSYS;
2477 }
2478 
2479 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2480 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2481 {
2482 	return -ENOSYS;
2483 }
2484 
2485 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2486 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2487 {
2488 	return -ENOSYS;
2489 }
2490 
2491 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2492 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2493 {
2494 	return -ENOSYS;
2495 }
2496 
2497 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2498 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2499 {
2500 	return -ENOSYS;
2501 }
2502 
2503 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2504 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2505 {
2506 	return -ENOSYS;
2507 }
2508 
2509 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2510 				      struct file *filp,
2511 				      void __user *buffer,
2512 				      size_t *lenp, loff_t *ppos)
2513 {
2514     return -ENOSYS;
2515 }
2516 
2517 struct ctl_table_header * register_sysctl_table(ctl_table * table,
2518 						int insert_at_head)
2519 {
2520 	return NULL;
2521 }
2522 
/*
 * CONFIG_SYSCTL fallback (#else branch): unregistration stub.
 * Deliberately empty; the header argument is not touched.
 */
void unregister_sysctl_table(struct ctl_table_header *table)
{
}
2526 
2527 #endif /* CONFIG_SYSCTL */
2528 
/*
 * Exports are normally placed right after the definition they belong to.
 * The symbols below are defined in more than one preprocessor branch
 * above (a real implementation plus stub fallbacks), so repeating the
 * export after every copy would be pointless; they are collected here
 * once instead.
 */

/* /proc handler routines */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* sysctl strategy routines */
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);

/* table registration */
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(unregister_sysctl_table);
2547