xref: /linux/kernel/sysctl.c (revision 8b4a40809e5330c9da5d20107d693d92d73b31dc)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/capability.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/capability.h>
31 #include <linux/smp_lock.h>
32 #include <linux/fs.h>
33 #include <linux/init.h>
34 #include <linux/kernel.h>
35 #include <linux/kobject.h>
36 #include <linux/net.h>
37 #include <linux/sysrq.h>
38 #include <linux/highuid.h>
39 #include <linux/writeback.h>
40 #include <linux/hugetlb.h>
41 #include <linux/security.h>
42 #include <linux/initrd.h>
43 #include <linux/times.h>
44 #include <linux/limits.h>
45 #include <linux/dcache.h>
46 #include <linux/syscalls.h>
47 #include <linux/nfs_fs.h>
48 #include <linux/acpi.h>
49 
50 #include <asm/uaccess.h>
51 #include <asm/processor.h>
52 
53 #ifdef CONFIG_X86
54 #include <asm/nmi.h>
55 #include <asm/stacktrace.h>
56 #endif
57 
58 #if defined(CONFIG_SYSCTL)
59 
60 /* External variables not in a header file. */
61 extern int C_A_D;
62 extern int print_fatal_signals;
63 extern int sysctl_overcommit_memory;
64 extern int sysctl_overcommit_ratio;
65 extern int sysctl_panic_on_oom;
66 extern int max_threads;
67 extern int core_uses_pid;
68 extern int suid_dumpable;
69 extern char core_pattern[];
70 extern int pid_max;
71 extern int min_free_kbytes;
72 extern int printk_ratelimit_jiffies;
73 extern int printk_ratelimit_burst;
74 extern int pid_max_min, pid_max_max;
75 extern int sysctl_drop_caches;
76 extern int percpu_pagelist_fraction;
77 extern int compat_log;
78 extern int maps_protect;
79 extern int sysctl_stat_interval;
80 
/*
 * Range limits handed (through extra1/extra2) to proc_dointvec_minmax
 * for the [fs_]overflow UID and GID entries.
 */
static int minolduid;
static int maxolduid = 65535;

/* Lower bound passed as extra1 for the percpu_pagelist_fraction entry. */
static int min_percpu_pagelist_fract = 8;

/* Value exported by the read-only (0444) ngroups_max entry. */
static int ngroups_max = NGROUPS_MAX;
87 
88 #ifdef CONFIG_KMOD
89 extern char modprobe_path[];
90 #endif
91 #ifdef CONFIG_CHR_DEV_SG
92 extern int sg_big_buff;
93 #endif
94 
95 #ifdef __sparc__
96 extern char reboot_command [];
97 extern int stop_a_enabled;
98 extern int scons_pwroff;
99 #endif
100 
101 #ifdef __hppa__
102 extern int pwrsw_enabled;
103 extern int unaligned_enabled;
104 #endif
105 
106 #ifdef CONFIG_S390
107 #ifdef CONFIG_MATHEMU
108 extern int sysctl_ieee_emulation_warnings;
109 #endif
110 extern int sysctl_userprocess_debug;
111 extern int spin_retry;
112 #endif
113 
114 extern int sysctl_hz_timer;
115 
116 #ifdef CONFIG_BSD_PROCESS_ACCT
117 extern int acct_parm[];
118 #endif
119 
120 #ifdef CONFIG_IA64
121 extern int no_unaligned_warning;
122 #endif
123 
124 #ifdef CONFIG_RT_MUTEXES
125 extern int max_lock_depth;
126 #endif
127 
128 #ifdef CONFIG_SYSCTL_SYSCALL
129 static int parse_table(int __user *, int, void __user *, size_t __user *,
130 		void __user *, size_t, ctl_table *);
131 #endif
132 
133 
134 #ifdef CONFIG_PROC_SYSCTL
135 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
136 		  void __user *buffer, size_t *lenp, loff_t *ppos);
137 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
138 			       void __user *buffer, size_t *lenp, loff_t *ppos);
139 #endif
140 
141 static ctl_table root_table[];
142 static struct ctl_table_header root_table_header =
143 	{ root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
144 
145 static ctl_table kern_table[];
146 static ctl_table vm_table[];
147 static ctl_table fs_table[];
148 static ctl_table debug_table[];
149 static ctl_table dev_table[];
150 extern ctl_table random_table[];
151 #ifdef CONFIG_UNIX98_PTYS
152 extern ctl_table pty_table[];
153 #endif
154 #ifdef CONFIG_INOTIFY_USER
155 extern ctl_table inotify_table[];
156 #endif
157 
#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
/* Backing storage for the vm "legacy_va_layout" entry. */
int sysctl_legacy_va_layout;
#endif
161 
162 
163 /* The default sysctl tables: */
164 
165 static ctl_table root_table[] = {
166 	{
167 		.ctl_name	= CTL_KERN,
168 		.procname	= "kernel",
169 		.mode		= 0555,
170 		.child		= kern_table,
171 	},
172 	{
173 		.ctl_name	= CTL_VM,
174 		.procname	= "vm",
175 		.mode		= 0555,
176 		.child		= vm_table,
177 	},
178 #ifdef CONFIG_NET
179 	{
180 		.ctl_name	= CTL_NET,
181 		.procname	= "net",
182 		.mode		= 0555,
183 		.child		= net_table,
184 	},
185 #endif
186 	{
187 		.ctl_name	= CTL_FS,
188 		.procname	= "fs",
189 		.mode		= 0555,
190 		.child		= fs_table,
191 	},
192 	{
193 		.ctl_name	= CTL_DEBUG,
194 		.procname	= "debug",
195 		.mode		= 0555,
196 		.child		= debug_table,
197 	},
198 	{
199 		.ctl_name	= CTL_DEV,
200 		.procname	= "dev",
201 		.mode		= 0555,
202 		.child		= dev_table,
203 	},
204 /*
205  * NOTE: do not add new entries to this table unless you have read
206  * Documentation/sysctl/ctl_unnumbered.txt
207  */
208 	{ .ctl_name = 0 }
209 };
210 
#ifdef CONFIG_SCHED_DEBUG
/*
 * Range limits for the scheduler tunables in kern_table.
 *
 * They are passed as extra1/extra2 to proc_dointvec_minmax, which reads
 * its limits through "int *".  They must therefore be plain ints: an
 * unsigned long would be read as its high (zero) word on 64-bit
 * big-endian machines, making both limits 0.  Every value fits in an int.
 */
static int min_sched_granularity_ns = 100000;		/* 100 usecs */
static int max_sched_granularity_ns = 1000000000;	/* 1 second */
static int min_wakeup_granularity_ns;			/* 0 usecs */
static int max_wakeup_granularity_ns = 1000000000;	/* 1 second */
#endif
217 
218 static ctl_table kern_table[] = {
219 #ifdef CONFIG_SCHED_DEBUG
220 	{
221 		.ctl_name	= CTL_UNNUMBERED,
222 		.procname	= "sched_granularity_ns",
223 		.data		= &sysctl_sched_granularity,
224 		.maxlen		= sizeof(unsigned int),
225 		.mode		= 0644,
226 		.proc_handler	= &proc_dointvec_minmax,
227 		.strategy	= &sysctl_intvec,
228 		.extra1		= &min_sched_granularity_ns,
229 		.extra2		= &max_sched_granularity_ns,
230 	},
231 	{
232 		.ctl_name	= CTL_UNNUMBERED,
233 		.procname	= "sched_wakeup_granularity_ns",
234 		.data		= &sysctl_sched_wakeup_granularity,
235 		.maxlen		= sizeof(unsigned int),
236 		.mode		= 0644,
237 		.proc_handler	= &proc_dointvec_minmax,
238 		.strategy	= &sysctl_intvec,
239 		.extra1		= &min_wakeup_granularity_ns,
240 		.extra2		= &max_wakeup_granularity_ns,
241 	},
242 	{
243 		.ctl_name	= CTL_UNNUMBERED,
244 		.procname	= "sched_batch_wakeup_granularity_ns",
245 		.data		= &sysctl_sched_batch_wakeup_granularity,
246 		.maxlen		= sizeof(unsigned int),
247 		.mode		= 0644,
248 		.proc_handler	= &proc_dointvec_minmax,
249 		.strategy	= &sysctl_intvec,
250 		.extra1		= &min_wakeup_granularity_ns,
251 		.extra2		= &max_wakeup_granularity_ns,
252 	},
253 	{
254 		.ctl_name	= CTL_UNNUMBERED,
255 		.procname	= "sched_stat_granularity_ns",
256 		.data		= &sysctl_sched_stat_granularity,
257 		.maxlen		= sizeof(unsigned int),
258 		.mode		= 0644,
259 		.proc_handler	= &proc_dointvec_minmax,
260 		.strategy	= &sysctl_intvec,
261 		.extra1		= &min_wakeup_granularity_ns,
262 		.extra2		= &max_wakeup_granularity_ns,
263 	},
264 	{
265 		.ctl_name	= CTL_UNNUMBERED,
266 		.procname	= "sched_runtime_limit_ns",
267 		.data		= &sysctl_sched_runtime_limit,
268 		.maxlen		= sizeof(unsigned int),
269 		.mode		= 0644,
270 		.proc_handler	= &proc_dointvec_minmax,
271 		.strategy	= &sysctl_intvec,
272 		.extra1		= &min_sched_granularity_ns,
273 		.extra2		= &max_sched_granularity_ns,
274 	},
275 	{
276 		.ctl_name	= CTL_UNNUMBERED,
277 		.procname	= "sched_child_runs_first",
278 		.data		= &sysctl_sched_child_runs_first,
279 		.maxlen		= sizeof(unsigned int),
280 		.mode		= 0644,
281 		.proc_handler	= &proc_dointvec,
282 	},
283 	{
284 		.ctl_name	= CTL_UNNUMBERED,
285 		.procname	= "sched_features",
286 		.data		= &sysctl_sched_features,
287 		.maxlen		= sizeof(unsigned int),
288 		.mode		= 0644,
289 		.proc_handler	= &proc_dointvec,
290 	},
291 #endif
292 	{
293 		.ctl_name	= KERN_PANIC,
294 		.procname	= "panic",
295 		.data		= &panic_timeout,
296 		.maxlen		= sizeof(int),
297 		.mode		= 0644,
298 		.proc_handler	= &proc_dointvec,
299 	},
300 	{
301 		.ctl_name	= KERN_CORE_USES_PID,
302 		.procname	= "core_uses_pid",
303 		.data		= &core_uses_pid,
304 		.maxlen		= sizeof(int),
305 		.mode		= 0644,
306 		.proc_handler	= &proc_dointvec,
307 	},
308 	{
309 		.ctl_name	= KERN_CORE_PATTERN,
310 		.procname	= "core_pattern",
311 		.data		= core_pattern,
312 		.maxlen		= CORENAME_MAX_SIZE,
313 		.mode		= 0644,
314 		.proc_handler	= &proc_dostring,
315 		.strategy	= &sysctl_string,
316 	},
317 #ifdef CONFIG_PROC_SYSCTL
318 	{
319 		.ctl_name	= KERN_TAINTED,
320 		.procname	= "tainted",
321 		.data		= &tainted,
322 		.maxlen		= sizeof(int),
323 		.mode		= 0644,
324 		.proc_handler	= &proc_dointvec_taint,
325 	},
326 #endif
327 	{
328 		.ctl_name	= KERN_CAP_BSET,
329 		.procname	= "cap-bound",
330 		.data		= &cap_bset,
331 		.maxlen		= sizeof(kernel_cap_t),
332 		.mode		= 0600,
333 		.proc_handler	= &proc_dointvec_bset,
334 	},
335 #ifdef CONFIG_BLK_DEV_INITRD
336 	{
337 		.ctl_name	= KERN_REALROOTDEV,
338 		.procname	= "real-root-dev",
339 		.data		= &real_root_dev,
340 		.maxlen		= sizeof(int),
341 		.mode		= 0644,
342 		.proc_handler	= &proc_dointvec,
343 	},
344 #endif
345 	{
346 		.ctl_name	= CTL_UNNUMBERED,
347 		.procname	= "print-fatal-signals",
348 		.data		= &print_fatal_signals,
349 		.maxlen		= sizeof(int),
350 		.mode		= 0644,
351 		.proc_handler	= &proc_dointvec,
352 	},
353 #ifdef __sparc__
354 	{
355 		.ctl_name	= KERN_SPARC_REBOOT,
356 		.procname	= "reboot-cmd",
357 		.data		= reboot_command,
358 		.maxlen		= 256,
359 		.mode		= 0644,
360 		.proc_handler	= &proc_dostring,
361 		.strategy	= &sysctl_string,
362 	},
363 	{
364 		.ctl_name	= KERN_SPARC_STOP_A,
365 		.procname	= "stop-a",
366 		.data		= &stop_a_enabled,
367 		.maxlen		= sizeof (int),
368 		.mode		= 0644,
369 		.proc_handler	= &proc_dointvec,
370 	},
371 	{
372 		.ctl_name	= KERN_SPARC_SCONS_PWROFF,
373 		.procname	= "scons-poweroff",
374 		.data		= &scons_pwroff,
375 		.maxlen		= sizeof (int),
376 		.mode		= 0644,
377 		.proc_handler	= &proc_dointvec,
378 	},
379 #endif
380 #ifdef __hppa__
381 	{
382 		.ctl_name	= KERN_HPPA_PWRSW,
383 		.procname	= "soft-power",
384 		.data		= &pwrsw_enabled,
385 		.maxlen		= sizeof (int),
386 	 	.mode		= 0644,
387 		.proc_handler	= &proc_dointvec,
388 	},
389 	{
390 		.ctl_name	= KERN_HPPA_UNALIGNED,
391 		.procname	= "unaligned-trap",
392 		.data		= &unaligned_enabled,
393 		.maxlen		= sizeof (int),
394 		.mode		= 0644,
395 		.proc_handler	= &proc_dointvec,
396 	},
397 #endif
398 	{
399 		.ctl_name	= KERN_CTLALTDEL,
400 		.procname	= "ctrl-alt-del",
401 		.data		= &C_A_D,
402 		.maxlen		= sizeof(int),
403 		.mode		= 0644,
404 		.proc_handler	= &proc_dointvec,
405 	},
406 	{
407 		.ctl_name	= KERN_PRINTK,
408 		.procname	= "printk",
409 		.data		= &console_loglevel,
410 		.maxlen		= 4*sizeof(int),
411 		.mode		= 0644,
412 		.proc_handler	= &proc_dointvec,
413 	},
414 #ifdef CONFIG_KMOD
415 	{
416 		.ctl_name	= KERN_MODPROBE,
417 		.procname	= "modprobe",
418 		.data		= &modprobe_path,
419 		.maxlen		= KMOD_PATH_LEN,
420 		.mode		= 0644,
421 		.proc_handler	= &proc_dostring,
422 		.strategy	= &sysctl_string,
423 	},
424 #endif
425 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
426 	{
427 		.ctl_name	= KERN_HOTPLUG,
428 		.procname	= "hotplug",
429 		.data		= &uevent_helper,
430 		.maxlen		= UEVENT_HELPER_PATH_LEN,
431 		.mode		= 0644,
432 		.proc_handler	= &proc_dostring,
433 		.strategy	= &sysctl_string,
434 	},
435 #endif
436 #ifdef CONFIG_CHR_DEV_SG
437 	{
438 		.ctl_name	= KERN_SG_BIG_BUFF,
439 		.procname	= "sg-big-buff",
440 		.data		= &sg_big_buff,
441 		.maxlen		= sizeof (int),
442 		.mode		= 0444,
443 		.proc_handler	= &proc_dointvec,
444 	},
445 #endif
446 #ifdef CONFIG_BSD_PROCESS_ACCT
447 	{
448 		.ctl_name	= KERN_ACCT,
449 		.procname	= "acct",
450 		.data		= &acct_parm,
451 		.maxlen		= 3*sizeof(int),
452 		.mode		= 0644,
453 		.proc_handler	= &proc_dointvec,
454 	},
455 #endif
456 #ifdef CONFIG_MAGIC_SYSRQ
457 	{
458 		.ctl_name	= KERN_SYSRQ,
459 		.procname	= "sysrq",
460 		.data		= &__sysrq_enabled,
461 		.maxlen		= sizeof (int),
462 		.mode		= 0644,
463 		.proc_handler	= &proc_dointvec,
464 	},
465 #endif
466 #ifdef CONFIG_PROC_SYSCTL
467 	{
468 		.ctl_name	= KERN_CADPID,
469 		.procname	= "cad_pid",
470 		.data		= NULL,
471 		.maxlen		= sizeof (int),
472 		.mode		= 0600,
473 		.proc_handler	= &proc_do_cad_pid,
474 	},
475 #endif
476 	{
477 		.ctl_name	= KERN_MAX_THREADS,
478 		.procname	= "threads-max",
479 		.data		= &max_threads,
480 		.maxlen		= sizeof(int),
481 		.mode		= 0644,
482 		.proc_handler	= &proc_dointvec,
483 	},
484 	{
485 		.ctl_name	= KERN_RANDOM,
486 		.procname	= "random",
487 		.mode		= 0555,
488 		.child		= random_table,
489 	},
490 #ifdef CONFIG_UNIX98_PTYS
491 	{
492 		.ctl_name	= KERN_PTY,
493 		.procname	= "pty",
494 		.mode		= 0555,
495 		.child		= pty_table,
496 	},
497 #endif
498 	{
499 		.ctl_name	= KERN_OVERFLOWUID,
500 		.procname	= "overflowuid",
501 		.data		= &overflowuid,
502 		.maxlen		= sizeof(int),
503 		.mode		= 0644,
504 		.proc_handler	= &proc_dointvec_minmax,
505 		.strategy	= &sysctl_intvec,
506 		.extra1		= &minolduid,
507 		.extra2		= &maxolduid,
508 	},
509 	{
510 		.ctl_name	= KERN_OVERFLOWGID,
511 		.procname	= "overflowgid",
512 		.data		= &overflowgid,
513 		.maxlen		= sizeof(int),
514 		.mode		= 0644,
515 		.proc_handler	= &proc_dointvec_minmax,
516 		.strategy	= &sysctl_intvec,
517 		.extra1		= &minolduid,
518 		.extra2		= &maxolduid,
519 	},
520 #ifdef CONFIG_S390
521 #ifdef CONFIG_MATHEMU
522 	{
523 		.ctl_name	= KERN_IEEE_EMULATION_WARNINGS,
524 		.procname	= "ieee_emulation_warnings",
525 		.data		= &sysctl_ieee_emulation_warnings,
526 		.maxlen		= sizeof(int),
527 		.mode		= 0644,
528 		.proc_handler	= &proc_dointvec,
529 	},
530 #endif
531 #ifdef CONFIG_NO_IDLE_HZ
532 	{
533 		.ctl_name       = KERN_HZ_TIMER,
534 		.procname       = "hz_timer",
535 		.data           = &sysctl_hz_timer,
536 		.maxlen         = sizeof(int),
537 		.mode           = 0644,
538 		.proc_handler   = &proc_dointvec,
539 	},
540 #endif
541 	{
542 		.ctl_name	= KERN_S390_USER_DEBUG_LOGGING,
543 		.procname	= "userprocess_debug",
544 		.data		= &sysctl_userprocess_debug,
545 		.maxlen		= sizeof(int),
546 		.mode		= 0644,
547 		.proc_handler	= &proc_dointvec,
548 	},
549 #endif
550 	{
551 		.ctl_name	= KERN_PIDMAX,
552 		.procname	= "pid_max",
553 		.data		= &pid_max,
554 		.maxlen		= sizeof (int),
555 		.mode		= 0644,
556 		.proc_handler	= &proc_dointvec_minmax,
557 		.strategy	= sysctl_intvec,
558 		.extra1		= &pid_max_min,
559 		.extra2		= &pid_max_max,
560 	},
561 	{
562 		.ctl_name	= KERN_PANIC_ON_OOPS,
563 		.procname	= "panic_on_oops",
564 		.data		= &panic_on_oops,
565 		.maxlen		= sizeof(int),
566 		.mode		= 0644,
567 		.proc_handler	= &proc_dointvec,
568 	},
569 	{
570 		.ctl_name	= KERN_PRINTK_RATELIMIT,
571 		.procname	= "printk_ratelimit",
572 		.data		= &printk_ratelimit_jiffies,
573 		.maxlen		= sizeof(int),
574 		.mode		= 0644,
575 		.proc_handler	= &proc_dointvec_jiffies,
576 		.strategy	= &sysctl_jiffies,
577 	},
578 	{
579 		.ctl_name	= KERN_PRINTK_RATELIMIT_BURST,
580 		.procname	= "printk_ratelimit_burst",
581 		.data		= &printk_ratelimit_burst,
582 		.maxlen		= sizeof(int),
583 		.mode		= 0644,
584 		.proc_handler	= &proc_dointvec,
585 	},
586 	{
587 		.ctl_name	= KERN_NGROUPS_MAX,
588 		.procname	= "ngroups_max",
589 		.data		= &ngroups_max,
590 		.maxlen		= sizeof (int),
591 		.mode		= 0444,
592 		.proc_handler	= &proc_dointvec,
593 	},
594 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
595 	{
596 		.ctl_name       = KERN_UNKNOWN_NMI_PANIC,
597 		.procname       = "unknown_nmi_panic",
598 		.data           = &unknown_nmi_panic,
599 		.maxlen         = sizeof (int),
600 		.mode           = 0644,
601 		.proc_handler   = &proc_dointvec,
602 	},
603 	{
604 		.ctl_name       = KERN_NMI_WATCHDOG,
605 		.procname       = "nmi_watchdog",
606 		.data           = &nmi_watchdog_enabled,
607 		.maxlen         = sizeof (int),
608 		.mode           = 0644,
609 		.proc_handler   = &proc_nmi_enabled,
610 	},
611 #endif
612 #if defined(CONFIG_X86)
613 	{
614 		.ctl_name	= KERN_PANIC_ON_NMI,
615 		.procname	= "panic_on_unrecovered_nmi",
616 		.data		= &panic_on_unrecovered_nmi,
617 		.maxlen		= sizeof(int),
618 		.mode		= 0644,
619 		.proc_handler	= &proc_dointvec,
620 	},
621 	{
622 		.ctl_name	= KERN_BOOTLOADER_TYPE,
623 		.procname	= "bootloader_type",
624 		.data		= &bootloader_type,
625 		.maxlen		= sizeof (int),
626 		.mode		= 0444,
627 		.proc_handler	= &proc_dointvec,
628 	},
629 	{
630 		.ctl_name	= CTL_UNNUMBERED,
631 		.procname	= "kstack_depth_to_print",
632 		.data		= &kstack_depth_to_print,
633 		.maxlen		= sizeof(int),
634 		.mode		= 0644,
635 		.proc_handler	= &proc_dointvec,
636 	},
637 #endif
638 #if defined(CONFIG_MMU)
639 	{
640 		.ctl_name	= KERN_RANDOMIZE,
641 		.procname	= "randomize_va_space",
642 		.data		= &randomize_va_space,
643 		.maxlen		= sizeof(int),
644 		.mode		= 0644,
645 		.proc_handler	= &proc_dointvec,
646 	},
647 #endif
648 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
649 	{
650 		.ctl_name	= KERN_SPIN_RETRY,
651 		.procname	= "spin_retry",
652 		.data		= &spin_retry,
653 		.maxlen		= sizeof (int),
654 		.mode		= 0644,
655 		.proc_handler	= &proc_dointvec,
656 	},
657 #endif
658 #ifdef CONFIG_ACPI_SLEEP
659 	{
660 		.ctl_name	= KERN_ACPI_VIDEO_FLAGS,
661 		.procname	= "acpi_video_flags",
662 		.data		= &acpi_video_flags,
663 		.maxlen		= sizeof (unsigned long),
664 		.mode		= 0644,
665 		.proc_handler	= &proc_doulongvec_minmax,
666 	},
667 #endif
668 #ifdef CONFIG_IA64
669 	{
670 		.ctl_name	= KERN_IA64_UNALIGNED,
671 		.procname	= "ignore-unaligned-usertrap",
672 		.data		= &no_unaligned_warning,
673 		.maxlen		= sizeof (int),
674 	 	.mode		= 0644,
675 		.proc_handler	= &proc_dointvec,
676 	},
677 #endif
678 #ifdef CONFIG_COMPAT
679 	{
680 		.ctl_name	= KERN_COMPAT_LOG,
681 		.procname	= "compat-log",
682 		.data		= &compat_log,
683 		.maxlen		= sizeof (int),
684 	 	.mode		= 0644,
685 		.proc_handler	= &proc_dointvec,
686 	},
687 #endif
688 #ifdef CONFIG_RT_MUTEXES
689 	{
690 		.ctl_name	= KERN_MAX_LOCK_DEPTH,
691 		.procname	= "max_lock_depth",
692 		.data		= &max_lock_depth,
693 		.maxlen		= sizeof(int),
694 		.mode		= 0644,
695 		.proc_handler	= &proc_dointvec,
696 	},
697 #endif
698 #ifdef CONFIG_PROC_FS
699 	{
700 		.ctl_name       = CTL_UNNUMBERED,
701 		.procname       = "maps_protect",
702 		.data           = &maps_protect,
703 		.maxlen         = sizeof(int),
704 		.mode           = 0644,
705 		.proc_handler   = &proc_dointvec,
706 	},
707 #endif
708 
709 	{ .ctl_name = 0 }
710 };
711 
/*
 * Bounds used for minimum and maximum checking in vm_table.  Each one
 * serves as a one-element integer vector.
 */
static int zero;
static int one_hundred = 100;
716 
717 
718 static ctl_table vm_table[] = {
719 	{
720 		.ctl_name	= VM_OVERCOMMIT_MEMORY,
721 		.procname	= "overcommit_memory",
722 		.data		= &sysctl_overcommit_memory,
723 		.maxlen		= sizeof(sysctl_overcommit_memory),
724 		.mode		= 0644,
725 		.proc_handler	= &proc_dointvec,
726 	},
727 	{
728 		.ctl_name	= VM_PANIC_ON_OOM,
729 		.procname	= "panic_on_oom",
730 		.data		= &sysctl_panic_on_oom,
731 		.maxlen		= sizeof(sysctl_panic_on_oom),
732 		.mode		= 0644,
733 		.proc_handler	= &proc_dointvec,
734 	},
735 	{
736 		.ctl_name	= VM_OVERCOMMIT_RATIO,
737 		.procname	= "overcommit_ratio",
738 		.data		= &sysctl_overcommit_ratio,
739 		.maxlen		= sizeof(sysctl_overcommit_ratio),
740 		.mode		= 0644,
741 		.proc_handler	= &proc_dointvec,
742 	},
743 	{
744 		.ctl_name	= VM_PAGE_CLUSTER,
745 		.procname	= "page-cluster",
746 		.data		= &page_cluster,
747 		.maxlen		= sizeof(int),
748 		.mode		= 0644,
749 		.proc_handler	= &proc_dointvec,
750 	},
751 	{
752 		.ctl_name	= VM_DIRTY_BACKGROUND,
753 		.procname	= "dirty_background_ratio",
754 		.data		= &dirty_background_ratio,
755 		.maxlen		= sizeof(dirty_background_ratio),
756 		.mode		= 0644,
757 		.proc_handler	= &proc_dointvec_minmax,
758 		.strategy	= &sysctl_intvec,
759 		.extra1		= &zero,
760 		.extra2		= &one_hundred,
761 	},
762 	{
763 		.ctl_name	= VM_DIRTY_RATIO,
764 		.procname	= "dirty_ratio",
765 		.data		= &vm_dirty_ratio,
766 		.maxlen		= sizeof(vm_dirty_ratio),
767 		.mode		= 0644,
768 		.proc_handler	= &proc_dointvec_minmax,
769 		.strategy	= &sysctl_intvec,
770 		.extra1		= &zero,
771 		.extra2		= &one_hundred,
772 	},
773 	{
774 		.ctl_name	= VM_DIRTY_WB_CS,
775 		.procname	= "dirty_writeback_centisecs",
776 		.data		= &dirty_writeback_interval,
777 		.maxlen		= sizeof(dirty_writeback_interval),
778 		.mode		= 0644,
779 		.proc_handler	= &dirty_writeback_centisecs_handler,
780 	},
781 	{
782 		.ctl_name	= VM_DIRTY_EXPIRE_CS,
783 		.procname	= "dirty_expire_centisecs",
784 		.data		= &dirty_expire_interval,
785 		.maxlen		= sizeof(dirty_expire_interval),
786 		.mode		= 0644,
787 		.proc_handler	= &proc_dointvec_userhz_jiffies,
788 	},
789 	{
790 		.ctl_name	= VM_NR_PDFLUSH_THREADS,
791 		.procname	= "nr_pdflush_threads",
792 		.data		= &nr_pdflush_threads,
793 		.maxlen		= sizeof nr_pdflush_threads,
794 		.mode		= 0444 /* read-only*/,
795 		.proc_handler	= &proc_dointvec,
796 	},
797 	{
798 		.ctl_name	= VM_SWAPPINESS,
799 		.procname	= "swappiness",
800 		.data		= &vm_swappiness,
801 		.maxlen		= sizeof(vm_swappiness),
802 		.mode		= 0644,
803 		.proc_handler	= &proc_dointvec_minmax,
804 		.strategy	= &sysctl_intvec,
805 		.extra1		= &zero,
806 		.extra2		= &one_hundred,
807 	},
808 #ifdef CONFIG_HUGETLB_PAGE
809 	 {
810 		.ctl_name	= VM_HUGETLB_PAGES,
811 		.procname	= "nr_hugepages",
812 		.data		= &max_huge_pages,
813 		.maxlen		= sizeof(unsigned long),
814 		.mode		= 0644,
815 		.proc_handler	= &hugetlb_sysctl_handler,
816 		.extra1		= (void *)&hugetlb_zero,
817 		.extra2		= (void *)&hugetlb_infinity,
818 	 },
819 	 {
820 		.ctl_name	= VM_HUGETLB_GROUP,
821 		.procname	= "hugetlb_shm_group",
822 		.data		= &sysctl_hugetlb_shm_group,
823 		.maxlen		= sizeof(gid_t),
824 		.mode		= 0644,
825 		.proc_handler	= &proc_dointvec,
826 	 },
827 	 {
828 		.ctl_name	= CTL_UNNUMBERED,
829 		.procname	= "hugepages_treat_as_movable",
830 		.data		= &hugepages_treat_as_movable,
831 		.maxlen		= sizeof(int),
832 		.mode		= 0644,
833 		.proc_handler	= &hugetlb_treat_movable_handler,
834 	},
835 #endif
836 	{
837 		.ctl_name	= VM_LOWMEM_RESERVE_RATIO,
838 		.procname	= "lowmem_reserve_ratio",
839 		.data		= &sysctl_lowmem_reserve_ratio,
840 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
841 		.mode		= 0644,
842 		.proc_handler	= &lowmem_reserve_ratio_sysctl_handler,
843 		.strategy	= &sysctl_intvec,
844 	},
845 	{
846 		.ctl_name	= VM_DROP_PAGECACHE,
847 		.procname	= "drop_caches",
848 		.data		= &sysctl_drop_caches,
849 		.maxlen		= sizeof(int),
850 		.mode		= 0644,
851 		.proc_handler	= drop_caches_sysctl_handler,
852 		.strategy	= &sysctl_intvec,
853 	},
854 	{
855 		.ctl_name	= VM_MIN_FREE_KBYTES,
856 		.procname	= "min_free_kbytes",
857 		.data		= &min_free_kbytes,
858 		.maxlen		= sizeof(min_free_kbytes),
859 		.mode		= 0644,
860 		.proc_handler	= &min_free_kbytes_sysctl_handler,
861 		.strategy	= &sysctl_intvec,
862 		.extra1		= &zero,
863 	},
864 	{
865 		.ctl_name	= VM_PERCPU_PAGELIST_FRACTION,
866 		.procname	= "percpu_pagelist_fraction",
867 		.data		= &percpu_pagelist_fraction,
868 		.maxlen		= sizeof(percpu_pagelist_fraction),
869 		.mode		= 0644,
870 		.proc_handler	= &percpu_pagelist_fraction_sysctl_handler,
871 		.strategy	= &sysctl_intvec,
872 		.extra1		= &min_percpu_pagelist_fract,
873 	},
874 #ifdef CONFIG_MMU
875 	{
876 		.ctl_name	= VM_MAX_MAP_COUNT,
877 		.procname	= "max_map_count",
878 		.data		= &sysctl_max_map_count,
879 		.maxlen		= sizeof(sysctl_max_map_count),
880 		.mode		= 0644,
881 		.proc_handler	= &proc_dointvec
882 	},
883 #endif
884 	{
885 		.ctl_name	= VM_LAPTOP_MODE,
886 		.procname	= "laptop_mode",
887 		.data		= &laptop_mode,
888 		.maxlen		= sizeof(laptop_mode),
889 		.mode		= 0644,
890 		.proc_handler	= &proc_dointvec_jiffies,
891 		.strategy	= &sysctl_jiffies,
892 	},
893 	{
894 		.ctl_name	= VM_BLOCK_DUMP,
895 		.procname	= "block_dump",
896 		.data		= &block_dump,
897 		.maxlen		= sizeof(block_dump),
898 		.mode		= 0644,
899 		.proc_handler	= &proc_dointvec,
900 		.strategy	= &sysctl_intvec,
901 		.extra1		= &zero,
902 	},
903 	{
904 		.ctl_name	= VM_VFS_CACHE_PRESSURE,
905 		.procname	= "vfs_cache_pressure",
906 		.data		= &sysctl_vfs_cache_pressure,
907 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
908 		.mode		= 0644,
909 		.proc_handler	= &proc_dointvec,
910 		.strategy	= &sysctl_intvec,
911 		.extra1		= &zero,
912 	},
913 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
914 	{
915 		.ctl_name	= VM_LEGACY_VA_LAYOUT,
916 		.procname	= "legacy_va_layout",
917 		.data		= &sysctl_legacy_va_layout,
918 		.maxlen		= sizeof(sysctl_legacy_va_layout),
919 		.mode		= 0644,
920 		.proc_handler	= &proc_dointvec,
921 		.strategy	= &sysctl_intvec,
922 		.extra1		= &zero,
923 	},
924 #endif
925 #ifdef CONFIG_NUMA
926 	{
927 		.ctl_name	= VM_ZONE_RECLAIM_MODE,
928 		.procname	= "zone_reclaim_mode",
929 		.data		= &zone_reclaim_mode,
930 		.maxlen		= sizeof(zone_reclaim_mode),
931 		.mode		= 0644,
932 		.proc_handler	= &proc_dointvec,
933 		.strategy	= &sysctl_intvec,
934 		.extra1		= &zero,
935 	},
936 	{
937 		.ctl_name	= VM_MIN_UNMAPPED,
938 		.procname	= "min_unmapped_ratio",
939 		.data		= &sysctl_min_unmapped_ratio,
940 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
941 		.mode		= 0644,
942 		.proc_handler	= &sysctl_min_unmapped_ratio_sysctl_handler,
943 		.strategy	= &sysctl_intvec,
944 		.extra1		= &zero,
945 		.extra2		= &one_hundred,
946 	},
947 	{
948 		.ctl_name	= VM_MIN_SLAB,
949 		.procname	= "min_slab_ratio",
950 		.data		= &sysctl_min_slab_ratio,
951 		.maxlen		= sizeof(sysctl_min_slab_ratio),
952 		.mode		= 0644,
953 		.proc_handler	= &sysctl_min_slab_ratio_sysctl_handler,
954 		.strategy	= &sysctl_intvec,
955 		.extra1		= &zero,
956 		.extra2		= &one_hundred,
957 	},
958 #endif
959 #ifdef CONFIG_SMP
960 	{
961 		.ctl_name	= CTL_UNNUMBERED,
962 		.procname	= "stat_interval",
963 		.data		= &sysctl_stat_interval,
964 		.maxlen		= sizeof(sysctl_stat_interval),
965 		.mode		= 0644,
966 		.proc_handler	= &proc_dointvec_jiffies,
967 		.strategy	= &sysctl_jiffies,
968 	},
969 #endif
970 #ifdef CONFIG_SECURITY
971 	{
972 		.ctl_name	= CTL_UNNUMBERED,
973 		.procname	= "mmap_min_addr",
974 		.data		= &mmap_min_addr,
975 		.maxlen         = sizeof(unsigned long),
976 		.mode		= 0644,
977 		.proc_handler	= &proc_doulongvec_minmax,
978 	},
979 #ifdef CONFIG_NUMA
980 	{
981 		.ctl_name	= CTL_UNNUMBERED,
982 		.procname	= "numa_zonelist_order",
983 		.data		= &numa_zonelist_order,
984 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
985 		.mode		= 0644,
986 		.proc_handler	= &numa_zonelist_order_handler,
987 		.strategy	= &sysctl_string,
988 	},
989 #endif
990 #endif
991 #if defined(CONFIG_X86_32) || \
992    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
993 	{
994 		.ctl_name	= VM_VDSO_ENABLED,
995 		.procname	= "vdso_enabled",
996 		.data		= &vdso_enabled,
997 		.maxlen		= sizeof(vdso_enabled),
998 		.mode		= 0644,
999 		.proc_handler	= &proc_dointvec,
1000 		.strategy	= &sysctl_intvec,
1001 		.extra1		= &zero,
1002 	},
1003 #endif
1004 /*
1005  * NOTE: do not add new entries to this table unless you have read
1006  * Documentation/sysctl/ctl_unnumbered.txt
1007  */
1008 	{ .ctl_name = 0 }
1009 };
1010 
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/* Child table of the fs "binfmt_misc" directory; holds only the terminator. */
static ctl_table binfmt_misc_table[] = {
	{ .ctl_name = 0 },
};
#endif
1016 
1017 static ctl_table fs_table[] = {
1018 	{
1019 		.ctl_name	= FS_NRINODE,
1020 		.procname	= "inode-nr",
1021 		.data		= &inodes_stat,
1022 		.maxlen		= 2*sizeof(int),
1023 		.mode		= 0444,
1024 		.proc_handler	= &proc_dointvec,
1025 	},
1026 	{
1027 		.ctl_name	= FS_STATINODE,
1028 		.procname	= "inode-state",
1029 		.data		= &inodes_stat,
1030 		.maxlen		= 7*sizeof(int),
1031 		.mode		= 0444,
1032 		.proc_handler	= &proc_dointvec,
1033 	},
1034 	{
1035 		.ctl_name	= FS_NRFILE,
1036 		.procname	= "file-nr",
1037 		.data		= &files_stat,
1038 		.maxlen		= 3*sizeof(int),
1039 		.mode		= 0444,
1040 		.proc_handler	= &proc_nr_files,
1041 	},
1042 	{
1043 		.ctl_name	= FS_MAXFILE,
1044 		.procname	= "file-max",
1045 		.data		= &files_stat.max_files,
1046 		.maxlen		= sizeof(int),
1047 		.mode		= 0644,
1048 		.proc_handler	= &proc_dointvec,
1049 	},
1050 	{
1051 		.ctl_name	= FS_DENTRY,
1052 		.procname	= "dentry-state",
1053 		.data		= &dentry_stat,
1054 		.maxlen		= 6*sizeof(int),
1055 		.mode		= 0444,
1056 		.proc_handler	= &proc_dointvec,
1057 	},
1058 	{
1059 		.ctl_name	= FS_OVERFLOWUID,
1060 		.procname	= "overflowuid",
1061 		.data		= &fs_overflowuid,
1062 		.maxlen		= sizeof(int),
1063 		.mode		= 0644,
1064 		.proc_handler	= &proc_dointvec_minmax,
1065 		.strategy	= &sysctl_intvec,
1066 		.extra1		= &minolduid,
1067 		.extra2		= &maxolduid,
1068 	},
1069 	{
1070 		.ctl_name	= FS_OVERFLOWGID,
1071 		.procname	= "overflowgid",
1072 		.data		= &fs_overflowgid,
1073 		.maxlen		= sizeof(int),
1074 		.mode		= 0644,
1075 		.proc_handler	= &proc_dointvec_minmax,
1076 		.strategy	= &sysctl_intvec,
1077 		.extra1		= &minolduid,
1078 		.extra2		= &maxolduid,
1079 	},
1080 	{
1081 		.ctl_name	= FS_LEASES,
1082 		.procname	= "leases-enable",
1083 		.data		= &leases_enable,
1084 		.maxlen		= sizeof(int),
1085 		.mode		= 0644,
1086 		.proc_handler	= &proc_dointvec,
1087 	},
1088 #ifdef CONFIG_DNOTIFY
1089 	{
1090 		.ctl_name	= FS_DIR_NOTIFY,
1091 		.procname	= "dir-notify-enable",
1092 		.data		= &dir_notify_enable,
1093 		.maxlen		= sizeof(int),
1094 		.mode		= 0644,
1095 		.proc_handler	= &proc_dointvec,
1096 	},
1097 #endif
1098 #ifdef CONFIG_MMU
1099 	{
1100 		.ctl_name	= FS_LEASE_TIME,
1101 		.procname	= "lease-break-time",
1102 		.data		= &lease_break_time,
1103 		.maxlen		= sizeof(int),
1104 		.mode		= 0644,
1105 		.proc_handler	= &proc_dointvec,
1106 	},
1107 	{
1108 		.ctl_name	= FS_AIO_NR,
1109 		.procname	= "aio-nr",
1110 		.data		= &aio_nr,
1111 		.maxlen		= sizeof(aio_nr),
1112 		.mode		= 0444,
1113 		.proc_handler	= &proc_doulongvec_minmax,
1114 	},
1115 	{
1116 		.ctl_name	= FS_AIO_MAX_NR,
1117 		.procname	= "aio-max-nr",
1118 		.data		= &aio_max_nr,
1119 		.maxlen		= sizeof(aio_max_nr),
1120 		.mode		= 0644,
1121 		.proc_handler	= &proc_doulongvec_minmax,
1122 	},
1123 #ifdef CONFIG_INOTIFY_USER
1124 	{
1125 		.ctl_name	= FS_INOTIFY,
1126 		.procname	= "inotify",
1127 		.mode		= 0555,
1128 		.child		= inotify_table,
1129 	},
1130 #endif
1131 #endif
1132 	{
1133 		.ctl_name	= KERN_SETUID_DUMPABLE,
1134 		.procname	= "suid_dumpable",
1135 		.data		= &suid_dumpable,
1136 		.maxlen		= sizeof(int),
1137 		.mode		= 0644,
1138 		.proc_handler	= &proc_dointvec,
1139 	},
1140 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1141 	{
1142 		.ctl_name	= CTL_UNNUMBERED,
1143 		.procname	= "binfmt_misc",
1144 		.mode		= 0555,
1145 		.child		= binfmt_misc_table,
1146 	},
1147 #endif
1148 /*
1149  * NOTE: do not add new entries to this table unless you have read
1150  * Documentation/sysctl/ctl_unnumbered.txt
1151  */
1152 	{ .ctl_name = 0 }
1153 };
1154 
/* Currently empty: holds nothing but the terminating entry. */
static ctl_table debug_table[] = {
	{ .ctl_name = 0 }
};
1158 
/* Currently empty: holds nothing but the terminating entry. */
static ctl_table dev_table[] = {
	{ .ctl_name = 0 }
};
1162 
/*
 * Taken around changes to the list of registered table headers and around
 * updates of each header's ->used / ->unregistering state.
 */
static DEFINE_SPINLOCK(sysctl_lock);
1164 
1165 /* called under sysctl_lock */
1166 static int use_table(struct ctl_table_header *p)
1167 {
1168 	if (unlikely(p->unregistering))
1169 		return 0;
1170 	p->used++;
1171 	return 1;
1172 }
1173 
1174 /* called under sysctl_lock */
1175 static void unuse_table(struct ctl_table_header *p)
1176 {
1177 	if (!--p->used)
1178 		if (unlikely(p->unregistering))
1179 			complete(p->unregistering);
1180 }
1181 
/*
 * Wait until the header has no users left, then unlink it from the list
 * of registered tables.
 *
 * called under sysctl_lock, will reacquire if has to wait
 */
static void start_unregistering(struct ctl_table_header *p)
{
	/*
	 * if p->used is 0, nobody will ever touch that entry again;
	 * we'll eliminate all paths to it before dropping sysctl_lock
	 */
	if (unlikely(p->used)) {
		/* on-stack completion, signalled by unuse_table() at used == 0 */
		struct completion wait;
		init_completion(&wait);
		/* once set, use_table() refuses to hand out new references */
		p->unregistering = &wait;
		/* the spinlock must not be held across the sleep below */
		spin_unlock(&sysctl_lock);
		wait_for_completion(&wait);
		spin_lock(&sysctl_lock);
	}
	/*
	 * do not remove from the list until nobody holds it; walking the
	 * list in do_sysctl() relies on that.
	 */
	list_del_init(&p->ctl_entry);
}
1203 
1204 void sysctl_head_finish(struct ctl_table_header *head)
1205 {
1206 	if (!head)
1207 		return;
1208 	spin_lock(&sysctl_lock);
1209 	unuse_table(head);
1210 	spin_unlock(&sysctl_lock);
1211 }
1212 
/*
 * Step through the registered table headers, beginning with
 * root_table_header.
 *
 * @prev: header returned by the previous call, or NULL to start a walk.
 *        The reference held on @prev is dropped here.
 *
 * Returns the next header for which use_table() succeeds, with a reference
 * held on it, or NULL once the walk has wrapped around to
 * root_table_header again. Headers that are being unregistered are skipped.
 */
struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
{
	struct ctl_table_header *head;
	struct list_head *tmp;
	spin_lock(&sysctl_lock);
	if (prev) {
		/* resume after @prev: release it, then jump to the advance step */
		tmp = &prev->ctl_entry;
		unuse_table(prev);
		goto next;
	}
	tmp = &root_table_header.ctl_entry;
	for (;;) {
		head = list_entry(tmp, struct ctl_table_header, ctl_entry);

		if (!use_table(head))
			goto next;
		spin_unlock(&sysctl_lock);
		return head;
	next:
		tmp = tmp->next;
		/* back at the list head: every header has been visited */
		if (tmp == &root_table_header.ctl_entry)
			break;
	}
	spin_unlock(&sysctl_lock);
	return NULL;
}
1239 
1240 #ifdef CONFIG_SYSCTL_SYSCALL
1241 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1242 	       void __user *newval, size_t newlen)
1243 {
1244 	struct ctl_table_header *head;
1245 	int error = -ENOTDIR;
1246 
1247 	if (nlen <= 0 || nlen >= CTL_MAXNAME)
1248 		return -ENOTDIR;
1249 	if (oldval) {
1250 		int old_len;
1251 		if (!oldlenp || get_user(old_len, oldlenp))
1252 			return -EFAULT;
1253 	}
1254 
1255 	for (head = sysctl_head_next(NULL); head;
1256 			head = sysctl_head_next(head)) {
1257 		error = parse_table(name, nlen, oldval, oldlenp,
1258 					newval, newlen, head->ctl_table);
1259 		if (error != -ENOTDIR) {
1260 			sysctl_head_finish(head);
1261 			break;
1262 		}
1263 	}
1264 	return error;
1265 }
1266 
1267 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1268 {
1269 	struct __sysctl_args tmp;
1270 	int error;
1271 
1272 	if (copy_from_user(&tmp, args, sizeof(tmp)))
1273 		return -EFAULT;
1274 
1275 	lock_kernel();
1276 	error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1277 			  tmp.newval, tmp.newlen);
1278 	unlock_kernel();
1279 	return error;
1280 }
1281 #endif /* CONFIG_SYSCTL_SYSCALL */
1282 
1283 /*
1284  * sysctl_perm does NOT grant the superuser all rights automatically, because
1285  * some sysctl variables are readonly even to root.
1286  */
1287 
1288 static int test_perm(int mode, int op)
1289 {
1290 	if (!current->euid)
1291 		mode >>= 6;
1292 	else if (in_egroup_p(0))
1293 		mode >>= 3;
1294 	if ((mode & op & 0007) == op)
1295 		return 0;
1296 	return -EACCES;
1297 }
1298 
1299 int sysctl_perm(ctl_table *table, int op)
1300 {
1301 	int error;
1302 	error = security_sysctl(table, op);
1303 	if (error)
1304 		return error;
1305 	return test_perm(table->mode, op);
1306 }
1307 
1308 #ifdef CONFIG_SYSCTL_SYSCALL
/*
 * Resolve a numeric sysctl(2) name inside one table and act on the entry
 * it designates.
 *
 * Each component of @name is fetched from user space and matched against
 * the ctl_name of the entries at the current level. A match on an entry
 * with a child table descends into that table with the next component; a
 * match on any other entry is handed to do_sysctl_strategy().
 *
 * Returns -ENOTDIR if the name is exhausted or nothing matches, -EFAULT if
 * a name component cannot be read, -EPERM if the permission check with
 * op 001 on a matched directory entry fails, otherwise the result of
 * do_sysctl_strategy().
 */
static int parse_table(int __user *name, int nlen,
		       void __user *oldval, size_t __user *oldlenp,
		       void __user *newval, size_t newlen,
		       ctl_table *table)
{
	int n;
repeat:
	if (!nlen)
		return -ENOTDIR;
	if (get_user(n, name))
		return -EFAULT;
	/* the table ends at the first entry with neither ctl_name nor procname */
	for ( ; table->ctl_name || table->procname; table++) {
		/* entries without a binary name cannot be matched here */
		if (!table->ctl_name)
			continue;
		if (n == table->ctl_name) {
			int error;
			if (table->child) {
				if (sysctl_perm(table, 001))
					return -EPERM;
				/* consume this component and restart one level down */
				name++;
				nlen--;
				table = table->child;
				goto repeat;
			}
			error = do_sysctl_strategy(table, name, nlen,
						   oldval, oldlenp,
						   newval, newlen);
			return error;
		}
	}
	return -ENOTDIR;
}
1341 
1342 /* Perform the actual read/write of a sysctl table entry. */
1343 int do_sysctl_strategy (ctl_table *table,
1344 			int __user *name, int nlen,
1345 			void __user *oldval, size_t __user *oldlenp,
1346 			void __user *newval, size_t newlen)
1347 {
1348 	int op = 0, rc;
1349 	size_t len;
1350 
1351 	if (oldval)
1352 		op |= 004;
1353 	if (newval)
1354 		op |= 002;
1355 	if (sysctl_perm(table, op))
1356 		return -EPERM;
1357 
1358 	if (table->strategy) {
1359 		rc = table->strategy(table, name, nlen, oldval, oldlenp,
1360 				     newval, newlen);
1361 		if (rc < 0)
1362 			return rc;
1363 		if (rc > 0)
1364 			return 0;
1365 	}
1366 
1367 	/* If there is no strategy routine, or if the strategy returns
1368 	 * zero, proceed with automatic r/w */
1369 	if (table->data && table->maxlen) {
1370 		if (oldval && oldlenp) {
1371 			if (get_user(len, oldlenp))
1372 				return -EFAULT;
1373 			if (len) {
1374 				if (len > table->maxlen)
1375 					len = table->maxlen;
1376 				if(copy_to_user(oldval, table->data, len))
1377 					return -EFAULT;
1378 				if(put_user(len, oldlenp))
1379 					return -EFAULT;
1380 			}
1381 		}
1382 		if (newval && newlen) {
1383 			len = newlen;
1384 			if (len > table->maxlen)
1385 				len = table->maxlen;
1386 			if(copy_from_user(table->data, newval, len))
1387 				return -EFAULT;
1388 		}
1389 	}
1390 	return 0;
1391 }
1392 #endif /* CONFIG_SYSCTL_SYSCALL */
1393 
1394 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1395 {
1396 	for (; table->ctl_name || table->procname; table++) {
1397 		table->parent = parent;
1398 		if (table->child)
1399 			sysctl_set_parent(table, table->child);
1400 	}
1401 }
1402 
1403 static __init int sysctl_init(void)
1404 {
1405 	sysctl_set_parent(NULL, root_table);
1406 	return 0;
1407 }
1408 
1409 core_initcall(sysctl_init);
1410 
1411 /**
1412  * register_sysctl_table - register a sysctl hierarchy
1413  * @table: the top-level table structure
1414  *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. An entry with a ctl_name of 0 and a %NULL procname terminates the
 * table.
1417  *
1418  * The members of the &ctl_table structure are used as follows:
1419  *
1420  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1421  *            must be unique within that level of sysctl
1422  *
1423  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1424  *            enter a sysctl file
1425  *
1426  * data - a pointer to data for use by proc_handler
1427  *
1428  * maxlen - the maximum size in bytes of the data
1429  *
1430  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1431  *
1432  * child - a pointer to the child sysctl table if this entry is a directory, or
1433  *         %NULL.
1434  *
1435  * proc_handler - the text handler routine (described below)
1436  *
1437  * strategy - the strategy routine (described below)
1438  *
1439  * de - for internal use by the sysctl routines
1440  *
1441  * extra1, extra2 - extra pointers usable by the proc handler routines
1442  *
1443  * Leaf nodes in the sysctl tree will be represented by a single file
1444  * under /proc; non-leaf nodes will be represented by directories.
1445  *
1446  * sysctl(2) can automatically manage read and write requests through
1447  * the sysctl table.  The data and maxlen fields of the ctl_table
1448  * struct enable minimal validation of the values being written to be
1449  * performed, and the mode field allows minimal authentication.
1450  *
1451  * More sophisticated management can be enabled by the provision of a
1452  * strategy routine with the table entry.  This will be called before
1453  * any automatic read or write of the data is performed.
1454  *
1455  * The strategy routine may return
1456  *
1457  * < 0 - Error occurred (error is passed to user process)
1458  *
1459  * 0   - OK - proceed with automatic read or write.
1460  *
1461  * > 0 - OK - read or write has been done by the strategy routine, so
1462  *       return immediately.
1463  *
1464  * There must be a proc_handler routine for any terminal nodes
1465  * mirrored under /proc/sys (non-terminals are handled by a built-in
1466  * directory handler).  Several default handlers are available to
1467  * cover common cases -
1468  *
1469  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1470  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1471  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1472  *
1473  * It is the handler's job to read the input buffer from user memory
1474  * and process it. The handler should return 0 on success.
1475  *
1476  * This routine returns %NULL on a failure to register, and a pointer
1477  * to the table header on success.
1478  */
1479 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1480 {
1481 	struct ctl_table_header *tmp;
1482 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1483 	if (!tmp)
1484 		return NULL;
1485 	tmp->ctl_table = table;
1486 	INIT_LIST_HEAD(&tmp->ctl_entry);
1487 	tmp->used = 0;
1488 	tmp->unregistering = NULL;
1489 	sysctl_set_parent(NULL, table);
1490 	spin_lock(&sysctl_lock);
1491 	list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1492 	spin_unlock(&sysctl_lock);
1493 	return tmp;
1494 }
1495 
1496 /**
1497  * unregister_sysctl_table - unregister a sysctl table hierarchy
1498  * @header: the header returned from register_sysctl_table
1499  *
1500  * Unregisters the sysctl table and all children. proc entries may not
1501  * actually be removed until they are no longer used by anyone.
1502  */
1503 void unregister_sysctl_table(struct ctl_table_header * header)
1504 {
1505 	might_sleep();
1506 	spin_lock(&sysctl_lock);
1507 	start_unregistering(header);
1508 	spin_unlock(&sysctl_lock);
1509 	kfree(header);
1510 }
1511 
1512 #else /* !CONFIG_SYSCTL */
/* Stub used when sysctl support is compiled out: nothing is registered. */
struct ctl_table_header *register_sysctl_table(ctl_table * table)
{
	return NULL;
}

/* Stub used when sysctl support is compiled out: nothing to undo. */
void unregister_sysctl_table(struct ctl_table_header * table)
{
}
1521 
1522 #endif /* CONFIG_SYSCTL */
1523 
1524 /*
1525  * /proc/sys support
1526  */
1527 
1528 #ifdef CONFIG_PROC_SYSCTL
1529 
/*
 * Read or write a NUL-terminated string kept in a kernel buffer.
 *
 * @data:   kernel buffer holding the string
 * @maxlen: size of @data in bytes
 * @write:  non-zero to store the user's text, zero to copy the string out
 * @filp:   not used here
 * @buffer: user-space buffer
 * @lenp:   in: size of @buffer; out (read path): number of bytes produced
 * @ppos:   file position, advanced by the amount consumed or produced
 *
 * Returns 0 on success (including the "nothing to do" case, where *lenp is
 * set to 0) or -EFAULT if user memory cannot be accessed.
 */
static int _proc_do_string(void* data, int maxlen, int write,
			   struct file *filp, void __user *buffer,
			   size_t *lenp, loff_t *ppos)
{
	size_t len;
	char __user *p;
	char c;

	if (!data || !maxlen || !*lenp) {
		*lenp = 0;
		return 0;
	}

	if (write) {
		/* the input ends at the first NUL or newline, or after *lenp bytes */
		len = 0;
		p = buffer;
		while (len < *lenp) {
			if (get_user(c, p++))
				return -EFAULT;
			if (c == 0 || c == '\n')
				break;
			len++;
		}
		/* truncate so that the terminating NUL still fits */
		if (len >= maxlen)
			len = maxlen-1;
		if(copy_from_user(data, buffer, len))
			return -EFAULT;
		((char *) data)[len] = 0;
		/* the whole write counts as consumed, even if it was truncated */
		*ppos += *lenp;
	} else {
		len = strlen(data);
		if (len > maxlen)
			len = maxlen;

		/* reading past the end of the string yields nothing */
		if (*ppos > len) {
			*lenp = 0;
			return 0;
		}

		/* continue from the current file position */
		data += *ppos;
		len  -= *ppos;

		if (len > *lenp)
			len = *lenp;
		if (len)
			if(copy_to_user(buffer, data, len))
				return -EFAULT;
		/* append a newline if the user buffer has room left for it */
		if (len < *lenp) {
			if(put_user('\n', ((char __user *) buffer) + len))
				return -EFAULT;
			len++;
		}
		*lenp = len;
		*ppos += len;
	}
	return 0;
}
1587 
1588 /**
1589  * proc_dostring - read a string sysctl
1590  * @table: the sysctl table
1591  * @write: %TRUE if this is a write to the sysctl file
1592  * @filp: the file structure
1593  * @buffer: the user buffer
1594  * @lenp: the size of the user buffer
1595  * @ppos: file position
1596  *
1597  * Reads/writes a string from/to the user buffer. If the kernel
1598  * buffer provided is not large enough to hold the string, the
1599  * string is truncated. The copied string is %NULL-terminated.
1600  * If the string is being read by the user process, it is copied
1601  * and a newline '\n' is added. It is truncated if the buffer is
1602  * not large enough.
1603  *
1604  * Returns 0 on success.
1605  */
1606 int proc_dostring(ctl_table *table, int write, struct file *filp,
1607 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1608 {
1609 	return _proc_do_string(table->data, table->maxlen, write, filp,
1610 			       buffer, lenp, ppos);
1611 }
1612 
1613 
/*
 * Default conversion between the textual (sign, magnitude) pair and the
 * stored int.
 *
 * @negp:  sign flag: read on write, set to -1 or 0 on read
 * @lvalp: magnitude: read on write, set on read
 * @valp:  the int being written to (write) or read from (read)
 * @write: non-zero to store into *valp, zero to decompose *valp
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows for
			 * INT_MIN and, once widened, gives a bogus magnitude
			 * where long is wider than int.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1632 
1633 static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
1634 		  int write, struct file *filp, void __user *buffer,
1635 		  size_t *lenp, loff_t *ppos,
1636 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1637 			      int write, void *data),
1638 		  void *data)
1639 {
1640 #define TMPBUFLEN 21
1641 	int *i, vleft, first=1, neg, val;
1642 	unsigned long lval;
1643 	size_t left, len;
1644 
1645 	char buf[TMPBUFLEN], *p;
1646 	char __user *s = buffer;
1647 
1648 	if (!tbl_data || !table->maxlen || !*lenp ||
1649 	    (*ppos && !write)) {
1650 		*lenp = 0;
1651 		return 0;
1652 	}
1653 
1654 	i = (int *) tbl_data;
1655 	vleft = table->maxlen / sizeof(*i);
1656 	left = *lenp;
1657 
1658 	if (!conv)
1659 		conv = do_proc_dointvec_conv;
1660 
1661 	for (; left && vleft--; i++, first=0) {
1662 		if (write) {
1663 			while (left) {
1664 				char c;
1665 				if (get_user(c, s))
1666 					return -EFAULT;
1667 				if (!isspace(c))
1668 					break;
1669 				left--;
1670 				s++;
1671 			}
1672 			if (!left)
1673 				break;
1674 			neg = 0;
1675 			len = left;
1676 			if (len > sizeof(buf) - 1)
1677 				len = sizeof(buf) - 1;
1678 			if (copy_from_user(buf, s, len))
1679 				return -EFAULT;
1680 			buf[len] = 0;
1681 			p = buf;
1682 			if (*p == '-' && left > 1) {
1683 				neg = 1;
1684 				p++;
1685 			}
1686 			if (*p < '0' || *p > '9')
1687 				break;
1688 
1689 			lval = simple_strtoul(p, &p, 0);
1690 
1691 			len = p-buf;
1692 			if ((len < left) && *p && !isspace(*p))
1693 				break;
1694 			if (neg)
1695 				val = -val;
1696 			s += len;
1697 			left -= len;
1698 
1699 			if (conv(&neg, &lval, i, 1, data))
1700 				break;
1701 		} else {
1702 			p = buf;
1703 			if (!first)
1704 				*p++ = '\t';
1705 
1706 			if (conv(&neg, &lval, i, 0, data))
1707 				break;
1708 
1709 			sprintf(p, "%s%lu", neg ? "-" : "", lval);
1710 			len = strlen(buf);
1711 			if (len > left)
1712 				len = left;
1713 			if(copy_to_user(s, buf, len))
1714 				return -EFAULT;
1715 			left -= len;
1716 			s += len;
1717 		}
1718 	}
1719 
1720 	if (!write && !first && left) {
1721 		if(put_user('\n', s))
1722 			return -EFAULT;
1723 		left--, s++;
1724 	}
1725 	if (write) {
1726 		while (left) {
1727 			char c;
1728 			if (get_user(c, s++))
1729 				return -EFAULT;
1730 			if (!isspace(c))
1731 				break;
1732 			left--;
1733 		}
1734 	}
1735 	if (write && first)
1736 		return -EINVAL;
1737 	*lenp -= left;
1738 	*ppos += *lenp;
1739 	return 0;
1740 #undef TMPBUFLEN
1741 }
1742 
1743 static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1744 		  void __user *buffer, size_t *lenp, loff_t *ppos,
1745 		  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1746 			      int write, void *data),
1747 		  void *data)
1748 {
1749 	return __do_proc_dointvec(table->data, table, write, filp,
1750 			buffer, lenp, ppos, conv, data);
1751 }
1752 
1753 /**
1754  * proc_dointvec - read a vector of integers
1755  * @table: the sysctl table
1756  * @write: %TRUE if this is a write to the sysctl file
1757  * @filp: the file structure
1758  * @buffer: the user buffer
1759  * @lenp: the size of the user buffer
1760  * @ppos: file position
1761  *
1762  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1763  * values from/to the user buffer, treated as an ASCII string.
1764  *
1765  * Returns 0 on success.
1766  */
1767 int proc_dointvec(ctl_table *table, int write, struct file *filp,
1768 		     void __user *buffer, size_t *lenp, loff_t *ppos)
1769 {
1770     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1771 		    	    NULL,NULL);
1772 }
1773 
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2

/*
 * Conversion callback that combines the written value with the stored one
 * instead of always overwriting it.
 *
 * @negp:  sign flag: read on write, set to -1 or 0 on read
 * @lvalp: magnitude: read on write, set on read
 * @valp:  the int being updated (write) or read (read)
 * @write: non-zero to update *valp, zero to decompose *valp
 * @data:  points to an int holding OP_SET, OP_AND or OP_OR; any other
 *         value leaves *valp untouched on write
 *
 * Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:	*valp = val; break;
		case OP_AND:	*valp &= val; break;
		case OP_OR:	*valp |= val; break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows for
			 * INT_MIN and, once widened, gives a bogus magnitude
			 * where long is wider than int.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1802 
1803 /*
1804  *	init may raise the set.
1805  */
1806 
1807 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1808 			void __user *buffer, size_t *lenp, loff_t *ppos)
1809 {
1810 	int op;
1811 
1812 	if (write && !capable(CAP_SYS_MODULE)) {
1813 		return -EPERM;
1814 	}
1815 
1816 	op = is_init(current) ? OP_SET : OP_AND;
1817 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1818 				do_proc_dointvec_bset_conv,&op);
1819 }
1820 
1821 /*
1822  *	Taint values can only be increased
1823  */
1824 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
1825 			       void __user *buffer, size_t *lenp, loff_t *ppos)
1826 {
1827 	int op;
1828 
1829 	if (write && !capable(CAP_SYS_ADMIN))
1830 		return -EPERM;
1831 
1832 	op = OP_OR;
1833 	return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1834 				do_proc_dointvec_bset_conv,&op);
1835 }
1836 
/* Optional bounds for do_proc_dointvec_minmax_conv(); NULL means "no bound". */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback that rejects written values outside [*min, *max].
 *
 * @negp:  sign flag: read on write, set to -1 or 0 on read
 * @lvalp: magnitude: read on write, set on read
 * @valp:  the int being written to (write) or read from (read)
 * @write: non-zero to store into *valp, zero to decompose *valp
 * @data:  points to a struct do_proc_dointvec_minmax_conv_param
 *
 * Returns 0 on success, or -EINVAL (leaving *valp untouched) when a
 * written value violates one of the bounds.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows for
			 * INT_MIN and, once widened, gives a bogus magnitude
			 * where long is wider than int.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1865 
1866 /**
1867  * proc_dointvec_minmax - read a vector of integers with min/max values
1868  * @table: the sysctl table
1869  * @write: %TRUE if this is a write to the sysctl file
1870  * @filp: the file structure
1871  * @buffer: the user buffer
1872  * @lenp: the size of the user buffer
1873  * @ppos: file position
1874  *
1875  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1876  * values from/to the user buffer, treated as an ASCII string.
1877  *
1878  * This routine will ensure the values are within the range specified by
1879  * table->extra1 (min) and table->extra2 (max).
1880  *
1881  * Returns 0 on success.
1882  */
1883 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
1884 		  void __user *buffer, size_t *lenp, loff_t *ppos)
1885 {
1886 	struct do_proc_dointvec_minmax_conv_param param = {
1887 		.min = (int *) table->extra1,
1888 		.max = (int *) table->extra2,
1889 	};
1890 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
1891 				do_proc_dointvec_minmax_conv, &param);
1892 }
1893 
1894 static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
1895 				     struct file *filp,
1896 				     void __user *buffer,
1897 				     size_t *lenp, loff_t *ppos,
1898 				     unsigned long convmul,
1899 				     unsigned long convdiv)
1900 {
1901 #define TMPBUFLEN 21
1902 	unsigned long *i, *min, *max, val;
1903 	int vleft, first=1, neg;
1904 	size_t len, left;
1905 	char buf[TMPBUFLEN], *p;
1906 	char __user *s = buffer;
1907 
1908 	if (!data || !table->maxlen || !*lenp ||
1909 	    (*ppos && !write)) {
1910 		*lenp = 0;
1911 		return 0;
1912 	}
1913 
1914 	i = (unsigned long *) data;
1915 	min = (unsigned long *) table->extra1;
1916 	max = (unsigned long *) table->extra2;
1917 	vleft = table->maxlen / sizeof(unsigned long);
1918 	left = *lenp;
1919 
1920 	for (; left && vleft--; i++, min++, max++, first=0) {
1921 		if (write) {
1922 			while (left) {
1923 				char c;
1924 				if (get_user(c, s))
1925 					return -EFAULT;
1926 				if (!isspace(c))
1927 					break;
1928 				left--;
1929 				s++;
1930 			}
1931 			if (!left)
1932 				break;
1933 			neg = 0;
1934 			len = left;
1935 			if (len > TMPBUFLEN-1)
1936 				len = TMPBUFLEN-1;
1937 			if (copy_from_user(buf, s, len))
1938 				return -EFAULT;
1939 			buf[len] = 0;
1940 			p = buf;
1941 			if (*p == '-' && left > 1) {
1942 				neg = 1;
1943 				p++;
1944 			}
1945 			if (*p < '0' || *p > '9')
1946 				break;
1947 			val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
1948 			len = p-buf;
1949 			if ((len < left) && *p && !isspace(*p))
1950 				break;
1951 			if (neg)
1952 				val = -val;
1953 			s += len;
1954 			left -= len;
1955 
1956 			if(neg)
1957 				continue;
1958 			if ((min && val < *min) || (max && val > *max))
1959 				continue;
1960 			*i = val;
1961 		} else {
1962 			p = buf;
1963 			if (!first)
1964 				*p++ = '\t';
1965 			sprintf(p, "%lu", convdiv * (*i) / convmul);
1966 			len = strlen(buf);
1967 			if (len > left)
1968 				len = left;
1969 			if(copy_to_user(s, buf, len))
1970 				return -EFAULT;
1971 			left -= len;
1972 			s += len;
1973 		}
1974 	}
1975 
1976 	if (!write && !first && left) {
1977 		if(put_user('\n', s))
1978 			return -EFAULT;
1979 		left--, s++;
1980 	}
1981 	if (write) {
1982 		while (left) {
1983 			char c;
1984 			if (get_user(c, s++))
1985 				return -EFAULT;
1986 			if (!isspace(c))
1987 				break;
1988 			left--;
1989 		}
1990 	}
1991 	if (write && first)
1992 		return -EINVAL;
1993 	*lenp -= left;
1994 	*ppos += *lenp;
1995 	return 0;
1996 #undef TMPBUFLEN
1997 }
1998 
1999 static int do_proc_doulongvec_minmax(ctl_table *table, int write,
2000 				     struct file *filp,
2001 				     void __user *buffer,
2002 				     size_t *lenp, loff_t *ppos,
2003 				     unsigned long convmul,
2004 				     unsigned long convdiv)
2005 {
2006 	return __do_proc_doulongvec_minmax(table->data, table, write,
2007 			filp, buffer, lenp, ppos, convmul, convdiv);
2008 }
2009 
2010 /**
2011  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2012  * @table: the sysctl table
2013  * @write: %TRUE if this is a write to the sysctl file
2014  * @filp: the file structure
2015  * @buffer: the user buffer
2016  * @lenp: the size of the user buffer
2017  * @ppos: file position
2018  *
2019  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2020  * values from/to the user buffer, treated as an ASCII string.
2021  *
2022  * This routine will ensure the values are within the range specified by
2023  * table->extra1 (min) and table->extra2 (max).
2024  *
2025  * Returns 0 on success.
2026  */
2027 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2028 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2029 {
2030     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2031 }
2032 
2033 /**
2034  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2035  * @table: the sysctl table
2036  * @write: %TRUE if this is a write to the sysctl file
2037  * @filp: the file structure
2038  * @buffer: the user buffer
2039  * @lenp: the size of the user buffer
2040  * @ppos: file position
2041  *
2042  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2043  * values from/to the user buffer, treated as an ASCII string. The values
2044  * are treated as milliseconds, and converted to jiffies when they are stored.
2045  *
2046  * This routine will ensure the values are within the range specified by
2047  * table->extra1 (min) and table->extra2 (max).
2048  *
2049  * Returns 0 on success.
2050  */
2051 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2052 				      struct file *filp,
2053 				      void __user *buffer,
2054 				      size_t *lenp, loff_t *ppos)
2055 {
2056     return do_proc_doulongvec_minmax(table, write, filp, buffer,
2057 				     lenp, ppos, HZ, 1000l);
2058 }
2059 
2060 
2061 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2062 					 int *valp,
2063 					 int write, void *data)
2064 {
2065 	if (write) {
2066 		if (*lvalp > LONG_MAX / HZ)
2067 			return 1;
2068 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2069 	} else {
2070 		int val = *valp;
2071 		unsigned long lval;
2072 		if (val < 0) {
2073 			*negp = -1;
2074 			lval = (unsigned long)-val;
2075 		} else {
2076 			*negp = 0;
2077 			lval = (unsigned long)val;
2078 		}
2079 		*lvalp = lval / HZ;
2080 	}
2081 	return 0;
2082 }
2083 
2084 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2085 						int *valp,
2086 						int write, void *data)
2087 {
2088 	if (write) {
2089 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2090 			return 1;
2091 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2092 	} else {
2093 		int val = *valp;
2094 		unsigned long lval;
2095 		if (val < 0) {
2096 			*negp = -1;
2097 			lval = (unsigned long)-val;
2098 		} else {
2099 			*negp = 0;
2100 			lval = (unsigned long)val;
2101 		}
2102 		*lvalp = jiffies_to_clock_t(lval);
2103 	}
2104 	return 0;
2105 }
2106 
/*
 * Conversion callback for ints stored in jiffies and shown in
 * milliseconds: writes go through msecs_to_jiffies(), reads through
 * jiffies_to_msecs(). @data is unused. Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows for
			 * INT_MIN and, once widened, gives a bogus magnitude
			 * where long is wider than int.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2127 
2128 /**
2129  * proc_dointvec_jiffies - read a vector of integers as seconds
2130  * @table: the sysctl table
2131  * @write: %TRUE if this is a write to the sysctl file
2132  * @filp: the file structure
2133  * @buffer: the user buffer
2134  * @lenp: the size of the user buffer
2135  * @ppos: file position
2136  *
2137  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2138  * values from/to the user buffer, treated as an ASCII string.
2139  * The values read are assumed to be in seconds, and are converted into
2140  * jiffies.
2141  *
2142  * Returns 0 on success.
2143  */
2144 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2145 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2146 {
2147     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2148 		    	    do_proc_dointvec_jiffies_conv,NULL);
2149 }
2150 
2151 /**
2152  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2153  * @table: the sysctl table
2154  * @write: %TRUE if this is a write to the sysctl file
2155  * @filp: the file structure
2156  * @buffer: the user buffer
2157  * @lenp: the size of the user buffer
2158  * @ppos: pointer to the file position
2159  *
2160  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2161  * values from/to the user buffer, treated as an ASCII string.
2162  * The values read are assumed to be in 1/USER_HZ seconds, and
2163  * are converted into jiffies.
2164  *
2165  * Returns 0 on success.
2166  */
2167 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2168 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2169 {
2170     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2171 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2172 }
2173 
2174 /**
2175  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2176  * @table: the sysctl table
2177  * @write: %TRUE if this is a write to the sysctl file
2178  * @filp: the file structure
2179  * @buffer: the user buffer
2180  * @lenp: the size of the user buffer
 * @ppos: the current position in the file
2183  *
2184  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2185  * values from/to the user buffer, treated as an ASCII string.
2186  * The values read are assumed to be in 1/1000 seconds, and
2187  * are converted into jiffies.
2188  *
2189  * Returns 0 on success.
2190  */
2191 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2192 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2193 {
2194 	return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2195 				do_proc_dointvec_ms_jiffies_conv, NULL);
2196 }
2197 
2198 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
2199 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2200 {
2201 	struct pid *new_pid;
2202 	pid_t tmp;
2203 	int r;
2204 
2205 	tmp = pid_nr(cad_pid);
2206 
2207 	r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2208 			       lenp, ppos, NULL, NULL);
2209 	if (r || !write)
2210 		return r;
2211 
2212 	new_pid = find_get_pid(tmp);
2213 	if (!new_pid)
2214 		return -ESRCH;
2215 
2216 	put_pid(xchg(&cad_pid, new_pid));
2217 	return 0;
2218 }
2219 
#else /* !CONFIG_PROC_SYSCTL */

/*
 * Without CONFIG_PROC_SYSCTL the handlers below are stubs that fail
 * with -ENOSYS.
 */

int proc_dostring(ctl_table *table, int write, struct file *filp,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec(ctl_table *table, int write, struct file *filp,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
			     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
				      struct file *filp,
				      void __user *buffer,
				      size_t *lenp, loff_t *ppos)
{
    return -ENOSYS;
}


#endif /* CONFIG_PROC_SYSCTL */
2280 
2281 
2282 #ifdef CONFIG_SYSCTL_SYSCALL
2283 /*
2284  * General sysctl support routines
2285  */
2286 
2287 /* The generic string strategy routine: */
2288 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2289 		  void __user *oldval, size_t __user *oldlenp,
2290 		  void __user *newval, size_t newlen)
2291 {
2292 	if (!table->data || !table->maxlen)
2293 		return -ENOTDIR;
2294 
2295 	if (oldval && oldlenp) {
2296 		size_t bufsize;
2297 		if (get_user(bufsize, oldlenp))
2298 			return -EFAULT;
2299 		if (bufsize) {
2300 			size_t len = strlen(table->data), copied;
2301 
2302 			/* This shouldn't trigger for a well-formed sysctl */
2303 			if (len > table->maxlen)
2304 				len = table->maxlen;
2305 
2306 			/* Copy up to a max of bufsize-1 bytes of the string */
2307 			copied = (len >= bufsize) ? bufsize - 1 : len;
2308 
2309 			if (copy_to_user(oldval, table->data, copied) ||
2310 			    put_user(0, (char __user *)(oldval + copied)))
2311 				return -EFAULT;
2312 			if (put_user(len, oldlenp))
2313 				return -EFAULT;
2314 		}
2315 	}
2316 	if (newval && newlen) {
2317 		size_t len = newlen;
2318 		if (len > table->maxlen)
2319 			len = table->maxlen;
2320 		if(copy_from_user(table->data, newval, len))
2321 			return -EFAULT;
2322 		if (len == table->maxlen)
2323 			len--;
2324 		((char *) table->data)[len] = 0;
2325 	}
2326 	return 1;
2327 }
2328 
2329 /*
2330  * This function makes sure that all of the integers in the vector
2331  * are between the minimum and maximum values given in the arrays
2332  * table->extra1 and table->extra2, respectively.
2333  */
2334 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2335 		void __user *oldval, size_t __user *oldlenp,
2336 		void __user *newval, size_t newlen)
2337 {
2338 
2339 	if (newval && newlen) {
2340 		int __user *vec = (int __user *) newval;
2341 		int *min = (int *) table->extra1;
2342 		int *max = (int *) table->extra2;
2343 		size_t length;
2344 		int i;
2345 
2346 		if (newlen % sizeof(int) != 0)
2347 			return -EINVAL;
2348 
2349 		if (!table->extra1 && !table->extra2)
2350 			return 0;
2351 
2352 		if (newlen > table->maxlen)
2353 			newlen = table->maxlen;
2354 		length = newlen / sizeof(int);
2355 
2356 		for (i = 0; i < length; i++) {
2357 			int value;
2358 			if (get_user(value, vec + i))
2359 				return -EFAULT;
2360 			if (min && value < min[i])
2361 				return -EINVAL;
2362 			if (max && value > max[i])
2363 				return -EINVAL;
2364 		}
2365 	}
2366 	return 0;
2367 }
2368 
2369 /* Strategy function to convert jiffies to seconds */
2370 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2371 		void __user *oldval, size_t __user *oldlenp,
2372 		void __user *newval, size_t newlen)
2373 {
2374 	if (oldval && oldlenp) {
2375 		size_t olen;
2376 
2377 		if (get_user(olen, oldlenp))
2378 			return -EFAULT;
2379 		if (olen) {
2380 			int val;
2381 
2382 			if (olen < sizeof(int))
2383 				return -EINVAL;
2384 
2385 			val = *(int *)(table->data) / HZ;
2386 			if (put_user(val, (int __user *)oldval))
2387 				return -EFAULT;
2388 			if (put_user(sizeof(int), oldlenp))
2389 				return -EFAULT;
2390 		}
2391 	}
2392 	if (newval && newlen) {
2393 		int new;
2394 		if (newlen != sizeof(int))
2395 			return -EINVAL;
2396 		if (get_user(new, (int __user *)newval))
2397 			return -EFAULT;
2398 		*(int *)(table->data) = new*HZ;
2399 	}
2400 	return 1;
2401 }
2402 
2403 /* Strategy function to convert jiffies to seconds */
2404 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2405 		void __user *oldval, size_t __user *oldlenp,
2406 		void __user *newval, size_t newlen)
2407 {
2408 	if (oldval && oldlenp) {
2409 		size_t olen;
2410 
2411 		if (get_user(olen, oldlenp))
2412 			return -EFAULT;
2413 		if (olen) {
2414 			int val;
2415 
2416 			if (olen < sizeof(int))
2417 				return -EINVAL;
2418 
2419 			val = jiffies_to_msecs(*(int *)(table->data));
2420 			if (put_user(val, (int __user *)oldval))
2421 				return -EFAULT;
2422 			if (put_user(sizeof(int), oldlenp))
2423 				return -EFAULT;
2424 		}
2425 	}
2426 	if (newval && newlen) {
2427 		int new;
2428 		if (newlen != sizeof(int))
2429 			return -EINVAL;
2430 		if (get_user(new, (int __user *)newval))
2431 			return -EFAULT;
2432 		*(int *)(table->data) = msecs_to_jiffies(new);
2433 	}
2434 	return 1;
2435 }
2436 
2437 
2438 
2439 #else /* CONFIG_SYSCTL_SYSCALL */
2440 
2441 
2442 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2443 {
2444 	static int msg_count;
2445 	struct __sysctl_args tmp;
2446 	int name[CTL_MAXNAME];
2447 	int i;
2448 
2449 	/* Read in the sysctl name for better debug message logging */
2450 	if (copy_from_user(&tmp, args, sizeof(tmp)))
2451 		return -EFAULT;
2452 	if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME)
2453 		return -ENOTDIR;
2454 	for (i = 0; i < tmp.nlen; i++)
2455 		if (get_user(name[i], tmp.name + i))
2456 			return -EFAULT;
2457 
2458 	/* Ignore accesses to kernel.version */
2459 	if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
2460 		goto out;
2461 
2462 	if (msg_count < 5) {
2463 		msg_count++;
2464 		printk(KERN_INFO
2465 			"warning: process `%s' used the removed sysctl "
2466 			"system call with ", current->comm);
2467 		for (i = 0; i < tmp.nlen; i++)
2468 			printk("%d.", name[i]);
2469 		printk("\n");
2470 	}
2471 out:
2472 	return -ENOSYS;
2473 }
2474 
2475 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2476 		  void __user *oldval, size_t __user *oldlenp,
2477 		  void __user *newval, size_t newlen)
2478 {
2479 	return -ENOSYS;
2480 }
2481 
2482 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2483 		void __user *oldval, size_t __user *oldlenp,
2484 		void __user *newval, size_t newlen)
2485 {
2486 	return -ENOSYS;
2487 }
2488 
2489 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2490 		void __user *oldval, size_t __user *oldlenp,
2491 		void __user *newval, size_t newlen)
2492 {
2493 	return -ENOSYS;
2494 }
2495 
2496 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2497 		void __user *oldval, size_t __user *oldlenp,
2498 		void __user *newval, size_t newlen)
2499 {
2500 	return -ENOSYS;
2501 }
2502 
2503 #endif /* CONFIG_SYSCTL_SYSCALL */
2504 
/*
 * The proc_* and sysctl_* routines below each have a real definition
 * and an -ENOSYS stub selected by config #ifdefs, so their exports are
 * collected here once rather than repeated after every definition.
 */

/* proc handlers */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* strategy routines */
EXPORT_SYMBOL(sysctl_string);
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);

/* table registration */
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(unregister_sysctl_table);
2523