xref: /linux/kernel/sysctl.c (revision 60e13231561b3a4c5269bfa1ef6c0569ad6f28ec)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/signal.h>
27 #include <linux/printk.h>
28 #include <linux/proc_fs.h>
29 #include <linux/security.h>
30 #include <linux/ctype.h>
31 #include <linux/kmemcheck.h>
32 #include <linux/fs.h>
33 #include <linux/init.h>
34 #include <linux/kernel.h>
35 #include <linux/kobject.h>
36 #include <linux/net.h>
37 #include <linux/sysrq.h>
38 #include <linux/highuid.h>
39 #include <linux/writeback.h>
40 #include <linux/ratelimit.h>
41 #include <linux/compaction.h>
42 #include <linux/hugetlb.h>
43 #include <linux/initrd.h>
44 #include <linux/key.h>
45 #include <linux/times.h>
46 #include <linux/limits.h>
47 #include <linux/dcache.h>
48 #include <linux/dnotify.h>
49 #include <linux/syscalls.h>
50 #include <linux/vmstat.h>
51 #include <linux/nfs_fs.h>
52 #include <linux/acpi.h>
53 #include <linux/reboot.h>
54 #include <linux/ftrace.h>
55 #include <linux/perf_event.h>
56 #include <linux/kprobes.h>
57 #include <linux/pipe_fs_i.h>
58 #include <linux/oom.h>
59 #include <linux/kmod.h>
60 
61 #include <asm/uaccess.h>
62 #include <asm/processor.h>
63 
64 #ifdef CONFIG_X86
65 #include <asm/nmi.h>
66 #include <asm/stacktrace.h>
67 #include <asm/io.h>
68 #endif
69 #ifdef CONFIG_BSD_PROCESS_ACCT
70 #include <linux/acct.h>
71 #endif
72 #ifdef CONFIG_RT_MUTEXES
73 #include <linux/rtmutex.h>
74 #endif
75 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
76 #include <linux/lockdep.h>
77 #endif
78 #ifdef CONFIG_CHR_DEV_SG
79 #include <scsi/sg.h>
80 #endif
81 
82 #ifdef CONFIG_LOCKUP_DETECTOR
83 #include <linux/nmi.h>
84 #endif
85 
86 
87 #if defined(CONFIG_SYSCTL)
88 
/*
 * Variables defined in other translation units.  The tables in this file
 * take their addresses for .data (the value a sysctl entry exposes) or for
 * .extra1/.extra2 (range bounds).
 */
89 /* External variables not in a header file. */
90 extern int sysctl_overcommit_memory;
91 extern int sysctl_overcommit_ratio;
92 extern int max_threads;	/* backs kernel/threads-max */
93 extern int core_uses_pid;
94 extern int suid_dumpable;
95 extern char core_pattern[];
96 extern unsigned int core_pipe_limit;
97 extern int pid_max;
98 extern int min_free_kbytes;
99 extern int pid_max_min, pid_max_max;	/* .extra1/.extra2 of kernel/pid_max */
100 extern int sysctl_drop_caches;
101 extern int percpu_pagelist_fraction;
102 extern int compat_log;	/* backs kernel/compat-log (CONFIG_COMPAT) */
103 extern int latencytop_enabled;	/* backs kernel/latencytop (CONFIG_LATENCYTOP) */
104 extern int sysctl_nr_open_min, sysctl_nr_open_max;
105 #ifndef CONFIG_MMU
106 extern int sysctl_nr_trim_pages;	/* backs vm/nr_trim_pages, the !CONFIG_MMU alternative to max_map_count */
107 #endif
108 #ifdef CONFIG_BLOCK
109 extern int blk_iopoll_enabled;	/* backs kernel/blk_iopoll */
110 #endif
111 
/*
 * Table entries cannot embed literal bounds: .extra1/.extra2 are pointers,
 * so each distinct limit needs an addressable object.  The objects below
 * are shared by every entry that needs the same limit.
 */
112 /* Constants used for minimum and maximum */
113 #ifdef CONFIG_LOCKUP_DETECTOR
114 static int sixty = 60;	/* upper bound of kernel/watchdog_thresh */
115 static int neg_one = -1;	/* lower bound of kernel/watchdog_thresh */
116 #endif
117 
118 static int zero;	/* static storage: implicitly 0 */
119 static int __maybe_unused one = 1;
120 static int __maybe_unused two = 2;
121 static int __maybe_unused three = 3;	/* upper bound of vm/drop_caches */
122 static unsigned long one_ul = 1;	/* lower bound of vm/dirty_background_bytes */
123 static int one_hundred = 100;	/* upper bound of the percentage entries (dirty ratios, swappiness) */
124 #ifdef CONFIG_PRINTK
125 static int ten_thousand = 10000;	/* upper bound of kernel/printk_delay */
126 #endif
127 
128 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
129 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
130 
131 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
132 static int maxolduid = 65535;
133 static int minolduid;	/* static storage: implicitly 0 */
134 static int min_percpu_pagelist_fract = 8;	/* lower bound of vm/percpu_pagelist_fraction */
135 
136 static int ngroups_max = NGROUPS_MAX;	/* exported read-only (0444) as kernel/ngroups_max */
137 
138 #ifdef CONFIG_INOTIFY_USER
139 #include <linux/inotify.h>
140 #endif
141 #ifdef CONFIG_SPARC
142 #include <asm/system.h>
143 #endif
144 
/*
 * Architecture-specific variables exposed through kern_table.  Each group
 * is guarded by the same preprocessor symbol that guards the matching
 * table entries.
 */
145 #ifdef CONFIG_SPARC64
146 extern int sysctl_tsb_ratio;	/* kernel/tsb-ratio */
147 #endif
148 
149 #ifdef __hppa__
150 extern int pwrsw_enabled;	/* kernel/soft-power */
151 extern int unaligned_enabled;	/* kernel/unaligned-trap */
152 #endif
153 
154 #ifdef CONFIG_S390
155 #ifdef CONFIG_MATHEMU
156 extern int sysctl_ieee_emulation_warnings;	/* kernel/ieee_emulation_warnings */
157 #endif
/*
 * NOTE(review): the "userprocess_debug" entry in kern_table takes
 * &show_unhandled_signals, not this variable; within the visible part of
 * the file this declaration is unreferenced -- confirm whether it is stale.
 */
158 extern int sysctl_userprocess_debug;
159 extern int spin_retry;	/* kernel/spin_retry (additionally requires CONFIG_SMP) */
160 #endif
161 
162 #ifdef CONFIG_IA64
163 extern int no_unaligned_warning;	/* kernel/ignore-unaligned-usertrap */
164 extern int unaligned_dump_stack;	/* kernel/unaligned-dump-stack */
165 #endif
166 
/*
 * Forward declarations of file-local proc handlers.  kern_table refers to
 * them by address before their definitions appear, so they must be declared
 * here.  All share the .proc_handler signature: (table, write flag, user
 * buffer, in/out length, in/out file position).
 */
167 #ifdef CONFIG_PROC_SYSCTL
/* Handler of the "cad_pid" entry. */
168 static int proc_do_cad_pid(struct ctl_table *table, int write,
169 		  void __user *buffer, size_t *lenp, loff_t *ppos);
/* Handler of the "tainted" entry. */
170 static int proc_taint(struct ctl_table *table, int write,
171 			       void __user *buffer, size_t *lenp, loff_t *ppos);
172 #endif
173 
174 #ifdef CONFIG_PRINTK
/* Handler used inside the CONFIG_PRINTK group of kern_table. */
175 static int proc_dmesg_restrict(struct ctl_table *table, int write,
176 				void __user *buffer, size_t *lenp, loff_t *ppos);
177 #endif
178 
179 #ifdef CONFIG_MAGIC_SYSRQ
/*
 * Value shown and stored by the "sysrq" entry of kern_table; written values
 * are forwarded to the sysrq code by sysrq_sysctl_handler().
 */
180 /* Note: sysrq code uses its own private copy */
181 static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
182 
183 static int sysrq_sysctl_handler(ctl_table *table, int write,
184 				void __user *buffer, size_t *lenp,
185 				loff_t *ppos)
186 {
187 	int error;
188 
189 	error = proc_dointvec(table, write, buffer, lenp, ppos);
190 	if (error)
191 		return error;
192 
193 	if (write)
194 		sysrq_toggle_support(__sysrq_enabled);
195 
196 	return 0;
197 }
198 
199 #endif
200 
/*
 * Statically built root of the sysctl tree.
 *
 * root_table_header and sysctl_table_root refer to each other, which is why
 * root_table and sysctl_table_root are tentatively declared first.  The
 * header points at the root and at the root's default_set, and each object
 * initialises its list head with the address of the other's.
 * NOTE(review): presumably this links the header in as the only member of
 * the default set's list without runtime registration -- confirm against
 * LIST_HEAD_INIT and the struct definitions.
 */
201 static struct ctl_table root_table[];
202 static struct ctl_table_root sysctl_table_root;
203 static struct ctl_table_header root_table_header = {
	/*
	 * The doubled braces initialise unnamed leading aggregate members of
	 * struct ctl_table_header; their layout is declared outside this file.
	 */
204 	{{.count = 1,
205 	.ctl_table = root_table,
206 	.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
207 	.root = &sysctl_table_root,
208 	.set = &sysctl_table_root.default_set,
209 };
210 static struct ctl_table_root sysctl_table_root = {
	/* root_list is initialised with its own address. */
211 	.root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
212 	.default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
213 };
214 
/*
 * Tentative declarations of the per-directory tables, needed because
 * root_table references them through .child before they are defined.
 */
215 static struct ctl_table kern_table[];
216 static struct ctl_table vm_table[];
217 static struct ctl_table fs_table[];
218 static struct ctl_table debug_table[];
219 static struct ctl_table dev_table[];
/* Defined in another translation unit; attached below kern_table as "random". */
220 extern struct ctl_table random_table[];
221 #ifdef CONFIG_EPOLL
222 extern struct ctl_table epoll_table[];
223 #endif
224 
225 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
/* Defined (not merely declared) here; exposed as the "legacy_va_layout" entry of vm_table. */
226 int sysctl_legacy_va_layout;
227 #endif
228 
229 /* The default sysctl tables: */
230 
/*
 * Top-level directory entries.  Each entry has only a name, mode 0555
 * (read and search, no write) and a .child pointer to the table holding the
 * directory's contents.  The empty { } element terminates the array.
 */
231 static struct ctl_table root_table[] = {
232 	{
233 		.procname	= "kernel",
234 		.mode		= 0555,
235 		.child		= kern_table,
236 	},
237 	{
238 		.procname	= "vm",
239 		.mode		= 0555,
240 		.child		= vm_table,
241 	},
242 	{
243 		.procname	= "fs",
244 		.mode		= 0555,
245 		.child		= fs_table,
246 	},
247 	{
248 		.procname	= "debug",
249 		.mode		= 0555,
250 		.child		= debug_table,
251 	},
252 	{
253 		.procname	= "dev",
254 		.mode		= 0555,
255 		.child		= dev_table,
256 	},
257 	{ }
258 };
259 
/*
 * Range limits for the CONFIG_SCHED_DEBUG entries of kern_table, referenced
 * through .extra1 (minimum) and .extra2 (maximum).  The granularity pair is
 * used by both sched_min_granularity_ns and sched_latency_ns.
 */
260 #ifdef CONFIG_SCHED_DEBUG
261 static int min_sched_granularity_ns = 100000;		/* 100 usecs */
262 static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
263 static int min_wakeup_granularity_ns;			/* 0 usecs */
264 static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
265 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
/* SCHED_TUNABLESCALING_END is one past the last valid value, hence the -1. */
266 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
267 #endif
268 
/* Range limits of the vm "extfrag_threshold" entry. */
269 #ifdef CONFIG_COMPACTION
270 static int min_extfrag_threshold;
271 static int max_extfrag_threshold = 1000;
272 #endif
273 
/*
 * kern_table: contents of the "kernel" directory declared in root_table.
 *
 * Each element describes one entry: .procname is its name, .data/.maxlen
 * the backing variable and its size in bytes, .mode the permission bits and
 * .proc_handler the read/write callback.  For range-checking handlers
 * .extra1 and .extra2 point at the minimum and maximum.  Entries with
 * .child are subdirectories.  The array ends with an empty { } element.
 */
274 static struct ctl_table kern_table[] = {
275 	{
276 		.procname	= "sched_child_runs_first",
277 		.data		= &sysctl_sched_child_runs_first,
278 		.maxlen		= sizeof(unsigned int),
279 		.mode		= 0644,
280 		.proc_handler	= proc_dointvec,
281 	},
	/* Scheduler tunables that exist only with CONFIG_SCHED_DEBUG. */
282 #ifdef CONFIG_SCHED_DEBUG
283 	{
284 		.procname	= "sched_min_granularity_ns",
285 		.data		= &sysctl_sched_min_granularity,
286 		.maxlen		= sizeof(unsigned int),
287 		.mode		= 0644,
288 		.proc_handler	= sched_proc_update_handler,
289 		.extra1		= &min_sched_granularity_ns,
290 		.extra2		= &max_sched_granularity_ns,
291 	},
	/* Uses the same limits as sched_min_granularity_ns above. */
292 	{
293 		.procname	= "sched_latency_ns",
294 		.data		= &sysctl_sched_latency,
295 		.maxlen		= sizeof(unsigned int),
296 		.mode		= 0644,
297 		.proc_handler	= sched_proc_update_handler,
298 		.extra1		= &min_sched_granularity_ns,
299 		.extra2		= &max_sched_granularity_ns,
300 	},
301 	{
302 		.procname	= "sched_wakeup_granularity_ns",
303 		.data		= &sysctl_sched_wakeup_granularity,
304 		.maxlen		= sizeof(unsigned int),
305 		.mode		= 0644,
306 		.proc_handler	= sched_proc_update_handler,
307 		.extra1		= &min_wakeup_granularity_ns,
308 		.extra2		= &max_wakeup_granularity_ns,
309 	},
310 	{
311 		.procname	= "sched_tunable_scaling",
312 		.data		= &sysctl_sched_tunable_scaling,
313 		.maxlen		= sizeof(enum sched_tunable_scaling),
314 		.mode		= 0644,
315 		.proc_handler	= sched_proc_update_handler,
316 		.extra1		= &min_sched_tunable_scaling,
317 		.extra2		= &max_sched_tunable_scaling,
318 	},
319 	{
320 		.procname	= "sched_migration_cost",
321 		.data		= &sysctl_sched_migration_cost,
322 		.maxlen		= sizeof(unsigned int),
323 		.mode		= 0644,
324 		.proc_handler	= proc_dointvec,
325 	},
326 	{
327 		.procname	= "sched_nr_migrate",
328 		.data		= &sysctl_sched_nr_migrate,
329 		.maxlen		= sizeof(unsigned int),
330 		.mode		= 0644,
331 		.proc_handler	= proc_dointvec,
332 	},
333 	{
334 		.procname	= "sched_time_avg",
335 		.data		= &sysctl_sched_time_avg,
336 		.maxlen		= sizeof(unsigned int),
337 		.mode		= 0644,
338 		.proc_handler	= proc_dointvec,
339 	},
340 	{
341 		.procname	= "sched_shares_window",
342 		.data		= &sysctl_sched_shares_window,
343 		.maxlen		= sizeof(unsigned int),
344 		.mode		= 0644,
345 		.proc_handler	= proc_dointvec,
346 	},
	/* Boolean: limited to the range [0, 1]. */
347 	{
348 		.procname	= "timer_migration",
349 		.data		= &sysctl_timer_migration,
350 		.maxlen		= sizeof(unsigned int),
351 		.mode		= 0644,
352 		.proc_handler	= proc_dointvec_minmax,
353 		.extra1		= &zero,
354 		.extra2		= &one,
355 	},
356 #endif
	/* Both real-time scheduler limits go through sched_rt_handler. */
357 	{
358 		.procname	= "sched_rt_period_us",
359 		.data		= &sysctl_sched_rt_period,
360 		.maxlen		= sizeof(unsigned int),
361 		.mode		= 0644,
362 		.proc_handler	= sched_rt_handler,
363 	},
364 	{
365 		.procname	= "sched_rt_runtime_us",
366 		.data		= &sysctl_sched_rt_runtime,
367 		.maxlen		= sizeof(int),
368 		.mode		= 0644,
369 		.proc_handler	= sched_rt_handler,
370 	},
371 #ifdef CONFIG_SCHED_AUTOGROUP
	/* Boolean: limited to the range [0, 1]. */
372 	{
373 		.procname	= "sched_autogroup_enabled",
374 		.data		= &sysctl_sched_autogroup_enabled,
375 		.maxlen		= sizeof(unsigned int),
376 		.mode		= 0644,
377 		.proc_handler	= proc_dointvec_minmax,
378 		.extra1		= &zero,
379 		.extra2		= &one,
380 	},
381 #endif
	/* Lock-debugging switches; each exists only with its config option. */
382 #ifdef CONFIG_PROVE_LOCKING
383 	{
384 		.procname	= "prove_locking",
385 		.data		= &prove_locking,
386 		.maxlen		= sizeof(int),
387 		.mode		= 0644,
388 		.proc_handler	= proc_dointvec,
389 	},
390 #endif
391 #ifdef CONFIG_LOCK_STAT
392 	{
393 		.procname	= "lock_stat",
394 		.data		= &lock_stat,
395 		.maxlen		= sizeof(int),
396 		.mode		= 0644,
397 		.proc_handler	= proc_dointvec,
398 	},
399 #endif
	/* Panic timeout and core-dump configuration. */
400 	{
401 		.procname	= "panic",
402 		.data		= &panic_timeout,
403 		.maxlen		= sizeof(int),
404 		.mode		= 0644,
405 		.proc_handler	= proc_dointvec,
406 	},
407 	{
408 		.procname	= "core_uses_pid",
409 		.data		= &core_uses_pid,
410 		.maxlen		= sizeof(int),
411 		.mode		= 0644,
412 		.proc_handler	= proc_dointvec,
413 	},
	/* String entry: .maxlen is the size of the character buffer. */
414 	{
415 		.procname	= "core_pattern",
416 		.data		= core_pattern,
417 		.maxlen		= CORENAME_MAX_SIZE,
418 		.mode		= 0644,
419 		.proc_handler	= proc_dostring,
420 	},
421 	{
422 		.procname	= "core_pipe_limit",
423 		.data		= &core_pipe_limit,
424 		.maxlen		= sizeof(unsigned int),
425 		.mode		= 0644,
426 		.proc_handler	= proc_dointvec,
427 	},
428 #ifdef CONFIG_PROC_SYSCTL
	/*
	 * No .data is set here.  NOTE(review): presumably proc_taint locates
	 * the taint value itself -- confirm against its definition.
	 */
429 	{
430 		.procname	= "tainted",
431 		.maxlen 	= sizeof(long),
432 		.mode		= 0644,
433 		.proc_handler	= proc_taint,
434 	},
435 #endif
436 #ifdef CONFIG_LATENCYTOP
437 	{
438 		.procname	= "latencytop",
439 		.data		= &latencytop_enabled,
440 		.maxlen		= sizeof(int),
441 		.mode		= 0644,
442 		.proc_handler	= proc_dointvec,
443 	},
444 #endif
445 #ifdef CONFIG_BLK_DEV_INITRD
446 	{
447 		.procname	= "real-root-dev",
448 		.data		= &real_root_dev,
449 		.maxlen		= sizeof(int),
450 		.mode		= 0644,
451 		.proc_handler	= proc_dointvec,
452 	},
453 #endif
454 	{
455 		.procname	= "print-fatal-signals",
456 		.data		= &print_fatal_signals,
457 		.maxlen		= sizeof(int),
458 		.mode		= 0644,
459 		.proc_handler	= proc_dointvec,
460 	},
	/* SPARC-only entries. */
461 #ifdef CONFIG_SPARC
	/* String entry backed by a buffer of 256 bytes (per .maxlen). */
462 	{
463 		.procname	= "reboot-cmd",
464 		.data		= reboot_command,
465 		.maxlen		= 256,
466 		.mode		= 0644,
467 		.proc_handler	= proc_dostring,
468 	},
469 	{
470 		.procname	= "stop-a",
471 		.data		= &stop_a_enabled,
472 		.maxlen		= sizeof (int),
473 		.mode		= 0644,
474 		.proc_handler	= proc_dointvec,
475 	},
476 	{
477 		.procname	= "scons-poweroff",
478 		.data		= &scons_pwroff,
479 		.maxlen		= sizeof (int),
480 		.mode		= 0644,
481 		.proc_handler	= proc_dointvec,
482 	},
483 #endif
484 #ifdef CONFIG_SPARC64
485 	{
486 		.procname	= "tsb-ratio",
487 		.data		= &sysctl_tsb_ratio,
488 		.maxlen		= sizeof (int),
489 		.mode		= 0644,
490 		.proc_handler	= proc_dointvec,
491 	},
492 #endif
	/* Entries compiled only when the compiler defines __hppa__. */
493 #ifdef __hppa__
494 	{
495 		.procname	= "soft-power",
496 		.data		= &pwrsw_enabled,
497 		.maxlen		= sizeof (int),
498 	 	.mode		= 0644,
499 		.proc_handler	= proc_dointvec,
500 	},
501 	{
502 		.procname	= "unaligned-trap",
503 		.data		= &unaligned_enabled,
504 		.maxlen		= sizeof (int),
505 		.mode		= 0644,
506 		.proc_handler	= proc_dointvec,
507 	},
508 #endif
509 	{
510 		.procname	= "ctrl-alt-del",
511 		.data		= &C_A_D,
512 		.maxlen		= sizeof(int),
513 		.mode		= 0644,
514 		.proc_handler	= proc_dointvec,
515 	},
	/* Tracing switches; the first two use tracer-specific handlers. */
516 #ifdef CONFIG_FUNCTION_TRACER
517 	{
518 		.procname	= "ftrace_enabled",
519 		.data		= &ftrace_enabled,
520 		.maxlen		= sizeof(int),
521 		.mode		= 0644,
522 		.proc_handler	= ftrace_enable_sysctl,
523 	},
524 #endif
525 #ifdef CONFIG_STACK_TRACER
526 	{
527 		.procname	= "stack_tracer_enabled",
528 		.data		= &stack_tracer_enabled,
529 		.maxlen		= sizeof(int),
530 		.mode		= 0644,
531 		.proc_handler	= stack_trace_sysctl,
532 	},
533 #endif
534 #ifdef CONFIG_TRACING
535 	{
536 		.procname	= "ftrace_dump_on_oops",
537 		.data		= &ftrace_dump_on_oops,
538 		.maxlen		= sizeof(int),
539 		.mode		= 0644,
540 		.proc_handler	= proc_dointvec,
541 	},
542 #endif
543 #ifdef CONFIG_MODULES
	/* String entry: path buffer of KMOD_PATH_LEN bytes. */
544 	{
545 		.procname	= "modprobe",
546 		.data		= &modprobe_path,
547 		.maxlen		= KMOD_PATH_LEN,
548 		.mode		= 0644,
549 		.proc_handler	= proc_dostring,
550 	},
	/*
	 * Minimum and maximum are both 1, so 1 is the only value that passes
	 * the range check: the flag can be set but not cleared through here.
	 */
551 	{
552 		.procname	= "modules_disabled",
553 		.data		= &modules_disabled,
554 		.maxlen		= sizeof(int),
555 		.mode		= 0644,
556 		/* only handle a transition from default "0" to "1" */
557 		.proc_handler	= proc_dointvec_minmax,
558 		.extra1		= &one,
559 		.extra2		= &one,
560 	},
561 #endif
562 #ifdef CONFIG_HOTPLUG
563 	{
564 		.procname	= "hotplug",
565 		.data		= &uevent_helper,
566 		.maxlen		= UEVENT_HELPER_PATH_LEN,
567 		.mode		= 0644,
568 		.proc_handler	= proc_dostring,
569 	},
570 #endif
571 #ifdef CONFIG_CHR_DEV_SG
	/* Read-only (0444). */
572 	{
573 		.procname	= "sg-big-buff",
574 		.data		= &sg_big_buff,
575 		.maxlen		= sizeof (int),
576 		.mode		= 0444,
577 		.proc_handler	= proc_dointvec,
578 	},
579 #endif
580 #ifdef CONFIG_BSD_PROCESS_ACCT
	/* Vector entry: .maxlen covers three ints. */
581 	{
582 		.procname	= "acct",
583 		.data		= &acct_parm,
584 		.maxlen		= 3*sizeof(int),
585 		.mode		= 0644,
586 		.proc_handler	= proc_dointvec,
587 	},
588 #endif
589 #ifdef CONFIG_MAGIC_SYSRQ
	/* Backed by this file's __sysrq_enabled; see sysrq_sysctl_handler(). */
590 	{
591 		.procname	= "sysrq",
592 		.data		= &__sysrq_enabled,
593 		.maxlen		= sizeof (int),
594 		.mode		= 0644,
595 		.proc_handler	= sysrq_sysctl_handler,
596 	},
597 #endif
598 #ifdef CONFIG_PROC_SYSCTL
	/* Owner-only access (0600); .data is NULL and proc_do_cad_pid does the work. */
599 	{
600 		.procname	= "cad_pid",
601 		.data		= NULL,
602 		.maxlen		= sizeof (int),
603 		.mode		= 0600,
604 		.proc_handler	= proc_do_cad_pid,
605 	},
606 #endif
607 	{
608 		.procname	= "threads-max",
609 		.data		= &max_threads,
610 		.maxlen		= sizeof(int),
611 		.mode		= 0644,
612 		.proc_handler	= proc_dointvec,
613 	},
	/* Subdirectories: .child points at tables defined outside this file. */
614 	{
615 		.procname	= "random",
616 		.mode		= 0555,
617 		.child		= random_table,
618 	},
619 	{
620 		.procname	= "usermodehelper",
621 		.mode		= 0555,
622 		.child		= usermodehelper_table,
623 	},
	/* Overflow IDs are limited to [minolduid, maxolduid], i.e. [0, 65535]. */
624 	{
625 		.procname	= "overflowuid",
626 		.data		= &overflowuid,
627 		.maxlen		= sizeof(int),
628 		.mode		= 0644,
629 		.proc_handler	= proc_dointvec_minmax,
630 		.extra1		= &minolduid,
631 		.extra2		= &maxolduid,
632 	},
633 	{
634 		.procname	= "overflowgid",
635 		.data		= &overflowgid,
636 		.maxlen		= sizeof(int),
637 		.mode		= 0644,
638 		.proc_handler	= proc_dointvec_minmax,
639 		.extra1		= &minolduid,
640 		.extra2		= &maxolduid,
641 	},
642 #ifdef CONFIG_S390
643 #ifdef CONFIG_MATHEMU
644 	{
645 		.procname	= "ieee_emulation_warnings",
646 		.data		= &sysctl_ieee_emulation_warnings,
647 		.maxlen		= sizeof(int),
648 		.mode		= 0644,
649 		.proc_handler	= proc_dointvec,
650 	},
651 #endif
	/* Backed by show_unhandled_signals, despite the entry's name. */
652 	{
653 		.procname	= "userprocess_debug",
654 		.data		= &show_unhandled_signals,
655 		.maxlen		= sizeof(int),
656 		.mode		= 0644,
657 		.proc_handler	= proc_dointvec,
658 	},
659 #endif
	/* Limited to [pid_max_min, pid_max_max], both defined elsewhere. */
660 	{
661 		.procname	= "pid_max",
662 		.data		= &pid_max,
663 		.maxlen		= sizeof (int),
664 		.mode		= 0644,
665 		.proc_handler	= proc_dointvec_minmax,
666 		.extra1		= &pid_max_min,
667 		.extra2		= &pid_max_max,
668 	},
669 	{
670 		.procname	= "panic_on_oops",
671 		.data		= &panic_on_oops,
672 		.maxlen		= sizeof(int),
673 		.mode		= 0644,
674 		.proc_handler	= proc_dointvec,
675 	},
676 #if defined CONFIG_PRINTK
677 	{
678 		.procname	= "printk",
679 		.data		= &console_loglevel,
680 		.maxlen		= 4*sizeof(int),
681 		.mode		= 0644,
682 		.proc_handler	= proc_dointvec,
683 	},
684 	{
685 		.procname	= "printk_ratelimit",
686 		.data		= &printk_ratelimit_state.interval,
687 		.maxlen		= sizeof(int),
688 		.mode		= 0644,
689 		.proc_handler	= proc_dointvec_jiffies,
690 	},
691 	{
692 		.procname	= "printk_ratelimit_burst",
693 		.data		= &printk_ratelimit_state.burst,
694 		.maxlen		= sizeof(int),
695 		.mode		= 0644,
696 		.proc_handler	= proc_dointvec,
697 	},
698 	{
699 		.procname	= "printk_delay",
700 		.data		= &printk_delay_msec,
701 		.maxlen		= sizeof(int),
702 		.mode		= 0644,
703 		.proc_handler	= proc_dointvec_minmax,
704 		.extra1		= &zero,
705 		.extra2		= &ten_thousand,
706 	},
707 	{
708 		.procname	= "dmesg_restrict",
709 		.data		= &dmesg_restrict,
710 		.maxlen		= sizeof(int),
711 		.mode		= 0644,
712 		.proc_handler	= proc_dointvec_minmax,
713 		.extra1		= &zero,
714 		.extra2		= &one,
715 	},
716 	{
717 		.procname	= "kptr_restrict",
718 		.data		= &kptr_restrict,
719 		.maxlen		= sizeof(int),
720 		.mode		= 0644,
721 		.proc_handler	= proc_dmesg_restrict,
722 		.extra1		= &zero,
723 		.extra2		= &two,
724 	},
725 #endif
	/* Read-only (0444) view of the file-local ngroups_max constant. */
726 	{
727 		.procname	= "ngroups_max",
728 		.data		= &ngroups_max,
729 		.maxlen		= sizeof (int),
730 		.mode		= 0444,
731 		.proc_handler	= proc_dointvec,
732 	},
	/*
	 * Lockup detector.  "watchdog" and "nmi_watchdog" expose the same
	 * variable (watchdog_enabled) through the same handler and limits.
	 */
733 #if defined(CONFIG_LOCKUP_DETECTOR)
734 	{
735 		.procname       = "watchdog",
736 		.data           = &watchdog_enabled,
737 		.maxlen         = sizeof (int),
738 		.mode           = 0644,
739 		.proc_handler   = proc_dowatchdog,
740 		.extra1		= &zero,
741 		.extra2		= &one,
742 	},
	/* Limited to [-1, 60]. */
743 	{
744 		.procname	= "watchdog_thresh",
745 		.data		= &watchdog_thresh,
746 		.maxlen		= sizeof(int),
747 		.mode		= 0644,
748 		.proc_handler	= proc_dowatchdog,
749 		.extra1		= &neg_one,
750 		.extra2		= &sixty,
751 	},
752 	{
753 		.procname	= "softlockup_panic",
754 		.data		= &softlockup_panic,
755 		.maxlen		= sizeof(int),
756 		.mode		= 0644,
757 		.proc_handler	= proc_dointvec_minmax,
758 		.extra1		= &zero,
759 		.extra2		= &one,
760 	},
761 	{
762 		.procname       = "nmi_watchdog",
763 		.data           = &watchdog_enabled,
764 		.maxlen         = sizeof (int),
765 		.mode           = 0644,
766 		.proc_handler   = proc_dowatchdog,
767 		.extra1		= &zero,
768 		.extra2		= &one,
769 	},
770 #endif
	/* x86-only entries (the first additionally needs the local APIC). */
771 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
772 	{
773 		.procname       = "unknown_nmi_panic",
774 		.data           = &unknown_nmi_panic,
775 		.maxlen         = sizeof (int),
776 		.mode           = 0644,
777 		.proc_handler   = proc_dointvec,
778 	},
779 #endif
780 #if defined(CONFIG_X86)
781 	{
782 		.procname	= "panic_on_unrecovered_nmi",
783 		.data		= &panic_on_unrecovered_nmi,
784 		.maxlen		= sizeof(int),
785 		.mode		= 0644,
786 		.proc_handler	= proc_dointvec,
787 	},
788 	{
789 		.procname	= "panic_on_io_nmi",
790 		.data		= &panic_on_io_nmi,
791 		.maxlen		= sizeof(int),
792 		.mode		= 0644,
793 		.proc_handler	= proc_dointvec,
794 	},
	/* The two bootloader entries are read-only (0444). */
795 	{
796 		.procname	= "bootloader_type",
797 		.data		= &bootloader_type,
798 		.maxlen		= sizeof (int),
799 		.mode		= 0444,
800 		.proc_handler	= proc_dointvec,
801 	},
802 	{
803 		.procname	= "bootloader_version",
804 		.data		= &bootloader_version,
805 		.maxlen		= sizeof (int),
806 		.mode		= 0444,
807 		.proc_handler	= proc_dointvec,
808 	},
809 	{
810 		.procname	= "kstack_depth_to_print",
811 		.data		= &kstack_depth_to_print,
812 		.maxlen		= sizeof(int),
813 		.mode		= 0644,
814 		.proc_handler	= proc_dointvec,
815 	},
816 	{
817 		.procname	= "io_delay_type",
818 		.data		= &io_delay_type,
819 		.maxlen		= sizeof(int),
820 		.mode		= 0644,
821 		.proc_handler	= proc_dointvec,
822 	},
823 #endif
824 #if defined(CONFIG_MMU)
825 	{
826 		.procname	= "randomize_va_space",
827 		.data		= &randomize_va_space,
828 		.maxlen		= sizeof(int),
829 		.mode		= 0644,
830 		.proc_handler	= proc_dointvec,
831 	},
832 #endif
833 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
834 	{
835 		.procname	= "spin_retry",
836 		.data		= &spin_retry,
837 		.maxlen		= sizeof (int),
838 		.mode		= 0644,
839 		.proc_handler	= proc_dointvec,
840 	},
841 #endif
842 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
	/* unsigned long value; no .extra1/.extra2 limits are supplied. */
843 	{
844 		.procname	= "acpi_video_flags",
845 		.data		= &acpi_realmode_flags,
846 		.maxlen		= sizeof (unsigned long),
847 		.mode		= 0644,
848 		.proc_handler	= proc_doulongvec_minmax,
849 	},
850 #endif
851 #ifdef CONFIG_IA64
852 	{
853 		.procname	= "ignore-unaligned-usertrap",
854 		.data		= &no_unaligned_warning,
855 		.maxlen		= sizeof (int),
856 	 	.mode		= 0644,
857 		.proc_handler	= proc_dointvec,
858 	},
859 	{
860 		.procname	= "unaligned-dump-stack",
861 		.data		= &unaligned_dump_stack,
862 		.maxlen		= sizeof (int),
863 		.mode		= 0644,
864 		.proc_handler	= proc_dointvec,
865 	},
866 #endif
	/* Hung-task detector tunables. */
867 #ifdef CONFIG_DETECT_HUNG_TASK
	/* Boolean: limited to the range [0, 1]. */
868 	{
869 		.procname	= "hung_task_panic",
870 		.data		= &sysctl_hung_task_panic,
871 		.maxlen		= sizeof(int),
872 		.mode		= 0644,
873 		.proc_handler	= proc_dointvec_minmax,
874 		.extra1		= &zero,
875 		.extra2		= &one,
876 	},
	/* The remaining three are unsigned long; none supplies limits. */
877 	{
878 		.procname	= "hung_task_check_count",
879 		.data		= &sysctl_hung_task_check_count,
880 		.maxlen		= sizeof(unsigned long),
881 		.mode		= 0644,
882 		.proc_handler	= proc_doulongvec_minmax,
883 	},
884 	{
885 		.procname	= "hung_task_timeout_secs",
886 		.data		= &sysctl_hung_task_timeout_secs,
887 		.maxlen		= sizeof(unsigned long),
888 		.mode		= 0644,
889 		.proc_handler	= proc_dohung_task_timeout_secs,
890 	},
891 	{
892 		.procname	= "hung_task_warnings",
893 		.data		= &sysctl_hung_task_warnings,
894 		.maxlen		= sizeof(unsigned long),
895 		.mode		= 0644,
896 		.proc_handler	= proc_doulongvec_minmax,
897 	},
898 #endif
899 #ifdef CONFIG_COMPAT
900 	{
901 		.procname	= "compat-log",
902 		.data		= &compat_log,
903 		.maxlen		= sizeof (int),
904 	 	.mode		= 0644,
905 		.proc_handler	= proc_dointvec,
906 	},
907 #endif
908 #ifdef CONFIG_RT_MUTEXES
909 	{
910 		.procname	= "max_lock_depth",
911 		.data		= &max_lock_depth,
912 		.maxlen		= sizeof(int),
913 		.mode		= 0644,
914 		.proc_handler	= proc_dointvec,
915 	},
916 #endif
	/* String entry: buffer of POWEROFF_CMD_PATH_LEN bytes. */
917 	{
918 		.procname	= "poweroff_cmd",
919 		.data		= &poweroff_cmd,
920 		.maxlen		= POWEROFF_CMD_PATH_LEN,
921 		.mode		= 0644,
922 		.proc_handler	= proc_dostring,
923 	},
924 #ifdef CONFIG_KEYS
	/* Subdirectory whose table (key_sysctls) is defined outside this file. */
925 	{
926 		.procname	= "keys",
927 		.mode		= 0555,
928 		.child		= key_sysctls,
929 	},
930 #endif
931 #ifdef CONFIG_RCU_TORTURE_TEST
932 	{
933 		.procname       = "rcutorture_runnable",
934 		.data           = &rcutorture_runnable,
935 		.maxlen         = sizeof(int),
936 		.mode           = 0644,
937 		.proc_handler	= proc_dointvec,
938 	},
939 #endif
940 #ifdef CONFIG_PERF_EVENTS
941 	/*
942 	 * User-space scripts rely on the existence of this file
943 	 * as a feature check for perf_events being enabled.
944 	 *
945 	 * So it's an ABI, do not remove!
946 	 */
947 	{
948 		.procname	= "perf_event_paranoid",
949 		.data		= &sysctl_perf_event_paranoid,
950 		.maxlen		= sizeof(sysctl_perf_event_paranoid),
951 		.mode		= 0644,
952 		.proc_handler	= proc_dointvec,
953 	},
954 	{
955 		.procname	= "perf_event_mlock_kb",
956 		.data		= &sysctl_perf_event_mlock,
957 		.maxlen		= sizeof(sysctl_perf_event_mlock),
958 		.mode		= 0644,
959 		.proc_handler	= proc_dointvec,
960 	},
	/* The only perf entry with a dedicated handler. */
961 	{
962 		.procname	= "perf_event_max_sample_rate",
963 		.data		= &sysctl_perf_event_sample_rate,
964 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
965 		.mode		= 0644,
966 		.proc_handler	= perf_proc_update_handler,
967 	},
968 #endif
969 #ifdef CONFIG_KMEMCHECK
970 	{
971 		.procname	= "kmemcheck",
972 		.data		= &kmemcheck_enabled,
973 		.maxlen		= sizeof(int),
974 		.mode		= 0644,
975 		.proc_handler	= proc_dointvec,
976 	},
977 #endif
978 #ifdef CONFIG_BLOCK
979 	{
980 		.procname	= "blk_iopoll",
981 		.data		= &blk_iopoll_enabled,
982 		.maxlen		= sizeof(int),
983 		.mode		= 0644,
984 		.proc_handler	= proc_dointvec,
985 	},
986 #endif
	/* Empty element: terminates kern_table. */
987 	{ }
988 };
989 
990 static struct ctl_table vm_table[] = {
991 	{
992 		.procname	= "overcommit_memory",
993 		.data		= &sysctl_overcommit_memory,
994 		.maxlen		= sizeof(sysctl_overcommit_memory),
995 		.mode		= 0644,
996 		.proc_handler	= proc_dointvec_minmax,
997 		.extra1		= &zero,
998 		.extra2		= &two,
999 	},
1000 	{
1001 		.procname	= "panic_on_oom",
1002 		.data		= &sysctl_panic_on_oom,
1003 		.maxlen		= sizeof(sysctl_panic_on_oom),
1004 		.mode		= 0644,
1005 		.proc_handler	= proc_dointvec_minmax,
1006 		.extra1		= &zero,
1007 		.extra2		= &two,
1008 	},
1009 	{
1010 		.procname	= "oom_kill_allocating_task",
1011 		.data		= &sysctl_oom_kill_allocating_task,
1012 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
1013 		.mode		= 0644,
1014 		.proc_handler	= proc_dointvec,
1015 	},
1016 	{
1017 		.procname	= "oom_dump_tasks",
1018 		.data		= &sysctl_oom_dump_tasks,
1019 		.maxlen		= sizeof(sysctl_oom_dump_tasks),
1020 		.mode		= 0644,
1021 		.proc_handler	= proc_dointvec,
1022 	},
1023 	{
1024 		.procname	= "overcommit_ratio",
1025 		.data		= &sysctl_overcommit_ratio,
1026 		.maxlen		= sizeof(sysctl_overcommit_ratio),
1027 		.mode		= 0644,
1028 		.proc_handler	= proc_dointvec,
1029 	},
1030 	{
1031 		.procname	= "page-cluster",
1032 		.data		= &page_cluster,
1033 		.maxlen		= sizeof(int),
1034 		.mode		= 0644,
1035 		.proc_handler	= proc_dointvec_minmax,
1036 		.extra1		= &zero,
1037 	},
1038 	{
1039 		.procname	= "dirty_background_ratio",
1040 		.data		= &dirty_background_ratio,
1041 		.maxlen		= sizeof(dirty_background_ratio),
1042 		.mode		= 0644,
1043 		.proc_handler	= dirty_background_ratio_handler,
1044 		.extra1		= &zero,
1045 		.extra2		= &one_hundred,
1046 	},
1047 	{
1048 		.procname	= "dirty_background_bytes",
1049 		.data		= &dirty_background_bytes,
1050 		.maxlen		= sizeof(dirty_background_bytes),
1051 		.mode		= 0644,
1052 		.proc_handler	= dirty_background_bytes_handler,
1053 		.extra1		= &one_ul,
1054 	},
1055 	{
1056 		.procname	= "dirty_ratio",
1057 		.data		= &vm_dirty_ratio,
1058 		.maxlen		= sizeof(vm_dirty_ratio),
1059 		.mode		= 0644,
1060 		.proc_handler	= dirty_ratio_handler,
1061 		.extra1		= &zero,
1062 		.extra2		= &one_hundred,
1063 	},
1064 	{
1065 		.procname	= "dirty_bytes",
1066 		.data		= &vm_dirty_bytes,
1067 		.maxlen		= sizeof(vm_dirty_bytes),
1068 		.mode		= 0644,
1069 		.proc_handler	= dirty_bytes_handler,
1070 		.extra1		= &dirty_bytes_min,
1071 	},
1072 	{
1073 		.procname	= "dirty_writeback_centisecs",
1074 		.data		= &dirty_writeback_interval,
1075 		.maxlen		= sizeof(dirty_writeback_interval),
1076 		.mode		= 0644,
1077 		.proc_handler	= dirty_writeback_centisecs_handler,
1078 	},
1079 	{
1080 		.procname	= "dirty_expire_centisecs",
1081 		.data		= &dirty_expire_interval,
1082 		.maxlen		= sizeof(dirty_expire_interval),
1083 		.mode		= 0644,
1084 		.proc_handler	= proc_dointvec_minmax,
1085 		.extra1		= &zero,
1086 	},
1087 	{
1088 		.procname	= "nr_pdflush_threads",
1089 		.data		= &nr_pdflush_threads,
1090 		.maxlen		= sizeof nr_pdflush_threads,
1091 		.mode		= 0444 /* read-only*/,
1092 		.proc_handler	= proc_dointvec,
1093 	},
1094 	{
1095 		.procname	= "swappiness",
1096 		.data		= &vm_swappiness,
1097 		.maxlen		= sizeof(vm_swappiness),
1098 		.mode		= 0644,
1099 		.proc_handler	= proc_dointvec_minmax,
1100 		.extra1		= &zero,
1101 		.extra2		= &one_hundred,
1102 	},
1103 #ifdef CONFIG_HUGETLB_PAGE
1104 	{
1105 		.procname	= "nr_hugepages",
1106 		.data		= NULL,
1107 		.maxlen		= sizeof(unsigned long),
1108 		.mode		= 0644,
1109 		.proc_handler	= hugetlb_sysctl_handler,
1110 		.extra1		= (void *)&hugetlb_zero,
1111 		.extra2		= (void *)&hugetlb_infinity,
1112 	},
1113 #ifdef CONFIG_NUMA
1114 	{
1115 		.procname       = "nr_hugepages_mempolicy",
1116 		.data           = NULL,
1117 		.maxlen         = sizeof(unsigned long),
1118 		.mode           = 0644,
1119 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1120 		.extra1		= (void *)&hugetlb_zero,
1121 		.extra2		= (void *)&hugetlb_infinity,
1122 	},
1123 #endif
1124 	 {
1125 		.procname	= "hugetlb_shm_group",
1126 		.data		= &sysctl_hugetlb_shm_group,
1127 		.maxlen		= sizeof(gid_t),
1128 		.mode		= 0644,
1129 		.proc_handler	= proc_dointvec,
1130 	 },
1131 	 {
1132 		.procname	= "hugepages_treat_as_movable",
1133 		.data		= &hugepages_treat_as_movable,
1134 		.maxlen		= sizeof(int),
1135 		.mode		= 0644,
1136 		.proc_handler	= hugetlb_treat_movable_handler,
1137 	},
1138 	{
1139 		.procname	= "nr_overcommit_hugepages",
1140 		.data		= NULL,
1141 		.maxlen		= sizeof(unsigned long),
1142 		.mode		= 0644,
1143 		.proc_handler	= hugetlb_overcommit_handler,
1144 		.extra1		= (void *)&hugetlb_zero,
1145 		.extra2		= (void *)&hugetlb_infinity,
1146 	},
1147 #endif
1148 	{
1149 		.procname	= "lowmem_reserve_ratio",
1150 		.data		= &sysctl_lowmem_reserve_ratio,
1151 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
1152 		.mode		= 0644,
1153 		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
1154 	},
1155 	{
1156 		.procname	= "drop_caches",
1157 		.data		= &sysctl_drop_caches,
1158 		.maxlen		= sizeof(int),
1159 		.mode		= 0644,
1160 		.proc_handler	= drop_caches_sysctl_handler,
1161 		.extra1		= &one,
1162 		.extra2		= &three,
1163 	},
1164 #ifdef CONFIG_COMPACTION
1165 	{
1166 		.procname	= "compact_memory",
1167 		.data		= &sysctl_compact_memory,
1168 		.maxlen		= sizeof(int),
1169 		.mode		= 0200,
1170 		.proc_handler	= sysctl_compaction_handler,
1171 	},
1172 	{
1173 		.procname	= "extfrag_threshold",
1174 		.data		= &sysctl_extfrag_threshold,
1175 		.maxlen		= sizeof(int),
1176 		.mode		= 0644,
1177 		.proc_handler	= sysctl_extfrag_handler,
1178 		.extra1		= &min_extfrag_threshold,
1179 		.extra2		= &max_extfrag_threshold,
1180 	},
1181 
1182 #endif /* CONFIG_COMPACTION */
1183 	{
1184 		.procname	= "min_free_kbytes",
1185 		.data		= &min_free_kbytes,
1186 		.maxlen		= sizeof(min_free_kbytes),
1187 		.mode		= 0644,
1188 		.proc_handler	= min_free_kbytes_sysctl_handler,
1189 		.extra1		= &zero,
1190 	},
1191 	{
1192 		.procname	= "percpu_pagelist_fraction",
1193 		.data		= &percpu_pagelist_fraction,
1194 		.maxlen		= sizeof(percpu_pagelist_fraction),
1195 		.mode		= 0644,
1196 		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
1197 		.extra1		= &min_percpu_pagelist_fract,
1198 	},
1199 #ifdef CONFIG_MMU
1200 	{
1201 		.procname	= "max_map_count",
1202 		.data		= &sysctl_max_map_count,
1203 		.maxlen		= sizeof(sysctl_max_map_count),
1204 		.mode		= 0644,
1205 		.proc_handler	= proc_dointvec_minmax,
1206 		.extra1		= &zero,
1207 	},
1208 #else
1209 	{
1210 		.procname	= "nr_trim_pages",
1211 		.data		= &sysctl_nr_trim_pages,
1212 		.maxlen		= sizeof(sysctl_nr_trim_pages),
1213 		.mode		= 0644,
1214 		.proc_handler	= proc_dointvec_minmax,
1215 		.extra1		= &zero,
1216 	},
1217 #endif
1218 	{
1219 		.procname	= "laptop_mode",
1220 		.data		= &laptop_mode,
1221 		.maxlen		= sizeof(laptop_mode),
1222 		.mode		= 0644,
1223 		.proc_handler	= proc_dointvec_jiffies,
1224 	},
1225 	{
1226 		.procname	= "block_dump",
1227 		.data		= &block_dump,
1228 		.maxlen		= sizeof(block_dump),
1229 		.mode		= 0644,
1230 		.proc_handler	= proc_dointvec,
1231 		.extra1		= &zero,
1232 	},
1233 	{
1234 		.procname	= "vfs_cache_pressure",
1235 		.data		= &sysctl_vfs_cache_pressure,
1236 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
1237 		.mode		= 0644,
1238 		.proc_handler	= proc_dointvec,
1239 		.extra1		= &zero,
1240 	},
1241 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1242 	{
1243 		.procname	= "legacy_va_layout",
1244 		.data		= &sysctl_legacy_va_layout,
1245 		.maxlen		= sizeof(sysctl_legacy_va_layout),
1246 		.mode		= 0644,
1247 		.proc_handler	= proc_dointvec,
1248 		.extra1		= &zero,
1249 	},
1250 #endif
1251 #ifdef CONFIG_NUMA
1252 	{
1253 		.procname	= "zone_reclaim_mode",
1254 		.data		= &zone_reclaim_mode,
1255 		.maxlen		= sizeof(zone_reclaim_mode),
1256 		.mode		= 0644,
1257 		.proc_handler	= proc_dointvec,
1258 		.extra1		= &zero,
1259 	},
1260 	{
1261 		.procname	= "min_unmapped_ratio",
1262 		.data		= &sysctl_min_unmapped_ratio,
1263 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
1264 		.mode		= 0644,
1265 		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
1266 		.extra1		= &zero,
1267 		.extra2		= &one_hundred,
1268 	},
1269 	{
1270 		.procname	= "min_slab_ratio",
1271 		.data		= &sysctl_min_slab_ratio,
1272 		.maxlen		= sizeof(sysctl_min_slab_ratio),
1273 		.mode		= 0644,
1274 		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
1275 		.extra1		= &zero,
1276 		.extra2		= &one_hundred,
1277 	},
1278 #endif
1279 #ifdef CONFIG_SMP
1280 	{
1281 		.procname	= "stat_interval",
1282 		.data		= &sysctl_stat_interval,
1283 		.maxlen		= sizeof(sysctl_stat_interval),
1284 		.mode		= 0644,
1285 		.proc_handler	= proc_dointvec_jiffies,
1286 	},
1287 #endif
1288 #ifdef CONFIG_MMU
1289 	{
1290 		.procname	= "mmap_min_addr",
1291 		.data		= &dac_mmap_min_addr,
1292 		.maxlen		= sizeof(unsigned long),
1293 		.mode		= 0644,
1294 		.proc_handler	= mmap_min_addr_handler,
1295 	},
1296 #endif
1297 #ifdef CONFIG_NUMA
1298 	{
1299 		.procname	= "numa_zonelist_order",
1300 		.data		= &numa_zonelist_order,
1301 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1302 		.mode		= 0644,
1303 		.proc_handler	= numa_zonelist_order_handler,
1304 	},
1305 #endif
1306 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1307    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1308 	{
1309 		.procname	= "vdso_enabled",
1310 		.data		= &vdso_enabled,
1311 		.maxlen		= sizeof(vdso_enabled),
1312 		.mode		= 0644,
1313 		.proc_handler	= proc_dointvec,
1314 		.extra1		= &zero,
1315 	},
1316 #endif
1317 #ifdef CONFIG_HIGHMEM
1318 	{
1319 		.procname	= "highmem_is_dirtyable",
1320 		.data		= &vm_highmem_is_dirtyable,
1321 		.maxlen		= sizeof(vm_highmem_is_dirtyable),
1322 		.mode		= 0644,
1323 		.proc_handler	= proc_dointvec_minmax,
1324 		.extra1		= &zero,
1325 		.extra2		= &one,
1326 	},
1327 #endif
1328 	{
1329 		.procname	= "scan_unevictable_pages",
1330 		.data		= &scan_unevictable_pages,
1331 		.maxlen		= sizeof(scan_unevictable_pages),
1332 		.mode		= 0644,
1333 		.proc_handler	= scan_unevictable_handler,
1334 	},
1335 #ifdef CONFIG_MEMORY_FAILURE
1336 	{
1337 		.procname	= "memory_failure_early_kill",
1338 		.data		= &sysctl_memory_failure_early_kill,
1339 		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
1340 		.mode		= 0644,
1341 		.proc_handler	= proc_dointvec_minmax,
1342 		.extra1		= &zero,
1343 		.extra2		= &one,
1344 	},
1345 	{
1346 		.procname	= "memory_failure_recovery",
1347 		.data		= &sysctl_memory_failure_recovery,
1348 		.maxlen		= sizeof(sysctl_memory_failure_recovery),
1349 		.mode		= 0644,
1350 		.proc_handler	= proc_dointvec_minmax,
1351 		.extra1		= &zero,
1352 		.extra2		= &one,
1353 	},
1354 #endif
1355 	{ }
1356 };
1357 
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Empty table (sentinel entry only).  It is referenced as the child of the
 * "binfmt_misc" directory entry in fs_table, so that directory exists but
 * carries no sysctl files of its own.
 */
static struct ctl_table binfmt_misc_table[] = {
	{ }
};
#endif
1363 
/*
 * Filesystem-related sysctl entries, terminated by an empty sentinel.
 * NOTE(review): presumably exposed under /proc/sys/fs; the root table that
 * references this array lies outside this part of the file - confirm there.
 */
static struct ctl_table fs_table[] = {
	/*
	 * "inode-nr" and "inode-state" are two read-only views of the same
	 * inodes_stat object; they differ only in how many ints are shown.
	 */
	{
		.procname	= "inode-nr",
		.data		= &inodes_stat,
		.maxlen		= 2*sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	{
		.procname	= "inode-state",
		.data		= &inodes_stat,
		.maxlen		= 7*sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_nr_inodes,
	},
	/* Read-only view of the whole files_stat object. */
	{
		.procname	= "file-nr",
		.data		= &files_stat,
		.maxlen		= sizeof(files_stat),
		.mode		= 0444,
		.proc_handler	= proc_nr_files,
	},
	/* Writable view of a single member of files_stat. */
	{
		.procname	= "file-max",
		.data		= &files_stat.max_files,
		.maxlen		= sizeof(files_stat.max_files),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	/* Range-checked against sysctl_nr_open_min/sysctl_nr_open_max. */
	{
		.procname	= "nr_open",
		.data		= &sysctl_nr_open,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &sysctl_nr_open_min,
		.extra2		= &sysctl_nr_open_max,
	},
	{
		.procname	= "dentry-state",
		.data		= &dentry_stat,
		.maxlen		= 6*sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_nr_dentry,
	},
	/* Both overflow IDs share the minolduid..maxolduid bounds. */
	{
		.procname	= "overflowuid",
		.data		= &fs_overflowuid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
	{
		.procname	= "overflowgid",
		.data		= &fs_overflowgid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
#ifdef CONFIG_FILE_LOCKING
	{
		.procname	= "leases-enable",
		.data		= &leases_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_DNOTIFY
	{
		.procname	= "dir-notify-enable",
		.data		= &dir_notify_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
	/*
	 * Everything down to the matching #endif (lease-break-time, aio,
	 * inotify, epoll) is only built when CONFIG_MMU is set.
	 */
#ifdef CONFIG_MMU
#ifdef CONFIG_FILE_LOCKING
	{
		.procname	= "lease-break-time",
		.data		= &lease_break_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_AIO
	{
		.procname	= "aio-nr",
		.data		= &aio_nr,
		.maxlen		= sizeof(aio_nr),
		.mode		= 0444,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "aio-max-nr",
		.data		= &aio_max_nr,
		.maxlen		= sizeof(aio_max_nr),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
#endif /* CONFIG_AIO */
#ifdef CONFIG_INOTIFY_USER
	/* Directory entry: no data/handler, only a child table. */
	{
		.procname	= "inotify",
		.mode		= 0555,
		.child		= inotify_table,
	},
#endif
#ifdef CONFIG_EPOLL
	/* Directory entry: no data/handler, only a child table. */
	{
		.procname	= "epoll",
		.mode		= 0555,
		.child		= epoll_table,
	},
#endif
#endif
	/* Accepts only the values 0..2. */
	{
		.procname	= "suid_dumpable",
		.data		= &suid_dumpable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &two,
	},
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
	/* Directory entry whose child table is empty (see binfmt_misc_table). */
	{
		.procname	= "binfmt_misc",
		.mode		= 0555,
		.child		= binfmt_misc_table,
	},
#endif
	/* Lower bound supplied through extra1; no upper bound is set here. */
	{
		.procname	= "pipe-max-size",
		.data		= &pipe_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &pipe_proc_fn,
		.extra1		= &pipe_min_size,
	},
	{ }
};
1512 
/*
 * Debugging-related sysctl entries, terminated by an empty sentinel.  Each
 * entry is compiled in only for the architectures/options named in its
 * surrounding #if, so the table may contain nothing but the sentinel.
 */
static struct ctl_table debug_table[] = {
#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
    defined(CONFIG_S390) || defined(CONFIG_TILE)
	/* Plain integer knob backed by show_unhandled_signals. */
	{
		.procname	= "exception-trace",
		.data		= &show_unhandled_signals,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#endif
#if defined(CONFIG_OPTPROBES)
	/* Boolean knob: bounds 0..1 are passed through extra1/extra2. */
	{
		.procname	= "kprobes-optimization",
		.data		= &sysctl_kprobes_optimization,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_kprobes_optimization_handler,
		.extra1		= &zero,
		.extra2		= &one,
	},
#endif
	{ }
};
1537 
/* Table with no entries of its own: it holds only the terminating sentinel. */
static struct ctl_table dev_table[] = {
	{ }
};
1541 
/*
 * Protects the header use/reference counters, the "unregistering" marker
 * and the header and root lists manipulated by the functions below.
 */
static DEFINE_SPINLOCK(sysctl_lock);
1543 
1544 /* called under sysctl_lock */
1545 static int use_table(struct ctl_table_header *p)
1546 {
1547 	if (unlikely(p->unregistering))
1548 		return 0;
1549 	p->used++;
1550 	return 1;
1551 }
1552 
1553 /* called under sysctl_lock */
1554 static void unuse_table(struct ctl_table_header *p)
1555 {
1556 	if (!--p->used)
1557 		if (unlikely(p->unregistering))
1558 			complete(p->unregistering);
1559 }
1560 
1561 /* called under sysctl_lock, will reacquire if has to wait */
1562 static void start_unregistering(struct ctl_table_header *p)
1563 {
1564 	/*
1565 	 * if p->used is 0, nobody will ever touch that entry again;
1566 	 * we'll eliminate all paths to it before dropping sysctl_lock
1567 	 */
1568 	if (unlikely(p->used)) {
1569 		struct completion wait;
1570 		init_completion(&wait);
1571 		p->unregistering = &wait;
1572 		spin_unlock(&sysctl_lock);
1573 		wait_for_completion(&wait);
1574 		spin_lock(&sysctl_lock);
1575 	} else {
1576 		/* anything non-NULL; we'll never dereference it */
1577 		p->unregistering = ERR_PTR(-EINVAL);
1578 	}
1579 	/*
1580 	 * do not remove from the list until nobody holds it; walking the
1581 	 * list in do_sysctl() relies on that.
1582 	 */
1583 	list_del_init(&p->ctl_entry);
1584 }
1585 
1586 void sysctl_head_get(struct ctl_table_header *head)
1587 {
1588 	spin_lock(&sysctl_lock);
1589 	head->count++;
1590 	spin_unlock(&sysctl_lock);
1591 }
1592 
1593 void sysctl_head_put(struct ctl_table_header *head)
1594 {
1595 	spin_lock(&sysctl_lock);
1596 	if (!--head->count)
1597 		kfree_rcu(head, rcu);
1598 	spin_unlock(&sysctl_lock);
1599 }
1600 
1601 struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1602 {
1603 	if (!head)
1604 		BUG();
1605 	spin_lock(&sysctl_lock);
1606 	if (!use_table(head))
1607 		head = ERR_PTR(-ENOENT);
1608 	spin_unlock(&sysctl_lock);
1609 	return head;
1610 }
1611 
1612 void sysctl_head_finish(struct ctl_table_header *head)
1613 {
1614 	if (!head)
1615 		return;
1616 	spin_lock(&sysctl_lock);
1617 	unuse_table(head);
1618 	spin_unlock(&sysctl_lock);
1619 }
1620 
1621 static struct ctl_table_set *
1622 lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1623 {
1624 	struct ctl_table_set *set = &root->default_set;
1625 	if (root->lookup)
1626 		set = root->lookup(root, namespaces);
1627 	return set;
1628 }
1629 
1630 static struct list_head *
1631 lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1632 {
1633 	struct ctl_table_set *set = lookup_header_set(root, namespaces);
1634 	return &set->list;
1635 }
1636 
/*
 * Step through the registered table headers visible for @namespaces.
 *
 * @prev is the header returned by the previous call (its "use" reference is
 * dropped here), or NULL to start from root_table_header.  The returned
 * header carries a "use" reference taken with use_table(); NULL is returned
 * once every root has been walked.  Headers that refuse use_table() (i.e.
 * are being unregistered) are skipped.
 */
struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
					    struct ctl_table_header *prev)
{
	struct ctl_table_root *root;
	struct list_head *header_list;
	struct ctl_table_header *head;
	struct list_head *tmp;

	spin_lock(&sysctl_lock);
	if (prev) {
		/* Resume: release the previous header and step past it. */
		head = prev;
		tmp = &prev->ctl_entry;
		unuse_table(prev);
		goto next;
	}
	/* Fresh walk: the first candidate is root_table_header itself. */
	tmp = &root_table_header.ctl_entry;
	for (;;) {
		head = list_entry(tmp, struct ctl_table_header, ctl_entry);

		if (!use_table(head))
			goto next;
		spin_unlock(&sysctl_lock);
		return head;
	next:
		/* Advance within the list belonging to the current root. */
		root = head->root;
		tmp = tmp->next;
		header_list = lookup_header_list(root, namespaces);
		if (tmp != header_list)
			continue;

		/*
		 * End of this root's list: move on to the next root that has
		 * a non-empty list.  Arriving back at sysctl_table_root means
		 * all roots have been visited.
		 */
		do {
			root = list_entry(root->root_list.next,
					struct ctl_table_root, root_list);
			if (root == &sysctl_table_root)
				goto out;
			header_list = lookup_header_list(root, namespaces);
		} while (list_empty(header_list));
		tmp = header_list->next;
	}
out:
	spin_unlock(&sysctl_lock);
	return NULL;
}
1680 
1681 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1682 {
1683 	return __sysctl_head_next(current->nsproxy, prev);
1684 }
1685 
1686 void register_sysctl_root(struct ctl_table_root *root)
1687 {
1688 	spin_lock(&sysctl_lock);
1689 	list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1690 	spin_unlock(&sysctl_lock);
1691 }
1692 
1693 /*
1694  * sysctl_perm does NOT grant the superuser all rights automatically, because
1695  * some sysctl variables are readonly even to root.
1696  */
1697 
1698 static int test_perm(int mode, int op)
1699 {
1700 	if (!current_euid())
1701 		mode >>= 6;
1702 	else if (in_egroup_p(0))
1703 		mode >>= 3;
1704 	if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1705 		return 0;
1706 	return -EACCES;
1707 }
1708 
1709 int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1710 {
1711 	int mode;
1712 
1713 	if (root->permissions)
1714 		mode = root->permissions(root, current->nsproxy, table);
1715 	else
1716 		mode = table->mode;
1717 
1718 	return test_perm(mode, op);
1719 }
1720 
1721 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1722 {
1723 	for (; table->procname; table++) {
1724 		table->parent = parent;
1725 		if (table->child)
1726 			sysctl_set_parent(table, table->child);
1727 	}
1728 }
1729 
1730 static __init int sysctl_init(void)
1731 {
1732 	sysctl_set_parent(NULL, root_table);
1733 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1734 	sysctl_check_table(current->nsproxy, root_table);
1735 #endif
1736 	return 0;
1737 }
1738 
1739 core_initcall(sysctl_init);
1740 
1741 static struct ctl_table *is_branch_in(struct ctl_table *branch,
1742 				      struct ctl_table *table)
1743 {
1744 	struct ctl_table *p;
1745 	const char *s = branch->procname;
1746 
1747 	/* branch should have named subdirectory as its first element */
1748 	if (!s || !branch->child)
1749 		return NULL;
1750 
1751 	/* ... and nothing else */
1752 	if (branch[1].procname)
1753 		return NULL;
1754 
1755 	/* table should contain subdirectory with the same name */
1756 	for (p = table; p->procname; p++) {
1757 		if (!p->child)
1758 			continue;
1759 		if (p->procname && strcmp(p->procname, s) == 0)
1760 			return p;
1761 	}
1762 	return NULL;
1763 }
1764 
1765 /* see if attaching q to p would be an improvement */
1766 static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1767 {
1768 	struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1769 	struct ctl_table *next;
1770 	int is_better = 0;
1771 	int not_in_parent = !p->attached_by;
1772 
1773 	while ((next = is_branch_in(by, to)) != NULL) {
1774 		if (by == q->attached_by)
1775 			is_better = 1;
1776 		if (to == p->attached_by)
1777 			not_in_parent = 1;
1778 		by = by->child;
1779 		to = next->child;
1780 	}
1781 
1782 	if (is_better && not_in_parent) {
1783 		q->attached_by = by;
1784 		q->attached_to = to;
1785 		q->parent = p;
1786 	}
1787 }
1788 
1789 /**
1790  * __register_sysctl_paths - register a sysctl hierarchy
1791  * @root: List of sysctl headers to register on
1792  * @namespaces: Data to compute which lists of sysctl entries are visible
1793  * @path: The path to the directory the sysctl table is in.
1794  * @table: the top-level table structure
1795  *
1796  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1797  * array. A completely 0 filled entry terminates the table.
1798  *
1799  * The members of the &struct ctl_table structure are used as follows:
1800  *
1801  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1802  *            enter a sysctl file
1803  *
1804  * data - a pointer to data for use by proc_handler
1805  *
1806  * maxlen - the maximum size in bytes of the data
1807  *
1808  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1809  *
1810  * child - a pointer to the child sysctl table if this entry is a directory, or
1811  *         %NULL.
1812  *
1813  * proc_handler - the text handler routine (described below)
1814  *
1815  * de - for internal use by the sysctl routines
1816  *
1817  * extra1, extra2 - extra pointers usable by the proc handler routines
1818  *
1819  * Leaf nodes in the sysctl tree will be represented by a single file
1820  * under /proc; non-leaf nodes will be represented by directories.
1821  *
1822  * sysctl(2) can automatically manage read and write requests through
1823  * the sysctl table.  The data and maxlen fields of the ctl_table
1824  * struct enable minimal validation of the values being written to be
1825  * performed, and the mode field allows minimal authentication.
1826  *
1827  * There must be a proc_handler routine for any terminal nodes
1828  * mirrored under /proc/sys (non-terminals are handled by a built-in
1829  * directory handler).  Several default handlers are available to
1830  * cover common cases -
1831  *
1832  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1833  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1834  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1835  *
1836  * It is the handler's job to read the input buffer from user memory
1837  * and process it. The handler should return 0 on success.
1838  *
1839  * This routine returns %NULL on a failure to register, and a pointer
1840  * to the table header on success.
1841  */
1842 struct ctl_table_header *__register_sysctl_paths(
1843 	struct ctl_table_root *root,
1844 	struct nsproxy *namespaces,
1845 	const struct ctl_path *path, struct ctl_table *table)
1846 {
1847 	struct ctl_table_header *header;
1848 	struct ctl_table *new, **prevp;
1849 	unsigned int n, npath;
1850 	struct ctl_table_set *set;
1851 
1852 	/* Count the path components */
1853 	for (npath = 0; path[npath].procname; ++npath)
1854 		;
1855 
1856 	/*
1857 	 * For each path component, allocate a 2-element ctl_table array.
1858 	 * The first array element will be filled with the sysctl entry
1859 	 * for this, the second will be the sentinel (procname == 0).
1860 	 *
1861 	 * We allocate everything in one go so that we don't have to
1862 	 * worry about freeing additional memory in unregister_sysctl_table.
1863 	 */
1864 	header = kzalloc(sizeof(struct ctl_table_header) +
1865 			 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
1866 	if (!header)
1867 		return NULL;
1868 
1869 	new = (struct ctl_table *) (header + 1);
1870 
1871 	/* Now connect the dots */
1872 	prevp = &header->ctl_table;
1873 	for (n = 0; n < npath; ++n, ++path) {
1874 		/* Copy the procname */
1875 		new->procname = path->procname;
1876 		new->mode     = 0555;
1877 
1878 		*prevp = new;
1879 		prevp = &new->child;
1880 
1881 		new += 2;
1882 	}
1883 	*prevp = table;
1884 	header->ctl_table_arg = table;
1885 
1886 	INIT_LIST_HEAD(&header->ctl_entry);
1887 	header->used = 0;
1888 	header->unregistering = NULL;
1889 	header->root = root;
1890 	sysctl_set_parent(NULL, header->ctl_table);
1891 	header->count = 1;
1892 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1893 	if (sysctl_check_table(namespaces, header->ctl_table)) {
1894 		kfree(header);
1895 		return NULL;
1896 	}
1897 #endif
1898 	spin_lock(&sysctl_lock);
1899 	header->set = lookup_header_set(root, namespaces);
1900 	header->attached_by = header->ctl_table;
1901 	header->attached_to = root_table;
1902 	header->parent = &root_table_header;
1903 	for (set = header->set; set; set = set->parent) {
1904 		struct ctl_table_header *p;
1905 		list_for_each_entry(p, &set->list, ctl_entry) {
1906 			if (p->unregistering)
1907 				continue;
1908 			try_attach(p, header);
1909 		}
1910 	}
1911 	header->parent->count++;
1912 	list_add_tail(&header->ctl_entry, &header->set->list);
1913 	spin_unlock(&sysctl_lock);
1914 
1915 	return header;
1916 }
1917 
1918 /**
1919  * register_sysctl_table_path - register a sysctl table hierarchy
1920  * @path: The path to the directory the sysctl table is in.
1921  * @table: the top-level table structure
1922  *
1923  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1924  * array. A completely 0 filled entry terminates the table.
1925  *
1926  * See __register_sysctl_paths for more details.
1927  */
1928 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1929 						struct ctl_table *table)
1930 {
1931 	return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
1932 					path, table);
1933 }
1934 
1935 /**
1936  * register_sysctl_table - register a sysctl table hierarchy
1937  * @table: the top-level table structure
1938  *
1939  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1940  * array. A completely 0 filled entry terminates the table.
1941  *
1942  * See register_sysctl_paths for more details.
1943  */
1944 struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1945 {
1946 	static const struct ctl_path null_path[] = { {} };
1947 
1948 	return register_sysctl_paths(null_path, table);
1949 }
1950 
1951 /**
1952  * unregister_sysctl_table - unregister a sysctl table hierarchy
1953  * @header: the header returned from register_sysctl_table
1954  *
1955  * Unregisters the sysctl table and all children. proc entries may not
1956  * actually be removed until they are no longer used by anyone.
1957  */
1958 void unregister_sysctl_table(struct ctl_table_header * header)
1959 {
1960 	might_sleep();
1961 
1962 	if (header == NULL)
1963 		return;
1964 
1965 	spin_lock(&sysctl_lock);
1966 	start_unregistering(header);
1967 	if (!--header->parent->count) {
1968 		WARN_ON(1);
1969 		kfree_rcu(header->parent, rcu);
1970 	}
1971 	if (!--header->count)
1972 		kfree_rcu(header, rcu);
1973 	spin_unlock(&sysctl_lock);
1974 }
1975 
1976 int sysctl_is_seen(struct ctl_table_header *p)
1977 {
1978 	struct ctl_table_set *set = p->set;
1979 	int res;
1980 	spin_lock(&sysctl_lock);
1981 	if (p->unregistering)
1982 		res = 0;
1983 	else if (!set->is_seen)
1984 		res = 1;
1985 	else
1986 		res = set->is_seen(set);
1987 	spin_unlock(&sysctl_lock);
1988 	return res;
1989 }
1990 
1991 void setup_sysctl_set(struct ctl_table_set *p,
1992 	struct ctl_table_set *parent,
1993 	int (*is_seen)(struct ctl_table_set *))
1994 {
1995 	INIT_LIST_HEAD(&p->list);
1996 	p->parent = parent ? parent : &sysctl_table_root.default_set;
1997 	p->is_seen = is_seen;
1998 }
1999 
2000 #else /* !CONFIG_SYSCTL */
/* Stub for the !CONFIG_SYSCTL branch: nothing is registered, returns NULL. */
struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
{
	return NULL;
}
2005 
/* Stub for the !CONFIG_SYSCTL branch: nothing is registered, returns NULL. */
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
						    struct ctl_table *table)
{
	return NULL;
}
2011 
/* Stub for the !CONFIG_SYSCTL branch: does nothing. */
void unregister_sysctl_table(struct ctl_table_header * table)
{
}
2015 
/* Stub for the !CONFIG_SYSCTL branch: leaves @p untouched. */
void setup_sysctl_set(struct ctl_table_set *p,
	struct ctl_table_set *parent,
	int (*is_seen)(struct ctl_table_set *))
{
}
2021 
/* Stub for the !CONFIG_SYSCTL branch: does nothing. */
void sysctl_head_put(struct ctl_table_header *head)
{
}
2025 
2026 #endif /* CONFIG_SYSCTL */
2027 
2028 /*
2029  * /proc/sys support
2030  */
2031 
2032 #ifdef CONFIG_PROC_SYSCTL
2033 
2034 static int _proc_do_string(void* data, int maxlen, int write,
2035 			   void __user *buffer,
2036 			   size_t *lenp, loff_t *ppos)
2037 {
2038 	size_t len;
2039 	char __user *p;
2040 	char c;
2041 
2042 	if (!data || !maxlen || !*lenp) {
2043 		*lenp = 0;
2044 		return 0;
2045 	}
2046 
2047 	if (write) {
2048 		len = 0;
2049 		p = buffer;
2050 		while (len < *lenp) {
2051 			if (get_user(c, p++))
2052 				return -EFAULT;
2053 			if (c == 0 || c == '\n')
2054 				break;
2055 			len++;
2056 		}
2057 		if (len >= maxlen)
2058 			len = maxlen-1;
2059 		if(copy_from_user(data, buffer, len))
2060 			return -EFAULT;
2061 		((char *) data)[len] = 0;
2062 		*ppos += *lenp;
2063 	} else {
2064 		len = strlen(data);
2065 		if (len > maxlen)
2066 			len = maxlen;
2067 
2068 		if (*ppos > len) {
2069 			*lenp = 0;
2070 			return 0;
2071 		}
2072 
2073 		data += *ppos;
2074 		len  -= *ppos;
2075 
2076 		if (len > *lenp)
2077 			len = *lenp;
2078 		if (len)
2079 			if(copy_to_user(buffer, data, len))
2080 				return -EFAULT;
2081 		if (len < *lenp) {
2082 			if(put_user('\n', ((char __user *) buffer) + len))
2083 				return -EFAULT;
2084 			len++;
2085 		}
2086 		*lenp = len;
2087 		*ppos += len;
2088 	}
2089 	return 0;
2090 }
2091 
2092 /**
2093  * proc_dostring - read a string sysctl
2094  * @table: the sysctl table
2095  * @write: %TRUE if this is a write to the sysctl file
2096  * @buffer: the user buffer
2097  * @lenp: the size of the user buffer
2098  * @ppos: file position
2099  *
2100  * Reads/writes a string from/to the user buffer. If the kernel
2101  * buffer provided is not large enough to hold the string, the
2102  * string is truncated. The copied string is %NULL-terminated.
2103  * If the string is being read by the user process, it is copied
2104  * and a newline '\n' is added. It is truncated if the buffer is
2105  * not large enough.
2106  *
2107  * Returns 0 on success.
2108  */
2109 int proc_dostring(struct ctl_table *table, int write,
2110 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2111 {
2112 	return _proc_do_string(table->data, table->maxlen, write,
2113 			       buffer, lenp, ppos);
2114 }
2115 
/*
 * Advance *@buf past leading whitespace (as determined by skip_spaces())
 * and return the number of characters that were skipped.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;
	char *end = skip_spaces(start);

	*buf = end;
	return end - start;
}
2124 
/*
 * Skip over a run of the character @v at the start of *@buf, looking at no
 * more than *@size characters.  *@buf is advanced and *@size reduced by the
 * number of characters skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	while (*size && **buf == v) {
		(*size)--;
		(*buf)++;
	}
}
2134 
#define TMPBUFLEN 22
/**
 * proc_get_long - reads an ASCII formatted integer from a user buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * Parses one number (optionally preceded by '-') from the start of @buf.
 * On success %0 is returned and @buf and @size are advanced past the
 * characters that were consumed. If @tr is non-NULL and input remains
 * after the number, the character following the number is stored in @tr.
 *
 * Returns -EINVAL if @size is zero, if the input does not start with a
 * number, if the number fills the whole scratch buffer, or if it is
 * followed by a character not listed in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	char tmp[TMPBUFLEN];
	char *cur = tmp;
	int len;

	if (!*size)
		return -EINVAL;

	/* Work on a NUL-terminated copy of (the start of) the input. */
	len = *size;
	if (len > TMPBUFLEN - 1)
		len = TMPBUFLEN - 1;
	memcpy(tmp, *buf, len);
	tmp[len] = 0;

	/* A lone '-' is not treated as a sign. */
	*neg = (*cur == '-' && *size > 1);
	if (*neg)
		cur++;

	if (!isdigit(*cur))
		return -EINVAL;

	*val = simple_strtoul(cur, &cur, 0);

	len = cur - tmp;

	/*
	 * If the number ran right up to the end of the scratch buffer we
	 * cannot see what follows it, so we might accept an invalid integer
	 * (e.g. 1234...a) or two integers as one (e.g. 123...1).  Refuse
	 * numbers that long.
	 */
	if (len == TMPBUFLEN - 1)
		return -EINVAL;

	/* Whatever follows the number must be a permitted trailer. */
	if (len < *size && perm_tr_len && !memchr(perm_tr, *cur, perm_tr_len))
		return -EINVAL;

	if (tr && (len < *size))
		*tr = *cur;

	*buf += len;
	*size -= len;

	return 0;
}
2199 
2200 /**
2201  * proc_put_long - converts an integer to a decimal ASCII formatted string
2202  *
2203  * @buf: the user buffer
2204  * @size: the size of the user buffer
2205  * @val: the integer to be converted
2206  * @neg: sign of the number, %TRUE for negative
2207  *
2208  * In case of success %0 is returned and @buf and @size are updated with
2209  * the amount of bytes written.
2210  */
2211 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2212 			  bool neg)
2213 {
2214 	int len;
2215 	char tmp[TMPBUFLEN], *p = tmp;
2216 
2217 	sprintf(p, "%s%lu", neg ? "-" : "", val);
2218 	len = strlen(tmp);
2219 	if (len > *size)
2220 		len = *size;
2221 	if (copy_to_user(*buf, tmp, len))
2222 		return -EFAULT;
2223 	*size -= len;
2224 	*buf += len;
2225 	return 0;
2226 }
2227 #undef TMPBUFLEN
2228 
2229 static int proc_put_char(void __user **buf, size_t *size, char c)
2230 {
2231 	if (*size) {
2232 		char __user **buffer = (char __user **)buf;
2233 		if (put_user(c, *buffer))
2234 			return -EFAULT;
2235 		(*size)--, (*buffer)++;
2236 		*buf = *buffer;
2237 	}
2238 	return 0;
2239 }
2240 
/*
 * Default conversion between the parsed (sign, magnitude) form and an int.
 *
 * @negp:  sign flag (input on write, output on read)
 * @lvalp: magnitude (input on write, output on read)
 * @valp:  the int being written to or read from
 * @write: non-zero to store *@negp / *@lvalp into *@valp, zero to split
 *         *@valp into *@negp / *@lvalp
 * @data:  unused
 *
 * Returns 0 on success.  On write, -EINVAL is returned and *@valp is left
 * untouched when the value cannot be represented in an int, rather than
 * storing a silently truncated number.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		if (*negp) {
			/* The largest negative magnitude is INT_MAX + 1. */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			*valp = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			*valp = *lvalp;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/*
			 * Negate in unsigned arithmetic: -val would overflow
			 * for INT_MIN.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2259 
/*
 * Characters allowed to follow a number when parsing integer vectors; this
 * array is handed to proc_get_long() as its list of permitted trailers.
 */
static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2261 
2262 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2263 		  int write, void __user *buffer,
2264 		  size_t *lenp, loff_t *ppos,
2265 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2266 			      int write, void *data),
2267 		  void *data)
2268 {
2269 	int *i, vleft, first = 1, err = 0;
2270 	unsigned long page = 0;
2271 	size_t left;
2272 	char *kbuf;
2273 
2274 	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2275 		*lenp = 0;
2276 		return 0;
2277 	}
2278 
2279 	i = (int *) tbl_data;
2280 	vleft = table->maxlen / sizeof(*i);
2281 	left = *lenp;
2282 
2283 	if (!conv)
2284 		conv = do_proc_dointvec_conv;
2285 
2286 	if (write) {
2287 		if (left > PAGE_SIZE - 1)
2288 			left = PAGE_SIZE - 1;
2289 		page = __get_free_page(GFP_TEMPORARY);
2290 		kbuf = (char *) page;
2291 		if (!kbuf)
2292 			return -ENOMEM;
2293 		if (copy_from_user(kbuf, buffer, left)) {
2294 			err = -EFAULT;
2295 			goto free;
2296 		}
2297 		kbuf[left] = 0;
2298 	}
2299 
2300 	for (; left && vleft--; i++, first=0) {
2301 		unsigned long lval;
2302 		bool neg;
2303 
2304 		if (write) {
2305 			left -= proc_skip_spaces(&kbuf);
2306 
2307 			if (!left)
2308 				break;
2309 			err = proc_get_long(&kbuf, &left, &lval, &neg,
2310 					     proc_wspace_sep,
2311 					     sizeof(proc_wspace_sep), NULL);
2312 			if (err)
2313 				break;
2314 			if (conv(&neg, &lval, i, 1, data)) {
2315 				err = -EINVAL;
2316 				break;
2317 			}
2318 		} else {
2319 			if (conv(&neg, &lval, i, 0, data)) {
2320 				err = -EINVAL;
2321 				break;
2322 			}
2323 			if (!first)
2324 				err = proc_put_char(&buffer, &left, '\t');
2325 			if (err)
2326 				break;
2327 			err = proc_put_long(&buffer, &left, lval, neg);
2328 			if (err)
2329 				break;
2330 		}
2331 	}
2332 
2333 	if (!write && !first && left && !err)
2334 		err = proc_put_char(&buffer, &left, '\n');
2335 	if (write && !err && left)
2336 		left -= proc_skip_spaces(&kbuf);
2337 free:
2338 	if (write) {
2339 		free_page(page);
2340 		if (first)
2341 			return err ? : -EINVAL;
2342 	}
2343 	*lenp -= left;
2344 	*ppos += *lenp;
2345 	return err;
2346 }
2347 
2348 static int do_proc_dointvec(struct ctl_table *table, int write,
2349 		  void __user *buffer, size_t *lenp, loff_t *ppos,
2350 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2351 			      int write, void *data),
2352 		  void *data)
2353 {
2354 	return __do_proc_dointvec(table->data, table, write,
2355 			buffer, lenp, ppos, conv, data);
2356 }
2357 
2358 /**
2359  * proc_dointvec - read a vector of integers
2360  * @table: the sysctl table
2361  * @write: %TRUE if this is a write to the sysctl file
2362  * @buffer: the user buffer
2363  * @lenp: the size of the user buffer
2364  * @ppos: file position
2365  *
2366  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2367  * values from/to the user buffer, treated as an ASCII string.
2368  *
2369  * Returns 0 on success.
2370  */
2371 int proc_dointvec(struct ctl_table *table, int write,
2372 		     void __user *buffer, size_t *lenp, loff_t *ppos)
2373 {
2374     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2375 		    	    NULL,NULL);
2376 }
2377 
2378 /*
2379  * Taint values can only be increased
2380  * This means we can safely use a temporary.
2381  */
2382 static int proc_taint(struct ctl_table *table, int write,
2383 			       void __user *buffer, size_t *lenp, loff_t *ppos)
2384 {
2385 	struct ctl_table t;
2386 	unsigned long tmptaint = get_taint();
2387 	int err;
2388 
2389 	if (write && !capable(CAP_SYS_ADMIN))
2390 		return -EPERM;
2391 
2392 	t = *table;
2393 	t.data = &tmptaint;
2394 	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2395 	if (err < 0)
2396 		return err;
2397 
2398 	if (write) {
2399 		/*
2400 		 * Poor man's atomic or. Not worth adding a primitive
2401 		 * to everyone's atomic.h for this
2402 		 */
2403 		int i;
2404 		for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2405 			if ((tmptaint >> i) & 1)
2406 				add_taint(i);
2407 		}
2408 	}
2409 
2410 	return err;
2411 }
2412 
2413 #ifdef CONFIG_PRINTK
2414 static int proc_dmesg_restrict(struct ctl_table *table, int write,
2415 				void __user *buffer, size_t *lenp, loff_t *ppos)
2416 {
2417 	if (write && !capable(CAP_SYS_ADMIN))
2418 		return -EPERM;
2419 
2420 	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2421 }
2422 #endif
2423 
/*
 * Bounds handed to do_proc_dointvec_minmax_conv(). A NULL pointer means
 * the corresponding bound is not enforced.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;	/* lowest accepted value, or NULL */
	int *max;	/* highest accepted value, or NULL */
};

/*
 * Converter that enforces optional bounds on written values.
 *
 * @negp:  sign of the value (true means negative)
 * @lvalp: magnitude of the value
 * @valp:  the int being written to (write) or read from (!write)
 * @write: non-zero to store sign/magnitude into *valp
 * @data:  struct do_proc_dointvec_minmax_conv_param with the bounds
 *
 * Returns 0 on success, or -EINVAL (leaving *valp untouched) when a
 * written value falls outside the bounds.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/*
			 * Widen before negating: "-val" overflows for
			 * INT_MIN, whereas unsigned negation is well
			 * defined and yields the correct magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2452 
2453 /**
2454  * proc_dointvec_minmax - read a vector of integers with min/max values
2455  * @table: the sysctl table
2456  * @write: %TRUE if this is a write to the sysctl file
2457  * @buffer: the user buffer
2458  * @lenp: the size of the user buffer
2459  * @ppos: file position
2460  *
2461  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2462  * values from/to the user buffer, treated as an ASCII string.
2463  *
2464  * This routine will ensure the values are within the range specified by
2465  * table->extra1 (min) and table->extra2 (max).
2466  *
2467  * Returns 0 on success.
2468  */
2469 int proc_dointvec_minmax(struct ctl_table *table, int write,
2470 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2471 {
2472 	struct do_proc_dointvec_minmax_conv_param param = {
2473 		.min = (int *) table->extra1,
2474 		.max = (int *) table->extra2,
2475 	};
2476 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2477 				do_proc_dointvec_minmax_conv, &param);
2478 }
2479 
2480 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2481 				     void __user *buffer,
2482 				     size_t *lenp, loff_t *ppos,
2483 				     unsigned long convmul,
2484 				     unsigned long convdiv)
2485 {
2486 	unsigned long *i, *min, *max;
2487 	int vleft, first = 1, err = 0;
2488 	unsigned long page = 0;
2489 	size_t left;
2490 	char *kbuf;
2491 
2492 	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2493 		*lenp = 0;
2494 		return 0;
2495 	}
2496 
2497 	i = (unsigned long *) data;
2498 	min = (unsigned long *) table->extra1;
2499 	max = (unsigned long *) table->extra2;
2500 	vleft = table->maxlen / sizeof(unsigned long);
2501 	left = *lenp;
2502 
2503 	if (write) {
2504 		if (left > PAGE_SIZE - 1)
2505 			left = PAGE_SIZE - 1;
2506 		page = __get_free_page(GFP_TEMPORARY);
2507 		kbuf = (char *) page;
2508 		if (!kbuf)
2509 			return -ENOMEM;
2510 		if (copy_from_user(kbuf, buffer, left)) {
2511 			err = -EFAULT;
2512 			goto free;
2513 		}
2514 		kbuf[left] = 0;
2515 	}
2516 
2517 	for (; left && vleft--; i++, first = 0) {
2518 		unsigned long val;
2519 
2520 		if (write) {
2521 			bool neg;
2522 
2523 			left -= proc_skip_spaces(&kbuf);
2524 
2525 			err = proc_get_long(&kbuf, &left, &val, &neg,
2526 					     proc_wspace_sep,
2527 					     sizeof(proc_wspace_sep), NULL);
2528 			if (err)
2529 				break;
2530 			if (neg)
2531 				continue;
2532 			if ((min && val < *min) || (max && val > *max))
2533 				continue;
2534 			*i = val;
2535 		} else {
2536 			val = convdiv * (*i) / convmul;
2537 			if (!first)
2538 				err = proc_put_char(&buffer, &left, '\t');
2539 			err = proc_put_long(&buffer, &left, val, false);
2540 			if (err)
2541 				break;
2542 		}
2543 	}
2544 
2545 	if (!write && !first && left && !err)
2546 		err = proc_put_char(&buffer, &left, '\n');
2547 	if (write && !err)
2548 		left -= proc_skip_spaces(&kbuf);
2549 free:
2550 	if (write) {
2551 		free_page(page);
2552 		if (first)
2553 			return err ? : -EINVAL;
2554 	}
2555 	*lenp -= left;
2556 	*ppos += *lenp;
2557 	return err;
2558 }
2559 
2560 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2561 				     void __user *buffer,
2562 				     size_t *lenp, loff_t *ppos,
2563 				     unsigned long convmul,
2564 				     unsigned long convdiv)
2565 {
2566 	return __do_proc_doulongvec_minmax(table->data, table, write,
2567 			buffer, lenp, ppos, convmul, convdiv);
2568 }
2569 
2570 /**
2571  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2572  * @table: the sysctl table
2573  * @write: %TRUE if this is a write to the sysctl file
2574  * @buffer: the user buffer
2575  * @lenp: the size of the user buffer
2576  * @ppos: file position
2577  *
2578  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2579  * values from/to the user buffer, treated as an ASCII string.
2580  *
2581  * This routine will ensure the values are within the range specified by
2582  * table->extra1 (min) and table->extra2 (max).
2583  *
2584  * Returns 0 on success.
2585  */
2586 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2587 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2588 {
2589     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2590 }
2591 
2592 /**
2593  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2594  * @table: the sysctl table
2595  * @write: %TRUE if this is a write to the sysctl file
2596  * @buffer: the user buffer
2597  * @lenp: the size of the user buffer
2598  * @ppos: file position
2599  *
2600  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2601  * values from/to the user buffer, treated as an ASCII string. The values
2602  * are treated as milliseconds, and converted to jiffies when they are stored.
2603  *
2604  * This routine will ensure the values are within the range specified by
2605  * table->extra1 (min) and table->extra2 (max).
2606  *
2607  * Returns 0 on success.
2608  */
2609 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2610 				      void __user *buffer,
2611 				      size_t *lenp, loff_t *ppos)
2612 {
2613     return do_proc_doulongvec_minmax(table, write, buffer,
2614 				     lenp, ppos, HZ, 1000l);
2615 }
2616 
2617 
2618 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2619 					 int *valp,
2620 					 int write, void *data)
2621 {
2622 	if (write) {
2623 		if (*lvalp > LONG_MAX / HZ)
2624 			return 1;
2625 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2626 	} else {
2627 		int val = *valp;
2628 		unsigned long lval;
2629 		if (val < 0) {
2630 			*negp = true;
2631 			lval = (unsigned long)-val;
2632 		} else {
2633 			*negp = false;
2634 			lval = (unsigned long)val;
2635 		}
2636 		*lvalp = lval / HZ;
2637 	}
2638 	return 0;
2639 }
2640 
2641 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2642 						int *valp,
2643 						int write, void *data)
2644 {
2645 	if (write) {
2646 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2647 			return 1;
2648 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2649 	} else {
2650 		int val = *valp;
2651 		unsigned long lval;
2652 		if (val < 0) {
2653 			*negp = true;
2654 			lval = (unsigned long)-val;
2655 		} else {
2656 			*negp = false;
2657 			lval = (unsigned long)val;
2658 		}
2659 		*lvalp = jiffies_to_clock_t(lval);
2660 	}
2661 	return 0;
2662 }
2663 
/*
 * Converter for values shown to user space in milliseconds and stored
 * in an int as jiffies.
 *
 * @negp:  sign of the value (true means negative)
 * @lvalp: magnitude of the value, in milliseconds
 * @valp:  the int (in jiffies) being written to or read from
 * @write: non-zero to store via msecs_to_jiffies(), zero to report via
 *         jiffies_to_msecs()
 * @data:  unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = true;
			/* Widen before negating so INT_MIN is handled. */
			lval = -(unsigned long)val;
		} else {
			*negp = false;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2684 
2685 /**
2686  * proc_dointvec_jiffies - read a vector of integers as seconds
2687  * @table: the sysctl table
2688  * @write: %TRUE if this is a write to the sysctl file
2689  * @buffer: the user buffer
2690  * @lenp: the size of the user buffer
2691  * @ppos: file position
2692  *
2693  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2694  * values from/to the user buffer, treated as an ASCII string.
2695  * The values read are assumed to be in seconds, and are converted into
2696  * jiffies.
2697  *
2698  * Returns 0 on success.
2699  */
2700 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2701 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2702 {
2703     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2704 		    	    do_proc_dointvec_jiffies_conv,NULL);
2705 }
2706 
2707 /**
2708  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2709  * @table: the sysctl table
2710  * @write: %TRUE if this is a write to the sysctl file
2711  * @buffer: the user buffer
2712  * @lenp: the size of the user buffer
2713  * @ppos: pointer to the file position
2714  *
2715  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2716  * values from/to the user buffer, treated as an ASCII string.
2717  * The values read are assumed to be in 1/USER_HZ seconds, and
2718  * are converted into jiffies.
2719  *
2720  * Returns 0 on success.
2721  */
2722 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2723 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2724 {
2725     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2726 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2727 }
2728 
2729 /**
2730  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2731  * @table: the sysctl table
2732  * @write: %TRUE if this is a write to the sysctl file
2733  * @buffer: the user buffer
2734  * @lenp: the size of the user buffer
2735  * @ppos: file position
2736  * @ppos: the current position in the file
2737  *
2738  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2739  * values from/to the user buffer, treated as an ASCII string.
2740  * The values read are assumed to be in 1/1000 seconds, and
2741  * are converted into jiffies.
2742  *
2743  * Returns 0 on success.
2744  */
2745 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2746 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2747 {
2748 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2749 				do_proc_dointvec_ms_jiffies_conv, NULL);
2750 }
2751 
2752 static int proc_do_cad_pid(struct ctl_table *table, int write,
2753 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2754 {
2755 	struct pid *new_pid;
2756 	pid_t tmp;
2757 	int r;
2758 
2759 	tmp = pid_vnr(cad_pid);
2760 
2761 	r = __do_proc_dointvec(&tmp, table, write, buffer,
2762 			       lenp, ppos, NULL, NULL);
2763 	if (r || !write)
2764 		return r;
2765 
2766 	new_pid = find_get_pid(tmp);
2767 	if (!new_pid)
2768 		return -ESRCH;
2769 
2770 	put_pid(xchg(&cad_pid, new_pid));
2771 	return 0;
2772 }
2773 
2774 /**
2775  * proc_do_large_bitmap - read/write from/to a large bitmap
2776  * @table: the sysctl table
2777  * @write: %TRUE if this is a write to the sysctl file
2778  * @buffer: the user buffer
2779  * @lenp: the size of the user buffer
2780  * @ppos: file position
2781  *
2782  * The bitmap is stored at table->data and the bitmap length (in bits)
2783  * in table->maxlen.
2784  *
2785  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
2786  * large bitmaps may be represented in a compact manner. Writing into
2787  * the file will clear the bitmap then update it with the given input.
2788  *
2789  * Returns 0 on success.
2790  */
2791 int proc_do_large_bitmap(struct ctl_table *table, int write,
2792 			 void __user *buffer, size_t *lenp, loff_t *ppos)
2793 {
2794 	int err = 0;
2795 	bool first = 1;
2796 	size_t left = *lenp;
2797 	unsigned long bitmap_len = table->maxlen;
2798 	unsigned long *bitmap = (unsigned long *) table->data;
2799 	unsigned long *tmp_bitmap = NULL;
2800 	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
2801 
2802 	if (!bitmap_len || !left || (*ppos && !write)) {
2803 		*lenp = 0;
2804 		return 0;
2805 	}
2806 
2807 	if (write) {
2808 		unsigned long page = 0;
2809 		char *kbuf;
2810 
2811 		if (left > PAGE_SIZE - 1)
2812 			left = PAGE_SIZE - 1;
2813 
2814 		page = __get_free_page(GFP_TEMPORARY);
2815 		kbuf = (char *) page;
2816 		if (!kbuf)
2817 			return -ENOMEM;
2818 		if (copy_from_user(kbuf, buffer, left)) {
2819 			free_page(page);
2820 			return -EFAULT;
2821                 }
2822 		kbuf[left] = 0;
2823 
2824 		tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
2825 				     GFP_KERNEL);
2826 		if (!tmp_bitmap) {
2827 			free_page(page);
2828 			return -ENOMEM;
2829 		}
2830 		proc_skip_char(&kbuf, &left, '\n');
2831 		while (!err && left) {
2832 			unsigned long val_a, val_b;
2833 			bool neg;
2834 
2835 			err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
2836 					     sizeof(tr_a), &c);
2837 			if (err)
2838 				break;
2839 			if (val_a >= bitmap_len || neg) {
2840 				err = -EINVAL;
2841 				break;
2842 			}
2843 
2844 			val_b = val_a;
2845 			if (left) {
2846 				kbuf++;
2847 				left--;
2848 			}
2849 
2850 			if (c == '-') {
2851 				err = proc_get_long(&kbuf, &left, &val_b,
2852 						     &neg, tr_b, sizeof(tr_b),
2853 						     &c);
2854 				if (err)
2855 					break;
2856 				if (val_b >= bitmap_len || neg ||
2857 				    val_a > val_b) {
2858 					err = -EINVAL;
2859 					break;
2860 				}
2861 				if (left) {
2862 					kbuf++;
2863 					left--;
2864 				}
2865 			}
2866 
2867 			while (val_a <= val_b)
2868 				set_bit(val_a++, tmp_bitmap);
2869 
2870 			first = 0;
2871 			proc_skip_char(&kbuf, &left, '\n');
2872 		}
2873 		free_page(page);
2874 	} else {
2875 		unsigned long bit_a, bit_b = 0;
2876 
2877 		while (left) {
2878 			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
2879 			if (bit_a >= bitmap_len)
2880 				break;
2881 			bit_b = find_next_zero_bit(bitmap, bitmap_len,
2882 						   bit_a + 1) - 1;
2883 
2884 			if (!first) {
2885 				err = proc_put_char(&buffer, &left, ',');
2886 				if (err)
2887 					break;
2888 			}
2889 			err = proc_put_long(&buffer, &left, bit_a, false);
2890 			if (err)
2891 				break;
2892 			if (bit_a != bit_b) {
2893 				err = proc_put_char(&buffer, &left, '-');
2894 				if (err)
2895 					break;
2896 				err = proc_put_long(&buffer, &left, bit_b, false);
2897 				if (err)
2898 					break;
2899 			}
2900 
2901 			first = 0; bit_b++;
2902 		}
2903 		if (!err)
2904 			err = proc_put_char(&buffer, &left, '\n');
2905 	}
2906 
2907 	if (!err) {
2908 		if (write) {
2909 			if (*ppos)
2910 				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2911 			else
2912 				memcpy(bitmap, tmp_bitmap,
2913 					BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
2914 		}
2915 		kfree(tmp_bitmap);
2916 		*lenp -= left;
2917 		*ppos += *lenp;
2918 		return 0;
2919 	} else {
2920 		kfree(tmp_bitmap);
2921 		return err;
2922 	}
2923 }
2924 
2925 #else /* CONFIG_PROC_SYSCTL */
2926 
2927 int proc_dostring(struct ctl_table *table, int write,
2928 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2929 {
2930 	return -ENOSYS;
2931 }
2932 
2933 int proc_dointvec(struct ctl_table *table, int write,
2934 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2935 {
2936 	return -ENOSYS;
2937 }
2938 
2939 int proc_dointvec_minmax(struct ctl_table *table, int write,
2940 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2941 {
2942 	return -ENOSYS;
2943 }
2944 
2945 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2946 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2947 {
2948 	return -ENOSYS;
2949 }
2950 
2951 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2952 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2953 {
2954 	return -ENOSYS;
2955 }
2956 
2957 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2958 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2959 {
2960 	return -ENOSYS;
2961 }
2962 
2963 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2964 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2965 {
2966 	return -ENOSYS;
2967 }
2968 
2969 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2970 				      void __user *buffer,
2971 				      size_t *lenp, loff_t *ppos)
2972 {
2973     return -ENOSYS;
2974 }
2975 
2976 
2977 #endif /* CONFIG_PROC_SYSCTL */
2978 
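/*
 * The proc handlers are each defined twice (once per CONFIG_PROC_SYSCTL
 * branch), so there is no sense in repeating the export after every
 * definition; they are collected here instead. Exception granted :-)
 */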
2983 EXPORT_SYMBOL(proc_dointvec);
2984 EXPORT_SYMBOL(proc_dointvec_jiffies);
2985 EXPORT_SYMBOL(proc_dointvec_minmax);
2986 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
2987 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
2988 EXPORT_SYMBOL(proc_dostring);
2989 EXPORT_SYMBOL(proc_doulongvec_minmax);
2990 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
2991 EXPORT_SYMBOL(register_sysctl_table);
2992 EXPORT_SYMBOL(register_sysctl_paths);
2993 EXPORT_SYMBOL(unregister_sysctl_table);
2994