xref: /linux/kernel/sysctl.c (revision da155d5b40587815a4397e1a69382fe2366d940b)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/signal.h>
27 #include <linux/printk.h>
28 #include <linux/proc_fs.h>
29 #include <linux/security.h>
30 #include <linux/ctype.h>
31 #include <linux/kmemcheck.h>
32 #include <linux/fs.h>
33 #include <linux/init.h>
34 #include <linux/kernel.h>
35 #include <linux/kobject.h>
36 #include <linux/net.h>
37 #include <linux/sysrq.h>
38 #include <linux/highuid.h>
39 #include <linux/writeback.h>
40 #include <linux/ratelimit.h>
41 #include <linux/compaction.h>
42 #include <linux/hugetlb.h>
43 #include <linux/initrd.h>
44 #include <linux/key.h>
45 #include <linux/times.h>
46 #include <linux/limits.h>
47 #include <linux/dcache.h>
48 #include <linux/dnotify.h>
49 #include <linux/syscalls.h>
50 #include <linux/vmstat.h>
51 #include <linux/nfs_fs.h>
52 #include <linux/acpi.h>
53 #include <linux/reboot.h>
54 #include <linux/ftrace.h>
55 #include <linux/perf_event.h>
56 #include <linux/kprobes.h>
57 #include <linux/pipe_fs_i.h>
58 #include <linux/oom.h>
59 #include <linux/kmod.h>
60 
61 #include <asm/uaccess.h>
62 #include <asm/processor.h>
63 
64 #ifdef CONFIG_X86
65 #include <asm/nmi.h>
66 #include <asm/stacktrace.h>
67 #include <asm/io.h>
68 #endif
69 #ifdef CONFIG_BSD_PROCESS_ACCT
70 #include <linux/acct.h>
71 #endif
72 #ifdef CONFIG_RT_MUTEXES
73 #include <linux/rtmutex.h>
74 #endif
75 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
76 #include <linux/lockdep.h>
77 #endif
78 #ifdef CONFIG_CHR_DEV_SG
79 #include <scsi/sg.h>
80 #endif
81 
82 #ifdef CONFIG_LOCKUP_DETECTOR
83 #include <linux/nmi.h>
84 #endif
85 
86 
87 #if defined(CONFIG_SYSCTL)
88 
/*
 * Variables defined in other translation units that have no shared
 * header declaring them, so they are declared here by hand.
 */

/* Backing storage for entries of kern_table. */
extern int		max_threads;
extern int		core_uses_pid;
extern char		core_pattern[];
extern unsigned int	core_pipe_limit;
extern int		pid_max;
extern int		pid_max_min;
extern int		pid_max_max;
extern int		compat_log;
extern int		latencytop_enabled;
#if defined(CONFIG_BLOCK)
extern int		blk_iopoll_enabled;
#endif

/* Backing storage for entries of vm_table. */
extern int		sysctl_overcommit_memory;
extern int		sysctl_overcommit_ratio;
extern int		min_free_kbytes;
extern int		sysctl_drop_caches;
extern int		percpu_pagelist_fraction;
#if !defined(CONFIG_MMU)
extern int		sysctl_nr_trim_pages;
#endif

/*
 * Not referenced by kern_table; presumably consumed by the tables
 * defined further down in this file.
 */
extern int		suid_dumpable;
extern int		sysctl_nr_open_min;
extern int		sysctl_nr_open_max;
111 
112 /* Constants used for minimum and  maximum */
113 #ifdef CONFIG_LOCKUP_DETECTOR
114 static int sixty = 60;
115 static int neg_one = -1;
116 #endif
117 
118 static int zero;
119 static int __maybe_unused one = 1;
120 static int __maybe_unused two = 2;
121 static int __maybe_unused three = 3;
122 static unsigned long one_ul = 1;
123 static int one_hundred = 100;
124 #ifdef CONFIG_PRINTK
125 static int ten_thousand = 10000;
126 #endif
127 
128 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
129 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
130 
131 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
132 static int maxolduid = 65535;
133 static int minolduid;
134 static int min_percpu_pagelist_fract = 8;
135 
136 static int ngroups_max = NGROUPS_MAX;
137 
138 #ifdef CONFIG_INOTIFY_USER
139 #include <linux/inotify.h>
140 #endif
141 #ifdef CONFIG_SPARC
142 #include <asm/system.h>
143 #endif
144 
/* Architecture-specific variables that are not declared in any header. */

#if defined(CONFIG_SPARC64)
extern int sysctl_tsb_ratio;
#endif /* CONFIG_SPARC64 */

#if defined(__hppa__)
extern int pwrsw_enabled;
extern int unaligned_enabled;
#endif /* __hppa__ */

#if defined(CONFIG_S390)
#if defined(CONFIG_MATHEMU)
extern int sysctl_ieee_emulation_warnings;
#endif /* CONFIG_MATHEMU */
/*
 * NOTE(review): the "userprocess_debug" entry of kern_table is backed by
 * show_unhandled_signals, not by this variable; this declaration may be
 * stale - confirm before relying on it.
 */
extern int sysctl_userprocess_debug;
extern int spin_retry;
#endif /* CONFIG_S390 */

#if defined(CONFIG_IA64)
extern int no_unaligned_warning;
extern int unaligned_dump_stack;
#endif /* CONFIG_IA64 */
166 
/*
 * Forward declarations of file-local handlers.  kern_table refers to them
 * by name, so they must be declared before the table.
 */
#if defined(CONFIG_PROC_SYSCTL)
/* Handler of the "cad_pid" entry. */
static int proc_do_cad_pid(struct ctl_table *table, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos);
/* Handler of the "tainted" entry. */
static int proc_taint(struct ctl_table *table, int write,
		      void __user *buffer, size_t *lenp, loff_t *ppos);
#endif /* CONFIG_PROC_SYSCTL */

#if defined(CONFIG_PRINTK)
/* Handler referenced from the CONFIG_PRINTK section of kern_table. */
static int proc_dmesg_restrict(struct ctl_table *table, int write,
			       void __user *buffer, size_t *lenp, loff_t *ppos);
#endif /* CONFIG_PRINTK */
178 
179 #ifdef CONFIG_MAGIC_SYSRQ
180 /* Note: sysrq code uses it's own private copy */
181 static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
182 
183 static int sysrq_sysctl_handler(ctl_table *table, int write,
184 				void __user *buffer, size_t *lenp,
185 				loff_t *ppos)
186 {
187 	int error;
188 
189 	error = proc_dointvec(table, write, buffer, lenp, ppos);
190 	if (error)
191 		return error;
192 
193 	if (write)
194 		sysrq_toggle_support(__sysrq_enabled);
195 
196 	return 0;
197 }
198 
199 #endif
200 
201 static struct ctl_table root_table[];
202 static struct ctl_table_root sysctl_table_root;
203 static struct ctl_table_header root_table_header = {
204 	{{.count = 1,
205 	.ctl_table = root_table,
206 	.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
207 	.root = &sysctl_table_root,
208 	.set = &sysctl_table_root.default_set,
209 };
210 static struct ctl_table_root sysctl_table_root = {
211 	.root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
212 	.default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
213 };
214 
215 static struct ctl_table kern_table[];
216 static struct ctl_table vm_table[];
217 static struct ctl_table fs_table[];
218 static struct ctl_table debug_table[];
219 static struct ctl_table dev_table[];
220 extern struct ctl_table random_table[];
221 #ifdef CONFIG_EPOLL
222 extern struct ctl_table epoll_table[];
223 #endif
224 
225 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
226 int sysctl_legacy_va_layout;
227 #endif
228 
229 /* The default sysctl tables: */
230 
231 static struct ctl_table root_table[] = {
232 	{
233 		.procname	= "kernel",
234 		.mode		= 0555,
235 		.child		= kern_table,
236 	},
237 	{
238 		.procname	= "vm",
239 		.mode		= 0555,
240 		.child		= vm_table,
241 	},
242 	{
243 		.procname	= "fs",
244 		.mode		= 0555,
245 		.child		= fs_table,
246 	},
247 	{
248 		.procname	= "debug",
249 		.mode		= 0555,
250 		.child		= debug_table,
251 	},
252 	{
253 		.procname	= "dev",
254 		.mode		= 0555,
255 		.child		= dev_table,
256 	},
257 	{ }
258 };
259 
#if defined(CONFIG_SCHED_DEBUG)
/* Bounds for sched_min_granularity_ns and sched_latency_ns: 100 usecs .. 1 second. */
static int min_sched_granularity_ns = 100000;
static int max_sched_granularity_ns = NSEC_PER_SEC;

/* Bounds for sched_wakeup_granularity_ns: 0 usecs .. 1 second. */
static int min_wakeup_granularity_ns;
static int max_wakeup_granularity_ns = NSEC_PER_SEC;

/* Bounds for sched_tunable_scaling: first .. last value of the enum. */
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END - 1;
#endif /* CONFIG_SCHED_DEBUG */

#if defined(CONFIG_COMPACTION)
/* Bounds for the extfrag_threshold entry of vm_table: 0 .. 1000. */
static int min_extfrag_threshold;
static int max_extfrag_threshold = 1000;
#endif /* CONFIG_COMPACTION */
273 
274 static struct ctl_table kern_table[] = {
275 	{
276 		.procname	= "sched_child_runs_first",
277 		.data		= &sysctl_sched_child_runs_first,
278 		.maxlen		= sizeof(unsigned int),
279 		.mode		= 0644,
280 		.proc_handler	= proc_dointvec,
281 	},
282 #ifdef CONFIG_SCHED_DEBUG
283 	{
284 		.procname	= "sched_min_granularity_ns",
285 		.data		= &sysctl_sched_min_granularity,
286 		.maxlen		= sizeof(unsigned int),
287 		.mode		= 0644,
288 		.proc_handler	= sched_proc_update_handler,
289 		.extra1		= &min_sched_granularity_ns,
290 		.extra2		= &max_sched_granularity_ns,
291 	},
292 	{
293 		.procname	= "sched_latency_ns",
294 		.data		= &sysctl_sched_latency,
295 		.maxlen		= sizeof(unsigned int),
296 		.mode		= 0644,
297 		.proc_handler	= sched_proc_update_handler,
298 		.extra1		= &min_sched_granularity_ns,
299 		.extra2		= &max_sched_granularity_ns,
300 	},
301 	{
302 		.procname	= "sched_wakeup_granularity_ns",
303 		.data		= &sysctl_sched_wakeup_granularity,
304 		.maxlen		= sizeof(unsigned int),
305 		.mode		= 0644,
306 		.proc_handler	= sched_proc_update_handler,
307 		.extra1		= &min_wakeup_granularity_ns,
308 		.extra2		= &max_wakeup_granularity_ns,
309 	},
310 	{
311 		.procname	= "sched_tunable_scaling",
312 		.data		= &sysctl_sched_tunable_scaling,
313 		.maxlen		= sizeof(enum sched_tunable_scaling),
314 		.mode		= 0644,
315 		.proc_handler	= sched_proc_update_handler,
316 		.extra1		= &min_sched_tunable_scaling,
317 		.extra2		= &max_sched_tunable_scaling,
318 	},
319 	{
320 		.procname	= "sched_migration_cost",
321 		.data		= &sysctl_sched_migration_cost,
322 		.maxlen		= sizeof(unsigned int),
323 		.mode		= 0644,
324 		.proc_handler	= proc_dointvec,
325 	},
326 	{
327 		.procname	= "sched_nr_migrate",
328 		.data		= &sysctl_sched_nr_migrate,
329 		.maxlen		= sizeof(unsigned int),
330 		.mode		= 0644,
331 		.proc_handler	= proc_dointvec,
332 	},
333 	{
334 		.procname	= "sched_time_avg",
335 		.data		= &sysctl_sched_time_avg,
336 		.maxlen		= sizeof(unsigned int),
337 		.mode		= 0644,
338 		.proc_handler	= proc_dointvec,
339 	},
340 	{
341 		.procname	= "sched_shares_window",
342 		.data		= &sysctl_sched_shares_window,
343 		.maxlen		= sizeof(unsigned int),
344 		.mode		= 0644,
345 		.proc_handler	= proc_dointvec,
346 	},
347 	{
348 		.procname	= "timer_migration",
349 		.data		= &sysctl_timer_migration,
350 		.maxlen		= sizeof(unsigned int),
351 		.mode		= 0644,
352 		.proc_handler	= proc_dointvec_minmax,
353 		.extra1		= &zero,
354 		.extra2		= &one,
355 	},
356 #endif
357 	{
358 		.procname	= "sched_rt_period_us",
359 		.data		= &sysctl_sched_rt_period,
360 		.maxlen		= sizeof(unsigned int),
361 		.mode		= 0644,
362 		.proc_handler	= sched_rt_handler,
363 	},
364 	{
365 		.procname	= "sched_rt_runtime_us",
366 		.data		= &sysctl_sched_rt_runtime,
367 		.maxlen		= sizeof(int),
368 		.mode		= 0644,
369 		.proc_handler	= sched_rt_handler,
370 	},
371 #ifdef CONFIG_SCHED_AUTOGROUP
372 	{
373 		.procname	= "sched_autogroup_enabled",
374 		.data		= &sysctl_sched_autogroup_enabled,
375 		.maxlen		= sizeof(unsigned int),
376 		.mode		= 0644,
377 		.proc_handler	= proc_dointvec_minmax,
378 		.extra1		= &zero,
379 		.extra2		= &one,
380 	},
381 #endif
382 #ifdef CONFIG_CFS_BANDWIDTH
383 	{
384 		.procname	= "sched_cfs_bandwidth_slice_us",
385 		.data		= &sysctl_sched_cfs_bandwidth_slice,
386 		.maxlen		= sizeof(unsigned int),
387 		.mode		= 0644,
388 		.proc_handler	= proc_dointvec_minmax,
389 		.extra1		= &one,
390 	},
391 #endif
392 #ifdef CONFIG_PROVE_LOCKING
393 	{
394 		.procname	= "prove_locking",
395 		.data		= &prove_locking,
396 		.maxlen		= sizeof(int),
397 		.mode		= 0644,
398 		.proc_handler	= proc_dointvec,
399 	},
400 #endif
401 #ifdef CONFIG_LOCK_STAT
402 	{
403 		.procname	= "lock_stat",
404 		.data		= &lock_stat,
405 		.maxlen		= sizeof(int),
406 		.mode		= 0644,
407 		.proc_handler	= proc_dointvec,
408 	},
409 #endif
410 	{
411 		.procname	= "panic",
412 		.data		= &panic_timeout,
413 		.maxlen		= sizeof(int),
414 		.mode		= 0644,
415 		.proc_handler	= proc_dointvec,
416 	},
417 	{
418 		.procname	= "core_uses_pid",
419 		.data		= &core_uses_pid,
420 		.maxlen		= sizeof(int),
421 		.mode		= 0644,
422 		.proc_handler	= proc_dointvec,
423 	},
424 	{
425 		.procname	= "core_pattern",
426 		.data		= core_pattern,
427 		.maxlen		= CORENAME_MAX_SIZE,
428 		.mode		= 0644,
429 		.proc_handler	= proc_dostring,
430 	},
431 	{
432 		.procname	= "core_pipe_limit",
433 		.data		= &core_pipe_limit,
434 		.maxlen		= sizeof(unsigned int),
435 		.mode		= 0644,
436 		.proc_handler	= proc_dointvec,
437 	},
438 #ifdef CONFIG_PROC_SYSCTL
439 	{
440 		.procname	= "tainted",
441 		.maxlen 	= sizeof(long),
442 		.mode		= 0644,
443 		.proc_handler	= proc_taint,
444 	},
445 #endif
446 #ifdef CONFIG_LATENCYTOP
447 	{
448 		.procname	= "latencytop",
449 		.data		= &latencytop_enabled,
450 		.maxlen		= sizeof(int),
451 		.mode		= 0644,
452 		.proc_handler	= proc_dointvec,
453 	},
454 #endif
455 #ifdef CONFIG_BLK_DEV_INITRD
456 	{
457 		.procname	= "real-root-dev",
458 		.data		= &real_root_dev,
459 		.maxlen		= sizeof(int),
460 		.mode		= 0644,
461 		.proc_handler	= proc_dointvec,
462 	},
463 #endif
464 	{
465 		.procname	= "print-fatal-signals",
466 		.data		= &print_fatal_signals,
467 		.maxlen		= sizeof(int),
468 		.mode		= 0644,
469 		.proc_handler	= proc_dointvec,
470 	},
471 #ifdef CONFIG_SPARC
472 	{
473 		.procname	= "reboot-cmd",
474 		.data		= reboot_command,
475 		.maxlen		= 256,
476 		.mode		= 0644,
477 		.proc_handler	= proc_dostring,
478 	},
479 	{
480 		.procname	= "stop-a",
481 		.data		= &stop_a_enabled,
482 		.maxlen		= sizeof (int),
483 		.mode		= 0644,
484 		.proc_handler	= proc_dointvec,
485 	},
486 	{
487 		.procname	= "scons-poweroff",
488 		.data		= &scons_pwroff,
489 		.maxlen		= sizeof (int),
490 		.mode		= 0644,
491 		.proc_handler	= proc_dointvec,
492 	},
493 #endif
494 #ifdef CONFIG_SPARC64
495 	{
496 		.procname	= "tsb-ratio",
497 		.data		= &sysctl_tsb_ratio,
498 		.maxlen		= sizeof (int),
499 		.mode		= 0644,
500 		.proc_handler	= proc_dointvec,
501 	},
502 #endif
503 #ifdef __hppa__
504 	{
505 		.procname	= "soft-power",
506 		.data		= &pwrsw_enabled,
507 		.maxlen		= sizeof (int),
508 	 	.mode		= 0644,
509 		.proc_handler	= proc_dointvec,
510 	},
511 	{
512 		.procname	= "unaligned-trap",
513 		.data		= &unaligned_enabled,
514 		.maxlen		= sizeof (int),
515 		.mode		= 0644,
516 		.proc_handler	= proc_dointvec,
517 	},
518 #endif
519 	{
520 		.procname	= "ctrl-alt-del",
521 		.data		= &C_A_D,
522 		.maxlen		= sizeof(int),
523 		.mode		= 0644,
524 		.proc_handler	= proc_dointvec,
525 	},
526 #ifdef CONFIG_FUNCTION_TRACER
527 	{
528 		.procname	= "ftrace_enabled",
529 		.data		= &ftrace_enabled,
530 		.maxlen		= sizeof(int),
531 		.mode		= 0644,
532 		.proc_handler	= ftrace_enable_sysctl,
533 	},
534 #endif
535 #ifdef CONFIG_STACK_TRACER
536 	{
537 		.procname	= "stack_tracer_enabled",
538 		.data		= &stack_tracer_enabled,
539 		.maxlen		= sizeof(int),
540 		.mode		= 0644,
541 		.proc_handler	= stack_trace_sysctl,
542 	},
543 #endif
544 #ifdef CONFIG_TRACING
545 	{
546 		.procname	= "ftrace_dump_on_oops",
547 		.data		= &ftrace_dump_on_oops,
548 		.maxlen		= sizeof(int),
549 		.mode		= 0644,
550 		.proc_handler	= proc_dointvec,
551 	},
552 #endif
553 #ifdef CONFIG_MODULES
554 	{
555 		.procname	= "modprobe",
556 		.data		= &modprobe_path,
557 		.maxlen		= KMOD_PATH_LEN,
558 		.mode		= 0644,
559 		.proc_handler	= proc_dostring,
560 	},
561 	{
562 		.procname	= "modules_disabled",
563 		.data		= &modules_disabled,
564 		.maxlen		= sizeof(int),
565 		.mode		= 0644,
566 		/* only handle a transition from default "0" to "1" */
567 		.proc_handler	= proc_dointvec_minmax,
568 		.extra1		= &one,
569 		.extra2		= &one,
570 	},
571 #endif
572 #ifdef CONFIG_HOTPLUG
573 	{
574 		.procname	= "hotplug",
575 		.data		= &uevent_helper,
576 		.maxlen		= UEVENT_HELPER_PATH_LEN,
577 		.mode		= 0644,
578 		.proc_handler	= proc_dostring,
579 	},
580 #endif
581 #ifdef CONFIG_CHR_DEV_SG
582 	{
583 		.procname	= "sg-big-buff",
584 		.data		= &sg_big_buff,
585 		.maxlen		= sizeof (int),
586 		.mode		= 0444,
587 		.proc_handler	= proc_dointvec,
588 	},
589 #endif
590 #ifdef CONFIG_BSD_PROCESS_ACCT
591 	{
592 		.procname	= "acct",
593 		.data		= &acct_parm,
594 		.maxlen		= 3*sizeof(int),
595 		.mode		= 0644,
596 		.proc_handler	= proc_dointvec,
597 	},
598 #endif
599 #ifdef CONFIG_MAGIC_SYSRQ
600 	{
601 		.procname	= "sysrq",
602 		.data		= &__sysrq_enabled,
603 		.maxlen		= sizeof (int),
604 		.mode		= 0644,
605 		.proc_handler	= sysrq_sysctl_handler,
606 	},
607 #endif
608 #ifdef CONFIG_PROC_SYSCTL
609 	{
610 		.procname	= "cad_pid",
611 		.data		= NULL,
612 		.maxlen		= sizeof (int),
613 		.mode		= 0600,
614 		.proc_handler	= proc_do_cad_pid,
615 	},
616 #endif
617 	{
618 		.procname	= "threads-max",
619 		.data		= &max_threads,
620 		.maxlen		= sizeof(int),
621 		.mode		= 0644,
622 		.proc_handler	= proc_dointvec,
623 	},
624 	{
625 		.procname	= "random",
626 		.mode		= 0555,
627 		.child		= random_table,
628 	},
629 	{
630 		.procname	= "usermodehelper",
631 		.mode		= 0555,
632 		.child		= usermodehelper_table,
633 	},
634 	{
635 		.procname	= "overflowuid",
636 		.data		= &overflowuid,
637 		.maxlen		= sizeof(int),
638 		.mode		= 0644,
639 		.proc_handler	= proc_dointvec_minmax,
640 		.extra1		= &minolduid,
641 		.extra2		= &maxolduid,
642 	},
643 	{
644 		.procname	= "overflowgid",
645 		.data		= &overflowgid,
646 		.maxlen		= sizeof(int),
647 		.mode		= 0644,
648 		.proc_handler	= proc_dointvec_minmax,
649 		.extra1		= &minolduid,
650 		.extra2		= &maxolduid,
651 	},
652 #ifdef CONFIG_S390
653 #ifdef CONFIG_MATHEMU
654 	{
655 		.procname	= "ieee_emulation_warnings",
656 		.data		= &sysctl_ieee_emulation_warnings,
657 		.maxlen		= sizeof(int),
658 		.mode		= 0644,
659 		.proc_handler	= proc_dointvec,
660 	},
661 #endif
662 	{
663 		.procname	= "userprocess_debug",
664 		.data		= &show_unhandled_signals,
665 		.maxlen		= sizeof(int),
666 		.mode		= 0644,
667 		.proc_handler	= proc_dointvec,
668 	},
669 #endif
670 	{
671 		.procname	= "pid_max",
672 		.data		= &pid_max,
673 		.maxlen		= sizeof (int),
674 		.mode		= 0644,
675 		.proc_handler	= proc_dointvec_minmax,
676 		.extra1		= &pid_max_min,
677 		.extra2		= &pid_max_max,
678 	},
679 	{
680 		.procname	= "panic_on_oops",
681 		.data		= &panic_on_oops,
682 		.maxlen		= sizeof(int),
683 		.mode		= 0644,
684 		.proc_handler	= proc_dointvec,
685 	},
686 #if defined CONFIG_PRINTK
687 	{
688 		.procname	= "printk",
689 		.data		= &console_loglevel,
690 		.maxlen		= 4*sizeof(int),
691 		.mode		= 0644,
692 		.proc_handler	= proc_dointvec,
693 	},
694 	{
695 		.procname	= "printk_ratelimit",
696 		.data		= &printk_ratelimit_state.interval,
697 		.maxlen		= sizeof(int),
698 		.mode		= 0644,
699 		.proc_handler	= proc_dointvec_jiffies,
700 	},
701 	{
702 		.procname	= "printk_ratelimit_burst",
703 		.data		= &printk_ratelimit_state.burst,
704 		.maxlen		= sizeof(int),
705 		.mode		= 0644,
706 		.proc_handler	= proc_dointvec,
707 	},
708 	{
709 		.procname	= "printk_delay",
710 		.data		= &printk_delay_msec,
711 		.maxlen		= sizeof(int),
712 		.mode		= 0644,
713 		.proc_handler	= proc_dointvec_minmax,
714 		.extra1		= &zero,
715 		.extra2		= &ten_thousand,
716 	},
717 	{
718 		.procname	= "dmesg_restrict",
719 		.data		= &dmesg_restrict,
720 		.maxlen		= sizeof(int),
721 		.mode		= 0644,
722 		.proc_handler	= proc_dointvec_minmax,
723 		.extra1		= &zero,
724 		.extra2		= &one,
725 	},
726 	{
727 		.procname	= "kptr_restrict",
728 		.data		= &kptr_restrict,
729 		.maxlen		= sizeof(int),
730 		.mode		= 0644,
731 		.proc_handler	= proc_dmesg_restrict,
732 		.extra1		= &zero,
733 		.extra2		= &two,
734 	},
735 #endif
736 	{
737 		.procname	= "ngroups_max",
738 		.data		= &ngroups_max,
739 		.maxlen		= sizeof (int),
740 		.mode		= 0444,
741 		.proc_handler	= proc_dointvec,
742 	},
743 #if defined(CONFIG_LOCKUP_DETECTOR)
744 	{
745 		.procname       = "watchdog",
746 		.data           = &watchdog_enabled,
747 		.maxlen         = sizeof (int),
748 		.mode           = 0644,
749 		.proc_handler   = proc_dowatchdog,
750 		.extra1		= &zero,
751 		.extra2		= &one,
752 	},
753 	{
754 		.procname	= "watchdog_thresh",
755 		.data		= &watchdog_thresh,
756 		.maxlen		= sizeof(int),
757 		.mode		= 0644,
758 		.proc_handler	= proc_dowatchdog,
759 		.extra1		= &neg_one,
760 		.extra2		= &sixty,
761 	},
762 	{
763 		.procname	= "softlockup_panic",
764 		.data		= &softlockup_panic,
765 		.maxlen		= sizeof(int),
766 		.mode		= 0644,
767 		.proc_handler	= proc_dointvec_minmax,
768 		.extra1		= &zero,
769 		.extra2		= &one,
770 	},
771 	{
772 		.procname       = "nmi_watchdog",
773 		.data           = &watchdog_enabled,
774 		.maxlen         = sizeof (int),
775 		.mode           = 0644,
776 		.proc_handler   = proc_dowatchdog,
777 		.extra1		= &zero,
778 		.extra2		= &one,
779 	},
780 #endif
781 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
782 	{
783 		.procname       = "unknown_nmi_panic",
784 		.data           = &unknown_nmi_panic,
785 		.maxlen         = sizeof (int),
786 		.mode           = 0644,
787 		.proc_handler   = proc_dointvec,
788 	},
789 #endif
790 #if defined(CONFIG_X86)
791 	{
792 		.procname	= "panic_on_unrecovered_nmi",
793 		.data		= &panic_on_unrecovered_nmi,
794 		.maxlen		= sizeof(int),
795 		.mode		= 0644,
796 		.proc_handler	= proc_dointvec,
797 	},
798 	{
799 		.procname	= "panic_on_io_nmi",
800 		.data		= &panic_on_io_nmi,
801 		.maxlen		= sizeof(int),
802 		.mode		= 0644,
803 		.proc_handler	= proc_dointvec,
804 	},
805 	{
806 		.procname	= "bootloader_type",
807 		.data		= &bootloader_type,
808 		.maxlen		= sizeof (int),
809 		.mode		= 0444,
810 		.proc_handler	= proc_dointvec,
811 	},
812 	{
813 		.procname	= "bootloader_version",
814 		.data		= &bootloader_version,
815 		.maxlen		= sizeof (int),
816 		.mode		= 0444,
817 		.proc_handler	= proc_dointvec,
818 	},
819 	{
820 		.procname	= "kstack_depth_to_print",
821 		.data		= &kstack_depth_to_print,
822 		.maxlen		= sizeof(int),
823 		.mode		= 0644,
824 		.proc_handler	= proc_dointvec,
825 	},
826 	{
827 		.procname	= "io_delay_type",
828 		.data		= &io_delay_type,
829 		.maxlen		= sizeof(int),
830 		.mode		= 0644,
831 		.proc_handler	= proc_dointvec,
832 	},
833 #endif
834 #if defined(CONFIG_MMU)
835 	{
836 		.procname	= "randomize_va_space",
837 		.data		= &randomize_va_space,
838 		.maxlen		= sizeof(int),
839 		.mode		= 0644,
840 		.proc_handler	= proc_dointvec,
841 	},
842 #endif
843 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
844 	{
845 		.procname	= "spin_retry",
846 		.data		= &spin_retry,
847 		.maxlen		= sizeof (int),
848 		.mode		= 0644,
849 		.proc_handler	= proc_dointvec,
850 	},
851 #endif
852 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
853 	{
854 		.procname	= "acpi_video_flags",
855 		.data		= &acpi_realmode_flags,
856 		.maxlen		= sizeof (unsigned long),
857 		.mode		= 0644,
858 		.proc_handler	= proc_doulongvec_minmax,
859 	},
860 #endif
861 #ifdef CONFIG_IA64
862 	{
863 		.procname	= "ignore-unaligned-usertrap",
864 		.data		= &no_unaligned_warning,
865 		.maxlen		= sizeof (int),
866 	 	.mode		= 0644,
867 		.proc_handler	= proc_dointvec,
868 	},
869 	{
870 		.procname	= "unaligned-dump-stack",
871 		.data		= &unaligned_dump_stack,
872 		.maxlen		= sizeof (int),
873 		.mode		= 0644,
874 		.proc_handler	= proc_dointvec,
875 	},
876 #endif
877 #ifdef CONFIG_DETECT_HUNG_TASK
878 	{
879 		.procname	= "hung_task_panic",
880 		.data		= &sysctl_hung_task_panic,
881 		.maxlen		= sizeof(int),
882 		.mode		= 0644,
883 		.proc_handler	= proc_dointvec_minmax,
884 		.extra1		= &zero,
885 		.extra2		= &one,
886 	},
887 	{
888 		.procname	= "hung_task_check_count",
889 		.data		= &sysctl_hung_task_check_count,
890 		.maxlen		= sizeof(unsigned long),
891 		.mode		= 0644,
892 		.proc_handler	= proc_doulongvec_minmax,
893 	},
894 	{
895 		.procname	= "hung_task_timeout_secs",
896 		.data		= &sysctl_hung_task_timeout_secs,
897 		.maxlen		= sizeof(unsigned long),
898 		.mode		= 0644,
899 		.proc_handler	= proc_dohung_task_timeout_secs,
900 	},
901 	{
902 		.procname	= "hung_task_warnings",
903 		.data		= &sysctl_hung_task_warnings,
904 		.maxlen		= sizeof(unsigned long),
905 		.mode		= 0644,
906 		.proc_handler	= proc_doulongvec_minmax,
907 	},
908 #endif
909 #ifdef CONFIG_COMPAT
910 	{
911 		.procname	= "compat-log",
912 		.data		= &compat_log,
913 		.maxlen		= sizeof (int),
914 	 	.mode		= 0644,
915 		.proc_handler	= proc_dointvec,
916 	},
917 #endif
918 #ifdef CONFIG_RT_MUTEXES
919 	{
920 		.procname	= "max_lock_depth",
921 		.data		= &max_lock_depth,
922 		.maxlen		= sizeof(int),
923 		.mode		= 0644,
924 		.proc_handler	= proc_dointvec,
925 	},
926 #endif
927 	{
928 		.procname	= "poweroff_cmd",
929 		.data		= &poweroff_cmd,
930 		.maxlen		= POWEROFF_CMD_PATH_LEN,
931 		.mode		= 0644,
932 		.proc_handler	= proc_dostring,
933 	},
934 #ifdef CONFIG_KEYS
935 	{
936 		.procname	= "keys",
937 		.mode		= 0555,
938 		.child		= key_sysctls,
939 	},
940 #endif
941 #ifdef CONFIG_RCU_TORTURE_TEST
942 	{
943 		.procname       = "rcutorture_runnable",
944 		.data           = &rcutorture_runnable,
945 		.maxlen         = sizeof(int),
946 		.mode           = 0644,
947 		.proc_handler	= proc_dointvec,
948 	},
949 #endif
950 #ifdef CONFIG_PERF_EVENTS
951 	/*
952 	 * User-space scripts rely on the existence of this file
953 	 * as a feature check for perf_events being enabled.
954 	 *
955 	 * So it's an ABI, do not remove!
956 	 */
957 	{
958 		.procname	= "perf_event_paranoid",
959 		.data		= &sysctl_perf_event_paranoid,
960 		.maxlen		= sizeof(sysctl_perf_event_paranoid),
961 		.mode		= 0644,
962 		.proc_handler	= proc_dointvec,
963 	},
964 	{
965 		.procname	= "perf_event_mlock_kb",
966 		.data		= &sysctl_perf_event_mlock,
967 		.maxlen		= sizeof(sysctl_perf_event_mlock),
968 		.mode		= 0644,
969 		.proc_handler	= proc_dointvec,
970 	},
971 	{
972 		.procname	= "perf_event_max_sample_rate",
973 		.data		= &sysctl_perf_event_sample_rate,
974 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
975 		.mode		= 0644,
976 		.proc_handler	= perf_proc_update_handler,
977 	},
978 #endif
979 #ifdef CONFIG_KMEMCHECK
980 	{
981 		.procname	= "kmemcheck",
982 		.data		= &kmemcheck_enabled,
983 		.maxlen		= sizeof(int),
984 		.mode		= 0644,
985 		.proc_handler	= proc_dointvec,
986 	},
987 #endif
988 #ifdef CONFIG_BLOCK
989 	{
990 		.procname	= "blk_iopoll",
991 		.data		= &blk_iopoll_enabled,
992 		.maxlen		= sizeof(int),
993 		.mode		= 0644,
994 		.proc_handler	= proc_dointvec,
995 	},
996 #endif
997 	{ }
998 };
999 
1000 static struct ctl_table vm_table[] = {
1001 	{
1002 		.procname	= "overcommit_memory",
1003 		.data		= &sysctl_overcommit_memory,
1004 		.maxlen		= sizeof(sysctl_overcommit_memory),
1005 		.mode		= 0644,
1006 		.proc_handler	= proc_dointvec_minmax,
1007 		.extra1		= &zero,
1008 		.extra2		= &two,
1009 	},
1010 	{
1011 		.procname	= "panic_on_oom",
1012 		.data		= &sysctl_panic_on_oom,
1013 		.maxlen		= sizeof(sysctl_panic_on_oom),
1014 		.mode		= 0644,
1015 		.proc_handler	= proc_dointvec_minmax,
1016 		.extra1		= &zero,
1017 		.extra2		= &two,
1018 	},
1019 	{
1020 		.procname	= "oom_kill_allocating_task",
1021 		.data		= &sysctl_oom_kill_allocating_task,
1022 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
1023 		.mode		= 0644,
1024 		.proc_handler	= proc_dointvec,
1025 	},
1026 	{
1027 		.procname	= "oom_dump_tasks",
1028 		.data		= &sysctl_oom_dump_tasks,
1029 		.maxlen		= sizeof(sysctl_oom_dump_tasks),
1030 		.mode		= 0644,
1031 		.proc_handler	= proc_dointvec,
1032 	},
1033 	{
1034 		.procname	= "overcommit_ratio",
1035 		.data		= &sysctl_overcommit_ratio,
1036 		.maxlen		= sizeof(sysctl_overcommit_ratio),
1037 		.mode		= 0644,
1038 		.proc_handler	= proc_dointvec,
1039 	},
1040 	{
1041 		.procname	= "page-cluster",
1042 		.data		= &page_cluster,
1043 		.maxlen		= sizeof(int),
1044 		.mode		= 0644,
1045 		.proc_handler	= proc_dointvec_minmax,
1046 		.extra1		= &zero,
1047 	},
1048 	{
1049 		.procname	= "dirty_background_ratio",
1050 		.data		= &dirty_background_ratio,
1051 		.maxlen		= sizeof(dirty_background_ratio),
1052 		.mode		= 0644,
1053 		.proc_handler	= dirty_background_ratio_handler,
1054 		.extra1		= &zero,
1055 		.extra2		= &one_hundred,
1056 	},
1057 	{
1058 		.procname	= "dirty_background_bytes",
1059 		.data		= &dirty_background_bytes,
1060 		.maxlen		= sizeof(dirty_background_bytes),
1061 		.mode		= 0644,
1062 		.proc_handler	= dirty_background_bytes_handler,
1063 		.extra1		= &one_ul,
1064 	},
1065 	{
1066 		.procname	= "dirty_ratio",
1067 		.data		= &vm_dirty_ratio,
1068 		.maxlen		= sizeof(vm_dirty_ratio),
1069 		.mode		= 0644,
1070 		.proc_handler	= dirty_ratio_handler,
1071 		.extra1		= &zero,
1072 		.extra2		= &one_hundred,
1073 	},
1074 	{
1075 		.procname	= "dirty_bytes",
1076 		.data		= &vm_dirty_bytes,
1077 		.maxlen		= sizeof(vm_dirty_bytes),
1078 		.mode		= 0644,
1079 		.proc_handler	= dirty_bytes_handler,
1080 		.extra1		= &dirty_bytes_min,
1081 	},
1082 	{
1083 		.procname	= "dirty_writeback_centisecs",
1084 		.data		= &dirty_writeback_interval,
1085 		.maxlen		= sizeof(dirty_writeback_interval),
1086 		.mode		= 0644,
1087 		.proc_handler	= dirty_writeback_centisecs_handler,
1088 	},
1089 	{
1090 		.procname	= "dirty_expire_centisecs",
1091 		.data		= &dirty_expire_interval,
1092 		.maxlen		= sizeof(dirty_expire_interval),
1093 		.mode		= 0644,
1094 		.proc_handler	= proc_dointvec_minmax,
1095 		.extra1		= &zero,
1096 	},
1097 	{
1098 		.procname	= "nr_pdflush_threads",
1099 		.data		= &nr_pdflush_threads,
1100 		.maxlen		= sizeof nr_pdflush_threads,
1101 		.mode		= 0444 /* read-only*/,
1102 		.proc_handler	= proc_dointvec,
1103 	},
1104 	{
1105 		.procname	= "swappiness",
1106 		.data		= &vm_swappiness,
1107 		.maxlen		= sizeof(vm_swappiness),
1108 		.mode		= 0644,
1109 		.proc_handler	= proc_dointvec_minmax,
1110 		.extra1		= &zero,
1111 		.extra2		= &one_hundred,
1112 	},
1113 #ifdef CONFIG_HUGETLB_PAGE
1114 	{
1115 		.procname	= "nr_hugepages",
1116 		.data		= NULL,
1117 		.maxlen		= sizeof(unsigned long),
1118 		.mode		= 0644,
1119 		.proc_handler	= hugetlb_sysctl_handler,
1120 		.extra1		= (void *)&hugetlb_zero,
1121 		.extra2		= (void *)&hugetlb_infinity,
1122 	},
1123 #ifdef CONFIG_NUMA
1124 	{
1125 		.procname       = "nr_hugepages_mempolicy",
1126 		.data           = NULL,
1127 		.maxlen         = sizeof(unsigned long),
1128 		.mode           = 0644,
1129 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1130 		.extra1		= (void *)&hugetlb_zero,
1131 		.extra2		= (void *)&hugetlb_infinity,
1132 	},
1133 #endif
1134 	 {
1135 		.procname	= "hugetlb_shm_group",
1136 		.data		= &sysctl_hugetlb_shm_group,
1137 		.maxlen		= sizeof(gid_t),
1138 		.mode		= 0644,
1139 		.proc_handler	= proc_dointvec,
1140 	 },
1141 	 {
1142 		.procname	= "hugepages_treat_as_movable",
1143 		.data		= &hugepages_treat_as_movable,
1144 		.maxlen		= sizeof(int),
1145 		.mode		= 0644,
1146 		.proc_handler	= hugetlb_treat_movable_handler,
1147 	},
1148 	{
1149 		.procname	= "nr_overcommit_hugepages",
1150 		.data		= NULL,
1151 		.maxlen		= sizeof(unsigned long),
1152 		.mode		= 0644,
1153 		.proc_handler	= hugetlb_overcommit_handler,
1154 		.extra1		= (void *)&hugetlb_zero,
1155 		.extra2		= (void *)&hugetlb_infinity,
1156 	},
1157 #endif
1158 	{
1159 		.procname	= "lowmem_reserve_ratio",
1160 		.data		= &sysctl_lowmem_reserve_ratio,
1161 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
1162 		.mode		= 0644,
1163 		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
1164 	},
1165 	{
1166 		.procname	= "drop_caches",
1167 		.data		= &sysctl_drop_caches,
1168 		.maxlen		= sizeof(int),
1169 		.mode		= 0644,
1170 		.proc_handler	= drop_caches_sysctl_handler,
1171 		.extra1		= &one,
1172 		.extra2		= &three,
1173 	},
1174 #ifdef CONFIG_COMPACTION
1175 	{
1176 		.procname	= "compact_memory",
1177 		.data		= &sysctl_compact_memory,
1178 		.maxlen		= sizeof(int),
1179 		.mode		= 0200,
1180 		.proc_handler	= sysctl_compaction_handler,
1181 	},
1182 	{
1183 		.procname	= "extfrag_threshold",
1184 		.data		= &sysctl_extfrag_threshold,
1185 		.maxlen		= sizeof(int),
1186 		.mode		= 0644,
1187 		.proc_handler	= sysctl_extfrag_handler,
1188 		.extra1		= &min_extfrag_threshold,
1189 		.extra2		= &max_extfrag_threshold,
1190 	},
1191 
1192 #endif /* CONFIG_COMPACTION */
1193 	{
1194 		.procname	= "min_free_kbytes",
1195 		.data		= &min_free_kbytes,
1196 		.maxlen		= sizeof(min_free_kbytes),
1197 		.mode		= 0644,
1198 		.proc_handler	= min_free_kbytes_sysctl_handler,
1199 		.extra1		= &zero,
1200 	},
1201 	{
1202 		.procname	= "percpu_pagelist_fraction",
1203 		.data		= &percpu_pagelist_fraction,
1204 		.maxlen		= sizeof(percpu_pagelist_fraction),
1205 		.mode		= 0644,
1206 		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
1207 		.extra1		= &min_percpu_pagelist_fract,
1208 	},
1209 #ifdef CONFIG_MMU
1210 	{
1211 		.procname	= "max_map_count",
1212 		.data		= &sysctl_max_map_count,
1213 		.maxlen		= sizeof(sysctl_max_map_count),
1214 		.mode		= 0644,
1215 		.proc_handler	= proc_dointvec_minmax,
1216 		.extra1		= &zero,
1217 	},
1218 #else
1219 	{
1220 		.procname	= "nr_trim_pages",
1221 		.data		= &sysctl_nr_trim_pages,
1222 		.maxlen		= sizeof(sysctl_nr_trim_pages),
1223 		.mode		= 0644,
1224 		.proc_handler	= proc_dointvec_minmax,
1225 		.extra1		= &zero,
1226 	},
1227 #endif
1228 	{
1229 		.procname	= "laptop_mode",
1230 		.data		= &laptop_mode,
1231 		.maxlen		= sizeof(laptop_mode),
1232 		.mode		= 0644,
1233 		.proc_handler	= proc_dointvec_jiffies,
1234 	},
1235 	{
1236 		.procname	= "block_dump",
1237 		.data		= &block_dump,
1238 		.maxlen		= sizeof(block_dump),
1239 		.mode		= 0644,
1240 		.proc_handler	= proc_dointvec,
1241 		.extra1		= &zero,
1242 	},
1243 	{
1244 		.procname	= "vfs_cache_pressure",
1245 		.data		= &sysctl_vfs_cache_pressure,
1246 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
1247 		.mode		= 0644,
1248 		.proc_handler	= proc_dointvec,
1249 		.extra1		= &zero,
1250 	},
1251 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1252 	{
1253 		.procname	= "legacy_va_layout",
1254 		.data		= &sysctl_legacy_va_layout,
1255 		.maxlen		= sizeof(sysctl_legacy_va_layout),
1256 		.mode		= 0644,
1257 		.proc_handler	= proc_dointvec,
1258 		.extra1		= &zero,
1259 	},
1260 #endif
1261 #ifdef CONFIG_NUMA
1262 	{
1263 		.procname	= "zone_reclaim_mode",
1264 		.data		= &zone_reclaim_mode,
1265 		.maxlen		= sizeof(zone_reclaim_mode),
1266 		.mode		= 0644,
1267 		.proc_handler	= proc_dointvec,
1268 		.extra1		= &zero,
1269 	},
1270 	{
1271 		.procname	= "min_unmapped_ratio",
1272 		.data		= &sysctl_min_unmapped_ratio,
1273 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
1274 		.mode		= 0644,
1275 		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
1276 		.extra1		= &zero,
1277 		.extra2		= &one_hundred,
1278 	},
1279 	{
1280 		.procname	= "min_slab_ratio",
1281 		.data		= &sysctl_min_slab_ratio,
1282 		.maxlen		= sizeof(sysctl_min_slab_ratio),
1283 		.mode		= 0644,
1284 		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
1285 		.extra1		= &zero,
1286 		.extra2		= &one_hundred,
1287 	},
1288 #endif
1289 #ifdef CONFIG_SMP
1290 	{
1291 		.procname	= "stat_interval",
1292 		.data		= &sysctl_stat_interval,
1293 		.maxlen		= sizeof(sysctl_stat_interval),
1294 		.mode		= 0644,
1295 		.proc_handler	= proc_dointvec_jiffies,
1296 	},
1297 #endif
1298 #ifdef CONFIG_MMU
1299 	{
1300 		.procname	= "mmap_min_addr",
1301 		.data		= &dac_mmap_min_addr,
1302 		.maxlen		= sizeof(unsigned long),
1303 		.mode		= 0644,
1304 		.proc_handler	= mmap_min_addr_handler,
1305 	},
1306 #endif
1307 #ifdef CONFIG_NUMA
1308 	{
1309 		.procname	= "numa_zonelist_order",
1310 		.data		= &numa_zonelist_order,
1311 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1312 		.mode		= 0644,
1313 		.proc_handler	= numa_zonelist_order_handler,
1314 	},
1315 #endif
1316 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1317    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1318 	{
1319 		.procname	= "vdso_enabled",
1320 		.data		= &vdso_enabled,
1321 		.maxlen		= sizeof(vdso_enabled),
1322 		.mode		= 0644,
1323 		.proc_handler	= proc_dointvec,
1324 		.extra1		= &zero,
1325 	},
1326 #endif
1327 #ifdef CONFIG_HIGHMEM
1328 	{
1329 		.procname	= "highmem_is_dirtyable",
1330 		.data		= &vm_highmem_is_dirtyable,
1331 		.maxlen		= sizeof(vm_highmem_is_dirtyable),
1332 		.mode		= 0644,
1333 		.proc_handler	= proc_dointvec_minmax,
1334 		.extra1		= &zero,
1335 		.extra2		= &one,
1336 	},
1337 #endif
1338 	{
1339 		.procname	= "scan_unevictable_pages",
1340 		.data		= &scan_unevictable_pages,
1341 		.maxlen		= sizeof(scan_unevictable_pages),
1342 		.mode		= 0644,
1343 		.proc_handler	= scan_unevictable_handler,
1344 	},
1345 #ifdef CONFIG_MEMORY_FAILURE
1346 	{
1347 		.procname	= "memory_failure_early_kill",
1348 		.data		= &sysctl_memory_failure_early_kill,
1349 		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
1350 		.mode		= 0644,
1351 		.proc_handler	= proc_dointvec_minmax,
1352 		.extra1		= &zero,
1353 		.extra2		= &one,
1354 	},
1355 	{
1356 		.procname	= "memory_failure_recovery",
1357 		.data		= &sysctl_memory_failure_recovery,
1358 		.maxlen		= sizeof(sysctl_memory_failure_recovery),
1359 		.mode		= 0644,
1360 		.proc_handler	= proc_dointvec_minmax,
1361 		.extra1		= &zero,
1362 		.extra2		= &one,
1363 	},
1364 #endif
1365 	{ }
1366 };
1367 
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Empty table: fs_table references it as the child of the "binfmt_misc"
 * directory entry, so the directory exists with no sysctl files in it.
 */
static struct ctl_table binfmt_misc_table[] = {
	{ }	/* sentinel */
};
#endif
1373 
1374 static struct ctl_table fs_table[] = {
1375 	{
1376 		.procname	= "inode-nr",
1377 		.data		= &inodes_stat,
1378 		.maxlen		= 2*sizeof(int),
1379 		.mode		= 0444,
1380 		.proc_handler	= proc_nr_inodes,
1381 	},
1382 	{
1383 		.procname	= "inode-state",
1384 		.data		= &inodes_stat,
1385 		.maxlen		= 7*sizeof(int),
1386 		.mode		= 0444,
1387 		.proc_handler	= proc_nr_inodes,
1388 	},
1389 	{
1390 		.procname	= "file-nr",
1391 		.data		= &files_stat,
1392 		.maxlen		= sizeof(files_stat),
1393 		.mode		= 0444,
1394 		.proc_handler	= proc_nr_files,
1395 	},
1396 	{
1397 		.procname	= "file-max",
1398 		.data		= &files_stat.max_files,
1399 		.maxlen		= sizeof(files_stat.max_files),
1400 		.mode		= 0644,
1401 		.proc_handler	= proc_doulongvec_minmax,
1402 	},
1403 	{
1404 		.procname	= "nr_open",
1405 		.data		= &sysctl_nr_open,
1406 		.maxlen		= sizeof(int),
1407 		.mode		= 0644,
1408 		.proc_handler	= proc_dointvec_minmax,
1409 		.extra1		= &sysctl_nr_open_min,
1410 		.extra2		= &sysctl_nr_open_max,
1411 	},
1412 	{
1413 		.procname	= "dentry-state",
1414 		.data		= &dentry_stat,
1415 		.maxlen		= 6*sizeof(int),
1416 		.mode		= 0444,
1417 		.proc_handler	= proc_nr_dentry,
1418 	},
1419 	{
1420 		.procname	= "overflowuid",
1421 		.data		= &fs_overflowuid,
1422 		.maxlen		= sizeof(int),
1423 		.mode		= 0644,
1424 		.proc_handler	= proc_dointvec_minmax,
1425 		.extra1		= &minolduid,
1426 		.extra2		= &maxolduid,
1427 	},
1428 	{
1429 		.procname	= "overflowgid",
1430 		.data		= &fs_overflowgid,
1431 		.maxlen		= sizeof(int),
1432 		.mode		= 0644,
1433 		.proc_handler	= proc_dointvec_minmax,
1434 		.extra1		= &minolduid,
1435 		.extra2		= &maxolduid,
1436 	},
1437 #ifdef CONFIG_FILE_LOCKING
1438 	{
1439 		.procname	= "leases-enable",
1440 		.data		= &leases_enable,
1441 		.maxlen		= sizeof(int),
1442 		.mode		= 0644,
1443 		.proc_handler	= proc_dointvec,
1444 	},
1445 #endif
1446 #ifdef CONFIG_DNOTIFY
1447 	{
1448 		.procname	= "dir-notify-enable",
1449 		.data		= &dir_notify_enable,
1450 		.maxlen		= sizeof(int),
1451 		.mode		= 0644,
1452 		.proc_handler	= proc_dointvec,
1453 	},
1454 #endif
1455 #ifdef CONFIG_MMU
1456 #ifdef CONFIG_FILE_LOCKING
1457 	{
1458 		.procname	= "lease-break-time",
1459 		.data		= &lease_break_time,
1460 		.maxlen		= sizeof(int),
1461 		.mode		= 0644,
1462 		.proc_handler	= proc_dointvec,
1463 	},
1464 #endif
1465 #ifdef CONFIG_AIO
1466 	{
1467 		.procname	= "aio-nr",
1468 		.data		= &aio_nr,
1469 		.maxlen		= sizeof(aio_nr),
1470 		.mode		= 0444,
1471 		.proc_handler	= proc_doulongvec_minmax,
1472 	},
1473 	{
1474 		.procname	= "aio-max-nr",
1475 		.data		= &aio_max_nr,
1476 		.maxlen		= sizeof(aio_max_nr),
1477 		.mode		= 0644,
1478 		.proc_handler	= proc_doulongvec_minmax,
1479 	},
1480 #endif /* CONFIG_AIO */
1481 #ifdef CONFIG_INOTIFY_USER
1482 	{
1483 		.procname	= "inotify",
1484 		.mode		= 0555,
1485 		.child		= inotify_table,
1486 	},
1487 #endif
1488 #ifdef CONFIG_EPOLL
1489 	{
1490 		.procname	= "epoll",
1491 		.mode		= 0555,
1492 		.child		= epoll_table,
1493 	},
1494 #endif
1495 #endif
1496 	{
1497 		.procname	= "suid_dumpable",
1498 		.data		= &suid_dumpable,
1499 		.maxlen		= sizeof(int),
1500 		.mode		= 0644,
1501 		.proc_handler	= proc_dointvec_minmax,
1502 		.extra1		= &zero,
1503 		.extra2		= &two,
1504 	},
1505 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1506 	{
1507 		.procname	= "binfmt_misc",
1508 		.mode		= 0555,
1509 		.child		= binfmt_misc_table,
1510 	},
1511 #endif
1512 	{
1513 		.procname	= "pipe-max-size",
1514 		.data		= &pipe_max_size,
1515 		.maxlen		= sizeof(int),
1516 		.mode		= 0644,
1517 		.proc_handler	= &pipe_proc_fn,
1518 		.extra1		= &pipe_min_size,
1519 	},
1520 	{ }
1521 };
1522 
1523 static struct ctl_table debug_table[] = {
1524 #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
1525     defined(CONFIG_S390) || defined(CONFIG_TILE)
1526 	{
1527 		.procname	= "exception-trace",
1528 		.data		= &show_unhandled_signals,
1529 		.maxlen		= sizeof(int),
1530 		.mode		= 0644,
1531 		.proc_handler	= proc_dointvec
1532 	},
1533 #endif
1534 #if defined(CONFIG_OPTPROBES)
1535 	{
1536 		.procname	= "kprobes-optimization",
1537 		.data		= &sysctl_kprobes_optimization,
1538 		.maxlen		= sizeof(int),
1539 		.mode		= 0644,
1540 		.proc_handler	= proc_kprobes_optimization_handler,
1541 		.extra1		= &zero,
1542 		.extra2		= &one,
1543 	},
1544 #endif
1545 	{ }
1546 };
1547 
1548 static struct ctl_table dev_table[] = {
1549 	{ }
1550 };
1551 
1552 static DEFINE_SPINLOCK(sysctl_lock);
1553 
1554 /* called under sysctl_lock */
1555 static int use_table(struct ctl_table_header *p)
1556 {
1557 	if (unlikely(p->unregistering))
1558 		return 0;
1559 	p->used++;
1560 	return 1;
1561 }
1562 
1563 /* called under sysctl_lock */
1564 static void unuse_table(struct ctl_table_header *p)
1565 {
1566 	if (!--p->used)
1567 		if (unlikely(p->unregistering))
1568 			complete(p->unregistering);
1569 }
1570 
1571 /* called under sysctl_lock, will reacquire if has to wait */
1572 static void start_unregistering(struct ctl_table_header *p)
1573 {
1574 	/*
1575 	 * if p->used is 0, nobody will ever touch that entry again;
1576 	 * we'll eliminate all paths to it before dropping sysctl_lock
1577 	 */
1578 	if (unlikely(p->used)) {
1579 		struct completion wait;
1580 		init_completion(&wait);
1581 		p->unregistering = &wait;
1582 		spin_unlock(&sysctl_lock);
1583 		wait_for_completion(&wait);
1584 		spin_lock(&sysctl_lock);
1585 	} else {
1586 		/* anything non-NULL; we'll never dereference it */
1587 		p->unregistering = ERR_PTR(-EINVAL);
1588 	}
1589 	/*
1590 	 * do not remove from the list until nobody holds it; walking the
1591 	 * list in do_sysctl() relies on that.
1592 	 */
1593 	list_del_init(&p->ctl_entry);
1594 }
1595 
1596 void sysctl_head_get(struct ctl_table_header *head)
1597 {
1598 	spin_lock(&sysctl_lock);
1599 	head->count++;
1600 	spin_unlock(&sysctl_lock);
1601 }
1602 
1603 void sysctl_head_put(struct ctl_table_header *head)
1604 {
1605 	spin_lock(&sysctl_lock);
1606 	if (!--head->count)
1607 		kfree_rcu(head, rcu);
1608 	spin_unlock(&sysctl_lock);
1609 }
1610 
1611 struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1612 {
1613 	if (!head)
1614 		BUG();
1615 	spin_lock(&sysctl_lock);
1616 	if (!use_table(head))
1617 		head = ERR_PTR(-ENOENT);
1618 	spin_unlock(&sysctl_lock);
1619 	return head;
1620 }
1621 
1622 void sysctl_head_finish(struct ctl_table_header *head)
1623 {
1624 	if (!head)
1625 		return;
1626 	spin_lock(&sysctl_lock);
1627 	unuse_table(head);
1628 	spin_unlock(&sysctl_lock);
1629 }
1630 
1631 static struct ctl_table_set *
1632 lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1633 {
1634 	struct ctl_table_set *set = &root->default_set;
1635 	if (root->lookup)
1636 		set = root->lookup(root, namespaces);
1637 	return set;
1638 }
1639 
1640 static struct list_head *
1641 lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1642 {
1643 	struct ctl_table_set *set = lookup_header_set(root, namespaces);
1644 	return &set->list;
1645 }
1646 
1647 struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
1648 					    struct ctl_table_header *prev)
1649 {
1650 	struct ctl_table_root *root;
1651 	struct list_head *header_list;
1652 	struct ctl_table_header *head;
1653 	struct list_head *tmp;
1654 
1655 	spin_lock(&sysctl_lock);
1656 	if (prev) {
1657 		head = prev;
1658 		tmp = &prev->ctl_entry;
1659 		unuse_table(prev);
1660 		goto next;
1661 	}
1662 	tmp = &root_table_header.ctl_entry;
1663 	for (;;) {
1664 		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1665 
1666 		if (!use_table(head))
1667 			goto next;
1668 		spin_unlock(&sysctl_lock);
1669 		return head;
1670 	next:
1671 		root = head->root;
1672 		tmp = tmp->next;
1673 		header_list = lookup_header_list(root, namespaces);
1674 		if (tmp != header_list)
1675 			continue;
1676 
1677 		do {
1678 			root = list_entry(root->root_list.next,
1679 					struct ctl_table_root, root_list);
1680 			if (root == &sysctl_table_root)
1681 				goto out;
1682 			header_list = lookup_header_list(root, namespaces);
1683 		} while (list_empty(header_list));
1684 		tmp = header_list->next;
1685 	}
1686 out:
1687 	spin_unlock(&sysctl_lock);
1688 	return NULL;
1689 }
1690 
1691 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1692 {
1693 	return __sysctl_head_next(current->nsproxy, prev);
1694 }
1695 
1696 void register_sysctl_root(struct ctl_table_root *root)
1697 {
1698 	spin_lock(&sysctl_lock);
1699 	list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1700 	spin_unlock(&sysctl_lock);
1701 }
1702 
1703 /*
1704  * sysctl_perm does NOT grant the superuser all rights automatically, because
1705  * some sysctl variables are readonly even to root.
1706  */
1707 
1708 static int test_perm(int mode, int op)
1709 {
1710 	if (!current_euid())
1711 		mode >>= 6;
1712 	else if (in_egroup_p(0))
1713 		mode >>= 3;
1714 	if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1715 		return 0;
1716 	return -EACCES;
1717 }
1718 
1719 int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1720 {
1721 	int mode;
1722 
1723 	if (root->permissions)
1724 		mode = root->permissions(root, current->nsproxy, table);
1725 	else
1726 		mode = table->mode;
1727 
1728 	return test_perm(mode, op);
1729 }
1730 
1731 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1732 {
1733 	for (; table->procname; table++) {
1734 		table->parent = parent;
1735 		if (table->child)
1736 			sysctl_set_parent(table, table->child);
1737 	}
1738 }
1739 
1740 static __init int sysctl_init(void)
1741 {
1742 	sysctl_set_parent(NULL, root_table);
1743 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1744 	sysctl_check_table(current->nsproxy, root_table);
1745 #endif
1746 	return 0;
1747 }
1748 
1749 core_initcall(sysctl_init);
1750 
1751 static struct ctl_table *is_branch_in(struct ctl_table *branch,
1752 				      struct ctl_table *table)
1753 {
1754 	struct ctl_table *p;
1755 	const char *s = branch->procname;
1756 
1757 	/* branch should have named subdirectory as its first element */
1758 	if (!s || !branch->child)
1759 		return NULL;
1760 
1761 	/* ... and nothing else */
1762 	if (branch[1].procname)
1763 		return NULL;
1764 
1765 	/* table should contain subdirectory with the same name */
1766 	for (p = table; p->procname; p++) {
1767 		if (!p->child)
1768 			continue;
1769 		if (p->procname && strcmp(p->procname, s) == 0)
1770 			return p;
1771 	}
1772 	return NULL;
1773 }
1774 
1775 /* see if attaching q to p would be an improvement */
1776 static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1777 {
1778 	struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1779 	struct ctl_table *next;
1780 	int is_better = 0;
1781 	int not_in_parent = !p->attached_by;
1782 
1783 	while ((next = is_branch_in(by, to)) != NULL) {
1784 		if (by == q->attached_by)
1785 			is_better = 1;
1786 		if (to == p->attached_by)
1787 			not_in_parent = 1;
1788 		by = by->child;
1789 		to = next->child;
1790 	}
1791 
1792 	if (is_better && not_in_parent) {
1793 		q->attached_by = by;
1794 		q->attached_to = to;
1795 		q->parent = p;
1796 	}
1797 }
1798 
1799 /**
1800  * __register_sysctl_paths - register a sysctl hierarchy
1801  * @root: List of sysctl headers to register on
1802  * @namespaces: Data to compute which lists of sysctl entries are visible
1803  * @path: The path to the directory the sysctl table is in.
1804  * @table: the top-level table structure
1805  *
1806  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1807  * array. A completely 0 filled entry terminates the table.
1808  *
1809  * The members of the &struct ctl_table structure are used as follows:
1810  *
1811  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1812  *            enter a sysctl file
1813  *
1814  * data - a pointer to data for use by proc_handler
1815  *
1816  * maxlen - the maximum size in bytes of the data
1817  *
1818  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1819  *
1820  * child - a pointer to the child sysctl table if this entry is a directory, or
1821  *         %NULL.
1822  *
1823  * proc_handler - the text handler routine (described below)
1824  *
1825  * de - for internal use by the sysctl routines
1826  *
1827  * extra1, extra2 - extra pointers usable by the proc handler routines
1828  *
1829  * Leaf nodes in the sysctl tree will be represented by a single file
1830  * under /proc; non-leaf nodes will be represented by directories.
1831  *
1832  * sysctl(2) can automatically manage read and write requests through
1833  * the sysctl table.  The data and maxlen fields of the ctl_table
1834  * struct enable minimal validation of the values being written to be
1835  * performed, and the mode field allows minimal authentication.
1836  *
1837  * There must be a proc_handler routine for any terminal nodes
1838  * mirrored under /proc/sys (non-terminals are handled by a built-in
1839  * directory handler).  Several default handlers are available to
1840  * cover common cases -
1841  *
1842  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1843  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1844  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1845  *
1846  * It is the handler's job to read the input buffer from user memory
1847  * and process it. The handler should return 0 on success.
1848  *
1849  * This routine returns %NULL on a failure to register, and a pointer
1850  * to the table header on success.
1851  */
1852 struct ctl_table_header *__register_sysctl_paths(
1853 	struct ctl_table_root *root,
1854 	struct nsproxy *namespaces,
1855 	const struct ctl_path *path, struct ctl_table *table)
1856 {
1857 	struct ctl_table_header *header;
1858 	struct ctl_table *new, **prevp;
1859 	unsigned int n, npath;
1860 	struct ctl_table_set *set;
1861 
1862 	/* Count the path components */
1863 	for (npath = 0; path[npath].procname; ++npath)
1864 		;
1865 
1866 	/*
1867 	 * For each path component, allocate a 2-element ctl_table array.
1868 	 * The first array element will be filled with the sysctl entry
1869 	 * for this, the second will be the sentinel (procname == 0).
1870 	 *
1871 	 * We allocate everything in one go so that we don't have to
1872 	 * worry about freeing additional memory in unregister_sysctl_table.
1873 	 */
1874 	header = kzalloc(sizeof(struct ctl_table_header) +
1875 			 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
1876 	if (!header)
1877 		return NULL;
1878 
1879 	new = (struct ctl_table *) (header + 1);
1880 
1881 	/* Now connect the dots */
1882 	prevp = &header->ctl_table;
1883 	for (n = 0; n < npath; ++n, ++path) {
1884 		/* Copy the procname */
1885 		new->procname = path->procname;
1886 		new->mode     = 0555;
1887 
1888 		*prevp = new;
1889 		prevp = &new->child;
1890 
1891 		new += 2;
1892 	}
1893 	*prevp = table;
1894 	header->ctl_table_arg = table;
1895 
1896 	INIT_LIST_HEAD(&header->ctl_entry);
1897 	header->used = 0;
1898 	header->unregistering = NULL;
1899 	header->root = root;
1900 	sysctl_set_parent(NULL, header->ctl_table);
1901 	header->count = 1;
1902 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1903 	if (sysctl_check_table(namespaces, header->ctl_table)) {
1904 		kfree(header);
1905 		return NULL;
1906 	}
1907 #endif
1908 	spin_lock(&sysctl_lock);
1909 	header->set = lookup_header_set(root, namespaces);
1910 	header->attached_by = header->ctl_table;
1911 	header->attached_to = root_table;
1912 	header->parent = &root_table_header;
1913 	for (set = header->set; set; set = set->parent) {
1914 		struct ctl_table_header *p;
1915 		list_for_each_entry(p, &set->list, ctl_entry) {
1916 			if (p->unregistering)
1917 				continue;
1918 			try_attach(p, header);
1919 		}
1920 	}
1921 	header->parent->count++;
1922 	list_add_tail(&header->ctl_entry, &header->set->list);
1923 	spin_unlock(&sysctl_lock);
1924 
1925 	return header;
1926 }
1927 
1928 /**
1929  * register_sysctl_table_path - register a sysctl table hierarchy
1930  * @path: The path to the directory the sysctl table is in.
1931  * @table: the top-level table structure
1932  *
1933  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1934  * array. A completely 0 filled entry terminates the table.
1935  *
1936  * See __register_sysctl_paths for more details.
1937  */
1938 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1939 						struct ctl_table *table)
1940 {
1941 	return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
1942 					path, table);
1943 }
1944 
1945 /**
1946  * register_sysctl_table - register a sysctl table hierarchy
1947  * @table: the top-level table structure
1948  *
1949  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1950  * array. A completely 0 filled entry terminates the table.
1951  *
1952  * See register_sysctl_paths for more details.
1953  */
1954 struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1955 {
1956 	static const struct ctl_path null_path[] = { {} };
1957 
1958 	return register_sysctl_paths(null_path, table);
1959 }
1960 
1961 /**
1962  * unregister_sysctl_table - unregister a sysctl table hierarchy
1963  * @header: the header returned from register_sysctl_table
1964  *
1965  * Unregisters the sysctl table and all children. proc entries may not
1966  * actually be removed until they are no longer used by anyone.
1967  */
1968 void unregister_sysctl_table(struct ctl_table_header * header)
1969 {
1970 	might_sleep();
1971 
1972 	if (header == NULL)
1973 		return;
1974 
1975 	spin_lock(&sysctl_lock);
1976 	start_unregistering(header);
1977 	if (!--header->parent->count) {
1978 		WARN_ON(1);
1979 		kfree_rcu(header->parent, rcu);
1980 	}
1981 	if (!--header->count)
1982 		kfree_rcu(header, rcu);
1983 	spin_unlock(&sysctl_lock);
1984 }
1985 
1986 int sysctl_is_seen(struct ctl_table_header *p)
1987 {
1988 	struct ctl_table_set *set = p->set;
1989 	int res;
1990 	spin_lock(&sysctl_lock);
1991 	if (p->unregistering)
1992 		res = 0;
1993 	else if (!set->is_seen)
1994 		res = 1;
1995 	else
1996 		res = set->is_seen(set);
1997 	spin_unlock(&sysctl_lock);
1998 	return res;
1999 }
2000 
2001 void setup_sysctl_set(struct ctl_table_set *p,
2002 	struct ctl_table_set *parent,
2003 	int (*is_seen)(struct ctl_table_set *))
2004 {
2005 	INIT_LIST_HEAD(&p->list);
2006 	p->parent = parent ? parent : &sysctl_table_root.default_set;
2007 	p->is_seen = is_seen;
2008 }
2009 
2010 #else /* !CONFIG_SYSCTL */
/* Stub used when CONFIG_SYSCTL is disabled: registers nothing. */
struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
{
	return NULL;
}
2015 
/* Stub used when CONFIG_SYSCTL is disabled: registers nothing. */
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
					       struct ctl_table *table)
{
	return NULL;
}
2021 
/* Stub used when CONFIG_SYSCTL is disabled: nothing to unregister. */
void unregister_sysctl_table(struct ctl_table_header *table)
{
}
2025 
/* Stub used when CONFIG_SYSCTL is disabled: leaves @p untouched. */
void setup_sysctl_set(struct ctl_table_set *p,
		      struct ctl_table_set *parent,
		      int (*is_seen)(struct ctl_table_set *))
{
}
2031 
/* Stub used when CONFIG_SYSCTL is disabled: no reference to drop. */
void sysctl_head_put(struct ctl_table_header *head)
{
}
2035 
2036 #endif /* CONFIG_SYSCTL */
2037 
2038 /*
2039  * /proc/sys support
2040  */
2041 
2042 #ifdef CONFIG_PROC_SYSCTL
2043 
2044 static int _proc_do_string(void* data, int maxlen, int write,
2045 			   void __user *buffer,
2046 			   size_t *lenp, loff_t *ppos)
2047 {
2048 	size_t len;
2049 	char __user *p;
2050 	char c;
2051 
2052 	if (!data || !maxlen || !*lenp) {
2053 		*lenp = 0;
2054 		return 0;
2055 	}
2056 
2057 	if (write) {
2058 		len = 0;
2059 		p = buffer;
2060 		while (len < *lenp) {
2061 			if (get_user(c, p++))
2062 				return -EFAULT;
2063 			if (c == 0 || c == '\n')
2064 				break;
2065 			len++;
2066 		}
2067 		if (len >= maxlen)
2068 			len = maxlen-1;
2069 		if(copy_from_user(data, buffer, len))
2070 			return -EFAULT;
2071 		((char *) data)[len] = 0;
2072 		*ppos += *lenp;
2073 	} else {
2074 		len = strlen(data);
2075 		if (len > maxlen)
2076 			len = maxlen;
2077 
2078 		if (*ppos > len) {
2079 			*lenp = 0;
2080 			return 0;
2081 		}
2082 
2083 		data += *ppos;
2084 		len  -= *ppos;
2085 
2086 		if (len > *lenp)
2087 			len = *lenp;
2088 		if (len)
2089 			if(copy_to_user(buffer, data, len))
2090 				return -EFAULT;
2091 		if (len < *lenp) {
2092 			if(put_user('\n', ((char __user *) buffer) + len))
2093 				return -EFAULT;
2094 			len++;
2095 		}
2096 		*lenp = len;
2097 		*ppos += len;
2098 	}
2099 	return 0;
2100 }
2101 
2102 /**
2103  * proc_dostring - read a string sysctl
2104  * @table: the sysctl table
2105  * @write: %TRUE if this is a write to the sysctl file
2106  * @buffer: the user buffer
2107  * @lenp: the size of the user buffer
2108  * @ppos: file position
2109  *
2110  * Reads/writes a string from/to the user buffer. If the kernel
2111  * buffer provided is not large enough to hold the string, the
2112  * string is truncated. The copied string is %NULL-terminated.
2113  * If the string is being read by the user process, it is copied
2114  * and a newline '\n' is added. It is truncated if the buffer is
2115  * not large enough.
2116  *
2117  * Returns 0 on success.
2118  */
2119 int proc_dostring(struct ctl_table *table, int write,
2120 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2121 {
2122 	return _proc_do_string(table->data, table->maxlen, write,
2123 			       buffer, lenp, ppos);
2124 }
2125 
/*
 * Advance *@buf to the position returned by skip_spaces() and return how
 * many bytes were skipped.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);
	return *buf - start;
}
2134 
/*
 * Skip a leading run of the character @v in *@buf, consuming at most *@size
 * bytes.  Both *@buf and *@size are updated to reflect what was skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	while (*size && **buf == v) {
		(*size)--;
		(*buf)++;
	}
}
2144 
#define TMPBUFLEN 22
/**
 * proc_get_long - parse an ASCII formatted integer from a kernel buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * On success %0 is returned and @buf and @size are advanced past the bytes
 * that were consumed. If @tr is non-NULL and a trailing character exists
 * (size is non-zero after returning from this function), @tr receives that
 * trailing character. Returns -EINVAL for empty input, input that does not
 * start with a digit (after an optional '-'), a number that fills the
 * whole scratch buffer, or a trailer that is not in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	char scratch[TMPBUFLEN];
	char *cur = scratch;
	int len;

	if (!*size)
		return -EINVAL;

	/* Work on a bounded, NUL-terminated copy of the input. */
	len = *size;
	if (len > TMPBUFLEN - 1)
		len = TMPBUFLEN - 1;

	memcpy(scratch, *buf, len);
	scratch[len] = 0;

	/* A lone '-' is not treated as a sign. */
	*neg = (*cur == '-' && *size > 1);
	if (*neg)
		cur++;

	if (!isdigit(*cur))
		return -EINVAL;

	*val = simple_strtoul(cur, &cur, 0);

	len = cur - scratch;

	/* We don't know if the next char is whitespace thus we may accept
	 * invalid integers (e.g. 1234...a) or two integers instead of one
	 * (e.g. 123...1). So lets not allow such large numbers. */
	if (len == TMPBUFLEN - 1)
		return -EINVAL;

	if (len < *size) {
		/* There is a trailer: it must be one of the permitted ones. */
		if (perm_tr_len && !memchr(perm_tr, *cur, perm_tr_len))
			return -EINVAL;
		if (tr)
			*tr = *cur;
	}

	*buf += len;
	*size -= len;

	return 0;
}
2209 
2210 /**
2211  * proc_put_long - converts an integer to a decimal ASCII formatted string
2212  *
2213  * @buf: the user buffer
2214  * @size: the size of the user buffer
2215  * @val: the integer to be converted
2216  * @neg: sign of the number, %TRUE for negative
2217  *
2218  * In case of success %0 is returned and @buf and @size are updated with
2219  * the amount of bytes written.
2220  */
2221 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2222 			  bool neg)
2223 {
2224 	int len;
2225 	char tmp[TMPBUFLEN], *p = tmp;
2226 
2227 	sprintf(p, "%s%lu", neg ? "-" : "", val);
2228 	len = strlen(tmp);
2229 	if (len > *size)
2230 		len = *size;
2231 	if (copy_to_user(*buf, tmp, len))
2232 		return -EFAULT;
2233 	*size -= len;
2234 	*buf += len;
2235 	return 0;
2236 }
2237 #undef TMPBUFLEN
2238 
2239 static int proc_put_char(void __user **buf, size_t *size, char c)
2240 {
2241 	if (*size) {
2242 		char __user **buffer = (char __user **)buf;
2243 		if (put_user(c, *buffer))
2244 			return -EFAULT;
2245 		(*size)--, (*buffer)++;
2246 		*buf = *buffer;
2247 	}
2248 	return 0;
2249 }
2250 
/*
 * Default conversion between the sign/magnitude pair used by the sysctl
 * text parser and a plain int.
 *
 * @negp:  sign of the value (%true for negative)
 * @lvalp: magnitude of the value
 * @valp:  the int being read or written
 * @write: non-zero to store (@negp, @lvalp) into @valp, zero to load
 *         @valp into (@negp, @lvalp)
 * @data:  unused by this conversion
 *
 * Returns 0 on success.  On write, returns -EINVAL and leaves *@valp
 * untouched when the value does not fit in an int, instead of silently
 * truncating it.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		if (*negp) {
			/* The most negative int has magnitude INT_MAX + 1. */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			*valp = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			*valp = *lvalp;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/*
			 * Negate in unsigned arithmetic: "-val" on an int
			 * overflows when val is INT_MIN.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2269 
2270 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2271 
2272 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2273 		  int write, void __user *buffer,
2274 		  size_t *lenp, loff_t *ppos,
2275 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2276 			      int write, void *data),
2277 		  void *data)
2278 {
2279 	int *i, vleft, first = 1, err = 0;
2280 	unsigned long page = 0;
2281 	size_t left;
2282 	char *kbuf;
2283 
2284 	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2285 		*lenp = 0;
2286 		return 0;
2287 	}
2288 
2289 	i = (int *) tbl_data;
2290 	vleft = table->maxlen / sizeof(*i);
2291 	left = *lenp;
2292 
2293 	if (!conv)
2294 		conv = do_proc_dointvec_conv;
2295 
2296 	if (write) {
2297 		if (left > PAGE_SIZE - 1)
2298 			left = PAGE_SIZE - 1;
2299 		page = __get_free_page(GFP_TEMPORARY);
2300 		kbuf = (char *) page;
2301 		if (!kbuf)
2302 			return -ENOMEM;
2303 		if (copy_from_user(kbuf, buffer, left)) {
2304 			err = -EFAULT;
2305 			goto free;
2306 		}
2307 		kbuf[left] = 0;
2308 	}
2309 
2310 	for (; left && vleft--; i++, first=0) {
2311 		unsigned long lval;
2312 		bool neg;
2313 
2314 		if (write) {
2315 			left -= proc_skip_spaces(&kbuf);
2316 
2317 			if (!left)
2318 				break;
2319 			err = proc_get_long(&kbuf, &left, &lval, &neg,
2320 					     proc_wspace_sep,
2321 					     sizeof(proc_wspace_sep), NULL);
2322 			if (err)
2323 				break;
2324 			if (conv(&neg, &lval, i, 1, data)) {
2325 				err = -EINVAL;
2326 				break;
2327 			}
2328 		} else {
2329 			if (conv(&neg, &lval, i, 0, data)) {
2330 				err = -EINVAL;
2331 				break;
2332 			}
2333 			if (!first)
2334 				err = proc_put_char(&buffer, &left, '\t');
2335 			if (err)
2336 				break;
2337 			err = proc_put_long(&buffer, &left, lval, neg);
2338 			if (err)
2339 				break;
2340 		}
2341 	}
2342 
2343 	if (!write && !first && left && !err)
2344 		err = proc_put_char(&buffer, &left, '\n');
2345 	if (write && !err && left)
2346 		left -= proc_skip_spaces(&kbuf);
2347 free:
2348 	if (write) {
2349 		free_page(page);
2350 		if (first)
2351 			return err ? : -EINVAL;
2352 	}
2353 	*lenp -= left;
2354 	*ppos += *lenp;
2355 	return err;
2356 }
2357 
2358 static int do_proc_dointvec(struct ctl_table *table, int write,
2359 		  void __user *buffer, size_t *lenp, loff_t *ppos,
2360 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2361 			      int write, void *data),
2362 		  void *data)
2363 {
2364 	return __do_proc_dointvec(table->data, table, write,
2365 			buffer, lenp, ppos, conv, data);
2366 }
2367 
2368 /**
2369  * proc_dointvec - read a vector of integers
2370  * @table: the sysctl table
2371  * @write: %TRUE if this is a write to the sysctl file
2372  * @buffer: the user buffer
2373  * @lenp: the size of the user buffer
2374  * @ppos: file position
2375  *
2376  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2377  * values from/to the user buffer, treated as an ASCII string.
2378  *
2379  * Returns 0 on success.
2380  */
2381 int proc_dointvec(struct ctl_table *table, int write,
2382 		     void __user *buffer, size_t *lenp, loff_t *ppos)
2383 {
2384     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2385 		    	    NULL,NULL);
2386 }
2387 
2388 /*
2389  * Taint values can only be increased
2390  * This means we can safely use a temporary.
2391  */
2392 static int proc_taint(struct ctl_table *table, int write,
2393 			       void __user *buffer, size_t *lenp, loff_t *ppos)
2394 {
2395 	struct ctl_table t;
2396 	unsigned long tmptaint = get_taint();
2397 	int err;
2398 
2399 	if (write && !capable(CAP_SYS_ADMIN))
2400 		return -EPERM;
2401 
2402 	t = *table;
2403 	t.data = &tmptaint;
2404 	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2405 	if (err < 0)
2406 		return err;
2407 
2408 	if (write) {
2409 		/*
2410 		 * Poor man's atomic or. Not worth adding a primitive
2411 		 * to everyone's atomic.h for this
2412 		 */
2413 		int i;
2414 		for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2415 			if ((tmptaint >> i) & 1)
2416 				add_taint(i);
2417 		}
2418 	}
2419 
2420 	return err;
2421 }
2422 
2423 #ifdef CONFIG_PRINTK
2424 static int proc_dmesg_restrict(struct ctl_table *table, int write,
2425 				void __user *buffer, size_t *lenp, loff_t *ppos)
2426 {
2427 	if (write && !capable(CAP_SYS_ADMIN))
2428 		return -EPERM;
2429 
2430 	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2431 }
2432 #endif
2433 
/*
 * Optional bounds for do_proc_dointvec_minmax_conv(); a NULL pointer
 * means that side of the range is not checked.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/**
 * do_proc_dointvec_minmax_conv - int conversion with optional range check
 * @negp: sign of the value (input on write, output on read)
 * @lvalp: magnitude of the value (input on write, output on read)
 * @valp: the kernel integer being written to or read from
 * @write: non-zero to convert (@negp, @lvalp) into *@valp, zero for the
 *	   opposite direction
 * @data: a struct do_proc_dointvec_minmax_conv_param with the bounds
 *
 * On write *@valp is only touched once the value is known to fit in an
 * int and to lie inside the configured range.
 *
 * Returns 0 on success or -EINVAL if the written value is rejected.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val;

		/*
		 * Refuse magnitudes that cannot be represented as an int;
		 * truncating first could wrap a huge value into the range.
		 */
		if (*negp) {
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			val = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			val = *lvalp;
		}
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/* widen before negating, "-val" overflows for INT_MIN */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2462 
2463 /**
2464  * proc_dointvec_minmax - read a vector of integers with min/max values
2465  * @table: the sysctl table
2466  * @write: %TRUE if this is a write to the sysctl file
2467  * @buffer: the user buffer
2468  * @lenp: the size of the user buffer
2469  * @ppos: file position
2470  *
2471  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2472  * values from/to the user buffer, treated as an ASCII string.
2473  *
2474  * This routine will ensure the values are within the range specified by
2475  * table->extra1 (min) and table->extra2 (max).
2476  *
2477  * Returns 0 on success.
2478  */
2479 int proc_dointvec_minmax(struct ctl_table *table, int write,
2480 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2481 {
2482 	struct do_proc_dointvec_minmax_conv_param param = {
2483 		.min = (int *) table->extra1,
2484 		.max = (int *) table->extra2,
2485 	};
2486 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2487 				do_proc_dointvec_minmax_conv, &param);
2488 }
2489 
2490 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2491 				     void __user *buffer,
2492 				     size_t *lenp, loff_t *ppos,
2493 				     unsigned long convmul,
2494 				     unsigned long convdiv)
2495 {
2496 	unsigned long *i, *min, *max;
2497 	int vleft, first = 1, err = 0;
2498 	unsigned long page = 0;
2499 	size_t left;
2500 	char *kbuf;
2501 
2502 	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2503 		*lenp = 0;
2504 		return 0;
2505 	}
2506 
2507 	i = (unsigned long *) data;
2508 	min = (unsigned long *) table->extra1;
2509 	max = (unsigned long *) table->extra2;
2510 	vleft = table->maxlen / sizeof(unsigned long);
2511 	left = *lenp;
2512 
2513 	if (write) {
2514 		if (left > PAGE_SIZE - 1)
2515 			left = PAGE_SIZE - 1;
2516 		page = __get_free_page(GFP_TEMPORARY);
2517 		kbuf = (char *) page;
2518 		if (!kbuf)
2519 			return -ENOMEM;
2520 		if (copy_from_user(kbuf, buffer, left)) {
2521 			err = -EFAULT;
2522 			goto free;
2523 		}
2524 		kbuf[left] = 0;
2525 	}
2526 
2527 	for (; left && vleft--; i++, first = 0) {
2528 		unsigned long val;
2529 
2530 		if (write) {
2531 			bool neg;
2532 
2533 			left -= proc_skip_spaces(&kbuf);
2534 
2535 			err = proc_get_long(&kbuf, &left, &val, &neg,
2536 					     proc_wspace_sep,
2537 					     sizeof(proc_wspace_sep), NULL);
2538 			if (err)
2539 				break;
2540 			if (neg)
2541 				continue;
2542 			if ((min && val < *min) || (max && val > *max))
2543 				continue;
2544 			*i = val;
2545 		} else {
2546 			val = convdiv * (*i) / convmul;
2547 			if (!first)
2548 				err = proc_put_char(&buffer, &left, '\t');
2549 			err = proc_put_long(&buffer, &left, val, false);
2550 			if (err)
2551 				break;
2552 		}
2553 	}
2554 
2555 	if (!write && !first && left && !err)
2556 		err = proc_put_char(&buffer, &left, '\n');
2557 	if (write && !err)
2558 		left -= proc_skip_spaces(&kbuf);
2559 free:
2560 	if (write) {
2561 		free_page(page);
2562 		if (first)
2563 			return err ? : -EINVAL;
2564 	}
2565 	*lenp -= left;
2566 	*ppos += *lenp;
2567 	return err;
2568 }
2569 
2570 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2571 				     void __user *buffer,
2572 				     size_t *lenp, loff_t *ppos,
2573 				     unsigned long convmul,
2574 				     unsigned long convdiv)
2575 {
2576 	return __do_proc_doulongvec_minmax(table->data, table, write,
2577 			buffer, lenp, ppos, convmul, convdiv);
2578 }
2579 
2580 /**
2581  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2582  * @table: the sysctl table
2583  * @write: %TRUE if this is a write to the sysctl file
2584  * @buffer: the user buffer
2585  * @lenp: the size of the user buffer
2586  * @ppos: file position
2587  *
2588  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2589  * values from/to the user buffer, treated as an ASCII string.
2590  *
2591  * This routine will ensure the values are within the range specified by
2592  * table->extra1 (min) and table->extra2 (max).
2593  *
2594  * Returns 0 on success.
2595  */
2596 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2597 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2598 {
2599     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2600 }
2601 
2602 /**
2603  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2604  * @table: the sysctl table
2605  * @write: %TRUE if this is a write to the sysctl file
2606  * @buffer: the user buffer
2607  * @lenp: the size of the user buffer
2608  * @ppos: file position
2609  *
2610  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2611  * values from/to the user buffer, treated as an ASCII string. The values
2612  * are treated as milliseconds, and converted to jiffies when they are stored.
2613  *
2614  * This routine will ensure the values are within the range specified by
2615  * table->extra1 (min) and table->extra2 (max).
2616  *
2617  * Returns 0 on success.
2618  */
2619 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2620 				      void __user *buffer,
2621 				      size_t *lenp, loff_t *ppos)
2622 {
2623     return do_proc_doulongvec_minmax(table, write, buffer,
2624 				     lenp, ppos, HZ, 1000l);
2625 }
2626 
2627 
2628 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2629 					 int *valp,
2630 					 int write, void *data)
2631 {
2632 	if (write) {
2633 		if (*lvalp > LONG_MAX / HZ)
2634 			return 1;
2635 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2636 	} else {
2637 		int val = *valp;
2638 		unsigned long lval;
2639 		if (val < 0) {
2640 			*negp = true;
2641 			lval = (unsigned long)-val;
2642 		} else {
2643 			*negp = false;
2644 			lval = (unsigned long)val;
2645 		}
2646 		*lvalp = lval / HZ;
2647 	}
2648 	return 0;
2649 }
2650 
2651 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2652 						int *valp,
2653 						int write, void *data)
2654 {
2655 	if (write) {
2656 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2657 			return 1;
2658 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2659 	} else {
2660 		int val = *valp;
2661 		unsigned long lval;
2662 		if (val < 0) {
2663 			*negp = true;
2664 			lval = (unsigned long)-val;
2665 		} else {
2666 			*negp = false;
2667 			lval = (unsigned long)val;
2668 		}
2669 		*lvalp = jiffies_to_clock_t(lval);
2670 	}
2671 	return 0;
2672 }
2673 
/**
 * do_proc_dointvec_ms_jiffies_conv - convert between milliseconds and jiffies
 * @negp: sign of the value (input on write, output on read)
 * @lvalp: magnitude in milliseconds (input on write, output on read)
 * @valp: the kernel integer, in jiffies
 * @write: non-zero to convert milliseconds into *@valp, zero for the reverse
 * @data: unused
 *
 * Returns 0 on success, 1 if the converted value does not fit in *@valp.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

		/* refuse results that would be mangled when stored in an int */
		if (jif > INT_MAX)
			return 1;
		*valp = (int)jif;
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = true;
			/* widen before negating, "-val" overflows for INT_MIN */
			lval = -(unsigned long)val;
		} else {
			*negp = false;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2694 
2695 /**
2696  * proc_dointvec_jiffies - read a vector of integers as seconds
2697  * @table: the sysctl table
2698  * @write: %TRUE if this is a write to the sysctl file
2699  * @buffer: the user buffer
2700  * @lenp: the size of the user buffer
2701  * @ppos: file position
2702  *
2703  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2704  * values from/to the user buffer, treated as an ASCII string.
2705  * The values read are assumed to be in seconds, and are converted into
2706  * jiffies.
2707  *
2708  * Returns 0 on success.
2709  */
2710 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2711 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2712 {
2713     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2714 		    	    do_proc_dointvec_jiffies_conv,NULL);
2715 }
2716 
2717 /**
2718  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2719  * @table: the sysctl table
2720  * @write: %TRUE if this is a write to the sysctl file
2721  * @buffer: the user buffer
2722  * @lenp: the size of the user buffer
2723  * @ppos: pointer to the file position
2724  *
2725  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2726  * values from/to the user buffer, treated as an ASCII string.
2727  * The values read are assumed to be in 1/USER_HZ seconds, and
2728  * are converted into jiffies.
2729  *
2730  * Returns 0 on success.
2731  */
2732 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2733 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2734 {
2735     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2736 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2737 }
2738 
2739 /**
2740  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2741  * @table: the sysctl table
2742  * @write: %TRUE if this is a write to the sysctl file
2743  * @buffer: the user buffer
2744  * @lenp: the size of the user buffer
2745  * @ppos: file position
2746  * @ppos: the current position in the file
2747  *
2748  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2749  * values from/to the user buffer, treated as an ASCII string.
2750  * The values read are assumed to be in 1/1000 seconds, and
2751  * are converted into jiffies.
2752  *
2753  * Returns 0 on success.
2754  */
2755 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2756 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2757 {
2758 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2759 				do_proc_dointvec_ms_jiffies_conv, NULL);
2760 }
2761 
2762 static int proc_do_cad_pid(struct ctl_table *table, int write,
2763 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2764 {
2765 	struct pid *new_pid;
2766 	pid_t tmp;
2767 	int r;
2768 
2769 	tmp = pid_vnr(cad_pid);
2770 
2771 	r = __do_proc_dointvec(&tmp, table, write, buffer,
2772 			       lenp, ppos, NULL, NULL);
2773 	if (r || !write)
2774 		return r;
2775 
2776 	new_pid = find_get_pid(tmp);
2777 	if (!new_pid)
2778 		return -ESRCH;
2779 
2780 	put_pid(xchg(&cad_pid, new_pid));
2781 	return 0;
2782 }
2783 
2784 /**
2785  * proc_do_large_bitmap - read/write from/to a large bitmap
2786  * @table: the sysctl table
2787  * @write: %TRUE if this is a write to the sysctl file
2788  * @buffer: the user buffer
2789  * @lenp: the size of the user buffer
2790  * @ppos: file position
2791  *
2792  * The bitmap is stored at table->data and the bitmap length (in bits)
2793  * in table->maxlen.
2794  *
2795  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
2796  * large bitmaps may be represented in a compact manner. Writing into
2797  * the file will clear the bitmap then update it with the given input.
2798  *
2799  * Returns 0 on success.
2800  */
2801 int proc_do_large_bitmap(struct ctl_table *table, int write,
2802 			 void __user *buffer, size_t *lenp, loff_t *ppos)
2803 {
2804 	int err = 0;
2805 	bool first = 1;
2806 	size_t left = *lenp;
2807 	unsigned long bitmap_len = table->maxlen;
2808 	unsigned long *bitmap = (unsigned long *) table->data;
2809 	unsigned long *tmp_bitmap = NULL;
2810 	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
2811 
2812 	if (!bitmap_len || !left || (*ppos && !write)) {
2813 		*lenp = 0;
2814 		return 0;
2815 	}
2816 
2817 	if (write) {
2818 		unsigned long page = 0;
2819 		char *kbuf;
2820 
2821 		if (left > PAGE_SIZE - 1)
2822 			left = PAGE_SIZE - 1;
2823 
2824 		page = __get_free_page(GFP_TEMPORARY);
2825 		kbuf = (char *) page;
2826 		if (!kbuf)
2827 			return -ENOMEM;
2828 		if (copy_from_user(kbuf, buffer, left)) {
2829 			free_page(page);
2830 			return -EFAULT;
2831                 }
2832 		kbuf[left] = 0;
2833 
2834 		tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
2835 				     GFP_KERNEL);
2836 		if (!tmp_bitmap) {
2837 			free_page(page);
2838 			return -ENOMEM;
2839 		}
2840 		proc_skip_char(&kbuf, &left, '\n');
2841 		while (!err && left) {
2842 			unsigned long val_a, val_b;
2843 			bool neg;
2844 
2845 			err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
2846 					     sizeof(tr_a), &c);
2847 			if (err)
2848 				break;
2849 			if (val_a >= bitmap_len || neg) {
2850 				err = -EINVAL;
2851 				break;
2852 			}
2853 
2854 			val_b = val_a;
2855 			if (left) {
2856 				kbuf++;
2857 				left--;
2858 			}
2859 
2860 			if (c == '-') {
2861 				err = proc_get_long(&kbuf, &left, &val_b,
2862 						     &neg, tr_b, sizeof(tr_b),
2863 						     &c);
2864 				if (err)
2865 					break;
2866 				if (val_b >= bitmap_len || neg ||
2867 				    val_a > val_b) {
2868 					err = -EINVAL;
2869 					break;
2870 				}
2871 				if (left) {
2872 					kbuf++;
2873 					left--;
2874 				}
2875 			}
2876 
2877 			while (val_a <= val_b)
2878 				set_bit(val_a++, tmp_bitmap);
2879 
2880 			first = 0;
2881 			proc_skip_char(&kbuf, &left, '\n');
2882 		}
2883 		free_page(page);
2884 	} else {
2885 		unsigned long bit_a, bit_b = 0;
2886 
2887 		while (left) {
2888 			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
2889 			if (bit_a >= bitmap_len)
2890 				break;
2891 			bit_b = find_next_zero_bit(bitmap, bitmap_len,
2892 						   bit_a + 1) - 1;
2893 
2894 			if (!first) {
2895 				err = proc_put_char(&buffer, &left, ',');
2896 				if (err)
2897 					break;
2898 			}
2899 			err = proc_put_long(&buffer, &left, bit_a, false);
2900 			if (err)
2901 				break;
2902 			if (bit_a != bit_b) {
2903 				err = proc_put_char(&buffer, &left, '-');
2904 				if (err)
2905 					break;
2906 				err = proc_put_long(&buffer, &left, bit_b, false);
2907 				if (err)
2908 					break;
2909 			}
2910 
2911 			first = 0; bit_b++;
2912 		}
2913 		if (!err)
2914 			err = proc_put_char(&buffer, &left, '\n');
2915 	}
2916 
2917 	if (!err) {
2918 		if (write) {
2919 			if (*ppos)
2920 				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2921 			else
2922 				memcpy(bitmap, tmp_bitmap,
2923 					BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
2924 		}
2925 		kfree(tmp_bitmap);
2926 		*lenp -= left;
2927 		*ppos += *lenp;
2928 		return 0;
2929 	} else {
2930 		kfree(tmp_bitmap);
2931 		return err;
2932 	}
2933 }
2934 
2935 #else /* CONFIG_PROC_SYSCTL */
2936 
2937 int proc_dostring(struct ctl_table *table, int write,
2938 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2939 {
2940 	return -ENOSYS;
2941 }
2942 
2943 int proc_dointvec(struct ctl_table *table, int write,
2944 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2945 {
2946 	return -ENOSYS;
2947 }
2948 
2949 int proc_dointvec_minmax(struct ctl_table *table, int write,
2950 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2951 {
2952 	return -ENOSYS;
2953 }
2954 
2955 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2956 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2957 {
2958 	return -ENOSYS;
2959 }
2960 
2961 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2962 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2963 {
2964 	return -ENOSYS;
2965 }
2966 
2967 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2968 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2969 {
2970 	return -ENOSYS;
2971 }
2972 
2973 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2974 		    void __user *buffer, size_t *lenp, loff_t *ppos)
2975 {
2976 	return -ENOSYS;
2977 }
2978 
2979 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2980 				      void __user *buffer,
2981 				      size_t *lenp, loff_t *ppos)
2982 {
2983     return -ENOSYS;
2984 }
2985 
2986 
2987 #endif /* CONFIG_PROC_SYSCTL */
2988 
/*
 * The proc handlers are defined twice above, once for each setting of
 * CONFIG_PROC_SYSCTL, so the exports are collected here in one place
 * instead of following each definition.
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_paths);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(unregister_sysctl_table);
3004