xref: /linux/kernel/sysctl.c (revision a5c4300389bb33ade2515c082709217f0614cf15)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/signal.h>
27 #include <linux/proc_fs.h>
28 #include <linux/security.h>
29 #include <linux/ctype.h>
30 #include <linux/kmemcheck.h>
31 #include <linux/fs.h>
32 #include <linux/init.h>
33 #include <linux/kernel.h>
34 #include <linux/kobject.h>
35 #include <linux/net.h>
36 #include <linux/sysrq.h>
37 #include <linux/highuid.h>
38 #include <linux/writeback.h>
39 #include <linux/ratelimit.h>
40 #include <linux/hugetlb.h>
41 #include <linux/initrd.h>
42 #include <linux/key.h>
43 #include <linux/times.h>
44 #include <linux/limits.h>
45 #include <linux/dcache.h>
46 #include <linux/syscalls.h>
47 #include <linux/vmstat.h>
48 #include <linux/nfs_fs.h>
49 #include <linux/acpi.h>
50 #include <linux/reboot.h>
51 #include <linux/ftrace.h>
52 #include <linux/slow-work.h>
53 #include <linux/perf_event.h>
54 #include <linux/kprobes.h>
55 #include <linux/pipe_fs_i.h>
56 
57 #include <asm/uaccess.h>
58 #include <asm/processor.h>
59 
60 #ifdef CONFIG_X86
61 #include <asm/nmi.h>
62 #include <asm/stacktrace.h>
63 #include <asm/io.h>
64 #endif
65 #ifdef CONFIG_BSD_PROCESS_ACCT
66 #include <linux/acct.h>
67 #endif
68 #ifdef CONFIG_RT_MUTEXES
69 #include <linux/rtmutex.h>
70 #endif
71 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
72 #include <linux/lockdep.h>
73 #endif
74 #ifdef CONFIG_CHR_DEV_SG
75 #include <scsi/sg.h>
76 #endif
77 
78 
79 #if defined(CONFIG_SYSCTL)
80 
/*
 * Declarations for variables that are defined in other files and have no
 * header of their own.  Several of them are wired into the ctl_table
 * entries in this file through .data, .extra1 or .extra2.
 */
81 /* External variables not in a header file. */
82 extern int sysctl_overcommit_memory;
83 extern int sysctl_overcommit_ratio;
84 extern int sysctl_panic_on_oom;
85 extern int sysctl_oom_kill_allocating_task;
86 extern int sysctl_oom_dump_tasks;
87 extern int max_threads;
88 extern int core_uses_pid;
89 extern int suid_dumpable;
90 extern char core_pattern[];
91 extern unsigned int core_pipe_limit;
92 extern int pid_max;
93 extern int min_free_kbytes;
/* pid_max_min / pid_max_max are the .extra1 / .extra2 of the "pid_max" entry. */
94 extern int pid_max_min, pid_max_max;
95 extern int sysctl_drop_caches;
96 extern int percpu_pagelist_fraction;
97 extern int compat_log;
98 extern int latencytop_enabled;
99 extern int sysctl_nr_open_min, sysctl_nr_open_max;
/* Only declared when the kernel is built without an MMU. */
100 #ifndef CONFIG_MMU
101 extern int sysctl_nr_trim_pages;
102 #endif
103 #ifdef CONFIG_BLOCK
104 extern int blk_iopoll_enabled;
105 #endif
106 
/*
 * The ctl_table entries take their bounds as pointers (.extra1 / .extra2),
 * so each bound needs an object whose address can be taken.
 */
107 /* Constants used for minimum and maximum */
/* Bounds of the "softlockup_thresh" entry: -1 .. 60. */
108 #ifdef CONFIG_DETECT_SOFTLOCKUP
109 static int sixty = 60;
110 static int neg_one = -1;
111 #endif
112 
/* No initializer: a static int starts out as 0. */
113 static int zero;
/*
 * __maybe_unused: presumably because the visible users of these two sit
 * behind #ifdefs and may all be compiled out -- TODO confirm.
 */
114 static int __maybe_unused one = 1;
115 static int __maybe_unused two = 2;
/* unsigned long variant of 1, used as .extra1 of "dirty_background_bytes". */
116 static unsigned long one_ul = 1;
117 static int one_hundred = 100;
/* Upper bound (.extra2) of the "printk_delay" entry. */
118 #ifdef CONFIG_PRINTK
119 static int ten_thousand = 10000;
120 #endif
121 
122 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
123 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
124 
125 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
126 static int maxolduid = 65535;
127 static int minolduid;
/* Lower bound (.extra1) of the "percpu_pagelist_fraction" entry. */
128 static int min_percpu_pagelist_fract = 8;
129 
/* Copy of NGROUPS_MAX that backs the mode-0444 "ngroups_max" entry. */
130 static int ngroups_max = NGROUPS_MAX;
131 
/*
 * Architecture-specific declarations.  Each group is only visible when the
 * matching architecture/config symbol is defined, mirroring the #ifdefs
 * around the table entries that use these variables.
 */
132 #ifdef CONFIG_SPARC
133 #include <asm/system.h>
134 #endif
135 
136 #ifdef CONFIG_SPARC64
137 extern int sysctl_tsb_ratio;
138 #endif
139 
140 #ifdef __hppa__
141 extern int pwrsw_enabled;
142 extern int unaligned_enabled;
143 #endif
144 
145 #ifdef CONFIG_S390
146 #ifdef CONFIG_MATHEMU
147 extern int sysctl_ieee_emulation_warnings;
148 #endif
/*
 * NOTE(review): the "userprocess_debug" entry in kern_table points its
 * .data at show_unhandled_signals, not at this variable -- confirm whether
 * this declaration is still needed.
 */
149 extern int sysctl_userprocess_debug;
150 extern int spin_retry;
151 #endif
152 
153 #ifdef CONFIG_IA64
154 extern int no_unaligned_warning;
155 extern int unaligned_dump_stack;
156 #endif
157 
/* Its .interval and .burst fields back the printk_ratelimit* entries. */
158 extern struct ratelimit_state printk_ratelimit_state;
158 extern struct ratelimit_state printk_ratelimit_state;
159 
/*
 * Forward declarations of two file-local proc handlers.  kern_table
 * references them before their definitions: proc_do_cad_pid for the
 * "cad_pid" entry and proc_taint for the "tainted" entry.  Both use the
 * same parameter list as the other proc handlers in this file.
 */
160 #ifdef CONFIG_PROC_SYSCTL
161 static int proc_do_cad_pid(struct ctl_table *table, int write,
162 		  void __user *buffer, size_t *lenp, loff_t *ppos);
163 static int proc_taint(struct ctl_table *table, int write,
164 			       void __user *buffer, size_t *lenp, loff_t *ppos);
165 #endif
166 
167 #ifdef CONFIG_MAGIC_SYSRQ
/* Variable behind the "sysrq" entry of kern_table (its .data points here). */
168 static int __sysrq_enabled; /* Note: sysrq code uses its own private copy */
169 
/*
 * sysrq_sysctl_handler - proc handler for the "sysrq" entry
 * @table:  the ctl_table entry being accessed
 * @write:  non-zero when the access is a write
 * @buffer: user-space buffer, passed through to proc_dointvec()
 * @lenp:   length in/out pointer, passed through to proc_dointvec()
 * @ppos:   file position pointer, passed through to proc_dointvec()
 *
 * Lets proc_dointvec() do the actual read or write on @table.  If that
 * fails, its error code is returned unchanged.  Otherwise, on a write,
 * the value of __sysrq_enabled is handed to sysrq_toggle_support() so
 * that the sysrq code can update its own copy.
 *
 * Returns 0 on success or the error from proc_dointvec().
 */
170 static int sysrq_sysctl_handler(ctl_table *table, int write,
171 				void __user *buffer, size_t *lenp,
172 				loff_t *ppos)
173 {
174 	int error;
175 
176 	error = proc_dointvec(table, write, buffer, lenp, ppos);
177 	if (error)
178 		return error;
179 
	/* Reads need no follow-up; only a write has to be propagated. */
180 	if (write)
181 		sysrq_toggle_support(__sysrq_enabled);
182 
183 	return 0;
184 }
185 
186 #endif
187 
/*
 * Static root of the sysctl tree.
 *
 * root_table and sysctl_table_root are declared ahead of their definitions
 * because root_table_header refers to both of them.
 *
 * root_table_header describes root_table and belongs to the default set of
 * sysctl_table_root.  Its ctl_entry node is initialised from the default
 * set's list head, and that list head is initialised from ctl_entry, so
 * the two list nodes reference each other (assuming the usual
 * LIST_HEAD_INIT expansion -- TODO confirm), i.e. the root header starts
 * out as the only member of the default set's list.
 */
188 static struct ctl_table root_table[];
189 static struct ctl_table_root sysctl_table_root;
190 static struct ctl_table_header root_table_header = {
191 	.count = 1,
192 	.ctl_table = root_table,
193 	.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
194 	.root = &sysctl_table_root,
195 	.set = &sysctl_table_root.default_set,
196 };
197 static struct ctl_table_root sysctl_table_root = {
198 	.root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
199 	.default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
200 };
201 
/*
 * Forward declarations of the tables that other tables reference through
 * .child.  The static ones are defined in this file; the extern ones
 * (random_table, inotify_table, epoll_table) are defined elsewhere.
 */
202 static struct ctl_table kern_table[];
203 static struct ctl_table vm_table[];
204 static struct ctl_table fs_table[];
205 static struct ctl_table debug_table[];
206 static struct ctl_table dev_table[];
207 extern struct ctl_table random_table[];
208 #ifdef CONFIG_INOTIFY_USER
209 extern struct ctl_table inotify_table[];
210 #endif
211 #ifdef CONFIG_EPOLL
212 extern struct ctl_table epoll_table[];
213 #endif
214 
/* Defined (not just declared) here; backs the "legacy_va_layout" entry. */
215 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
216 int sysctl_legacy_va_layout;
217 #endif
218 
219 /* The default sysctl tables: */
220 
/*
 * Top-level table referenced by root_table_header.  Every entry is a
 * directory: it has a name, mode 0555 and a .child table, but no .data
 * and no .proc_handler.  The array is terminated by an empty entry.
 */
221 static struct ctl_table root_table[] = {
222 	{
223 		.procname	= "kernel",
224 		.mode		= 0555,
225 		.child		= kern_table,
226 	},
227 	{
228 		.procname	= "vm",
229 		.mode		= 0555,
230 		.child		= vm_table,
231 	},
232 	{
233 		.procname	= "fs",
234 		.mode		= 0555,
235 		.child		= fs_table,
236 	},
237 	{
238 		.procname	= "debug",
239 		.mode		= 0555,
240 		.child		= debug_table,
241 	},
242 	{
243 		.procname	= "dev",
244 		.mode		= 0555,
245 		.child		= dev_table,
246 	},
247 /*
248  * NOTE: do not add new entries to this table unless you have read
249  * Documentation/sysctl/ctl_unnumbered.txt
250  */
	/* empty entry: end of table */
251 	{ }
252 };
253 
/*
 * Bounds for the scheduler tunables that kern_table exposes under
 * CONFIG_SCHED_DEBUG; each pair is used as .extra1 / .extra2 of an entry.
 * The granularity pair bounds both "sched_min_granularity_ns" and
 * "sched_latency_ns".
 */
254 #ifdef CONFIG_SCHED_DEBUG
255 static int min_sched_granularity_ns = 100000;		/* 100 usecs */
256 static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
257 static int min_wakeup_granularity_ns;			/* 0 usecs */
258 static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
/* Range of the sched_tunable_scaling enum: NONE .. END-1. */
259 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
260 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
261 static int min_sched_shares_ratelimit = 100000; /* 100 usec */
262 static int max_sched_shares_ratelimit = NSEC_PER_SEC; /* 1 second */
263 #endif
264 
/*
 * Entries of the "kernel" directory (root_table points the .child of its
 * "kernel" entry at this array).
 *
 * A regular entry names its file in .procname, gives the permission bits
 * in .mode and the access routine in .proc_handler; most also point .data
 * at the backing variable and give its size in .maxlen.  Entries may pass
 * bounds to their handler through .extra1 (minimum) and .extra2 (maximum).
 * Entries with a .child instead of a handler are subdirectories.
 *
 * Many entries exist only when the #ifdef around them is satisfied.  The
 * array is terminated by an empty entry.
 */
265 static struct ctl_table kern_table[] = {
266 	{
267 		.procname	= "sched_child_runs_first",
268 		.data		= &sysctl_sched_child_runs_first,
269 		.maxlen		= sizeof(unsigned int),
270 		.mode		= 0644,
271 		.proc_handler	= proc_dointvec,
272 	},
273 #ifdef CONFIG_SCHED_DEBUG
274 	{
275 		.procname	= "sched_min_granularity_ns",
276 		.data		= &sysctl_sched_min_granularity,
277 		.maxlen		= sizeof(unsigned int),
278 		.mode		= 0644,
279 		.proc_handler	= sched_proc_update_handler,
280 		.extra1		= &min_sched_granularity_ns,
281 		.extra2		= &max_sched_granularity_ns,
282 	},
	/* Uses the same bounds as sched_min_granularity_ns above. */
283 	{
284 		.procname	= "sched_latency_ns",
285 		.data		= &sysctl_sched_latency,
286 		.maxlen		= sizeof(unsigned int),
287 		.mode		= 0644,
288 		.proc_handler	= sched_proc_update_handler,
289 		.extra1		= &min_sched_granularity_ns,
290 		.extra2		= &max_sched_granularity_ns,
291 	},
292 	{
293 		.procname	= "sched_wakeup_granularity_ns",
294 		.data		= &sysctl_sched_wakeup_granularity,
295 		.maxlen		= sizeof(unsigned int),
296 		.mode		= 0644,
297 		.proc_handler	= sched_proc_update_handler,
298 		.extra1		= &min_wakeup_granularity_ns,
299 		.extra2		= &max_wakeup_granularity_ns,
300 	},
301 	{
302 		.procname	= "sched_shares_ratelimit",
303 		.data		= &sysctl_sched_shares_ratelimit,
304 		.maxlen		= sizeof(unsigned int),
305 		.mode		= 0644,
306 		.proc_handler	= sched_proc_update_handler,
307 		.extra1		= &min_sched_shares_ratelimit,
308 		.extra2		= &max_sched_shares_ratelimit,
309 	},
310 	{
311 		.procname	= "sched_tunable_scaling",
312 		.data		= &sysctl_sched_tunable_scaling,
313 		.maxlen		= sizeof(enum sched_tunable_scaling),
314 		.mode		= 0644,
315 		.proc_handler	= sched_proc_update_handler,
316 		.extra1		= &min_sched_tunable_scaling,
317 		.extra2		= &max_sched_tunable_scaling,
318 	},
	/* Only a lower bound (zero) is supplied; .extra2 is left unset. */
319 	{
320 		.procname	= "sched_shares_thresh",
321 		.data		= &sysctl_sched_shares_thresh,
322 		.maxlen		= sizeof(unsigned int),
323 		.mode		= 0644,
324 		.proc_handler	= proc_dointvec_minmax,
325 		.extra1		= &zero,
326 	},
327 	{
328 		.procname	= "sched_migration_cost",
329 		.data		= &sysctl_sched_migration_cost,
330 		.maxlen		= sizeof(unsigned int),
331 		.mode		= 0644,
332 		.proc_handler	= proc_dointvec,
333 	},
334 	{
335 		.procname	= "sched_nr_migrate",
336 		.data		= &sysctl_sched_nr_migrate,
337 		.maxlen		= sizeof(unsigned int),
338 		.mode		= 0644,
339 		.proc_handler	= proc_dointvec,
340 	},
341 	{
342 		.procname	= "sched_time_avg",
343 		.data		= &sysctl_sched_time_avg,
344 		.maxlen		= sizeof(unsigned int),
345 		.mode		= 0644,
346 		.proc_handler	= proc_dointvec,
347 	},
	/* Bounds are zero and one, i.e. the entry is a 0/1 switch. */
348 	{
349 		.procname	= "timer_migration",
350 		.data		= &sysctl_timer_migration,
351 		.maxlen		= sizeof(unsigned int),
352 		.mode		= 0644,
353 		.proc_handler	= proc_dointvec_minmax,
354 		.extra1		= &zero,
355 		.extra2		= &one,
356 	},
357 #endif
358 	{
359 		.procname	= "sched_rt_period_us",
360 		.data		= &sysctl_sched_rt_period,
361 		.maxlen		= sizeof(unsigned int),
362 		.mode		= 0644,
363 		.proc_handler	= sched_rt_handler,
364 	},
365 	{
366 		.procname	= "sched_rt_runtime_us",
367 		.data		= &sysctl_sched_rt_runtime,
368 		.maxlen		= sizeof(int),
369 		.mode		= 0644,
370 		.proc_handler	= sched_rt_handler,
371 	},
372 	{
373 		.procname	= "sched_compat_yield",
374 		.data		= &sysctl_sched_compat_yield,
375 		.maxlen		= sizeof(unsigned int),
376 		.mode		= 0644,
377 		.proc_handler	= proc_dointvec,
378 	},
379 #ifdef CONFIG_PROVE_LOCKING
380 	{
381 		.procname	= "prove_locking",
382 		.data		= &prove_locking,
383 		.maxlen		= sizeof(int),
384 		.mode		= 0644,
385 		.proc_handler	= proc_dointvec,
386 	},
387 #endif
388 #ifdef CONFIG_LOCK_STAT
389 	{
390 		.procname	= "lock_stat",
391 		.data		= &lock_stat,
392 		.maxlen		= sizeof(int),
393 		.mode		= 0644,
394 		.proc_handler	= proc_dointvec,
395 	},
396 #endif
397 	{
398 		.procname	= "panic",
399 		.data		= &panic_timeout,
400 		.maxlen		= sizeof(int),
401 		.mode		= 0644,
402 		.proc_handler	= proc_dointvec,
403 	},
404 	{
405 		.procname	= "core_uses_pid",
406 		.data		= &core_uses_pid,
407 		.maxlen		= sizeof(int),
408 		.mode		= 0644,
409 		.proc_handler	= proc_dointvec,
410 	},
	/* String entry: .data is the char array itself, .maxlen its capacity. */
411 	{
412 		.procname	= "core_pattern",
413 		.data		= core_pattern,
414 		.maxlen		= CORENAME_MAX_SIZE,
415 		.mode		= 0644,
416 		.proc_handler	= proc_dostring,
417 	},
418 	{
419 		.procname	= "core_pipe_limit",
420 		.data		= &core_pipe_limit,
421 		.maxlen		= sizeof(unsigned int),
422 		.mode		= 0644,
423 		.proc_handler	= proc_dointvec,
424 	},
425 #ifdef CONFIG_PROC_SYSCTL
	/* No .data here; the file-local proc_taint handler serves this entry. */
426 	{
427 		.procname	= "tainted",
428 		.maxlen 	= sizeof(long),
429 		.mode		= 0644,
430 		.proc_handler	= proc_taint,
431 	},
432 #endif
433 #ifdef CONFIG_LATENCYTOP
434 	{
435 		.procname	= "latencytop",
436 		.data		= &latencytop_enabled,
437 		.maxlen		= sizeof(int),
438 		.mode		= 0644,
439 		.proc_handler	= proc_dointvec,
440 	},
441 #endif
442 #ifdef CONFIG_BLK_DEV_INITRD
443 	{
444 		.procname	= "real-root-dev",
445 		.data		= &real_root_dev,
446 		.maxlen		= sizeof(int),
447 		.mode		= 0644,
448 		.proc_handler	= proc_dointvec,
449 	},
450 #endif
451 	{
452 		.procname	= "print-fatal-signals",
453 		.data		= &print_fatal_signals,
454 		.maxlen		= sizeof(int),
455 		.mode		= 0644,
456 		.proc_handler	= proc_dointvec,
457 	},
458 #ifdef CONFIG_SPARC
459 	{
460 		.procname	= "reboot-cmd",
461 		.data		= reboot_command,
462 		.maxlen		= 256,
463 		.mode		= 0644,
464 		.proc_handler	= proc_dostring,
465 	},
466 	{
467 		.procname	= "stop-a",
468 		.data		= &stop_a_enabled,
469 		.maxlen		= sizeof (int),
470 		.mode		= 0644,
471 		.proc_handler	= proc_dointvec,
472 	},
473 	{
474 		.procname	= "scons-poweroff",
475 		.data		= &scons_pwroff,
476 		.maxlen		= sizeof (int),
477 		.mode		= 0644,
478 		.proc_handler	= proc_dointvec,
479 	},
480 #endif
481 #ifdef CONFIG_SPARC64
482 	{
483 		.procname	= "tsb-ratio",
484 		.data		= &sysctl_tsb_ratio,
485 		.maxlen		= sizeof (int),
486 		.mode		= 0644,
487 		.proc_handler	= proc_dointvec,
488 	},
489 #endif
490 #ifdef __hppa__
491 	{
492 		.procname	= "soft-power",
493 		.data		= &pwrsw_enabled,
494 		.maxlen		= sizeof (int),
495 	 	.mode		= 0644,
496 		.proc_handler	= proc_dointvec,
497 	},
498 	{
499 		.procname	= "unaligned-trap",
500 		.data		= &unaligned_enabled,
501 		.maxlen		= sizeof (int),
502 		.mode		= 0644,
503 		.proc_handler	= proc_dointvec,
504 	},
505 #endif
506 	{
507 		.procname	= "ctrl-alt-del",
508 		.data		= &C_A_D,
509 		.maxlen		= sizeof(int),
510 		.mode		= 0644,
511 		.proc_handler	= proc_dointvec,
512 	},
513 #ifdef CONFIG_FUNCTION_TRACER
514 	{
515 		.procname	= "ftrace_enabled",
516 		.data		= &ftrace_enabled,
517 		.maxlen		= sizeof(int),
518 		.mode		= 0644,
519 		.proc_handler	= ftrace_enable_sysctl,
520 	},
521 #endif
522 #ifdef CONFIG_STACK_TRACER
523 	{
524 		.procname	= "stack_tracer_enabled",
525 		.data		= &stack_tracer_enabled,
526 		.maxlen		= sizeof(int),
527 		.mode		= 0644,
528 		.proc_handler	= stack_trace_sysctl,
529 	},
530 #endif
531 #ifdef CONFIG_TRACING
532 	{
533 		.procname	= "ftrace_dump_on_oops",
534 		.data		= &ftrace_dump_on_oops,
535 		.maxlen		= sizeof(int),
536 		.mode		= 0644,
537 		.proc_handler	= proc_dointvec,
538 	},
539 #endif
540 #ifdef CONFIG_MODULES
541 	{
542 		.procname	= "modprobe",
543 		.data		= &modprobe_path,
544 		.maxlen		= KMOD_PATH_LEN,
545 		.mode		= 0644,
546 		.proc_handler	= proc_dostring,
547 	},
	/* Minimum and maximum are both "one", so 1 is the only in-range value. */
548 	{
549 		.procname	= "modules_disabled",
550 		.data		= &modules_disabled,
551 		.maxlen		= sizeof(int),
552 		.mode		= 0644,
553 		/* only handle a transition from default "0" to "1" */
554 		.proc_handler	= proc_dointvec_minmax,
555 		.extra1		= &one,
556 		.extra2		= &one,
557 	},
558 #endif
559 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
560 	{
561 		.procname	= "hotplug",
562 		.data		= &uevent_helper,
563 		.maxlen		= UEVENT_HELPER_PATH_LEN,
564 		.mode		= 0644,
565 		.proc_handler	= proc_dostring,
566 	},
567 #endif
568 #ifdef CONFIG_CHR_DEV_SG
	/* Mode 0444: no write permission bits. */
569 	{
570 		.procname	= "sg-big-buff",
571 		.data		= &sg_big_buff,
572 		.maxlen		= sizeof (int),
573 		.mode		= 0444,
574 		.proc_handler	= proc_dointvec,
575 	},
576 #endif
577 #ifdef CONFIG_BSD_PROCESS_ACCT
	/* .maxlen covers three ints starting at acct_parm. */
578 	{
579 		.procname	= "acct",
580 		.data		= &acct_parm,
581 		.maxlen		= 3*sizeof(int),
582 		.mode		= 0644,
583 		.proc_handler	= proc_dointvec,
584 	},
585 #endif
586 #ifdef CONFIG_MAGIC_SYSRQ
	/* Backed by the file-local __sysrq_enabled via sysrq_sysctl_handler. */
587 	{
588 		.procname	= "sysrq",
589 		.data		= &__sysrq_enabled,
590 		.maxlen		= sizeof (int),
591 		.mode		= 0644,
592 		.proc_handler	= sysrq_sysctl_handler,
593 	},
594 #endif
595 #ifdef CONFIG_PROC_SYSCTL
	/* .data is NULL; proc_do_cad_pid serves the entry.  Mode 0600: owner only. */
596 	{
597 		.procname	= "cad_pid",
598 		.data		= NULL,
599 		.maxlen		= sizeof (int),
600 		.mode		= 0600,
601 		.proc_handler	= proc_do_cad_pid,
602 	},
603 #endif
604 	{
605 		.procname	= "threads-max",
606 		.data		= &max_threads,
607 		.maxlen		= sizeof(int),
608 		.mode		= 0644,
609 		.proc_handler	= proc_dointvec,
610 	},
	/* Subdirectory; its entries come from the extern random_table. */
611 	{
612 		.procname	= "random",
613 		.mode		= 0555,
614 		.child		= random_table,
615 	},
616 	{
617 		.procname	= "overflowuid",
618 		.data		= &overflowuid,
619 		.maxlen		= sizeof(int),
620 		.mode		= 0644,
621 		.proc_handler	= proc_dointvec_minmax,
622 		.extra1		= &minolduid,
623 		.extra2		= &maxolduid,
624 	},
	/* Uses the same minolduid/maxolduid bounds as overflowuid. */
625 	{
626 		.procname	= "overflowgid",
627 		.data		= &overflowgid,
628 		.maxlen		= sizeof(int),
629 		.mode		= 0644,
630 		.proc_handler	= proc_dointvec_minmax,
631 		.extra1		= &minolduid,
632 		.extra2		= &maxolduid,
633 	},
634 #ifdef CONFIG_S390
635 #ifdef CONFIG_MATHEMU
636 	{
637 		.procname	= "ieee_emulation_warnings",
638 		.data		= &sysctl_ieee_emulation_warnings,
639 		.maxlen		= sizeof(int),
640 		.mode		= 0644,
641 		.proc_handler	= proc_dointvec,
642 	},
643 #endif
	/*
	 * NOTE(review): backed by show_unhandled_signals, not by the
	 * sysctl_userprocess_debug extern declared earlier in this file.
	 */
644 	{
645 		.procname	= "userprocess_debug",
646 		.data		= &show_unhandled_signals,
647 		.maxlen		= sizeof(int),
648 		.mode		= 0644,
649 		.proc_handler	= proc_dointvec,
650 	},
651 #endif
652 	{
653 		.procname	= "pid_max",
654 		.data		= &pid_max,
655 		.maxlen		= sizeof (int),
656 		.mode		= 0644,
657 		.proc_handler	= proc_dointvec_minmax,
658 		.extra1		= &pid_max_min,
659 		.extra2		= &pid_max_max,
660 	},
661 	{
662 		.procname	= "panic_on_oops",
663 		.data		= &panic_on_oops,
664 		.maxlen		= sizeof(int),
665 		.mode		= 0644,
666 		.proc_handler	= proc_dointvec,
667 	},
668 #if defined CONFIG_PRINTK
	/*
	 * .maxlen covers four ints starting at console_loglevel; presumably
	 * the other three log-level values follow it in memory -- TODO confirm.
	 */
669 	{
670 		.procname	= "printk",
671 		.data		= &console_loglevel,
672 		.maxlen		= 4*sizeof(int),
673 		.mode		= 0644,
674 		.proc_handler	= proc_dointvec,
675 	},
676 	{
677 		.procname	= "printk_ratelimit",
678 		.data		= &printk_ratelimit_state.interval,
679 		.maxlen		= sizeof(int),
680 		.mode		= 0644,
681 		.proc_handler	= proc_dointvec_jiffies,
682 	},
683 	{
684 		.procname	= "printk_ratelimit_burst",
685 		.data		= &printk_ratelimit_state.burst,
686 		.maxlen		= sizeof(int),
687 		.mode		= 0644,
688 		.proc_handler	= proc_dointvec,
689 	},
690 	{
691 		.procname	= "printk_delay",
692 		.data		= &printk_delay_msec,
693 		.maxlen		= sizeof(int),
694 		.mode		= 0644,
695 		.proc_handler	= proc_dointvec_minmax,
696 		.extra1		= &zero,
697 		.extra2		= &ten_thousand,
698 	},
699 #endif
	/* Mode 0444: exposes the file-local copy of NGROUPS_MAX without write bits. */
700 	{
701 		.procname	= "ngroups_max",
702 		.data		= &ngroups_max,
703 		.maxlen		= sizeof (int),
704 		.mode		= 0444,
705 		.proc_handler	= proc_dointvec,
706 	},
707 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
708 	{
709 		.procname       = "unknown_nmi_panic",
710 		.data           = &unknown_nmi_panic,
711 		.maxlen         = sizeof (int),
712 		.mode           = 0644,
713 		.proc_handler   = proc_dointvec,
714 	},
715 	{
716 		.procname       = "nmi_watchdog",
717 		.data           = &nmi_watchdog_enabled,
718 		.maxlen         = sizeof (int),
719 		.mode           = 0644,
720 		.proc_handler   = proc_nmi_enabled,
721 	},
722 #endif
723 #if defined(CONFIG_X86)
724 	{
725 		.procname	= "panic_on_unrecovered_nmi",
726 		.data		= &panic_on_unrecovered_nmi,
727 		.maxlen		= sizeof(int),
728 		.mode		= 0644,
729 		.proc_handler	= proc_dointvec,
730 	},
731 	{
732 		.procname	= "panic_on_io_nmi",
733 		.data		= &panic_on_io_nmi,
734 		.maxlen		= sizeof(int),
735 		.mode		= 0644,
736 		.proc_handler	= proc_dointvec,
737 	},
738 	{
739 		.procname	= "bootloader_type",
740 		.data		= &bootloader_type,
741 		.maxlen		= sizeof (int),
742 		.mode		= 0444,
743 		.proc_handler	= proc_dointvec,
744 	},
745 	{
746 		.procname	= "bootloader_version",
747 		.data		= &bootloader_version,
748 		.maxlen		= sizeof (int),
749 		.mode		= 0444,
750 		.proc_handler	= proc_dointvec,
751 	},
752 	{
753 		.procname	= "kstack_depth_to_print",
754 		.data		= &kstack_depth_to_print,
755 		.maxlen		= sizeof(int),
756 		.mode		= 0644,
757 		.proc_handler	= proc_dointvec,
758 	},
759 	{
760 		.procname	= "io_delay_type",
761 		.data		= &io_delay_type,
762 		.maxlen		= sizeof(int),
763 		.mode		= 0644,
764 		.proc_handler	= proc_dointvec,
765 	},
766 #endif
767 #if defined(CONFIG_MMU)
768 	{
769 		.procname	= "randomize_va_space",
770 		.data		= &randomize_va_space,
771 		.maxlen		= sizeof(int),
772 		.mode		= 0644,
773 		.proc_handler	= proc_dointvec,
774 	},
775 #endif
776 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
777 	{
778 		.procname	= "spin_retry",
779 		.data		= &spin_retry,
780 		.maxlen		= sizeof (int),
781 		.mode		= 0644,
782 		.proc_handler	= proc_dointvec,
783 	},
784 #endif
785 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
	/* unsigned long value; no .extra1/.extra2 bounds are supplied. */
786 	{
787 		.procname	= "acpi_video_flags",
788 		.data		= &acpi_realmode_flags,
789 		.maxlen		= sizeof (unsigned long),
790 		.mode		= 0644,
791 		.proc_handler	= proc_doulongvec_minmax,
792 	},
793 #endif
794 #ifdef CONFIG_IA64
795 	{
796 		.procname	= "ignore-unaligned-usertrap",
797 		.data		= &no_unaligned_warning,
798 		.maxlen		= sizeof (int),
799 	 	.mode		= 0644,
800 		.proc_handler	= proc_dointvec,
801 	},
802 	{
803 		.procname	= "unaligned-dump-stack",
804 		.data		= &unaligned_dump_stack,
805 		.maxlen		= sizeof (int),
806 		.mode		= 0644,
807 		.proc_handler	= proc_dointvec,
808 	},
809 #endif
810 #ifdef CONFIG_DETECT_SOFTLOCKUP
811 	{
812 		.procname	= "softlockup_panic",
813 		.data		= &softlockup_panic,
814 		.maxlen		= sizeof(int),
815 		.mode		= 0644,
816 		.proc_handler	= proc_dointvec_minmax,
817 		.extra1		= &zero,
818 		.extra2		= &one,
819 	},
	/* Bounds are neg_one (-1) and sixty (60). */
820 	{
821 		.procname	= "softlockup_thresh",
822 		.data		= &softlockup_thresh,
823 		.maxlen		= sizeof(int),
824 		.mode		= 0644,
825 		.proc_handler	= proc_dosoftlockup_thresh,
826 		.extra1		= &neg_one,
827 		.extra2		= &sixty,
828 	},
829 #endif
830 #ifdef CONFIG_DETECT_HUNG_TASK
831 	{
832 		.procname	= "hung_task_panic",
833 		.data		= &sysctl_hung_task_panic,
834 		.maxlen		= sizeof(int),
835 		.mode		= 0644,
836 		.proc_handler	= proc_dointvec_minmax,
837 		.extra1		= &zero,
838 		.extra2		= &one,
839 	},
840 	{
841 		.procname	= "hung_task_check_count",
842 		.data		= &sysctl_hung_task_check_count,
843 		.maxlen		= sizeof(unsigned long),
844 		.mode		= 0644,
845 		.proc_handler	= proc_doulongvec_minmax,
846 	},
847 	{
848 		.procname	= "hung_task_timeout_secs",
849 		.data		= &sysctl_hung_task_timeout_secs,
850 		.maxlen		= sizeof(unsigned long),
851 		.mode		= 0644,
852 		.proc_handler	= proc_dohung_task_timeout_secs,
853 	},
854 	{
855 		.procname	= "hung_task_warnings",
856 		.data		= &sysctl_hung_task_warnings,
857 		.maxlen		= sizeof(unsigned long),
858 		.mode		= 0644,
859 		.proc_handler	= proc_doulongvec_minmax,
860 	},
861 #endif
862 #ifdef CONFIG_COMPAT
863 	{
864 		.procname	= "compat-log",
865 		.data		= &compat_log,
866 		.maxlen		= sizeof (int),
867 	 	.mode		= 0644,
868 		.proc_handler	= proc_dointvec,
869 	},
870 #endif
871 #ifdef CONFIG_RT_MUTEXES
872 	{
873 		.procname	= "max_lock_depth",
874 		.data		= &max_lock_depth,
875 		.maxlen		= sizeof(int),
876 		.mode		= 0644,
877 		.proc_handler	= proc_dointvec,
878 	},
879 #endif
880 	{
881 		.procname	= "poweroff_cmd",
882 		.data		= &poweroff_cmd,
883 		.maxlen		= POWEROFF_CMD_PATH_LEN,
884 		.mode		= 0644,
885 		.proc_handler	= proc_dostring,
886 	},
887 #ifdef CONFIG_KEYS
	/* Subdirectory; its entries come from key_sysctls. */
888 	{
889 		.procname	= "keys",
890 		.mode		= 0555,
891 		.child		= key_sysctls,
892 	},
893 #endif
894 #ifdef CONFIG_RCU_TORTURE_TEST
895 	{
896 		.procname       = "rcutorture_runnable",
897 		.data           = &rcutorture_runnable,
898 		.maxlen         = sizeof(int),
899 		.mode           = 0644,
900 		.proc_handler	= proc_dointvec,
901 	},
902 #endif
903 #ifdef CONFIG_SLOW_WORK
	/* Subdirectory; its entries come from slow_work_sysctls. */
904 	{
905 		.procname	= "slow-work",
906 		.mode		= 0555,
907 		.child		= slow_work_sysctls,
908 	},
909 #endif
910 #ifdef CONFIG_PERF_EVENTS
	/* The perf entries size .maxlen from the variable itself. */
911 	{
912 		.procname	= "perf_event_paranoid",
913 		.data		= &sysctl_perf_event_paranoid,
914 		.maxlen		= sizeof(sysctl_perf_event_paranoid),
915 		.mode		= 0644,
916 		.proc_handler	= proc_dointvec,
917 	},
918 	{
919 		.procname	= "perf_event_mlock_kb",
920 		.data		= &sysctl_perf_event_mlock,
921 		.maxlen		= sizeof(sysctl_perf_event_mlock),
922 		.mode		= 0644,
923 		.proc_handler	= proc_dointvec,
924 	},
925 	{
926 		.procname	= "perf_event_max_sample_rate",
927 		.data		= &sysctl_perf_event_sample_rate,
928 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
929 		.mode		= 0644,
930 		.proc_handler	= proc_dointvec,
931 	},
932 #endif
933 #ifdef CONFIG_KMEMCHECK
934 	{
935 		.procname	= "kmemcheck",
936 		.data		= &kmemcheck_enabled,
937 		.maxlen		= sizeof(int),
938 		.mode		= 0644,
939 		.proc_handler	= proc_dointvec,
940 	},
941 #endif
942 #ifdef CONFIG_BLOCK
943 	{
944 		.procname	= "blk_iopoll",
945 		.data		= &blk_iopoll_enabled,
946 		.maxlen		= sizeof(int),
947 		.mode		= 0644,
948 		.proc_handler	= proc_dointvec,
949 	},
950 #endif
951 /*
952  * NOTE: do not add new entries to this table unless you have read
953  * Documentation/sysctl/ctl_unnumbered.txt
954  */
	/* empty entry: end of table */
955 	{ }
956 };
957 
958 static struct ctl_table vm_table[] = {
959 	{
960 		.procname	= "overcommit_memory",
961 		.data		= &sysctl_overcommit_memory,
962 		.maxlen		= sizeof(sysctl_overcommit_memory),
963 		.mode		= 0644,
964 		.proc_handler	= proc_dointvec,
965 	},
966 	{
967 		.procname	= "panic_on_oom",
968 		.data		= &sysctl_panic_on_oom,
969 		.maxlen		= sizeof(sysctl_panic_on_oom),
970 		.mode		= 0644,
971 		.proc_handler	= proc_dointvec,
972 	},
973 	{
974 		.procname	= "oom_kill_allocating_task",
975 		.data		= &sysctl_oom_kill_allocating_task,
976 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
977 		.mode		= 0644,
978 		.proc_handler	= proc_dointvec,
979 	},
980 	{
981 		.procname	= "oom_dump_tasks",
982 		.data		= &sysctl_oom_dump_tasks,
983 		.maxlen		= sizeof(sysctl_oom_dump_tasks),
984 		.mode		= 0644,
985 		.proc_handler	= proc_dointvec,
986 	},
987 	{
988 		.procname	= "overcommit_ratio",
989 		.data		= &sysctl_overcommit_ratio,
990 		.maxlen		= sizeof(sysctl_overcommit_ratio),
991 		.mode		= 0644,
992 		.proc_handler	= proc_dointvec,
993 	},
994 	{
995 		.procname	= "page-cluster",
996 		.data		= &page_cluster,
997 		.maxlen		= sizeof(int),
998 		.mode		= 0644,
999 		.proc_handler	= proc_dointvec,
1000 	},
1001 	{
1002 		.procname	= "dirty_background_ratio",
1003 		.data		= &dirty_background_ratio,
1004 		.maxlen		= sizeof(dirty_background_ratio),
1005 		.mode		= 0644,
1006 		.proc_handler	= dirty_background_ratio_handler,
1007 		.extra1		= &zero,
1008 		.extra2		= &one_hundred,
1009 	},
1010 	{
1011 		.procname	= "dirty_background_bytes",
1012 		.data		= &dirty_background_bytes,
1013 		.maxlen		= sizeof(dirty_background_bytes),
1014 		.mode		= 0644,
1015 		.proc_handler	= dirty_background_bytes_handler,
1016 		.extra1		= &one_ul,
1017 	},
1018 	{
1019 		.procname	= "dirty_ratio",
1020 		.data		= &vm_dirty_ratio,
1021 		.maxlen		= sizeof(vm_dirty_ratio),
1022 		.mode		= 0644,
1023 		.proc_handler	= dirty_ratio_handler,
1024 		.extra1		= &zero,
1025 		.extra2		= &one_hundred,
1026 	},
1027 	{
1028 		.procname	= "dirty_bytes",
1029 		.data		= &vm_dirty_bytes,
1030 		.maxlen		= sizeof(vm_dirty_bytes),
1031 		.mode		= 0644,
1032 		.proc_handler	= dirty_bytes_handler,
1033 		.extra1		= &dirty_bytes_min,
1034 	},
1035 	{
1036 		.procname	= "dirty_writeback_centisecs",
1037 		.data		= &dirty_writeback_interval,
1038 		.maxlen		= sizeof(dirty_writeback_interval),
1039 		.mode		= 0644,
1040 		.proc_handler	= dirty_writeback_centisecs_handler,
1041 	},
1042 	{
1043 		.procname	= "dirty_expire_centisecs",
1044 		.data		= &dirty_expire_interval,
1045 		.maxlen		= sizeof(dirty_expire_interval),
1046 		.mode		= 0644,
1047 		.proc_handler	= proc_dointvec,
1048 	},
1049 	{
1050 		.procname	= "nr_pdflush_threads",
1051 		.data		= &nr_pdflush_threads,
1052 		.maxlen		= sizeof nr_pdflush_threads,
1053 		.mode		= 0444 /* read-only*/,
1054 		.proc_handler	= proc_dointvec,
1055 	},
1056 	{
1057 		.procname	= "swappiness",
1058 		.data		= &vm_swappiness,
1059 		.maxlen		= sizeof(vm_swappiness),
1060 		.mode		= 0644,
1061 		.proc_handler	= proc_dointvec_minmax,
1062 		.extra1		= &zero,
1063 		.extra2		= &one_hundred,
1064 	},
1065 #ifdef CONFIG_HUGETLB_PAGE
1066 	{
1067 		.procname	= "nr_hugepages",
1068 		.data		= NULL,
1069 		.maxlen		= sizeof(unsigned long),
1070 		.mode		= 0644,
1071 		.proc_handler	= hugetlb_sysctl_handler,
1072 		.extra1		= (void *)&hugetlb_zero,
1073 		.extra2		= (void *)&hugetlb_infinity,
1074 	},
1075 #ifdef CONFIG_NUMA
1076 	{
1077 		.procname       = "nr_hugepages_mempolicy",
1078 		.data           = NULL,
1079 		.maxlen         = sizeof(unsigned long),
1080 		.mode           = 0644,
1081 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1082 		.extra1		= (void *)&hugetlb_zero,
1083 		.extra2		= (void *)&hugetlb_infinity,
1084 	},
1085 #endif
1086 	 {
1087 		.procname	= "hugetlb_shm_group",
1088 		.data		= &sysctl_hugetlb_shm_group,
1089 		.maxlen		= sizeof(gid_t),
1090 		.mode		= 0644,
1091 		.proc_handler	= proc_dointvec,
1092 	 },
1093 	 {
1094 		.procname	= "hugepages_treat_as_movable",
1095 		.data		= &hugepages_treat_as_movable,
1096 		.maxlen		= sizeof(int),
1097 		.mode		= 0644,
1098 		.proc_handler	= hugetlb_treat_movable_handler,
1099 	},
1100 	{
1101 		.procname	= "nr_overcommit_hugepages",
1102 		.data		= NULL,
1103 		.maxlen		= sizeof(unsigned long),
1104 		.mode		= 0644,
1105 		.proc_handler	= hugetlb_overcommit_handler,
1106 		.extra1		= (void *)&hugetlb_zero,
1107 		.extra2		= (void *)&hugetlb_infinity,
1108 	},
1109 #endif
1110 	{
1111 		.procname	= "lowmem_reserve_ratio",
1112 		.data		= &sysctl_lowmem_reserve_ratio,
1113 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
1114 		.mode		= 0644,
1115 		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
1116 	},
1117 	{
1118 		.procname	= "drop_caches",
1119 		.data		= &sysctl_drop_caches,
1120 		.maxlen		= sizeof(int),
1121 		.mode		= 0644,
1122 		.proc_handler	= drop_caches_sysctl_handler,
1123 	},
1124 	{
1125 		.procname	= "min_free_kbytes",
1126 		.data		= &min_free_kbytes,
1127 		.maxlen		= sizeof(min_free_kbytes),
1128 		.mode		= 0644,
1129 		.proc_handler	= min_free_kbytes_sysctl_handler,
1130 		.extra1		= &zero,
1131 	},
1132 	{
1133 		.procname	= "percpu_pagelist_fraction",
1134 		.data		= &percpu_pagelist_fraction,
1135 		.maxlen		= sizeof(percpu_pagelist_fraction),
1136 		.mode		= 0644,
1137 		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
1138 		.extra1		= &min_percpu_pagelist_fract,
1139 	},
1140 #ifdef CONFIG_MMU
1141 	{
1142 		.procname	= "max_map_count",
1143 		.data		= &sysctl_max_map_count,
1144 		.maxlen		= sizeof(sysctl_max_map_count),
1145 		.mode		= 0644,
1146 		.proc_handler	= proc_dointvec_minmax,
1147 		.extra1		= &zero,
1148 	},
1149 #else
1150 	{
1151 		.procname	= "nr_trim_pages",
1152 		.data		= &sysctl_nr_trim_pages,
1153 		.maxlen		= sizeof(sysctl_nr_trim_pages),
1154 		.mode		= 0644,
1155 		.proc_handler	= proc_dointvec_minmax,
1156 		.extra1		= &zero,
1157 	},
1158 #endif
1159 	{
1160 		.procname	= "laptop_mode",
1161 		.data		= &laptop_mode,
1162 		.maxlen		= sizeof(laptop_mode),
1163 		.mode		= 0644,
1164 		.proc_handler	= proc_dointvec_jiffies,
1165 	},
1166 	{
1167 		.procname	= "block_dump",
1168 		.data		= &block_dump,
1169 		.maxlen		= sizeof(block_dump),
1170 		.mode		= 0644,
1171 		.proc_handler	= proc_dointvec,
1172 		.extra1		= &zero,
1173 	},
1174 	{
1175 		.procname	= "vfs_cache_pressure",
1176 		.data		= &sysctl_vfs_cache_pressure,
1177 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
1178 		.mode		= 0644,
1179 		.proc_handler	= proc_dointvec,
1180 		.extra1		= &zero,
1181 	},
1182 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1183 	{
1184 		.procname	= "legacy_va_layout",
1185 		.data		= &sysctl_legacy_va_layout,
1186 		.maxlen		= sizeof(sysctl_legacy_va_layout),
1187 		.mode		= 0644,
1188 		.proc_handler	= proc_dointvec,
1189 		.extra1		= &zero,
1190 	},
1191 #endif
1192 #ifdef CONFIG_NUMA
1193 	{
1194 		.procname	= "zone_reclaim_mode",
1195 		.data		= &zone_reclaim_mode,
1196 		.maxlen		= sizeof(zone_reclaim_mode),
1197 		.mode		= 0644,
1198 		.proc_handler	= proc_dointvec,
1199 		.extra1		= &zero,
1200 	},
1201 	{
1202 		.procname	= "min_unmapped_ratio",
1203 		.data		= &sysctl_min_unmapped_ratio,
1204 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
1205 		.mode		= 0644,
1206 		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
1207 		.extra1		= &zero,
1208 		.extra2		= &one_hundred,
1209 	},
1210 	{
1211 		.procname	= "min_slab_ratio",
1212 		.data		= &sysctl_min_slab_ratio,
1213 		.maxlen		= sizeof(sysctl_min_slab_ratio),
1214 		.mode		= 0644,
1215 		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
1216 		.extra1		= &zero,
1217 		.extra2		= &one_hundred,
1218 	},
1219 #endif
1220 #ifdef CONFIG_SMP
1221 	{
1222 		.procname	= "stat_interval",
1223 		.data		= &sysctl_stat_interval,
1224 		.maxlen		= sizeof(sysctl_stat_interval),
1225 		.mode		= 0644,
1226 		.proc_handler	= proc_dointvec_jiffies,
1227 	},
1228 #endif
1229 #ifdef CONFIG_MMU
1230 	{
1231 		.procname	= "mmap_min_addr",
1232 		.data		= &dac_mmap_min_addr,
1233 		.maxlen		= sizeof(unsigned long),
1234 		.mode		= 0644,
1235 		.proc_handler	= mmap_min_addr_handler,
1236 	},
1237 #endif
1238 #ifdef CONFIG_NUMA
1239 	{
1240 		.procname	= "numa_zonelist_order",
1241 		.data		= &numa_zonelist_order,
1242 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1243 		.mode		= 0644,
1244 		.proc_handler	= numa_zonelist_order_handler,
1245 	},
1246 #endif
1247 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1248    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1249 	{
1250 		.procname	= "vdso_enabled",
1251 		.data		= &vdso_enabled,
1252 		.maxlen		= sizeof(vdso_enabled),
1253 		.mode		= 0644,
1254 		.proc_handler	= proc_dointvec,
1255 		.extra1		= &zero,
1256 	},
1257 #endif
1258 #ifdef CONFIG_HIGHMEM
1259 	{
1260 		.procname	= "highmem_is_dirtyable",
1261 		.data		= &vm_highmem_is_dirtyable,
1262 		.maxlen		= sizeof(vm_highmem_is_dirtyable),
1263 		.mode		= 0644,
1264 		.proc_handler	= proc_dointvec_minmax,
1265 		.extra1		= &zero,
1266 		.extra2		= &one,
1267 	},
1268 #endif
1269 	{
1270 		.procname	= "scan_unevictable_pages",
1271 		.data		= &scan_unevictable_pages,
1272 		.maxlen		= sizeof(scan_unevictable_pages),
1273 		.mode		= 0644,
1274 		.proc_handler	= scan_unevictable_handler,
1275 	},
1276 #ifdef CONFIG_MEMORY_FAILURE
1277 	{
1278 		.procname	= "memory_failure_early_kill",
1279 		.data		= &sysctl_memory_failure_early_kill,
1280 		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
1281 		.mode		= 0644,
1282 		.proc_handler	= proc_dointvec_minmax,
1283 		.extra1		= &zero,
1284 		.extra2		= &one,
1285 	},
1286 	{
1287 		.procname	= "memory_failure_recovery",
1288 		.data		= &sysctl_memory_failure_recovery,
1289 		.maxlen		= sizeof(sysctl_memory_failure_recovery),
1290 		.mode		= 0644,
1291 		.proc_handler	= proc_dointvec_minmax,
1292 		.extra1		= &zero,
1293 		.extra2		= &one,
1294 	},
1295 #endif
1296 
1297 /*
1298  * NOTE: do not add new entries to this table unless you have read
1299  * Documentation/sysctl/ctl_unnumbered.txt
1300  */
1301 	{ }
1302 };
1303 
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Table containing only the terminating sentinel.  It is used as the
 * .child of the "binfmt_misc" directory entry in fs_table, so that
 * directory is registered without any sysctl files of its own.
 */
static struct ctl_table binfmt_misc_table[] = {
	{ }
};
#endif
1309 
/*
 * fs_table - sysctl entries for filesystem-related statistics and tunables.
 *
 * Entries with mode 0444 are read-only, entries with mode 0644 are
 * writable, and entries with a .child pointer (mode 0555) are directories
 * whose contents come from the referenced table.  The array is terminated
 * by an empty sentinel entry (procname == NULL).
 */
static struct ctl_table fs_table[] = {
	{
		.procname	= "inode-nr",
		.data		= &inodes_stat,
		.maxlen		= 2*sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{
		/* same backing object as "inode-nr", but exposes 7 ints */
		.procname	= "inode-state",
		.data		= &inodes_stat,
		.maxlen		= 7*sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "file-nr",
		.data		= &files_stat,
		.maxlen		= 3*sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_nr_files,
	},
	{
		.procname	= "file-max",
		.data		= &files_stat.max_files,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "nr_open",
		.data		= &sysctl_nr_open,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &sysctl_nr_open_min,
		.extra2		= &sysctl_nr_open_max,
	},
	{
		.procname	= "dentry-state",
		.data		= &dentry_stat,
		.maxlen		= 6*sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "overflowuid",
		.data		= &fs_overflowuid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
	{
		/* NOTE(review): reuses the uid bounds for the gid value */
		.procname	= "overflowgid",
		.data		= &fs_overflowgid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &minolduid,
		.extra2		= &maxolduid,
	},
#ifdef CONFIG_FILE_LOCKING
	{
		.procname	= "leases-enable",
		.data		= &leases_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_DNOTIFY
	{
		.procname	= "dir-notify-enable",
		.data		= &dir_notify_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
	/* everything up to the matching #endif is only built with CONFIG_MMU */
#ifdef CONFIG_MMU
#ifdef CONFIG_FILE_LOCKING
	{
		.procname	= "lease-break-time",
		.data		= &lease_break_time,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
#ifdef CONFIG_AIO
	{
		.procname	= "aio-nr",
		.data		= &aio_nr,
		.maxlen		= sizeof(aio_nr),
		.mode		= 0444,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "aio-max-nr",
		.data		= &aio_max_nr,
		.maxlen		= sizeof(aio_max_nr),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
#endif /* CONFIG_AIO */
#ifdef CONFIG_INOTIFY_USER
	{
		/* directory; contents supplied by inotify_table */
		.procname	= "inotify",
		.mode		= 0555,
		.child		= inotify_table,
	},
#endif
#ifdef CONFIG_EPOLL
	{
		/* directory; contents supplied by epoll_table */
		.procname	= "epoll",
		.mode		= 0555,
		.child		= epoll_table,
	},
#endif
#endif
	{
		.procname	= "suid_dumpable",
		.data		= &suid_dumpable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &two,
	},
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
	{
		/* directory; binfmt_misc_table holds only a sentinel */
		.procname	= "binfmt_misc",
		.mode		= 0555,
		.child		= binfmt_misc_table,
	},
#endif
	{
		/* only .extra1 is set here; no .extra2 is supplied */
		.procname	= "pipe-max-pages",
		.data		= &pipe_max_pages,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.extra1		= &two,
	},
/*
 * NOTE: do not add new entries to this table unless you have read
 * Documentation/sysctl/ctl_unnumbered.txt
 */
	{ }
};
1462 
/*
 * debug_table - debugging-related sysctl entries.
 *
 * Both entries are conditional on the kernel configuration, so the table
 * may contain nothing but the terminating sentinel.
 */
static struct ctl_table debug_table[] = {
#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
    defined(CONFIG_S390)
	{
		.procname	= "exception-trace",
		.data		= &show_unhandled_signals,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
#endif
#if defined(CONFIG_OPTPROBES)
	{
		.procname	= "kprobes-optimization",
		.data		= &sysctl_kprobes_optimization,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_kprobes_optimization_handler,
		.extra1		= &zero,
		.extra2		= &one,
	},
#endif
	{ }
};
1487 
/* dev_table - contains only the terminating sentinel; no entries are defined here. */
static struct ctl_table dev_table[] = {
	{ }
};
1491 
/*
 * Serialises updates to ctl_table_header state (used, count,
 * unregistering, attachment) and to the header and root lists.
 */
static DEFINE_SPINLOCK(sysctl_lock);
1493 
1494 /* called under sysctl_lock */
1495 static int use_table(struct ctl_table_header *p)
1496 {
1497 	if (unlikely(p->unregistering))
1498 		return 0;
1499 	p->used++;
1500 	return 1;
1501 }
1502 
1503 /* called under sysctl_lock */
1504 static void unuse_table(struct ctl_table_header *p)
1505 {
1506 	if (!--p->used)
1507 		if (unlikely(p->unregistering))
1508 			complete(p->unregistering);
1509 }
1510 
/*
 * Mark @p as being unregistered and unlink it from its header list.
 *
 * Called under sysctl_lock.  If @p still has users, the lock is dropped
 * while waiting for the last user to finish and is reacquired before the
 * entry is removed from the list, so the function always returns with
 * sysctl_lock held.
 */
static void start_unregistering(struct ctl_table_header *p)
{
	/*
	 * if p->used is 0, nobody will ever touch that entry again;
	 * we'll eliminate all paths to it before dropping sysctl_lock
	 */
	if (unlikely(p->used)) {
		struct completion wait;
		init_completion(&wait);
		/* unuse_table() completes this once p->used reaches zero */
		p->unregistering = &wait;
		spin_unlock(&sysctl_lock);
		wait_for_completion(&wait);
		spin_lock(&sysctl_lock);
	} else {
		/* anything non-NULL; we'll never dereference it */
		p->unregistering = ERR_PTR(-EINVAL);
	}
	/*
	 * do not remove from the list until nobody holds it; walking the
	 * list in do_sysctl() relies on that.
	 */
	list_del_init(&p->ctl_entry);
}
1535 
1536 void sysctl_head_get(struct ctl_table_header *head)
1537 {
1538 	spin_lock(&sysctl_lock);
1539 	head->count++;
1540 	spin_unlock(&sysctl_lock);
1541 }
1542 
1543 void sysctl_head_put(struct ctl_table_header *head)
1544 {
1545 	spin_lock(&sysctl_lock);
1546 	if (!--head->count)
1547 		kfree(head);
1548 	spin_unlock(&sysctl_lock);
1549 }
1550 
1551 struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1552 {
1553 	if (!head)
1554 		BUG();
1555 	spin_lock(&sysctl_lock);
1556 	if (!use_table(head))
1557 		head = ERR_PTR(-ENOENT);
1558 	spin_unlock(&sysctl_lock);
1559 	return head;
1560 }
1561 
1562 void sysctl_head_finish(struct ctl_table_header *head)
1563 {
1564 	if (!head)
1565 		return;
1566 	spin_lock(&sysctl_lock);
1567 	unuse_table(head);
1568 	spin_unlock(&sysctl_lock);
1569 }
1570 
1571 static struct ctl_table_set *
1572 lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1573 {
1574 	struct ctl_table_set *set = &root->default_set;
1575 	if (root->lookup)
1576 		set = root->lookup(root, namespaces);
1577 	return set;
1578 }
1579 
1580 static struct list_head *
1581 lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1582 {
1583 	struct ctl_table_set *set = lookup_header_set(root, namespaces);
1584 	return &set->list;
1585 }
1586 
/*
 * __sysctl_head_next - step to the next usable table header
 * @namespaces: namespaces used to select each root's header list
 * @prev: header returned by the previous call, or NULL to start over
 *
 * The usage reference on @prev (if any) is dropped, and the walk continues
 * with the entry following it.  With @prev == NULL the walk starts at
 * root_table_header.  Headers that are being unregistered are skipped.
 *
 * Returns the next header with a usage reference taken on it, or NULL once
 * the walk over the roots wraps around to sysctl_table_root.
 */
struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
					    struct ctl_table_header *prev)
{
	struct ctl_table_root *root;
	struct list_head *header_list;
	struct ctl_table_header *head;
	struct list_head *tmp;

	spin_lock(&sysctl_lock);
	if (prev) {
		/* resume after @prev: release it and jump into the loop */
		head = prev;
		tmp = &prev->ctl_entry;
		unuse_table(prev);
		goto next;
	}
	tmp = &root_table_header.ctl_entry;
	for (;;) {
		head = list_entry(tmp, struct ctl_table_header, ctl_entry);

		/* use_table() fails for headers that are being unregistered */
		if (!use_table(head))
			goto next;
		spin_unlock(&sysctl_lock);
		return head;
	next:
		root = head->root;
		tmp = tmp->next;
		header_list = lookup_header_list(root, namespaces);
		/* still inside the current list: examine the next header */
		if (tmp != header_list)
			continue;

		/* end of this list: find the next root with a non-empty list */
		do {
			root = list_entry(root->root_list.next,
					struct ctl_table_root, root_list);
			/* back at the first root: every list has been visited */
			if (root == &sysctl_table_root)
				goto out;
			header_list = lookup_header_list(root, namespaces);
		} while (list_empty(header_list));
		tmp = header_list->next;
	}
out:
	spin_unlock(&sysctl_lock);
	return NULL;
}
1630 
1631 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1632 {
1633 	return __sysctl_head_next(current->nsproxy, prev);
1634 }
1635 
1636 void register_sysctl_root(struct ctl_table_root *root)
1637 {
1638 	spin_lock(&sysctl_lock);
1639 	list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1640 	spin_unlock(&sysctl_lock);
1641 }
1642 
1643 /*
1644  * sysctl_perm does NOT grant the superuser all rights automatically, because
1645  * some sysctl variables are readonly even to root.
1646  */
1647 
1648 static int test_perm(int mode, int op)
1649 {
1650 	if (!current_euid())
1651 		mode >>= 6;
1652 	else if (in_egroup_p(0))
1653 		mode >>= 3;
1654 	if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1655 		return 0;
1656 	return -EACCES;
1657 }
1658 
1659 int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1660 {
1661 	int error;
1662 	int mode;
1663 
1664 	error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
1665 	if (error)
1666 		return error;
1667 
1668 	if (root->permissions)
1669 		mode = root->permissions(root, current->nsproxy, table);
1670 	else
1671 		mode = table->mode;
1672 
1673 	return test_perm(mode, op);
1674 }
1675 
1676 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1677 {
1678 	for (; table->procname; table++) {
1679 		table->parent = parent;
1680 		if (table->child)
1681 			sysctl_set_parent(table, table->child);
1682 	}
1683 }
1684 
1685 static __init int sysctl_init(void)
1686 {
1687 	sysctl_set_parent(NULL, root_table);
1688 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1689 	{
1690 		int err;
1691 		err = sysctl_check_table(current->nsproxy, root_table);
1692 	}
1693 #endif
1694 	return 0;
1695 }
1696 
1697 core_initcall(sysctl_init);
1698 
1699 static struct ctl_table *is_branch_in(struct ctl_table *branch,
1700 				      struct ctl_table *table)
1701 {
1702 	struct ctl_table *p;
1703 	const char *s = branch->procname;
1704 
1705 	/* branch should have named subdirectory as its first element */
1706 	if (!s || !branch->child)
1707 		return NULL;
1708 
1709 	/* ... and nothing else */
1710 	if (branch[1].procname)
1711 		return NULL;
1712 
1713 	/* table should contain subdirectory with the same name */
1714 	for (p = table; p->procname; p++) {
1715 		if (!p->child)
1716 			continue;
1717 		if (p->procname && strcmp(p->procname, s) == 0)
1718 			return p;
1719 	}
1720 	return NULL;
1721 }
1722 
1723 /* see if attaching q to p would be an improvement */
1724 static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1725 {
1726 	struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1727 	struct ctl_table *next;
1728 	int is_better = 0;
1729 	int not_in_parent = !p->attached_by;
1730 
1731 	while ((next = is_branch_in(by, to)) != NULL) {
1732 		if (by == q->attached_by)
1733 			is_better = 1;
1734 		if (to == p->attached_by)
1735 			not_in_parent = 1;
1736 		by = by->child;
1737 		to = next->child;
1738 	}
1739 
1740 	if (is_better && not_in_parent) {
1741 		q->attached_by = by;
1742 		q->attached_to = to;
1743 		q->parent = p;
1744 	}
1745 }
1746 
/**
 * __register_sysctl_paths - register a sysctl hierarchy
 * @root: List of sysctl headers to register on
 * @namespaces: Data to compute which lists of sysctl entries are visible
 * @path: The path to the directory the sysctl table is in.
 * @table: the top-level table structure
 *
 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
 * array. A completely 0 filled entry terminates the table.
 *
 * The members of the &struct ctl_table structure are used as follows:
 *
 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
 *            enter a sysctl file
 *
 * data - a pointer to data for use by proc_handler
 *
 * maxlen - the maximum size in bytes of the data
 *
 * mode - the file permissions for the /proc/sys file, and for sysctl(2)
 *
 * child - a pointer to the child sysctl table if this entry is a directory, or
 *         %NULL.
 *
 * proc_handler - the text handler routine (described below)
 *
 * de - for internal use by the sysctl routines
 *
 * extra1, extra2 - extra pointers usable by the proc handler routines
 *
 * Leaf nodes in the sysctl tree will be represented by a single file
 * under /proc; non-leaf nodes will be represented by directories.
 *
 * sysctl(2) can automatically manage read and write requests through
 * the sysctl table.  The data and maxlen fields of the ctl_table
 * struct enable minimal validation of the values being written to be
 * performed, and the mode field allows minimal authentication.
 *
 * There must be a proc_handler routine for any terminal nodes
 * mirrored under /proc/sys (non-terminals are handled by a built-in
 * directory handler).  Several default handlers are available to
 * cover common cases -
 *
 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
 *
 * It is the handler's job to read the input buffer from user memory
 * and process it. The handler should return 0 on success.
 *
 * This routine returns %NULL on a failure to register (allocation failure,
 * or a table rejected by sysctl_check_table() when
 * CONFIG_SYSCTL_SYSCALL_CHECK is enabled), and a pointer to the table
 * header on success.
 */
struct ctl_table_header *__register_sysctl_paths(
	struct ctl_table_root *root,
	struct nsproxy *namespaces,
	const struct ctl_path *path, struct ctl_table *table)
{
	struct ctl_table_header *header;
	struct ctl_table *new, **prevp;
	unsigned int n, npath;
	struct ctl_table_set *set;

	/* Count the path components */
	for (npath = 0; path[npath].procname; ++npath)
		;

	/*
	 * For each path component, allocate a 2-element ctl_table array.
	 * The first array element will be filled with the sysctl entry
	 * for this, the second will be the sentinel (procname == 0).
	 *
	 * We allocate everything in one go so that we don't have to
	 * worry about freeing additional memory in unregister_sysctl_table.
	 */
	header = kzalloc(sizeof(struct ctl_table_header) +
			 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
	if (!header)
		return NULL;

	/* the per-component tables live directly behind the header */
	new = (struct ctl_table *) (header + 1);

	/* Now connect the dots */
	prevp = &header->ctl_table;
	for (n = 0; n < npath; ++n, ++path) {
		/* Copy the procname */
		new->procname = path->procname;
		new->mode     = 0555;

		/* hang this directory entry off the previous level */
		*prevp = new;
		prevp = &new->child;

		/* skip the entry just filled and its zeroed sentinel */
		new += 2;
	}
	/* the caller's table becomes the innermost child (or the top level) */
	*prevp = table;
	header->ctl_table_arg = table;

	INIT_LIST_HEAD(&header->ctl_entry);
	header->used = 0;
	header->unregistering = NULL;
	header->root = root;
	sysctl_set_parent(NULL, header->ctl_table);
	header->count = 1;
#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
	if (sysctl_check_table(namespaces, header->ctl_table)) {
		kfree(header);
		return NULL;
	}
#endif
	spin_lock(&sysctl_lock);
	header->set = lookup_header_set(root, namespaces);
	/* default attachment: directly under the root table */
	header->attached_by = header->ctl_table;
	header->attached_to = root_table;
	header->parent = &root_table_header;
	/* look for a better attachment point in this set and its ancestors */
	for (set = header->set; set; set = set->parent) {
		struct ctl_table_header *p;
		list_for_each_entry(p, &set->list, ctl_entry) {
			if (p->unregistering)
				continue;
			try_attach(p, header);
		}
	}
	/* the new header holds a reference on whichever parent was chosen */
	header->parent->count++;
	list_add_tail(&header->ctl_entry, &header->set->list);
	spin_unlock(&sysctl_lock);

	return header;
}
1875 
1876 /**
1877  * register_sysctl_table_path - register a sysctl table hierarchy
1878  * @path: The path to the directory the sysctl table is in.
1879  * @table: the top-level table structure
1880  *
1881  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1882  * array. A completely 0 filled entry terminates the table.
1883  *
1884  * See __register_sysctl_paths for more details.
1885  */
1886 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1887 						struct ctl_table *table)
1888 {
1889 	return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
1890 					path, table);
1891 }
1892 
1893 /**
1894  * register_sysctl_table - register a sysctl table hierarchy
1895  * @table: the top-level table structure
1896  *
1897  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1898  * array. A completely 0 filled entry terminates the table.
1899  *
1900  * See register_sysctl_paths for more details.
1901  */
1902 struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1903 {
1904 	static const struct ctl_path null_path[] = { {} };
1905 
1906 	return register_sysctl_paths(null_path, table);
1907 }
1908 
1909 /**
1910  * unregister_sysctl_table - unregister a sysctl table hierarchy
1911  * @header: the header returned from register_sysctl_table
1912  *
1913  * Unregisters the sysctl table and all children. proc entries may not
1914  * actually be removed until they are no longer used by anyone.
1915  */
1916 void unregister_sysctl_table(struct ctl_table_header * header)
1917 {
1918 	might_sleep();
1919 
1920 	if (header == NULL)
1921 		return;
1922 
1923 	spin_lock(&sysctl_lock);
1924 	start_unregistering(header);
1925 	if (!--header->parent->count) {
1926 		WARN_ON(1);
1927 		kfree(header->parent);
1928 	}
1929 	if (!--header->count)
1930 		kfree(header);
1931 	spin_unlock(&sysctl_lock);
1932 }
1933 
1934 int sysctl_is_seen(struct ctl_table_header *p)
1935 {
1936 	struct ctl_table_set *set = p->set;
1937 	int res;
1938 	spin_lock(&sysctl_lock);
1939 	if (p->unregistering)
1940 		res = 0;
1941 	else if (!set->is_seen)
1942 		res = 1;
1943 	else
1944 		res = set->is_seen(set);
1945 	spin_unlock(&sysctl_lock);
1946 	return res;
1947 }
1948 
1949 void setup_sysctl_set(struct ctl_table_set *p,
1950 	struct ctl_table_set *parent,
1951 	int (*is_seen)(struct ctl_table_set *))
1952 {
1953 	INIT_LIST_HEAD(&p->list);
1954 	p->parent = parent ? parent : &sysctl_table_root.default_set;
1955 	p->is_seen = is_seen;
1956 }
1957 
1958 #else /* !CONFIG_SYSCTL */
/*
 * Stubs used when CONFIG_SYSCTL is disabled: registration always yields
 * NULL and the remaining entry points do nothing.
 */

/* No-op registration: always returns NULL. */
struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
{
	return NULL;
}

/* No-op registration: always returns NULL. */
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
						    struct ctl_table *table)
{
	return NULL;
}

/* Nothing was registered, so there is nothing to undo. */
void unregister_sysctl_table(struct ctl_table_header * table)
{
}

/* Leaves @p untouched. */
void setup_sysctl_set(struct ctl_table_set *p,
	struct ctl_table_set *parent,
	int (*is_seen)(struct ctl_table_set *))
{
}

/* No reference counting is done in this configuration. */
void sysctl_head_put(struct ctl_table_header *head)
{
}
1983 
1984 #endif /* CONFIG_SYSCTL */
1985 
1986 /*
1987  * /proc/sys support
1988  */
1989 
1990 #ifdef CONFIG_PROC_SYSCTL
1991 
1992 static int _proc_do_string(void* data, int maxlen, int write,
1993 			   void __user *buffer,
1994 			   size_t *lenp, loff_t *ppos)
1995 {
1996 	size_t len;
1997 	char __user *p;
1998 	char c;
1999 
2000 	if (!data || !maxlen || !*lenp) {
2001 		*lenp = 0;
2002 		return 0;
2003 	}
2004 
2005 	if (write) {
2006 		len = 0;
2007 		p = buffer;
2008 		while (len < *lenp) {
2009 			if (get_user(c, p++))
2010 				return -EFAULT;
2011 			if (c == 0 || c == '\n')
2012 				break;
2013 			len++;
2014 		}
2015 		if (len >= maxlen)
2016 			len = maxlen-1;
2017 		if(copy_from_user(data, buffer, len))
2018 			return -EFAULT;
2019 		((char *) data)[len] = 0;
2020 		*ppos += *lenp;
2021 	} else {
2022 		len = strlen(data);
2023 		if (len > maxlen)
2024 			len = maxlen;
2025 
2026 		if (*ppos > len) {
2027 			*lenp = 0;
2028 			return 0;
2029 		}
2030 
2031 		data += *ppos;
2032 		len  -= *ppos;
2033 
2034 		if (len > *lenp)
2035 			len = *lenp;
2036 		if (len)
2037 			if(copy_to_user(buffer, data, len))
2038 				return -EFAULT;
2039 		if (len < *lenp) {
2040 			if(put_user('\n', ((char __user *) buffer) + len))
2041 				return -EFAULT;
2042 			len++;
2043 		}
2044 		*lenp = len;
2045 		*ppos += len;
2046 	}
2047 	return 0;
2048 }
2049 
2050 /**
2051  * proc_dostring - read a string sysctl
2052  * @table: the sysctl table
2053  * @write: %TRUE if this is a write to the sysctl file
2054  * @buffer: the user buffer
2055  * @lenp: the size of the user buffer
2056  * @ppos: file position
2057  *
2058  * Reads/writes a string from/to the user buffer. If the kernel
2059  * buffer provided is not large enough to hold the string, the
2060  * string is truncated. The copied string is %NULL-terminated.
2061  * If the string is being read by the user process, it is copied
2062  * and a newline '\n' is added. It is truncated if the buffer is
2063  * not large enough.
2064  *
2065  * Returns 0 on success.
2066  */
2067 int proc_dostring(struct ctl_table *table, int write,
2068 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2069 {
2070 	return _proc_do_string(table->data, table->maxlen, write,
2071 			       buffer, lenp, ppos);
2072 }
2073 
/*
 * Advance *@buf to the position returned by skip_spaces() and return the
 * number of bytes it moved by.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;
	char *end = skip_spaces(start);

	*buf = end;
	return end - start;
}
2082 
/*
 * Skip a leading run of the character @v: *@buf is advanced and *@size is
 * reduced by one for every skipped byte.  Stops at the first byte that
 * differs from @v or when *@size reaches zero.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	char *cur = *buf;
	size_t left = *size;

	while (left && *cur == v) {
		left--;
		cur++;
	}

	*buf = cur;
	*size = left;
}
2092 
/* scratch buffer size shared by proc_get_long() and proc_put_long() */
#define TMPBUFLEN 22
/**
 * proc_get_long - reads an ASCII formatted integer from a kernel buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * In case of success %0 is returned and @buf and @size are updated with
 * the amount of bytes read. If @tr is non-NULL and a trailing
 * character exists (size is non-zero after returning from this
 * function), @tr is updated with the trailing character.
 *
 * -EINVAL is returned when @size is zero, when the input does not start
 * with a digit (after an optional '-'), when the number fills the whole
 * scratch buffer, or when it is followed by a character that is not in
 * @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	int len;
	char *p, tmp[TMPBUFLEN];

	if (!*size)
		return -EINVAL;

	/* parse from a bounded, NUL-terminated copy of the input */
	len = *size;
	if (len > TMPBUFLEN - 1)
		len = TMPBUFLEN - 1;

	memcpy(tmp, *buf, len);

	tmp[len] = 0;
	p = tmp;
	/* a lone '-' is not a sign; it fails the digit check below */
	if (*p == '-' && *size > 1) {
		*neg = true;
		p++;
	} else
		*neg = false;
	if (!isdigit(*p))
		return -EINVAL;

	*val = simple_strtoul(p, &p, 0);

	/* number of bytes consumed, including any leading '-' */
	len = p - tmp;

	/* We don't know if the next char is whitespace thus we may accept
	 * invalid integers (e.g. 1234...a) or two integers instead of one
	 * (e.g. 123...1). So lets not allow such large numbers. */
	if (len == TMPBUFLEN - 1)
		return -EINVAL;

	/* when something follows the number it must be an allowed trailer */
	if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
		return -EINVAL;

	if (tr && (len < *size))
		*tr = *p;

	*buf += len;
	*size -= len;

	return 0;
}
2157 
2158 /**
2159  * proc_put_long - converts an integer to a decimal ASCII formatted string
2160  *
2161  * @buf: the user buffer
2162  * @size: the size of the user buffer
2163  * @val: the integer to be converted
2164  * @neg: sign of the number, %TRUE for negative
2165  *
2166  * In case of success %0 is returned and @buf and @size are updated with
2167  * the amount of bytes written.
2168  */
2169 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2170 			  bool neg)
2171 {
2172 	int len;
2173 	char tmp[TMPBUFLEN], *p = tmp;
2174 
2175 	sprintf(p, "%s%lu", neg ? "-" : "", val);
2176 	len = strlen(tmp);
2177 	if (len > *size)
2178 		len = *size;
2179 	if (copy_to_user(*buf, tmp, len))
2180 		return -EFAULT;
2181 	*size -= len;
2182 	*buf += len;
2183 	return 0;
2184 }
2185 #undef TMPBUFLEN
2186 
2187 static int proc_put_char(void __user **buf, size_t *size, char c)
2188 {
2189 	if (*size) {
2190 		char __user **buffer = (char __user **)buf;
2191 		if (put_user(c, *buffer))
2192 			return -EFAULT;
2193 		(*size)--, (*buffer)++;
2194 		*buf = *buffer;
2195 	}
2196 	return 0;
2197 }
2198 
/*
 * do_proc_dointvec_conv - default conversion between an int value and the
 * (sign, magnitude) pair used by the text parser and formatter
 * @negp: sign flag; input on write, output on read
 * @lvalp: magnitude; input on write, output on read
 * @valp: the int being written to (write) or read from (read)
 * @write: non-zero to convert (*@negp, *@lvalp) into *@valp, zero for the
 *         opposite direction
 * @data: unused by this converter
 *
 * On write, a magnitude that cannot be represented in an int is rejected
 * with -EINVAL and *@valp is left unmodified, instead of being silently
 * truncated.  On read, the magnitude of a negative value is computed in
 * unsigned arithmetic so that INT_MIN is handled without signed overflow.
 *
 * Returns 0 on success, -EINVAL if a written value is out of range.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		if (*negp) {
			/* the most negative int has magnitude INT_MAX + 1 */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			*valp = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			*valp = *lvalp;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/* negate after widening: -val would overflow for INT_MIN */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2217 
/*
 * Whitespace characters accepted as the trailer after a parsed number.
 * Deliberately not NUL-terminated: users pass sizeof() as the length.
 */
static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2219 
/*
 * __do_proc_dointvec - read or write a vector of ints as ASCII text
 * @tbl_data: the int array to operate on
 * @table: sysctl entry; only its maxlen is used, to size the vector
 * @write: non-zero to parse values from @buffer, zero to format them into it
 * @buffer: the user buffer
 * @lenp: in: size of @buffer; out: number of bytes consumed or produced
 * @ppos: file position; advanced by the final *@lenp
 * @conv: per-element converter, or NULL for do_proc_dointvec_conv()
 * @data: opaque argument handed to @conv
 *
 * Write: at most PAGE_SIZE - 1 bytes are copied into a temporary page and
 * parsed as whitespace-separated integers, one per vector element.  If not
 * even one value could be stored, the error encountered is returned, or
 * -EINVAL if there was none.
 *
 * Read: elements are written tab-separated and followed by a newline if
 * room remains.  A read at a non-zero file position produces nothing.
 *
 * Returns 0 on success or a negative errno.
 */
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
		  int write, void __user *buffer,
		  size_t *lenp, loff_t *ppos,
		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
			      int write, void *data),
		  void *data)
{
	int *i, vleft, first = 1, err = 0;
	unsigned long page = 0;
	size_t left;
	char *kbuf;

	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
		*lenp = 0;
		return 0;
	}

	i = (int *) tbl_data;
	/* number of ints in the vector */
	vleft = table->maxlen / sizeof(*i);
	left = *lenp;

	if (!conv)
		conv = do_proc_dointvec_conv;

	if (write) {
		/* work on a NUL-terminated kernel copy of the input */
		if (left > PAGE_SIZE - 1)
			left = PAGE_SIZE - 1;
		page = __get_free_page(GFP_TEMPORARY);
		kbuf = (char *) page;
		if (!kbuf)
			return -ENOMEM;
		if (copy_from_user(kbuf, buffer, left)) {
			err = -EFAULT;
			goto free;
		}
		kbuf[left] = 0;
	}

	/* 'first' stays set until one element has been fully processed */
	for (; left && vleft--; i++, first=0) {
		unsigned long lval;
		bool neg;

		if (write) {
			left -= proc_skip_spaces(&kbuf);

			err = proc_get_long(&kbuf, &left, &lval, &neg,
					     proc_wspace_sep,
					     sizeof(proc_wspace_sep), NULL);
			if (err)
				break;
			if (conv(&neg, &lval, i, 1, data)) {
				err = -EINVAL;
				break;
			}
		} else {
			if (conv(&neg, &lval, i, 0, data)) {
				err = -EINVAL;
				break;
			}
			/* separate consecutive values with a tab */
			if (!first)
				err = proc_put_char(&buffer, &left, '\t');
			if (err)
				break;
			err = proc_put_long(&buffer, &left, lval, neg);
			if (err)
				break;
		}
	}

	if (!write && !first && left && !err)
		err = proc_put_char(&buffer, &left, '\n');
	/* swallow trailing whitespace so it counts as consumed */
	if (write && !err)
		left -= proc_skip_spaces(&kbuf);
free:
	if (write) {
		/* kbuf may have been advanced; free via the original address */
		free_page(page);
		if (first)
			return err ? : -EINVAL;
	}
	*lenp -= left;
	*ppos += *lenp;
	return err;
}
2303 
2304 static int do_proc_dointvec(struct ctl_table *table, int write,
2305 		  void __user *buffer, size_t *lenp, loff_t *ppos,
2306 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2307 			      int write, void *data),
2308 		  void *data)
2309 {
2310 	return __do_proc_dointvec(table->data, table, write,
2311 			buffer, lenp, ppos, conv, data);
2312 }
2313 
2314 /**
2315  * proc_dointvec - read a vector of integers
2316  * @table: the sysctl table
2317  * @write: %TRUE if this is a write to the sysctl file
2318  * @buffer: the user buffer
2319  * @lenp: the size of the user buffer
2320  * @ppos: file position
2321  *
2322  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2323  * values from/to the user buffer, treated as an ASCII string.
2324  *
2325  * Returns 0 on success.
2326  */
2327 int proc_dointvec(struct ctl_table *table, int write,
2328 		     void __user *buffer, size_t *lenp, loff_t *ppos)
2329 {
2330     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2331 		    	    NULL,NULL);
2332 }
2333 
2334 /*
2335  * Taint values can only be increased
2336  * This means we can safely use a temporary.
2337  */
2338 static int proc_taint(struct ctl_table *table, int write,
2339 			       void __user *buffer, size_t *lenp, loff_t *ppos)
2340 {
2341 	struct ctl_table t;
2342 	unsigned long tmptaint = get_taint();
2343 	int err;
2344 
2345 	if (write && !capable(CAP_SYS_ADMIN))
2346 		return -EPERM;
2347 
2348 	t = *table;
2349 	t.data = &tmptaint;
2350 	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2351 	if (err < 0)
2352 		return err;
2353 
2354 	if (write) {
2355 		/*
2356 		 * Poor man's atomic or. Not worth adding a primitive
2357 		 * to everyone's atomic.h for this
2358 		 */
2359 		int i;
2360 		for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2361 			if ((tmptaint >> i) & 1)
2362 				add_taint(i);
2363 		}
2364 	}
2365 
2366 	return err;
2367 }
2368 
/*
 * Bounds passed to do_proc_dointvec_minmax_conv() through its @data
 * argument.  A NULL pointer disables the corresponding check.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;	/* lowest value accepted on write, or NULL */
	int *max;	/* highest value accepted on write, or NULL */
};

/*
 * Convert between a sign/magnitude pair (@negp, @lvalp) and the int at
 * @valp, enforcing the optional bounds in @data on writes.
 *
 * write != 0: the pair is turned into an int.  Magnitudes that cannot be
 *             represented in an int are rejected up front; otherwise the
 *             narrowing conversion could wrap an out-of-range value back
 *             into the permitted window.  The result is then checked
 *             against min/max and stored in *@valp only if it passes.
 * write == 0: *@valp is split into sign and magnitude for printing.
 *
 * Returns 0 on success and -EINVAL when a written value is rejected.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;

	if (write) {
		int val;

		if (*negp) {
			/* |INT_MIN| is exactly INT_MAX + 1 */
			if (*lvalp > (unsigned long)INT_MAX + 1)
				return -EINVAL;
			if (*lvalp > (unsigned long)INT_MAX)
				val = INT_MIN;
			else
				val = -(int)*lvalp;
		} else {
			if (*lvalp > (unsigned long)INT_MAX)
				return -EINVAL;
			val = (int)*lvalp;
		}

		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;

		if (val < 0) {
			*negp = true;
			/*
			 * Negate after widening: negating the int first
			 * yields the wrong magnitude for INT_MIN.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2397 
2398 /**
2399  * proc_dointvec_minmax - read a vector of integers with min/max values
2400  * @table: the sysctl table
2401  * @write: %TRUE if this is a write to the sysctl file
2402  * @buffer: the user buffer
2403  * @lenp: the size of the user buffer
2404  * @ppos: file position
2405  *
2406  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2407  * values from/to the user buffer, treated as an ASCII string.
2408  *
2409  * This routine will ensure the values are within the range specified by
2410  * table->extra1 (min) and table->extra2 (max).
2411  *
2412  * Returns 0 on success.
2413  */
2414 int proc_dointvec_minmax(struct ctl_table *table, int write,
2415 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2416 {
2417 	struct do_proc_dointvec_minmax_conv_param param = {
2418 		.min = (int *) table->extra1,
2419 		.max = (int *) table->extra2,
2420 	};
2421 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2422 				do_proc_dointvec_minmax_conv, &param);
2423 }
2424 
2425 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2426 				     void __user *buffer,
2427 				     size_t *lenp, loff_t *ppos,
2428 				     unsigned long convmul,
2429 				     unsigned long convdiv)
2430 {
2431 	unsigned long *i, *min, *max;
2432 	int vleft, first = 1, err = 0;
2433 	unsigned long page = 0;
2434 	size_t left;
2435 	char *kbuf;
2436 
2437 	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2438 		*lenp = 0;
2439 		return 0;
2440 	}
2441 
2442 	i = (unsigned long *) data;
2443 	min = (unsigned long *) table->extra1;
2444 	max = (unsigned long *) table->extra2;
2445 	vleft = table->maxlen / sizeof(unsigned long);
2446 	left = *lenp;
2447 
2448 	if (write) {
2449 		if (left > PAGE_SIZE - 1)
2450 			left = PAGE_SIZE - 1;
2451 		page = __get_free_page(GFP_TEMPORARY);
2452 		kbuf = (char *) page;
2453 		if (!kbuf)
2454 			return -ENOMEM;
2455 		if (copy_from_user(kbuf, buffer, left)) {
2456 			err = -EFAULT;
2457 			goto free;
2458 		}
2459 		kbuf[left] = 0;
2460 	}
2461 
2462 	for (; left && vleft--; i++, min++, max++, first=0) {
2463 		unsigned long val;
2464 
2465 		if (write) {
2466 			bool neg;
2467 
2468 			left -= proc_skip_spaces(&kbuf);
2469 
2470 			err = proc_get_long(&kbuf, &left, &val, &neg,
2471 					     proc_wspace_sep,
2472 					     sizeof(proc_wspace_sep), NULL);
2473 			if (err)
2474 				break;
2475 			if (neg)
2476 				continue;
2477 			if ((min && val < *min) || (max && val > *max))
2478 				continue;
2479 			*i = val;
2480 		} else {
2481 			val = convdiv * (*i) / convmul;
2482 			if (!first)
2483 				err = proc_put_char(&buffer, &left, '\t');
2484 			err = proc_put_long(&buffer, &left, val, false);
2485 			if (err)
2486 				break;
2487 		}
2488 	}
2489 
2490 	if (!write && !first && left && !err)
2491 		err = proc_put_char(&buffer, &left, '\n');
2492 	if (write && !err)
2493 		left -= proc_skip_spaces(&kbuf);
2494 free:
2495 	if (write) {
2496 		free_page(page);
2497 		if (first)
2498 			return err ? : -EINVAL;
2499 	}
2500 	*lenp -= left;
2501 	*ppos += *lenp;
2502 	return err;
2503 }
2504 
2505 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2506 				     void __user *buffer,
2507 				     size_t *lenp, loff_t *ppos,
2508 				     unsigned long convmul,
2509 				     unsigned long convdiv)
2510 {
2511 	return __do_proc_doulongvec_minmax(table->data, table, write,
2512 			buffer, lenp, ppos, convmul, convdiv);
2513 }
2514 
2515 /**
2516  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2517  * @table: the sysctl table
2518  * @write: %TRUE if this is a write to the sysctl file
2519  * @buffer: the user buffer
2520  * @lenp: the size of the user buffer
2521  * @ppos: file position
2522  *
2523  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2524  * values from/to the user buffer, treated as an ASCII string.
2525  *
2526  * This routine will ensure the values are within the range specified by
2527  * table->extra1 (min) and table->extra2 (max).
2528  *
2529  * Returns 0 on success.
2530  */
2531 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2532 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2533 {
2534     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2535 }
2536 
2537 /**
2538  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2539  * @table: the sysctl table
2540  * @write: %TRUE if this is a write to the sysctl file
2541  * @buffer: the user buffer
2542  * @lenp: the size of the user buffer
2543  * @ppos: file position
2544  *
2545  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2546  * values from/to the user buffer, treated as an ASCII string. The values
2547  * are treated as milliseconds, and converted to jiffies when they are stored.
2548  *
2549  * This routine will ensure the values are within the range specified by
2550  * table->extra1 (min) and table->extra2 (max).
2551  *
2552  * Returns 0 on success.
2553  */
2554 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2555 				      void __user *buffer,
2556 				      size_t *lenp, loff_t *ppos)
2557 {
2558     return do_proc_doulongvec_minmax(table, write, buffer,
2559 				     lenp, ppos, HZ, 1000l);
2560 }
2561 
2562 
2563 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2564 					 int *valp,
2565 					 int write, void *data)
2566 {
2567 	if (write) {
2568 		if (*lvalp > LONG_MAX / HZ)
2569 			return 1;
2570 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2571 	} else {
2572 		int val = *valp;
2573 		unsigned long lval;
2574 		if (val < 0) {
2575 			*negp = true;
2576 			lval = (unsigned long)-val;
2577 		} else {
2578 			*negp = false;
2579 			lval = (unsigned long)val;
2580 		}
2581 		*lvalp = lval / HZ;
2582 	}
2583 	return 0;
2584 }
2585 
2586 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2587 						int *valp,
2588 						int write, void *data)
2589 {
2590 	if (write) {
2591 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2592 			return 1;
2593 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2594 	} else {
2595 		int val = *valp;
2596 		unsigned long lval;
2597 		if (val < 0) {
2598 			*negp = true;
2599 			lval = (unsigned long)-val;
2600 		} else {
2601 			*negp = false;
2602 			lval = (unsigned long)val;
2603 		}
2604 		*lvalp = jiffies_to_clock_t(lval);
2605 	}
2606 	return 0;
2607 }
2608 
2609 static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
2610 					    int *valp,
2611 					    int write, void *data)
2612 {
2613 	if (write) {
2614 		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
2615 	} else {
2616 		int val = *valp;
2617 		unsigned long lval;
2618 		if (val < 0) {
2619 			*negp = true;
2620 			lval = (unsigned long)-val;
2621 		} else {
2622 			*negp = false;
2623 			lval = (unsigned long)val;
2624 		}
2625 		*lvalp = jiffies_to_msecs(lval);
2626 	}
2627 	return 0;
2628 }
2629 
2630 /**
2631  * proc_dointvec_jiffies - read a vector of integers as seconds
2632  * @table: the sysctl table
2633  * @write: %TRUE if this is a write to the sysctl file
2634  * @buffer: the user buffer
2635  * @lenp: the size of the user buffer
2636  * @ppos: file position
2637  *
2638  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2639  * values from/to the user buffer, treated as an ASCII string.
2640  * The values read are assumed to be in seconds, and are converted into
2641  * jiffies.
2642  *
2643  * Returns 0 on success.
2644  */
2645 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2646 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2647 {
2648     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2649 		    	    do_proc_dointvec_jiffies_conv,NULL);
2650 }
2651 
2652 /**
2653  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2654  * @table: the sysctl table
2655  * @write: %TRUE if this is a write to the sysctl file
2656  * @buffer: the user buffer
2657  * @lenp: the size of the user buffer
2658  * @ppos: pointer to the file position
2659  *
2660  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2661  * values from/to the user buffer, treated as an ASCII string.
2662  * The values read are assumed to be in 1/USER_HZ seconds, and
2663  * are converted into jiffies.
2664  *
2665  * Returns 0 on success.
2666  */
2667 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2668 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2669 {
2670     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2671 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2672 }
2673 
2674 /**
2675  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2676  * @table: the sysctl table
2677  * @write: %TRUE if this is a write to the sysctl file
2678  * @buffer: the user buffer
2679  * @lenp: the size of the user buffer
2680  * @ppos: file position
2681  * @ppos: the current position in the file
2682  *
2683  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2684  * values from/to the user buffer, treated as an ASCII string.
2685  * The values read are assumed to be in 1/1000 seconds, and
2686  * are converted into jiffies.
2687  *
2688  * Returns 0 on success.
2689  */
2690 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2691 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2692 {
2693 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2694 				do_proc_dointvec_ms_jiffies_conv, NULL);
2695 }
2696 
2697 static int proc_do_cad_pid(struct ctl_table *table, int write,
2698 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2699 {
2700 	struct pid *new_pid;
2701 	pid_t tmp;
2702 	int r;
2703 
2704 	tmp = pid_vnr(cad_pid);
2705 
2706 	r = __do_proc_dointvec(&tmp, table, write, buffer,
2707 			       lenp, ppos, NULL, NULL);
2708 	if (r || !write)
2709 		return r;
2710 
2711 	new_pid = find_get_pid(tmp);
2712 	if (!new_pid)
2713 		return -ESRCH;
2714 
2715 	put_pid(xchg(&cad_pid, new_pid));
2716 	return 0;
2717 }
2718 
2719 /**
2720  * proc_do_large_bitmap - read/write from/to a large bitmap
2721  * @table: the sysctl table
2722  * @write: %TRUE if this is a write to the sysctl file
2723  * @buffer: the user buffer
2724  * @lenp: the size of the user buffer
2725  * @ppos: file position
2726  *
2727  * The bitmap is stored at table->data and the bitmap length (in bits)
2728  * in table->maxlen.
2729  *
2730  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
2731  * large bitmaps may be represented in a compact manner. Writing into
2732  * the file will clear the bitmap then update it with the given input.
2733  *
2734  * Returns 0 on success.
2735  */
2736 int proc_do_large_bitmap(struct ctl_table *table, int write,
2737 			 void __user *buffer, size_t *lenp, loff_t *ppos)
2738 {
2739 	int err = 0;
2740 	bool first = 1;
2741 	size_t left = *lenp;
2742 	unsigned long bitmap_len = table->maxlen;
2743 	unsigned long *bitmap = (unsigned long *) table->data;
2744 	unsigned long *tmp_bitmap = NULL;
2745 	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
2746 
2747 	if (!bitmap_len || !left || (*ppos && !write)) {
2748 		*lenp = 0;
2749 		return 0;
2750 	}
2751 
2752 	if (write) {
2753 		unsigned long page = 0;
2754 		char *kbuf;
2755 
2756 		if (left > PAGE_SIZE - 1)
2757 			left = PAGE_SIZE - 1;
2758 
2759 		page = __get_free_page(GFP_TEMPORARY);
2760 		kbuf = (char *) page;
2761 		if (!kbuf)
2762 			return -ENOMEM;
2763 		if (copy_from_user(kbuf, buffer, left)) {
2764 			free_page(page);
2765 			return -EFAULT;
2766                 }
2767 		kbuf[left] = 0;
2768 
2769 		tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
2770 				     GFP_KERNEL);
2771 		if (!tmp_bitmap) {
2772 			free_page(page);
2773 			return -ENOMEM;
2774 		}
2775 		proc_skip_char(&kbuf, &left, '\n');
2776 		while (!err && left) {
2777 			unsigned long val_a, val_b;
2778 			bool neg;
2779 
2780 			err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
2781 					     sizeof(tr_a), &c);
2782 			if (err)
2783 				break;
2784 			if (val_a >= bitmap_len || neg) {
2785 				err = -EINVAL;
2786 				break;
2787 			}
2788 
2789 			val_b = val_a;
2790 			if (left) {
2791 				kbuf++;
2792 				left--;
2793 			}
2794 
2795 			if (c == '-') {
2796 				err = proc_get_long(&kbuf, &left, &val_b,
2797 						     &neg, tr_b, sizeof(tr_b),
2798 						     &c);
2799 				if (err)
2800 					break;
2801 				if (val_b >= bitmap_len || neg ||
2802 				    val_a > val_b) {
2803 					err = -EINVAL;
2804 					break;
2805 				}
2806 				if (left) {
2807 					kbuf++;
2808 					left--;
2809 				}
2810 			}
2811 
2812 			while (val_a <= val_b)
2813 				set_bit(val_a++, tmp_bitmap);
2814 
2815 			first = 0;
2816 			proc_skip_char(&kbuf, &left, '\n');
2817 		}
2818 		free_page(page);
2819 	} else {
2820 		unsigned long bit_a, bit_b = 0;
2821 
2822 		while (left) {
2823 			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
2824 			if (bit_a >= bitmap_len)
2825 				break;
2826 			bit_b = find_next_zero_bit(bitmap, bitmap_len,
2827 						   bit_a + 1) - 1;
2828 
2829 			if (!first) {
2830 				err = proc_put_char(&buffer, &left, ',');
2831 				if (err)
2832 					break;
2833 			}
2834 			err = proc_put_long(&buffer, &left, bit_a, false);
2835 			if (err)
2836 				break;
2837 			if (bit_a != bit_b) {
2838 				err = proc_put_char(&buffer, &left, '-');
2839 				if (err)
2840 					break;
2841 				err = proc_put_long(&buffer, &left, bit_b, false);
2842 				if (err)
2843 					break;
2844 			}
2845 
2846 			first = 0; bit_b++;
2847 		}
2848 		if (!err)
2849 			err = proc_put_char(&buffer, &left, '\n');
2850 	}
2851 
2852 	if (!err) {
2853 		if (write) {
2854 			if (*ppos)
2855 				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2856 			else
2857 				memcpy(bitmap, tmp_bitmap,
2858 					BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
2859 		}
2860 		kfree(tmp_bitmap);
2861 		*lenp -= left;
2862 		*ppos += *lenp;
2863 		return 0;
2864 	} else {
2865 		kfree(tmp_bitmap);
2866 		return err;
2867 	}
2868 }
2869 
2870 #else /* CONFIG_PROC_FS */
2871 
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dostring(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2877 
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2883 
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec_minmax(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2889 
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec_jiffies(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2895 
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2901 
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2907 
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_doulongvec_minmax(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2913 
/* Stub for builds without CONFIG_PROC_FS: always fails with -ENOSYS. */
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
				      void __user *buffer,
				      size_t *lenp, loff_t *ppos)
{
    return -ENOSYS;
}
2920 
2921 
2922 #endif /* CONFIG_PROC_FS */
2923 
/*
 * The exports are collected here instead of following each definition:
 * the proc_* handlers above are defined twice, once in each branch of
 * the CONFIG_PROC_FS conditional, so per-definition exports would have
 * to be duplicated as well.  Exception granted :-)
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(register_sysctl_paths);
EXPORT_SYMBOL(unregister_sysctl_table);
2939