xref: /linux/kernel/sysctl.c (revision 9ffc93f203c18a70623f21950f1dd473c9ec48cd)
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20 
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/signal.h>
27 #include <linux/printk.h>
28 #include <linux/proc_fs.h>
29 #include <linux/security.h>
30 #include <linux/ctype.h>
31 #include <linux/kmemcheck.h>
32 #include <linux/fs.h>
33 #include <linux/init.h>
34 #include <linux/kernel.h>
35 #include <linux/kobject.h>
36 #include <linux/net.h>
37 #include <linux/sysrq.h>
38 #include <linux/highuid.h>
39 #include <linux/writeback.h>
40 #include <linux/ratelimit.h>
41 #include <linux/compaction.h>
42 #include <linux/hugetlb.h>
43 #include <linux/initrd.h>
44 #include <linux/key.h>
45 #include <linux/times.h>
46 #include <linux/limits.h>
47 #include <linux/dcache.h>
48 #include <linux/dnotify.h>
49 #include <linux/syscalls.h>
50 #include <linux/vmstat.h>
51 #include <linux/nfs_fs.h>
52 #include <linux/acpi.h>
53 #include <linux/reboot.h>
54 #include <linux/ftrace.h>
55 #include <linux/perf_event.h>
56 #include <linux/kprobes.h>
57 #include <linux/pipe_fs_i.h>
58 #include <linux/oom.h>
59 #include <linux/kmod.h>
60 #include <linux/capability.h>
61 #include <linux/binfmts.h>
62 
63 #include <asm/uaccess.h>
64 #include <asm/processor.h>
65 
66 #ifdef CONFIG_X86
67 #include <asm/nmi.h>
68 #include <asm/stacktrace.h>
69 #include <asm/io.h>
70 #endif
71 #ifdef CONFIG_SPARC
72 #include <asm/setup.h>
73 #endif
74 #ifdef CONFIG_BSD_PROCESS_ACCT
75 #include <linux/acct.h>
76 #endif
77 #ifdef CONFIG_RT_MUTEXES
78 #include <linux/rtmutex.h>
79 #endif
80 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
81 #include <linux/lockdep.h>
82 #endif
83 #ifdef CONFIG_CHR_DEV_SG
84 #include <scsi/sg.h>
85 #endif
86 
87 #ifdef CONFIG_LOCKUP_DETECTOR
88 #include <linux/nmi.h>
89 #endif
90 
91 
92 #if defined(CONFIG_SYSCTL)
93 
94 /* External variables not in a header file. */
95 extern int sysctl_overcommit_memory;
96 extern int sysctl_overcommit_ratio;
97 extern int max_threads;
98 extern int core_uses_pid;
99 extern int suid_dumpable;
100 extern char core_pattern[];
101 extern unsigned int core_pipe_limit;
102 extern int pid_max;
103 extern int min_free_kbytes;
104 extern int pid_max_min, pid_max_max;
105 extern int sysctl_drop_caches;
106 extern int percpu_pagelist_fraction;
107 extern int compat_log;
108 extern int latencytop_enabled;
109 extern int sysctl_nr_open_min, sysctl_nr_open_max;
110 #ifndef CONFIG_MMU
111 extern int sysctl_nr_trim_pages;
112 #endif
113 #ifdef CONFIG_BLOCK
114 extern int blk_iopoll_enabled;
115 #endif
116 
117 /* Constants used for minimum and maximum */
118 #ifdef CONFIG_LOCKUP_DETECTOR
119 static int sixty = 60;
120 static int neg_one = -1;
121 #endif
122 
123 static int zero;
124 static int __maybe_unused one = 1;
125 static int __maybe_unused two = 2;
126 static int __maybe_unused three = 3;
127 static unsigned long one_ul = 1;
128 static int one_hundred = 100;
129 #ifdef CONFIG_PRINTK
130 static int ten_thousand = 10000;
131 #endif
132 
133 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
134 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
135 
136 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
137 static int maxolduid = 65535;
138 static int minolduid;
139 static int min_percpu_pagelist_fract = 8;
140 
141 static int ngroups_max = NGROUPS_MAX;
142 static const int cap_last_cap = CAP_LAST_CAP;
143 
144 #ifdef CONFIG_INOTIFY_USER
145 #include <linux/inotify.h>
146 #endif
147 #ifdef CONFIG_SPARC
148 #endif
149 
150 #ifdef CONFIG_SPARC64
151 extern int sysctl_tsb_ratio;
152 #endif
153 
154 #ifdef __hppa__
155 extern int pwrsw_enabled;
156 extern int unaligned_enabled;
157 #endif
158 
159 #ifdef CONFIG_IA64
160 extern int no_unaligned_warning;
161 extern int unaligned_dump_stack;
162 #endif
163 
164 #ifdef CONFIG_PROC_SYSCTL
165 static int proc_do_cad_pid(struct ctl_table *table, int write,
166 		  void __user *buffer, size_t *lenp, loff_t *ppos);
167 static int proc_taint(struct ctl_table *table, int write,
168 			       void __user *buffer, size_t *lenp, loff_t *ppos);
169 #endif
170 
171 #ifdef CONFIG_PRINTK
172 static int proc_dmesg_restrict(struct ctl_table *table, int write,
173 				void __user *buffer, size_t *lenp, loff_t *ppos);
174 #endif
175 
176 #ifdef CONFIG_MAGIC_SYSRQ
177 /* Note: sysrq code uses its own private copy */
178 static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
179 
180 static int sysrq_sysctl_handler(ctl_table *table, int write,
181 				void __user *buffer, size_t *lenp,
182 				loff_t *ppos)
183 {
184 	int ret = proc_dointvec(table, write, buffer, lenp, ppos);
185 
186 	/*
187 	 * Pass the value on to the sysrq code (which keeps its own private
188 	 * copy) only when a write succeeded; errors are returned as-is.
189 	 */
190 	if (!ret && write)
191 		sysrq_toggle_support(__sysrq_enabled);
192 
193 	return ret;
194 }
195 
196 #endif
197 
/*
 * Statically initialized header for root_table, together with the
 * ctl_table_root it is attached to.  The two objects reference each other:
 * the header points at sysctl_table_root and its default_set, while the
 * root's default_set.list is initialized from the header's ctl_entry.
 * That is why root_table and sysctl_table_root are declared here first and
 * only defined further down.
 * NOTE(review): LIST_HEAD_INIT is defined outside this file; presumably the
 * two list heads end up linked to each other - confirm against list.h.
 */
198 static struct ctl_table root_table[];
199 static struct ctl_table_root sysctl_table_root;
200 static struct ctl_table_header root_table_header = {
201 	{{.count = 1,
202 	.ctl_table = root_table,
203 	.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),}},
204 	.root = &sysctl_table_root,
205 	.set = &sysctl_table_root.default_set,
206 };
/* The root itself; root_list is initialized from its own address. */
207 static struct ctl_table_root sysctl_table_root = {
208 	.root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
209 	.default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
210 };
211 
212 static struct ctl_table kern_table[];
213 static struct ctl_table vm_table[];
214 static struct ctl_table fs_table[];
215 static struct ctl_table debug_table[];
216 static struct ctl_table dev_table[];
217 extern struct ctl_table random_table[];
218 #ifdef CONFIG_EPOLL
219 extern struct ctl_table epoll_table[];
220 #endif
221 
222 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
223 int sysctl_legacy_va_layout;
224 #endif
225 
226 /* The default sysctl tables: */
227 
/*
 * Top-level table referenced by root_table_header.  Each entry carries only
 * a name, mode 0555 and a .child pointer to one of the per-area tables
 * declared above (kern_table, vm_table, fs_table, debug_table, dev_table);
 * none of them has .data or a .proc_handler.
 * NOTE(review): the empty { } entry presumably acts as the end-of-table
 * marker, as in the other tables of this file - the code that walks the
 * tables is not part of this file section.
 */
228 static struct ctl_table root_table[] = {
229 	{
230 		.procname	= "kernel",
231 		.mode		= 0555,
232 		.child		= kern_table,
233 	},
234 	{
235 		.procname	= "vm",
236 		.mode		= 0555,
237 		.child		= vm_table,
238 	},
239 	{
240 		.procname	= "fs",
241 		.mode		= 0555,
242 		.child		= fs_table,
243 	},
244 	{
245 		.procname	= "debug",
246 		.mode		= 0555,
247 		.child		= debug_table,
248 	},
249 	{
250 		.procname	= "dev",
251 		.mode		= 0555,
252 		.child		= dev_table,
253 	},
254 	{ }
255 };
256 
257 #ifdef CONFIG_SCHED_DEBUG
258 static int min_sched_granularity_ns = 100000;		/* 100 usecs */
259 static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
260 static int min_wakeup_granularity_ns;			/* 0 usecs */
261 static int max_wakeup_granularity_ns = NSEC_PER_SEC;	/* 1 second */
262 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
263 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
264 #endif
265 
266 #ifdef CONFIG_COMPACTION
267 static int min_extfrag_threshold;
268 static int max_extfrag_threshold = 1000;
269 #endif
270 
271 static struct ctl_table kern_table[] = {
272 	{
273 		.procname	= "sched_child_runs_first",
274 		.data		= &sysctl_sched_child_runs_first,
275 		.maxlen		= sizeof(unsigned int),
276 		.mode		= 0644,
277 		.proc_handler	= proc_dointvec,
278 	},
279 #ifdef CONFIG_SCHED_DEBUG
280 	{
281 		.procname	= "sched_min_granularity_ns",
282 		.data		= &sysctl_sched_min_granularity,
283 		.maxlen		= sizeof(unsigned int),
284 		.mode		= 0644,
285 		.proc_handler	= sched_proc_update_handler,
286 		.extra1		= &min_sched_granularity_ns,
287 		.extra2		= &max_sched_granularity_ns,
288 	},
289 	{
290 		.procname	= "sched_latency_ns",
291 		.data		= &sysctl_sched_latency,
292 		.maxlen		= sizeof(unsigned int),
293 		.mode		= 0644,
294 		.proc_handler	= sched_proc_update_handler,
295 		.extra1		= &min_sched_granularity_ns,
296 		.extra2		= &max_sched_granularity_ns,
297 	},
298 	{
299 		.procname	= "sched_wakeup_granularity_ns",
300 		.data		= &sysctl_sched_wakeup_granularity,
301 		.maxlen		= sizeof(unsigned int),
302 		.mode		= 0644,
303 		.proc_handler	= sched_proc_update_handler,
304 		.extra1		= &min_wakeup_granularity_ns,
305 		.extra2		= &max_wakeup_granularity_ns,
306 	},
307 	{
308 		.procname	= "sched_tunable_scaling",
309 		.data		= &sysctl_sched_tunable_scaling,
310 		.maxlen		= sizeof(enum sched_tunable_scaling),
311 		.mode		= 0644,
312 		.proc_handler	= sched_proc_update_handler,
313 		.extra1		= &min_sched_tunable_scaling,
314 		.extra2		= &max_sched_tunable_scaling,
315 	},
316 	{
317 		.procname	= "sched_migration_cost",
318 		.data		= &sysctl_sched_migration_cost,
319 		.maxlen		= sizeof(unsigned int),
320 		.mode		= 0644,
321 		.proc_handler	= proc_dointvec,
322 	},
323 	{
324 		.procname	= "sched_nr_migrate",
325 		.data		= &sysctl_sched_nr_migrate,
326 		.maxlen		= sizeof(unsigned int),
327 		.mode		= 0644,
328 		.proc_handler	= proc_dointvec,
329 	},
330 	{
331 		.procname	= "sched_time_avg",
332 		.data		= &sysctl_sched_time_avg,
333 		.maxlen		= sizeof(unsigned int),
334 		.mode		= 0644,
335 		.proc_handler	= proc_dointvec,
336 	},
337 	{
338 		.procname	= "sched_shares_window",
339 		.data		= &sysctl_sched_shares_window,
340 		.maxlen		= sizeof(unsigned int),
341 		.mode		= 0644,
342 		.proc_handler	= proc_dointvec,
343 	},
344 	{
345 		.procname	= "timer_migration",
346 		.data		= &sysctl_timer_migration,
347 		.maxlen		= sizeof(unsigned int),
348 		.mode		= 0644,
349 		.proc_handler	= proc_dointvec_minmax,
350 		.extra1		= &zero,
351 		.extra2		= &one,
352 	},
353 #endif
354 	{
355 		.procname	= "sched_rt_period_us",
356 		.data		= &sysctl_sched_rt_period,
357 		.maxlen		= sizeof(unsigned int),
358 		.mode		= 0644,
359 		.proc_handler	= sched_rt_handler,
360 	},
361 	{
362 		.procname	= "sched_rt_runtime_us",
363 		.data		= &sysctl_sched_rt_runtime,
364 		.maxlen		= sizeof(int),
365 		.mode		= 0644,
366 		.proc_handler	= sched_rt_handler,
367 	},
368 #ifdef CONFIG_SCHED_AUTOGROUP
369 	{
370 		.procname	= "sched_autogroup_enabled",
371 		.data		= &sysctl_sched_autogroup_enabled,
372 		.maxlen		= sizeof(unsigned int),
373 		.mode		= 0644,
374 		.proc_handler	= proc_dointvec_minmax,
375 		.extra1		= &zero,
376 		.extra2		= &one,
377 	},
378 #endif
379 #ifdef CONFIG_CFS_BANDWIDTH
380 	{
381 		.procname	= "sched_cfs_bandwidth_slice_us",
382 		.data		= &sysctl_sched_cfs_bandwidth_slice,
383 		.maxlen		= sizeof(unsigned int),
384 		.mode		= 0644,
385 		.proc_handler	= proc_dointvec_minmax,
386 		.extra1		= &one,
387 	},
388 #endif
389 #ifdef CONFIG_PROVE_LOCKING
390 	{
391 		.procname	= "prove_locking",
392 		.data		= &prove_locking,
393 		.maxlen		= sizeof(int),
394 		.mode		= 0644,
395 		.proc_handler	= proc_dointvec,
396 	},
397 #endif
398 #ifdef CONFIG_LOCK_STAT
399 	{
400 		.procname	= "lock_stat",
401 		.data		= &lock_stat,
402 		.maxlen		= sizeof(int),
403 		.mode		= 0644,
404 		.proc_handler	= proc_dointvec,
405 	},
406 #endif
407 	{
408 		.procname	= "panic",
409 		.data		= &panic_timeout,
410 		.maxlen		= sizeof(int),
411 		.mode		= 0644,
412 		.proc_handler	= proc_dointvec,
413 	},
414 	{
415 		.procname	= "core_uses_pid",
416 		.data		= &core_uses_pid,
417 		.maxlen		= sizeof(int),
418 		.mode		= 0644,
419 		.proc_handler	= proc_dointvec,
420 	},
421 	{
422 		.procname	= "core_pattern",
423 		.data		= core_pattern,
424 		.maxlen		= CORENAME_MAX_SIZE,
425 		.mode		= 0644,
426 		.proc_handler	= proc_dostring,
427 	},
428 	{
429 		.procname	= "core_pipe_limit",
430 		.data		= &core_pipe_limit,
431 		.maxlen		= sizeof(unsigned int),
432 		.mode		= 0644,
433 		.proc_handler	= proc_dointvec,
434 	},
435 #ifdef CONFIG_PROC_SYSCTL
436 	{
437 		.procname	= "tainted",
438 		.maxlen 	= sizeof(long),
439 		.mode		= 0644,
440 		.proc_handler	= proc_taint,
441 	},
442 #endif
443 #ifdef CONFIG_LATENCYTOP
444 	{
445 		.procname	= "latencytop",
446 		.data		= &latencytop_enabled,
447 		.maxlen		= sizeof(int),
448 		.mode		= 0644,
449 		.proc_handler	= proc_dointvec,
450 	},
451 #endif
452 #ifdef CONFIG_BLK_DEV_INITRD
453 	{
454 		.procname	= "real-root-dev",
455 		.data		= &real_root_dev,
456 		.maxlen		= sizeof(int),
457 		.mode		= 0644,
458 		.proc_handler	= proc_dointvec,
459 	},
460 #endif
461 	{
462 		.procname	= "print-fatal-signals",
463 		.data		= &print_fatal_signals,
464 		.maxlen		= sizeof(int),
465 		.mode		= 0644,
466 		.proc_handler	= proc_dointvec,
467 	},
468 #ifdef CONFIG_SPARC
469 	{
470 		.procname	= "reboot-cmd",
471 		.data		= reboot_command,
472 		.maxlen		= 256,
473 		.mode		= 0644,
474 		.proc_handler	= proc_dostring,
475 	},
476 	{
477 		.procname	= "stop-a",
478 		.data		= &stop_a_enabled,
479 		.maxlen		= sizeof (int),
480 		.mode		= 0644,
481 		.proc_handler	= proc_dointvec,
482 	},
483 	{
484 		.procname	= "scons-poweroff",
485 		.data		= &scons_pwroff,
486 		.maxlen		= sizeof (int),
487 		.mode		= 0644,
488 		.proc_handler	= proc_dointvec,
489 	},
490 #endif
491 #ifdef CONFIG_SPARC64
492 	{
493 		.procname	= "tsb-ratio",
494 		.data		= &sysctl_tsb_ratio,
495 		.maxlen		= sizeof (int),
496 		.mode		= 0644,
497 		.proc_handler	= proc_dointvec,
498 	},
499 #endif
500 #ifdef __hppa__
501 	{
502 		.procname	= "soft-power",
503 		.data		= &pwrsw_enabled,
504 		.maxlen		= sizeof (int),
505 	 	.mode		= 0644,
506 		.proc_handler	= proc_dointvec,
507 	},
508 	{
509 		.procname	= "unaligned-trap",
510 		.data		= &unaligned_enabled,
511 		.maxlen		= sizeof (int),
512 		.mode		= 0644,
513 		.proc_handler	= proc_dointvec,
514 	},
515 #endif
516 	{
517 		.procname	= "ctrl-alt-del",
518 		.data		= &C_A_D,
519 		.maxlen		= sizeof(int),
520 		.mode		= 0644,
521 		.proc_handler	= proc_dointvec,
522 	},
523 #ifdef CONFIG_FUNCTION_TRACER
524 	{
525 		.procname	= "ftrace_enabled",
526 		.data		= &ftrace_enabled,
527 		.maxlen		= sizeof(int),
528 		.mode		= 0644,
529 		.proc_handler	= ftrace_enable_sysctl,
530 	},
531 #endif
532 #ifdef CONFIG_STACK_TRACER
533 	{
534 		.procname	= "stack_tracer_enabled",
535 		.data		= &stack_tracer_enabled,
536 		.maxlen		= sizeof(int),
537 		.mode		= 0644,
538 		.proc_handler	= stack_trace_sysctl,
539 	},
540 #endif
541 #ifdef CONFIG_TRACING
542 	{
543 		.procname	= "ftrace_dump_on_oops",
544 		.data		= &ftrace_dump_on_oops,
545 		.maxlen		= sizeof(int),
546 		.mode		= 0644,
547 		.proc_handler	= proc_dointvec,
548 	},
549 #endif
550 #ifdef CONFIG_MODULES
551 	{
552 		.procname	= "modprobe",
553 		.data		= &modprobe_path,
554 		.maxlen		= KMOD_PATH_LEN,
555 		.mode		= 0644,
556 		.proc_handler	= proc_dostring,
557 	},
558 	{
559 		.procname	= "modules_disabled",
560 		.data		= &modules_disabled,
561 		.maxlen		= sizeof(int),
562 		.mode		= 0644,
563 		/* only handle a transition from default "0" to "1" */
564 		.proc_handler	= proc_dointvec_minmax,
565 		.extra1		= &one,
566 		.extra2		= &one,
567 	},
568 #endif
569 #ifdef CONFIG_HOTPLUG
570 	{
571 		.procname	= "hotplug",
572 		.data		= &uevent_helper,
573 		.maxlen		= UEVENT_HELPER_PATH_LEN,
574 		.mode		= 0644,
575 		.proc_handler	= proc_dostring,
576 	},
577 #endif
578 #ifdef CONFIG_CHR_DEV_SG
579 	{
580 		.procname	= "sg-big-buff",
581 		.data		= &sg_big_buff,
582 		.maxlen		= sizeof (int),
583 		.mode		= 0444,
584 		.proc_handler	= proc_dointvec,
585 	},
586 #endif
587 #ifdef CONFIG_BSD_PROCESS_ACCT
588 	{
589 		.procname	= "acct",
590 		.data		= &acct_parm,
591 		.maxlen		= 3*sizeof(int),
592 		.mode		= 0644,
593 		.proc_handler	= proc_dointvec,
594 	},
595 #endif
596 #ifdef CONFIG_MAGIC_SYSRQ
597 	{
598 		.procname	= "sysrq",
599 		.data		= &__sysrq_enabled,
600 		.maxlen		= sizeof (int),
601 		.mode		= 0644,
602 		.proc_handler	= sysrq_sysctl_handler,
603 	},
604 #endif
605 #ifdef CONFIG_PROC_SYSCTL
606 	{
607 		.procname	= "cad_pid",
608 		.data		= NULL,
609 		.maxlen		= sizeof (int),
610 		.mode		= 0600,
611 		.proc_handler	= proc_do_cad_pid,
612 	},
613 #endif
614 	{
615 		.procname	= "threads-max",
616 		.data		= &max_threads,
617 		.maxlen		= sizeof(int),
618 		.mode		= 0644,
619 		.proc_handler	= proc_dointvec,
620 	},
621 	{
622 		.procname	= "random",
623 		.mode		= 0555,
624 		.child		= random_table,
625 	},
626 	{
627 		.procname	= "usermodehelper",
628 		.mode		= 0555,
629 		.child		= usermodehelper_table,
630 	},
631 	{
632 		.procname	= "overflowuid",
633 		.data		= &overflowuid,
634 		.maxlen		= sizeof(int),
635 		.mode		= 0644,
636 		.proc_handler	= proc_dointvec_minmax,
637 		.extra1		= &minolduid,
638 		.extra2		= &maxolduid,
639 	},
640 	{
641 		.procname	= "overflowgid",
642 		.data		= &overflowgid,
643 		.maxlen		= sizeof(int),
644 		.mode		= 0644,
645 		.proc_handler	= proc_dointvec_minmax,
646 		.extra1		= &minolduid,
647 		.extra2		= &maxolduid,
648 	},
649 #ifdef CONFIG_S390
650 #ifdef CONFIG_MATHEMU
651 	{
652 		.procname	= "ieee_emulation_warnings",
653 		.data		= &sysctl_ieee_emulation_warnings,
654 		.maxlen		= sizeof(int),
655 		.mode		= 0644,
656 		.proc_handler	= proc_dointvec,
657 	},
658 #endif
659 	{
660 		.procname	= "userprocess_debug",
661 		.data		= &show_unhandled_signals,
662 		.maxlen		= sizeof(int),
663 		.mode		= 0644,
664 		.proc_handler	= proc_dointvec,
665 	},
666 #endif
667 	{
668 		.procname	= "pid_max",
669 		.data		= &pid_max,
670 		.maxlen		= sizeof (int),
671 		.mode		= 0644,
672 		.proc_handler	= proc_dointvec_minmax,
673 		.extra1		= &pid_max_min,
674 		.extra2		= &pid_max_max,
675 	},
676 	{
677 		.procname	= "panic_on_oops",
678 		.data		= &panic_on_oops,
679 		.maxlen		= sizeof(int),
680 		.mode		= 0644,
681 		.proc_handler	= proc_dointvec,
682 	},
683 #if defined CONFIG_PRINTK
684 	{
685 		.procname	= "printk",
686 		.data		= &console_loglevel,
687 		.maxlen		= 4*sizeof(int),
688 		.mode		= 0644,
689 		.proc_handler	= proc_dointvec,
690 	},
691 	{
692 		.procname	= "printk_ratelimit",
693 		.data		= &printk_ratelimit_state.interval,
694 		.maxlen		= sizeof(int),
695 		.mode		= 0644,
696 		.proc_handler	= proc_dointvec_jiffies,
697 	},
698 	{
699 		.procname	= "printk_ratelimit_burst",
700 		.data		= &printk_ratelimit_state.burst,
701 		.maxlen		= sizeof(int),
702 		.mode		= 0644,
703 		.proc_handler	= proc_dointvec,
704 	},
705 	{
706 		.procname	= "printk_delay",
707 		.data		= &printk_delay_msec,
708 		.maxlen		= sizeof(int),
709 		.mode		= 0644,
710 		.proc_handler	= proc_dointvec_minmax,
711 		.extra1		= &zero,
712 		.extra2		= &ten_thousand,
713 	},
714 	{
715 		.procname	= "dmesg_restrict",
716 		.data		= &dmesg_restrict,
717 		.maxlen		= sizeof(int),
718 		.mode		= 0644,
		/*
		 * Use the dedicated proc_dmesg_restrict handler (declared
		 * near the top of this file under CONFIG_PRINTK), exactly as
		 * the kptr_restrict entry does, rather than the generic
		 * proc_dointvec_minmax: the restricted handler was wired to
		 * kptr_restrict only, leaving the knob it is named after
		 * unprotected.
		 * NOTE(review): proc_dmesg_restrict is defined outside this
		 * part of the file; it is expected to refuse writes from
		 * callers lacking CAP_SYS_ADMIN and otherwise defer to
		 * proc_dointvec_minmax with the same extra1/extra2 bounds -
		 * confirm against its definition.
		 */
719 		.proc_handler	= proc_dmesg_restrict,
720 		.extra1		= &zero,
721 		.extra2		= &one,
722 	},
723 	{
724 		.procname	= "kptr_restrict",
725 		.data		= &kptr_restrict,
726 		.maxlen		= sizeof(int),
727 		.mode		= 0644,
728 		.proc_handler	= proc_dmesg_restrict,
729 		.extra1		= &zero,
730 		.extra2		= &two,
731 	},
732 #endif
733 	{
734 		.procname	= "ngroups_max",
735 		.data		= &ngroups_max,
736 		.maxlen		= sizeof (int),
737 		.mode		= 0444,
738 		.proc_handler	= proc_dointvec,
739 	},
740 	{
741 		.procname	= "cap_last_cap",
742 		.data		= (void *)&cap_last_cap,
743 		.maxlen		= sizeof(int),
744 		.mode		= 0444,
745 		.proc_handler	= proc_dointvec,
746 	},
747 #if defined(CONFIG_LOCKUP_DETECTOR)
748 	{
749 		.procname       = "watchdog",
750 		.data           = &watchdog_enabled,
751 		.maxlen         = sizeof (int),
752 		.mode           = 0644,
753 		.proc_handler   = proc_dowatchdog,
754 		.extra1		= &zero,
755 		.extra2		= &one,
756 	},
757 	{
758 		.procname	= "watchdog_thresh",
759 		.data		= &watchdog_thresh,
760 		.maxlen		= sizeof(int),
761 		.mode		= 0644,
762 		.proc_handler	= proc_dowatchdog,
763 		.extra1		= &neg_one,
764 		.extra2		= &sixty,
765 	},
766 	{
767 		.procname	= "softlockup_panic",
768 		.data		= &softlockup_panic,
769 		.maxlen		= sizeof(int),
770 		.mode		= 0644,
771 		.proc_handler	= proc_dointvec_minmax,
772 		.extra1		= &zero,
773 		.extra2		= &one,
774 	},
775 	{
776 		.procname       = "nmi_watchdog",
777 		.data           = &watchdog_enabled,
778 		.maxlen         = sizeof (int),
779 		.mode           = 0644,
780 		.proc_handler   = proc_dowatchdog,
781 		.extra1		= &zero,
782 		.extra2		= &one,
783 	},
784 #endif
785 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
786 	{
787 		.procname       = "unknown_nmi_panic",
788 		.data           = &unknown_nmi_panic,
789 		.maxlen         = sizeof (int),
790 		.mode           = 0644,
791 		.proc_handler   = proc_dointvec,
792 	},
793 #endif
794 #if defined(CONFIG_X86)
795 	{
796 		.procname	= "panic_on_unrecovered_nmi",
797 		.data		= &panic_on_unrecovered_nmi,
798 		.maxlen		= sizeof(int),
799 		.mode		= 0644,
800 		.proc_handler	= proc_dointvec,
801 	},
802 	{
803 		.procname	= "panic_on_io_nmi",
804 		.data		= &panic_on_io_nmi,
805 		.maxlen		= sizeof(int),
806 		.mode		= 0644,
807 		.proc_handler	= proc_dointvec,
808 	},
809 #ifdef CONFIG_DEBUG_STACKOVERFLOW
810 	{
811 		.procname	= "panic_on_stackoverflow",
812 		.data		= &sysctl_panic_on_stackoverflow,
813 		.maxlen		= sizeof(int),
814 		.mode		= 0644,
815 		.proc_handler	= proc_dointvec,
816 	},
817 #endif
818 	{
819 		.procname	= "bootloader_type",
820 		.data		= &bootloader_type,
821 		.maxlen		= sizeof (int),
822 		.mode		= 0444,
823 		.proc_handler	= proc_dointvec,
824 	},
825 	{
826 		.procname	= "bootloader_version",
827 		.data		= &bootloader_version,
828 		.maxlen		= sizeof (int),
829 		.mode		= 0444,
830 		.proc_handler	= proc_dointvec,
831 	},
832 	{
833 		.procname	= "kstack_depth_to_print",
834 		.data		= &kstack_depth_to_print,
835 		.maxlen		= sizeof(int),
836 		.mode		= 0644,
837 		.proc_handler	= proc_dointvec,
838 	},
839 	{
840 		.procname	= "io_delay_type",
841 		.data		= &io_delay_type,
842 		.maxlen		= sizeof(int),
843 		.mode		= 0644,
844 		.proc_handler	= proc_dointvec,
845 	},
846 #endif
847 #if defined(CONFIG_MMU)
848 	{
849 		.procname	= "randomize_va_space",
850 		.data		= &randomize_va_space,
851 		.maxlen		= sizeof(int),
852 		.mode		= 0644,
853 		.proc_handler	= proc_dointvec,
854 	},
855 #endif
856 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
857 	{
858 		.procname	= "spin_retry",
859 		.data		= &spin_retry,
860 		.maxlen		= sizeof (int),
861 		.mode		= 0644,
862 		.proc_handler	= proc_dointvec,
863 	},
864 #endif
865 #if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
866 	{
867 		.procname	= "acpi_video_flags",
868 		.data		= &acpi_realmode_flags,
869 		.maxlen		= sizeof (unsigned long),
870 		.mode		= 0644,
871 		.proc_handler	= proc_doulongvec_minmax,
872 	},
873 #endif
874 #ifdef CONFIG_IA64
875 	{
876 		.procname	= "ignore-unaligned-usertrap",
877 		.data		= &no_unaligned_warning,
878 		.maxlen		= sizeof (int),
879 	 	.mode		= 0644,
880 		.proc_handler	= proc_dointvec,
881 	},
882 	{
883 		.procname	= "unaligned-dump-stack",
884 		.data		= &unaligned_dump_stack,
885 		.maxlen		= sizeof (int),
886 		.mode		= 0644,
887 		.proc_handler	= proc_dointvec,
888 	},
889 #endif
890 #ifdef CONFIG_DETECT_HUNG_TASK
891 	{
892 		.procname	= "hung_task_panic",
893 		.data		= &sysctl_hung_task_panic,
894 		.maxlen		= sizeof(int),
895 		.mode		= 0644,
896 		.proc_handler	= proc_dointvec_minmax,
897 		.extra1		= &zero,
898 		.extra2		= &one,
899 	},
900 	{
901 		.procname	= "hung_task_check_count",
902 		.data		= &sysctl_hung_task_check_count,
903 		.maxlen		= sizeof(unsigned long),
904 		.mode		= 0644,
905 		.proc_handler	= proc_doulongvec_minmax,
906 	},
907 	{
908 		.procname	= "hung_task_timeout_secs",
909 		.data		= &sysctl_hung_task_timeout_secs,
910 		.maxlen		= sizeof(unsigned long),
911 		.mode		= 0644,
912 		.proc_handler	= proc_dohung_task_timeout_secs,
913 	},
914 	{
915 		.procname	= "hung_task_warnings",
916 		.data		= &sysctl_hung_task_warnings,
917 		.maxlen		= sizeof(unsigned long),
918 		.mode		= 0644,
919 		.proc_handler	= proc_doulongvec_minmax,
920 	},
921 #endif
922 #ifdef CONFIG_COMPAT
923 	{
924 		.procname	= "compat-log",
925 		.data		= &compat_log,
926 		.maxlen		= sizeof (int),
927 	 	.mode		= 0644,
928 		.proc_handler	= proc_dointvec,
929 	},
930 #endif
931 #ifdef CONFIG_RT_MUTEXES
932 	{
933 		.procname	= "max_lock_depth",
934 		.data		= &max_lock_depth,
935 		.maxlen		= sizeof(int),
936 		.mode		= 0644,
937 		.proc_handler	= proc_dointvec,
938 	},
939 #endif
940 	{
941 		.procname	= "poweroff_cmd",
942 		.data		= &poweroff_cmd,
943 		.maxlen		= POWEROFF_CMD_PATH_LEN,
944 		.mode		= 0644,
945 		.proc_handler	= proc_dostring,
946 	},
947 #ifdef CONFIG_KEYS
948 	{
949 		.procname	= "keys",
950 		.mode		= 0555,
951 		.child		= key_sysctls,
952 	},
953 #endif
954 #ifdef CONFIG_RCU_TORTURE_TEST
955 	{
956 		.procname       = "rcutorture_runnable",
957 		.data           = &rcutorture_runnable,
958 		.maxlen         = sizeof(int),
959 		.mode           = 0644,
960 		.proc_handler	= proc_dointvec,
961 	},
962 #endif
963 #ifdef CONFIG_PERF_EVENTS
964 	/*
965 	 * User-space scripts rely on the existence of this file
966 	 * as a feature check for perf_events being enabled.
967 	 *
968 	 * So it's an ABI, do not remove!
969 	 */
970 	{
971 		.procname	= "perf_event_paranoid",
972 		.data		= &sysctl_perf_event_paranoid,
973 		.maxlen		= sizeof(sysctl_perf_event_paranoid),
974 		.mode		= 0644,
975 		.proc_handler	= proc_dointvec,
976 	},
977 	{
978 		.procname	= "perf_event_mlock_kb",
979 		.data		= &sysctl_perf_event_mlock,
980 		.maxlen		= sizeof(sysctl_perf_event_mlock),
981 		.mode		= 0644,
982 		.proc_handler	= proc_dointvec,
983 	},
984 	{
985 		.procname	= "perf_event_max_sample_rate",
986 		.data		= &sysctl_perf_event_sample_rate,
987 		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
988 		.mode		= 0644,
989 		.proc_handler	= perf_proc_update_handler,
990 	},
991 #endif
992 #ifdef CONFIG_KMEMCHECK
993 	{
994 		.procname	= "kmemcheck",
995 		.data		= &kmemcheck_enabled,
996 		.maxlen		= sizeof(int),
997 		.mode		= 0644,
998 		.proc_handler	= proc_dointvec,
999 	},
1000 #endif
1001 #ifdef CONFIG_BLOCK
1002 	{
1003 		.procname	= "blk_iopoll",
1004 		.data		= &blk_iopoll_enabled,
1005 		.maxlen		= sizeof(int),
1006 		.mode		= 0644,
1007 		.proc_handler	= proc_dointvec,
1008 	},
1009 #endif
1010 	{ }
1011 };
1012 
1013 static struct ctl_table vm_table[] = {
1014 	{
1015 		.procname	= "overcommit_memory",
1016 		.data		= &sysctl_overcommit_memory,
1017 		.maxlen		= sizeof(sysctl_overcommit_memory),
1018 		.mode		= 0644,
1019 		.proc_handler	= proc_dointvec_minmax,
1020 		.extra1		= &zero,
1021 		.extra2		= &two,
1022 	},
1023 	{
1024 		.procname	= "panic_on_oom",
1025 		.data		= &sysctl_panic_on_oom,
1026 		.maxlen		= sizeof(sysctl_panic_on_oom),
1027 		.mode		= 0644,
1028 		.proc_handler	= proc_dointvec_minmax,
1029 		.extra1		= &zero,
1030 		.extra2		= &two,
1031 	},
1032 	{
1033 		.procname	= "oom_kill_allocating_task",
1034 		.data		= &sysctl_oom_kill_allocating_task,
1035 		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
1036 		.mode		= 0644,
1037 		.proc_handler	= proc_dointvec,
1038 	},
1039 	{
1040 		.procname	= "oom_dump_tasks",
1041 		.data		= &sysctl_oom_dump_tasks,
1042 		.maxlen		= sizeof(sysctl_oom_dump_tasks),
1043 		.mode		= 0644,
1044 		.proc_handler	= proc_dointvec,
1045 	},
1046 	{
1047 		.procname	= "overcommit_ratio",
1048 		.data		= &sysctl_overcommit_ratio,
1049 		.maxlen		= sizeof(sysctl_overcommit_ratio),
1050 		.mode		= 0644,
1051 		.proc_handler	= proc_dointvec,
1052 	},
1053 	{
1054 		.procname	= "page-cluster",
1055 		.data		= &page_cluster,
1056 		.maxlen		= sizeof(int),
1057 		.mode		= 0644,
1058 		.proc_handler	= proc_dointvec_minmax,
1059 		.extra1		= &zero,
1060 	},
1061 	{
1062 		.procname	= "dirty_background_ratio",
1063 		.data		= &dirty_background_ratio,
1064 		.maxlen		= sizeof(dirty_background_ratio),
1065 		.mode		= 0644,
1066 		.proc_handler	= dirty_background_ratio_handler,
1067 		.extra1		= &zero,
1068 		.extra2		= &one_hundred,
1069 	},
1070 	{
1071 		.procname	= "dirty_background_bytes",
1072 		.data		= &dirty_background_bytes,
1073 		.maxlen		= sizeof(dirty_background_bytes),
1074 		.mode		= 0644,
1075 		.proc_handler	= dirty_background_bytes_handler,
1076 		.extra1		= &one_ul,
1077 	},
1078 	{
1079 		.procname	= "dirty_ratio",
1080 		.data		= &vm_dirty_ratio,
1081 		.maxlen		= sizeof(vm_dirty_ratio),
1082 		.mode		= 0644,
1083 		.proc_handler	= dirty_ratio_handler,
1084 		.extra1		= &zero,
1085 		.extra2		= &one_hundred,
1086 	},
1087 	{
1088 		.procname	= "dirty_bytes",
1089 		.data		= &vm_dirty_bytes,
1090 		.maxlen		= sizeof(vm_dirty_bytes),
1091 		.mode		= 0644,
1092 		.proc_handler	= dirty_bytes_handler,
1093 		.extra1		= &dirty_bytes_min,
1094 	},
1095 	{
1096 		.procname	= "dirty_writeback_centisecs",
1097 		.data		= &dirty_writeback_interval,
1098 		.maxlen		= sizeof(dirty_writeback_interval),
1099 		.mode		= 0644,
1100 		.proc_handler	= dirty_writeback_centisecs_handler,
1101 	},
1102 	{
1103 		.procname	= "dirty_expire_centisecs",
1104 		.data		= &dirty_expire_interval,
1105 		.maxlen		= sizeof(dirty_expire_interval),
1106 		.mode		= 0644,
1107 		.proc_handler	= proc_dointvec_minmax,
1108 		.extra1		= &zero,
1109 	},
1110 	{
1111 		.procname	= "nr_pdflush_threads",
1112 		.data		= &nr_pdflush_threads,
1113 		.maxlen		= sizeof nr_pdflush_threads,
1114 		.mode		= 0444 /* read-only*/,
1115 		.proc_handler	= proc_dointvec,
1116 	},
1117 	{
1118 		.procname	= "swappiness",
1119 		.data		= &vm_swappiness,
1120 		.maxlen		= sizeof(vm_swappiness),
1121 		.mode		= 0644,
1122 		.proc_handler	= proc_dointvec_minmax,
1123 		.extra1		= &zero,
1124 		.extra2		= &one_hundred,
1125 	},
1126 #ifdef CONFIG_HUGETLB_PAGE
1127 	{
1128 		.procname	= "nr_hugepages",
1129 		.data		= NULL,
1130 		.maxlen		= sizeof(unsigned long),
1131 		.mode		= 0644,
1132 		.proc_handler	= hugetlb_sysctl_handler,
1133 		.extra1		= (void *)&hugetlb_zero,
1134 		.extra2		= (void *)&hugetlb_infinity,
1135 	},
1136 #ifdef CONFIG_NUMA
1137 	{
1138 		.procname       = "nr_hugepages_mempolicy",
1139 		.data           = NULL,
1140 		.maxlen         = sizeof(unsigned long),
1141 		.mode           = 0644,
1142 		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1143 		.extra1		= (void *)&hugetlb_zero,
1144 		.extra2		= (void *)&hugetlb_infinity,
1145 	},
1146 #endif
1147 	 {
1148 		.procname	= "hugetlb_shm_group",
1149 		.data		= &sysctl_hugetlb_shm_group,
1150 		.maxlen		= sizeof(gid_t),
1151 		.mode		= 0644,
1152 		.proc_handler	= proc_dointvec,
1153 	 },
1154 	 {
1155 		.procname	= "hugepages_treat_as_movable",
1156 		.data		= &hugepages_treat_as_movable,
1157 		.maxlen		= sizeof(int),
1158 		.mode		= 0644,
1159 		.proc_handler	= hugetlb_treat_movable_handler,
1160 	},
1161 	{
1162 		.procname	= "nr_overcommit_hugepages",
1163 		.data		= NULL,
1164 		.maxlen		= sizeof(unsigned long),
1165 		.mode		= 0644,
1166 		.proc_handler	= hugetlb_overcommit_handler,
1167 		.extra1		= (void *)&hugetlb_zero,
1168 		.extra2		= (void *)&hugetlb_infinity,
1169 	},
1170 #endif
1171 	{
1172 		.procname	= "lowmem_reserve_ratio",
1173 		.data		= &sysctl_lowmem_reserve_ratio,
1174 		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
1175 		.mode		= 0644,
1176 		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
1177 	},
1178 	{
1179 		.procname	= "drop_caches",
1180 		.data		= &sysctl_drop_caches,
1181 		.maxlen		= sizeof(int),
1182 		.mode		= 0644,
1183 		.proc_handler	= drop_caches_sysctl_handler,
1184 		.extra1		= &one,
1185 		.extra2		= &three,
1186 	},
1187 #ifdef CONFIG_COMPACTION
1188 	{
1189 		.procname	= "compact_memory",
1190 		.data		= &sysctl_compact_memory,
1191 		.maxlen		= sizeof(int),
1192 		.mode		= 0200,
1193 		.proc_handler	= sysctl_compaction_handler,
1194 	},
1195 	{
1196 		.procname	= "extfrag_threshold",
1197 		.data		= &sysctl_extfrag_threshold,
1198 		.maxlen		= sizeof(int),
1199 		.mode		= 0644,
1200 		.proc_handler	= sysctl_extfrag_handler,
1201 		.extra1		= &min_extfrag_threshold,
1202 		.extra2		= &max_extfrag_threshold,
1203 	},
1204 
1205 #endif /* CONFIG_COMPACTION */
1206 	{
1207 		.procname	= "min_free_kbytes",
1208 		.data		= &min_free_kbytes,
1209 		.maxlen		= sizeof(min_free_kbytes),
1210 		.mode		= 0644,
1211 		.proc_handler	= min_free_kbytes_sysctl_handler,
1212 		.extra1		= &zero,
1213 	},
1214 	{
1215 		.procname	= "percpu_pagelist_fraction",
1216 		.data		= &percpu_pagelist_fraction,
1217 		.maxlen		= sizeof(percpu_pagelist_fraction),
1218 		.mode		= 0644,
1219 		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
1220 		.extra1		= &min_percpu_pagelist_fract,
1221 	},
1222 #ifdef CONFIG_MMU
1223 	{
1224 		.procname	= "max_map_count",
1225 		.data		= &sysctl_max_map_count,
1226 		.maxlen		= sizeof(sysctl_max_map_count),
1227 		.mode		= 0644,
1228 		.proc_handler	= proc_dointvec_minmax,
1229 		.extra1		= &zero,
1230 	},
1231 #else
1232 	{
1233 		.procname	= "nr_trim_pages",
1234 		.data		= &sysctl_nr_trim_pages,
1235 		.maxlen		= sizeof(sysctl_nr_trim_pages),
1236 		.mode		= 0644,
1237 		.proc_handler	= proc_dointvec_minmax,
1238 		.extra1		= &zero,
1239 	},
1240 #endif
1241 	{
1242 		.procname	= "laptop_mode",
1243 		.data		= &laptop_mode,
1244 		.maxlen		= sizeof(laptop_mode),
1245 		.mode		= 0644,
1246 		.proc_handler	= proc_dointvec_jiffies,
1247 	},
1248 	{
1249 		.procname	= "block_dump",
1250 		.data		= &block_dump,
1251 		.maxlen		= sizeof(block_dump),
1252 		.mode		= 0644,
1253 		.proc_handler	= proc_dointvec,
1254 		.extra1		= &zero,
1255 	},
1256 	{
1257 		.procname	= "vfs_cache_pressure",
1258 		.data		= &sysctl_vfs_cache_pressure,
1259 		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
1260 		.mode		= 0644,
1261 		.proc_handler	= proc_dointvec,
1262 		.extra1		= &zero,
1263 	},
1264 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1265 	{
1266 		.procname	= "legacy_va_layout",
1267 		.data		= &sysctl_legacy_va_layout,
1268 		.maxlen		= sizeof(sysctl_legacy_va_layout),
1269 		.mode		= 0644,
1270 		.proc_handler	= proc_dointvec,
1271 		.extra1		= &zero,
1272 	},
1273 #endif
1274 #ifdef CONFIG_NUMA
1275 	{
1276 		.procname	= "zone_reclaim_mode",
1277 		.data		= &zone_reclaim_mode,
1278 		.maxlen		= sizeof(zone_reclaim_mode),
1279 		.mode		= 0644,
1280 		.proc_handler	= proc_dointvec,
1281 		.extra1		= &zero,
1282 	},
1283 	{
1284 		.procname	= "min_unmapped_ratio",
1285 		.data		= &sysctl_min_unmapped_ratio,
1286 		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
1287 		.mode		= 0644,
1288 		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
1289 		.extra1		= &zero,
1290 		.extra2		= &one_hundred,
1291 	},
1292 	{
1293 		.procname	= "min_slab_ratio",
1294 		.data		= &sysctl_min_slab_ratio,
1295 		.maxlen		= sizeof(sysctl_min_slab_ratio),
1296 		.mode		= 0644,
1297 		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
1298 		.extra1		= &zero,
1299 		.extra2		= &one_hundred,
1300 	},
1301 #endif
1302 #ifdef CONFIG_SMP
1303 	{
1304 		.procname	= "stat_interval",
1305 		.data		= &sysctl_stat_interval,
1306 		.maxlen		= sizeof(sysctl_stat_interval),
1307 		.mode		= 0644,
1308 		.proc_handler	= proc_dointvec_jiffies,
1309 	},
1310 #endif
1311 #ifdef CONFIG_MMU
1312 	{
1313 		.procname	= "mmap_min_addr",
1314 		.data		= &dac_mmap_min_addr,
1315 		.maxlen		= sizeof(unsigned long),
1316 		.mode		= 0644,
1317 		.proc_handler	= mmap_min_addr_handler,
1318 	},
1319 #endif
1320 #ifdef CONFIG_NUMA
1321 	{
1322 		.procname	= "numa_zonelist_order",
1323 		.data		= &numa_zonelist_order,
1324 		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
1325 		.mode		= 0644,
1326 		.proc_handler	= numa_zonelist_order_handler,
1327 	},
1328 #endif
1329 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1330    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1331 	{
1332 		.procname	= "vdso_enabled",
1333 		.data		= &vdso_enabled,
1334 		.maxlen		= sizeof(vdso_enabled),
1335 		.mode		= 0644,
1336 		.proc_handler	= proc_dointvec,
1337 		.extra1		= &zero,
1338 	},
1339 #endif
1340 #ifdef CONFIG_HIGHMEM
1341 	{
1342 		.procname	= "highmem_is_dirtyable",
1343 		.data		= &vm_highmem_is_dirtyable,
1344 		.maxlen		= sizeof(vm_highmem_is_dirtyable),
1345 		.mode		= 0644,
1346 		.proc_handler	= proc_dointvec_minmax,
1347 		.extra1		= &zero,
1348 		.extra2		= &one,
1349 	},
1350 #endif
1351 	{
1352 		.procname	= "scan_unevictable_pages",
1353 		.data		= &scan_unevictable_pages,
1354 		.maxlen		= sizeof(scan_unevictable_pages),
1355 		.mode		= 0644,
1356 		.proc_handler	= scan_unevictable_handler,
1357 	},
1358 #ifdef CONFIG_MEMORY_FAILURE
1359 	{
1360 		.procname	= "memory_failure_early_kill",
1361 		.data		= &sysctl_memory_failure_early_kill,
1362 		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
1363 		.mode		= 0644,
1364 		.proc_handler	= proc_dointvec_minmax,
1365 		.extra1		= &zero,
1366 		.extra2		= &one,
1367 	},
1368 	{
1369 		.procname	= "memory_failure_recovery",
1370 		.data		= &sysctl_memory_failure_recovery,
1371 		.maxlen		= sizeof(sysctl_memory_failure_recovery),
1372 		.mode		= 0644,
1373 		.proc_handler	= proc_dointvec_minmax,
1374 		.extra1		= &zero,
1375 		.extra2		= &one,
1376 	},
1377 #endif
1378 	{ }
1379 };
1380 
1381 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1382 static struct ctl_table binfmt_misc_table[] = {
1383 	{ }
1384 };
1385 #endif
1386 
1387 static struct ctl_table fs_table[] = {
1388 	{
1389 		.procname	= "inode-nr",
1390 		.data		= &inodes_stat,
1391 		.maxlen		= 2*sizeof(int),
1392 		.mode		= 0444,
1393 		.proc_handler	= proc_nr_inodes,
1394 	},
1395 	{
1396 		.procname	= "inode-state",
1397 		.data		= &inodes_stat,
1398 		.maxlen		= 7*sizeof(int),
1399 		.mode		= 0444,
1400 		.proc_handler	= proc_nr_inodes,
1401 	},
1402 	{
1403 		.procname	= "file-nr",
1404 		.data		= &files_stat,
1405 		.maxlen		= sizeof(files_stat),
1406 		.mode		= 0444,
1407 		.proc_handler	= proc_nr_files,
1408 	},
1409 	{
1410 		.procname	= "file-max",
1411 		.data		= &files_stat.max_files,
1412 		.maxlen		= sizeof(files_stat.max_files),
1413 		.mode		= 0644,
1414 		.proc_handler	= proc_doulongvec_minmax,
1415 	},
1416 	{
1417 		.procname	= "nr_open",
1418 		.data		= &sysctl_nr_open,
1419 		.maxlen		= sizeof(int),
1420 		.mode		= 0644,
1421 		.proc_handler	= proc_dointvec_minmax,
1422 		.extra1		= &sysctl_nr_open_min,
1423 		.extra2		= &sysctl_nr_open_max,
1424 	},
1425 	{
1426 		.procname	= "dentry-state",
1427 		.data		= &dentry_stat,
1428 		.maxlen		= 6*sizeof(int),
1429 		.mode		= 0444,
1430 		.proc_handler	= proc_nr_dentry,
1431 	},
1432 	{
1433 		.procname	= "overflowuid",
1434 		.data		= &fs_overflowuid,
1435 		.maxlen		= sizeof(int),
1436 		.mode		= 0644,
1437 		.proc_handler	= proc_dointvec_minmax,
1438 		.extra1		= &minolduid,
1439 		.extra2		= &maxolduid,
1440 	},
1441 	{
1442 		.procname	= "overflowgid",
1443 		.data		= &fs_overflowgid,
1444 		.maxlen		= sizeof(int),
1445 		.mode		= 0644,
1446 		.proc_handler	= proc_dointvec_minmax,
1447 		.extra1		= &minolduid,
1448 		.extra2		= &maxolduid,
1449 	},
1450 #ifdef CONFIG_FILE_LOCKING
1451 	{
1452 		.procname	= "leases-enable",
1453 		.data		= &leases_enable,
1454 		.maxlen		= sizeof(int),
1455 		.mode		= 0644,
1456 		.proc_handler	= proc_dointvec,
1457 	},
1458 #endif
1459 #ifdef CONFIG_DNOTIFY
1460 	{
1461 		.procname	= "dir-notify-enable",
1462 		.data		= &dir_notify_enable,
1463 		.maxlen		= sizeof(int),
1464 		.mode		= 0644,
1465 		.proc_handler	= proc_dointvec,
1466 	},
1467 #endif
1468 #ifdef CONFIG_MMU
1469 #ifdef CONFIG_FILE_LOCKING
1470 	{
1471 		.procname	= "lease-break-time",
1472 		.data		= &lease_break_time,
1473 		.maxlen		= sizeof(int),
1474 		.mode		= 0644,
1475 		.proc_handler	= proc_dointvec,
1476 	},
1477 #endif
1478 #ifdef CONFIG_AIO
1479 	{
1480 		.procname	= "aio-nr",
1481 		.data		= &aio_nr,
1482 		.maxlen		= sizeof(aio_nr),
1483 		.mode		= 0444,
1484 		.proc_handler	= proc_doulongvec_minmax,
1485 	},
1486 	{
1487 		.procname	= "aio-max-nr",
1488 		.data		= &aio_max_nr,
1489 		.maxlen		= sizeof(aio_max_nr),
1490 		.mode		= 0644,
1491 		.proc_handler	= proc_doulongvec_minmax,
1492 	},
1493 #endif /* CONFIG_AIO */
1494 #ifdef CONFIG_INOTIFY_USER
1495 	{
1496 		.procname	= "inotify",
1497 		.mode		= 0555,
1498 		.child		= inotify_table,
1499 	},
1500 #endif
1501 #ifdef CONFIG_EPOLL
1502 	{
1503 		.procname	= "epoll",
1504 		.mode		= 0555,
1505 		.child		= epoll_table,
1506 	},
1507 #endif
1508 #endif
1509 	{
1510 		.procname	= "suid_dumpable",
1511 		.data		= &suid_dumpable,
1512 		.maxlen		= sizeof(int),
1513 		.mode		= 0644,
1514 		.proc_handler	= proc_dointvec_minmax,
1515 		.extra1		= &zero,
1516 		.extra2		= &two,
1517 	},
1518 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1519 	{
1520 		.procname	= "binfmt_misc",
1521 		.mode		= 0555,
1522 		.child		= binfmt_misc_table,
1523 	},
1524 #endif
1525 	{
1526 		.procname	= "pipe-max-size",
1527 		.data		= &pipe_max_size,
1528 		.maxlen		= sizeof(int),
1529 		.mode		= 0644,
1530 		.proc_handler	= &pipe_proc_fn,
1531 		.extra1		= &pipe_min_size,
1532 	},
1533 	{ }
1534 };
1535 
1536 static struct ctl_table debug_table[] = {
1537 #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
1538     defined(CONFIG_S390) || defined(CONFIG_TILE)
1539 	{
1540 		.procname	= "exception-trace",
1541 		.data		= &show_unhandled_signals,
1542 		.maxlen		= sizeof(int),
1543 		.mode		= 0644,
1544 		.proc_handler	= proc_dointvec
1545 	},
1546 #endif
1547 #if defined(CONFIG_OPTPROBES)
1548 	{
1549 		.procname	= "kprobes-optimization",
1550 		.data		= &sysctl_kprobes_optimization,
1551 		.maxlen		= sizeof(int),
1552 		.mode		= 0644,
1553 		.proc_handler	= proc_kprobes_optimization_handler,
1554 		.extra1		= &zero,
1555 		.extra2		= &one,
1556 	},
1557 #endif
1558 	{ }
1559 };
1560 
1561 static struct ctl_table dev_table[] = {
1562 	{ }
1563 };
1564 
1565 static DEFINE_SPINLOCK(sysctl_lock);
1566 
1567 /* called under sysctl_lock */
1568 static int use_table(struct ctl_table_header *p)
1569 {
1570 	if (unlikely(p->unregistering))
1571 		return 0;
1572 	p->used++;
1573 	return 1;
1574 }
1575 
1576 /* called under sysctl_lock */
1577 static void unuse_table(struct ctl_table_header *p)
1578 {
1579 	if (!--p->used)
1580 		if (unlikely(p->unregistering))
1581 			complete(p->unregistering);
1582 }
1583 
1584 /* called under sysctl_lock, will reacquire if has to wait */
1585 static void start_unregistering(struct ctl_table_header *p)
1586 {
1587 	/*
1588 	 * if p->used is 0, nobody will ever touch that entry again;
1589 	 * we'll eliminate all paths to it before dropping sysctl_lock
1590 	 */
1591 	if (unlikely(p->used)) {
1592 		struct completion wait;
1593 		init_completion(&wait);
1594 		p->unregistering = &wait;
1595 		spin_unlock(&sysctl_lock);
1596 		wait_for_completion(&wait);
1597 		spin_lock(&sysctl_lock);
1598 	} else {
1599 		/* anything non-NULL; we'll never dereference it */
1600 		p->unregistering = ERR_PTR(-EINVAL);
1601 	}
1602 	/*
1603 	 * do not remove from the list until nobody holds it; walking the
1604 	 * list in do_sysctl() relies on that.
1605 	 */
1606 	list_del_init(&p->ctl_entry);
1607 }
1608 
1609 void sysctl_head_get(struct ctl_table_header *head)
1610 {
1611 	spin_lock(&sysctl_lock);
1612 	head->count++;
1613 	spin_unlock(&sysctl_lock);
1614 }
1615 
1616 void sysctl_head_put(struct ctl_table_header *head)
1617 {
1618 	spin_lock(&sysctl_lock);
1619 	if (!--head->count)
1620 		kfree_rcu(head, rcu);
1621 	spin_unlock(&sysctl_lock);
1622 }
1623 
1624 struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1625 {
1626 	if (!head)
1627 		BUG();
1628 	spin_lock(&sysctl_lock);
1629 	if (!use_table(head))
1630 		head = ERR_PTR(-ENOENT);
1631 	spin_unlock(&sysctl_lock);
1632 	return head;
1633 }
1634 
1635 void sysctl_head_finish(struct ctl_table_header *head)
1636 {
1637 	if (!head)
1638 		return;
1639 	spin_lock(&sysctl_lock);
1640 	unuse_table(head);
1641 	spin_unlock(&sysctl_lock);
1642 }
1643 
1644 static struct ctl_table_set *
1645 lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1646 {
1647 	struct ctl_table_set *set = &root->default_set;
1648 	if (root->lookup)
1649 		set = root->lookup(root, namespaces);
1650 	return set;
1651 }
1652 
1653 static struct list_head *
1654 lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
1655 {
1656 	struct ctl_table_set *set = lookup_header_set(root, namespaces);
1657 	return &set->list;
1658 }
1659 
1660 struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
1661 					    struct ctl_table_header *prev)
1662 {
1663 	struct ctl_table_root *root;
1664 	struct list_head *header_list;
1665 	struct ctl_table_header *head;
1666 	struct list_head *tmp;
1667 
1668 	spin_lock(&sysctl_lock);
1669 	if (prev) {
1670 		head = prev;
1671 		tmp = &prev->ctl_entry;
1672 		unuse_table(prev);
1673 		goto next;
1674 	}
1675 	tmp = &root_table_header.ctl_entry;
1676 	for (;;) {
1677 		head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1678 
1679 		if (!use_table(head))
1680 			goto next;
1681 		spin_unlock(&sysctl_lock);
1682 		return head;
1683 	next:
1684 		root = head->root;
1685 		tmp = tmp->next;
1686 		header_list = lookup_header_list(root, namespaces);
1687 		if (tmp != header_list)
1688 			continue;
1689 
1690 		do {
1691 			root = list_entry(root->root_list.next,
1692 					struct ctl_table_root, root_list);
1693 			if (root == &sysctl_table_root)
1694 				goto out;
1695 			header_list = lookup_header_list(root, namespaces);
1696 		} while (list_empty(header_list));
1697 		tmp = header_list->next;
1698 	}
1699 out:
1700 	spin_unlock(&sysctl_lock);
1701 	return NULL;
1702 }
1703 
1704 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1705 {
1706 	return __sysctl_head_next(current->nsproxy, prev);
1707 }
1708 
1709 void register_sysctl_root(struct ctl_table_root *root)
1710 {
1711 	spin_lock(&sysctl_lock);
1712 	list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1713 	spin_unlock(&sysctl_lock);
1714 }
1715 
1716 /*
1717  * sysctl_perm does NOT grant the superuser all rights automatically, because
1718  * some sysctl variables are readonly even to root.
1719  */
1720 
1721 static int test_perm(int mode, int op)
1722 {
1723 	if (!current_euid())
1724 		mode >>= 6;
1725 	else if (in_egroup_p(0))
1726 		mode >>= 3;
1727 	if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1728 		return 0;
1729 	return -EACCES;
1730 }
1731 
1732 int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1733 {
1734 	int mode;
1735 
1736 	if (root->permissions)
1737 		mode = root->permissions(root, current->nsproxy, table);
1738 	else
1739 		mode = table->mode;
1740 
1741 	return test_perm(mode, op);
1742 }
1743 
1744 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1745 {
1746 	for (; table->procname; table++) {
1747 		table->parent = parent;
1748 		if (table->child)
1749 			sysctl_set_parent(table, table->child);
1750 	}
1751 }
1752 
1753 static __init int sysctl_init(void)
1754 {
1755 	sysctl_set_parent(NULL, root_table);
1756 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1757 	sysctl_check_table(current->nsproxy, root_table);
1758 #endif
1759 	return 0;
1760 }
1761 
1762 core_initcall(sysctl_init);
1763 
1764 static struct ctl_table *is_branch_in(struct ctl_table *branch,
1765 				      struct ctl_table *table)
1766 {
1767 	struct ctl_table *p;
1768 	const char *s = branch->procname;
1769 
1770 	/* branch should have named subdirectory as its first element */
1771 	if (!s || !branch->child)
1772 		return NULL;
1773 
1774 	/* ... and nothing else */
1775 	if (branch[1].procname)
1776 		return NULL;
1777 
1778 	/* table should contain subdirectory with the same name */
1779 	for (p = table; p->procname; p++) {
1780 		if (!p->child)
1781 			continue;
1782 		if (p->procname && strcmp(p->procname, s) == 0)
1783 			return p;
1784 	}
1785 	return NULL;
1786 }
1787 
1788 /* see if attaching q to p would be an improvement */
1789 static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1790 {
1791 	struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
1792 	struct ctl_table *next;
1793 	int is_better = 0;
1794 	int not_in_parent = !p->attached_by;
1795 
1796 	while ((next = is_branch_in(by, to)) != NULL) {
1797 		if (by == q->attached_by)
1798 			is_better = 1;
1799 		if (to == p->attached_by)
1800 			not_in_parent = 1;
1801 		by = by->child;
1802 		to = next->child;
1803 	}
1804 
1805 	if (is_better && not_in_parent) {
1806 		q->attached_by = by;
1807 		q->attached_to = to;
1808 		q->parent = p;
1809 	}
1810 }
1811 
1812 /**
1813  * __register_sysctl_paths - register a sysctl hierarchy
1814  * @root: List of sysctl headers to register on
1815  * @namespaces: Data to compute which lists of sysctl entries are visible
1816  * @path: The path to the directory the sysctl table is in.
1817  * @table: the top-level table structure
1818  *
1819  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1820  * array. A completely 0 filled entry terminates the table.
1821  *
1822  * The members of the &struct ctl_table structure are used as follows:
1823  *
1824  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1825  *            enter a sysctl file
1826  *
1827  * data - a pointer to data for use by proc_handler
1828  *
1829  * maxlen - the maximum size in bytes of the data
1830  *
1831  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1832  *
1833  * child - a pointer to the child sysctl table if this entry is a directory, or
1834  *         %NULL.
1835  *
1836  * proc_handler - the text handler routine (described below)
1837  *
1838  * de - for internal use by the sysctl routines
1839  *
1840  * extra1, extra2 - extra pointers usable by the proc handler routines
1841  *
1842  * Leaf nodes in the sysctl tree will be represented by a single file
1843  * under /proc; non-leaf nodes will be represented by directories.
1844  *
1845  * sysctl(2) can automatically manage read and write requests through
1846  * the sysctl table.  The data and maxlen fields of the ctl_table
1847  * struct enable minimal validation of the values being written to be
1848  * performed, and the mode field allows minimal authentication.
1849  *
1850  * There must be a proc_handler routine for any terminal nodes
1851  * mirrored under /proc/sys (non-terminals are handled by a built-in
1852  * directory handler).  Several default handlers are available to
1853  * cover common cases -
1854  *
1855  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1856  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1857  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1858  *
1859  * It is the handler's job to read the input buffer from user memory
1860  * and process it. The handler should return 0 on success.
1861  *
1862  * This routine returns %NULL on a failure to register, and a pointer
1863  * to the table header on success.
1864  */
1865 struct ctl_table_header *__register_sysctl_paths(
1866 	struct ctl_table_root *root,
1867 	struct nsproxy *namespaces,
1868 	const struct ctl_path *path, struct ctl_table *table)
1869 {
1870 	struct ctl_table_header *header;
1871 	struct ctl_table *new, **prevp;
1872 	unsigned int n, npath;
1873 	struct ctl_table_set *set;
1874 
1875 	/* Count the path components */
1876 	for (npath = 0; path[npath].procname; ++npath)
1877 		;
1878 
1879 	/*
1880 	 * For each path component, allocate a 2-element ctl_table array.
1881 	 * The first array element will be filled with the sysctl entry
1882 	 * for this, the second will be the sentinel (procname == 0).
1883 	 *
1884 	 * We allocate everything in one go so that we don't have to
1885 	 * worry about freeing additional memory in unregister_sysctl_table.
1886 	 */
1887 	header = kzalloc(sizeof(struct ctl_table_header) +
1888 			 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
1889 	if (!header)
1890 		return NULL;
1891 
1892 	new = (struct ctl_table *) (header + 1);
1893 
1894 	/* Now connect the dots */
1895 	prevp = &header->ctl_table;
1896 	for (n = 0; n < npath; ++n, ++path) {
1897 		/* Copy the procname */
1898 		new->procname = path->procname;
1899 		new->mode     = 0555;
1900 
1901 		*prevp = new;
1902 		prevp = &new->child;
1903 
1904 		new += 2;
1905 	}
1906 	*prevp = table;
1907 	header->ctl_table_arg = table;
1908 
1909 	INIT_LIST_HEAD(&header->ctl_entry);
1910 	header->used = 0;
1911 	header->unregistering = NULL;
1912 	header->root = root;
1913 	sysctl_set_parent(NULL, header->ctl_table);
1914 	header->count = 1;
1915 #ifdef CONFIG_SYSCTL_SYSCALL_CHECK
1916 	if (sysctl_check_table(namespaces, header->ctl_table)) {
1917 		kfree(header);
1918 		return NULL;
1919 	}
1920 #endif
1921 	spin_lock(&sysctl_lock);
1922 	header->set = lookup_header_set(root, namespaces);
1923 	header->attached_by = header->ctl_table;
1924 	header->attached_to = root_table;
1925 	header->parent = &root_table_header;
1926 	for (set = header->set; set; set = set->parent) {
1927 		struct ctl_table_header *p;
1928 		list_for_each_entry(p, &set->list, ctl_entry) {
1929 			if (p->unregistering)
1930 				continue;
1931 			try_attach(p, header);
1932 		}
1933 	}
1934 	header->parent->count++;
1935 	list_add_tail(&header->ctl_entry, &header->set->list);
1936 	spin_unlock(&sysctl_lock);
1937 
1938 	return header;
1939 }
1940 
1941 /**
1942  * register_sysctl_table_path - register a sysctl table hierarchy
1943  * @path: The path to the directory the sysctl table is in.
1944  * @table: the top-level table structure
1945  *
1946  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1947  * array. A completely 0 filled entry terminates the table.
1948  *
1949  * See __register_sysctl_paths for more details.
1950  */
1951 struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1952 						struct ctl_table *table)
1953 {
1954 	return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
1955 					path, table);
1956 }
1957 
1958 /**
1959  * register_sysctl_table - register a sysctl table hierarchy
1960  * @table: the top-level table structure
1961  *
1962  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1963  * array. A completely 0 filled entry terminates the table.
1964  *
1965  * See register_sysctl_paths for more details.
1966  */
1967 struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1968 {
1969 	static const struct ctl_path null_path[] = { {} };
1970 
1971 	return register_sysctl_paths(null_path, table);
1972 }
1973 
1974 /**
1975  * unregister_sysctl_table - unregister a sysctl table hierarchy
1976  * @header: the header returned from register_sysctl_table
1977  *
1978  * Unregisters the sysctl table and all children. proc entries may not
1979  * actually be removed until they are no longer used by anyone.
1980  */
1981 void unregister_sysctl_table(struct ctl_table_header * header)
1982 {
1983 	might_sleep();
1984 
1985 	if (header == NULL)
1986 		return;
1987 
1988 	spin_lock(&sysctl_lock);
1989 	start_unregistering(header);
1990 	if (!--header->parent->count) {
1991 		WARN_ON(1);
1992 		kfree_rcu(header->parent, rcu);
1993 	}
1994 	if (!--header->count)
1995 		kfree_rcu(header, rcu);
1996 	spin_unlock(&sysctl_lock);
1997 }
1998 
1999 int sysctl_is_seen(struct ctl_table_header *p)
2000 {
2001 	struct ctl_table_set *set = p->set;
2002 	int res;
2003 	spin_lock(&sysctl_lock);
2004 	if (p->unregistering)
2005 		res = 0;
2006 	else if (!set->is_seen)
2007 		res = 1;
2008 	else
2009 		res = set->is_seen(set);
2010 	spin_unlock(&sysctl_lock);
2011 	return res;
2012 }
2013 
2014 void setup_sysctl_set(struct ctl_table_set *p,
2015 	struct ctl_table_set *parent,
2016 	int (*is_seen)(struct ctl_table_set *))
2017 {
2018 	INIT_LIST_HEAD(&p->list);
2019 	p->parent = parent ? parent : &sysctl_table_root.default_set;
2020 	p->is_seen = is_seen;
2021 }
2022 
2023 #else /* !CONFIG_SYSCTL */
/* !CONFIG_SYSCTL stub: nothing is registered, NULL is always returned. */
struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
{
	return NULL;
}
2028 
/* !CONFIG_SYSCTL stub: nothing is registered, NULL is always returned. */
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
					       struct ctl_table *table)
{
	return NULL;
}
2034 
/* !CONFIG_SYSCTL stub: there is never anything to unregister. */
void unregister_sysctl_table(struct ctl_table_header *table)
{
}
2038 
/* !CONFIG_SYSCTL stub: table sets are left untouched. */
void setup_sysctl_set(struct ctl_table_set *p,
		      struct ctl_table_set *parent,
		      int (*is_seen)(struct ctl_table_set *))
{
}
2044 
/* !CONFIG_SYSCTL stub: headers are not reference counted. */
void sysctl_head_put(struct ctl_table_header *head)
{
}
2048 
2049 #endif /* CONFIG_SYSCTL */
2050 
2051 /*
2052  * /proc/sys support
2053  */
2054 
2055 #ifdef CONFIG_PROC_SYSCTL
2056 
2057 static int _proc_do_string(void* data, int maxlen, int write,
2058 			   void __user *buffer,
2059 			   size_t *lenp, loff_t *ppos)
2060 {
2061 	size_t len;
2062 	char __user *p;
2063 	char c;
2064 
2065 	if (!data || !maxlen || !*lenp) {
2066 		*lenp = 0;
2067 		return 0;
2068 	}
2069 
2070 	if (write) {
2071 		len = 0;
2072 		p = buffer;
2073 		while (len < *lenp) {
2074 			if (get_user(c, p++))
2075 				return -EFAULT;
2076 			if (c == 0 || c == '\n')
2077 				break;
2078 			len++;
2079 		}
2080 		if (len >= maxlen)
2081 			len = maxlen-1;
2082 		if(copy_from_user(data, buffer, len))
2083 			return -EFAULT;
2084 		((char *) data)[len] = 0;
2085 		*ppos += *lenp;
2086 	} else {
2087 		len = strlen(data);
2088 		if (len > maxlen)
2089 			len = maxlen;
2090 
2091 		if (*ppos > len) {
2092 			*lenp = 0;
2093 			return 0;
2094 		}
2095 
2096 		data += *ppos;
2097 		len  -= *ppos;
2098 
2099 		if (len > *lenp)
2100 			len = *lenp;
2101 		if (len)
2102 			if(copy_to_user(buffer, data, len))
2103 				return -EFAULT;
2104 		if (len < *lenp) {
2105 			if(put_user('\n', ((char __user *) buffer) + len))
2106 				return -EFAULT;
2107 			len++;
2108 		}
2109 		*lenp = len;
2110 		*ppos += len;
2111 	}
2112 	return 0;
2113 }
2114 
2115 /**
2116  * proc_dostring - read a string sysctl
2117  * @table: the sysctl table
2118  * @write: %TRUE if this is a write to the sysctl file
2119  * @buffer: the user buffer
2120  * @lenp: the size of the user buffer
2121  * @ppos: file position
2122  *
2123  * Reads/writes a string from/to the user buffer. If the kernel
2124  * buffer provided is not large enough to hold the string, the
2125  * string is truncated. The copied string is %NULL-terminated.
2126  * If the string is being read by the user process, it is copied
2127  * and a newline '\n' is added. It is truncated if the buffer is
2128  * not large enough.
2129  *
2130  * Returns 0 on success.
2131  */
2132 int proc_dostring(struct ctl_table *table, int write,
2133 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2134 {
2135 	return _proc_do_string(table->data, table->maxlen, write,
2136 			       buffer, lenp, ppos);
2137 }
2138 
/*
 * Advance *@buf to the position returned by skip_spaces() and return how
 * many characters were stepped over.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);
	return *buf - start;
}
2147 
/*
 * Skip a leading run of the character @v: advance *@buf and decrement
 * *@size for every occurrence, stopping at the first different character
 * or when *@size reaches zero.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	while (*size && **buf == v) {
		(*size)--;
		(*buf)++;
	}
}
2157 
2158 #define TMPBUFLEN 22
2159 /**
2160  * proc_get_long - reads an ASCII formatted integer from a user buffer
2161  *
2162  * @buf: a kernel buffer
2163  * @size: size of the kernel buffer
2164  * @val: this is where the number will be stored
2165  * @neg: set to %TRUE if number is negative
2166  * @perm_tr: a vector which contains the allowed trailers
2167  * @perm_tr_len: size of the perm_tr vector
2168  * @tr: pointer to store the trailer character
2169  *
2170  * In case of success %0 is returned and @buf and @size are updated with
2171  * the amount of bytes read. If @tr is non-NULL and a trailing
2172  * character exists (size is non-zero after returning from this
2173  * function), @tr is updated with the trailing character.
2174  */
2175 static int proc_get_long(char **buf, size_t *size,
2176 			  unsigned long *val, bool *neg,
2177 			  const char *perm_tr, unsigned perm_tr_len, char *tr)
2178 {
2179 	int len;
2180 	char *p, tmp[TMPBUFLEN];
2181 
2182 	if (!*size)
2183 		return -EINVAL;
2184 
2185 	len = *size;
2186 	if (len > TMPBUFLEN - 1)
2187 		len = TMPBUFLEN - 1;
2188 
2189 	memcpy(tmp, *buf, len);
2190 
2191 	tmp[len] = 0;
2192 	p = tmp;
2193 	if (*p == '-' && *size > 1) {
2194 		*neg = true;
2195 		p++;
2196 	} else
2197 		*neg = false;
2198 	if (!isdigit(*p))
2199 		return -EINVAL;
2200 
2201 	*val = simple_strtoul(p, &p, 0);
2202 
2203 	len = p - tmp;
2204 
2205 	/* We don't know if the next char is whitespace thus we may accept
2206 	 * invalid integers (e.g. 1234...a) or two integers instead of one
2207 	 * (e.g. 123...1). So lets not allow such large numbers. */
2208 	if (len == TMPBUFLEN - 1)
2209 		return -EINVAL;
2210 
2211 	if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
2212 		return -EINVAL;
2213 
2214 	if (tr && (len < *size))
2215 		*tr = *p;
2216 
2217 	*buf += len;
2218 	*size -= len;
2219 
2220 	return 0;
2221 }
2222 
2223 /**
2224  * proc_put_long - converts an integer to a decimal ASCII formatted string
2225  *
2226  * @buf: the user buffer
2227  * @size: the size of the user buffer
2228  * @val: the integer to be converted
2229  * @neg: sign of the number, %TRUE for negative
2230  *
2231  * In case of success %0 is returned and @buf and @size are updated with
2232  * the amount of bytes written.
2233  */
2234 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2235 			  bool neg)
2236 {
2237 	int len;
2238 	char tmp[TMPBUFLEN], *p = tmp;
2239 
2240 	sprintf(p, "%s%lu", neg ? "-" : "", val);
2241 	len = strlen(tmp);
2242 	if (len > *size)
2243 		len = *size;
2244 	if (copy_to_user(*buf, tmp, len))
2245 		return -EFAULT;
2246 	*size -= len;
2247 	*buf += len;
2248 	return 0;
2249 }
2250 #undef TMPBUFLEN
2251 
2252 static int proc_put_char(void __user **buf, size_t *size, char c)
2253 {
2254 	if (*size) {
2255 		char __user **buffer = (char __user **)buf;
2256 		if (put_user(c, *buffer))
2257 			return -EFAULT;
2258 		(*size)--, (*buffer)++;
2259 		*buf = *buffer;
2260 	}
2261 	return 0;
2262 }
2263 
/*
 * Default conversion between the sign/magnitude pair used by the text
 * parser (*@negp, *@lvalp) and an int sysctl value (*@valp).
 *
 * @write != 0: convert sign/magnitude to int and store it in *@valp.
 *   Magnitudes that do not fit in an int (above INT_MAX, or above
 *   INT_MAX + 1 for negative numbers) are rejected with -EINVAL and *@valp
 *   is left untouched, instead of being silently truncated.
 * @write == 0: split *@valp into sign (*@negp) and magnitude (*@lvalp).
 *
 * @data is unused by this converter.
 *
 * Returns 0 on success, -EINVAL when a written value is out of range.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		if (*negp) {
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			/* INT_MAX + 1 has no positive int form: map it directly */
			if (*lvalp == (unsigned long) INT_MAX + 1)
				*valp = INT_MIN;
			else
				*valp = -(int) *lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			*valp = (int) *lvalp;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/* negate as unsigned so that INT_MIN does not overflow */
			*lvalp = -(unsigned long) val;
		} else {
			*negp = false;
			*lvalp = (unsigned long) val;
		}
	}
	return 0;
}
2282 
2283 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
2284 
2285 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2286 		  int write, void __user *buffer,
2287 		  size_t *lenp, loff_t *ppos,
2288 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2289 			      int write, void *data),
2290 		  void *data)
2291 {
2292 	int *i, vleft, first = 1, err = 0;
2293 	unsigned long page = 0;
2294 	size_t left;
2295 	char *kbuf;
2296 
2297 	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2298 		*lenp = 0;
2299 		return 0;
2300 	}
2301 
2302 	i = (int *) tbl_data;
2303 	vleft = table->maxlen / sizeof(*i);
2304 	left = *lenp;
2305 
2306 	if (!conv)
2307 		conv = do_proc_dointvec_conv;
2308 
2309 	if (write) {
2310 		if (left > PAGE_SIZE - 1)
2311 			left = PAGE_SIZE - 1;
2312 		page = __get_free_page(GFP_TEMPORARY);
2313 		kbuf = (char *) page;
2314 		if (!kbuf)
2315 			return -ENOMEM;
2316 		if (copy_from_user(kbuf, buffer, left)) {
2317 			err = -EFAULT;
2318 			goto free;
2319 		}
2320 		kbuf[left] = 0;
2321 	}
2322 
2323 	for (; left && vleft--; i++, first=0) {
2324 		unsigned long lval;
2325 		bool neg;
2326 
2327 		if (write) {
2328 			left -= proc_skip_spaces(&kbuf);
2329 
2330 			if (!left)
2331 				break;
2332 			err = proc_get_long(&kbuf, &left, &lval, &neg,
2333 					     proc_wspace_sep,
2334 					     sizeof(proc_wspace_sep), NULL);
2335 			if (err)
2336 				break;
2337 			if (conv(&neg, &lval, i, 1, data)) {
2338 				err = -EINVAL;
2339 				break;
2340 			}
2341 		} else {
2342 			if (conv(&neg, &lval, i, 0, data)) {
2343 				err = -EINVAL;
2344 				break;
2345 			}
2346 			if (!first)
2347 				err = proc_put_char(&buffer, &left, '\t');
2348 			if (err)
2349 				break;
2350 			err = proc_put_long(&buffer, &left, lval, neg);
2351 			if (err)
2352 				break;
2353 		}
2354 	}
2355 
2356 	if (!write && !first && left && !err)
2357 		err = proc_put_char(&buffer, &left, '\n');
2358 	if (write && !err && left)
2359 		left -= proc_skip_spaces(&kbuf);
2360 free:
2361 	if (write) {
2362 		free_page(page);
2363 		if (first)
2364 			return err ? : -EINVAL;
2365 	}
2366 	*lenp -= left;
2367 	*ppos += *lenp;
2368 	return err;
2369 }
2370 
2371 static int do_proc_dointvec(struct ctl_table *table, int write,
2372 		  void __user *buffer, size_t *lenp, loff_t *ppos,
2373 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2374 			      int write, void *data),
2375 		  void *data)
2376 {
2377 	return __do_proc_dointvec(table->data, table, write,
2378 			buffer, lenp, ppos, conv, data);
2379 }
2380 
2381 /**
2382  * proc_dointvec - read a vector of integers
2383  * @table: the sysctl table
2384  * @write: %TRUE if this is a write to the sysctl file
2385  * @buffer: the user buffer
2386  * @lenp: the size of the user buffer
2387  * @ppos: file position
2388  *
2389  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2390  * values from/to the user buffer, treated as an ASCII string.
2391  *
2392  * Returns 0 on success.
2393  */
2394 int proc_dointvec(struct ctl_table *table, int write,
2395 		     void __user *buffer, size_t *lenp, loff_t *ppos)
2396 {
2397     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2398 		    	    NULL,NULL);
2399 }
2400 
2401 /*
2402  * Taint values can only be increased
2403  * This means we can safely use a temporary.
2404  */
2405 static int proc_taint(struct ctl_table *table, int write,
2406 			       void __user *buffer, size_t *lenp, loff_t *ppos)
2407 {
2408 	struct ctl_table t;
2409 	unsigned long tmptaint = get_taint();
2410 	int err;
2411 
2412 	if (write && !capable(CAP_SYS_ADMIN))
2413 		return -EPERM;
2414 
2415 	t = *table;
2416 	t.data = &tmptaint;
2417 	err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2418 	if (err < 0)
2419 		return err;
2420 
2421 	if (write) {
2422 		/*
2423 		 * Poor man's atomic or. Not worth adding a primitive
2424 		 * to everyone's atomic.h for this
2425 		 */
2426 		int i;
2427 		for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2428 			if ((tmptaint >> i) & 1)
2429 				add_taint(i);
2430 		}
2431 	}
2432 
2433 	return err;
2434 }
2435 
2436 #ifdef CONFIG_PRINTK
2437 static int proc_dmesg_restrict(struct ctl_table *table, int write,
2438 				void __user *buffer, size_t *lenp, loff_t *ppos)
2439 {
2440 	if (write && !capable(CAP_SYS_ADMIN))
2441 		return -EPERM;
2442 
2443 	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2444 }
2445 #endif
2446 
/*
 * Optional bounds consulted by do_proc_dointvec_minmax_conv() on writes.
 * A NULL pointer means "no bound on this side".
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;	/* smallest value accepted, or NULL */
	int *max;	/* largest value accepted, or NULL */
};

/*
 * do_proc_dointvec_minmax_conv - int conversion with range enforcement
 *
 * @negp: sign flag; input when writing, output when reading
 * @lvalp: magnitude; input when writing, output when reading
 * @valp: the kernel integer being written to or read from
 * @write: non-zero to store into *valp, zero to load from it
 * @data: a struct do_proc_dointvec_minmax_conv_param with the bounds
 *
 * On write the value is first checked to fit in an int and only then
 * compared against the bounds. Without the first check a huge input could
 * wrap around during truncation and land inside the permitted range.
 * *valp is modified only when every check passes.
 *
 * On read the magnitude is computed in unsigned arithmetic so INT_MIN is
 * reported correctly.
 *
 * Returns 0 on success, -EINVAL if the written value is not representable
 * or lies outside [*min, *max].
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val;

		if (*negp) {
			/* -INT_MIN is one larger than INT_MAX. */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			val = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			val = *lvalp;
		}
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/* Negate after widening: -val would overflow for INT_MIN. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2475 
2476 /**
2477  * proc_dointvec_minmax - read a vector of integers with min/max values
2478  * @table: the sysctl table
2479  * @write: %TRUE if this is a write to the sysctl file
2480  * @buffer: the user buffer
2481  * @lenp: the size of the user buffer
2482  * @ppos: file position
2483  *
2484  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2485  * values from/to the user buffer, treated as an ASCII string.
2486  *
2487  * This routine will ensure the values are within the range specified by
2488  * table->extra1 (min) and table->extra2 (max).
2489  *
2490  * Returns 0 on success.
2491  */
2492 int proc_dointvec_minmax(struct ctl_table *table, int write,
2493 		  void __user *buffer, size_t *lenp, loff_t *ppos)
2494 {
2495 	struct do_proc_dointvec_minmax_conv_param param = {
2496 		.min = (int *) table->extra1,
2497 		.max = (int *) table->extra2,
2498 	};
2499 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2500 				do_proc_dointvec_minmax_conv, &param);
2501 }
2502 
2503 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2504 				     void __user *buffer,
2505 				     size_t *lenp, loff_t *ppos,
2506 				     unsigned long convmul,
2507 				     unsigned long convdiv)
2508 {
2509 	unsigned long *i, *min, *max;
2510 	int vleft, first = 1, err = 0;
2511 	unsigned long page = 0;
2512 	size_t left;
2513 	char *kbuf;
2514 
2515 	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2516 		*lenp = 0;
2517 		return 0;
2518 	}
2519 
2520 	i = (unsigned long *) data;
2521 	min = (unsigned long *) table->extra1;
2522 	max = (unsigned long *) table->extra2;
2523 	vleft = table->maxlen / sizeof(unsigned long);
2524 	left = *lenp;
2525 
2526 	if (write) {
2527 		if (left > PAGE_SIZE - 1)
2528 			left = PAGE_SIZE - 1;
2529 		page = __get_free_page(GFP_TEMPORARY);
2530 		kbuf = (char *) page;
2531 		if (!kbuf)
2532 			return -ENOMEM;
2533 		if (copy_from_user(kbuf, buffer, left)) {
2534 			err = -EFAULT;
2535 			goto free;
2536 		}
2537 		kbuf[left] = 0;
2538 	}
2539 
2540 	for (; left && vleft--; i++, first = 0) {
2541 		unsigned long val;
2542 
2543 		if (write) {
2544 			bool neg;
2545 
2546 			left -= proc_skip_spaces(&kbuf);
2547 
2548 			err = proc_get_long(&kbuf, &left, &val, &neg,
2549 					     proc_wspace_sep,
2550 					     sizeof(proc_wspace_sep), NULL);
2551 			if (err)
2552 				break;
2553 			if (neg)
2554 				continue;
2555 			if ((min && val < *min) || (max && val > *max))
2556 				continue;
2557 			*i = val;
2558 		} else {
2559 			val = convdiv * (*i) / convmul;
2560 			if (!first)
2561 				err = proc_put_char(&buffer, &left, '\t');
2562 			err = proc_put_long(&buffer, &left, val, false);
2563 			if (err)
2564 				break;
2565 		}
2566 	}
2567 
2568 	if (!write && !first && left && !err)
2569 		err = proc_put_char(&buffer, &left, '\n');
2570 	if (write && !err)
2571 		left -= proc_skip_spaces(&kbuf);
2572 free:
2573 	if (write) {
2574 		free_page(page);
2575 		if (first)
2576 			return err ? : -EINVAL;
2577 	}
2578 	*lenp -= left;
2579 	*ppos += *lenp;
2580 	return err;
2581 }
2582 
2583 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2584 				     void __user *buffer,
2585 				     size_t *lenp, loff_t *ppos,
2586 				     unsigned long convmul,
2587 				     unsigned long convdiv)
2588 {
2589 	return __do_proc_doulongvec_minmax(table->data, table, write,
2590 			buffer, lenp, ppos, convmul, convdiv);
2591 }
2592 
2593 /**
2594  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2595  * @table: the sysctl table
2596  * @write: %TRUE if this is a write to the sysctl file
2597  * @buffer: the user buffer
2598  * @lenp: the size of the user buffer
2599  * @ppos: file position
2600  *
2601  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2602  * values from/to the user buffer, treated as an ASCII string.
2603  *
2604  * This routine will ensure the values are within the range specified by
2605  * table->extra1 (min) and table->extra2 (max).
2606  *
2607  * Returns 0 on success.
2608  */
2609 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2610 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2611 {
2612     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2613 }
2614 
2615 /**
2616  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2617  * @table: the sysctl table
2618  * @write: %TRUE if this is a write to the sysctl file
2619  * @buffer: the user buffer
2620  * @lenp: the size of the user buffer
2621  * @ppos: file position
2622  *
2623  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2624  * values from/to the user buffer, treated as an ASCII string. The values
2625  * are treated as milliseconds, and converted to jiffies when they are stored.
2626  *
2627  * This routine will ensure the values are within the range specified by
2628  * table->extra1 (min) and table->extra2 (max).
2629  *
2630  * Returns 0 on success.
2631  */
2632 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2633 				      void __user *buffer,
2634 				      size_t *lenp, loff_t *ppos)
2635 {
2636     return do_proc_doulongvec_minmax(table, write, buffer,
2637 				     lenp, ppos, HZ, 1000l);
2638 }
2639 
2640 
2641 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2642 					 int *valp,
2643 					 int write, void *data)
2644 {
2645 	if (write) {
2646 		if (*lvalp > LONG_MAX / HZ)
2647 			return 1;
2648 		*valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2649 	} else {
2650 		int val = *valp;
2651 		unsigned long lval;
2652 		if (val < 0) {
2653 			*negp = true;
2654 			lval = (unsigned long)-val;
2655 		} else {
2656 			*negp = false;
2657 			lval = (unsigned long)val;
2658 		}
2659 		*lvalp = lval / HZ;
2660 	}
2661 	return 0;
2662 }
2663 
2664 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2665 						int *valp,
2666 						int write, void *data)
2667 {
2668 	if (write) {
2669 		if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2670 			return 1;
2671 		*valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2672 	} else {
2673 		int val = *valp;
2674 		unsigned long lval;
2675 		if (val < 0) {
2676 			*negp = true;
2677 			lval = (unsigned long)-val;
2678 		} else {
2679 			*negp = false;
2680 			lval = (unsigned long)val;
2681 		}
2682 		*lvalp = jiffies_to_clock_t(lval);
2683 	}
2684 	return 0;
2685 }
2686 
/*
 * do_proc_dointvec_ms_jiffies_conv - convert between milliseconds and
 * int jiffies
 *
 * @negp: sign flag; input when writing, output when reading
 * @lvalp: magnitude in milliseconds; input on write, output on read
 * @valp: the kernel integer, in jiffies
 * @write: non-zero to store into *valp, zero to load from it
 * @data: unused
 *
 * On write the value is passed through msecs_to_jiffies(); a result that
 * does not fit in an int is rejected rather than silently truncated.
 *
 * On read the magnitude is computed in unsigned arithmetic so INT_MIN is
 * handled correctly, then passed through jiffies_to_msecs().
 *
 * Returns 0 on success, 1 if the converted value does not fit in an int.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);

		if (jif > INT_MAX)
			return 1;
		*valp = (int)jif;
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = true;
			/* Negate after widening: -val would overflow for INT_MIN. */
			lval = -(unsigned long)val;
		} else {
			*negp = false;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2707 
2708 /**
2709  * proc_dointvec_jiffies - read a vector of integers as seconds
2710  * @table: the sysctl table
2711  * @write: %TRUE if this is a write to the sysctl file
2712  * @buffer: the user buffer
2713  * @lenp: the size of the user buffer
2714  * @ppos: file position
2715  *
2716  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2717  * values from/to the user buffer, treated as an ASCII string.
2718  * The values read are assumed to be in seconds, and are converted into
2719  * jiffies.
2720  *
2721  * Returns 0 on success.
2722  */
2723 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2724 			  void __user *buffer, size_t *lenp, loff_t *ppos)
2725 {
2726     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2727 		    	    do_proc_dointvec_jiffies_conv,NULL);
2728 }
2729 
2730 /**
2731  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2732  * @table: the sysctl table
2733  * @write: %TRUE if this is a write to the sysctl file
2734  * @buffer: the user buffer
2735  * @lenp: the size of the user buffer
2736  * @ppos: pointer to the file position
2737  *
2738  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2739  * values from/to the user buffer, treated as an ASCII string.
2740  * The values read are assumed to be in 1/USER_HZ seconds, and
2741  * are converted into jiffies.
2742  *
2743  * Returns 0 on success.
2744  */
2745 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2746 				 void __user *buffer, size_t *lenp, loff_t *ppos)
2747 {
2748     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2749 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
2750 }
2751 
2752 /**
2753  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2754  * @table: the sysctl table
2755  * @write: %TRUE if this is a write to the sysctl file
2756  * @buffer: the user buffer
2757  * @lenp: the size of the user buffer
2758  * @ppos: file position
2759  * @ppos: the current position in the file
2760  *
2761  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2762  * values from/to the user buffer, treated as an ASCII string.
2763  * The values read are assumed to be in 1/1000 seconds, and
2764  * are converted into jiffies.
2765  *
2766  * Returns 0 on success.
2767  */
2768 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
2769 			     void __user *buffer, size_t *lenp, loff_t *ppos)
2770 {
2771 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
2772 				do_proc_dointvec_ms_jiffies_conv, NULL);
2773 }
2774 
2775 static int proc_do_cad_pid(struct ctl_table *table, int write,
2776 			   void __user *buffer, size_t *lenp, loff_t *ppos)
2777 {
2778 	struct pid *new_pid;
2779 	pid_t tmp;
2780 	int r;
2781 
2782 	tmp = pid_vnr(cad_pid);
2783 
2784 	r = __do_proc_dointvec(&tmp, table, write, buffer,
2785 			       lenp, ppos, NULL, NULL);
2786 	if (r || !write)
2787 		return r;
2788 
2789 	new_pid = find_get_pid(tmp);
2790 	if (!new_pid)
2791 		return -ESRCH;
2792 
2793 	put_pid(xchg(&cad_pid, new_pid));
2794 	return 0;
2795 }
2796 
2797 /**
2798  * proc_do_large_bitmap - read/write from/to a large bitmap
2799  * @table: the sysctl table
2800  * @write: %TRUE if this is a write to the sysctl file
2801  * @buffer: the user buffer
2802  * @lenp: the size of the user buffer
2803  * @ppos: file position
2804  *
2805  * The bitmap is stored at table->data and the bitmap length (in bits)
2806  * in table->maxlen.
2807  *
2808  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
2809  * large bitmaps may be represented in a compact manner. Writing into
2810  * the file will clear the bitmap then update it with the given input.
2811  *
2812  * Returns 0 on success.
2813  */
2814 int proc_do_large_bitmap(struct ctl_table *table, int write,
2815 			 void __user *buffer, size_t *lenp, loff_t *ppos)
2816 {
2817 	int err = 0;
2818 	bool first = 1;
2819 	size_t left = *lenp;
2820 	unsigned long bitmap_len = table->maxlen;
2821 	unsigned long *bitmap = (unsigned long *) table->data;
2822 	unsigned long *tmp_bitmap = NULL;
2823 	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
2824 
2825 	if (!bitmap_len || !left || (*ppos && !write)) {
2826 		*lenp = 0;
2827 		return 0;
2828 	}
2829 
2830 	if (write) {
2831 		unsigned long page = 0;
2832 		char *kbuf;
2833 
2834 		if (left > PAGE_SIZE - 1)
2835 			left = PAGE_SIZE - 1;
2836 
2837 		page = __get_free_page(GFP_TEMPORARY);
2838 		kbuf = (char *) page;
2839 		if (!kbuf)
2840 			return -ENOMEM;
2841 		if (copy_from_user(kbuf, buffer, left)) {
2842 			free_page(page);
2843 			return -EFAULT;
2844                 }
2845 		kbuf[left] = 0;
2846 
2847 		tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
2848 				     GFP_KERNEL);
2849 		if (!tmp_bitmap) {
2850 			free_page(page);
2851 			return -ENOMEM;
2852 		}
2853 		proc_skip_char(&kbuf, &left, '\n');
2854 		while (!err && left) {
2855 			unsigned long val_a, val_b;
2856 			bool neg;
2857 
2858 			err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
2859 					     sizeof(tr_a), &c);
2860 			if (err)
2861 				break;
2862 			if (val_a >= bitmap_len || neg) {
2863 				err = -EINVAL;
2864 				break;
2865 			}
2866 
2867 			val_b = val_a;
2868 			if (left) {
2869 				kbuf++;
2870 				left--;
2871 			}
2872 
2873 			if (c == '-') {
2874 				err = proc_get_long(&kbuf, &left, &val_b,
2875 						     &neg, tr_b, sizeof(tr_b),
2876 						     &c);
2877 				if (err)
2878 					break;
2879 				if (val_b >= bitmap_len || neg ||
2880 				    val_a > val_b) {
2881 					err = -EINVAL;
2882 					break;
2883 				}
2884 				if (left) {
2885 					kbuf++;
2886 					left--;
2887 				}
2888 			}
2889 
2890 			while (val_a <= val_b)
2891 				set_bit(val_a++, tmp_bitmap);
2892 
2893 			first = 0;
2894 			proc_skip_char(&kbuf, &left, '\n');
2895 		}
2896 		free_page(page);
2897 	} else {
2898 		unsigned long bit_a, bit_b = 0;
2899 
2900 		while (left) {
2901 			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
2902 			if (bit_a >= bitmap_len)
2903 				break;
2904 			bit_b = find_next_zero_bit(bitmap, bitmap_len,
2905 						   bit_a + 1) - 1;
2906 
2907 			if (!first) {
2908 				err = proc_put_char(&buffer, &left, ',');
2909 				if (err)
2910 					break;
2911 			}
2912 			err = proc_put_long(&buffer, &left, bit_a, false);
2913 			if (err)
2914 				break;
2915 			if (bit_a != bit_b) {
2916 				err = proc_put_char(&buffer, &left, '-');
2917 				if (err)
2918 					break;
2919 				err = proc_put_long(&buffer, &left, bit_b, false);
2920 				if (err)
2921 					break;
2922 			}
2923 
2924 			first = 0; bit_b++;
2925 		}
2926 		if (!err)
2927 			err = proc_put_char(&buffer, &left, '\n');
2928 	}
2929 
2930 	if (!err) {
2931 		if (write) {
2932 			if (*ppos)
2933 				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2934 			else
2935 				memcpy(bitmap, tmp_bitmap,
2936 					BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
2937 		}
2938 		kfree(tmp_bitmap);
2939 		*lenp -= left;
2940 		*ppos += *lenp;
2941 		return 0;
2942 	} else {
2943 		kfree(tmp_bitmap);
2944 		return err;
2945 	}
2946 }
2947 
2948 #else /* CONFIG_PROC_SYSCTL */
2949 
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: ignores all of its
 * arguments and always fails with -ENOSYS.
 */
int proc_dostring(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2955 
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: ignores all of its
 * arguments and always fails with -ENOSYS.
 */
int proc_dointvec(struct ctl_table *table, int write,
		  void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2961 
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: ignores all of its
 * arguments and always fails with -ENOSYS.
 */
int proc_dointvec_minmax(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2967 
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: ignores all of its
 * arguments and always fails with -ENOSYS.
 */
int proc_dointvec_jiffies(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2973 
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: ignores all of its
 * arguments and always fails with -ENOSYS.
 */
int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2979 
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: ignores all of its
 * arguments and always fails with -ENOSYS.
 */
int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2985 
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: ignores all of its
 * arguments and always fails with -ENOSYS.
 */
int proc_doulongvec_minmax(struct ctl_table *table, int write,
		    void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}
2991 
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: ignores all of its
 * arguments and always fails with -ENOSYS.
 */
int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
				      void __user *buffer,
				      size_t *lenp, loff_t *ppos)
{
    return -ENOSYS;
}
2998 
2999 
3000 #endif /* CONFIG_PROC_SYSCTL */
3001 
/*
 * The exports are collected here rather than placed after each symbol
 * definition, because the handlers above are defined twice (once on each
 * side of the CONFIG_PROC_SYSCTL conditional). Exception granted :-)
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(register_sysctl_paths);
EXPORT_SYMBOL(unregister_sysctl_table);
3017