1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * sysctl.c: General linux system control interface 4 * 5 * Begun 24 March 1995, Stephen Tweedie 6 * Added /proc support, Dec 1995 7 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas. 8 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver. 9 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver. 10 * Dynamic registration fixes, Stephen Tweedie. 11 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn. 12 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris 13 * Horn. 14 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer. 15 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer. 16 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill 17 * Wendling. 18 * The list_for_each() macro wasn't appropriate for the sysctl loop. 19 * Removed it and replaced it with older style, 03/23/00, Bill Wendling 20 */ 21 22 #include <linux/module.h> 23 #include <linux/aio.h> 24 #include <linux/mm.h> 25 #include <linux/swap.h> 26 #include <linux/slab.h> 27 #include <linux/sysctl.h> 28 #include <linux/bitmap.h> 29 #include <linux/signal.h> 30 #include <linux/panic.h> 31 #include <linux/printk.h> 32 #include <linux/proc_fs.h> 33 #include <linux/security.h> 34 #include <linux/ctype.h> 35 #include <linux/kmemleak.h> 36 #include <linux/fs.h> 37 #include <linux/init.h> 38 #include <linux/kernel.h> 39 #include <linux/kobject.h> 40 #include <linux/net.h> 41 #include <linux/sysrq.h> 42 #include <linux/highuid.h> 43 #include <linux/writeback.h> 44 #include <linux/ratelimit.h> 45 #include <linux/compaction.h> 46 #include <linux/hugetlb.h> 47 #include <linux/initrd.h> 48 #include <linux/key.h> 49 #include <linux/times.h> 50 #include <linux/limits.h> 51 #include <linux/dcache.h> 52 #include <linux/dnotify.h> 53 #include <linux/syscalls.h> 54 #include <linux/vmstat.h> 55 #include <linux/nfs_fs.h> 56 #include <linux/acpi.h> 57 
#include <linux/reboot.h> 58 #include <linux/ftrace.h> 59 #include <linux/perf_event.h> 60 #include <linux/kprobes.h> 61 #include <linux/pipe_fs_i.h> 62 #include <linux/oom.h> 63 #include <linux/kmod.h> 64 #include <linux/capability.h> 65 #include <linux/binfmts.h> 66 #include <linux/sched/sysctl.h> 67 #include <linux/sched/coredump.h> 68 #include <linux/kexec.h> 69 #include <linux/bpf.h> 70 #include <linux/mount.h> 71 #include <linux/userfaultfd_k.h> 72 #include <linux/coredump.h> 73 #include <linux/latencytop.h> 74 #include <linux/pid.h> 75 #include <linux/delayacct.h> 76 77 #include "../lib/kstrtox.h" 78 79 #include <linux/uaccess.h> 80 #include <asm/processor.h> 81 82 #ifdef CONFIG_X86 83 #include <asm/nmi.h> 84 #include <asm/stacktrace.h> 85 #include <asm/io.h> 86 #endif 87 #ifdef CONFIG_SPARC 88 #include <asm/setup.h> 89 #endif 90 #ifdef CONFIG_BSD_PROCESS_ACCT 91 #include <linux/acct.h> 92 #endif 93 #ifdef CONFIG_RT_MUTEXES 94 #include <linux/rtmutex.h> 95 #endif 96 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT) 97 #include <linux/lockdep.h> 98 #endif 99 #ifdef CONFIG_CHR_DEV_SG 100 #include <scsi/sg.h> 101 #endif 102 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE 103 #include <linux/stackleak.h> 104 #endif 105 #ifdef CONFIG_LOCKUP_DETECTOR 106 #include <linux/nmi.h> 107 #endif 108 109 #if defined(CONFIG_SYSCTL) 110 111 /* Constants used for minimum and maximum */ 112 #ifdef CONFIG_LOCKUP_DETECTOR 113 static int sixty = 60; 114 #endif 115 116 static int __maybe_unused neg_one = -1; 117 static int __maybe_unused two = 2; 118 static int __maybe_unused four = 4; 119 static unsigned long zero_ul; 120 static unsigned long one_ul = 1; 121 static unsigned long long_max = LONG_MAX; 122 static int one_hundred = 100; 123 static int two_hundred = 200; 124 static int one_thousand = 1000; 125 #ifdef CONFIG_PRINTK 126 static int ten_thousand = 10000; 127 #endif 128 #ifdef CONFIG_PERF_EVENTS 129 static int six_hundred_forty_kb = 640 * 1024; 130 #endif 131 132 /* 
this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ 133 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; 134 135 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 136 static int maxolduid = 65535; 137 static int minolduid; 138 139 static int ngroups_max = NGROUPS_MAX; 140 static const int cap_last_cap = CAP_LAST_CAP; 141 142 /* 143 * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs 144 * and hung_task_check_interval_secs 145 */ 146 #ifdef CONFIG_DETECT_HUNG_TASK 147 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); 148 #endif 149 150 #ifdef CONFIG_INOTIFY_USER 151 #include <linux/inotify.h> 152 #endif 153 #ifdef CONFIG_FANOTIFY 154 #include <linux/fanotify.h> 155 #endif 156 157 #ifdef CONFIG_PROC_SYSCTL 158 159 /** 160 * enum sysctl_writes_mode - supported sysctl write modes 161 * 162 * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value 163 * to be written, and multiple writes on the same sysctl file descriptor 164 * will rewrite the sysctl value, regardless of file position. No warning 165 * is issued when the initial position is not 0. 166 * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is 167 * not 0. 168 * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at 169 * file position 0 and the value must be fully contained in the buffer 170 * sent to the write syscall. If dealing with strings respect the file 171 * position, but restrict this to the max length of the buffer, anything 172 * passed the max length will be ignored. Multiple writes will append 173 * to the buffer. 174 * 175 * These write modes control how current file position affects the behavior of 176 * updating sysctl values through the proc interface on each write. 
177 */ 178 enum sysctl_writes_mode { 179 SYSCTL_WRITES_LEGACY = -1, 180 SYSCTL_WRITES_WARN = 0, 181 SYSCTL_WRITES_STRICT = 1, 182 }; 183 184 static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT; 185 #endif /* CONFIG_PROC_SYSCTL */ 186 187 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \ 188 defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT) 189 int sysctl_legacy_va_layout; 190 #endif 191 192 #ifdef CONFIG_COMPACTION 193 static int min_extfrag_threshold; 194 static int max_extfrag_threshold = 1000; 195 #endif 196 197 #endif /* CONFIG_SYSCTL */ 198 199 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL) 200 static int bpf_stats_handler(struct ctl_table *table, int write, 201 void *buffer, size_t *lenp, loff_t *ppos) 202 { 203 struct static_key *key = (struct static_key *)table->data; 204 static int saved_val; 205 int val, ret; 206 struct ctl_table tmp = { 207 .data = &val, 208 .maxlen = sizeof(val), 209 .mode = table->mode, 210 .extra1 = SYSCTL_ZERO, 211 .extra2 = SYSCTL_ONE, 212 }; 213 214 if (write && !capable(CAP_SYS_ADMIN)) 215 return -EPERM; 216 217 mutex_lock(&bpf_stats_enabled_mutex); 218 val = saved_val; 219 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 220 if (write && !ret && val != saved_val) { 221 if (val) 222 static_key_slow_inc(key); 223 else 224 static_key_slow_dec(key); 225 saved_val = val; 226 } 227 mutex_unlock(&bpf_stats_enabled_mutex); 228 return ret; 229 } 230 231 static int bpf_unpriv_handler(struct ctl_table *table, int write, 232 void *buffer, size_t *lenp, loff_t *ppos) 233 { 234 int ret, unpriv_enable = *(int *)table->data; 235 bool locked_state = unpriv_enable == 1; 236 struct ctl_table tmp = *table; 237 238 if (write && !capable(CAP_SYS_ADMIN)) 239 return -EPERM; 240 241 tmp.data = &unpriv_enable; 242 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); 243 if (write && !ret) { 244 if (locked_state && unpriv_enable != 1) 245 return -EPERM; 246 *(int *)table->data = unpriv_enable; 247 } 248 
return ret; 249 } 250 #endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */ 251 252 /* 253 * /proc/sys support 254 */ 255 256 #ifdef CONFIG_PROC_SYSCTL 257 258 static int _proc_do_string(char *data, int maxlen, int write, 259 char *buffer, size_t *lenp, loff_t *ppos) 260 { 261 size_t len; 262 char c, *p; 263 264 if (!data || !maxlen || !*lenp) { 265 *lenp = 0; 266 return 0; 267 } 268 269 if (write) { 270 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) { 271 /* Only continue writes not past the end of buffer. */ 272 len = strlen(data); 273 if (len > maxlen - 1) 274 len = maxlen - 1; 275 276 if (*ppos > len) 277 return 0; 278 len = *ppos; 279 } else { 280 /* Start writing from beginning of buffer. */ 281 len = 0; 282 } 283 284 *ppos += *lenp; 285 p = buffer; 286 while ((p - buffer) < *lenp && len < maxlen - 1) { 287 c = *(p++); 288 if (c == 0 || c == '\n') 289 break; 290 data[len++] = c; 291 } 292 data[len] = 0; 293 } else { 294 len = strlen(data); 295 if (len > maxlen) 296 len = maxlen; 297 298 if (*ppos > len) { 299 *lenp = 0; 300 return 0; 301 } 302 303 data += *ppos; 304 len -= *ppos; 305 306 if (len > *lenp) 307 len = *lenp; 308 if (len) 309 memcpy(buffer, data, len); 310 if (len < *lenp) { 311 buffer[len] = '\n'; 312 len++; 313 } 314 *lenp = len; 315 *ppos += len; 316 } 317 return 0; 318 } 319 320 static void warn_sysctl_write(struct ctl_table *table) 321 { 322 pr_warn_once("%s wrote to %s when file position was not 0!\n" 323 "This will not be supported in the future. To silence this\n" 324 "warning, set kernel.sysctl_writes_strict = -1\n", 325 current->comm, table->procname); 326 } 327 328 /** 329 * proc_first_pos_non_zero_ignore - check if first position is allowed 330 * @ppos: file position 331 * @table: the sysctl table 332 * 333 * Returns true if the first position is non-zero and the sysctl_writes_strict 334 * mode indicates this is not allowed for numeric input types. String proc 335 * handlers can ignore the return value. 
336 */ 337 static bool proc_first_pos_non_zero_ignore(loff_t *ppos, 338 struct ctl_table *table) 339 { 340 if (!*ppos) 341 return false; 342 343 switch (sysctl_writes_strict) { 344 case SYSCTL_WRITES_STRICT: 345 return true; 346 case SYSCTL_WRITES_WARN: 347 warn_sysctl_write(table); 348 return false; 349 default: 350 return false; 351 } 352 } 353 354 /** 355 * proc_dostring - read a string sysctl 356 * @table: the sysctl table 357 * @write: %TRUE if this is a write to the sysctl file 358 * @buffer: the user buffer 359 * @lenp: the size of the user buffer 360 * @ppos: file position 361 * 362 * Reads/writes a string from/to the user buffer. If the kernel 363 * buffer provided is not large enough to hold the string, the 364 * string is truncated. The copied string is %NULL-terminated. 365 * If the string is being read by the user process, it is copied 366 * and a newline '\n' is added. It is truncated if the buffer is 367 * not large enough. 368 * 369 * Returns 0 on success. 370 */ 371 int proc_dostring(struct ctl_table *table, int write, 372 void *buffer, size_t *lenp, loff_t *ppos) 373 { 374 if (write) 375 proc_first_pos_non_zero_ignore(ppos, table); 376 377 return _proc_do_string(table->data, table->maxlen, write, buffer, lenp, 378 ppos); 379 } 380 381 static size_t proc_skip_spaces(char **buf) 382 { 383 size_t ret; 384 char *tmp = skip_spaces(*buf); 385 ret = tmp - *buf; 386 *buf = tmp; 387 return ret; 388 } 389 390 static void proc_skip_char(char **buf, size_t *size, const char v) 391 { 392 while (*size) { 393 if (**buf != v) 394 break; 395 (*size)--; 396 (*buf)++; 397 } 398 } 399 400 /** 401 * strtoul_lenient - parse an ASCII formatted integer from a buffer and only 402 * fail on overflow 403 * 404 * @cp: kernel buffer containing the string to parse 405 * @endp: pointer to store the trailing characters 406 * @base: the base to use 407 * @res: where the parsed integer will be stored 408 * 409 * In case of success 0 is returned and @res will contain the parsed 
integer, 410 * @endp will hold any trailing characters. 411 * This function will fail the parse on overflow. If there wasn't an overflow 412 * the function will defer the decision what characters count as invalid to the 413 * caller. 414 */ 415 static int strtoul_lenient(const char *cp, char **endp, unsigned int base, 416 unsigned long *res) 417 { 418 unsigned long long result; 419 unsigned int rv; 420 421 cp = _parse_integer_fixup_radix(cp, &base); 422 rv = _parse_integer(cp, base, &result); 423 if ((rv & KSTRTOX_OVERFLOW) || (result != (unsigned long)result)) 424 return -ERANGE; 425 426 cp += rv; 427 428 if (endp) 429 *endp = (char *)cp; 430 431 *res = (unsigned long)result; 432 return 0; 433 } 434 435 #define TMPBUFLEN 22 436 /** 437 * proc_get_long - reads an ASCII formatted integer from a user buffer 438 * 439 * @buf: a kernel buffer 440 * @size: size of the kernel buffer 441 * @val: this is where the number will be stored 442 * @neg: set to %TRUE if number is negative 443 * @perm_tr: a vector which contains the allowed trailers 444 * @perm_tr_len: size of the perm_tr vector 445 * @tr: pointer to store the trailer character 446 * 447 * In case of success %0 is returned and @buf and @size are updated with 448 * the amount of bytes read. If @tr is non-NULL and a trailing 449 * character exists (size is non-zero after returning from this 450 * function), @tr is updated with the trailing character. 
451 */ 452 static int proc_get_long(char **buf, size_t *size, 453 unsigned long *val, bool *neg, 454 const char *perm_tr, unsigned perm_tr_len, char *tr) 455 { 456 int len; 457 char *p, tmp[TMPBUFLEN]; 458 459 if (!*size) 460 return -EINVAL; 461 462 len = *size; 463 if (len > TMPBUFLEN - 1) 464 len = TMPBUFLEN - 1; 465 466 memcpy(tmp, *buf, len); 467 468 tmp[len] = 0; 469 p = tmp; 470 if (*p == '-' && *size > 1) { 471 *neg = true; 472 p++; 473 } else 474 *neg = false; 475 if (!isdigit(*p)) 476 return -EINVAL; 477 478 if (strtoul_lenient(p, &p, 0, val)) 479 return -EINVAL; 480 481 len = p - tmp; 482 483 /* We don't know if the next char is whitespace thus we may accept 484 * invalid integers (e.g. 1234...a) or two integers instead of one 485 * (e.g. 123...1). So lets not allow such large numbers. */ 486 if (len == TMPBUFLEN - 1) 487 return -EINVAL; 488 489 if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len)) 490 return -EINVAL; 491 492 if (tr && (len < *size)) 493 *tr = *p; 494 495 *buf += len; 496 *size -= len; 497 498 return 0; 499 } 500 501 /** 502 * proc_put_long - converts an integer to a decimal ASCII formatted string 503 * 504 * @buf: the user buffer 505 * @size: the size of the user buffer 506 * @val: the integer to be converted 507 * @neg: sign of the number, %TRUE for negative 508 * 509 * In case of success @buf and @size are updated with the amount of bytes 510 * written. 511 */ 512 static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg) 513 { 514 int len; 515 char tmp[TMPBUFLEN], *p = tmp; 516 517 sprintf(p, "%s%lu", neg ? 
"-" : "", val); 518 len = strlen(tmp); 519 if (len > *size) 520 len = *size; 521 memcpy(*buf, tmp, len); 522 *size -= len; 523 *buf += len; 524 } 525 #undef TMPBUFLEN 526 527 static void proc_put_char(void **buf, size_t *size, char c) 528 { 529 if (*size) { 530 char **buffer = (char **)buf; 531 **buffer = c; 532 533 (*size)--; 534 (*buffer)++; 535 *buf = *buffer; 536 } 537 } 538 539 static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, 540 int *valp, 541 int write, void *data) 542 { 543 if (write) { 544 if (*negp) { 545 if (*lvalp > (unsigned long) INT_MAX + 1) 546 return -EINVAL; 547 *valp = -*lvalp; 548 } else { 549 if (*lvalp > (unsigned long) INT_MAX) 550 return -EINVAL; 551 *valp = *lvalp; 552 } 553 } else { 554 int val = *valp; 555 if (val < 0) { 556 *negp = true; 557 *lvalp = -(unsigned long)val; 558 } else { 559 *negp = false; 560 *lvalp = (unsigned long)val; 561 } 562 } 563 return 0; 564 } 565 566 static int do_proc_douintvec_conv(unsigned long *lvalp, 567 unsigned int *valp, 568 int write, void *data) 569 { 570 if (write) { 571 if (*lvalp > UINT_MAX) 572 return -EINVAL; 573 *valp = *lvalp; 574 } else { 575 unsigned int val = *valp; 576 *lvalp = (unsigned long)val; 577 } 578 return 0; 579 } 580 581 static const char proc_wspace_sep[] = { ' ', '\t', '\n' }; 582 583 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, 584 int write, void *buffer, 585 size_t *lenp, loff_t *ppos, 586 int (*conv)(bool *negp, unsigned long *lvalp, int *valp, 587 int write, void *data), 588 void *data) 589 { 590 int *i, vleft, first = 1, err = 0; 591 size_t left; 592 char *p; 593 594 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) { 595 *lenp = 0; 596 return 0; 597 } 598 599 i = (int *) tbl_data; 600 vleft = table->maxlen / sizeof(*i); 601 left = *lenp; 602 603 if (!conv) 604 conv = do_proc_dointvec_conv; 605 606 if (write) { 607 if (proc_first_pos_non_zero_ignore(ppos, table)) 608 goto out; 609 610 if (left > PAGE_SIZE - 1) 611 left 
= PAGE_SIZE - 1; 612 p = buffer; 613 } 614 615 for (; left && vleft--; i++, first=0) { 616 unsigned long lval; 617 bool neg; 618 619 if (write) { 620 left -= proc_skip_spaces(&p); 621 622 if (!left) 623 break; 624 err = proc_get_long(&p, &left, &lval, &neg, 625 proc_wspace_sep, 626 sizeof(proc_wspace_sep), NULL); 627 if (err) 628 break; 629 if (conv(&neg, &lval, i, 1, data)) { 630 err = -EINVAL; 631 break; 632 } 633 } else { 634 if (conv(&neg, &lval, i, 0, data)) { 635 err = -EINVAL; 636 break; 637 } 638 if (!first) 639 proc_put_char(&buffer, &left, '\t'); 640 proc_put_long(&buffer, &left, lval, neg); 641 } 642 } 643 644 if (!write && !first && left && !err) 645 proc_put_char(&buffer, &left, '\n'); 646 if (write && !err && left) 647 left -= proc_skip_spaces(&p); 648 if (write && first) 649 return err ? : -EINVAL; 650 *lenp -= left; 651 out: 652 *ppos += *lenp; 653 return err; 654 } 655 656 static int do_proc_dointvec(struct ctl_table *table, int write, 657 void *buffer, size_t *lenp, loff_t *ppos, 658 int (*conv)(bool *negp, unsigned long *lvalp, int *valp, 659 int write, void *data), 660 void *data) 661 { 662 return __do_proc_dointvec(table->data, table, write, 663 buffer, lenp, ppos, conv, data); 664 } 665 666 static int do_proc_douintvec_w(unsigned int *tbl_data, 667 struct ctl_table *table, 668 void *buffer, 669 size_t *lenp, loff_t *ppos, 670 int (*conv)(unsigned long *lvalp, 671 unsigned int *valp, 672 int write, void *data), 673 void *data) 674 { 675 unsigned long lval; 676 int err = 0; 677 size_t left; 678 bool neg; 679 char *p = buffer; 680 681 left = *lenp; 682 683 if (proc_first_pos_non_zero_ignore(ppos, table)) 684 goto bail_early; 685 686 if (left > PAGE_SIZE - 1) 687 left = PAGE_SIZE - 1; 688 689 left -= proc_skip_spaces(&p); 690 if (!left) { 691 err = -EINVAL; 692 goto out_free; 693 } 694 695 err = proc_get_long(&p, &left, &lval, &neg, 696 proc_wspace_sep, 697 sizeof(proc_wspace_sep), NULL); 698 if (err || neg) { 699 err = -EINVAL; 700 goto out_free; 
701 } 702 703 if (conv(&lval, tbl_data, 1, data)) { 704 err = -EINVAL; 705 goto out_free; 706 } 707 708 if (!err && left) 709 left -= proc_skip_spaces(&p); 710 711 out_free: 712 if (err) 713 return -EINVAL; 714 715 return 0; 716 717 /* This is in keeping with old __do_proc_dointvec() */ 718 bail_early: 719 *ppos += *lenp; 720 return err; 721 } 722 723 static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer, 724 size_t *lenp, loff_t *ppos, 725 int (*conv)(unsigned long *lvalp, 726 unsigned int *valp, 727 int write, void *data), 728 void *data) 729 { 730 unsigned long lval; 731 int err = 0; 732 size_t left; 733 734 left = *lenp; 735 736 if (conv(&lval, tbl_data, 0, data)) { 737 err = -EINVAL; 738 goto out; 739 } 740 741 proc_put_long(&buffer, &left, lval, false); 742 if (!left) 743 goto out; 744 745 proc_put_char(&buffer, &left, '\n'); 746 747 out: 748 *lenp -= left; 749 *ppos += *lenp; 750 751 return err; 752 } 753 754 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table, 755 int write, void *buffer, 756 size_t *lenp, loff_t *ppos, 757 int (*conv)(unsigned long *lvalp, 758 unsigned int *valp, 759 int write, void *data), 760 void *data) 761 { 762 unsigned int *i, vleft; 763 764 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) { 765 *lenp = 0; 766 return 0; 767 } 768 769 i = (unsigned int *) tbl_data; 770 vleft = table->maxlen / sizeof(*i); 771 772 /* 773 * Arrays are not supported, keep this simple. *Do not* add 774 * support for them. 
775 */ 776 if (vleft != 1) { 777 *lenp = 0; 778 return -EINVAL; 779 } 780 781 if (!conv) 782 conv = do_proc_douintvec_conv; 783 784 if (write) 785 return do_proc_douintvec_w(i, table, buffer, lenp, ppos, 786 conv, data); 787 return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data); 788 } 789 790 static int do_proc_douintvec(struct ctl_table *table, int write, 791 void *buffer, size_t *lenp, loff_t *ppos, 792 int (*conv)(unsigned long *lvalp, 793 unsigned int *valp, 794 int write, void *data), 795 void *data) 796 { 797 return __do_proc_douintvec(table->data, table, write, 798 buffer, lenp, ppos, conv, data); 799 } 800 801 /** 802 * proc_dointvec - read a vector of integers 803 * @table: the sysctl table 804 * @write: %TRUE if this is a write to the sysctl file 805 * @buffer: the user buffer 806 * @lenp: the size of the user buffer 807 * @ppos: file position 808 * 809 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 810 * values from/to the user buffer, treated as an ASCII string. 811 * 812 * Returns 0 on success. 
813 */ 814 int proc_dointvec(struct ctl_table *table, int write, void *buffer, 815 size_t *lenp, loff_t *ppos) 816 { 817 return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL); 818 } 819 820 #ifdef CONFIG_COMPACTION 821 static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table, 822 int write, void *buffer, size_t *lenp, loff_t *ppos) 823 { 824 int ret, old; 825 826 if (!IS_ENABLED(CONFIG_PREEMPT_RT) || !write) 827 return proc_dointvec_minmax(table, write, buffer, lenp, ppos); 828 829 old = *(int *)table->data; 830 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 831 if (ret) 832 return ret; 833 if (old != *(int *)table->data) 834 pr_warn_once("sysctl attribute %s changed by %s[%d]\n", 835 table->procname, current->comm, 836 task_pid_nr(current)); 837 return ret; 838 } 839 #endif 840 841 /** 842 * proc_douintvec - read a vector of unsigned integers 843 * @table: the sysctl table 844 * @write: %TRUE if this is a write to the sysctl file 845 * @buffer: the user buffer 846 * @lenp: the size of the user buffer 847 * @ppos: file position 848 * 849 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer 850 * values from/to the user buffer, treated as an ASCII string. 851 * 852 * Returns 0 on success. 853 */ 854 int proc_douintvec(struct ctl_table *table, int write, void *buffer, 855 size_t *lenp, loff_t *ppos) 856 { 857 return do_proc_douintvec(table, write, buffer, lenp, ppos, 858 do_proc_douintvec_conv, NULL); 859 } 860 861 /* 862 * Taint values can only be increased 863 * This means we can safely use a temporary. 
864 */ 865 static int proc_taint(struct ctl_table *table, int write, 866 void *buffer, size_t *lenp, loff_t *ppos) 867 { 868 struct ctl_table t; 869 unsigned long tmptaint = get_taint(); 870 int err; 871 872 if (write && !capable(CAP_SYS_ADMIN)) 873 return -EPERM; 874 875 t = *table; 876 t.data = &tmptaint; 877 err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos); 878 if (err < 0) 879 return err; 880 881 if (write) { 882 int i; 883 884 /* 885 * If we are relying on panic_on_taint not producing 886 * false positives due to userspace input, bail out 887 * before setting the requested taint flags. 888 */ 889 if (panic_on_taint_nousertaint && (tmptaint & panic_on_taint)) 890 return -EINVAL; 891 892 /* 893 * Poor man's atomic or. Not worth adding a primitive 894 * to everyone's atomic.h for this 895 */ 896 for (i = 0; i < TAINT_FLAGS_COUNT; i++) 897 if ((1UL << i) & tmptaint) 898 add_taint(i, LOCKDEP_STILL_OK); 899 } 900 901 return err; 902 } 903 904 #ifdef CONFIG_PRINTK 905 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, 906 void *buffer, size_t *lenp, loff_t *ppos) 907 { 908 if (write && !capable(CAP_SYS_ADMIN)) 909 return -EPERM; 910 911 return proc_dointvec_minmax(table, write, buffer, lenp, ppos); 912 } 913 #endif 914 915 /** 916 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure 917 * @min: pointer to minimum allowable value 918 * @max: pointer to maximum allowable value 919 * 920 * The do_proc_dointvec_minmax_conv_param structure provides the 921 * minimum and maximum values for doing range checking for those sysctl 922 * parameters that use the proc_dointvec_minmax() handler. 
923 */ 924 struct do_proc_dointvec_minmax_conv_param { 925 int *min; 926 int *max; 927 }; 928 929 static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, 930 int *valp, 931 int write, void *data) 932 { 933 int tmp, ret; 934 struct do_proc_dointvec_minmax_conv_param *param = data; 935 /* 936 * If writing, first do so via a temporary local int so we can 937 * bounds-check it before touching *valp. 938 */ 939 int *ip = write ? &tmp : valp; 940 941 ret = do_proc_dointvec_conv(negp, lvalp, ip, write, data); 942 if (ret) 943 return ret; 944 945 if (write) { 946 if ((param->min && *param->min > tmp) || 947 (param->max && *param->max < tmp)) 948 return -EINVAL; 949 *valp = tmp; 950 } 951 952 return 0; 953 } 954 955 /** 956 * proc_dointvec_minmax - read a vector of integers with min/max values 957 * @table: the sysctl table 958 * @write: %TRUE if this is a write to the sysctl file 959 * @buffer: the user buffer 960 * @lenp: the size of the user buffer 961 * @ppos: file position 962 * 963 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 964 * values from/to the user buffer, treated as an ASCII string. 965 * 966 * This routine will ensure the values are within the range specified by 967 * table->extra1 (min) and table->extra2 (max). 968 * 969 * Returns 0 on success or -EINVAL on write when the range check fails. 
970 */ 971 int proc_dointvec_minmax(struct ctl_table *table, int write, 972 void *buffer, size_t *lenp, loff_t *ppos) 973 { 974 struct do_proc_dointvec_minmax_conv_param param = { 975 .min = (int *) table->extra1, 976 .max = (int *) table->extra2, 977 }; 978 return do_proc_dointvec(table, write, buffer, lenp, ppos, 979 do_proc_dointvec_minmax_conv, ¶m); 980 } 981 982 /** 983 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure 984 * @min: pointer to minimum allowable value 985 * @max: pointer to maximum allowable value 986 * 987 * The do_proc_douintvec_minmax_conv_param structure provides the 988 * minimum and maximum values for doing range checking for those sysctl 989 * parameters that use the proc_douintvec_minmax() handler. 990 */ 991 struct do_proc_douintvec_minmax_conv_param { 992 unsigned int *min; 993 unsigned int *max; 994 }; 995 996 static int do_proc_douintvec_minmax_conv(unsigned long *lvalp, 997 unsigned int *valp, 998 int write, void *data) 999 { 1000 int ret; 1001 unsigned int tmp; 1002 struct do_proc_douintvec_minmax_conv_param *param = data; 1003 /* write via temporary local uint for bounds-checking */ 1004 unsigned int *up = write ? &tmp : valp; 1005 1006 ret = do_proc_douintvec_conv(lvalp, up, write, data); 1007 if (ret) 1008 return ret; 1009 1010 if (write) { 1011 if ((param->min && *param->min > tmp) || 1012 (param->max && *param->max < tmp)) 1013 return -ERANGE; 1014 1015 *valp = tmp; 1016 } 1017 1018 return 0; 1019 } 1020 1021 /** 1022 * proc_douintvec_minmax - read a vector of unsigned ints with min/max values 1023 * @table: the sysctl table 1024 * @write: %TRUE if this is a write to the sysctl file 1025 * @buffer: the user buffer 1026 * @lenp: the size of the user buffer 1027 * @ppos: file position 1028 * 1029 * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer 1030 * values from/to the user buffer, treated as an ASCII string. Negative 1031 * strings are not allowed. 
1032 * 1033 * This routine will ensure the values are within the range specified by 1034 * table->extra1 (min) and table->extra2 (max). There is a final sanity 1035 * check for UINT_MAX to avoid having to support wrap around uses from 1036 * userspace. 1037 * 1038 * Returns 0 on success or -ERANGE on write when the range check fails. 1039 */ 1040 int proc_douintvec_minmax(struct ctl_table *table, int write, 1041 void *buffer, size_t *lenp, loff_t *ppos) 1042 { 1043 struct do_proc_douintvec_minmax_conv_param param = { 1044 .min = (unsigned int *) table->extra1, 1045 .max = (unsigned int *) table->extra2, 1046 }; 1047 return do_proc_douintvec(table, write, buffer, lenp, ppos, 1048 do_proc_douintvec_minmax_conv, ¶m); 1049 } 1050 1051 /** 1052 * proc_dou8vec_minmax - read a vector of unsigned chars with min/max values 1053 * @table: the sysctl table 1054 * @write: %TRUE if this is a write to the sysctl file 1055 * @buffer: the user buffer 1056 * @lenp: the size of the user buffer 1057 * @ppos: file position 1058 * 1059 * Reads/writes up to table->maxlen/sizeof(u8) unsigned chars 1060 * values from/to the user buffer, treated as an ASCII string. Negative 1061 * strings are not allowed. 1062 * 1063 * This routine will ensure the values are within the range specified by 1064 * table->extra1 (min) and table->extra2 (max). 1065 * 1066 * Returns 0 on success or an error on write when the range check fails. 1067 */ 1068 int proc_dou8vec_minmax(struct ctl_table *table, int write, 1069 void *buffer, size_t *lenp, loff_t *ppos) 1070 { 1071 struct ctl_table tmp; 1072 unsigned int min = 0, max = 255U, val; 1073 u8 *data = table->data; 1074 struct do_proc_douintvec_minmax_conv_param param = { 1075 .min = &min, 1076 .max = &max, 1077 }; 1078 int res; 1079 1080 /* Do not support arrays yet. 
*/ 1081 if (table->maxlen != sizeof(u8)) 1082 return -EINVAL; 1083 1084 if (table->extra1) { 1085 min = *(unsigned int *) table->extra1; 1086 if (min > 255U) 1087 return -EINVAL; 1088 } 1089 if (table->extra2) { 1090 max = *(unsigned int *) table->extra2; 1091 if (max > 255U) 1092 return -EINVAL; 1093 } 1094 1095 tmp = *table; 1096 1097 tmp.maxlen = sizeof(val); 1098 tmp.data = &val; 1099 val = *data; 1100 res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos, 1101 do_proc_douintvec_minmax_conv, ¶m); 1102 if (res) 1103 return res; 1104 if (write) 1105 *data = val; 1106 return 0; 1107 } 1108 EXPORT_SYMBOL_GPL(proc_dou8vec_minmax); 1109 1110 static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, 1111 unsigned int *valp, 1112 int write, void *data) 1113 { 1114 if (write) { 1115 unsigned int val; 1116 1117 val = round_pipe_size(*lvalp); 1118 if (val == 0) 1119 return -EINVAL; 1120 1121 *valp = val; 1122 } else { 1123 unsigned int val = *valp; 1124 *lvalp = (unsigned long) val; 1125 } 1126 1127 return 0; 1128 } 1129 1130 static int proc_dopipe_max_size(struct ctl_table *table, int write, 1131 void *buffer, size_t *lenp, loff_t *ppos) 1132 { 1133 return do_proc_douintvec(table, write, buffer, lenp, ppos, 1134 do_proc_dopipe_max_size_conv, NULL); 1135 } 1136 1137 static void validate_coredump_safety(void) 1138 { 1139 #ifdef CONFIG_COREDUMP 1140 if (suid_dumpable == SUID_DUMP_ROOT && 1141 core_pattern[0] != '/' && core_pattern[0] != '|') { 1142 printk(KERN_WARNING 1143 "Unsafe core_pattern used with fs.suid_dumpable=2.\n" 1144 "Pipe handler or fully qualified core dump path required.\n" 1145 "Set kernel.core_pattern before fs.suid_dumpable.\n" 1146 ); 1147 } 1148 #endif 1149 } 1150 1151 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, 1152 void *buffer, size_t *lenp, loff_t *ppos) 1153 { 1154 int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 1155 if (!error) 1156 validate_coredump_safety(); 1157 return error; 1158 } 
1159 1160 #ifdef CONFIG_COREDUMP 1161 static int proc_dostring_coredump(struct ctl_table *table, int write, 1162 void *buffer, size_t *lenp, loff_t *ppos) 1163 { 1164 int error = proc_dostring(table, write, buffer, lenp, ppos); 1165 if (!error) 1166 validate_coredump_safety(); 1167 return error; 1168 } 1169 #endif 1170 1171 #ifdef CONFIG_MAGIC_SYSRQ 1172 static int sysrq_sysctl_handler(struct ctl_table *table, int write, 1173 void *buffer, size_t *lenp, loff_t *ppos) 1174 { 1175 int tmp, ret; 1176 1177 tmp = sysrq_mask(); 1178 1179 ret = __do_proc_dointvec(&tmp, table, write, buffer, 1180 lenp, ppos, NULL, NULL); 1181 if (ret || !write) 1182 return ret; 1183 1184 if (write) 1185 sysrq_toggle_support(tmp); 1186 1187 return 0; 1188 } 1189 #endif 1190 1191 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, 1192 int write, void *buffer, size_t *lenp, loff_t *ppos, 1193 unsigned long convmul, unsigned long convdiv) 1194 { 1195 unsigned long *i, *min, *max; 1196 int vleft, first = 1, err = 0; 1197 size_t left; 1198 char *p; 1199 1200 if (!data || !table->maxlen || !*lenp || (*ppos && !write)) { 1201 *lenp = 0; 1202 return 0; 1203 } 1204 1205 i = (unsigned long *) data; 1206 min = (unsigned long *) table->extra1; 1207 max = (unsigned long *) table->extra2; 1208 vleft = table->maxlen / sizeof(unsigned long); 1209 left = *lenp; 1210 1211 if (write) { 1212 if (proc_first_pos_non_zero_ignore(ppos, table)) 1213 goto out; 1214 1215 if (left > PAGE_SIZE - 1) 1216 left = PAGE_SIZE - 1; 1217 p = buffer; 1218 } 1219 1220 for (; left && vleft--; i++, first = 0) { 1221 unsigned long val; 1222 1223 if (write) { 1224 bool neg; 1225 1226 left -= proc_skip_spaces(&p); 1227 if (!left) 1228 break; 1229 1230 err = proc_get_long(&p, &left, &val, &neg, 1231 proc_wspace_sep, 1232 sizeof(proc_wspace_sep), NULL); 1233 if (err) 1234 break; 1235 if (neg) 1236 continue; 1237 val = convmul * val / convdiv; 1238 if ((min && val < *min) || (max && val > *max)) { 1239 err = 
-EINVAL; 1240 break; 1241 } 1242 *i = val; 1243 } else { 1244 val = convdiv * (*i) / convmul; 1245 if (!first) 1246 proc_put_char(&buffer, &left, '\t'); 1247 proc_put_long(&buffer, &left, val, false); 1248 } 1249 } 1250 1251 if (!write && !first && left && !err) 1252 proc_put_char(&buffer, &left, '\n'); 1253 if (write && !err) 1254 left -= proc_skip_spaces(&p); 1255 if (write && first) 1256 return err ? : -EINVAL; 1257 *lenp -= left; 1258 out: 1259 *ppos += *lenp; 1260 return err; 1261 } 1262 1263 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, 1264 void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul, 1265 unsigned long convdiv) 1266 { 1267 return __do_proc_doulongvec_minmax(table->data, table, write, 1268 buffer, lenp, ppos, convmul, convdiv); 1269 } 1270 1271 /** 1272 * proc_doulongvec_minmax - read a vector of long integers with min/max values 1273 * @table: the sysctl table 1274 * @write: %TRUE if this is a write to the sysctl file 1275 * @buffer: the user buffer 1276 * @lenp: the size of the user buffer 1277 * @ppos: file position 1278 * 1279 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long 1280 * values from/to the user buffer, treated as an ASCII string. 1281 * 1282 * This routine will ensure the values are within the range specified by 1283 * table->extra1 (min) and table->extra2 (max). 1284 * 1285 * Returns 0 on success. 
1286 */ 1287 int proc_doulongvec_minmax(struct ctl_table *table, int write, 1288 void *buffer, size_t *lenp, loff_t *ppos) 1289 { 1290 return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l); 1291 } 1292 1293 /** 1294 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values 1295 * @table: the sysctl table 1296 * @write: %TRUE if this is a write to the sysctl file 1297 * @buffer: the user buffer 1298 * @lenp: the size of the user buffer 1299 * @ppos: file position 1300 * 1301 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long 1302 * values from/to the user buffer, treated as an ASCII string. The values 1303 * are treated as milliseconds, and converted to jiffies when they are stored. 1304 * 1305 * This routine will ensure the values are within the range specified by 1306 * table->extra1 (min) and table->extra2 (max). 1307 * 1308 * Returns 0 on success. 1309 */ 1310 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, 1311 void *buffer, size_t *lenp, loff_t *ppos) 1312 { 1313 return do_proc_doulongvec_minmax(table, write, buffer, 1314 lenp, ppos, HZ, 1000l); 1315 } 1316 1317 1318 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp, 1319 int *valp, 1320 int write, void *data) 1321 { 1322 if (write) { 1323 if (*lvalp > INT_MAX / HZ) 1324 return 1; 1325 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ); 1326 } else { 1327 int val = *valp; 1328 unsigned long lval; 1329 if (val < 0) { 1330 *negp = true; 1331 lval = -(unsigned long)val; 1332 } else { 1333 *negp = false; 1334 lval = (unsigned long)val; 1335 } 1336 *lvalp = lval / HZ; 1337 } 1338 return 0; 1339 } 1340 1341 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp, 1342 int *valp, 1343 int write, void *data) 1344 { 1345 if (write) { 1346 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ) 1347 return 1; 1348 *valp = clock_t_to_jiffies(*negp ? 
-*lvalp : *lvalp); 1349 } else { 1350 int val = *valp; 1351 unsigned long lval; 1352 if (val < 0) { 1353 *negp = true; 1354 lval = -(unsigned long)val; 1355 } else { 1356 *negp = false; 1357 lval = (unsigned long)val; 1358 } 1359 *lvalp = jiffies_to_clock_t(lval); 1360 } 1361 return 0; 1362 } 1363 1364 static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp, 1365 int *valp, 1366 int write, void *data) 1367 { 1368 if (write) { 1369 unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp); 1370 1371 if (jif > INT_MAX) 1372 return 1; 1373 *valp = (int)jif; 1374 } else { 1375 int val = *valp; 1376 unsigned long lval; 1377 if (val < 0) { 1378 *negp = true; 1379 lval = -(unsigned long)val; 1380 } else { 1381 *negp = false; 1382 lval = (unsigned long)val; 1383 } 1384 *lvalp = jiffies_to_msecs(lval); 1385 } 1386 return 0; 1387 } 1388 1389 /** 1390 * proc_dointvec_jiffies - read a vector of integers as seconds 1391 * @table: the sysctl table 1392 * @write: %TRUE if this is a write to the sysctl file 1393 * @buffer: the user buffer 1394 * @lenp: the size of the user buffer 1395 * @ppos: file position 1396 * 1397 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 1398 * values from/to the user buffer, treated as an ASCII string. 1399 * The values read are assumed to be in seconds, and are converted into 1400 * jiffies. 1401 * 1402 * Returns 0 on success. 
1403 */ 1404 int proc_dointvec_jiffies(struct ctl_table *table, int write, 1405 void *buffer, size_t *lenp, loff_t *ppos) 1406 { 1407 return do_proc_dointvec(table,write,buffer,lenp,ppos, 1408 do_proc_dointvec_jiffies_conv,NULL); 1409 } 1410 1411 /** 1412 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds 1413 * @table: the sysctl table 1414 * @write: %TRUE if this is a write to the sysctl file 1415 * @buffer: the user buffer 1416 * @lenp: the size of the user buffer 1417 * @ppos: pointer to the file position 1418 * 1419 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 1420 * values from/to the user buffer, treated as an ASCII string. 1421 * The values read are assumed to be in 1/USER_HZ seconds, and 1422 * are converted into jiffies. 1423 * 1424 * Returns 0 on success. 1425 */ 1426 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, 1427 void *buffer, size_t *lenp, loff_t *ppos) 1428 { 1429 return do_proc_dointvec(table,write,buffer,lenp,ppos, 1430 do_proc_dointvec_userhz_jiffies_conv,NULL); 1431 } 1432 1433 /** 1434 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds 1435 * @table: the sysctl table 1436 * @write: %TRUE if this is a write to the sysctl file 1437 * @buffer: the user buffer 1438 * @lenp: the size of the user buffer 1439 * @ppos: file position 1440 * @ppos: the current position in the file 1441 * 1442 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer 1443 * values from/to the user buffer, treated as an ASCII string. 1444 * The values read are assumed to be in 1/1000 seconds, and 1445 * are converted into jiffies. 1446 * 1447 * Returns 0 on success. 
1448 */ 1449 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer, 1450 size_t *lenp, loff_t *ppos) 1451 { 1452 return do_proc_dointvec(table, write, buffer, lenp, ppos, 1453 do_proc_dointvec_ms_jiffies_conv, NULL); 1454 } 1455 1456 static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer, 1457 size_t *lenp, loff_t *ppos) 1458 { 1459 struct pid *new_pid; 1460 pid_t tmp; 1461 int r; 1462 1463 tmp = pid_vnr(cad_pid); 1464 1465 r = __do_proc_dointvec(&tmp, table, write, buffer, 1466 lenp, ppos, NULL, NULL); 1467 if (r || !write) 1468 return r; 1469 1470 new_pid = find_get_pid(tmp); 1471 if (!new_pid) 1472 return -ESRCH; 1473 1474 put_pid(xchg(&cad_pid, new_pid)); 1475 return 0; 1476 } 1477 1478 /** 1479 * proc_do_large_bitmap - read/write from/to a large bitmap 1480 * @table: the sysctl table 1481 * @write: %TRUE if this is a write to the sysctl file 1482 * @buffer: the user buffer 1483 * @lenp: the size of the user buffer 1484 * @ppos: file position 1485 * 1486 * The bitmap is stored at table->data and the bitmap length (in bits) 1487 * in table->maxlen. 1488 * 1489 * We use a range comma separated format (e.g. 1,3-4,10-10) so that 1490 * large bitmaps may be represented in a compact manner. Writing into 1491 * the file will clear the bitmap then update it with the given input. 1492 * 1493 * Returns 0 on success. 
1494 */ 1495 int proc_do_large_bitmap(struct ctl_table *table, int write, 1496 void *buffer, size_t *lenp, loff_t *ppos) 1497 { 1498 int err = 0; 1499 size_t left = *lenp; 1500 unsigned long bitmap_len = table->maxlen; 1501 unsigned long *bitmap = *(unsigned long **) table->data; 1502 unsigned long *tmp_bitmap = NULL; 1503 char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c; 1504 1505 if (!bitmap || !bitmap_len || !left || (*ppos && !write)) { 1506 *lenp = 0; 1507 return 0; 1508 } 1509 1510 if (write) { 1511 char *p = buffer; 1512 size_t skipped = 0; 1513 1514 if (left > PAGE_SIZE - 1) { 1515 left = PAGE_SIZE - 1; 1516 /* How much of the buffer we'll skip this pass */ 1517 skipped = *lenp - left; 1518 } 1519 1520 tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL); 1521 if (!tmp_bitmap) 1522 return -ENOMEM; 1523 proc_skip_char(&p, &left, '\n'); 1524 while (!err && left) { 1525 unsigned long val_a, val_b; 1526 bool neg; 1527 size_t saved_left; 1528 1529 /* In case we stop parsing mid-number, we can reset */ 1530 saved_left = left; 1531 err = proc_get_long(&p, &left, &val_a, &neg, tr_a, 1532 sizeof(tr_a), &c); 1533 /* 1534 * If we consumed the entirety of a truncated buffer or 1535 * only one char is left (may be a "-"), then stop here, 1536 * reset, & come back for more. 1537 */ 1538 if ((left <= 1) && skipped) { 1539 left = saved_left; 1540 break; 1541 } 1542 1543 if (err) 1544 break; 1545 if (val_a >= bitmap_len || neg) { 1546 err = -EINVAL; 1547 break; 1548 } 1549 1550 val_b = val_a; 1551 if (left) { 1552 p++; 1553 left--; 1554 } 1555 1556 if (c == '-') { 1557 err = proc_get_long(&p, &left, &val_b, 1558 &neg, tr_b, sizeof(tr_b), 1559 &c); 1560 /* 1561 * If we consumed all of a truncated buffer or 1562 * then stop here, reset, & come back for more. 
1563 */ 1564 if (!left && skipped) { 1565 left = saved_left; 1566 break; 1567 } 1568 1569 if (err) 1570 break; 1571 if (val_b >= bitmap_len || neg || 1572 val_a > val_b) { 1573 err = -EINVAL; 1574 break; 1575 } 1576 if (left) { 1577 p++; 1578 left--; 1579 } 1580 } 1581 1582 bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1); 1583 proc_skip_char(&p, &left, '\n'); 1584 } 1585 left += skipped; 1586 } else { 1587 unsigned long bit_a, bit_b = 0; 1588 bool first = 1; 1589 1590 while (left) { 1591 bit_a = find_next_bit(bitmap, bitmap_len, bit_b); 1592 if (bit_a >= bitmap_len) 1593 break; 1594 bit_b = find_next_zero_bit(bitmap, bitmap_len, 1595 bit_a + 1) - 1; 1596 1597 if (!first) 1598 proc_put_char(&buffer, &left, ','); 1599 proc_put_long(&buffer, &left, bit_a, false); 1600 if (bit_a != bit_b) { 1601 proc_put_char(&buffer, &left, '-'); 1602 proc_put_long(&buffer, &left, bit_b, false); 1603 } 1604 1605 first = 0; bit_b++; 1606 } 1607 proc_put_char(&buffer, &left, '\n'); 1608 } 1609 1610 if (!err) { 1611 if (write) { 1612 if (*ppos) 1613 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len); 1614 else 1615 bitmap_copy(bitmap, tmp_bitmap, bitmap_len); 1616 } 1617 *lenp -= left; 1618 *ppos += *lenp; 1619 } 1620 1621 bitmap_free(tmp_bitmap); 1622 return err; 1623 } 1624 1625 #else /* CONFIG_PROC_SYSCTL */ 1626 1627 int proc_dostring(struct ctl_table *table, int write, 1628 void *buffer, size_t *lenp, loff_t *ppos) 1629 { 1630 return -ENOSYS; 1631 } 1632 1633 int proc_dointvec(struct ctl_table *table, int write, 1634 void *buffer, size_t *lenp, loff_t *ppos) 1635 { 1636 return -ENOSYS; 1637 } 1638 1639 int proc_douintvec(struct ctl_table *table, int write, 1640 void *buffer, size_t *lenp, loff_t *ppos) 1641 { 1642 return -ENOSYS; 1643 } 1644 1645 int proc_dointvec_minmax(struct ctl_table *table, int write, 1646 void *buffer, size_t *lenp, loff_t *ppos) 1647 { 1648 return -ENOSYS; 1649 } 1650 1651 int proc_douintvec_minmax(struct ctl_table *table, int write, 1652 void *buffer, size_t 
*lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dou8vec_minmax(struct ctl_table *table, int write,
			void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_jiffies(struct ctl_table *table, int write,
		    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
		    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_doulongvec_minmax(struct ctl_table *table, int write,
		    void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
				      void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

int proc_do_large_bitmap(struct ctl_table *table, int write,
			 void *buffer, size_t *lenp, loff_t *ppos)
{
	return -ENOSYS;
}

#endif /* CONFIG_PROC_SYSCTL */

#if defined(CONFIG_SYSCTL)
/*
 * proc_do_static_key - expose a static key as a 0/1 integer sysctl
 *
 * table->data is used as a struct static_key pointer.  A write without
 * CAP_SYS_ADMIN fails with -EPERM.
 *
 * The text handling is delegated to proc_dointvec_minmax() through a
 * temporary table that points at the local 'val' and is bounded by
 * SYSCTL_ZERO and SYSCTL_ONE.  'val' is seeded from static_key_enabled(),
 * so a read reports the current state.  After a successful write the key
 * is enabled when 'val' is non-zero and disabled otherwise.
 *
 * Returns the result of proc_dointvec_minmax().
 */
int proc_do_static_key(struct ctl_table *table, int write,
		       void *buffer, size_t *lenp, loff_t *ppos)
{
	struct static_key *key = (struct static_key *)table->data;
	/* Function-local lock held across the read-parse-update sequence. */
	static DEFINE_MUTEX(static_key_mutex);
	int val, ret;
	struct ctl_table tmp = {
		.data   = &val,
		.maxlen = sizeof(val),
		.mode   = table->mode,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&static_key_mutex);
	val = static_key_enabled(key);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (val)
			static_key_enable(key);
		else
			static_key_disable(key);
	}
	mutex_unlock(&static_key_mutex);
	return ret;
}

static struct ctl_table kern_table[] = {
	{
		.procname	= "sched_child_runs_first",
		.data		= &sysctl_sched_child_runs_first,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_SCHEDSTATS
	{
		.procname	= "sched_schedstats",
		.data		= NULL,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= sysctl_schedstats,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#endif /* CONFIG_SCHEDSTATS */
#ifdef CONFIG_TASK_DELAY_ACCT
	{
		.procname	= "task_delayacct",
		.data		= NULL,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= sysctl_delayacct,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#endif /* CONFIG_TASK_DELAY_ACCT */
#ifdef CONFIG_NUMA_BALANCING
	{
		.procname	= "numa_balancing",
		.data		= NULL, /* filled in by handler */
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= sysctl_numa_balancing,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#endif /* CONFIG_NUMA_BALANCING */
	{
		.procname	= "sched_rt_period_us",
		.data		= &sysctl_sched_rt_period,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= sched_rt_handler,
	},
	{
		.procname	= "sched_rt_runtime_us",
		.data		= &sysctl_sched_rt_runtime,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= sched_rt_handler,
	},
	{
		.procname	= "sched_deadline_period_max_us",
		.data		= &sysctl_sched_dl_period_max,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "sched_deadline_period_min_us",
		.data		= &sysctl_sched_dl_period_min,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	=
"sched_rr_timeslice_ms", 1803 .data = &sysctl_sched_rr_timeslice, 1804 .maxlen = sizeof(int), 1805 .mode = 0644, 1806 .proc_handler = sched_rr_handler, 1807 }, 1808 #ifdef CONFIG_UCLAMP_TASK 1809 { 1810 .procname = "sched_util_clamp_min", 1811 .data = &sysctl_sched_uclamp_util_min, 1812 .maxlen = sizeof(unsigned int), 1813 .mode = 0644, 1814 .proc_handler = sysctl_sched_uclamp_handler, 1815 }, 1816 { 1817 .procname = "sched_util_clamp_max", 1818 .data = &sysctl_sched_uclamp_util_max, 1819 .maxlen = sizeof(unsigned int), 1820 .mode = 0644, 1821 .proc_handler = sysctl_sched_uclamp_handler, 1822 }, 1823 { 1824 .procname = "sched_util_clamp_min_rt_default", 1825 .data = &sysctl_sched_uclamp_util_min_rt_default, 1826 .maxlen = sizeof(unsigned int), 1827 .mode = 0644, 1828 .proc_handler = sysctl_sched_uclamp_handler, 1829 }, 1830 #endif 1831 #ifdef CONFIG_SCHED_AUTOGROUP 1832 { 1833 .procname = "sched_autogroup_enabled", 1834 .data = &sysctl_sched_autogroup_enabled, 1835 .maxlen = sizeof(unsigned int), 1836 .mode = 0644, 1837 .proc_handler = proc_dointvec_minmax, 1838 .extra1 = SYSCTL_ZERO, 1839 .extra2 = SYSCTL_ONE, 1840 }, 1841 #endif 1842 #ifdef CONFIG_CFS_BANDWIDTH 1843 { 1844 .procname = "sched_cfs_bandwidth_slice_us", 1845 .data = &sysctl_sched_cfs_bandwidth_slice, 1846 .maxlen = sizeof(unsigned int), 1847 .mode = 0644, 1848 .proc_handler = proc_dointvec_minmax, 1849 .extra1 = SYSCTL_ONE, 1850 }, 1851 #endif 1852 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) 1853 { 1854 .procname = "sched_energy_aware", 1855 .data = &sysctl_sched_energy_aware, 1856 .maxlen = sizeof(unsigned int), 1857 .mode = 0644, 1858 .proc_handler = sched_energy_aware_handler, 1859 .extra1 = SYSCTL_ZERO, 1860 .extra2 = SYSCTL_ONE, 1861 }, 1862 #endif 1863 #ifdef CONFIG_PROVE_LOCKING 1864 { 1865 .procname = "prove_locking", 1866 .data = &prove_locking, 1867 .maxlen = sizeof(int), 1868 .mode = 0644, 1869 .proc_handler = proc_dointvec, 1870 }, 1871 #endif 1872 #ifdef 
CONFIG_LOCK_STAT 1873 { 1874 .procname = "lock_stat", 1875 .data = &lock_stat, 1876 .maxlen = sizeof(int), 1877 .mode = 0644, 1878 .proc_handler = proc_dointvec, 1879 }, 1880 #endif 1881 { 1882 .procname = "panic", 1883 .data = &panic_timeout, 1884 .maxlen = sizeof(int), 1885 .mode = 0644, 1886 .proc_handler = proc_dointvec, 1887 }, 1888 #ifdef CONFIG_COREDUMP 1889 { 1890 .procname = "core_uses_pid", 1891 .data = &core_uses_pid, 1892 .maxlen = sizeof(int), 1893 .mode = 0644, 1894 .proc_handler = proc_dointvec, 1895 }, 1896 { 1897 .procname = "core_pattern", 1898 .data = core_pattern, 1899 .maxlen = CORENAME_MAX_SIZE, 1900 .mode = 0644, 1901 .proc_handler = proc_dostring_coredump, 1902 }, 1903 { 1904 .procname = "core_pipe_limit", 1905 .data = &core_pipe_limit, 1906 .maxlen = sizeof(unsigned int), 1907 .mode = 0644, 1908 .proc_handler = proc_dointvec, 1909 }, 1910 #endif 1911 #ifdef CONFIG_PROC_SYSCTL 1912 { 1913 .procname = "tainted", 1914 .maxlen = sizeof(long), 1915 .mode = 0644, 1916 .proc_handler = proc_taint, 1917 }, 1918 { 1919 .procname = "sysctl_writes_strict", 1920 .data = &sysctl_writes_strict, 1921 .maxlen = sizeof(int), 1922 .mode = 0644, 1923 .proc_handler = proc_dointvec_minmax, 1924 .extra1 = &neg_one, 1925 .extra2 = SYSCTL_ONE, 1926 }, 1927 #endif 1928 #ifdef CONFIG_LATENCYTOP 1929 { 1930 .procname = "latencytop", 1931 .data = &latencytop_enabled, 1932 .maxlen = sizeof(int), 1933 .mode = 0644, 1934 .proc_handler = sysctl_latencytop, 1935 }, 1936 #endif 1937 #ifdef CONFIG_BLK_DEV_INITRD 1938 { 1939 .procname = "real-root-dev", 1940 .data = &real_root_dev, 1941 .maxlen = sizeof(int), 1942 .mode = 0644, 1943 .proc_handler = proc_dointvec, 1944 }, 1945 #endif 1946 { 1947 .procname = "print-fatal-signals", 1948 .data = &print_fatal_signals, 1949 .maxlen = sizeof(int), 1950 .mode = 0644, 1951 .proc_handler = proc_dointvec, 1952 }, 1953 #ifdef CONFIG_SPARC 1954 { 1955 .procname = "reboot-cmd", 1956 .data = reboot_command, 1957 .maxlen = 256, 1958 .mode = 
0644, 1959 .proc_handler = proc_dostring, 1960 }, 1961 { 1962 .procname = "stop-a", 1963 .data = &stop_a_enabled, 1964 .maxlen = sizeof (int), 1965 .mode = 0644, 1966 .proc_handler = proc_dointvec, 1967 }, 1968 { 1969 .procname = "scons-poweroff", 1970 .data = &scons_pwroff, 1971 .maxlen = sizeof (int), 1972 .mode = 0644, 1973 .proc_handler = proc_dointvec, 1974 }, 1975 #endif 1976 #ifdef CONFIG_SPARC64 1977 { 1978 .procname = "tsb-ratio", 1979 .data = &sysctl_tsb_ratio, 1980 .maxlen = sizeof (int), 1981 .mode = 0644, 1982 .proc_handler = proc_dointvec, 1983 }, 1984 #endif 1985 #ifdef CONFIG_PARISC 1986 { 1987 .procname = "soft-power", 1988 .data = &pwrsw_enabled, 1989 .maxlen = sizeof (int), 1990 .mode = 0644, 1991 .proc_handler = proc_dointvec, 1992 }, 1993 #endif 1994 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW 1995 { 1996 .procname = "unaligned-trap", 1997 .data = &unaligned_enabled, 1998 .maxlen = sizeof (int), 1999 .mode = 0644, 2000 .proc_handler = proc_dointvec, 2001 }, 2002 #endif 2003 { 2004 .procname = "ctrl-alt-del", 2005 .data = &C_A_D, 2006 .maxlen = sizeof(int), 2007 .mode = 0644, 2008 .proc_handler = proc_dointvec, 2009 }, 2010 #ifdef CONFIG_FUNCTION_TRACER 2011 { 2012 .procname = "ftrace_enabled", 2013 .data = &ftrace_enabled, 2014 .maxlen = sizeof(int), 2015 .mode = 0644, 2016 .proc_handler = ftrace_enable_sysctl, 2017 }, 2018 #endif 2019 #ifdef CONFIG_STACK_TRACER 2020 { 2021 .procname = "stack_tracer_enabled", 2022 .data = &stack_tracer_enabled, 2023 .maxlen = sizeof(int), 2024 .mode = 0644, 2025 .proc_handler = stack_trace_sysctl, 2026 }, 2027 #endif 2028 #ifdef CONFIG_TRACING 2029 { 2030 .procname = "ftrace_dump_on_oops", 2031 .data = &ftrace_dump_on_oops, 2032 .maxlen = sizeof(int), 2033 .mode = 0644, 2034 .proc_handler = proc_dointvec, 2035 }, 2036 { 2037 .procname = "traceoff_on_warning", 2038 .data = &__disable_trace_on_warning, 2039 .maxlen = sizeof(__disable_trace_on_warning), 2040 .mode = 0644, 2041 .proc_handler = proc_dointvec, 2042 }, 
2043 { 2044 .procname = "tracepoint_printk", 2045 .data = &tracepoint_printk, 2046 .maxlen = sizeof(tracepoint_printk), 2047 .mode = 0644, 2048 .proc_handler = tracepoint_printk_sysctl, 2049 }, 2050 #endif 2051 #ifdef CONFIG_KEXEC_CORE 2052 { 2053 .procname = "kexec_load_disabled", 2054 .data = &kexec_load_disabled, 2055 .maxlen = sizeof(int), 2056 .mode = 0644, 2057 /* only handle a transition from default "0" to "1" */ 2058 .proc_handler = proc_dointvec_minmax, 2059 .extra1 = SYSCTL_ONE, 2060 .extra2 = SYSCTL_ONE, 2061 }, 2062 #endif 2063 #ifdef CONFIG_MODULES 2064 { 2065 .procname = "modprobe", 2066 .data = &modprobe_path, 2067 .maxlen = KMOD_PATH_LEN, 2068 .mode = 0644, 2069 .proc_handler = proc_dostring, 2070 }, 2071 { 2072 .procname = "modules_disabled", 2073 .data = &modules_disabled, 2074 .maxlen = sizeof(int), 2075 .mode = 0644, 2076 /* only handle a transition from default "0" to "1" */ 2077 .proc_handler = proc_dointvec_minmax, 2078 .extra1 = SYSCTL_ONE, 2079 .extra2 = SYSCTL_ONE, 2080 }, 2081 #endif 2082 #ifdef CONFIG_UEVENT_HELPER 2083 { 2084 .procname = "hotplug", 2085 .data = &uevent_helper, 2086 .maxlen = UEVENT_HELPER_PATH_LEN, 2087 .mode = 0644, 2088 .proc_handler = proc_dostring, 2089 }, 2090 #endif 2091 #ifdef CONFIG_CHR_DEV_SG 2092 { 2093 .procname = "sg-big-buff", 2094 .data = &sg_big_buff, 2095 .maxlen = sizeof (int), 2096 .mode = 0444, 2097 .proc_handler = proc_dointvec, 2098 }, 2099 #endif 2100 #ifdef CONFIG_BSD_PROCESS_ACCT 2101 { 2102 .procname = "acct", 2103 .data = &acct_parm, 2104 .maxlen = 3*sizeof(int), 2105 .mode = 0644, 2106 .proc_handler = proc_dointvec, 2107 }, 2108 #endif 2109 #ifdef CONFIG_MAGIC_SYSRQ 2110 { 2111 .procname = "sysrq", 2112 .data = NULL, 2113 .maxlen = sizeof (int), 2114 .mode = 0644, 2115 .proc_handler = sysrq_sysctl_handler, 2116 }, 2117 #endif 2118 #ifdef CONFIG_PROC_SYSCTL 2119 { 2120 .procname = "cad_pid", 2121 .data = NULL, 2122 .maxlen = sizeof (int), 2123 .mode = 0600, 2124 .proc_handler = 
proc_do_cad_pid, 2125 }, 2126 #endif 2127 { 2128 .procname = "threads-max", 2129 .data = NULL, 2130 .maxlen = sizeof(int), 2131 .mode = 0644, 2132 .proc_handler = sysctl_max_threads, 2133 }, 2134 { 2135 .procname = "random", 2136 .mode = 0555, 2137 .child = random_table, 2138 }, 2139 { 2140 .procname = "usermodehelper", 2141 .mode = 0555, 2142 .child = usermodehelper_table, 2143 }, 2144 #ifdef CONFIG_FW_LOADER_USER_HELPER 2145 { 2146 .procname = "firmware_config", 2147 .mode = 0555, 2148 .child = firmware_config_table, 2149 }, 2150 #endif 2151 { 2152 .procname = "overflowuid", 2153 .data = &overflowuid, 2154 .maxlen = sizeof(int), 2155 .mode = 0644, 2156 .proc_handler = proc_dointvec_minmax, 2157 .extra1 = &minolduid, 2158 .extra2 = &maxolduid, 2159 }, 2160 { 2161 .procname = "overflowgid", 2162 .data = &overflowgid, 2163 .maxlen = sizeof(int), 2164 .mode = 0644, 2165 .proc_handler = proc_dointvec_minmax, 2166 .extra1 = &minolduid, 2167 .extra2 = &maxolduid, 2168 }, 2169 #ifdef CONFIG_S390 2170 { 2171 .procname = "userprocess_debug", 2172 .data = &show_unhandled_signals, 2173 .maxlen = sizeof(int), 2174 .mode = 0644, 2175 .proc_handler = proc_dointvec, 2176 }, 2177 #endif 2178 #ifdef CONFIG_SMP 2179 { 2180 .procname = "oops_all_cpu_backtrace", 2181 .data = &sysctl_oops_all_cpu_backtrace, 2182 .maxlen = sizeof(int), 2183 .mode = 0644, 2184 .proc_handler = proc_dointvec_minmax, 2185 .extra1 = SYSCTL_ZERO, 2186 .extra2 = SYSCTL_ONE, 2187 }, 2188 #endif /* CONFIG_SMP */ 2189 { 2190 .procname = "pid_max", 2191 .data = &pid_max, 2192 .maxlen = sizeof (int), 2193 .mode = 0644, 2194 .proc_handler = proc_dointvec_minmax, 2195 .extra1 = &pid_max_min, 2196 .extra2 = &pid_max_max, 2197 }, 2198 { 2199 .procname = "panic_on_oops", 2200 .data = &panic_on_oops, 2201 .maxlen = sizeof(int), 2202 .mode = 0644, 2203 .proc_handler = proc_dointvec, 2204 }, 2205 { 2206 .procname = "panic_print", 2207 .data = &panic_print, 2208 .maxlen = sizeof(unsigned long), 2209 .mode = 0644, 2210 
.proc_handler = proc_doulongvec_minmax, 2211 }, 2212 #if defined CONFIG_PRINTK 2213 { 2214 .procname = "printk", 2215 .data = &console_loglevel, 2216 .maxlen = 4*sizeof(int), 2217 .mode = 0644, 2218 .proc_handler = proc_dointvec, 2219 }, 2220 { 2221 .procname = "printk_ratelimit", 2222 .data = &printk_ratelimit_state.interval, 2223 .maxlen = sizeof(int), 2224 .mode = 0644, 2225 .proc_handler = proc_dointvec_jiffies, 2226 }, 2227 { 2228 .procname = "printk_ratelimit_burst", 2229 .data = &printk_ratelimit_state.burst, 2230 .maxlen = sizeof(int), 2231 .mode = 0644, 2232 .proc_handler = proc_dointvec, 2233 }, 2234 { 2235 .procname = "printk_delay", 2236 .data = &printk_delay_msec, 2237 .maxlen = sizeof(int), 2238 .mode = 0644, 2239 .proc_handler = proc_dointvec_minmax, 2240 .extra1 = SYSCTL_ZERO, 2241 .extra2 = &ten_thousand, 2242 }, 2243 { 2244 .procname = "printk_devkmsg", 2245 .data = devkmsg_log_str, 2246 .maxlen = DEVKMSG_STR_MAX_SIZE, 2247 .mode = 0644, 2248 .proc_handler = devkmsg_sysctl_set_loglvl, 2249 }, 2250 { 2251 .procname = "dmesg_restrict", 2252 .data = &dmesg_restrict, 2253 .maxlen = sizeof(int), 2254 .mode = 0644, 2255 .proc_handler = proc_dointvec_minmax_sysadmin, 2256 .extra1 = SYSCTL_ZERO, 2257 .extra2 = SYSCTL_ONE, 2258 }, 2259 { 2260 .procname = "kptr_restrict", 2261 .data = &kptr_restrict, 2262 .maxlen = sizeof(int), 2263 .mode = 0644, 2264 .proc_handler = proc_dointvec_minmax_sysadmin, 2265 .extra1 = SYSCTL_ZERO, 2266 .extra2 = &two, 2267 }, 2268 #endif 2269 { 2270 .procname = "ngroups_max", 2271 .data = &ngroups_max, 2272 .maxlen = sizeof (int), 2273 .mode = 0444, 2274 .proc_handler = proc_dointvec, 2275 }, 2276 { 2277 .procname = "cap_last_cap", 2278 .data = (void *)&cap_last_cap, 2279 .maxlen = sizeof(int), 2280 .mode = 0444, 2281 .proc_handler = proc_dointvec, 2282 }, 2283 #if defined(CONFIG_LOCKUP_DETECTOR) 2284 { 2285 .procname = "watchdog", 2286 .data = &watchdog_user_enabled, 2287 .maxlen = sizeof(int), 2288 .mode = 0644, 2289 
.proc_handler = proc_watchdog, 2290 .extra1 = SYSCTL_ZERO, 2291 .extra2 = SYSCTL_ONE, 2292 }, 2293 { 2294 .procname = "watchdog_thresh", 2295 .data = &watchdog_thresh, 2296 .maxlen = sizeof(int), 2297 .mode = 0644, 2298 .proc_handler = proc_watchdog_thresh, 2299 .extra1 = SYSCTL_ZERO, 2300 .extra2 = &sixty, 2301 }, 2302 { 2303 .procname = "nmi_watchdog", 2304 .data = &nmi_watchdog_user_enabled, 2305 .maxlen = sizeof(int), 2306 .mode = NMI_WATCHDOG_SYSCTL_PERM, 2307 .proc_handler = proc_nmi_watchdog, 2308 .extra1 = SYSCTL_ZERO, 2309 .extra2 = SYSCTL_ONE, 2310 }, 2311 { 2312 .procname = "watchdog_cpumask", 2313 .data = &watchdog_cpumask_bits, 2314 .maxlen = NR_CPUS, 2315 .mode = 0644, 2316 .proc_handler = proc_watchdog_cpumask, 2317 }, 2318 #ifdef CONFIG_SOFTLOCKUP_DETECTOR 2319 { 2320 .procname = "soft_watchdog", 2321 .data = &soft_watchdog_user_enabled, 2322 .maxlen = sizeof(int), 2323 .mode = 0644, 2324 .proc_handler = proc_soft_watchdog, 2325 .extra1 = SYSCTL_ZERO, 2326 .extra2 = SYSCTL_ONE, 2327 }, 2328 { 2329 .procname = "softlockup_panic", 2330 .data = &softlockup_panic, 2331 .maxlen = sizeof(int), 2332 .mode = 0644, 2333 .proc_handler = proc_dointvec_minmax, 2334 .extra1 = SYSCTL_ZERO, 2335 .extra2 = SYSCTL_ONE, 2336 }, 2337 #ifdef CONFIG_SMP 2338 { 2339 .procname = "softlockup_all_cpu_backtrace", 2340 .data = &sysctl_softlockup_all_cpu_backtrace, 2341 .maxlen = sizeof(int), 2342 .mode = 0644, 2343 .proc_handler = proc_dointvec_minmax, 2344 .extra1 = SYSCTL_ZERO, 2345 .extra2 = SYSCTL_ONE, 2346 }, 2347 #endif /* CONFIG_SMP */ 2348 #endif 2349 #ifdef CONFIG_HARDLOCKUP_DETECTOR 2350 { 2351 .procname = "hardlockup_panic", 2352 .data = &hardlockup_panic, 2353 .maxlen = sizeof(int), 2354 .mode = 0644, 2355 .proc_handler = proc_dointvec_minmax, 2356 .extra1 = SYSCTL_ZERO, 2357 .extra2 = SYSCTL_ONE, 2358 }, 2359 #ifdef CONFIG_SMP 2360 { 2361 .procname = "hardlockup_all_cpu_backtrace", 2362 .data = &sysctl_hardlockup_all_cpu_backtrace, 2363 .maxlen = sizeof(int), 
2364 .mode = 0644, 2365 .proc_handler = proc_dointvec_minmax, 2366 .extra1 = SYSCTL_ZERO, 2367 .extra2 = SYSCTL_ONE, 2368 }, 2369 #endif /* CONFIG_SMP */ 2370 #endif 2371 #endif 2372 2373 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) 2374 { 2375 .procname = "unknown_nmi_panic", 2376 .data = &unknown_nmi_panic, 2377 .maxlen = sizeof (int), 2378 .mode = 0644, 2379 .proc_handler = proc_dointvec, 2380 }, 2381 #endif 2382 2383 #if (defined(CONFIG_X86_32) || defined(CONFIG_PARISC)) && \ 2384 defined(CONFIG_DEBUG_STACKOVERFLOW) 2385 { 2386 .procname = "panic_on_stackoverflow", 2387 .data = &sysctl_panic_on_stackoverflow, 2388 .maxlen = sizeof(int), 2389 .mode = 0644, 2390 .proc_handler = proc_dointvec, 2391 }, 2392 #endif 2393 #if defined(CONFIG_X86) 2394 { 2395 .procname = "panic_on_unrecovered_nmi", 2396 .data = &panic_on_unrecovered_nmi, 2397 .maxlen = sizeof(int), 2398 .mode = 0644, 2399 .proc_handler = proc_dointvec, 2400 }, 2401 { 2402 .procname = "panic_on_io_nmi", 2403 .data = &panic_on_io_nmi, 2404 .maxlen = sizeof(int), 2405 .mode = 0644, 2406 .proc_handler = proc_dointvec, 2407 }, 2408 { 2409 .procname = "bootloader_type", 2410 .data = &bootloader_type, 2411 .maxlen = sizeof (int), 2412 .mode = 0444, 2413 .proc_handler = proc_dointvec, 2414 }, 2415 { 2416 .procname = "bootloader_version", 2417 .data = &bootloader_version, 2418 .maxlen = sizeof (int), 2419 .mode = 0444, 2420 .proc_handler = proc_dointvec, 2421 }, 2422 { 2423 .procname = "io_delay_type", 2424 .data = &io_delay_type, 2425 .maxlen = sizeof(int), 2426 .mode = 0644, 2427 .proc_handler = proc_dointvec, 2428 }, 2429 #endif 2430 #if defined(CONFIG_MMU) 2431 { 2432 .procname = "randomize_va_space", 2433 .data = &randomize_va_space, 2434 .maxlen = sizeof(int), 2435 .mode = 0644, 2436 .proc_handler = proc_dointvec, 2437 }, 2438 #endif 2439 #if defined(CONFIG_S390) && defined(CONFIG_SMP) 2440 { 2441 .procname = "spin_retry", 2442 .data = &spin_retry, 2443 .maxlen = sizeof (int), 2444 .mode = 
0644, 2445 .proc_handler = proc_dointvec, 2446 }, 2447 #endif 2448 #if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86) 2449 { 2450 .procname = "acpi_video_flags", 2451 .data = &acpi_realmode_flags, 2452 .maxlen = sizeof (unsigned long), 2453 .mode = 0644, 2454 .proc_handler = proc_doulongvec_minmax, 2455 }, 2456 #endif 2457 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN 2458 { 2459 .procname = "ignore-unaligned-usertrap", 2460 .data = &no_unaligned_warning, 2461 .maxlen = sizeof (int), 2462 .mode = 0644, 2463 .proc_handler = proc_dointvec, 2464 }, 2465 #endif 2466 #ifdef CONFIG_IA64 2467 { 2468 .procname = "unaligned-dump-stack", 2469 .data = &unaligned_dump_stack, 2470 .maxlen = sizeof (int), 2471 .mode = 0644, 2472 .proc_handler = proc_dointvec, 2473 }, 2474 #endif 2475 #ifdef CONFIG_DETECT_HUNG_TASK 2476 #ifdef CONFIG_SMP 2477 { 2478 .procname = "hung_task_all_cpu_backtrace", 2479 .data = &sysctl_hung_task_all_cpu_backtrace, 2480 .maxlen = sizeof(int), 2481 .mode = 0644, 2482 .proc_handler = proc_dointvec_minmax, 2483 .extra1 = SYSCTL_ZERO, 2484 .extra2 = SYSCTL_ONE, 2485 }, 2486 #endif /* CONFIG_SMP */ 2487 { 2488 .procname = "hung_task_panic", 2489 .data = &sysctl_hung_task_panic, 2490 .maxlen = sizeof(int), 2491 .mode = 0644, 2492 .proc_handler = proc_dointvec_minmax, 2493 .extra1 = SYSCTL_ZERO, 2494 .extra2 = SYSCTL_ONE, 2495 }, 2496 { 2497 .procname = "hung_task_check_count", 2498 .data = &sysctl_hung_task_check_count, 2499 .maxlen = sizeof(int), 2500 .mode = 0644, 2501 .proc_handler = proc_dointvec_minmax, 2502 .extra1 = SYSCTL_ZERO, 2503 }, 2504 { 2505 .procname = "hung_task_timeout_secs", 2506 .data = &sysctl_hung_task_timeout_secs, 2507 .maxlen = sizeof(unsigned long), 2508 .mode = 0644, 2509 .proc_handler = proc_dohung_task_timeout_secs, 2510 .extra2 = &hung_task_timeout_max, 2511 }, 2512 { 2513 .procname = "hung_task_check_interval_secs", 2514 .data = &sysctl_hung_task_check_interval_secs, 2515 .maxlen = sizeof(unsigned long), 2516 .mode = 0644, 2517 
.proc_handler = proc_dohung_task_timeout_secs, 2518 .extra2 = &hung_task_timeout_max, 2519 }, 2520 { 2521 .procname = "hung_task_warnings", 2522 .data = &sysctl_hung_task_warnings, 2523 .maxlen = sizeof(int), 2524 .mode = 0644, 2525 .proc_handler = proc_dointvec_minmax, 2526 .extra1 = &neg_one, 2527 }, 2528 #endif 2529 #ifdef CONFIG_RT_MUTEXES 2530 { 2531 .procname = "max_lock_depth", 2532 .data = &max_lock_depth, 2533 .maxlen = sizeof(int), 2534 .mode = 0644, 2535 .proc_handler = proc_dointvec, 2536 }, 2537 #endif 2538 { 2539 .procname = "poweroff_cmd", 2540 .data = &poweroff_cmd, 2541 .maxlen = POWEROFF_CMD_PATH_LEN, 2542 .mode = 0644, 2543 .proc_handler = proc_dostring, 2544 }, 2545 #ifdef CONFIG_KEYS 2546 { 2547 .procname = "keys", 2548 .mode = 0555, 2549 .child = key_sysctls, 2550 }, 2551 #endif 2552 #ifdef CONFIG_PERF_EVENTS 2553 /* 2554 * User-space scripts rely on the existence of this file 2555 * as a feature check for perf_events being enabled. 2556 * 2557 * So it's an ABI, do not remove! 
2558 */ 2559 { 2560 .procname = "perf_event_paranoid", 2561 .data = &sysctl_perf_event_paranoid, 2562 .maxlen = sizeof(sysctl_perf_event_paranoid), 2563 .mode = 0644, 2564 .proc_handler = proc_dointvec, 2565 }, 2566 { 2567 .procname = "perf_event_mlock_kb", 2568 .data = &sysctl_perf_event_mlock, 2569 .maxlen = sizeof(sysctl_perf_event_mlock), 2570 .mode = 0644, 2571 .proc_handler = proc_dointvec, 2572 }, 2573 { 2574 .procname = "perf_event_max_sample_rate", 2575 .data = &sysctl_perf_event_sample_rate, 2576 .maxlen = sizeof(sysctl_perf_event_sample_rate), 2577 .mode = 0644, 2578 .proc_handler = perf_proc_update_handler, 2579 .extra1 = SYSCTL_ONE, 2580 }, 2581 { 2582 .procname = "perf_cpu_time_max_percent", 2583 .data = &sysctl_perf_cpu_time_max_percent, 2584 .maxlen = sizeof(sysctl_perf_cpu_time_max_percent), 2585 .mode = 0644, 2586 .proc_handler = perf_cpu_time_max_percent_handler, 2587 .extra1 = SYSCTL_ZERO, 2588 .extra2 = &one_hundred, 2589 }, 2590 { 2591 .procname = "perf_event_max_stack", 2592 .data = &sysctl_perf_event_max_stack, 2593 .maxlen = sizeof(sysctl_perf_event_max_stack), 2594 .mode = 0644, 2595 .proc_handler = perf_event_max_stack_handler, 2596 .extra1 = SYSCTL_ZERO, 2597 .extra2 = &six_hundred_forty_kb, 2598 }, 2599 { 2600 .procname = "perf_event_max_contexts_per_stack", 2601 .data = &sysctl_perf_event_max_contexts_per_stack, 2602 .maxlen = sizeof(sysctl_perf_event_max_contexts_per_stack), 2603 .mode = 0644, 2604 .proc_handler = perf_event_max_stack_handler, 2605 .extra1 = SYSCTL_ZERO, 2606 .extra2 = &one_thousand, 2607 }, 2608 #endif 2609 { 2610 .procname = "panic_on_warn", 2611 .data = &panic_on_warn, 2612 .maxlen = sizeof(int), 2613 .mode = 0644, 2614 .proc_handler = proc_dointvec_minmax, 2615 .extra1 = SYSCTL_ZERO, 2616 .extra2 = SYSCTL_ONE, 2617 }, 2618 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 2619 { 2620 .procname = "timer_migration", 2621 .data = &sysctl_timer_migration, 2622 .maxlen = sizeof(unsigned int), 2623 .mode = 0644, 
2624 .proc_handler = timer_migration_handler, 2625 .extra1 = SYSCTL_ZERO, 2626 .extra2 = SYSCTL_ONE, 2627 }, 2628 #endif 2629 #ifdef CONFIG_BPF_SYSCALL 2630 { 2631 .procname = "unprivileged_bpf_disabled", 2632 .data = &sysctl_unprivileged_bpf_disabled, 2633 .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), 2634 .mode = 0644, 2635 .proc_handler = bpf_unpriv_handler, 2636 .extra1 = SYSCTL_ZERO, 2637 .extra2 = &two, 2638 }, 2639 { 2640 .procname = "bpf_stats_enabled", 2641 .data = &bpf_stats_enabled_key.key, 2642 .maxlen = sizeof(bpf_stats_enabled_key), 2643 .mode = 0644, 2644 .proc_handler = bpf_stats_handler, 2645 }, 2646 #endif 2647 #if defined(CONFIG_TREE_RCU) 2648 { 2649 .procname = "panic_on_rcu_stall", 2650 .data = &sysctl_panic_on_rcu_stall, 2651 .maxlen = sizeof(sysctl_panic_on_rcu_stall), 2652 .mode = 0644, 2653 .proc_handler = proc_dointvec_minmax, 2654 .extra1 = SYSCTL_ZERO, 2655 .extra2 = SYSCTL_ONE, 2656 }, 2657 #endif 2658 #if defined(CONFIG_TREE_RCU) 2659 { 2660 .procname = "max_rcu_stall_to_panic", 2661 .data = &sysctl_max_rcu_stall_to_panic, 2662 .maxlen = sizeof(sysctl_max_rcu_stall_to_panic), 2663 .mode = 0644, 2664 .proc_handler = proc_dointvec_minmax, 2665 .extra1 = SYSCTL_ONE, 2666 .extra2 = SYSCTL_INT_MAX, 2667 }, 2668 #endif 2669 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE 2670 { 2671 .procname = "stack_erasing", 2672 .data = NULL, 2673 .maxlen = sizeof(int), 2674 .mode = 0600, 2675 .proc_handler = stack_erasing_sysctl, 2676 .extra1 = SYSCTL_ZERO, 2677 .extra2 = SYSCTL_ONE, 2678 }, 2679 #endif 2680 { } 2681 }; 2682 2683 static struct ctl_table vm_table[] = { 2684 { 2685 .procname = "overcommit_memory", 2686 .data = &sysctl_overcommit_memory, 2687 .maxlen = sizeof(sysctl_overcommit_memory), 2688 .mode = 0644, 2689 .proc_handler = overcommit_policy_handler, 2690 .extra1 = SYSCTL_ZERO, 2691 .extra2 = &two, 2692 }, 2693 { 2694 .procname = "panic_on_oom", 2695 .data = &sysctl_panic_on_oom, 2696 .maxlen = sizeof(sysctl_panic_on_oom), 2697 .mode = 
0644, 2698 .proc_handler = proc_dointvec_minmax, 2699 .extra1 = SYSCTL_ZERO, 2700 .extra2 = &two, 2701 }, 2702 { 2703 .procname = "oom_kill_allocating_task", 2704 .data = &sysctl_oom_kill_allocating_task, 2705 .maxlen = sizeof(sysctl_oom_kill_allocating_task), 2706 .mode = 0644, 2707 .proc_handler = proc_dointvec, 2708 }, 2709 { 2710 .procname = "oom_dump_tasks", 2711 .data = &sysctl_oom_dump_tasks, 2712 .maxlen = sizeof(sysctl_oom_dump_tasks), 2713 .mode = 0644, 2714 .proc_handler = proc_dointvec, 2715 }, 2716 { 2717 .procname = "overcommit_ratio", 2718 .data = &sysctl_overcommit_ratio, 2719 .maxlen = sizeof(sysctl_overcommit_ratio), 2720 .mode = 0644, 2721 .proc_handler = overcommit_ratio_handler, 2722 }, 2723 { 2724 .procname = "overcommit_kbytes", 2725 .data = &sysctl_overcommit_kbytes, 2726 .maxlen = sizeof(sysctl_overcommit_kbytes), 2727 .mode = 0644, 2728 .proc_handler = overcommit_kbytes_handler, 2729 }, 2730 { 2731 .procname = "page-cluster", 2732 .data = &page_cluster, 2733 .maxlen = sizeof(int), 2734 .mode = 0644, 2735 .proc_handler = proc_dointvec_minmax, 2736 .extra1 = SYSCTL_ZERO, 2737 }, 2738 { 2739 .procname = "dirty_background_ratio", 2740 .data = &dirty_background_ratio, 2741 .maxlen = sizeof(dirty_background_ratio), 2742 .mode = 0644, 2743 .proc_handler = dirty_background_ratio_handler, 2744 .extra1 = SYSCTL_ZERO, 2745 .extra2 = &one_hundred, 2746 }, 2747 { 2748 .procname = "dirty_background_bytes", 2749 .data = &dirty_background_bytes, 2750 .maxlen = sizeof(dirty_background_bytes), 2751 .mode = 0644, 2752 .proc_handler = dirty_background_bytes_handler, 2753 .extra1 = &one_ul, 2754 }, 2755 { 2756 .procname = "dirty_ratio", 2757 .data = &vm_dirty_ratio, 2758 .maxlen = sizeof(vm_dirty_ratio), 2759 .mode = 0644, 2760 .proc_handler = dirty_ratio_handler, 2761 .extra1 = SYSCTL_ZERO, 2762 .extra2 = &one_hundred, 2763 }, 2764 { 2765 .procname = "dirty_bytes", 2766 .data = &vm_dirty_bytes, 2767 .maxlen = sizeof(vm_dirty_bytes), 2768 .mode = 0644, 2769 
.proc_handler = dirty_bytes_handler, 2770 .extra1 = &dirty_bytes_min, 2771 }, 2772 { 2773 .procname = "dirty_writeback_centisecs", 2774 .data = &dirty_writeback_interval, 2775 .maxlen = sizeof(dirty_writeback_interval), 2776 .mode = 0644, 2777 .proc_handler = dirty_writeback_centisecs_handler, 2778 }, 2779 { 2780 .procname = "dirty_expire_centisecs", 2781 .data = &dirty_expire_interval, 2782 .maxlen = sizeof(dirty_expire_interval), 2783 .mode = 0644, 2784 .proc_handler = proc_dointvec_minmax, 2785 .extra1 = SYSCTL_ZERO, 2786 }, 2787 { 2788 .procname = "dirtytime_expire_seconds", 2789 .data = &dirtytime_expire_interval, 2790 .maxlen = sizeof(dirtytime_expire_interval), 2791 .mode = 0644, 2792 .proc_handler = dirtytime_interval_handler, 2793 .extra1 = SYSCTL_ZERO, 2794 }, 2795 { 2796 .procname = "swappiness", 2797 .data = &vm_swappiness, 2798 .maxlen = sizeof(vm_swappiness), 2799 .mode = 0644, 2800 .proc_handler = proc_dointvec_minmax, 2801 .extra1 = SYSCTL_ZERO, 2802 .extra2 = &two_hundred, 2803 }, 2804 #ifdef CONFIG_HUGETLB_PAGE 2805 { 2806 .procname = "nr_hugepages", 2807 .data = NULL, 2808 .maxlen = sizeof(unsigned long), 2809 .mode = 0644, 2810 .proc_handler = hugetlb_sysctl_handler, 2811 }, 2812 #ifdef CONFIG_NUMA 2813 { 2814 .procname = "nr_hugepages_mempolicy", 2815 .data = NULL, 2816 .maxlen = sizeof(unsigned long), 2817 .mode = 0644, 2818 .proc_handler = &hugetlb_mempolicy_sysctl_handler, 2819 }, 2820 { 2821 .procname = "numa_stat", 2822 .data = &sysctl_vm_numa_stat, 2823 .maxlen = sizeof(int), 2824 .mode = 0644, 2825 .proc_handler = sysctl_vm_numa_stat_handler, 2826 .extra1 = SYSCTL_ZERO, 2827 .extra2 = SYSCTL_ONE, 2828 }, 2829 #endif 2830 { 2831 .procname = "hugetlb_shm_group", 2832 .data = &sysctl_hugetlb_shm_group, 2833 .maxlen = sizeof(gid_t), 2834 .mode = 0644, 2835 .proc_handler = proc_dointvec, 2836 }, 2837 { 2838 .procname = "nr_overcommit_hugepages", 2839 .data = NULL, 2840 .maxlen = sizeof(unsigned long), 2841 .mode = 0644, 2842 .proc_handler = 
hugetlb_overcommit_handler, 2843 }, 2844 #endif 2845 { 2846 .procname = "lowmem_reserve_ratio", 2847 .data = &sysctl_lowmem_reserve_ratio, 2848 .maxlen = sizeof(sysctl_lowmem_reserve_ratio), 2849 .mode = 0644, 2850 .proc_handler = lowmem_reserve_ratio_sysctl_handler, 2851 }, 2852 { 2853 .procname = "drop_caches", 2854 .data = &sysctl_drop_caches, 2855 .maxlen = sizeof(int), 2856 .mode = 0200, 2857 .proc_handler = drop_caches_sysctl_handler, 2858 .extra1 = SYSCTL_ONE, 2859 .extra2 = &four, 2860 }, 2861 #ifdef CONFIG_COMPACTION 2862 { 2863 .procname = "compact_memory", 2864 .data = NULL, 2865 .maxlen = sizeof(int), 2866 .mode = 0200, 2867 .proc_handler = sysctl_compaction_handler, 2868 }, 2869 { 2870 .procname = "compaction_proactiveness", 2871 .data = &sysctl_compaction_proactiveness, 2872 .maxlen = sizeof(sysctl_compaction_proactiveness), 2873 .mode = 0644, 2874 .proc_handler = proc_dointvec_minmax, 2875 .extra1 = SYSCTL_ZERO, 2876 .extra2 = &one_hundred, 2877 }, 2878 { 2879 .procname = "extfrag_threshold", 2880 .data = &sysctl_extfrag_threshold, 2881 .maxlen = sizeof(int), 2882 .mode = 0644, 2883 .proc_handler = proc_dointvec_minmax, 2884 .extra1 = &min_extfrag_threshold, 2885 .extra2 = &max_extfrag_threshold, 2886 }, 2887 { 2888 .procname = "compact_unevictable_allowed", 2889 .data = &sysctl_compact_unevictable_allowed, 2890 .maxlen = sizeof(int), 2891 .mode = 0644, 2892 .proc_handler = proc_dointvec_minmax_warn_RT_change, 2893 .extra1 = SYSCTL_ZERO, 2894 .extra2 = SYSCTL_ONE, 2895 }, 2896 2897 #endif /* CONFIG_COMPACTION */ 2898 { 2899 .procname = "min_free_kbytes", 2900 .data = &min_free_kbytes, 2901 .maxlen = sizeof(min_free_kbytes), 2902 .mode = 0644, 2903 .proc_handler = min_free_kbytes_sysctl_handler, 2904 .extra1 = SYSCTL_ZERO, 2905 }, 2906 { 2907 .procname = "watermark_boost_factor", 2908 .data = &watermark_boost_factor, 2909 .maxlen = sizeof(watermark_boost_factor), 2910 .mode = 0644, 2911 .proc_handler = proc_dointvec_minmax, 2912 .extra1 = SYSCTL_ZERO, 
2913 }, 2914 { 2915 .procname = "watermark_scale_factor", 2916 .data = &watermark_scale_factor, 2917 .maxlen = sizeof(watermark_scale_factor), 2918 .mode = 0644, 2919 .proc_handler = watermark_scale_factor_sysctl_handler, 2920 .extra1 = SYSCTL_ONE, 2921 .extra2 = &one_thousand, 2922 }, 2923 { 2924 .procname = "percpu_pagelist_high_fraction", 2925 .data = &percpu_pagelist_high_fraction, 2926 .maxlen = sizeof(percpu_pagelist_high_fraction), 2927 .mode = 0644, 2928 .proc_handler = percpu_pagelist_high_fraction_sysctl_handler, 2929 .extra1 = SYSCTL_ZERO, 2930 }, 2931 { 2932 .procname = "page_lock_unfairness", 2933 .data = &sysctl_page_lock_unfairness, 2934 .maxlen = sizeof(sysctl_page_lock_unfairness), 2935 .mode = 0644, 2936 .proc_handler = proc_dointvec_minmax, 2937 .extra1 = SYSCTL_ZERO, 2938 }, 2939 #ifdef CONFIG_MMU 2940 { 2941 .procname = "max_map_count", 2942 .data = &sysctl_max_map_count, 2943 .maxlen = sizeof(sysctl_max_map_count), 2944 .mode = 0644, 2945 .proc_handler = proc_dointvec_minmax, 2946 .extra1 = SYSCTL_ZERO, 2947 }, 2948 #else 2949 { 2950 .procname = "nr_trim_pages", 2951 .data = &sysctl_nr_trim_pages, 2952 .maxlen = sizeof(sysctl_nr_trim_pages), 2953 .mode = 0644, 2954 .proc_handler = proc_dointvec_minmax, 2955 .extra1 = SYSCTL_ZERO, 2956 }, 2957 #endif 2958 { 2959 .procname = "laptop_mode", 2960 .data = &laptop_mode, 2961 .maxlen = sizeof(laptop_mode), 2962 .mode = 0644, 2963 .proc_handler = proc_dointvec_jiffies, 2964 }, 2965 { 2966 .procname = "vfs_cache_pressure", 2967 .data = &sysctl_vfs_cache_pressure, 2968 .maxlen = sizeof(sysctl_vfs_cache_pressure), 2969 .mode = 0644, 2970 .proc_handler = proc_dointvec_minmax, 2971 .extra1 = SYSCTL_ZERO, 2972 }, 2973 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \ 2974 defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT) 2975 { 2976 .procname = "legacy_va_layout", 2977 .data = &sysctl_legacy_va_layout, 2978 .maxlen = sizeof(sysctl_legacy_va_layout), 2979 .mode = 0644, 2980 .proc_handler = 
proc_dointvec_minmax, 2981 .extra1 = SYSCTL_ZERO, 2982 }, 2983 #endif 2984 #ifdef CONFIG_NUMA 2985 { 2986 .procname = "zone_reclaim_mode", 2987 .data = &node_reclaim_mode, 2988 .maxlen = sizeof(node_reclaim_mode), 2989 .mode = 0644, 2990 .proc_handler = proc_dointvec_minmax, 2991 .extra1 = SYSCTL_ZERO, 2992 }, 2993 { 2994 .procname = "min_unmapped_ratio", 2995 .data = &sysctl_min_unmapped_ratio, 2996 .maxlen = sizeof(sysctl_min_unmapped_ratio), 2997 .mode = 0644, 2998 .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler, 2999 .extra1 = SYSCTL_ZERO, 3000 .extra2 = &one_hundred, 3001 }, 3002 { 3003 .procname = "min_slab_ratio", 3004 .data = &sysctl_min_slab_ratio, 3005 .maxlen = sizeof(sysctl_min_slab_ratio), 3006 .mode = 0644, 3007 .proc_handler = sysctl_min_slab_ratio_sysctl_handler, 3008 .extra1 = SYSCTL_ZERO, 3009 .extra2 = &one_hundred, 3010 }, 3011 #endif 3012 #ifdef CONFIG_SMP 3013 { 3014 .procname = "stat_interval", 3015 .data = &sysctl_stat_interval, 3016 .maxlen = sizeof(sysctl_stat_interval), 3017 .mode = 0644, 3018 .proc_handler = proc_dointvec_jiffies, 3019 }, 3020 { 3021 .procname = "stat_refresh", 3022 .data = NULL, 3023 .maxlen = 0, 3024 .mode = 0600, 3025 .proc_handler = vmstat_refresh, 3026 }, 3027 #endif 3028 #ifdef CONFIG_MMU 3029 { 3030 .procname = "mmap_min_addr", 3031 .data = &dac_mmap_min_addr, 3032 .maxlen = sizeof(unsigned long), 3033 .mode = 0644, 3034 .proc_handler = mmap_min_addr_handler, 3035 }, 3036 #endif 3037 #ifdef CONFIG_NUMA 3038 { 3039 .procname = "numa_zonelist_order", 3040 .data = &numa_zonelist_order, 3041 .maxlen = NUMA_ZONELIST_ORDER_LEN, 3042 .mode = 0644, 3043 .proc_handler = numa_zonelist_order_handler, 3044 }, 3045 #endif 3046 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \ 3047 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) 3048 { 3049 .procname = "vdso_enabled", 3050 #ifdef CONFIG_X86_32 3051 .data = &vdso32_enabled, 3052 .maxlen = sizeof(vdso32_enabled), 3053 #else 3054 .data = &vdso_enabled, 3055 
.maxlen = sizeof(vdso_enabled), 3056 #endif 3057 .mode = 0644, 3058 .proc_handler = proc_dointvec, 3059 .extra1 = SYSCTL_ZERO, 3060 }, 3061 #endif 3062 #ifdef CONFIG_HIGHMEM 3063 { 3064 .procname = "highmem_is_dirtyable", 3065 .data = &vm_highmem_is_dirtyable, 3066 .maxlen = sizeof(vm_highmem_is_dirtyable), 3067 .mode = 0644, 3068 .proc_handler = proc_dointvec_minmax, 3069 .extra1 = SYSCTL_ZERO, 3070 .extra2 = SYSCTL_ONE, 3071 }, 3072 #endif 3073 #ifdef CONFIG_MEMORY_FAILURE 3074 { 3075 .procname = "memory_failure_early_kill", 3076 .data = &sysctl_memory_failure_early_kill, 3077 .maxlen = sizeof(sysctl_memory_failure_early_kill), 3078 .mode = 0644, 3079 .proc_handler = proc_dointvec_minmax, 3080 .extra1 = SYSCTL_ZERO, 3081 .extra2 = SYSCTL_ONE, 3082 }, 3083 { 3084 .procname = "memory_failure_recovery", 3085 .data = &sysctl_memory_failure_recovery, 3086 .maxlen = sizeof(sysctl_memory_failure_recovery), 3087 .mode = 0644, 3088 .proc_handler = proc_dointvec_minmax, 3089 .extra1 = SYSCTL_ZERO, 3090 .extra2 = SYSCTL_ONE, 3091 }, 3092 #endif 3093 { 3094 .procname = "user_reserve_kbytes", 3095 .data = &sysctl_user_reserve_kbytes, 3096 .maxlen = sizeof(sysctl_user_reserve_kbytes), 3097 .mode = 0644, 3098 .proc_handler = proc_doulongvec_minmax, 3099 }, 3100 { 3101 .procname = "admin_reserve_kbytes", 3102 .data = &sysctl_admin_reserve_kbytes, 3103 .maxlen = sizeof(sysctl_admin_reserve_kbytes), 3104 .mode = 0644, 3105 .proc_handler = proc_doulongvec_minmax, 3106 }, 3107 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS 3108 { 3109 .procname = "mmap_rnd_bits", 3110 .data = &mmap_rnd_bits, 3111 .maxlen = sizeof(mmap_rnd_bits), 3112 .mode = 0600, 3113 .proc_handler = proc_dointvec_minmax, 3114 .extra1 = (void *)&mmap_rnd_bits_min, 3115 .extra2 = (void *)&mmap_rnd_bits_max, 3116 }, 3117 #endif 3118 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS 3119 { 3120 .procname = "mmap_rnd_compat_bits", 3121 .data = &mmap_rnd_compat_bits, 3122 .maxlen = sizeof(mmap_rnd_compat_bits), 3123 .mode = 0600, 
3124 .proc_handler = proc_dointvec_minmax, 3125 .extra1 = (void *)&mmap_rnd_compat_bits_min, 3126 .extra2 = (void *)&mmap_rnd_compat_bits_max, 3127 }, 3128 #endif 3129 #ifdef CONFIG_USERFAULTFD 3130 { 3131 .procname = "unprivileged_userfaultfd", 3132 .data = &sysctl_unprivileged_userfaultfd, 3133 .maxlen = sizeof(sysctl_unprivileged_userfaultfd), 3134 .mode = 0644, 3135 .proc_handler = proc_dointvec_minmax, 3136 .extra1 = SYSCTL_ZERO, 3137 .extra2 = SYSCTL_ONE, 3138 }, 3139 #endif 3140 { } 3141 }; 3142 3143 static struct ctl_table fs_table[] = { 3144 { 3145 .procname = "inode-nr", 3146 .data = &inodes_stat, 3147 .maxlen = 2*sizeof(long), 3148 .mode = 0444, 3149 .proc_handler = proc_nr_inodes, 3150 }, 3151 { 3152 .procname = "inode-state", 3153 .data = &inodes_stat, 3154 .maxlen = 7*sizeof(long), 3155 .mode = 0444, 3156 .proc_handler = proc_nr_inodes, 3157 }, 3158 { 3159 .procname = "file-nr", 3160 .data = &files_stat, 3161 .maxlen = sizeof(files_stat), 3162 .mode = 0444, 3163 .proc_handler = proc_nr_files, 3164 }, 3165 { 3166 .procname = "file-max", 3167 .data = &files_stat.max_files, 3168 .maxlen = sizeof(files_stat.max_files), 3169 .mode = 0644, 3170 .proc_handler = proc_doulongvec_minmax, 3171 .extra1 = &zero_ul, 3172 .extra2 = &long_max, 3173 }, 3174 { 3175 .procname = "nr_open", 3176 .data = &sysctl_nr_open, 3177 .maxlen = sizeof(unsigned int), 3178 .mode = 0644, 3179 .proc_handler = proc_dointvec_minmax, 3180 .extra1 = &sysctl_nr_open_min, 3181 .extra2 = &sysctl_nr_open_max, 3182 }, 3183 { 3184 .procname = "dentry-state", 3185 .data = &dentry_stat, 3186 .maxlen = 6*sizeof(long), 3187 .mode = 0444, 3188 .proc_handler = proc_nr_dentry, 3189 }, 3190 { 3191 .procname = "overflowuid", 3192 .data = &fs_overflowuid, 3193 .maxlen = sizeof(int), 3194 .mode = 0644, 3195 .proc_handler = proc_dointvec_minmax, 3196 .extra1 = &minolduid, 3197 .extra2 = &maxolduid, 3198 }, 3199 { 3200 .procname = "overflowgid", 3201 .data = &fs_overflowgid, 3202 .maxlen = sizeof(int), 3203 
.mode = 0644, 3204 .proc_handler = proc_dointvec_minmax, 3205 .extra1 = &minolduid, 3206 .extra2 = &maxolduid, 3207 }, 3208 #ifdef CONFIG_FILE_LOCKING 3209 { 3210 .procname = "leases-enable", 3211 .data = &leases_enable, 3212 .maxlen = sizeof(int), 3213 .mode = 0644, 3214 .proc_handler = proc_dointvec, 3215 }, 3216 #endif 3217 #ifdef CONFIG_DNOTIFY 3218 { 3219 .procname = "dir-notify-enable", 3220 .data = &dir_notify_enable, 3221 .maxlen = sizeof(int), 3222 .mode = 0644, 3223 .proc_handler = proc_dointvec, 3224 }, 3225 #endif 3226 #ifdef CONFIG_MMU 3227 #ifdef CONFIG_FILE_LOCKING 3228 { 3229 .procname = "lease-break-time", 3230 .data = &lease_break_time, 3231 .maxlen = sizeof(int), 3232 .mode = 0644, 3233 .proc_handler = proc_dointvec, 3234 }, 3235 #endif 3236 #ifdef CONFIG_AIO 3237 { 3238 .procname = "aio-nr", 3239 .data = &aio_nr, 3240 .maxlen = sizeof(aio_nr), 3241 .mode = 0444, 3242 .proc_handler = proc_doulongvec_minmax, 3243 }, 3244 { 3245 .procname = "aio-max-nr", 3246 .data = &aio_max_nr, 3247 .maxlen = sizeof(aio_max_nr), 3248 .mode = 0644, 3249 .proc_handler = proc_doulongvec_minmax, 3250 }, 3251 #endif /* CONFIG_AIO */ 3252 #ifdef CONFIG_INOTIFY_USER 3253 { 3254 .procname = "inotify", 3255 .mode = 0555, 3256 .child = inotify_table, 3257 }, 3258 #endif 3259 #ifdef CONFIG_FANOTIFY 3260 { 3261 .procname = "fanotify", 3262 .mode = 0555, 3263 .child = fanotify_table, 3264 }, 3265 #endif 3266 #ifdef CONFIG_EPOLL 3267 { 3268 .procname = "epoll", 3269 .mode = 0555, 3270 .child = epoll_table, 3271 }, 3272 #endif 3273 #endif 3274 { 3275 .procname = "protected_symlinks", 3276 .data = &sysctl_protected_symlinks, 3277 .maxlen = sizeof(int), 3278 .mode = 0600, 3279 .proc_handler = proc_dointvec_minmax, 3280 .extra1 = SYSCTL_ZERO, 3281 .extra2 = SYSCTL_ONE, 3282 }, 3283 { 3284 .procname = "protected_hardlinks", 3285 .data = &sysctl_protected_hardlinks, 3286 .maxlen = sizeof(int), 3287 .mode = 0600, 3288 .proc_handler = proc_dointvec_minmax, 3289 .extra1 = SYSCTL_ZERO, 
3290 .extra2 = SYSCTL_ONE, 3291 }, 3292 { 3293 .procname = "protected_fifos", 3294 .data = &sysctl_protected_fifos, 3295 .maxlen = sizeof(int), 3296 .mode = 0600, 3297 .proc_handler = proc_dointvec_minmax, 3298 .extra1 = SYSCTL_ZERO, 3299 .extra2 = &two, 3300 }, 3301 { 3302 .procname = "protected_regular", 3303 .data = &sysctl_protected_regular, 3304 .maxlen = sizeof(int), 3305 .mode = 0600, 3306 .proc_handler = proc_dointvec_minmax, 3307 .extra1 = SYSCTL_ZERO, 3308 .extra2 = &two, 3309 }, 3310 { 3311 .procname = "suid_dumpable", 3312 .data = &suid_dumpable, 3313 .maxlen = sizeof(int), 3314 .mode = 0644, 3315 .proc_handler = proc_dointvec_minmax_coredump, 3316 .extra1 = SYSCTL_ZERO, 3317 .extra2 = &two, 3318 }, 3319 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) 3320 { 3321 .procname = "binfmt_misc", 3322 .mode = 0555, 3323 .child = sysctl_mount_point, 3324 }, 3325 #endif 3326 { 3327 .procname = "pipe-max-size", 3328 .data = &pipe_max_size, 3329 .maxlen = sizeof(pipe_max_size), 3330 .mode = 0644, 3331 .proc_handler = proc_dopipe_max_size, 3332 }, 3333 { 3334 .procname = "pipe-user-pages-hard", 3335 .data = &pipe_user_pages_hard, 3336 .maxlen = sizeof(pipe_user_pages_hard), 3337 .mode = 0644, 3338 .proc_handler = proc_doulongvec_minmax, 3339 }, 3340 { 3341 .procname = "pipe-user-pages-soft", 3342 .data = &pipe_user_pages_soft, 3343 .maxlen = sizeof(pipe_user_pages_soft), 3344 .mode = 0644, 3345 .proc_handler = proc_doulongvec_minmax, 3346 }, 3347 { 3348 .procname = "mount-max", 3349 .data = &sysctl_mount_max, 3350 .maxlen = sizeof(unsigned int), 3351 .mode = 0644, 3352 .proc_handler = proc_dointvec_minmax, 3353 .extra1 = SYSCTL_ONE, 3354 }, 3355 { } 3356 }; 3357 3358 static struct ctl_table debug_table[] = { 3359 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE 3360 { 3361 .procname = "exception-trace", 3362 .data = &show_unhandled_signals, 3363 .maxlen = sizeof(int), 3364 .mode = 0644, 3365 .proc_handler = proc_dointvec 3366 }, 3367 #endif 3368 #if 
defined(CONFIG_OPTPROBES) 3369 { 3370 .procname = "kprobes-optimization", 3371 .data = &sysctl_kprobes_optimization, 3372 .maxlen = sizeof(int), 3373 .mode = 0644, 3374 .proc_handler = proc_kprobes_optimization_handler, 3375 .extra1 = SYSCTL_ZERO, 3376 .extra2 = SYSCTL_ONE, 3377 }, 3378 #endif 3379 { } 3380 }; 3381 3382 static struct ctl_table dev_table[] = { 3383 { } 3384 }; 3385 3386 static struct ctl_table sysctl_base_table[] = { 3387 { 3388 .procname = "kernel", 3389 .mode = 0555, 3390 .child = kern_table, 3391 }, 3392 { 3393 .procname = "vm", 3394 .mode = 0555, 3395 .child = vm_table, 3396 }, 3397 { 3398 .procname = "fs", 3399 .mode = 0555, 3400 .child = fs_table, 3401 }, 3402 { 3403 .procname = "debug", 3404 .mode = 0555, 3405 .child = debug_table, 3406 }, 3407 { 3408 .procname = "dev", 3409 .mode = 0555, 3410 .child = dev_table, 3411 }, 3412 { } 3413 }; 3414 3415 int __init sysctl_init(void) 3416 { 3417 struct ctl_table_header *hdr; 3418 3419 hdr = register_sysctl_table(sysctl_base_table); 3420 kmemleak_not_leak(hdr); 3421 return 0; 3422 } 3423 #endif /* CONFIG_SYSCTL */ 3424 /* 3425 * No sense putting this after each symbol definition, twice, 3426 * exception granted :-) 3427 */ 3428 EXPORT_SYMBOL(proc_dointvec); 3429 EXPORT_SYMBOL(proc_douintvec); 3430 EXPORT_SYMBOL(proc_dointvec_jiffies); 3431 EXPORT_SYMBOL(proc_dointvec_minmax); 3432 EXPORT_SYMBOL_GPL(proc_douintvec_minmax); 3433 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies); 3434 EXPORT_SYMBOL(proc_dointvec_ms_jiffies); 3435 EXPORT_SYMBOL(proc_dostring); 3436 EXPORT_SYMBOL(proc_doulongvec_minmax); 3437 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax); 3438 EXPORT_SYMBOL(proc_do_large_bitmap); 3439