// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own. We don't use the
 *  rest of procfs anymore.
 *
 *
 *  Changelog:
 *  17-Jan-2005
 *  Allan Bezerra
 *  Bruna Moreira <bruna.moreira@indt.org.br>
 *  Edjard Mota <edjard.mota@indt.org.br>
 *  Ilias Biris <ilias.biris@indt.org.br>
 *  Mauricio Lin <mauricio.lin@indt.org.br>
 *
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *
 *  A new process specific entry (smaps) included in /proc. It shows the
 *  size of rss for each memory area. The maps entry lacks information
 *  about physical memory size (rss) for each mapped file, i.e.,
 *  rss information for executables and library files.
 *  This additional information is useful for any tools that need to know
 *  about physical memory consumption for a process specific library.
 *
 *  Changelog:
 *  21-Feb-2005
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *  Pud inclusion in the page table walking.
 *
 *  ChangeLog:
 *  10-Mar-2005
 *  10LE Instituto Nokia de Tecnologia - INdT:
 *  A better way to walk through the page table as suggested by Hugh Dickins.
 *
 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
 *  Smaps information related to shared, private, clean and dirty pages.
 *
 *  Paul Mundt <paul.mundt@nokia.com>:
 *  Overall revision about smaps.
 */

#include <linux/uaccess.h>

#include <linux/errno.h>
#include <linux/time.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/init.h>
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/generic-radix-tree.h>
#include <linux/string.h>
#include <linux/seq_file.h>
#include <linux/namei.h>
#include <linux/mnt_namespace.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/rcupdate.h>
#include <linux/kallsyms.h>
#include <linux/stacktrace.h>
#include <linux/resource.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/printk.h>
#include <linux/cache.h>
#include <linux/cgroup.h>
#include <linux/cpuset.h>
#include <linux/audit.h>
#include <linux/poll.h>
#include <linux/nsproxy.h>
#include <linux/oom.h>
#include <linux/elf.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/fs_parser.h>
#include <linux/fs_struct.h>
#include <linux/slab.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/debug.h>
#include <linux/sched/exec_state.h>
#include <linux/sched/stat.h>
#include <linux/posix-timers.h>
#include <linux/time_namespace.h>
#include <linux/resctrl.h>
#include <linux/cn_proc.h>
#include <linux/ksm.h>
#include <uapi/linux/lsm.h>
#include <trace/events/oom.h>
#include "internal.h"
#include "fd.h"

#include "../../lib/kstrtox.h"

/* NOTE:
 *	Implementing inode permission operations in /proc is almost
 *	certainly an error.  Permission checks need to happen during
 *	each system call not at open time.  The reason is that most of
 *	what we wish to check for permissions in /proc varies at runtime.
113 * 114 * The classic example of a problem is opening file descriptors 115 * in /proc for a task before it execs a suid executable. 116 */ 117 118 static u8 nlink_tid __ro_after_init; 119 static u8 nlink_tgid __ro_after_init; 120 121 enum proc_mem_force { 122 PROC_MEM_FORCE_ALWAYS, 123 PROC_MEM_FORCE_PTRACE, 124 PROC_MEM_FORCE_NEVER 125 }; 126 127 static enum proc_mem_force proc_mem_force_override __ro_after_init = 128 IS_ENABLED(CONFIG_PROC_MEM_NO_FORCE) ? PROC_MEM_FORCE_NEVER : 129 IS_ENABLED(CONFIG_PROC_MEM_FORCE_PTRACE) ? PROC_MEM_FORCE_PTRACE : 130 PROC_MEM_FORCE_ALWAYS; 131 132 static const struct constant_table proc_mem_force_table[] __initconst = { 133 { "always", PROC_MEM_FORCE_ALWAYS }, 134 { "ptrace", PROC_MEM_FORCE_PTRACE }, 135 { "never", PROC_MEM_FORCE_NEVER }, 136 { } 137 }; 138 139 static int __init early_proc_mem_force_override(char *buf) 140 { 141 if (!buf) 142 return -EINVAL; 143 144 /* 145 * lookup_constant() defaults to proc_mem_force_override to preseve 146 * the initial Kconfig choice in case an invalid param gets passed. 
147 */ 148 proc_mem_force_override = lookup_constant(proc_mem_force_table, 149 buf, proc_mem_force_override); 150 151 return 0; 152 } 153 early_param("proc_mem.force_override", early_proc_mem_force_override); 154 155 struct pid_entry { 156 const char *name; 157 unsigned int len; 158 umode_t mode; 159 const struct inode_operations *iop; 160 const struct file_operations *fop; 161 union proc_op op; 162 }; 163 164 #define NOD(NAME, MODE, IOP, FOP, OP) { \ 165 .name = (NAME), \ 166 .len = sizeof(NAME) - 1, \ 167 .mode = MODE, \ 168 .iop = IOP, \ 169 .fop = FOP, \ 170 .op = OP, \ 171 } 172 173 #define DIR(NAME, MODE, iops, fops) \ 174 NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} ) 175 #define LNK(NAME, get_link) \ 176 NOD(NAME, (S_IFLNK|S_IRWXUGO), \ 177 &proc_pid_link_inode_operations, NULL, \ 178 { .proc_get_link = get_link } ) 179 #define REG(NAME, MODE, fops) \ 180 NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) 181 #define ONE(NAME, MODE, show) \ 182 NOD(NAME, (S_IFREG|(MODE)), \ 183 NULL, &proc_single_file_operations, \ 184 { .proc_show = show } ) 185 #define ATTR(LSMID, NAME, MODE) \ 186 NOD(NAME, (S_IFREG|(MODE)), \ 187 NULL, &proc_pid_attr_operations, \ 188 { .lsmid = LSMID }) 189 190 /* 191 * Count the number of hardlinks for the pid_entry table, excluding the . 192 * and .. links. 
193 */ 194 static unsigned int __init pid_entry_nlink(const struct pid_entry *entries, 195 unsigned int n) 196 { 197 unsigned int i; 198 unsigned int count; 199 200 count = 2; 201 for (i = 0; i < n; ++i) { 202 if (S_ISDIR(entries[i].mode)) 203 ++count; 204 } 205 206 return count; 207 } 208 209 static int get_task_root(struct task_struct *task, struct path *root) 210 { 211 int result = -ENOENT; 212 213 task_lock(task); 214 if (task->fs) { 215 get_fs_root(task->fs, root); 216 result = 0; 217 } 218 task_unlock(task); 219 return result; 220 } 221 222 static int proc_cwd_link(struct dentry *dentry, struct path *path, 223 struct task_struct *task) 224 { 225 int result = -ENOENT; 226 227 task_lock(task); 228 if (task->fs) { 229 get_fs_pwd(task->fs, path); 230 result = 0; 231 } 232 task_unlock(task); 233 return result; 234 } 235 236 static int proc_root_link(struct dentry *dentry, struct path *path, 237 struct task_struct *task) 238 { 239 return get_task_root(task, path); 240 } 241 242 /* 243 * If the user used setproctitle(), we just get the string from 244 * user space at arg_start, and limit it to a maximum of one page. 
245 */ 246 static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf, 247 size_t count, unsigned long pos, 248 unsigned long arg_start) 249 { 250 char *page; 251 int ret, got; 252 253 if (pos >= PAGE_SIZE) 254 return 0; 255 256 page = kmalloc(PAGE_SIZE, GFP_KERNEL); 257 if (!page) 258 return -ENOMEM; 259 260 ret = 0; 261 got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON); 262 if (got > 0) { 263 int len = strnlen(page, got); 264 265 /* Include the NUL character if it was found */ 266 if (len < got) 267 len++; 268 269 if (len > pos) { 270 len -= pos; 271 if (len > count) 272 len = count; 273 len -= copy_to_user(buf, page+pos, len); 274 if (!len) 275 len = -EFAULT; 276 ret = len; 277 } 278 } 279 kfree(page); 280 return ret; 281 } 282 283 static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, 284 size_t count, loff_t *ppos) 285 { 286 unsigned long arg_start, arg_end, env_start, env_end; 287 unsigned long pos, len; 288 char *page, c; 289 290 /* Check if process spawned far enough to have cmdline. */ 291 if (!mm->env_end) 292 return 0; 293 294 spin_lock(&mm->arg_lock); 295 arg_start = mm->arg_start; 296 arg_end = mm->arg_end; 297 env_start = mm->env_start; 298 env_end = mm->env_end; 299 spin_unlock(&mm->arg_lock); 300 301 if (arg_start >= arg_end) 302 return 0; 303 304 /* 305 * We allow setproctitle() to overwrite the argument 306 * strings, and overflow past the original end. But 307 * only when it overflows into the environment area. 308 */ 309 if (env_start != arg_end || env_end < env_start) 310 env_start = env_end = arg_end; 311 len = env_end - arg_start; 312 313 /* We're not going to care if "*ppos" has high bits set */ 314 pos = *ppos; 315 if (pos >= len) 316 return 0; 317 if (count > len - pos) 318 count = len - pos; 319 if (!count) 320 return 0; 321 322 /* 323 * Magical special case: if the argv[] end byte is not 324 * zero, the user has overwritten it with setproctitle(3). 
325 * 326 * Possible future enhancement: do this only once when 327 * pos is 0, and set a flag in the 'struct file'. 328 */ 329 if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c) 330 return get_mm_proctitle(mm, buf, count, pos, arg_start); 331 332 /* 333 * For the non-setproctitle() case we limit things strictly 334 * to the [arg_start, arg_end[ range. 335 */ 336 pos += arg_start; 337 if (pos < arg_start || pos >= arg_end) 338 return 0; 339 if (count > arg_end - pos) 340 count = arg_end - pos; 341 342 page = kmalloc(PAGE_SIZE, GFP_KERNEL); 343 if (!page) 344 return -ENOMEM; 345 346 len = 0; 347 while (count) { 348 int got; 349 size_t size = min_t(size_t, PAGE_SIZE, count); 350 351 got = access_remote_vm(mm, pos, page, size, FOLL_ANON); 352 if (got <= 0) 353 break; 354 got -= copy_to_user(buf, page, got); 355 if (unlikely(!got)) { 356 if (!len) 357 len = -EFAULT; 358 break; 359 } 360 pos += got; 361 buf += got; 362 len += got; 363 count -= got; 364 } 365 366 kfree(page); 367 return len; 368 } 369 370 static ssize_t get_task_cmdline(struct task_struct *tsk, char __user *buf, 371 size_t count, loff_t *pos) 372 { 373 struct mm_struct *mm; 374 ssize_t ret; 375 376 mm = get_task_mm(tsk); 377 if (!mm) 378 return 0; 379 380 ret = get_mm_cmdline(mm, buf, count, pos); 381 mmput(mm); 382 return ret; 383 } 384 385 static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, 386 size_t count, loff_t *pos) 387 { 388 struct task_struct *tsk; 389 ssize_t ret; 390 391 BUG_ON(*pos < 0); 392 393 tsk = get_proc_task(file_inode(file)); 394 if (!tsk) 395 return -ESRCH; 396 ret = get_task_cmdline(tsk, buf, count, pos); 397 put_task_struct(tsk); 398 if (ret > 0) 399 *pos += ret; 400 return ret; 401 } 402 403 static const struct file_operations proc_pid_cmdline_ops = { 404 .read = proc_pid_cmdline_read, 405 .llseek = generic_file_llseek, 406 }; 407 408 #ifdef CONFIG_KALLSYMS 409 /* 410 * Provides a wchan file via kallsyms in a proper one-value-per-file format. 
411 * Returns the resolved symbol to user space. 412 */ 413 static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, 414 struct pid *pid, struct task_struct *task) 415 { 416 unsigned long wchan; 417 char symname[KSYM_NAME_LEN]; 418 int err; 419 420 err = down_read_killable(&task->signal->exec_update_lock); 421 if (err) 422 return err; 423 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) 424 goto print0; 425 426 wchan = get_wchan(task); 427 if (wchan && !lookup_symbol_name(wchan, symname)) { 428 seq_puts(m, symname); 429 up_read(&task->signal->exec_update_lock); 430 return 0; 431 } 432 433 print0: 434 seq_putc(m, '0'); 435 up_read(&task->signal->exec_update_lock); 436 return 0; 437 } 438 #endif /* CONFIG_KALLSYMS */ 439 440 static int lock_trace(struct task_struct *task) 441 { 442 int err = down_read_killable(&task->signal->exec_update_lock); 443 if (err) 444 return err; 445 if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) { 446 up_read(&task->signal->exec_update_lock); 447 return -EPERM; 448 } 449 return 0; 450 } 451 452 static void unlock_trace(struct task_struct *task) 453 { 454 up_read(&task->signal->exec_update_lock); 455 } 456 457 #ifdef CONFIG_STACKTRACE 458 459 #define MAX_STACK_TRACE_DEPTH 64 460 461 static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, 462 struct pid *pid, struct task_struct *task) 463 { 464 unsigned long *entries; 465 int err; 466 467 /* 468 * The ability to racily run the kernel stack unwinder on a running task 469 * and then observe the unwinder output is scary; while it is useful for 470 * debugging kernel issues, it can also allow an attacker to leak kernel 471 * stack contents. 472 * Doing this in a manner that is at least safe from races would require 473 * some work to ensure that the remote task can not be scheduled; and 474 * even then, this would still expose the unwinder as local attack 475 * surface. 476 * Therefore, this interface is restricted to root. 
477 */ 478 if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) 479 return -EACCES; 480 481 entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries), 482 GFP_KERNEL); 483 if (!entries) 484 return -ENOMEM; 485 486 err = lock_trace(task); 487 if (!err) { 488 unsigned int i, nr_entries; 489 490 nr_entries = stack_trace_save_tsk(task, entries, 491 MAX_STACK_TRACE_DEPTH, 0); 492 493 for (i = 0; i < nr_entries; i++) { 494 seq_printf(m, "[<0>] %pB\n", (void *)entries[i]); 495 } 496 497 unlock_trace(task); 498 } 499 kfree(entries); 500 501 return err; 502 } 503 #endif 504 505 #ifdef CONFIG_SCHED_INFO 506 /* 507 * Provides /proc/PID/schedstat 508 */ 509 static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, 510 struct pid *pid, struct task_struct *task) 511 { 512 if (unlikely(!sched_info_on())) 513 seq_puts(m, "0 0 0\n"); 514 else 515 seq_printf(m, "%llu %llu %lu\n", 516 (unsigned long long)task->se.sum_exec_runtime, 517 (unsigned long long)task->sched_info.run_delay, 518 task->sched_info.pcount); 519 520 return 0; 521 } 522 #endif 523 524 #ifdef CONFIG_LATENCYTOP 525 static int lstats_show_proc(struct seq_file *m, void *v) 526 { 527 int i; 528 struct inode *inode = m->private; 529 struct task_struct *task = get_proc_task(inode); 530 531 if (!task) 532 return -ESRCH; 533 seq_puts(m, "Latency Top version : v0.1\n"); 534 for (i = 0; i < LT_SAVECOUNT; i++) { 535 struct latency_record *lr = &task->latency_record[i]; 536 if (lr->backtrace[0]) { 537 int q; 538 seq_printf(m, "%i %li %li", 539 lr->count, lr->time, lr->max); 540 for (q = 0; q < LT_BACKTRACEDEPTH; q++) { 541 unsigned long bt = lr->backtrace[q]; 542 543 if (!bt) 544 break; 545 seq_printf(m, " %ps", (void *)bt); 546 } 547 seq_putc(m, '\n'); 548 } 549 550 } 551 put_task_struct(task); 552 return 0; 553 } 554 555 static int lstats_open(struct inode *inode, struct file *file) 556 { 557 return single_open(file, lstats_show_proc, inode); 558 } 559 560 static ssize_t lstats_write(struct 
file *file, const char __user *buf, 561 size_t count, loff_t *offs) 562 { 563 struct task_struct *task = get_proc_task(file_inode(file)); 564 565 if (!task) 566 return -ESRCH; 567 clear_tsk_latency_tracing(task); 568 put_task_struct(task); 569 570 return count; 571 } 572 573 static const struct file_operations proc_lstats_operations = { 574 .open = lstats_open, 575 .read = seq_read, 576 .write = lstats_write, 577 .llseek = seq_lseek, 578 .release = single_release, 579 }; 580 581 #endif 582 583 static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, 584 struct pid *pid, struct task_struct *task) 585 { 586 unsigned long totalpages = totalram_pages() + total_swap_pages; 587 unsigned long points = 0; 588 long badness; 589 590 badness = oom_badness(task, totalpages); 591 /* 592 * Special case OOM_SCORE_ADJ_MIN for all others scale the 593 * badness value into [0, 2000] range which we have been 594 * exporting for a long time so userspace might depend on it. 595 */ 596 if (badness != LONG_MIN) 597 points = (1000 + badness * 1000 / (long)totalpages) * 2 / 3; 598 599 seq_printf(m, "%lu\n", points); 600 601 return 0; 602 } 603 604 struct limit_names { 605 const char *name; 606 const char *unit; 607 }; 608 609 static const struct limit_names lnames[RLIM_NLIMITS] = { 610 [RLIMIT_CPU] = {"Max cpu time", "seconds"}, 611 [RLIMIT_FSIZE] = {"Max file size", "bytes"}, 612 [RLIMIT_DATA] = {"Max data size", "bytes"}, 613 [RLIMIT_STACK] = {"Max stack size", "bytes"}, 614 [RLIMIT_CORE] = {"Max core file size", "bytes"}, 615 [RLIMIT_RSS] = {"Max resident set", "bytes"}, 616 [RLIMIT_NPROC] = {"Max processes", "processes"}, 617 [RLIMIT_NOFILE] = {"Max open files", "files"}, 618 [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, 619 [RLIMIT_AS] = {"Max address space", "bytes"}, 620 [RLIMIT_LOCKS] = {"Max file locks", "locks"}, 621 [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, 622 [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, 623 [RLIMIT_NICE] = {"Max nice 
priority", NULL}, 624 [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, 625 [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, 626 }; 627 628 /* Display limits for a process */ 629 static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns, 630 struct pid *pid, struct task_struct *task) 631 { 632 unsigned int i; 633 unsigned long flags; 634 635 struct rlimit rlim[RLIM_NLIMITS]; 636 637 if (!lock_task_sighand(task, &flags)) 638 return 0; 639 memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); 640 unlock_task_sighand(task, &flags); 641 642 /* 643 * print the file header 644 */ 645 seq_puts(m, "Limit " 646 "Soft Limit " 647 "Hard Limit " 648 "Units \n"); 649 650 for (i = 0; i < RLIM_NLIMITS; i++) { 651 if (rlim[i].rlim_cur == RLIM_INFINITY) 652 seq_printf(m, "%-25s %-20s ", 653 lnames[i].name, "unlimited"); 654 else 655 seq_printf(m, "%-25s %-20lu ", 656 lnames[i].name, rlim[i].rlim_cur); 657 658 if (rlim[i].rlim_max == RLIM_INFINITY) 659 seq_printf(m, "%-20s ", "unlimited"); 660 else 661 seq_printf(m, "%-20lu ", rlim[i].rlim_max); 662 663 if (lnames[i].unit) 664 seq_printf(m, "%-10s\n", lnames[i].unit); 665 else 666 seq_putc(m, '\n'); 667 } 668 669 return 0; 670 } 671 672 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 673 static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns, 674 struct pid *pid, struct task_struct *task) 675 { 676 struct syscall_info info; 677 u64 *args = &info.data.args[0]; 678 int res; 679 680 res = lock_trace(task); 681 if (res) 682 return res; 683 684 if (task_current_syscall(task, &info)) 685 seq_puts(m, "running\n"); 686 else if (info.data.nr < 0) 687 seq_printf(m, "%d 0x%llx 0x%llx\n", 688 info.data.nr, info.sp, info.data.instruction_pointer); 689 else 690 seq_printf(m, 691 "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n", 692 info.data.nr, 693 args[0], args[1], args[2], args[3], args[4], args[5], 694 info.sp, info.data.instruction_pointer); 695 unlock_trace(task); 696 697 return 0; 698 } 
699 #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ 700 701 /************************************************************************/ 702 /* Here the fs part begins */ 703 /************************************************************************/ 704 705 int proc_nochmod_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 706 struct iattr *attr) 707 { 708 int error; 709 struct inode *inode = d_inode(dentry); 710 711 if (attr->ia_valid & ATTR_MODE) 712 return -EPERM; 713 714 error = setattr_prepare(&nop_mnt_idmap, dentry, attr); 715 if (error) 716 return error; 717 718 setattr_copy(&nop_mnt_idmap, inode, attr); 719 return 0; 720 } 721 722 /* 723 * May current process learn task's sched/cmdline info (for hide_pid_min=1) 724 * or euid/egid (for hide_pid_min=2)? 725 */ 726 static bool has_pid_permissions(struct proc_fs_info *fs_info, 727 struct task_struct *task, 728 enum proc_hidepid hide_pid_min) 729 { 730 /* 731 * If 'hidpid' mount option is set force a ptrace check, 732 * we indicate that we are using a filesystem syscall 733 * by passing PTRACE_MODE_READ_FSCREDS 734 */ 735 if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) 736 return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); 737 738 if (fs_info->hide_pid < hide_pid_min) 739 return true; 740 if (in_group_p(fs_info->pid_gid)) 741 return true; 742 return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); 743 } 744 745 746 static int proc_pid_permission(struct mnt_idmap *idmap, 747 struct inode *inode, int mask) 748 { 749 struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); 750 struct task_struct *task; 751 bool has_perms; 752 753 task = get_proc_task(inode); 754 if (!task) 755 return -ESRCH; 756 has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS); 757 put_task_struct(task); 758 759 if (!has_perms) { 760 if (fs_info->hide_pid == HIDEPID_INVISIBLE) { 761 /* 762 * Let's make getdents(), stat(), and open() 763 * consistent with each other. 
If a process 764 * may not stat() a file, it shouldn't be seen 765 * in procfs at all. 766 */ 767 return -ENOENT; 768 } 769 770 return -EPERM; 771 } 772 return generic_permission(&nop_mnt_idmap, inode, mask); 773 } 774 775 776 777 static const struct inode_operations proc_def_inode_operations = { 778 .setattr = proc_nochmod_setattr, 779 }; 780 781 static int proc_single_show(struct seq_file *m, void *v) 782 { 783 struct inode *inode = m->private; 784 struct pid_namespace *ns = proc_pid_ns(inode->i_sb); 785 struct pid *pid = proc_pid(inode); 786 struct task_struct *task; 787 int ret; 788 789 task = get_pid_task(pid, PIDTYPE_PID); 790 if (!task) 791 return -ESRCH; 792 793 ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); 794 795 put_task_struct(task); 796 return ret; 797 } 798 799 static int proc_single_open(struct inode *inode, struct file *filp) 800 { 801 return single_open(filp, proc_single_show, inode); 802 } 803 804 static const struct file_operations proc_single_file_operations = { 805 .open = proc_single_open, 806 .read = seq_read, 807 .llseek = seq_lseek, 808 .release = single_release, 809 }; 810 811 /* 812 * proc_mem_open() can return errno, NULL or mm_struct*. 813 * 814 * - Returns NULL if the task has no mm (PF_KTHREAD or PF_EXITING) 815 * - Returns mm_struct* on success 816 * - Returns error code on failure 817 */ 818 struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) 819 { 820 struct task_struct *task = get_proc_task(inode); 821 struct mm_struct *mm; 822 823 if (!task) 824 return ERR_PTR(-ESRCH); 825 826 mm = mm_access(task, mode | PTRACE_MODE_FSCREDS); 827 put_task_struct(task); 828 829 if (IS_ERR(mm)) 830 return mm == ERR_PTR(-ESRCH) ? 
NULL : mm; 831 832 /* ensure this mm_struct can't be freed */ 833 mmgrab(mm); 834 /* but do not pin its memory */ 835 mmput(mm); 836 837 return mm; 838 } 839 840 static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) 841 { 842 struct mm_struct *mm = proc_mem_open(inode, mode); 843 844 if (IS_ERR_OR_NULL(mm)) 845 return mm ? PTR_ERR(mm) : -ESRCH; 846 847 file->private_data = mm; 848 return 0; 849 } 850 851 static int mem_open(struct inode *inode, struct file *file) 852 { 853 if (WARN_ON_ONCE(!(file->f_op->fop_flags & FOP_UNSIGNED_OFFSET))) 854 return -EINVAL; 855 return __mem_open(inode, file, PTRACE_MODE_ATTACH); 856 } 857 858 static bool proc_mem_foll_force(struct file *file, struct mm_struct *mm) 859 { 860 struct task_struct *task; 861 bool ptrace_active = false; 862 863 switch (proc_mem_force_override) { 864 case PROC_MEM_FORCE_NEVER: 865 return false; 866 case PROC_MEM_FORCE_PTRACE: 867 task = get_proc_task(file_inode(file)); 868 if (task) { 869 ptrace_active = READ_ONCE(task->ptrace) && 870 READ_ONCE(task->mm) == mm && 871 READ_ONCE(task->parent) == current; 872 put_task_struct(task); 873 } 874 return ptrace_active; 875 default: 876 return true; 877 } 878 } 879 880 static ssize_t mem_rw(struct file *file, char __user *buf, 881 size_t count, loff_t *ppos, int write) 882 { 883 struct mm_struct *mm = file->private_data; 884 unsigned long addr = *ppos; 885 ssize_t copied; 886 char *page; 887 unsigned int flags; 888 889 if (!mm) 890 return 0; 891 892 page = kmalloc(PAGE_SIZE, GFP_KERNEL); 893 if (!page) 894 return -ENOMEM; 895 896 copied = 0; 897 if (!mmget_not_zero(mm)) 898 goto free; 899 900 flags = write ? 
FOLL_WRITE : 0; 901 if (proc_mem_foll_force(file, mm)) 902 flags |= FOLL_FORCE; 903 904 while (count > 0) { 905 size_t this_len = min_t(size_t, count, PAGE_SIZE); 906 907 if (write && copy_from_user(page, buf, this_len)) { 908 copied = -EFAULT; 909 break; 910 } 911 912 this_len = access_remote_vm(mm, addr, page, this_len, flags); 913 if (!this_len) { 914 if (!copied) 915 copied = -EIO; 916 break; 917 } 918 919 if (!write && copy_to_user(buf, page, this_len)) { 920 copied = -EFAULT; 921 break; 922 } 923 924 buf += this_len; 925 addr += this_len; 926 copied += this_len; 927 count -= this_len; 928 } 929 *ppos = addr; 930 931 mmput(mm); 932 free: 933 kfree(page); 934 return copied; 935 } 936 937 static ssize_t mem_read(struct file *file, char __user *buf, 938 size_t count, loff_t *ppos) 939 { 940 return mem_rw(file, buf, count, ppos, 0); 941 } 942 943 static ssize_t mem_write(struct file *file, const char __user *buf, 944 size_t count, loff_t *ppos) 945 { 946 return mem_rw(file, (char __user*)buf, count, ppos, 1); 947 } 948 949 loff_t mem_lseek(struct file *file, loff_t offset, int orig) 950 { 951 switch (orig) { 952 case 0: 953 file->f_pos = offset; 954 break; 955 case 1: 956 file->f_pos += offset; 957 break; 958 default: 959 return -EINVAL; 960 } 961 force_successful_syscall_return(); 962 return file->f_pos; 963 } 964 965 static int mem_release(struct inode *inode, struct file *file) 966 { 967 struct mm_struct *mm = file->private_data; 968 if (mm) 969 mmdrop(mm); 970 return 0; 971 } 972 973 static const struct file_operations proc_mem_operations = { 974 .llseek = mem_lseek, 975 .read = mem_read, 976 .write = mem_write, 977 .open = mem_open, 978 .release = mem_release, 979 .fop_flags = FOP_UNSIGNED_OFFSET, 980 }; 981 982 static int environ_open(struct inode *inode, struct file *file) 983 { 984 return __mem_open(inode, file, PTRACE_MODE_READ); 985 } 986 987 static ssize_t environ_read(struct file *file, char __user *buf, 988 size_t count, loff_t *ppos) 989 { 990 char 
*page; 991 unsigned long src = *ppos; 992 int ret = 0; 993 struct mm_struct *mm = file->private_data; 994 unsigned long env_start, env_end; 995 996 /* Ensure the process spawned far enough to have an environment. */ 997 if (!mm || !mm->env_end) 998 return 0; 999 1000 page = kmalloc(PAGE_SIZE, GFP_KERNEL); 1001 if (!page) 1002 return -ENOMEM; 1003 1004 ret = 0; 1005 if (!mmget_not_zero(mm)) 1006 goto free; 1007 1008 spin_lock(&mm->arg_lock); 1009 env_start = mm->env_start; 1010 env_end = mm->env_end; 1011 spin_unlock(&mm->arg_lock); 1012 1013 while (count > 0) { 1014 size_t this_len, max_len; 1015 int retval; 1016 1017 if (src >= (env_end - env_start)) 1018 break; 1019 1020 this_len = env_end - (env_start + src); 1021 1022 max_len = min_t(size_t, PAGE_SIZE, count); 1023 this_len = min(max_len, this_len); 1024 1025 retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON); 1026 1027 if (retval <= 0) { 1028 ret = retval; 1029 break; 1030 } 1031 1032 if (copy_to_user(buf, page, retval)) { 1033 ret = -EFAULT; 1034 break; 1035 } 1036 1037 ret += retval; 1038 src += retval; 1039 buf += retval; 1040 count -= retval; 1041 } 1042 *ppos = src; 1043 mmput(mm); 1044 1045 free: 1046 kfree(page); 1047 return ret; 1048 } 1049 1050 static const struct file_operations proc_environ_operations = { 1051 .open = environ_open, 1052 .read = environ_read, 1053 .llseek = generic_file_llseek, 1054 .release = mem_release, 1055 }; 1056 1057 static int auxv_open(struct inode *inode, struct file *file) 1058 { 1059 return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); 1060 } 1061 1062 static ssize_t auxv_read(struct file *file, char __user *buf, 1063 size_t count, loff_t *ppos) 1064 { 1065 struct mm_struct *mm = file->private_data; 1066 unsigned int nwords = 0; 1067 1068 if (!mm) 1069 return 0; 1070 do { 1071 nwords += 2; 1072 } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ 1073 return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv, 1074 nwords * 
sizeof(mm->saved_auxv[0])); 1075 } 1076 1077 static const struct file_operations proc_auxv_operations = { 1078 .open = auxv_open, 1079 .read = auxv_read, 1080 .llseek = generic_file_llseek, 1081 .release = mem_release, 1082 }; 1083 1084 static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, 1085 loff_t *ppos) 1086 { 1087 struct task_struct *task = get_proc_task(file_inode(file)); 1088 char buffer[PROC_NUMBUF]; 1089 int oom_adj = OOM_ADJUST_MIN; 1090 size_t len; 1091 1092 if (!task) 1093 return -ESRCH; 1094 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) 1095 oom_adj = OOM_ADJUST_MAX; 1096 else 1097 oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / 1098 OOM_SCORE_ADJ_MAX; 1099 put_task_struct(task); 1100 if (oom_adj > OOM_ADJUST_MAX) 1101 oom_adj = OOM_ADJUST_MAX; 1102 len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); 1103 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1104 } 1105 1106 static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) 1107 { 1108 struct mm_struct *mm = NULL; 1109 struct task_struct *task; 1110 int err = 0; 1111 1112 task = get_proc_task(file_inode(file)); 1113 if (!task) 1114 return -ESRCH; 1115 1116 mutex_lock(&oom_adj_mutex); 1117 if (legacy) { 1118 if (oom_adj < task->signal->oom_score_adj && 1119 !capable(CAP_SYS_RESOURCE)) { 1120 err = -EACCES; 1121 goto err_unlock; 1122 } 1123 /* 1124 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use 1125 * /proc/pid/oom_score_adj instead. 
1126 */ 1127 pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", 1128 current->comm, task_pid_nr(current), task_pid_nr(task), 1129 task_pid_nr(task)); 1130 } else { 1131 if ((short)oom_adj < task->signal->oom_score_adj_min && 1132 !capable(CAP_SYS_RESOURCE)) { 1133 err = -EACCES; 1134 goto err_unlock; 1135 } 1136 } 1137 1138 /* 1139 * Make sure we will check other processes sharing the mm if this is 1140 * not vfrok which wants its own oom_score_adj. 1141 * pin the mm so it doesn't go away and get reused after task_unlock 1142 */ 1143 if (!task->vfork_done) { 1144 struct task_struct *p = find_lock_task_mm(task); 1145 1146 if (p) { 1147 if (mm_flags_test(MMF_MULTIPROCESS, p->mm)) { 1148 mm = p->mm; 1149 mmgrab(mm); 1150 } 1151 task_unlock(p); 1152 } 1153 } 1154 1155 task->signal->oom_score_adj = oom_adj; 1156 if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1157 task->signal->oom_score_adj_min = (short)oom_adj; 1158 trace_oom_score_adj_update(task); 1159 1160 if (mm) { 1161 struct task_struct *p; 1162 1163 rcu_read_lock(); 1164 for_each_process(p) { 1165 if (same_thread_group(task, p)) 1166 continue; 1167 1168 /* do not touch kernel threads or the global init */ 1169 if (p->flags & PF_KTHREAD || is_global_init(p)) 1170 continue; 1171 1172 task_lock(p); 1173 if (!p->vfork_done && process_shares_mm(p, mm)) { 1174 p->signal->oom_score_adj = oom_adj; 1175 if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1176 p->signal->oom_score_adj_min = (short)oom_adj; 1177 } 1178 task_unlock(p); 1179 } 1180 rcu_read_unlock(); 1181 mmdrop(mm); 1182 } 1183 err_unlock: 1184 mutex_unlock(&oom_adj_mutex); 1185 put_task_struct(task); 1186 return err; 1187 } 1188 1189 /* 1190 * /proc/pid/oom_adj exists solely for backwards compatibility with previous 1191 * kernels. 
The effective policy is defined by oom_score_adj, which has a 1192 * different scale: oom_adj grew exponentially and oom_score_adj grows linearly. 1193 * Values written to oom_adj are simply mapped linearly to oom_score_adj. 1194 * Processes that become oom disabled via oom_adj will still be oom disabled 1195 * with this implementation. 1196 * 1197 * oom_adj cannot be removed since existing userspace binaries use it. 1198 */ 1199 static ssize_t oom_adj_write(struct file *file, const char __user *buf, 1200 size_t count, loff_t *ppos) 1201 { 1202 char buffer[PROC_NUMBUF] = {}; 1203 int oom_adj; 1204 int err; 1205 1206 if (count > sizeof(buffer) - 1) 1207 count = sizeof(buffer) - 1; 1208 if (copy_from_user(buffer, buf, count)) { 1209 err = -EFAULT; 1210 goto out; 1211 } 1212 1213 err = kstrtoint(strstrip(buffer), 0, &oom_adj); 1214 if (err) 1215 goto out; 1216 if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && 1217 oom_adj != OOM_DISABLE) { 1218 err = -EINVAL; 1219 goto out; 1220 } 1221 1222 /* 1223 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum 1224 * value is always attainable. 1225 */ 1226 if (oom_adj == OOM_ADJUST_MAX) 1227 oom_adj = OOM_SCORE_ADJ_MAX; 1228 else 1229 oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; 1230 1231 err = __set_oom_adj(file, oom_adj, true); 1232 out: 1233 return err < 0 ? 
err : count; 1234 } 1235 1236 static const struct file_operations proc_oom_adj_operations = { 1237 .read = oom_adj_read, 1238 .write = oom_adj_write, 1239 .llseek = generic_file_llseek, 1240 }; 1241 1242 static ssize_t oom_score_adj_read(struct file *file, char __user *buf, 1243 size_t count, loff_t *ppos) 1244 { 1245 struct task_struct *task = get_proc_task(file_inode(file)); 1246 char buffer[PROC_NUMBUF]; 1247 short oom_score_adj = OOM_SCORE_ADJ_MIN; 1248 size_t len; 1249 1250 if (!task) 1251 return -ESRCH; 1252 oom_score_adj = task->signal->oom_score_adj; 1253 put_task_struct(task); 1254 len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj); 1255 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1256 } 1257 1258 static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, 1259 size_t count, loff_t *ppos) 1260 { 1261 char buffer[PROC_NUMBUF] = {}; 1262 int oom_score_adj; 1263 int err; 1264 1265 if (count > sizeof(buffer) - 1) 1266 count = sizeof(buffer) - 1; 1267 if (copy_from_user(buffer, buf, count)) { 1268 err = -EFAULT; 1269 goto out; 1270 } 1271 1272 err = kstrtoint(strstrip(buffer), 0, &oom_score_adj); 1273 if (err) 1274 goto out; 1275 if (oom_score_adj < OOM_SCORE_ADJ_MIN || 1276 oom_score_adj > OOM_SCORE_ADJ_MAX) { 1277 err = -EINVAL; 1278 goto out; 1279 } 1280 1281 err = __set_oom_adj(file, oom_score_adj, false); 1282 out: 1283 return err < 0 ? 
err : count; 1284 } 1285 1286 static const struct file_operations proc_oom_score_adj_operations = { 1287 .read = oom_score_adj_read, 1288 .write = oom_score_adj_write, 1289 .llseek = default_llseek, 1290 }; 1291 1292 #ifdef CONFIG_AUDIT 1293 #define TMPBUFLEN 11 1294 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 1295 size_t count, loff_t *ppos) 1296 { 1297 struct inode * inode = file_inode(file); 1298 struct task_struct *task = get_proc_task(inode); 1299 ssize_t length; 1300 char tmpbuf[TMPBUFLEN]; 1301 1302 if (!task) 1303 return -ESRCH; 1304 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 1305 from_kuid(file->f_cred->user_ns, 1306 audit_get_loginuid(task))); 1307 put_task_struct(task); 1308 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 1309 } 1310 1311 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, 1312 size_t count, loff_t *ppos) 1313 { 1314 struct inode * inode = file_inode(file); 1315 uid_t loginuid; 1316 kuid_t kloginuid; 1317 int rv; 1318 1319 /* Don't let kthreads write their own loginuid */ 1320 if (current->flags & PF_KTHREAD) 1321 return -EPERM; 1322 1323 rcu_read_lock(); 1324 if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { 1325 rcu_read_unlock(); 1326 return -EPERM; 1327 } 1328 rcu_read_unlock(); 1329 1330 if (*ppos != 0) { 1331 /* No partial writes. */ 1332 return -EINVAL; 1333 } 1334 1335 rv = kstrtou32_from_user(buf, count, 10, &loginuid); 1336 if (rv < 0) 1337 return rv; 1338 1339 /* is userspace tring to explicitly UNSET the loginuid? 
*/ 1340 if (loginuid == AUDIT_UID_UNSET) { 1341 kloginuid = INVALID_UID; 1342 } else { 1343 kloginuid = make_kuid(file->f_cred->user_ns, loginuid); 1344 if (!uid_valid(kloginuid)) 1345 return -EINVAL; 1346 } 1347 1348 rv = audit_set_loginuid(kloginuid); 1349 if (rv < 0) 1350 return rv; 1351 return count; 1352 } 1353 1354 static const struct file_operations proc_loginuid_operations = { 1355 .read = proc_loginuid_read, 1356 .write = proc_loginuid_write, 1357 .llseek = generic_file_llseek, 1358 }; 1359 1360 static ssize_t proc_sessionid_read(struct file * file, char __user * buf, 1361 size_t count, loff_t *ppos) 1362 { 1363 struct inode * inode = file_inode(file); 1364 struct task_struct *task = get_proc_task(inode); 1365 ssize_t length; 1366 char tmpbuf[TMPBUFLEN]; 1367 1368 if (!task) 1369 return -ESRCH; 1370 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 1371 audit_get_sessionid(task)); 1372 put_task_struct(task); 1373 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 1374 } 1375 1376 static const struct file_operations proc_sessionid_operations = { 1377 .read = proc_sessionid_read, 1378 .llseek = generic_file_llseek, 1379 }; 1380 #endif 1381 1382 #ifdef CONFIG_FAULT_INJECTION 1383 static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, 1384 size_t count, loff_t *ppos) 1385 { 1386 struct task_struct *task = get_proc_task(file_inode(file)); 1387 char buffer[PROC_NUMBUF]; 1388 size_t len; 1389 int make_it_fail; 1390 1391 if (!task) 1392 return -ESRCH; 1393 make_it_fail = task->make_it_fail; 1394 put_task_struct(task); 1395 1396 len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); 1397 1398 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1399 } 1400 1401 static ssize_t proc_fault_inject_write(struct file * file, 1402 const char __user * buf, size_t count, loff_t *ppos) 1403 { 1404 struct task_struct *task; 1405 char buffer[PROC_NUMBUF] = {}; 1406 int make_it_fail; 1407 int rv; 1408 1409 if 
(!capable(CAP_SYS_RESOURCE)) 1410 return -EPERM; 1411 1412 if (count > sizeof(buffer) - 1) 1413 count = sizeof(buffer) - 1; 1414 if (copy_from_user(buffer, buf, count)) 1415 return -EFAULT; 1416 rv = kstrtoint(strstrip(buffer), 0, &make_it_fail); 1417 if (rv < 0) 1418 return rv; 1419 if (make_it_fail < 0 || make_it_fail > 1) 1420 return -EINVAL; 1421 1422 task = get_proc_task(file_inode(file)); 1423 if (!task) 1424 return -ESRCH; 1425 task->make_it_fail = make_it_fail; 1426 put_task_struct(task); 1427 1428 return count; 1429 } 1430 1431 static const struct file_operations proc_fault_inject_operations = { 1432 .read = proc_fault_inject_read, 1433 .write = proc_fault_inject_write, 1434 .llseek = generic_file_llseek, 1435 }; 1436 1437 static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf, 1438 size_t count, loff_t *ppos) 1439 { 1440 struct task_struct *task; 1441 int err; 1442 unsigned int n; 1443 1444 err = kstrtouint_from_user(buf, count, 0, &n); 1445 if (err) 1446 return err; 1447 1448 task = get_proc_task(file_inode(file)); 1449 if (!task) 1450 return -ESRCH; 1451 task->fail_nth = n; 1452 put_task_struct(task); 1453 1454 return count; 1455 } 1456 1457 static ssize_t proc_fail_nth_read(struct file *file, char __user *buf, 1458 size_t count, loff_t *ppos) 1459 { 1460 struct task_struct *task; 1461 char numbuf[PROC_NUMBUF]; 1462 ssize_t len; 1463 1464 task = get_proc_task(file_inode(file)); 1465 if (!task) 1466 return -ESRCH; 1467 len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth); 1468 put_task_struct(task); 1469 return simple_read_from_buffer(buf, count, ppos, numbuf, len); 1470 } 1471 1472 static const struct file_operations proc_fail_nth_operations = { 1473 .read = proc_fail_nth_read, 1474 .write = proc_fail_nth_write, 1475 }; 1476 #endif 1477 1478 1479 /* 1480 * Print out various scheduling related per-task fields: 1481 */ 1482 static int sched_show(struct seq_file *m, void *v) 1483 { 1484 struct inode *inode = m->private; 1485 
struct pid_namespace *ns = proc_pid_ns(inode->i_sb); 1486 struct task_struct *p; 1487 1488 p = get_proc_task(inode); 1489 if (!p) 1490 return -ESRCH; 1491 proc_sched_show_task(p, ns, m); 1492 1493 put_task_struct(p); 1494 1495 return 0; 1496 } 1497 1498 static ssize_t 1499 sched_write(struct file *file, const char __user *buf, 1500 size_t count, loff_t *offset) 1501 { 1502 struct inode *inode = file_inode(file); 1503 struct task_struct *p; 1504 1505 p = get_proc_task(inode); 1506 if (!p) 1507 return -ESRCH; 1508 proc_sched_set_task(p); 1509 1510 put_task_struct(p); 1511 1512 return count; 1513 } 1514 1515 static int sched_open(struct inode *inode, struct file *filp) 1516 { 1517 return single_open(filp, sched_show, inode); 1518 } 1519 1520 static const struct file_operations proc_pid_sched_operations = { 1521 .open = sched_open, 1522 .read = seq_read, 1523 .write = sched_write, 1524 .llseek = seq_lseek, 1525 .release = single_release, 1526 }; 1527 1528 #ifdef CONFIG_SCHED_AUTOGROUP 1529 /* 1530 * Print out autogroup related information: 1531 */ 1532 static int sched_autogroup_show(struct seq_file *m, void *v) 1533 { 1534 struct inode *inode = m->private; 1535 struct task_struct *p; 1536 1537 p = get_proc_task(inode); 1538 if (!p) 1539 return -ESRCH; 1540 proc_sched_autogroup_show_task(p, m); 1541 1542 put_task_struct(p); 1543 1544 return 0; 1545 } 1546 1547 static ssize_t 1548 sched_autogroup_write(struct file *file, const char __user *buf, 1549 size_t count, loff_t *offset) 1550 { 1551 struct inode *inode = file_inode(file); 1552 struct task_struct *p; 1553 char buffer[PROC_NUMBUF] = {}; 1554 int nice; 1555 int err; 1556 1557 if (count > sizeof(buffer) - 1) 1558 count = sizeof(buffer) - 1; 1559 if (copy_from_user(buffer, buf, count)) 1560 return -EFAULT; 1561 1562 err = kstrtoint(strstrip(buffer), 0, &nice); 1563 if (err < 0) 1564 return err; 1565 1566 p = get_proc_task(inode); 1567 if (!p) 1568 return -ESRCH; 1569 1570 err = proc_sched_autogroup_set_nice(p, nice); 
1571 if (err) 1572 count = err; 1573 1574 put_task_struct(p); 1575 1576 return count; 1577 } 1578 1579 static int sched_autogroup_open(struct inode *inode, struct file *filp) 1580 { 1581 int ret; 1582 1583 ret = single_open(filp, sched_autogroup_show, NULL); 1584 if (!ret) { 1585 struct seq_file *m = filp->private_data; 1586 1587 m->private = inode; 1588 } 1589 return ret; 1590 } 1591 1592 static const struct file_operations proc_pid_sched_autogroup_operations = { 1593 .open = sched_autogroup_open, 1594 .read = seq_read, 1595 .write = sched_autogroup_write, 1596 .llseek = seq_lseek, 1597 .release = single_release, 1598 }; 1599 1600 #endif /* CONFIG_SCHED_AUTOGROUP */ 1601 1602 #ifdef CONFIG_TIME_NS 1603 static int timens_offsets_show(struct seq_file *m, void *v) 1604 { 1605 struct task_struct *p; 1606 1607 p = get_proc_task(file_inode(m->file)); 1608 if (!p) 1609 return -ESRCH; 1610 proc_timens_show_offsets(p, m); 1611 1612 put_task_struct(p); 1613 1614 return 0; 1615 } 1616 1617 static ssize_t timens_offsets_write(struct file *file, const char __user *buf, 1618 size_t count, loff_t *ppos) 1619 { 1620 struct inode *inode = file_inode(file); 1621 struct proc_timens_offset offsets[2]; 1622 char *kbuf = NULL, *pos, *next_line; 1623 struct task_struct *p; 1624 int ret, noffsets; 1625 1626 /* Only allow < page size writes at the beginning of the file */ 1627 if ((*ppos != 0) || (count >= PAGE_SIZE)) 1628 return -EINVAL; 1629 1630 /* Slurp in the user data */ 1631 kbuf = memdup_user_nul(buf, count); 1632 if (IS_ERR(kbuf)) 1633 return PTR_ERR(kbuf); 1634 1635 /* Parse the user data */ 1636 ret = -EINVAL; 1637 noffsets = 0; 1638 for (pos = kbuf; pos; pos = next_line) { 1639 struct proc_timens_offset *off = &offsets[noffsets]; 1640 char clock[10]; 1641 int err; 1642 1643 /* Find the end of line and ensure we don't look past it */ 1644 next_line = strchr(pos, '\n'); 1645 if (next_line) { 1646 *next_line = '\0'; 1647 next_line++; 1648 if (*next_line == '\0') 1649 next_line = 
NULL; 1650 } 1651 1652 err = sscanf(pos, "%9s %lld %lu", clock, 1653 &off->val.tv_sec, &off->val.tv_nsec); 1654 if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC) 1655 goto out; 1656 1657 clock[sizeof(clock) - 1] = 0; 1658 if (strcmp(clock, "monotonic") == 0 || 1659 strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0) 1660 off->clockid = CLOCK_MONOTONIC; 1661 else if (strcmp(clock, "boottime") == 0 || 1662 strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0) 1663 off->clockid = CLOCK_BOOTTIME; 1664 else 1665 goto out; 1666 1667 noffsets++; 1668 if (noffsets == ARRAY_SIZE(offsets)) { 1669 if (next_line) 1670 count = next_line - kbuf; 1671 break; 1672 } 1673 } 1674 1675 ret = -ESRCH; 1676 p = get_proc_task(inode); 1677 if (!p) 1678 goto out; 1679 ret = proc_timens_set_offset(file, p, offsets, noffsets); 1680 put_task_struct(p); 1681 if (ret) 1682 goto out; 1683 1684 ret = count; 1685 out: 1686 kfree(kbuf); 1687 return ret; 1688 } 1689 1690 static int timens_offsets_open(struct inode *inode, struct file *filp) 1691 { 1692 return single_open(filp, timens_offsets_show, inode); 1693 } 1694 1695 static const struct file_operations proc_timens_offsets_operations = { 1696 .open = timens_offsets_open, 1697 .read = seq_read, 1698 .write = timens_offsets_write, 1699 .llseek = seq_lseek, 1700 .release = single_release, 1701 }; 1702 #endif /* CONFIG_TIME_NS */ 1703 1704 static ssize_t comm_write(struct file *file, const char __user *buf, 1705 size_t count, loff_t *offset) 1706 { 1707 struct inode *inode = file_inode(file); 1708 struct task_struct *p; 1709 char buffer[TASK_COMM_LEN] = {}; 1710 const size_t maxlen = sizeof(buffer) - 1; 1711 1712 if (copy_from_user(buffer, buf, count > maxlen ? 
maxlen : count)) 1713 return -EFAULT; 1714 1715 p = get_proc_task(inode); 1716 if (!p) 1717 return -ESRCH; 1718 1719 if (same_thread_group(current, p)) { 1720 set_task_comm(p, buffer); 1721 proc_comm_connector(p); 1722 } 1723 else 1724 count = -EINVAL; 1725 1726 put_task_struct(p); 1727 1728 return count; 1729 } 1730 1731 static int comm_show(struct seq_file *m, void *v) 1732 { 1733 struct inode *inode = m->private; 1734 struct task_struct *p; 1735 1736 p = get_proc_task(inode); 1737 if (!p) 1738 return -ESRCH; 1739 1740 proc_task_name(m, p, false); 1741 seq_putc(m, '\n'); 1742 1743 put_task_struct(p); 1744 1745 return 0; 1746 } 1747 1748 static int comm_open(struct inode *inode, struct file *filp) 1749 { 1750 return single_open(filp, comm_show, inode); 1751 } 1752 1753 static const struct file_operations proc_pid_set_comm_operations = { 1754 .open = comm_open, 1755 .read = seq_read, 1756 .write = comm_write, 1757 .llseek = seq_lseek, 1758 .release = single_release, 1759 }; 1760 1761 static int proc_exe_link(struct dentry *dentry, struct path *exe_path, 1762 struct task_struct *task) 1763 { 1764 struct file *exe_file; 1765 1766 exe_file = get_task_exe_file(task); 1767 if (exe_file) { 1768 *exe_path = exe_file->f_path; 1769 path_get(&exe_file->f_path); 1770 fput(exe_file); 1771 return 0; 1772 } else 1773 return -ENOENT; 1774 } 1775 1776 static int call_proc_get_link(struct dentry *dentry, struct inode *inode, struct path *path_out) 1777 { 1778 struct task_struct *task; 1779 int ret; 1780 1781 task = get_proc_task(inode); 1782 if (!task) 1783 return -ENOENT; 1784 ret = down_read_killable(&task->signal->exec_update_lock); 1785 if (ret) 1786 goto out_put_task; 1787 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { 1788 ret = -EACCES; 1789 goto out; 1790 } 1791 ret = PROC_I(inode)->op.proc_get_link(dentry, path_out, task); 1792 1793 out: 1794 up_read(&task->signal->exec_update_lock); 1795 out_put_task: 1796 put_task_struct(task); 1797 return ret; 1798 } 1799 
1800 static const char *proc_pid_get_link(struct dentry *dentry, 1801 struct inode *inode, 1802 struct delayed_call *done) 1803 { 1804 struct path path; 1805 int error; 1806 1807 if (!dentry) 1808 return ERR_PTR(-ECHILD); 1809 error = call_proc_get_link(dentry, inode, &path); 1810 if (!error) 1811 error = nd_jump_link(&path); 1812 return ERR_PTR(error); 1813 } 1814 1815 static int do_proc_readlink(const struct path *path, char __user *buffer, int buflen) 1816 { 1817 char *tmp = kmalloc(PATH_MAX, GFP_KERNEL); 1818 char *pathname; 1819 int len; 1820 1821 if (!tmp) 1822 return -ENOMEM; 1823 1824 pathname = d_path(path, tmp, PATH_MAX); 1825 len = PTR_ERR(pathname); 1826 if (IS_ERR(pathname)) 1827 goto out; 1828 len = tmp + PATH_MAX - 1 - pathname; 1829 1830 if (len > buflen) 1831 len = buflen; 1832 if (copy_to_user(buffer, pathname, len)) 1833 len = -EFAULT; 1834 out: 1835 kfree(tmp); 1836 return len; 1837 } 1838 1839 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) 1840 { 1841 int error = -EACCES; 1842 struct inode *inode = d_inode(dentry); 1843 struct path path; 1844 1845 error = call_proc_get_link(dentry, inode, &path); 1846 if (!error) { 1847 error = do_proc_readlink(&path, buffer, buflen); 1848 path_put(&path); 1849 } 1850 return error; 1851 } 1852 1853 const struct inode_operations proc_pid_link_inode_operations = { 1854 .readlink = proc_pid_readlink, 1855 .get_link = proc_pid_get_link, 1856 .setattr = proc_nochmod_setattr, 1857 }; 1858 1859 1860 /* building an inode */ 1861 1862 void task_dump_owner(struct task_struct *task, umode_t mode, 1863 kuid_t *ruid, kgid_t *rgid) 1864 { 1865 /* Depending on the state of dumpable compute who should own a 1866 * proc file for a task. 
1867 */ 1868 const struct cred *cred; 1869 kuid_t uid; 1870 kgid_t gid; 1871 1872 if (unlikely(task->flags & PF_KTHREAD)) { 1873 *ruid = GLOBAL_ROOT_UID; 1874 *rgid = GLOBAL_ROOT_GID; 1875 return; 1876 } 1877 1878 /* Default to the tasks effective ownership */ 1879 rcu_read_lock(); 1880 cred = __task_cred(task); 1881 uid = cred->euid; 1882 gid = cred->egid; 1883 1884 /* 1885 * Before the /proc/pid/status file was created the only way to read 1886 * the effective uid of a /process was to stat /proc/pid. Reading 1887 * /proc/pid/status is slow enough that procps and other packages 1888 * kept stating /proc/pid. To keep the rules in /proc simple I have 1889 * made this apply to all per process world readable and executable 1890 * directories. 1891 */ 1892 if (mode != (S_IFDIR | S_IRUGO | S_IXUGO)) { 1893 struct task_exec_state *exec_state; 1894 1895 exec_state = task_exec_state_rcu(task); 1896 if (READ_ONCE(exec_state->dumpable) != TASK_DUMPABLE_OWNER) { 1897 uid = make_kuid(exec_state->user_ns, 0); 1898 if (!uid_valid(uid)) 1899 uid = GLOBAL_ROOT_UID; 1900 1901 gid = make_kgid(exec_state->user_ns, 0); 1902 if (!gid_valid(gid)) 1903 gid = GLOBAL_ROOT_GID; 1904 } 1905 } 1906 rcu_read_unlock(); 1907 1908 *ruid = uid; 1909 *rgid = gid; 1910 } 1911 1912 void proc_pid_evict_inode(struct proc_inode *ei) 1913 { 1914 struct pid *pid = ei->pid; 1915 1916 if (S_ISDIR(ei->vfs_inode.i_mode)) { 1917 spin_lock(&pid->lock); 1918 hlist_del_init_rcu(&ei->sibling_inodes); 1919 spin_unlock(&pid->lock); 1920 } 1921 } 1922 1923 struct inode *proc_pid_make_inode(struct super_block *sb, 1924 struct task_struct *task, umode_t mode) 1925 { 1926 struct inode * inode; 1927 struct proc_inode *ei; 1928 struct pid *pid; 1929 1930 /* We need a new inode */ 1931 1932 inode = new_inode(sb); 1933 if (!inode) 1934 goto out; 1935 1936 /* Common stuff */ 1937 ei = PROC_I(inode); 1938 inode->i_mode = mode; 1939 inode->i_ino = get_next_ino(); 1940 simple_inode_init_ts(inode); 1941 inode->i_op = 
&proc_def_inode_operations; 1942 1943 /* 1944 * grab the reference to task. 1945 */ 1946 pid = get_task_pid(task, PIDTYPE_PID); 1947 if (!pid) 1948 goto out_unlock; 1949 1950 /* Let the pid remember us for quick removal */ 1951 ei->pid = pid; 1952 1953 task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); 1954 security_task_to_inode(task, inode); 1955 1956 out: 1957 return inode; 1958 1959 out_unlock: 1960 iput(inode); 1961 return NULL; 1962 } 1963 1964 /* 1965 * Generating an inode and adding it into @pid->inodes, so that task will 1966 * invalidate inode's dentry before being released. 1967 * 1968 * This helper is used for creating dir-type entries under '/proc' and 1969 * '/proc/<tgid>/task'. Other entries(eg. fd, stat) under '/proc/<tgid>' 1970 * can be released by invalidating '/proc/<tgid>' dentry. 1971 * In theory, dentries under '/proc/<tgid>/task' can also be released by 1972 * invalidating '/proc/<tgid>' dentry, we reserve it to handle single 1973 * thread exiting situation: Any one of threads should invalidate its 1974 * '/proc/<tgid>/task/<pid>' dentry before released. 
1975 */ 1976 static struct inode *proc_pid_make_base_inode(struct super_block *sb, 1977 struct task_struct *task, umode_t mode) 1978 { 1979 struct inode *inode; 1980 struct proc_inode *ei; 1981 struct pid *pid; 1982 1983 inode = proc_pid_make_inode(sb, task, mode); 1984 if (!inode) 1985 return NULL; 1986 1987 /* Let proc_flush_pid find this directory inode */ 1988 ei = PROC_I(inode); 1989 pid = ei->pid; 1990 spin_lock(&pid->lock); 1991 hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); 1992 spin_unlock(&pid->lock); 1993 1994 return inode; 1995 } 1996 1997 int pid_getattr(struct mnt_idmap *idmap, const struct path *path, 1998 struct kstat *stat, u32 request_mask, unsigned int query_flags) 1999 { 2000 struct inode *inode = d_inode(path->dentry); 2001 struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); 2002 struct task_struct *task; 2003 2004 generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); 2005 2006 stat->uid = GLOBAL_ROOT_UID; 2007 stat->gid = GLOBAL_ROOT_GID; 2008 rcu_read_lock(); 2009 task = pid_task(proc_pid(inode), PIDTYPE_PID); 2010 if (task) { 2011 if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) { 2012 rcu_read_unlock(); 2013 /* 2014 * This doesn't prevent learning whether PID exists, 2015 * it only makes getattr() consistent with readdir(). 2016 */ 2017 return -ENOENT; 2018 } 2019 task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid); 2020 } 2021 rcu_read_unlock(); 2022 return 0; 2023 } 2024 2025 /* dentry stuff */ 2026 2027 /* 2028 * Set <pid>/... inode ownership (can change due to setuid(), etc.) 2029 */ 2030 void pid_update_inode(struct task_struct *task, struct inode *inode) 2031 { 2032 task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid); 2033 2034 inode->i_mode &= ~(S_ISUID | S_ISGID); 2035 security_task_to_inode(task, inode); 2036 } 2037 2038 /* 2039 * Rewrite the inode's ownerships here because the owning task may have 2040 * performed a setuid(), etc. 
2041 * 2042 */ 2043 static int pid_revalidate(struct inode *dir, const struct qstr *name, 2044 struct dentry *dentry, unsigned int flags) 2045 { 2046 struct inode *inode; 2047 struct task_struct *task; 2048 int ret = 0; 2049 2050 rcu_read_lock(); 2051 inode = d_inode_rcu(dentry); 2052 if (!inode) 2053 goto out; 2054 task = pid_task(proc_pid(inode), PIDTYPE_PID); 2055 2056 if (task) { 2057 pid_update_inode(task, inode); 2058 ret = 1; 2059 } 2060 out: 2061 rcu_read_unlock(); 2062 return ret; 2063 } 2064 2065 static inline bool proc_inode_is_dead(struct inode *inode) 2066 { 2067 return !proc_pid(inode)->tasks[PIDTYPE_PID].first; 2068 } 2069 2070 int pid_delete_dentry(const struct dentry *dentry) 2071 { 2072 /* Is the task we represent dead? 2073 * If so, then don't put the dentry on the lru list, 2074 * kill it immediately. 2075 */ 2076 return proc_inode_is_dead(d_inode(dentry)); 2077 } 2078 2079 const struct dentry_operations pid_dentry_operations = 2080 { 2081 .d_revalidate = pid_revalidate, 2082 .d_delete = pid_delete_dentry, 2083 }; 2084 2085 /* Lookups */ 2086 2087 /* 2088 * Fill a directory entry. 2089 * 2090 * If possible create the dcache entry and derive our inode number and 2091 * file type from dcache entry. 2092 * 2093 * Since all of the proc inode numbers are dynamically generated, the inode 2094 * numbers do not exist until the inode is cache. This means creating 2095 * the dcache entry in readdir is necessary to keep the inode numbers 2096 * reported by readdir in sync with the inode numbers reported 2097 * by stat. 
2098 */ 2099 bool proc_fill_cache(struct file *file, struct dir_context *ctx, 2100 const char *name, unsigned int len, 2101 instantiate_t instantiate, struct task_struct *task, const void *ptr) 2102 { 2103 struct dentry *child, *dir = file->f_path.dentry; 2104 struct qstr qname = QSTR_INIT(name, len); 2105 struct inode *inode; 2106 unsigned type = DT_UNKNOWN; 2107 ino_t ino = 1; 2108 2109 child = try_lookup_noperm(&qname, dir); 2110 if (IS_ERR(child)) 2111 goto end_instantiate; 2112 2113 if (!child) { 2114 child = d_alloc_parallel(dir, &qname); 2115 if (IS_ERR(child)) 2116 goto end_instantiate; 2117 if (d_in_lookup(child)) { 2118 struct dentry *res; 2119 res = instantiate(child, task, ptr); 2120 d_lookup_done(child); 2121 if (unlikely(res)) { 2122 dput(child); 2123 child = res; 2124 if (IS_ERR(child)) 2125 goto end_instantiate; 2126 } 2127 } 2128 } 2129 inode = d_inode(child); 2130 ino = inode->i_ino; 2131 type = inode->i_mode >> 12; 2132 dput(child); 2133 end_instantiate: 2134 return dir_emit(ctx, name, len, ino, type); 2135 } 2136 2137 /* 2138 * dname_to_vma_addr - maps a dentry name into two unsigned longs 2139 * which represent vma start and end addresses. 
2140 */ 2141 static int dname_to_vma_addr(struct dentry *dentry, 2142 unsigned long *start, unsigned long *end) 2143 { 2144 const char *str = dentry->d_name.name; 2145 unsigned long long sval, eval; 2146 unsigned int len; 2147 2148 if (str[0] == '0' && str[1] != '-') 2149 return -EINVAL; 2150 len = _parse_integer(str, 16, &sval); 2151 if (len & KSTRTOX_OVERFLOW) 2152 return -EINVAL; 2153 if (sval != (unsigned long)sval) 2154 return -EINVAL; 2155 str += len; 2156 2157 if (*str != '-') 2158 return -EINVAL; 2159 str++; 2160 2161 if (str[0] == '0' && str[1]) 2162 return -EINVAL; 2163 len = _parse_integer(str, 16, &eval); 2164 if (len & KSTRTOX_OVERFLOW) 2165 return -EINVAL; 2166 if (eval != (unsigned long)eval) 2167 return -EINVAL; 2168 str += len; 2169 2170 if (*str != '\0') 2171 return -EINVAL; 2172 2173 *start = sval; 2174 *end = eval; 2175 2176 return 0; 2177 } 2178 2179 static int map_files_d_revalidate(struct inode *dir, const struct qstr *name, 2180 struct dentry *dentry, unsigned int flags) 2181 { 2182 unsigned long vm_start, vm_end; 2183 bool exact_vma_exists = false; 2184 struct mm_struct *mm = NULL; 2185 struct task_struct *task; 2186 struct inode *inode; 2187 int status = 0; 2188 2189 if (flags & LOOKUP_RCU) 2190 return -ECHILD; 2191 2192 inode = d_inode(dentry); 2193 task = get_proc_task(inode); 2194 if (!task) 2195 goto out_notask; 2196 2197 mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); 2198 if (IS_ERR(mm)) 2199 goto out; 2200 2201 if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { 2202 status = mmap_read_lock_killable(mm); 2203 if (!status) { 2204 exact_vma_exists = !!find_exact_vma(mm, vm_start, 2205 vm_end); 2206 mmap_read_unlock(mm); 2207 } 2208 } 2209 2210 mmput(mm); 2211 2212 if (exact_vma_exists) { 2213 task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); 2214 2215 security_task_to_inode(task, inode); 2216 status = 1; 2217 } 2218 2219 out: 2220 put_task_struct(task); 2221 2222 out_notask: 2223 return status; 2224 } 2225 2226 static const 
struct dentry_operations tid_map_files_dentry_operations = { 2227 .d_revalidate = map_files_d_revalidate, 2228 .d_delete = pid_delete_dentry, 2229 }; 2230 2231 static int map_files_get_link(struct dentry *dentry, struct path *path, 2232 struct task_struct *task) 2233 { 2234 unsigned long vm_start, vm_end; 2235 struct vm_area_struct *vma; 2236 struct mm_struct *mm; 2237 int rc; 2238 2239 rc = -ENOENT; 2240 mm = get_task_mm(task); 2241 if (!mm) 2242 goto out; 2243 2244 rc = dname_to_vma_addr(dentry, &vm_start, &vm_end); 2245 if (rc) 2246 goto out_mmput; 2247 2248 rc = mmap_read_lock_killable(mm); 2249 if (rc) 2250 goto out_mmput; 2251 2252 rc = -ENOENT; 2253 vma = find_exact_vma(mm, vm_start, vm_end); 2254 if (vma && vma->vm_file) { 2255 *path = *file_user_path(vma->vm_file); 2256 path_get(path); 2257 rc = 0; 2258 } 2259 mmap_read_unlock(mm); 2260 2261 out_mmput: 2262 mmput(mm); 2263 out: 2264 return rc; 2265 } 2266 2267 struct map_files_info { 2268 unsigned long start; 2269 unsigned long end; 2270 fmode_t mode; 2271 }; 2272 2273 /* 2274 * Only allow CAP_SYS_ADMIN and CAP_CHECKPOINT_RESTORE to follow the links, due 2275 * to concerns about how the symlinks may be used to bypass permissions on 2276 * ancestor directories in the path to the file in question. 
2277 */ 2278 static const char * 2279 proc_map_files_get_link(struct dentry *dentry, 2280 struct inode *inode, 2281 struct delayed_call *done) 2282 { 2283 if (!checkpoint_restore_ns_capable(&init_user_ns)) 2284 return ERR_PTR(-EPERM); 2285 2286 return proc_pid_get_link(dentry, inode, done); 2287 } 2288 2289 /* 2290 * Identical to proc_pid_link_inode_operations except for get_link() 2291 */ 2292 static const struct inode_operations proc_map_files_link_inode_operations = { 2293 .readlink = proc_pid_readlink, 2294 .get_link = proc_map_files_get_link, 2295 .setattr = proc_nochmod_setattr, 2296 }; 2297 2298 static struct dentry * 2299 proc_map_files_instantiate(struct dentry *dentry, 2300 struct task_struct *task, const void *ptr) 2301 { 2302 fmode_t mode = (fmode_t)(unsigned long)ptr; 2303 struct proc_inode *ei; 2304 struct inode *inode; 2305 2306 inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | 2307 ((mode & FMODE_READ ) ? S_IRUSR : 0) | 2308 ((mode & FMODE_WRITE) ? S_IWUSR : 0)); 2309 if (!inode) 2310 return ERR_PTR(-ENOENT); 2311 2312 ei = PROC_I(inode); 2313 ei->op.proc_get_link = map_files_get_link; 2314 2315 inode->i_op = &proc_map_files_link_inode_operations; 2316 inode->i_size = 64; 2317 2318 return proc_splice_unmountable(inode, dentry, 2319 &tid_map_files_dentry_operations); 2320 } 2321 2322 static struct dentry *proc_map_files_lookup(struct inode *dir, 2323 struct dentry *dentry, unsigned int flags) 2324 { 2325 unsigned long vm_start, vm_end; 2326 struct vm_area_struct *vma; 2327 struct task_struct *task; 2328 struct dentry *result; 2329 struct mm_struct *mm; 2330 2331 result = ERR_PTR(-ENOENT); 2332 task = get_proc_task(dir); 2333 if (!task) 2334 goto out; 2335 2336 result = ERR_PTR(-ENOENT); 2337 if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) 2338 goto out_put_task; 2339 2340 mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); 2341 if (IS_ERR(mm)) { 2342 result = ERR_CAST(mm); 2343 goto out_put_task; 2344 } 2345 2346 result = ERR_PTR(-EINTR); 2347 
if (mmap_read_lock_killable(mm)) 2348 goto out_put_mm; 2349 2350 result = ERR_PTR(-ENOENT); 2351 vma = find_exact_vma(mm, vm_start, vm_end); 2352 if (!vma) 2353 goto out_no_vma; 2354 2355 if (vma->vm_file) 2356 result = proc_map_files_instantiate(dentry, task, 2357 (void *)(unsigned long)vma->vm_file->f_mode); 2358 2359 out_no_vma: 2360 mmap_read_unlock(mm); 2361 out_put_mm: 2362 mmput(mm); 2363 out_put_task: 2364 put_task_struct(task); 2365 out: 2366 return result; 2367 } 2368 2369 static const struct inode_operations proc_map_files_inode_operations = { 2370 .lookup = proc_map_files_lookup, 2371 .permission = proc_fd_permission, 2372 .setattr = proc_nochmod_setattr, 2373 }; 2374 2375 static int 2376 proc_map_files_readdir(struct file *file, struct dir_context *ctx) 2377 { 2378 struct vm_area_struct *vma; 2379 struct task_struct *task; 2380 struct mm_struct *mm; 2381 unsigned long nr_files, pos, i; 2382 GENRADIX(struct map_files_info) fa; 2383 struct map_files_info *p; 2384 int ret; 2385 struct vma_iterator vmi; 2386 2387 genradix_init(&fa); 2388 2389 ret = -ENOENT; 2390 task = get_proc_task(file_inode(file)); 2391 if (!task) 2392 goto out; 2393 2394 ret = 0; 2395 if (!dir_emit_dots(file, ctx)) 2396 goto out_put_task; 2397 2398 mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); 2399 if (IS_ERR(mm)) { 2400 ret = PTR_ERR(mm); 2401 /* if the task has no mm, the directory should just be empty */ 2402 if (ret == -ESRCH) 2403 ret = 0; 2404 goto out_put_task; 2405 } 2406 2407 ret = mmap_read_lock_killable(mm); 2408 if (ret) 2409 goto out_put_mm; 2410 2411 nr_files = 0; 2412 2413 /* 2414 * We need two passes here: 2415 * 2416 * 1) Collect vmas of mapped files with mmap_lock taken 2417 * 2) Release mmap_lock and instantiate entries 2418 * 2419 * otherwise we get lockdep complained, since filldir() 2420 * routine might require mmap_lock taken in might_fault(). 
2421 */ 2422 2423 pos = 2; 2424 vma_iter_init(&vmi, mm, 0); 2425 for_each_vma(vmi, vma) { 2426 if (!vma->vm_file) 2427 continue; 2428 if (++pos <= ctx->pos) 2429 continue; 2430 2431 p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL); 2432 if (!p) { 2433 ret = -ENOMEM; 2434 mmap_read_unlock(mm); 2435 goto out_put_mm; 2436 } 2437 2438 p->start = vma->vm_start; 2439 p->end = vma->vm_end; 2440 p->mode = vma->vm_file->f_mode; 2441 } 2442 mmap_read_unlock(mm); 2443 2444 for (i = 0; i < nr_files; i++) { 2445 char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */ 2446 unsigned int len; 2447 2448 p = genradix_ptr(&fa, i); 2449 len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end); 2450 if (!proc_fill_cache(file, ctx, 2451 buf, len, 2452 proc_map_files_instantiate, 2453 task, 2454 (void *)(unsigned long)p->mode)) 2455 break; 2456 ctx->pos++; 2457 } 2458 2459 out_put_mm: 2460 mmput(mm); 2461 out_put_task: 2462 put_task_struct(task); 2463 out: 2464 genradix_free(&fa); 2465 return ret; 2466 } 2467 2468 static const struct file_operations proc_map_files_operations = { 2469 .read = generic_read_dir, 2470 .iterate_shared = proc_map_files_readdir, 2471 .llseek = generic_file_llseek, 2472 }; 2473 2474 #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) 2475 struct timers_private { 2476 struct pid *pid; 2477 struct task_struct *task; 2478 struct pid_namespace *ns; 2479 }; 2480 2481 static void *timers_start(struct seq_file *m, loff_t *pos) 2482 { 2483 struct timers_private *tp = m->private; 2484 2485 tp->task = get_pid_task(tp->pid, PIDTYPE_PID); 2486 if (!tp->task) 2487 return ERR_PTR(-ESRCH); 2488 2489 rcu_read_lock(); 2490 return seq_hlist_start_rcu(&tp->task->signal->posix_timers, *pos); 2491 } 2492 2493 static void *timers_next(struct seq_file *m, void *v, loff_t *pos) 2494 { 2495 struct timers_private *tp = m->private; 2496 2497 return seq_hlist_next_rcu(v, &tp->task->signal->posix_timers, pos); 2498 } 2499 2500 static void timers_stop(struct seq_file 
*m, void *v) 2501 { 2502 struct timers_private *tp = m->private; 2503 2504 if (tp->task) { 2505 put_task_struct(tp->task); 2506 tp->task = NULL; 2507 rcu_read_unlock(); 2508 } 2509 } 2510 2511 static int show_timer(struct seq_file *m, void *v) 2512 { 2513 static const char * const nstr[] = { 2514 [SIGEV_SIGNAL] = "signal", 2515 [SIGEV_NONE] = "none", 2516 [SIGEV_THREAD] = "thread", 2517 }; 2518 2519 struct k_itimer *timer = hlist_entry((struct hlist_node *)v, struct k_itimer, list); 2520 struct timers_private *tp = m->private; 2521 int notify = timer->it_sigev_notify; 2522 2523 guard(spinlock_irq)(&timer->it_lock); 2524 if (!posixtimer_valid(timer)) 2525 return 0; 2526 2527 seq_printf(m, "ID: %d\n", timer->it_id); 2528 seq_printf(m, "signal: %d/%px\n", timer->sigq.info.si_signo, 2529 timer->sigq.info.si_value.sival_ptr); 2530 seq_printf(m, "notify: %s/%s.%d\n", nstr[notify & ~SIGEV_THREAD_ID], 2531 (notify & SIGEV_THREAD_ID) ? "tid" : "pid", 2532 pid_nr_ns(timer->it_pid, tp->ns)); 2533 seq_printf(m, "ClockID: %d\n", timer->it_clock); 2534 2535 return 0; 2536 } 2537 2538 static const struct seq_operations proc_timers_seq_ops = { 2539 .start = timers_start, 2540 .next = timers_next, 2541 .stop = timers_stop, 2542 .show = show_timer, 2543 }; 2544 2545 static int proc_timers_open(struct inode *inode, struct file *file) 2546 { 2547 struct timers_private *tp; 2548 2549 tp = __seq_open_private(file, &proc_timers_seq_ops, 2550 sizeof(struct timers_private)); 2551 if (!tp) 2552 return -ENOMEM; 2553 2554 tp->pid = proc_pid(inode); 2555 tp->ns = proc_pid_ns(inode->i_sb); 2556 return 0; 2557 } 2558 2559 static const struct file_operations proc_timers_operations = { 2560 .open = proc_timers_open, 2561 .read = seq_read, 2562 .llseek = seq_lseek, 2563 .release = seq_release_private, 2564 }; 2565 #endif 2566 2567 static ssize_t timerslack_ns_write(struct file *file, const char __user *buf, 2568 size_t count, loff_t *offset) 2569 { 2570 struct inode *inode = file_inode(file); 2571 
struct task_struct *p; 2572 u64 slack_ns; 2573 int err; 2574 2575 err = kstrtoull_from_user(buf, count, 10, &slack_ns); 2576 if (err < 0) 2577 return err; 2578 2579 p = get_proc_task(inode); 2580 if (!p) 2581 return -ESRCH; 2582 2583 if (p != current) { 2584 rcu_read_lock(); 2585 if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { 2586 rcu_read_unlock(); 2587 count = -EPERM; 2588 goto out; 2589 } 2590 rcu_read_unlock(); 2591 2592 err = security_task_setscheduler(p); 2593 if (err) { 2594 count = err; 2595 goto out; 2596 } 2597 } 2598 2599 task_lock(p); 2600 if (rt_or_dl_task_policy(p)) 2601 slack_ns = 0; 2602 else if (slack_ns == 0) 2603 slack_ns = p->default_timer_slack_ns; 2604 p->timer_slack_ns = slack_ns; 2605 task_unlock(p); 2606 2607 out: 2608 put_task_struct(p); 2609 2610 return count; 2611 } 2612 2613 static int timerslack_ns_show(struct seq_file *m, void *v) 2614 { 2615 struct inode *inode = m->private; 2616 struct task_struct *p; 2617 int err = 0; 2618 2619 p = get_proc_task(inode); 2620 if (!p) 2621 return -ESRCH; 2622 2623 if (p != current) { 2624 rcu_read_lock(); 2625 if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { 2626 rcu_read_unlock(); 2627 err = -EPERM; 2628 goto out; 2629 } 2630 rcu_read_unlock(); 2631 2632 err = security_task_getscheduler(p); 2633 if (err) 2634 goto out; 2635 } 2636 2637 task_lock(p); 2638 seq_printf(m, "%llu\n", p->timer_slack_ns); 2639 task_unlock(p); 2640 2641 out: 2642 put_task_struct(p); 2643 2644 return err; 2645 } 2646 2647 static int timerslack_ns_open(struct inode *inode, struct file *filp) 2648 { 2649 return single_open(filp, timerslack_ns_show, inode); 2650 } 2651 2652 static const struct file_operations proc_pid_set_timerslack_ns_operations = { 2653 .open = timerslack_ns_open, 2654 .read = seq_read, 2655 .write = timerslack_ns_write, 2656 .llseek = seq_lseek, 2657 .release = single_release, 2658 }; 2659 2660 static struct dentry *proc_pident_instantiate(struct dentry *dentry, 2661 struct task_struct 
*task, const void *ptr) 2662 { 2663 const struct pid_entry *p = ptr; 2664 struct inode *inode; 2665 struct proc_inode *ei; 2666 2667 inode = proc_pid_make_inode(dentry->d_sb, task, p->mode); 2668 if (!inode) 2669 return ERR_PTR(-ENOENT); 2670 2671 ei = PROC_I(inode); 2672 if (S_ISDIR(inode->i_mode)) 2673 set_nlink(inode, 2); /* Use getattr to fix if necessary */ 2674 if (p->iop) 2675 inode->i_op = p->iop; 2676 if (p->fop) 2677 inode->i_fop = p->fop; 2678 ei->op = p->op; 2679 pid_update_inode(task, inode); 2680 return d_splice_alias_ops(inode, dentry, &pid_dentry_operations); 2681 } 2682 2683 static struct dentry *proc_pident_lookup(struct inode *dir, 2684 struct dentry *dentry, 2685 const struct pid_entry *p, 2686 const struct pid_entry *end) 2687 { 2688 struct task_struct *task = get_proc_task(dir); 2689 struct dentry *res = ERR_PTR(-ENOENT); 2690 2691 if (!task) 2692 goto out_no_task; 2693 2694 /* 2695 * Yes, it does not scale. And it should not. Don't add 2696 * new entries into /proc/<tgid>/ without very good reasons. 
2697 */ 2698 for (; p < end; p++) { 2699 if (p->len != dentry->d_name.len) 2700 continue; 2701 if (!memcmp(dentry->d_name.name, p->name, p->len)) { 2702 res = proc_pident_instantiate(dentry, task, p); 2703 break; 2704 } 2705 } 2706 put_task_struct(task); 2707 out_no_task: 2708 return res; 2709 } 2710 2711 static int proc_pident_readdir(struct file *file, struct dir_context *ctx, 2712 const struct pid_entry *ents, unsigned int nents) 2713 { 2714 struct task_struct *task = get_proc_task(file_inode(file)); 2715 const struct pid_entry *p; 2716 2717 if (!task) 2718 return -ENOENT; 2719 2720 if (!dir_emit_dots(file, ctx)) 2721 goto out; 2722 2723 if (ctx->pos >= nents + 2) 2724 goto out; 2725 2726 for (p = ents + (ctx->pos - 2); p < ents + nents; p++) { 2727 if (!proc_fill_cache(file, ctx, p->name, p->len, 2728 proc_pident_instantiate, task, p)) 2729 break; 2730 ctx->pos++; 2731 } 2732 out: 2733 put_task_struct(task); 2734 return 0; 2735 } 2736 2737 #ifdef CONFIG_SECURITY 2738 static int proc_pid_attr_open(struct inode *inode, struct file *file) 2739 { 2740 file->private_data = NULL; 2741 __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); 2742 return 0; 2743 } 2744 2745 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, 2746 size_t count, loff_t *ppos) 2747 { 2748 struct inode * inode = file_inode(file); 2749 char *p = NULL; 2750 ssize_t length; 2751 struct task_struct *task = get_proc_task(inode); 2752 2753 if (!task) 2754 return -ESRCH; 2755 2756 length = security_getprocattr(task, PROC_I(inode)->op.lsmid, 2757 file->f_path.dentry->d_name.name, 2758 &p); 2759 put_task_struct(task); 2760 if (length > 0) 2761 length = simple_read_from_buffer(buf, count, ppos, p, length); 2762 kfree(p); 2763 return length; 2764 } 2765 2766 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 2767 size_t count, loff_t *ppos) 2768 { 2769 struct inode * inode = file_inode(file); 2770 struct task_struct *task; 2771 void *page; 2772 int rv; 2773 
2774 /* A task may only write when it was the opener. */ 2775 if (file->private_data != current->mm) 2776 return -EPERM; 2777 2778 rcu_read_lock(); 2779 task = pid_task(proc_pid(inode), PIDTYPE_PID); 2780 if (!task) { 2781 rcu_read_unlock(); 2782 return -ESRCH; 2783 } 2784 /* A task may only write its own attributes. */ 2785 if (current != task) { 2786 rcu_read_unlock(); 2787 return -EACCES; 2788 } 2789 /* Prevent changes to overridden credentials. */ 2790 if (current_cred() != current_real_cred()) { 2791 rcu_read_unlock(); 2792 return -EBUSY; 2793 } 2794 rcu_read_unlock(); 2795 2796 if (count > PAGE_SIZE) 2797 count = PAGE_SIZE; 2798 2799 /* No partial writes. */ 2800 if (*ppos != 0) 2801 return -EINVAL; 2802 2803 page = memdup_user(buf, count); 2804 if (IS_ERR(page)) { 2805 rv = PTR_ERR(page); 2806 goto out; 2807 } 2808 2809 /* Guard against adverse ptrace interaction */ 2810 rv = mutex_lock_interruptible(¤t->signal->cred_guard_mutex); 2811 if (rv < 0) 2812 goto out_free; 2813 2814 rv = security_setprocattr(PROC_I(inode)->op.lsmid, 2815 file->f_path.dentry->d_name.name, page, 2816 count); 2817 mutex_unlock(¤t->signal->cred_guard_mutex); 2818 out_free: 2819 kfree(page); 2820 out: 2821 return rv; 2822 } 2823 2824 static const struct file_operations proc_pid_attr_operations = { 2825 .open = proc_pid_attr_open, 2826 .read = proc_pid_attr_read, 2827 .write = proc_pid_attr_write, 2828 .llseek = generic_file_llseek, 2829 .release = mem_release, 2830 }; 2831 2832 #define LSM_DIR_OPS(LSM) \ 2833 static int proc_##LSM##_attr_dir_iterate(struct file *filp, \ 2834 struct dir_context *ctx) \ 2835 { \ 2836 return proc_pident_readdir(filp, ctx, \ 2837 LSM##_attr_dir_stuff, \ 2838 ARRAY_SIZE(LSM##_attr_dir_stuff)); \ 2839 } \ 2840 \ 2841 static const struct file_operations proc_##LSM##_attr_dir_ops = { \ 2842 .read = generic_read_dir, \ 2843 .iterate_shared = proc_##LSM##_attr_dir_iterate, \ 2844 .llseek = default_llseek, \ 2845 }; \ 2846 \ 2847 static struct dentry 
*proc_##LSM##_attr_dir_lookup(struct inode *dir, \ 2848 struct dentry *dentry, unsigned int flags) \ 2849 { \ 2850 return proc_pident_lookup(dir, dentry, \ 2851 LSM##_attr_dir_stuff, \ 2852 LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \ 2853 } \ 2854 \ 2855 static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \ 2856 .lookup = proc_##LSM##_attr_dir_lookup, \ 2857 .getattr = pid_getattr, \ 2858 .setattr = proc_nochmod_setattr, \ 2859 } 2860 2861 #ifdef CONFIG_SECURITY_SMACK 2862 static const struct pid_entry smack_attr_dir_stuff[] = { 2863 ATTR(LSM_ID_SMACK, "current", 0666), 2864 }; 2865 LSM_DIR_OPS(smack); 2866 #endif 2867 2868 #ifdef CONFIG_SECURITY_APPARMOR 2869 static const struct pid_entry apparmor_attr_dir_stuff[] = { 2870 ATTR(LSM_ID_APPARMOR, "current", 0666), 2871 ATTR(LSM_ID_APPARMOR, "prev", 0444), 2872 ATTR(LSM_ID_APPARMOR, "exec", 0666), 2873 }; 2874 LSM_DIR_OPS(apparmor); 2875 #endif 2876 2877 static const struct pid_entry attr_dir_stuff[] = { 2878 ATTR(LSM_ID_UNDEF, "current", 0666), 2879 ATTR(LSM_ID_UNDEF, "prev", 0444), 2880 ATTR(LSM_ID_UNDEF, "exec", 0666), 2881 ATTR(LSM_ID_UNDEF, "fscreate", 0666), 2882 ATTR(LSM_ID_UNDEF, "keycreate", 0666), 2883 ATTR(LSM_ID_UNDEF, "sockcreate", 0666), 2884 #ifdef CONFIG_SECURITY_SMACK 2885 DIR("smack", 0555, 2886 proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops), 2887 #endif 2888 #ifdef CONFIG_SECURITY_APPARMOR 2889 DIR("apparmor", 0555, 2890 proc_apparmor_attr_dir_inode_ops, proc_apparmor_attr_dir_ops), 2891 #endif 2892 }; 2893 2894 static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) 2895 { 2896 return proc_pident_readdir(file, ctx, 2897 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); 2898 } 2899 2900 static const struct file_operations proc_attr_dir_operations = { 2901 .read = generic_read_dir, 2902 .iterate_shared = proc_attr_dir_readdir, 2903 .llseek = generic_file_llseek, 2904 }; 2905 2906 static struct dentry *proc_attr_dir_lookup(struct inode 
*dir, 2907 struct dentry *dentry, unsigned int flags) 2908 { 2909 return proc_pident_lookup(dir, dentry, 2910 attr_dir_stuff, 2911 attr_dir_stuff + ARRAY_SIZE(attr_dir_stuff)); 2912 } 2913 2914 static const struct inode_operations proc_attr_dir_inode_operations = { 2915 .lookup = proc_attr_dir_lookup, 2916 .getattr = pid_getattr, 2917 .setattr = proc_nochmod_setattr, 2918 }; 2919 2920 #endif 2921 2922 #ifdef CONFIG_ELF_CORE 2923 static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, 2924 size_t count, loff_t *ppos) 2925 { 2926 struct task_struct *task = get_proc_task(file_inode(file)); 2927 struct mm_struct *mm; 2928 char buffer[PROC_NUMBUF]; 2929 size_t len; 2930 int ret; 2931 2932 if (!task) 2933 return -ESRCH; 2934 2935 ret = 0; 2936 mm = get_task_mm(task); 2937 if (mm) { 2938 unsigned long flags = __mm_flags_get_word(mm); 2939 2940 len = snprintf(buffer, sizeof(buffer), "%08lx\n", 2941 ((flags & MMF_DUMP_FILTER_MASK) >> 2942 MMF_DUMP_FILTER_SHIFT)); 2943 mmput(mm); 2944 ret = simple_read_from_buffer(buf, count, ppos, buffer, len); 2945 } 2946 2947 put_task_struct(task); 2948 2949 return ret; 2950 } 2951 2952 static ssize_t proc_coredump_filter_write(struct file *file, 2953 const char __user *buf, 2954 size_t count, 2955 loff_t *ppos) 2956 { 2957 struct task_struct *task; 2958 struct mm_struct *mm; 2959 unsigned int val; 2960 int ret; 2961 int i; 2962 unsigned long mask; 2963 2964 ret = kstrtouint_from_user(buf, count, 0, &val); 2965 if (ret < 0) 2966 return ret; 2967 2968 ret = -ESRCH; 2969 task = get_proc_task(file_inode(file)); 2970 if (!task) 2971 goto out_no_task; 2972 2973 mm = get_task_mm(task); 2974 if (!mm) 2975 goto out_no_mm; 2976 ret = 0; 2977 2978 for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { 2979 if (val & mask) 2980 mm_flags_set(i + MMF_DUMP_FILTER_SHIFT, mm); 2981 else 2982 mm_flags_clear(i + MMF_DUMP_FILTER_SHIFT, mm); 2983 } 2984 2985 mmput(mm); 2986 out_no_mm: 2987 put_task_struct(task); 2988 
out_no_task: 2989 if (ret < 0) 2990 return ret; 2991 return count; 2992 } 2993 2994 static const struct file_operations proc_coredump_filter_operations = { 2995 .read = proc_coredump_filter_read, 2996 .write = proc_coredump_filter_write, 2997 .llseek = generic_file_llseek, 2998 }; 2999 #endif 3000 3001 #ifdef CONFIG_TASK_IO_ACCOUNTING 3002 static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole) 3003 { 3004 struct task_io_accounting acct; 3005 int result; 3006 3007 result = down_read_killable(&task->signal->exec_update_lock); 3008 if (result) 3009 return result; 3010 3011 if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { 3012 result = -EACCES; 3013 goto out_unlock; 3014 } 3015 3016 if (whole) { 3017 struct signal_struct *sig = task->signal; 3018 struct task_struct *t; 3019 3020 guard(rcu)(); 3021 scoped_seqlock_read (&sig->stats_lock, ss_lock_irqsave) { 3022 acct = sig->ioac; 3023 __for_each_thread(sig, t) 3024 task_io_accounting_add(&acct, &t->ioac); 3025 3026 } 3027 } else { 3028 acct = task->ioac; 3029 } 3030 3031 seq_printf(m, 3032 "rchar: %llu\n" 3033 "wchar: %llu\n" 3034 "syscr: %llu\n" 3035 "syscw: %llu\n" 3036 "read_bytes: %llu\n" 3037 "write_bytes: %llu\n" 3038 "cancelled_write_bytes: %llu\n", 3039 (unsigned long long)acct.rchar, 3040 (unsigned long long)acct.wchar, 3041 (unsigned long long)acct.syscr, 3042 (unsigned long long)acct.syscw, 3043 (unsigned long long)acct.read_bytes, 3044 (unsigned long long)acct.write_bytes, 3045 (unsigned long long)acct.cancelled_write_bytes); 3046 result = 0; 3047 3048 out_unlock: 3049 up_read(&task->signal->exec_update_lock); 3050 return result; 3051 } 3052 3053 static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns, 3054 struct pid *pid, struct task_struct *task) 3055 { 3056 return do_io_accounting(task, m, 0); 3057 } 3058 3059 static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns, 3060 struct pid *pid, struct task_struct *task) 3061 { 
3062 return do_io_accounting(task, m, 1); 3063 } 3064 #endif /* CONFIG_TASK_IO_ACCOUNTING */ 3065 3066 #ifdef CONFIG_USER_NS 3067 static int proc_id_map_open(struct inode *inode, struct file *file, 3068 const struct seq_operations *seq_ops) 3069 { 3070 struct user_namespace *ns = NULL; 3071 struct task_struct *task; 3072 struct seq_file *seq; 3073 int ret = -EINVAL; 3074 3075 task = get_proc_task(inode); 3076 if (task) { 3077 rcu_read_lock(); 3078 ns = get_user_ns(task_cred_xxx(task, user_ns)); 3079 rcu_read_unlock(); 3080 put_task_struct(task); 3081 } 3082 if (!ns) 3083 goto err; 3084 3085 ret = seq_open(file, seq_ops); 3086 if (ret) 3087 goto err_put_ns; 3088 3089 seq = file->private_data; 3090 seq->private = ns; 3091 3092 return 0; 3093 err_put_ns: 3094 put_user_ns(ns); 3095 err: 3096 return ret; 3097 } 3098 3099 static int proc_id_map_release(struct inode *inode, struct file *file) 3100 { 3101 struct seq_file *seq = file->private_data; 3102 struct user_namespace *ns = seq->private; 3103 put_user_ns(ns); 3104 return seq_release(inode, file); 3105 } 3106 3107 static int proc_uid_map_open(struct inode *inode, struct file *file) 3108 { 3109 return proc_id_map_open(inode, file, &proc_uid_seq_operations); 3110 } 3111 3112 static int proc_gid_map_open(struct inode *inode, struct file *file) 3113 { 3114 return proc_id_map_open(inode, file, &proc_gid_seq_operations); 3115 } 3116 3117 static int proc_projid_map_open(struct inode *inode, struct file *file) 3118 { 3119 return proc_id_map_open(inode, file, &proc_projid_seq_operations); 3120 } 3121 3122 static const struct file_operations proc_uid_map_operations = { 3123 .open = proc_uid_map_open, 3124 .write = proc_uid_map_write, 3125 .read = seq_read, 3126 .llseek = seq_lseek, 3127 .release = proc_id_map_release, 3128 }; 3129 3130 static const struct file_operations proc_gid_map_operations = { 3131 .open = proc_gid_map_open, 3132 .write = proc_gid_map_write, 3133 .read = seq_read, 3134 .llseek = seq_lseek, 3135 .release = 
proc_id_map_release, 3136 }; 3137 3138 static const struct file_operations proc_projid_map_operations = { 3139 .open = proc_projid_map_open, 3140 .write = proc_projid_map_write, 3141 .read = seq_read, 3142 .llseek = seq_lseek, 3143 .release = proc_id_map_release, 3144 }; 3145 3146 static int proc_setgroups_open(struct inode *inode, struct file *file) 3147 { 3148 struct user_namespace *ns = NULL; 3149 struct task_struct *task; 3150 int ret; 3151 3152 ret = -ESRCH; 3153 task = get_proc_task(inode); 3154 if (task) { 3155 rcu_read_lock(); 3156 ns = get_user_ns(task_cred_xxx(task, user_ns)); 3157 rcu_read_unlock(); 3158 put_task_struct(task); 3159 } 3160 if (!ns) 3161 goto err; 3162 3163 if (file->f_mode & FMODE_WRITE) { 3164 ret = -EACCES; 3165 if (!ns_capable(ns, CAP_SYS_ADMIN)) 3166 goto err_put_ns; 3167 } 3168 3169 ret = single_open(file, &proc_setgroups_show, ns); 3170 if (ret) 3171 goto err_put_ns; 3172 3173 return 0; 3174 err_put_ns: 3175 put_user_ns(ns); 3176 err: 3177 return ret; 3178 } 3179 3180 static int proc_setgroups_release(struct inode *inode, struct file *file) 3181 { 3182 struct seq_file *seq = file->private_data; 3183 struct user_namespace *ns = seq->private; 3184 int ret = single_release(inode, file); 3185 put_user_ns(ns); 3186 return ret; 3187 } 3188 3189 static const struct file_operations proc_setgroups_operations = { 3190 .open = proc_setgroups_open, 3191 .write = proc_setgroups_write, 3192 .read = seq_read, 3193 .llseek = seq_lseek, 3194 .release = proc_setgroups_release, 3195 }; 3196 #endif /* CONFIG_USER_NS */ 3197 3198 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 3199 struct pid *pid, struct task_struct *task) 3200 { 3201 int err = lock_trace(task); 3202 if (!err) { 3203 seq_printf(m, "%08x\n", task->personality); 3204 unlock_trace(task); 3205 } 3206 return err; 3207 } 3208 3209 #ifdef CONFIG_LIVEPATCH 3210 static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns, 3211 struct pid *pid, 
struct task_struct *task) 3212 { 3213 seq_printf(m, "%d\n", task->patch_state); 3214 return 0; 3215 } 3216 #endif /* CONFIG_LIVEPATCH */ 3217 3218 #ifdef CONFIG_KSM 3219 static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace *ns, 3220 struct pid *pid, struct task_struct *task) 3221 { 3222 struct mm_struct *mm; 3223 3224 mm = get_task_mm(task); 3225 if (mm) { 3226 seq_printf(m, "%lu\n", mm->ksm_merging_pages); 3227 mmput(mm); 3228 } 3229 3230 return 0; 3231 } 3232 static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns, 3233 struct pid *pid, struct task_struct *task) 3234 { 3235 struct mm_struct *mm; 3236 int ret = 0; 3237 3238 mm = get_task_mm(task); 3239 if (mm) { 3240 seq_printf(m, "ksm_rmap_items %lu\n", mm->ksm_rmap_items); 3241 seq_printf(m, "ksm_zero_pages %ld\n", mm_ksm_zero_pages(mm)); 3242 seq_printf(m, "ksm_merging_pages %lu\n", mm->ksm_merging_pages); 3243 seq_printf(m, "ksm_process_profit %ld\n", ksm_process_profit(mm)); 3244 seq_printf(m, "ksm_merge_any: %s\n", 3245 mm_flags_test(MMF_VM_MERGE_ANY, mm) ? "yes" : "no"); 3246 ret = mmap_read_lock_killable(mm); 3247 if (ret) { 3248 mmput(mm); 3249 return ret; 3250 } 3251 seq_printf(m, "ksm_mergeable: %s\n", 3252 ksm_process_mergeable(mm) ? 
"yes" : "no"); 3253 mmap_read_unlock(mm); 3254 mmput(mm); 3255 } 3256 3257 return 0; 3258 } 3259 #endif /* CONFIG_KSM */ 3260 3261 #ifdef CONFIG_KSTACK_ERASE_METRICS 3262 static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns, 3263 struct pid *pid, struct task_struct *task) 3264 { 3265 unsigned long prev_depth = THREAD_SIZE - 3266 (task->prev_lowest_stack & (THREAD_SIZE - 1)); 3267 unsigned long depth = THREAD_SIZE - 3268 (task->lowest_stack & (THREAD_SIZE - 1)); 3269 3270 seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n", 3271 prev_depth, depth); 3272 return 0; 3273 } 3274 #endif /* CONFIG_KSTACK_ERASE_METRICS */ 3275 3276 /* 3277 * Thread groups 3278 */ 3279 static const struct file_operations proc_task_operations; 3280 static const struct inode_operations proc_task_inode_operations; 3281 3282 static const struct pid_entry tgid_base_stuff[] = { 3283 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 3284 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 3285 DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), 3286 DIR("fdinfo", S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3287 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 3288 #ifdef CONFIG_NET 3289 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 3290 #endif 3291 REG("environ", S_IRUSR, proc_environ_operations), 3292 REG("auxv", S_IRUSR, proc_auxv_operations), 3293 ONE("status", S_IRUGO, proc_pid_status), 3294 ONE("personality", S_IRUSR, proc_pid_personality), 3295 ONE("limits", S_IRUGO, proc_pid_limits), 3296 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 3297 #ifdef CONFIG_SCHED_AUTOGROUP 3298 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), 3299 #endif 3300 #ifdef CONFIG_TIME_NS 3301 REG("timens_offsets", S_IRUGO|S_IWUSR, proc_timens_offsets_operations), 3302 
#endif 3303 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 3304 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 3305 ONE("syscall", S_IRUSR, proc_pid_syscall), 3306 #endif 3307 REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), 3308 ONE("stat", S_IRUGO, proc_tgid_stat), 3309 ONE("statm", S_IRUGO, proc_pid_statm), 3310 REG("maps", S_IRUGO, proc_pid_maps_operations), 3311 #ifdef CONFIG_NUMA 3312 REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), 3313 #endif 3314 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 3315 LNK("cwd", proc_cwd_link), 3316 LNK("root", proc_root_link), 3317 LNK("exe", proc_exe_link), 3318 REG("mounts", S_IRUGO, proc_mounts_operations), 3319 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 3320 REG("mountstats", S_IRUSR, proc_mountstats_operations), 3321 #ifdef CONFIG_PROC_PAGE_MONITOR 3322 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 3323 REG("smaps", S_IRUGO, proc_pid_smaps_operations), 3324 REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), 3325 REG("pagemap", S_IRUSR, proc_pagemap_operations), 3326 #endif 3327 #ifdef CONFIG_SECURITY 3328 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 3329 #endif 3330 #ifdef CONFIG_KALLSYMS 3331 ONE("wchan", S_IRUGO, proc_pid_wchan), 3332 #endif 3333 #ifdef CONFIG_STACKTRACE 3334 ONE("stack", S_IRUSR, proc_pid_stack), 3335 #endif 3336 #ifdef CONFIG_SCHED_INFO 3337 ONE("schedstat", S_IRUGO, proc_pid_schedstat), 3338 #endif 3339 #ifdef CONFIG_LATENCYTOP 3340 REG("latency", S_IRUGO, proc_lstats_operations), 3341 #endif 3342 #ifdef CONFIG_PROC_PID_CPUSET 3343 ONE("cpuset", S_IRUGO, proc_cpuset_show), 3344 #endif 3345 #ifdef CONFIG_CGROUPS 3346 ONE("cgroup", S_IRUGO, proc_cgroup_show), 3347 #endif 3348 #ifdef CONFIG_PROC_CPU_RESCTRL 3349 ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show), 3350 #endif 3351 ONE("oom_score", S_IRUGO, proc_oom_score), 3352 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 3353 REG("oom_score_adj", 
S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3354 #ifdef CONFIG_AUDIT 3355 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3356 REG("sessionid", S_IRUGO, proc_sessionid_operations), 3357 #endif 3358 #ifdef CONFIG_FAULT_INJECTION 3359 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 3360 REG("fail-nth", 0644, proc_fail_nth_operations), 3361 #endif 3362 #ifdef CONFIG_ELF_CORE 3363 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), 3364 #endif 3365 #ifdef CONFIG_TASK_IO_ACCOUNTING 3366 ONE("io", S_IRUSR, proc_tgid_io_accounting), 3367 #endif 3368 #ifdef CONFIG_USER_NS 3369 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 3370 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 3371 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), 3372 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), 3373 #endif 3374 #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) 3375 REG("timers", S_IRUGO, proc_timers_operations), 3376 #endif 3377 REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), 3378 #ifdef CONFIG_LIVEPATCH 3379 ONE("patch_state", S_IRUSR, proc_pid_patch_state), 3380 #endif 3381 #ifdef CONFIG_KSTACK_ERASE_METRICS 3382 ONE("stack_depth", S_IRUGO, proc_stack_depth), 3383 #endif 3384 #ifdef CONFIG_PROC_PID_ARCH_STATUS 3385 ONE("arch_status", S_IRUGO, proc_pid_arch_status), 3386 #endif 3387 #ifdef CONFIG_SECCOMP_CACHE_DEBUG 3388 ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), 3389 #endif 3390 #ifdef CONFIG_KSM 3391 ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), 3392 ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat), 3393 #endif 3394 }; 3395 3396 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) 3397 { 3398 return proc_pident_readdir(file, ctx, 3399 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 3400 } 3401 3402 static const struct file_operations proc_tgid_base_operations = { 3403 .read = 
generic_read_dir, 3404 .iterate_shared = proc_tgid_base_readdir, 3405 .llseek = generic_file_llseek, 3406 }; 3407 3408 struct pid *tgid_pidfd_to_pid(const struct file *file) 3409 { 3410 if (file->f_op != &proc_tgid_base_operations) 3411 return ERR_PTR(-EBADF); 3412 3413 return proc_pid(file_inode(file)); 3414 } 3415 3416 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 3417 { 3418 return proc_pident_lookup(dir, dentry, 3419 tgid_base_stuff, 3420 tgid_base_stuff + ARRAY_SIZE(tgid_base_stuff)); 3421 } 3422 3423 static const struct inode_operations proc_tgid_base_inode_operations = { 3424 .lookup = proc_tgid_base_lookup, 3425 .getattr = pid_getattr, 3426 .setattr = proc_nochmod_setattr, 3427 .permission = proc_pid_permission, 3428 }; 3429 3430 /** 3431 * proc_flush_pid - Remove dcache entries for @pid from the /proc dcache. 3432 * @pid: pid that should be flushed. 3433 * 3434 * This function walks a list of inodes (that belong to any proc 3435 * filesystem) that are attached to the pid and flushes them from 3436 * the dentry cache. 3437 * 3438 * It is safe and reasonable to cache /proc entries for a task until 3439 * that task exits. After that they just clog up the dcache with 3440 * useless entries, possibly causing useful dcache entries to be 3441 * flushed instead. This routine is provided to flush those useless 3442 * dcache entries when a process is reaped. 3443 * 3444 * NOTE: This routine is just an optimization so it does not guarantee 3445 * that no dcache entries will exist after a process is reaped 3446 * it just makes it very unlikely that any will persist. 
3447 */ 3448 3449 void proc_flush_pid(struct pid *pid) 3450 { 3451 proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock); 3452 } 3453 3454 static struct dentry *proc_pid_instantiate(struct dentry * dentry, 3455 struct task_struct *task, const void *ptr) 3456 { 3457 struct inode *inode; 3458 3459 inode = proc_pid_make_base_inode(dentry->d_sb, task, 3460 S_IFDIR | S_IRUGO | S_IXUGO); 3461 if (!inode) 3462 return ERR_PTR(-ENOENT); 3463 3464 inode->i_op = &proc_tgid_base_inode_operations; 3465 inode->i_fop = &proc_tgid_base_operations; 3466 inode->i_flags|=S_IMMUTABLE; 3467 3468 set_nlink(inode, nlink_tgid); 3469 pid_update_inode(task, inode); 3470 3471 return d_splice_alias_ops(inode, dentry, &pid_dentry_operations); 3472 } 3473 3474 struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags) 3475 { 3476 struct task_struct *task; 3477 unsigned tgid; 3478 struct proc_fs_info *fs_info; 3479 struct pid_namespace *ns; 3480 struct dentry *result = ERR_PTR(-ENOENT); 3481 3482 tgid = name_to_int(&dentry->d_name); 3483 if (tgid == ~0U) 3484 goto out; 3485 3486 fs_info = proc_sb_info(dentry->d_sb); 3487 ns = fs_info->pid_ns; 3488 rcu_read_lock(); 3489 task = find_task_by_pid_ns(tgid, ns); 3490 if (task) 3491 get_task_struct(task); 3492 rcu_read_unlock(); 3493 if (!task) 3494 goto out; 3495 3496 /* Limit procfs to only ptraceable tasks */ 3497 if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) { 3498 if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS)) 3499 goto out_put_task; 3500 } 3501 3502 result = proc_pid_instantiate(dentry, task, NULL); 3503 out_put_task: 3504 put_task_struct(task); 3505 out: 3506 return result; 3507 } 3508 3509 /* 3510 * Find the first task with tgid >= tgid 3511 * 3512 */ 3513 struct tgid_iter { 3514 unsigned int tgid; 3515 struct task_struct *task; 3516 }; 3517 static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter) 3518 { 3519 struct pid *pid; 3520 3521 if (iter.task) 3522 put_task_struct(iter.task); 
3523 rcu_read_lock(); 3524 retry: 3525 iter.task = NULL; 3526 pid = find_ge_pid(iter.tgid, ns); 3527 if (pid) { 3528 iter.tgid = pid_nr_ns(pid, ns); 3529 iter.task = pid_task(pid, PIDTYPE_TGID); 3530 if (!iter.task) { 3531 iter.tgid += 1; 3532 goto retry; 3533 } 3534 get_task_struct(iter.task); 3535 } 3536 rcu_read_unlock(); 3537 return iter; 3538 } 3539 3540 #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2) 3541 3542 /* for the /proc/ directory itself, after non-process stuff has been done */ 3543 int proc_pid_readdir(struct file *file, struct dir_context *ctx) 3544 { 3545 struct tgid_iter iter; 3546 struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb); 3547 struct pid_namespace *ns = proc_pid_ns(file_inode(file)->i_sb); 3548 loff_t pos = ctx->pos; 3549 3550 if (pos >= PID_MAX_LIMIT + TGID_OFFSET) 3551 return 0; 3552 3553 if (pos == TGID_OFFSET - 2) { 3554 if (!dir_emit(ctx, "self", 4, self_inum, DT_LNK)) 3555 return 0; 3556 ctx->pos = pos = pos + 1; 3557 } 3558 if (pos == TGID_OFFSET - 1) { 3559 if (!dir_emit(ctx, "thread-self", 11, thread_self_inum, DT_LNK)) 3560 return 0; 3561 ctx->pos = pos = pos + 1; 3562 } 3563 iter.tgid = pos - TGID_OFFSET; 3564 iter.task = NULL; 3565 for (iter = next_tgid(ns, iter); 3566 iter.task; 3567 iter.tgid += 1, iter = next_tgid(ns, iter)) { 3568 char name[10 + 1]; 3569 unsigned int len; 3570 3571 cond_resched(); 3572 if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE)) 3573 continue; 3574 3575 len = snprintf(name, sizeof(name), "%u", iter.tgid); 3576 ctx->pos = iter.tgid + TGID_OFFSET; 3577 if (!proc_fill_cache(file, ctx, name, len, 3578 proc_pid_instantiate, iter.task, NULL)) { 3579 put_task_struct(iter.task); 3580 return 0; 3581 } 3582 } 3583 ctx->pos = PID_MAX_LIMIT + TGID_OFFSET; 3584 return 0; 3585 } 3586 3587 /* 3588 * proc_tid_comm_permission is a special permission function exclusively 3589 * used for the node /proc/<pid>/task/<tid>/comm. 
3590 * It bypasses generic permission checks in the case where a task of the same 3591 * task group attempts to access the node. 3592 * The rationale behind this is that glibc and bionic access this node for 3593 * cross thread naming (pthread_set/getname_np(!self)). However, if 3594 * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0, 3595 * which locks out the cross thread naming implementation. 3596 * This function makes sure that the node is always accessible for members of 3597 * same thread group. 3598 */ 3599 static int proc_tid_comm_permission(struct mnt_idmap *idmap, 3600 struct inode *inode, int mask) 3601 { 3602 bool is_same_tgroup; 3603 struct task_struct *task; 3604 3605 task = get_proc_task(inode); 3606 if (!task) 3607 return -ESRCH; 3608 is_same_tgroup = same_thread_group(current, task); 3609 put_task_struct(task); 3610 3611 if (likely(is_same_tgroup && !(mask & MAY_EXEC))) { 3612 /* This file (/proc/<pid>/task/<tid>/comm) can always be 3613 * read or written by the members of the corresponding 3614 * thread group. 
3615 */ 3616 return 0; 3617 } 3618 3619 return generic_permission(&nop_mnt_idmap, inode, mask); 3620 } 3621 3622 static const struct inode_operations proc_tid_comm_inode_operations = { 3623 .setattr = proc_nochmod_setattr, 3624 .permission = proc_tid_comm_permission, 3625 }; 3626 3627 /* 3628 * Tasks 3629 */ 3630 static const struct pid_entry tid_base_stuff[] = { 3631 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 3632 DIR("fdinfo", S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3633 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 3634 #ifdef CONFIG_NET 3635 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 3636 #endif 3637 REG("environ", S_IRUSR, proc_environ_operations), 3638 REG("auxv", S_IRUSR, proc_auxv_operations), 3639 ONE("status", S_IRUGO, proc_pid_status), 3640 ONE("personality", S_IRUSR, proc_pid_personality), 3641 ONE("limits", S_IRUGO, proc_pid_limits), 3642 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 3643 NOD("comm", S_IFREG|S_IRUGO|S_IWUSR, 3644 &proc_tid_comm_inode_operations, 3645 &proc_pid_set_comm_operations, {}), 3646 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 3647 ONE("syscall", S_IRUSR, proc_pid_syscall), 3648 #endif 3649 REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), 3650 ONE("stat", S_IRUGO, proc_tid_stat), 3651 ONE("statm", S_IRUGO, proc_pid_statm), 3652 REG("maps", S_IRUGO, proc_pid_maps_operations), 3653 #ifdef CONFIG_PROC_CHILDREN 3654 REG("children", S_IRUGO, proc_tid_children_operations), 3655 #endif 3656 #ifdef CONFIG_NUMA 3657 REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), 3658 #endif 3659 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 3660 LNK("cwd", proc_cwd_link), 3661 LNK("root", proc_root_link), 3662 LNK("exe", proc_exe_link), 3663 REG("mounts", S_IRUGO, proc_mounts_operations), 3664 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 3665 #ifdef CONFIG_PROC_PAGE_MONITOR 3666 REG("clear_refs", 
S_IWUSR, proc_clear_refs_operations), 3667 REG("smaps", S_IRUGO, proc_pid_smaps_operations), 3668 REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), 3669 REG("pagemap", S_IRUSR, proc_pagemap_operations), 3670 #endif 3671 #ifdef CONFIG_SECURITY 3672 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 3673 #endif 3674 #ifdef CONFIG_KALLSYMS 3675 ONE("wchan", S_IRUGO, proc_pid_wchan), 3676 #endif 3677 #ifdef CONFIG_STACKTRACE 3678 ONE("stack", S_IRUSR, proc_pid_stack), 3679 #endif 3680 #ifdef CONFIG_SCHED_INFO 3681 ONE("schedstat", S_IRUGO, proc_pid_schedstat), 3682 #endif 3683 #ifdef CONFIG_LATENCYTOP 3684 REG("latency", S_IRUGO, proc_lstats_operations), 3685 #endif 3686 #ifdef CONFIG_PROC_PID_CPUSET 3687 ONE("cpuset", S_IRUGO, proc_cpuset_show), 3688 #endif 3689 #ifdef CONFIG_CGROUPS 3690 ONE("cgroup", S_IRUGO, proc_cgroup_show), 3691 #endif 3692 #ifdef CONFIG_PROC_CPU_RESCTRL 3693 ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show), 3694 #endif 3695 ONE("oom_score", S_IRUGO, proc_oom_score), 3696 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), 3697 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3698 #ifdef CONFIG_AUDIT 3699 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3700 REG("sessionid", S_IRUGO, proc_sessionid_operations), 3701 #endif 3702 #ifdef CONFIG_FAULT_INJECTION 3703 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 3704 REG("fail-nth", 0644, proc_fail_nth_operations), 3705 #endif 3706 #ifdef CONFIG_TASK_IO_ACCOUNTING 3707 ONE("io", S_IRUSR, proc_tid_io_accounting), 3708 #endif 3709 #ifdef CONFIG_USER_NS 3710 REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), 3711 REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), 3712 REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), 3713 REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), 3714 #endif 3715 #ifdef CONFIG_LIVEPATCH 3716 ONE("patch_state", S_IRUSR, 
proc_pid_patch_state), 3717 #endif 3718 #ifdef CONFIG_PROC_PID_ARCH_STATUS 3719 ONE("arch_status", S_IRUGO, proc_pid_arch_status), 3720 #endif 3721 #ifdef CONFIG_SECCOMP_CACHE_DEBUG 3722 ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), 3723 #endif 3724 #ifdef CONFIG_KSM 3725 ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), 3726 ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat), 3727 #endif 3728 }; 3729 3730 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) 3731 { 3732 return proc_pident_readdir(file, ctx, 3733 tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3734 } 3735 3736 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 3737 { 3738 return proc_pident_lookup(dir, dentry, 3739 tid_base_stuff, 3740 tid_base_stuff + ARRAY_SIZE(tid_base_stuff)); 3741 } 3742 3743 static const struct file_operations proc_tid_base_operations = { 3744 .read = generic_read_dir, 3745 .iterate_shared = proc_tid_base_readdir, 3746 .llseek = generic_file_llseek, 3747 }; 3748 3749 static const struct inode_operations proc_tid_base_inode_operations = { 3750 .lookup = proc_tid_base_lookup, 3751 .getattr = pid_getattr, 3752 .setattr = proc_nochmod_setattr, 3753 }; 3754 3755 static struct dentry *proc_task_instantiate(struct dentry *dentry, 3756 struct task_struct *task, const void *ptr) 3757 { 3758 struct inode *inode; 3759 inode = proc_pid_make_base_inode(dentry->d_sb, task, 3760 S_IFDIR | S_IRUGO | S_IXUGO); 3761 if (!inode) 3762 return ERR_PTR(-ENOENT); 3763 3764 inode->i_op = &proc_tid_base_inode_operations; 3765 inode->i_fop = &proc_tid_base_operations; 3766 inode->i_flags |= S_IMMUTABLE; 3767 3768 set_nlink(inode, nlink_tid); 3769 pid_update_inode(task, inode); 3770 3771 return d_splice_alias_ops(inode, dentry, &pid_dentry_operations); 3772 } 3773 3774 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) 3775 { 3776 struct task_struct *task; 3777 
struct task_struct *leader = get_proc_task(dir); 3778 unsigned tid; 3779 struct proc_fs_info *fs_info; 3780 struct pid_namespace *ns; 3781 struct dentry *result = ERR_PTR(-ENOENT); 3782 3783 if (!leader) 3784 goto out_no_task; 3785 3786 tid = name_to_int(&dentry->d_name); 3787 if (tid == ~0U) 3788 goto out; 3789 3790 fs_info = proc_sb_info(dentry->d_sb); 3791 ns = fs_info->pid_ns; 3792 rcu_read_lock(); 3793 task = find_task_by_pid_ns(tid, ns); 3794 if (task) 3795 get_task_struct(task); 3796 rcu_read_unlock(); 3797 if (!task) 3798 goto out; 3799 if (!same_thread_group(leader, task)) 3800 goto out_drop_task; 3801 3802 result = proc_task_instantiate(dentry, task, NULL); 3803 out_drop_task: 3804 put_task_struct(task); 3805 out: 3806 put_task_struct(leader); 3807 out_no_task: 3808 return result; 3809 } 3810 3811 /* 3812 * Find the first tid of a thread group to return to user space. 3813 * 3814 * Usually this is just the thread group leader, but if the users 3815 * buffer was too small or there was a seek into the middle of the 3816 * directory we have more work todo. 3817 * 3818 * In the case of a short read we start with find_task_by_pid. 3819 * 3820 * In the case of a seek we start with the leader and walk nr 3821 * threads past it. 3822 */ 3823 static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos, 3824 struct pid_namespace *ns) 3825 { 3826 struct task_struct *pos, *task; 3827 unsigned long nr = f_pos; 3828 3829 if (nr != f_pos) /* 32bit overflow? 
*/ 3830 return NULL; 3831 3832 rcu_read_lock(); 3833 task = pid_task(pid, PIDTYPE_PID); 3834 if (!task) 3835 goto fail; 3836 3837 /* Attempt to start with the tid of a thread */ 3838 if (tid && nr) { 3839 pos = find_task_by_pid_ns(tid, ns); 3840 if (pos && same_thread_group(pos, task)) 3841 goto found; 3842 } 3843 3844 /* If nr exceeds the number of threads there is nothing todo */ 3845 if (nr >= get_nr_threads(task)) 3846 goto fail; 3847 3848 /* If we haven't found our starting place yet start 3849 * with the leader and walk nr threads forward. 3850 */ 3851 for_each_thread(task, pos) { 3852 if (!nr--) 3853 goto found; 3854 } 3855 fail: 3856 pos = NULL; 3857 goto out; 3858 found: 3859 get_task_struct(pos); 3860 out: 3861 rcu_read_unlock(); 3862 return pos; 3863 } 3864 3865 /* 3866 * Find the next thread in the thread list. 3867 * Return NULL if there is an error or no next thread. 3868 * 3869 * The reference to the input task_struct is released. 3870 */ 3871 static struct task_struct *next_tid(struct task_struct *start) 3872 { 3873 struct task_struct *pos = NULL; 3874 rcu_read_lock(); 3875 if (pid_alive(start)) { 3876 pos = __next_thread(start); 3877 if (pos) 3878 get_task_struct(pos); 3879 } 3880 rcu_read_unlock(); 3881 put_task_struct(start); 3882 return pos; 3883 } 3884 3885 /* for the /proc/TGID/task/ directories */ 3886 static int proc_task_readdir(struct file *file, struct dir_context *ctx) 3887 { 3888 struct inode *inode = file_inode(file); 3889 struct task_struct *task; 3890 struct pid_namespace *ns; 3891 int tid; 3892 3893 if (proc_inode_is_dead(inode)) 3894 return -ENOENT; 3895 3896 if (!dir_emit_dots(file, ctx)) 3897 return 0; 3898 3899 /* We cache the tgid value that the last readdir call couldn't 3900 * return and lseek resets it to 0. 
3901 */ 3902 ns = proc_pid_ns(inode->i_sb); 3903 tid = (int)(intptr_t)file->private_data; 3904 file->private_data = NULL; 3905 for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); 3906 task; 3907 task = next_tid(task), ctx->pos++) { 3908 char name[10 + 1]; 3909 unsigned int len; 3910 3911 tid = task_pid_nr_ns(task, ns); 3912 if (!tid) 3913 continue; /* The task has just exited. */ 3914 len = snprintf(name, sizeof(name), "%d", tid); 3915 if (!proc_fill_cache(file, ctx, name, len, 3916 proc_task_instantiate, task, NULL)) { 3917 /* returning this tgid failed, save it as the first 3918 * pid for the next readir call */ 3919 file->private_data = (void *)(intptr_t)tid; 3920 put_task_struct(task); 3921 break; 3922 } 3923 } 3924 3925 return 0; 3926 } 3927 3928 static int proc_task_getattr(struct mnt_idmap *idmap, 3929 const struct path *path, struct kstat *stat, 3930 u32 request_mask, unsigned int query_flags) 3931 { 3932 struct inode *inode = d_inode(path->dentry); 3933 struct task_struct *p = get_proc_task(inode); 3934 generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); 3935 3936 if (p) { 3937 stat->nlink += get_nr_threads(p); 3938 put_task_struct(p); 3939 } 3940 3941 return 0; 3942 } 3943 3944 /* 3945 * proc_task_readdir() set @file->private_data to a positive integer 3946 * value, so casting that to u64 is safe. generic_llseek_cookie() will 3947 * set @cookie to 0, so casting to an int is safe. The WARN_ON_ONCE() is 3948 * here to catch any unexpected change in behavior either in 3949 * proc_task_readdir() or generic_llseek_cookie(). 
3950 */ 3951 static loff_t proc_dir_llseek(struct file *file, loff_t offset, int whence) 3952 { 3953 u64 cookie = (u64)(intptr_t)file->private_data; 3954 loff_t off; 3955 3956 off = generic_llseek_cookie(file, offset, whence, &cookie); 3957 WARN_ON_ONCE(cookie > INT_MAX); 3958 file->private_data = (void *)(intptr_t)cookie; /* serialized by f_pos_lock */ 3959 return off; 3960 } 3961 3962 static const struct inode_operations proc_task_inode_operations = { 3963 .lookup = proc_task_lookup, 3964 .getattr = proc_task_getattr, 3965 .setattr = proc_nochmod_setattr, 3966 .permission = proc_pid_permission, 3967 }; 3968 3969 static const struct file_operations proc_task_operations = { 3970 .read = generic_read_dir, 3971 .iterate_shared = proc_task_readdir, 3972 .llseek = proc_dir_llseek, 3973 }; 3974 3975 void __init set_proc_pid_nlink(void) 3976 { 3977 nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3978 nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 3979 } 3980