/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own right. We don't use
 *  the rest of procfs anymore.
 *
 *
 *  Changelog:
 *  17-Jan-2005
 *  Allan Bezerra
 *  Bruna Moreira <bruna.moreira@indt.org.br>
 *  Edjard Mota <edjard.mota@indt.org.br>
 *  Ilias Biris <ilias.biris@indt.org.br>
 *  Mauricio Lin <mauricio.lin@indt.org.br>
 *
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *
 *  A new process specific entry (smaps) included in /proc. It shows the
 *  size of rss for each memory area. The maps entry lacks information
 *  about physical memory size (rss) for each mapped file, i.e.,
 *  rss information for executables and library files.
 *  This additional information is useful for any tools that need to know
 *  about physical memory consumption for a process specific library.
 *
 *  Changelog:
 *  21-Feb-2005
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *  Pud inclusion in the page table walking.
 *
 *  ChangeLog:
 *  10-Mar-2005
 *  10LE Instituto Nokia de Tecnologia - INdT:
 *  A better way to walk through the page table as suggested by Hugh Dickins.
 *
 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
 *  Smaps information related to shared, private, clean and dirty pages.
 *
 *  Paul Mundt <paul.mundt@nokia.com>:
 *  Overall revision about smaps.
48 */ 49 50 #include <asm/uaccess.h> 51 52 #include <linux/errno.h> 53 #include <linux/time.h> 54 #include <linux/proc_fs.h> 55 #include <linux/stat.h> 56 #include <linux/task_io_accounting_ops.h> 57 #include <linux/init.h> 58 #include <linux/capability.h> 59 #include <linux/file.h> 60 #include <linux/fdtable.h> 61 #include <linux/string.h> 62 #include <linux/seq_file.h> 63 #include <linux/namei.h> 64 #include <linux/mnt_namespace.h> 65 #include <linux/mm.h> 66 #include <linux/swap.h> 67 #include <linux/rcupdate.h> 68 #include <linux/kallsyms.h> 69 #include <linux/stacktrace.h> 70 #include <linux/resource.h> 71 #include <linux/module.h> 72 #include <linux/mount.h> 73 #include <linux/security.h> 74 #include <linux/ptrace.h> 75 #include <linux/tracehook.h> 76 #include <linux/cgroup.h> 77 #include <linux/cpuset.h> 78 #include <linux/audit.h> 79 #include <linux/poll.h> 80 #include <linux/nsproxy.h> 81 #include <linux/oom.h> 82 #include <linux/elf.h> 83 #include <linux/pid_namespace.h> 84 #include <linux/fs_struct.h> 85 #include <linux/slab.h> 86 #include "internal.h" 87 88 /* NOTE: 89 * Implementing inode permission operations in /proc is almost 90 * certainly an error. Permission checks need to happen during 91 * each system call not at open time. The reason is that most of 92 * what we wish to check for permissions in /proc varies at runtime. 93 * 94 * The classic example of a problem is opening file descriptors 95 * in /proc for a task before it execs a suid executable. 
96 */ 97 98 struct pid_entry { 99 char *name; 100 int len; 101 mode_t mode; 102 const struct inode_operations *iop; 103 const struct file_operations *fop; 104 union proc_op op; 105 }; 106 107 #define NOD(NAME, MODE, IOP, FOP, OP) { \ 108 .name = (NAME), \ 109 .len = sizeof(NAME) - 1, \ 110 .mode = MODE, \ 111 .iop = IOP, \ 112 .fop = FOP, \ 113 .op = OP, \ 114 } 115 116 #define DIR(NAME, MODE, iops, fops) \ 117 NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} ) 118 #define LNK(NAME, get_link) \ 119 NOD(NAME, (S_IFLNK|S_IRWXUGO), \ 120 &proc_pid_link_inode_operations, NULL, \ 121 { .proc_get_link = get_link } ) 122 #define REG(NAME, MODE, fops) \ 123 NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) 124 #define INF(NAME, MODE, read) \ 125 NOD(NAME, (S_IFREG|(MODE)), \ 126 NULL, &proc_info_file_operations, \ 127 { .proc_read = read } ) 128 #define ONE(NAME, MODE, show) \ 129 NOD(NAME, (S_IFREG|(MODE)), \ 130 NULL, &proc_single_file_operations, \ 131 { .proc_show = show } ) 132 133 /* 134 * Count the number of hardlinks for the pid_entry table, excluding the . 135 * and .. links. 
136 */ 137 static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, 138 unsigned int n) 139 { 140 unsigned int i; 141 unsigned int count; 142 143 count = 0; 144 for (i = 0; i < n; ++i) { 145 if (S_ISDIR(entries[i].mode)) 146 ++count; 147 } 148 149 return count; 150 } 151 152 static int get_task_root(struct task_struct *task, struct path *root) 153 { 154 int result = -ENOENT; 155 156 task_lock(task); 157 if (task->fs) { 158 get_fs_root(task->fs, root); 159 result = 0; 160 } 161 task_unlock(task); 162 return result; 163 } 164 165 static int proc_cwd_link(struct inode *inode, struct path *path) 166 { 167 struct task_struct *task = get_proc_task(inode); 168 int result = -ENOENT; 169 170 if (task) { 171 task_lock(task); 172 if (task->fs) { 173 get_fs_pwd(task->fs, path); 174 result = 0; 175 } 176 task_unlock(task); 177 put_task_struct(task); 178 } 179 return result; 180 } 181 182 static int proc_root_link(struct inode *inode, struct path *path) 183 { 184 struct task_struct *task = get_proc_task(inode); 185 int result = -ENOENT; 186 187 if (task) { 188 result = get_task_root(task, path); 189 put_task_struct(task); 190 } 191 return result; 192 } 193 194 static struct mm_struct *__check_mem_permission(struct task_struct *task) 195 { 196 struct mm_struct *mm; 197 198 mm = get_task_mm(task); 199 if (!mm) 200 return ERR_PTR(-EINVAL); 201 202 /* 203 * A task can always look at itself, in case it chooses 204 * to use system calls instead of load instructions. 205 */ 206 if (task == current) 207 return mm; 208 209 /* 210 * If current is actively ptrace'ing, and would also be 211 * permitted to freshly attach with ptrace now, permit it. 212 */ 213 if (task_is_stopped_or_traced(task)) { 214 int match; 215 rcu_read_lock(); 216 match = (tracehook_tracer_task(task) == current); 217 rcu_read_unlock(); 218 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) 219 return mm; 220 } 221 222 /* 223 * No one else is allowed. 
224 */ 225 mmput(mm); 226 return ERR_PTR(-EPERM); 227 } 228 229 /* 230 * If current may access user memory in @task return a reference to the 231 * corresponding mm, otherwise ERR_PTR. 232 */ 233 static struct mm_struct *check_mem_permission(struct task_struct *task) 234 { 235 struct mm_struct *mm; 236 int err; 237 238 /* 239 * Avoid racing if task exec's as we might get a new mm but validate 240 * against old credentials. 241 */ 242 err = mutex_lock_killable(&task->signal->cred_guard_mutex); 243 if (err) 244 return ERR_PTR(err); 245 246 mm = __check_mem_permission(task); 247 mutex_unlock(&task->signal->cred_guard_mutex); 248 249 return mm; 250 } 251 252 struct mm_struct *mm_for_maps(struct task_struct *task) 253 { 254 struct mm_struct *mm; 255 int err; 256 257 err = mutex_lock_killable(&task->signal->cred_guard_mutex); 258 if (err) 259 return ERR_PTR(err); 260 261 mm = get_task_mm(task); 262 if (mm && mm != current->mm && 263 !ptrace_may_access(task, PTRACE_MODE_READ)) { 264 mmput(mm); 265 mm = ERR_PTR(-EACCES); 266 } 267 mutex_unlock(&task->signal->cred_guard_mutex); 268 269 return mm; 270 } 271 272 static int proc_pid_cmdline(struct task_struct *task, char * buffer) 273 { 274 int res = 0; 275 unsigned int len; 276 struct mm_struct *mm = get_task_mm(task); 277 if (!mm) 278 goto out; 279 if (!mm->arg_end) 280 goto out_mm; /* Shh! No looking before we're done */ 281 282 len = mm->arg_end - mm->arg_start; 283 284 if (len > PAGE_SIZE) 285 len = PAGE_SIZE; 286 287 res = access_process_vm(task, mm->arg_start, buffer, len, 0); 288 289 // If the nul at the end of args has been overwritten, then 290 // assume application is using setproctitle(3). 
291 if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { 292 len = strnlen(buffer, res); 293 if (len < res) { 294 res = len; 295 } else { 296 len = mm->env_end - mm->env_start; 297 if (len > PAGE_SIZE - res) 298 len = PAGE_SIZE - res; 299 res += access_process_vm(task, mm->env_start, buffer+res, len, 0); 300 res = strnlen(buffer, res); 301 } 302 } 303 out_mm: 304 mmput(mm); 305 out: 306 return res; 307 } 308 309 static int proc_pid_auxv(struct task_struct *task, char *buffer) 310 { 311 struct mm_struct *mm = mm_for_maps(task); 312 int res = PTR_ERR(mm); 313 if (mm && !IS_ERR(mm)) { 314 unsigned int nwords = 0; 315 do { 316 nwords += 2; 317 } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ 318 res = nwords * sizeof(mm->saved_auxv[0]); 319 if (res > PAGE_SIZE) 320 res = PAGE_SIZE; 321 memcpy(buffer, mm->saved_auxv, res); 322 mmput(mm); 323 } 324 return res; 325 } 326 327 328 #ifdef CONFIG_KALLSYMS 329 /* 330 * Provides a wchan file via kallsyms in a proper one-value-per-file format. 331 * Returns the resolved symbol. If that fails, simply return the address. 
332 */ 333 static int proc_pid_wchan(struct task_struct *task, char *buffer) 334 { 335 unsigned long wchan; 336 char symname[KSYM_NAME_LEN]; 337 338 wchan = get_wchan(task); 339 340 if (lookup_symbol_name(wchan, symname) < 0) 341 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 342 return 0; 343 else 344 return sprintf(buffer, "%lu", wchan); 345 else 346 return sprintf(buffer, "%s", symname); 347 } 348 #endif /* CONFIG_KALLSYMS */ 349 350 static int lock_trace(struct task_struct *task) 351 { 352 int err = mutex_lock_killable(&task->signal->cred_guard_mutex); 353 if (err) 354 return err; 355 if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) { 356 mutex_unlock(&task->signal->cred_guard_mutex); 357 return -EPERM; 358 } 359 return 0; 360 } 361 362 static void unlock_trace(struct task_struct *task) 363 { 364 mutex_unlock(&task->signal->cred_guard_mutex); 365 } 366 367 #ifdef CONFIG_STACKTRACE 368 369 #define MAX_STACK_TRACE_DEPTH 64 370 371 static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, 372 struct pid *pid, struct task_struct *task) 373 { 374 struct stack_trace trace; 375 unsigned long *entries; 376 int err; 377 int i; 378 379 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); 380 if (!entries) 381 return -ENOMEM; 382 383 trace.nr_entries = 0; 384 trace.max_entries = MAX_STACK_TRACE_DEPTH; 385 trace.entries = entries; 386 trace.skip = 0; 387 388 err = lock_trace(task); 389 if (!err) { 390 save_stack_trace_tsk(task, &trace); 391 392 for (i = 0; i < trace.nr_entries; i++) { 393 seq_printf(m, "[<%pK>] %pS\n", 394 (void *)entries[i], (void *)entries[i]); 395 } 396 unlock_trace(task); 397 } 398 kfree(entries); 399 400 return err; 401 } 402 #endif 403 404 #ifdef CONFIG_SCHEDSTATS 405 /* 406 * Provides /proc/PID/schedstat 407 */ 408 static int proc_pid_schedstat(struct task_struct *task, char *buffer) 409 { 410 return sprintf(buffer, "%llu %llu %lu\n", 411 (unsigned long long)task->se.sum_exec_runtime, 412 (unsigned long 
long)task->sched_info.run_delay, 413 task->sched_info.pcount); 414 } 415 #endif 416 417 #ifdef CONFIG_LATENCYTOP 418 static int lstats_show_proc(struct seq_file *m, void *v) 419 { 420 int i; 421 struct inode *inode = m->private; 422 struct task_struct *task = get_proc_task(inode); 423 424 if (!task) 425 return -ESRCH; 426 seq_puts(m, "Latency Top version : v0.1\n"); 427 for (i = 0; i < 32; i++) { 428 struct latency_record *lr = &task->latency_record[i]; 429 if (lr->backtrace[0]) { 430 int q; 431 seq_printf(m, "%i %li %li", 432 lr->count, lr->time, lr->max); 433 for (q = 0; q < LT_BACKTRACEDEPTH; q++) { 434 unsigned long bt = lr->backtrace[q]; 435 if (!bt) 436 break; 437 if (bt == ULONG_MAX) 438 break; 439 seq_printf(m, " %ps", (void *)bt); 440 } 441 seq_putc(m, '\n'); 442 } 443 444 } 445 put_task_struct(task); 446 return 0; 447 } 448 449 static int lstats_open(struct inode *inode, struct file *file) 450 { 451 return single_open(file, lstats_show_proc, inode); 452 } 453 454 static ssize_t lstats_write(struct file *file, const char __user *buf, 455 size_t count, loff_t *offs) 456 { 457 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 458 459 if (!task) 460 return -ESRCH; 461 clear_all_latency_tracing(task); 462 put_task_struct(task); 463 464 return count; 465 } 466 467 static const struct file_operations proc_lstats_operations = { 468 .open = lstats_open, 469 .read = seq_read, 470 .write = lstats_write, 471 .llseek = seq_lseek, 472 .release = single_release, 473 }; 474 475 #endif 476 477 static int proc_oom_score(struct task_struct *task, char *buffer) 478 { 479 unsigned long points = 0; 480 481 read_lock(&tasklist_lock); 482 if (pid_alive(task)) 483 points = oom_badness(task, NULL, NULL, 484 totalram_pages + total_swap_pages); 485 read_unlock(&tasklist_lock); 486 return sprintf(buffer, "%lu\n", points); 487 } 488 489 struct limit_names { 490 char *name; 491 char *unit; 492 }; 493 494 static const struct limit_names lnames[RLIM_NLIMITS] = { 495 
[RLIMIT_CPU] = {"Max cpu time", "seconds"}, 496 [RLIMIT_FSIZE] = {"Max file size", "bytes"}, 497 [RLIMIT_DATA] = {"Max data size", "bytes"}, 498 [RLIMIT_STACK] = {"Max stack size", "bytes"}, 499 [RLIMIT_CORE] = {"Max core file size", "bytes"}, 500 [RLIMIT_RSS] = {"Max resident set", "bytes"}, 501 [RLIMIT_NPROC] = {"Max processes", "processes"}, 502 [RLIMIT_NOFILE] = {"Max open files", "files"}, 503 [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, 504 [RLIMIT_AS] = {"Max address space", "bytes"}, 505 [RLIMIT_LOCKS] = {"Max file locks", "locks"}, 506 [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, 507 [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, 508 [RLIMIT_NICE] = {"Max nice priority", NULL}, 509 [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, 510 [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, 511 }; 512 513 /* Display limits for a process */ 514 static int proc_pid_limits(struct task_struct *task, char *buffer) 515 { 516 unsigned int i; 517 int count = 0; 518 unsigned long flags; 519 char *bufptr = buffer; 520 521 struct rlimit rlim[RLIM_NLIMITS]; 522 523 if (!lock_task_sighand(task, &flags)) 524 return 0; 525 memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); 526 unlock_task_sighand(task, &flags); 527 528 /* 529 * print the file header 530 */ 531 count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n", 532 "Limit", "Soft Limit", "Hard Limit", "Units"); 533 534 for (i = 0; i < RLIM_NLIMITS; i++) { 535 if (rlim[i].rlim_cur == RLIM_INFINITY) 536 count += sprintf(&bufptr[count], "%-25s %-20s ", 537 lnames[i].name, "unlimited"); 538 else 539 count += sprintf(&bufptr[count], "%-25s %-20lu ", 540 lnames[i].name, rlim[i].rlim_cur); 541 542 if (rlim[i].rlim_max == RLIM_INFINITY) 543 count += sprintf(&bufptr[count], "%-20s ", "unlimited"); 544 else 545 count += sprintf(&bufptr[count], "%-20lu ", 546 rlim[i].rlim_max); 547 548 if (lnames[i].unit) 549 count += sprintf(&bufptr[count], "%-10s\n", 550 lnames[i].unit); 551 else 
552 count += sprintf(&bufptr[count], "\n"); 553 } 554 555 return count; 556 } 557 558 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 559 static int proc_pid_syscall(struct task_struct *task, char *buffer) 560 { 561 long nr; 562 unsigned long args[6], sp, pc; 563 int res = lock_trace(task); 564 if (res) 565 return res; 566 567 if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) 568 res = sprintf(buffer, "running\n"); 569 else if (nr < 0) 570 res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); 571 else 572 res = sprintf(buffer, 573 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", 574 nr, 575 args[0], args[1], args[2], args[3], args[4], args[5], 576 sp, pc); 577 unlock_trace(task); 578 return res; 579 } 580 #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ 581 582 /************************************************************************/ 583 /* Here the fs part begins */ 584 /************************************************************************/ 585 586 /* permission checks */ 587 static int proc_fd_access_allowed(struct inode *inode) 588 { 589 struct task_struct *task; 590 int allowed = 0; 591 /* Allow access to a task's file descriptors if it is us or we 592 * may use ptrace attach to the process and find out that 593 * information. 
594 */ 595 task = get_proc_task(inode); 596 if (task) { 597 allowed = ptrace_may_access(task, PTRACE_MODE_READ); 598 put_task_struct(task); 599 } 600 return allowed; 601 } 602 603 int proc_setattr(struct dentry *dentry, struct iattr *attr) 604 { 605 int error; 606 struct inode *inode = dentry->d_inode; 607 608 if (attr->ia_valid & ATTR_MODE) 609 return -EPERM; 610 611 error = inode_change_ok(inode, attr); 612 if (error) 613 return error; 614 615 if ((attr->ia_valid & ATTR_SIZE) && 616 attr->ia_size != i_size_read(inode)) { 617 error = vmtruncate(inode, attr->ia_size); 618 if (error) 619 return error; 620 } 621 622 setattr_copy(inode, attr); 623 mark_inode_dirty(inode); 624 return 0; 625 } 626 627 static const struct inode_operations proc_def_inode_operations = { 628 .setattr = proc_setattr, 629 }; 630 631 static int mounts_open_common(struct inode *inode, struct file *file, 632 const struct seq_operations *op) 633 { 634 struct task_struct *task = get_proc_task(inode); 635 struct nsproxy *nsp; 636 struct mnt_namespace *ns = NULL; 637 struct path root; 638 struct proc_mounts *p; 639 int ret = -EINVAL; 640 641 if (task) { 642 rcu_read_lock(); 643 nsp = task_nsproxy(task); 644 if (nsp) { 645 ns = nsp->mnt_ns; 646 if (ns) 647 get_mnt_ns(ns); 648 } 649 rcu_read_unlock(); 650 if (ns && get_task_root(task, &root) == 0) 651 ret = 0; 652 put_task_struct(task); 653 } 654 655 if (!ns) 656 goto err; 657 if (ret) 658 goto err_put_ns; 659 660 ret = -ENOMEM; 661 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); 662 if (!p) 663 goto err_put_path; 664 665 file->private_data = &p->m; 666 ret = seq_open(file, op); 667 if (ret) 668 goto err_free; 669 670 p->m.private = p; 671 p->ns = ns; 672 p->root = root; 673 p->event = ns->event; 674 675 return 0; 676 677 err_free: 678 kfree(p); 679 err_put_path: 680 path_put(&root); 681 err_put_ns: 682 put_mnt_ns(ns); 683 err: 684 return ret; 685 } 686 687 static int mounts_release(struct inode *inode, struct file *file) 688 { 689 struct 
proc_mounts *p = file->private_data; 690 path_put(&p->root); 691 put_mnt_ns(p->ns); 692 return seq_release(inode, file); 693 } 694 695 static unsigned mounts_poll(struct file *file, poll_table *wait) 696 { 697 struct proc_mounts *p = file->private_data; 698 unsigned res = POLLIN | POLLRDNORM; 699 700 poll_wait(file, &p->ns->poll, wait); 701 if (mnt_had_events(p)) 702 res |= POLLERR | POLLPRI; 703 704 return res; 705 } 706 707 static int mounts_open(struct inode *inode, struct file *file) 708 { 709 return mounts_open_common(inode, file, &mounts_op); 710 } 711 712 static const struct file_operations proc_mounts_operations = { 713 .open = mounts_open, 714 .read = seq_read, 715 .llseek = seq_lseek, 716 .release = mounts_release, 717 .poll = mounts_poll, 718 }; 719 720 static int mountinfo_open(struct inode *inode, struct file *file) 721 { 722 return mounts_open_common(inode, file, &mountinfo_op); 723 } 724 725 static const struct file_operations proc_mountinfo_operations = { 726 .open = mountinfo_open, 727 .read = seq_read, 728 .llseek = seq_lseek, 729 .release = mounts_release, 730 .poll = mounts_poll, 731 }; 732 733 static int mountstats_open(struct inode *inode, struct file *file) 734 { 735 return mounts_open_common(inode, file, &mountstats_op); 736 } 737 738 static const struct file_operations proc_mountstats_operations = { 739 .open = mountstats_open, 740 .read = seq_read, 741 .llseek = seq_lseek, 742 .release = mounts_release, 743 }; 744 745 #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ 746 747 static ssize_t proc_info_read(struct file * file, char __user * buf, 748 size_t count, loff_t *ppos) 749 { 750 struct inode * inode = file->f_path.dentry->d_inode; 751 unsigned long page; 752 ssize_t length; 753 struct task_struct *task = get_proc_task(inode); 754 755 length = -ESRCH; 756 if (!task) 757 goto out_no_task; 758 759 if (count > PROC_BLOCK_SIZE) 760 count = PROC_BLOCK_SIZE; 761 762 length = -ENOMEM; 763 
if (!(page = __get_free_page(GFP_TEMPORARY))) 764 goto out; 765 766 length = PROC_I(inode)->op.proc_read(task, (char*)page); 767 768 if (length >= 0) 769 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 770 free_page(page); 771 out: 772 put_task_struct(task); 773 out_no_task: 774 return length; 775 } 776 777 static const struct file_operations proc_info_file_operations = { 778 .read = proc_info_read, 779 .llseek = generic_file_llseek, 780 }; 781 782 static int proc_single_show(struct seq_file *m, void *v) 783 { 784 struct inode *inode = m->private; 785 struct pid_namespace *ns; 786 struct pid *pid; 787 struct task_struct *task; 788 int ret; 789 790 ns = inode->i_sb->s_fs_info; 791 pid = proc_pid(inode); 792 task = get_pid_task(pid, PIDTYPE_PID); 793 if (!task) 794 return -ESRCH; 795 796 ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); 797 798 put_task_struct(task); 799 return ret; 800 } 801 802 static int proc_single_open(struct inode *inode, struct file *filp) 803 { 804 return single_open(filp, proc_single_show, inode); 805 } 806 807 static const struct file_operations proc_single_file_operations = { 808 .open = proc_single_open, 809 .read = seq_read, 810 .llseek = seq_lseek, 811 .release = single_release, 812 }; 813 814 static int mem_open(struct inode* inode, struct file* file) 815 { 816 file->private_data = (void*)((long)current->self_exec_id); 817 /* OK to pass negative loff_t, we can catch out-of-range */ 818 file->f_mode |= FMODE_UNSIGNED_OFFSET; 819 return 0; 820 } 821 822 static ssize_t mem_read(struct file * file, char __user * buf, 823 size_t count, loff_t *ppos) 824 { 825 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 826 char *page; 827 unsigned long src = *ppos; 828 int ret = -ESRCH; 829 struct mm_struct *mm; 830 831 if (!task) 832 goto out_no_task; 833 834 ret = -ENOMEM; 835 page = (char *)__get_free_page(GFP_TEMPORARY); 836 if (!page) 837 goto out; 838 839 mm = check_mem_permission(task); 840 ret 
= PTR_ERR(mm); 841 if (IS_ERR(mm)) 842 goto out_free; 843 844 ret = -EIO; 845 846 if (file->private_data != (void*)((long)current->self_exec_id)) 847 goto out_put; 848 849 ret = 0; 850 851 while (count > 0) { 852 int this_len, retval; 853 854 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 855 retval = access_remote_vm(mm, src, page, this_len, 0); 856 if (!retval) { 857 if (!ret) 858 ret = -EIO; 859 break; 860 } 861 862 if (copy_to_user(buf, page, retval)) { 863 ret = -EFAULT; 864 break; 865 } 866 867 ret += retval; 868 src += retval; 869 buf += retval; 870 count -= retval; 871 } 872 *ppos = src; 873 874 out_put: 875 mmput(mm); 876 out_free: 877 free_page((unsigned long) page); 878 out: 879 put_task_struct(task); 880 out_no_task: 881 return ret; 882 } 883 884 static ssize_t mem_write(struct file * file, const char __user *buf, 885 size_t count, loff_t *ppos) 886 { 887 int copied; 888 char *page; 889 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 890 unsigned long dst = *ppos; 891 struct mm_struct *mm; 892 893 copied = -ESRCH; 894 if (!task) 895 goto out_no_task; 896 897 copied = -ENOMEM; 898 page = (char *)__get_free_page(GFP_TEMPORARY); 899 if (!page) 900 goto out_task; 901 902 mm = check_mem_permission(task); 903 copied = PTR_ERR(mm); 904 if (IS_ERR(mm)) 905 goto out_free; 906 907 copied = -EIO; 908 if (file->private_data != (void *)((long)current->self_exec_id)) 909 goto out_mm; 910 911 copied = 0; 912 while (count > 0) { 913 int this_len, retval; 914 915 this_len = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; 916 if (copy_from_user(page, buf, this_len)) { 917 copied = -EFAULT; 918 break; 919 } 920 retval = access_remote_vm(mm, dst, page, this_len, 1); 921 if (!retval) { 922 if (!copied) 923 copied = -EIO; 924 break; 925 } 926 copied += retval; 927 buf += retval; 928 dst += retval; 929 count -= retval; 930 } 931 *ppos = dst; 932 933 out_mm: 934 mmput(mm); 935 out_free: 936 free_page((unsigned long) page); 937 out_task: 938 put_task_struct(task); 939 out_no_task: 940 return copied; 941 } 942 943 loff_t mem_lseek(struct file *file, loff_t offset, int orig) 944 { 945 switch (orig) { 946 case 0: 947 file->f_pos = offset; 948 break; 949 case 1: 950 file->f_pos += offset; 951 break; 952 default: 953 return -EINVAL; 954 } 955 force_successful_syscall_return(); 956 return file->f_pos; 957 } 958 959 static const struct file_operations proc_mem_operations = { 960 .llseek = mem_lseek, 961 .read = mem_read, 962 .write = mem_write, 963 .open = mem_open, 964 }; 965 966 static ssize_t environ_read(struct file *file, char __user *buf, 967 size_t count, loff_t *ppos) 968 { 969 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 970 char *page; 971 unsigned long src = *ppos; 972 int ret = -ESRCH; 973 struct mm_struct *mm; 974 975 if (!task) 976 goto out_no_task; 977 978 ret = -ENOMEM; 979 page = (char *)__get_free_page(GFP_TEMPORARY); 980 if (!page) 981 goto out; 982 983 984 mm = mm_for_maps(task); 985 ret = PTR_ERR(mm); 986 if (!mm || IS_ERR(mm)) 987 goto out_free; 988 989 ret = 0; 990 while (count > 0) { 991 int this_len, retval, max_len; 992 993 this_len = mm->env_end - (mm->env_start + src); 994 995 if (this_len <= 0) 996 break; 997 998 max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 999 this_len = (this_len > max_len) ? 
max_len : this_len; 1000 1001 retval = access_process_vm(task, (mm->env_start + src), 1002 page, this_len, 0); 1003 1004 if (retval <= 0) { 1005 ret = retval; 1006 break; 1007 } 1008 1009 if (copy_to_user(buf, page, retval)) { 1010 ret = -EFAULT; 1011 break; 1012 } 1013 1014 ret += retval; 1015 src += retval; 1016 buf += retval; 1017 count -= retval; 1018 } 1019 *ppos = src; 1020 1021 mmput(mm); 1022 out_free: 1023 free_page((unsigned long) page); 1024 out: 1025 put_task_struct(task); 1026 out_no_task: 1027 return ret; 1028 } 1029 1030 static const struct file_operations proc_environ_operations = { 1031 .read = environ_read, 1032 .llseek = generic_file_llseek, 1033 }; 1034 1035 static ssize_t oom_adjust_read(struct file *file, char __user *buf, 1036 size_t count, loff_t *ppos) 1037 { 1038 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 1039 char buffer[PROC_NUMBUF]; 1040 size_t len; 1041 int oom_adjust = OOM_DISABLE; 1042 unsigned long flags; 1043 1044 if (!task) 1045 return -ESRCH; 1046 1047 if (lock_task_sighand(task, &flags)) { 1048 oom_adjust = task->signal->oom_adj; 1049 unlock_task_sighand(task, &flags); 1050 } 1051 1052 put_task_struct(task); 1053 1054 len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); 1055 1056 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1057 } 1058 1059 static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 1060 size_t count, loff_t *ppos) 1061 { 1062 struct task_struct *task; 1063 char buffer[PROC_NUMBUF]; 1064 int oom_adjust; 1065 unsigned long flags; 1066 int err; 1067 1068 memset(buffer, 0, sizeof(buffer)); 1069 if (count > sizeof(buffer) - 1) 1070 count = sizeof(buffer) - 1; 1071 if (copy_from_user(buffer, buf, count)) { 1072 err = -EFAULT; 1073 goto out; 1074 } 1075 1076 err = kstrtoint(strstrip(buffer), 0, &oom_adjust); 1077 if (err) 1078 goto out; 1079 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && 1080 oom_adjust != OOM_DISABLE) { 1081 err 
= -EINVAL; 1082 goto out; 1083 } 1084 1085 task = get_proc_task(file->f_path.dentry->d_inode); 1086 if (!task) { 1087 err = -ESRCH; 1088 goto out; 1089 } 1090 1091 task_lock(task); 1092 if (!task->mm) { 1093 err = -EINVAL; 1094 goto err_task_lock; 1095 } 1096 1097 if (!lock_task_sighand(task, &flags)) { 1098 err = -ESRCH; 1099 goto err_task_lock; 1100 } 1101 1102 if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { 1103 err = -EACCES; 1104 goto err_sighand; 1105 } 1106 1107 if (oom_adjust != task->signal->oom_adj) { 1108 if (oom_adjust == OOM_DISABLE) 1109 atomic_inc(&task->mm->oom_disable_count); 1110 if (task->signal->oom_adj == OOM_DISABLE) 1111 atomic_dec(&task->mm->oom_disable_count); 1112 } 1113 1114 /* 1115 * Warn that /proc/pid/oom_adj is deprecated, see 1116 * Documentation/feature-removal-schedule.txt. 1117 */ 1118 printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, " 1119 "please use /proc/%d/oom_score_adj instead.\n", 1120 current->comm, task_pid_nr(current), 1121 task_pid_nr(task), task_pid_nr(task)); 1122 task->signal->oom_adj = oom_adjust; 1123 /* 1124 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum 1125 * value is always attainable. 1126 */ 1127 if (task->signal->oom_adj == OOM_ADJUST_MAX) 1128 task->signal->oom_score_adj = OOM_SCORE_ADJ_MAX; 1129 else 1130 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / 1131 -OOM_DISABLE; 1132 err_sighand: 1133 unlock_task_sighand(task, &flags); 1134 err_task_lock: 1135 task_unlock(task); 1136 put_task_struct(task); 1137 out: 1138 return err < 0 ? 
err : count; 1139 } 1140 1141 static const struct file_operations proc_oom_adjust_operations = { 1142 .read = oom_adjust_read, 1143 .write = oom_adjust_write, 1144 .llseek = generic_file_llseek, 1145 }; 1146 1147 static ssize_t oom_score_adj_read(struct file *file, char __user *buf, 1148 size_t count, loff_t *ppos) 1149 { 1150 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 1151 char buffer[PROC_NUMBUF]; 1152 int oom_score_adj = OOM_SCORE_ADJ_MIN; 1153 unsigned long flags; 1154 size_t len; 1155 1156 if (!task) 1157 return -ESRCH; 1158 if (lock_task_sighand(task, &flags)) { 1159 oom_score_adj = task->signal->oom_score_adj; 1160 unlock_task_sighand(task, &flags); 1161 } 1162 put_task_struct(task); 1163 len = snprintf(buffer, sizeof(buffer), "%d\n", oom_score_adj); 1164 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1165 } 1166 1167 static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, 1168 size_t count, loff_t *ppos) 1169 { 1170 struct task_struct *task; 1171 char buffer[PROC_NUMBUF]; 1172 unsigned long flags; 1173 int oom_score_adj; 1174 int err; 1175 1176 memset(buffer, 0, sizeof(buffer)); 1177 if (count > sizeof(buffer) - 1) 1178 count = sizeof(buffer) - 1; 1179 if (copy_from_user(buffer, buf, count)) { 1180 err = -EFAULT; 1181 goto out; 1182 } 1183 1184 err = kstrtoint(strstrip(buffer), 0, &oom_score_adj); 1185 if (err) 1186 goto out; 1187 if (oom_score_adj < OOM_SCORE_ADJ_MIN || 1188 oom_score_adj > OOM_SCORE_ADJ_MAX) { 1189 err = -EINVAL; 1190 goto out; 1191 } 1192 1193 task = get_proc_task(file->f_path.dentry->d_inode); 1194 if (!task) { 1195 err = -ESRCH; 1196 goto out; 1197 } 1198 1199 task_lock(task); 1200 if (!task->mm) { 1201 err = -EINVAL; 1202 goto err_task_lock; 1203 } 1204 1205 if (!lock_task_sighand(task, &flags)) { 1206 err = -ESRCH; 1207 goto err_task_lock; 1208 } 1209 1210 if (oom_score_adj < task->signal->oom_score_adj_min && 1211 !capable(CAP_SYS_RESOURCE)) { 1212 err = -EACCES; 
1213 goto err_sighand; 1214 } 1215 1216 if (oom_score_adj != task->signal->oom_score_adj) { 1217 if (oom_score_adj == OOM_SCORE_ADJ_MIN) 1218 atomic_inc(&task->mm->oom_disable_count); 1219 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) 1220 atomic_dec(&task->mm->oom_disable_count); 1221 } 1222 task->signal->oom_score_adj = oom_score_adj; 1223 if (has_capability_noaudit(current, CAP_SYS_RESOURCE)) 1224 task->signal->oom_score_adj_min = oom_score_adj; 1225 /* 1226 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is 1227 * always attainable. 1228 */ 1229 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) 1230 task->signal->oom_adj = OOM_DISABLE; 1231 else 1232 task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) / 1233 OOM_SCORE_ADJ_MAX; 1234 err_sighand: 1235 unlock_task_sighand(task, &flags); 1236 err_task_lock: 1237 task_unlock(task); 1238 put_task_struct(task); 1239 out: 1240 return err < 0 ? err : count; 1241 } 1242 1243 static const struct file_operations proc_oom_score_adj_operations = { 1244 .read = oom_score_adj_read, 1245 .write = oom_score_adj_write, 1246 .llseek = default_llseek, 1247 }; 1248 1249 #ifdef CONFIG_AUDITSYSCALL 1250 #define TMPBUFLEN 21 1251 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 1252 size_t count, loff_t *ppos) 1253 { 1254 struct inode * inode = file->f_path.dentry->d_inode; 1255 struct task_struct *task = get_proc_task(inode); 1256 ssize_t length; 1257 char tmpbuf[TMPBUFLEN]; 1258 1259 if (!task) 1260 return -ESRCH; 1261 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 1262 audit_get_loginuid(task)); 1263 put_task_struct(task); 1264 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 1265 } 1266 1267 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, 1268 size_t count, loff_t *ppos) 1269 { 1270 struct inode * inode = file->f_path.dentry->d_inode; 1271 char *page, *tmp; 1272 ssize_t length; 1273 uid_t loginuid; 1274 1275 if 
(!capable(CAP_AUDIT_CONTROL)) 1276 return -EPERM; 1277 1278 rcu_read_lock(); 1279 if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { 1280 rcu_read_unlock(); 1281 return -EPERM; 1282 } 1283 rcu_read_unlock(); 1284 1285 if (count >= PAGE_SIZE) 1286 count = PAGE_SIZE - 1; 1287 1288 if (*ppos != 0) { 1289 /* No partial writes. */ 1290 return -EINVAL; 1291 } 1292 page = (char*)__get_free_page(GFP_TEMPORARY); 1293 if (!page) 1294 return -ENOMEM; 1295 length = -EFAULT; 1296 if (copy_from_user(page, buf, count)) 1297 goto out_free_page; 1298 1299 page[count] = '\0'; 1300 loginuid = simple_strtoul(page, &tmp, 10); 1301 if (tmp == page) { 1302 length = -EINVAL; 1303 goto out_free_page; 1304 1305 } 1306 length = audit_set_loginuid(current, loginuid); 1307 if (likely(length == 0)) 1308 length = count; 1309 1310 out_free_page: 1311 free_page((unsigned long) page); 1312 return length; 1313 } 1314 1315 static const struct file_operations proc_loginuid_operations = { 1316 .read = proc_loginuid_read, 1317 .write = proc_loginuid_write, 1318 .llseek = generic_file_llseek, 1319 }; 1320 1321 static ssize_t proc_sessionid_read(struct file * file, char __user * buf, 1322 size_t count, loff_t *ppos) 1323 { 1324 struct inode * inode = file->f_path.dentry->d_inode; 1325 struct task_struct *task = get_proc_task(inode); 1326 ssize_t length; 1327 char tmpbuf[TMPBUFLEN]; 1328 1329 if (!task) 1330 return -ESRCH; 1331 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 1332 audit_get_sessionid(task)); 1333 put_task_struct(task); 1334 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 1335 } 1336 1337 static const struct file_operations proc_sessionid_operations = { 1338 .read = proc_sessionid_read, 1339 .llseek = generic_file_llseek, 1340 }; 1341 #endif 1342 1343 #ifdef CONFIG_FAULT_INJECTION 1344 static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, 1345 size_t count, loff_t *ppos) 1346 { 1347 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 
1348 char buffer[PROC_NUMBUF]; 1349 size_t len; 1350 int make_it_fail; 1351 1352 if (!task) 1353 return -ESRCH; 1354 make_it_fail = task->make_it_fail; 1355 put_task_struct(task); 1356 1357 len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); 1358 1359 return simple_read_from_buffer(buf, count, ppos, buffer, len); 1360 } 1361 1362 static ssize_t proc_fault_inject_write(struct file * file, 1363 const char __user * buf, size_t count, loff_t *ppos) 1364 { 1365 struct task_struct *task; 1366 char buffer[PROC_NUMBUF], *end; 1367 int make_it_fail; 1368 1369 if (!capable(CAP_SYS_RESOURCE)) 1370 return -EPERM; 1371 memset(buffer, 0, sizeof(buffer)); 1372 if (count > sizeof(buffer) - 1) 1373 count = sizeof(buffer) - 1; 1374 if (copy_from_user(buffer, buf, count)) 1375 return -EFAULT; 1376 make_it_fail = simple_strtol(strstrip(buffer), &end, 0); 1377 if (*end) 1378 return -EINVAL; 1379 task = get_proc_task(file->f_dentry->d_inode); 1380 if (!task) 1381 return -ESRCH; 1382 task->make_it_fail = make_it_fail; 1383 put_task_struct(task); 1384 1385 return count; 1386 } 1387 1388 static const struct file_operations proc_fault_inject_operations = { 1389 .read = proc_fault_inject_read, 1390 .write = proc_fault_inject_write, 1391 .llseek = generic_file_llseek, 1392 }; 1393 #endif 1394 1395 1396 #ifdef CONFIG_SCHED_DEBUG 1397 /* 1398 * Print out various scheduling related per-task fields: 1399 */ 1400 static int sched_show(struct seq_file *m, void *v) 1401 { 1402 struct inode *inode = m->private; 1403 struct task_struct *p; 1404 1405 p = get_proc_task(inode); 1406 if (!p) 1407 return -ESRCH; 1408 proc_sched_show_task(p, m); 1409 1410 put_task_struct(p); 1411 1412 return 0; 1413 } 1414 1415 static ssize_t 1416 sched_write(struct file *file, const char __user *buf, 1417 size_t count, loff_t *offset) 1418 { 1419 struct inode *inode = file->f_path.dentry->d_inode; 1420 struct task_struct *p; 1421 1422 p = get_proc_task(inode); 1423 if (!p) 1424 return -ESRCH; 1425 
proc_sched_set_task(p); 1426 1427 put_task_struct(p); 1428 1429 return count; 1430 } 1431 1432 static int sched_open(struct inode *inode, struct file *filp) 1433 { 1434 return single_open(filp, sched_show, inode); 1435 } 1436 1437 static const struct file_operations proc_pid_sched_operations = { 1438 .open = sched_open, 1439 .read = seq_read, 1440 .write = sched_write, 1441 .llseek = seq_lseek, 1442 .release = single_release, 1443 }; 1444 1445 #endif 1446 1447 #ifdef CONFIG_SCHED_AUTOGROUP 1448 /* 1449 * Print out autogroup related information: 1450 */ 1451 static int sched_autogroup_show(struct seq_file *m, void *v) 1452 { 1453 struct inode *inode = m->private; 1454 struct task_struct *p; 1455 1456 p = get_proc_task(inode); 1457 if (!p) 1458 return -ESRCH; 1459 proc_sched_autogroup_show_task(p, m); 1460 1461 put_task_struct(p); 1462 1463 return 0; 1464 } 1465 1466 static ssize_t 1467 sched_autogroup_write(struct file *file, const char __user *buf, 1468 size_t count, loff_t *offset) 1469 { 1470 struct inode *inode = file->f_path.dentry->d_inode; 1471 struct task_struct *p; 1472 char buffer[PROC_NUMBUF]; 1473 int nice; 1474 int err; 1475 1476 memset(buffer, 0, sizeof(buffer)); 1477 if (count > sizeof(buffer) - 1) 1478 count = sizeof(buffer) - 1; 1479 if (copy_from_user(buffer, buf, count)) 1480 return -EFAULT; 1481 1482 err = kstrtoint(strstrip(buffer), 0, &nice); 1483 if (err < 0) 1484 return err; 1485 1486 p = get_proc_task(inode); 1487 if (!p) 1488 return -ESRCH; 1489 1490 err = nice; 1491 err = proc_sched_autogroup_set_nice(p, &err); 1492 if (err) 1493 count = err; 1494 1495 put_task_struct(p); 1496 1497 return count; 1498 } 1499 1500 static int sched_autogroup_open(struct inode *inode, struct file *filp) 1501 { 1502 int ret; 1503 1504 ret = single_open(filp, sched_autogroup_show, NULL); 1505 if (!ret) { 1506 struct seq_file *m = filp->private_data; 1507 1508 m->private = inode; 1509 } 1510 return ret; 1511 } 1512 1513 static const struct file_operations 
proc_pid_sched_autogroup_operations = { 1514 .open = sched_autogroup_open, 1515 .read = seq_read, 1516 .write = sched_autogroup_write, 1517 .llseek = seq_lseek, 1518 .release = single_release, 1519 }; 1520 1521 #endif /* CONFIG_SCHED_AUTOGROUP */ 1522 1523 static ssize_t comm_write(struct file *file, const char __user *buf, 1524 size_t count, loff_t *offset) 1525 { 1526 struct inode *inode = file->f_path.dentry->d_inode; 1527 struct task_struct *p; 1528 char buffer[TASK_COMM_LEN]; 1529 1530 memset(buffer, 0, sizeof(buffer)); 1531 if (count > sizeof(buffer) - 1) 1532 count = sizeof(buffer) - 1; 1533 if (copy_from_user(buffer, buf, count)) 1534 return -EFAULT; 1535 1536 p = get_proc_task(inode); 1537 if (!p) 1538 return -ESRCH; 1539 1540 if (same_thread_group(current, p)) 1541 set_task_comm(p, buffer); 1542 else 1543 count = -EINVAL; 1544 1545 put_task_struct(p); 1546 1547 return count; 1548 } 1549 1550 static int comm_show(struct seq_file *m, void *v) 1551 { 1552 struct inode *inode = m->private; 1553 struct task_struct *p; 1554 1555 p = get_proc_task(inode); 1556 if (!p) 1557 return -ESRCH; 1558 1559 task_lock(p); 1560 seq_printf(m, "%s\n", p->comm); 1561 task_unlock(p); 1562 1563 put_task_struct(p); 1564 1565 return 0; 1566 } 1567 1568 static int comm_open(struct inode *inode, struct file *filp) 1569 { 1570 return single_open(filp, comm_show, inode); 1571 } 1572 1573 static const struct file_operations proc_pid_set_comm_operations = { 1574 .open = comm_open, 1575 .read = seq_read, 1576 .write = comm_write, 1577 .llseek = seq_lseek, 1578 .release = single_release, 1579 }; 1580 1581 static int proc_exe_link(struct inode *inode, struct path *exe_path) 1582 { 1583 struct task_struct *task; 1584 struct mm_struct *mm; 1585 struct file *exe_file; 1586 1587 task = get_proc_task(inode); 1588 if (!task) 1589 return -ENOENT; 1590 mm = get_task_mm(task); 1591 put_task_struct(task); 1592 if (!mm) 1593 return -ENOENT; 1594 exe_file = get_mm_exe_file(mm); 1595 mmput(mm); 1596 if 
(exe_file) { 1597 *exe_path = exe_file->f_path; 1598 path_get(&exe_file->f_path); 1599 fput(exe_file); 1600 return 0; 1601 } else 1602 return -ENOENT; 1603 } 1604 1605 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1606 { 1607 struct inode *inode = dentry->d_inode; 1608 int error = -EACCES; 1609 1610 /* We don't need a base pointer in the /proc filesystem */ 1611 path_put(&nd->path); 1612 1613 /* Are we allowed to snoop on the tasks file descriptors? */ 1614 if (!proc_fd_access_allowed(inode)) 1615 goto out; 1616 1617 error = PROC_I(inode)->op.proc_get_link(inode, &nd->path); 1618 out: 1619 return ERR_PTR(error); 1620 } 1621 1622 static int do_proc_readlink(struct path *path, char __user *buffer, int buflen) 1623 { 1624 char *tmp = (char*)__get_free_page(GFP_TEMPORARY); 1625 char *pathname; 1626 int len; 1627 1628 if (!tmp) 1629 return -ENOMEM; 1630 1631 pathname = d_path(path, tmp, PAGE_SIZE); 1632 len = PTR_ERR(pathname); 1633 if (IS_ERR(pathname)) 1634 goto out; 1635 len = tmp + PAGE_SIZE - 1 - pathname; 1636 1637 if (len > buflen) 1638 len = buflen; 1639 if (copy_to_user(buffer, pathname, len)) 1640 len = -EFAULT; 1641 out: 1642 free_page((unsigned long)tmp); 1643 return len; 1644 } 1645 1646 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) 1647 { 1648 int error = -EACCES; 1649 struct inode *inode = dentry->d_inode; 1650 struct path path; 1651 1652 /* Are we allowed to snoop on the tasks file descriptors? 
*/ 1653 if (!proc_fd_access_allowed(inode)) 1654 goto out; 1655 1656 error = PROC_I(inode)->op.proc_get_link(inode, &path); 1657 if (error) 1658 goto out; 1659 1660 error = do_proc_readlink(&path, buffer, buflen); 1661 path_put(&path); 1662 out: 1663 return error; 1664 } 1665 1666 static const struct inode_operations proc_pid_link_inode_operations = { 1667 .readlink = proc_pid_readlink, 1668 .follow_link = proc_pid_follow_link, 1669 .setattr = proc_setattr, 1670 }; 1671 1672 1673 /* building an inode */ 1674 1675 static int task_dumpable(struct task_struct *task) 1676 { 1677 int dumpable = 0; 1678 struct mm_struct *mm; 1679 1680 task_lock(task); 1681 mm = task->mm; 1682 if (mm) 1683 dumpable = get_dumpable(mm); 1684 task_unlock(task); 1685 if(dumpable == 1) 1686 return 1; 1687 return 0; 1688 } 1689 1690 struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task) 1691 { 1692 struct inode * inode; 1693 struct proc_inode *ei; 1694 const struct cred *cred; 1695 1696 /* We need a new inode */ 1697 1698 inode = new_inode(sb); 1699 if (!inode) 1700 goto out; 1701 1702 /* Common stuff */ 1703 ei = PROC_I(inode); 1704 inode->i_ino = get_next_ino(); 1705 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1706 inode->i_op = &proc_def_inode_operations; 1707 1708 /* 1709 * grab the reference to task. 
1710 */ 1711 ei->pid = get_task_pid(task, PIDTYPE_PID); 1712 if (!ei->pid) 1713 goto out_unlock; 1714 1715 if (task_dumpable(task)) { 1716 rcu_read_lock(); 1717 cred = __task_cred(task); 1718 inode->i_uid = cred->euid; 1719 inode->i_gid = cred->egid; 1720 rcu_read_unlock(); 1721 } 1722 security_task_to_inode(task, inode); 1723 1724 out: 1725 return inode; 1726 1727 out_unlock: 1728 iput(inode); 1729 return NULL; 1730 } 1731 1732 int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 1733 { 1734 struct inode *inode = dentry->d_inode; 1735 struct task_struct *task; 1736 const struct cred *cred; 1737 1738 generic_fillattr(inode, stat); 1739 1740 rcu_read_lock(); 1741 stat->uid = 0; 1742 stat->gid = 0; 1743 task = pid_task(proc_pid(inode), PIDTYPE_PID); 1744 if (task) { 1745 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1746 task_dumpable(task)) { 1747 cred = __task_cred(task); 1748 stat->uid = cred->euid; 1749 stat->gid = cred->egid; 1750 } 1751 } 1752 rcu_read_unlock(); 1753 return 0; 1754 } 1755 1756 /* dentry stuff */ 1757 1758 /* 1759 * Exceptional case: normally we are not allowed to unhash a busy 1760 * directory. In this case, however, we can do it - no aliasing problems 1761 * due to the way we treat inodes. 1762 * 1763 * Rewrite the inode's ownerships here because the owning task may have 1764 * performed a setuid(), etc. 1765 * 1766 * Before the /proc/pid/status file was created the only way to read 1767 * the effective uid of a /process was to stat /proc/pid. Reading 1768 * /proc/pid/status is slow enough that procps and other packages 1769 * kept stating /proc/pid. To keep the rules in /proc simple I have 1770 * made this apply to all per process world readable and executable 1771 * directories. 
1772 */ 1773 int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1774 { 1775 struct inode *inode; 1776 struct task_struct *task; 1777 const struct cred *cred; 1778 1779 if (nd && nd->flags & LOOKUP_RCU) 1780 return -ECHILD; 1781 1782 inode = dentry->d_inode; 1783 task = get_proc_task(inode); 1784 1785 if (task) { 1786 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1787 task_dumpable(task)) { 1788 rcu_read_lock(); 1789 cred = __task_cred(task); 1790 inode->i_uid = cred->euid; 1791 inode->i_gid = cred->egid; 1792 rcu_read_unlock(); 1793 } else { 1794 inode->i_uid = 0; 1795 inode->i_gid = 0; 1796 } 1797 inode->i_mode &= ~(S_ISUID | S_ISGID); 1798 security_task_to_inode(task, inode); 1799 put_task_struct(task); 1800 return 1; 1801 } 1802 d_drop(dentry); 1803 return 0; 1804 } 1805 1806 static int pid_delete_dentry(const struct dentry * dentry) 1807 { 1808 /* Is the task we represent dead? 1809 * If so, then don't put the dentry on the lru list, 1810 * kill it immediately. 1811 */ 1812 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; 1813 } 1814 1815 const struct dentry_operations pid_dentry_operations = 1816 { 1817 .d_revalidate = pid_revalidate, 1818 .d_delete = pid_delete_dentry, 1819 }; 1820 1821 /* Lookups */ 1822 1823 /* 1824 * Fill a directory entry. 1825 * 1826 * If possible create the dcache entry and derive our inode number and 1827 * file type from dcache entry. 1828 * 1829 * Since all of the proc inode numbers are dynamically generated, the inode 1830 * numbers do not exist until the inode is cache. This means creating the 1831 * the dcache entry in readdir is necessary to keep the inode numbers 1832 * reported by readdir in sync with the inode numbers reported 1833 * by stat. 
1834 */ 1835 int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 1836 const char *name, int len, 1837 instantiate_t instantiate, struct task_struct *task, const void *ptr) 1838 { 1839 struct dentry *child, *dir = filp->f_path.dentry; 1840 struct inode *inode; 1841 struct qstr qname; 1842 ino_t ino = 0; 1843 unsigned type = DT_UNKNOWN; 1844 1845 qname.name = name; 1846 qname.len = len; 1847 qname.hash = full_name_hash(name, len); 1848 1849 child = d_lookup(dir, &qname); 1850 if (!child) { 1851 struct dentry *new; 1852 new = d_alloc(dir, &qname); 1853 if (new) { 1854 child = instantiate(dir->d_inode, new, task, ptr); 1855 if (child) 1856 dput(new); 1857 else 1858 child = new; 1859 } 1860 } 1861 if (!child || IS_ERR(child) || !child->d_inode) 1862 goto end_instantiate; 1863 inode = child->d_inode; 1864 if (inode) { 1865 ino = inode->i_ino; 1866 type = inode->i_mode >> 12; 1867 } 1868 dput(child); 1869 end_instantiate: 1870 if (!ino) 1871 ino = find_inode_number(dir, &qname); 1872 if (!ino) 1873 ino = 1; 1874 return filldir(dirent, name, len, filp->f_pos, ino, type); 1875 } 1876 1877 static unsigned name_to_int(struct dentry *dentry) 1878 { 1879 const char *name = dentry->d_name.name; 1880 int len = dentry->d_name.len; 1881 unsigned n = 0; 1882 1883 if (len > 1 && *name == '0') 1884 goto out; 1885 while (len-- > 0) { 1886 unsigned c = *name++ - '0'; 1887 if (c > 9) 1888 goto out; 1889 if (n >= (~0U-9)/10) 1890 goto out; 1891 n *= 10; 1892 n += c; 1893 } 1894 return n; 1895 out: 1896 return ~0U; 1897 } 1898 1899 #define PROC_FDINFO_MAX 64 1900 1901 static int proc_fd_info(struct inode *inode, struct path *path, char *info) 1902 { 1903 struct task_struct *task = get_proc_task(inode); 1904 struct files_struct *files = NULL; 1905 struct file *file; 1906 int fd = proc_fd(inode); 1907 1908 if (task) { 1909 files = get_files_struct(task); 1910 put_task_struct(task); 1911 } 1912 if (files) { 1913 /* 1914 * We are not taking a ref to the file structure, so 
we must 1915 * hold ->file_lock. 1916 */ 1917 spin_lock(&files->file_lock); 1918 file = fcheck_files(files, fd); 1919 if (file) { 1920 if (path) { 1921 *path = file->f_path; 1922 path_get(&file->f_path); 1923 } 1924 if (info) 1925 snprintf(info, PROC_FDINFO_MAX, 1926 "pos:\t%lli\n" 1927 "flags:\t0%o\n", 1928 (long long) file->f_pos, 1929 file->f_flags); 1930 spin_unlock(&files->file_lock); 1931 put_files_struct(files); 1932 return 0; 1933 } 1934 spin_unlock(&files->file_lock); 1935 put_files_struct(files); 1936 } 1937 return -ENOENT; 1938 } 1939 1940 static int proc_fd_link(struct inode *inode, struct path *path) 1941 { 1942 return proc_fd_info(inode, path, NULL); 1943 } 1944 1945 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1946 { 1947 struct inode *inode; 1948 struct task_struct *task; 1949 int fd; 1950 struct files_struct *files; 1951 const struct cred *cred; 1952 1953 if (nd && nd->flags & LOOKUP_RCU) 1954 return -ECHILD; 1955 1956 inode = dentry->d_inode; 1957 task = get_proc_task(inode); 1958 fd = proc_fd(inode); 1959 1960 if (task) { 1961 files = get_files_struct(task); 1962 if (files) { 1963 rcu_read_lock(); 1964 if (fcheck_files(files, fd)) { 1965 rcu_read_unlock(); 1966 put_files_struct(files); 1967 if (task_dumpable(task)) { 1968 rcu_read_lock(); 1969 cred = __task_cred(task); 1970 inode->i_uid = cred->euid; 1971 inode->i_gid = cred->egid; 1972 rcu_read_unlock(); 1973 } else { 1974 inode->i_uid = 0; 1975 inode->i_gid = 0; 1976 } 1977 inode->i_mode &= ~(S_ISUID | S_ISGID); 1978 security_task_to_inode(task, inode); 1979 put_task_struct(task); 1980 return 1; 1981 } 1982 rcu_read_unlock(); 1983 put_files_struct(files); 1984 } 1985 put_task_struct(task); 1986 } 1987 d_drop(dentry); 1988 return 0; 1989 } 1990 1991 static const struct dentry_operations tid_fd_dentry_operations = 1992 { 1993 .d_revalidate = tid_fd_revalidate, 1994 .d_delete = pid_delete_dentry, 1995 }; 1996 1997 static struct dentry *proc_fd_instantiate(struct inode 
*dir, 1998 struct dentry *dentry, struct task_struct *task, const void *ptr) 1999 { 2000 unsigned fd = *(const unsigned *)ptr; 2001 struct file *file; 2002 struct files_struct *files; 2003 struct inode *inode; 2004 struct proc_inode *ei; 2005 struct dentry *error = ERR_PTR(-ENOENT); 2006 2007 inode = proc_pid_make_inode(dir->i_sb, task); 2008 if (!inode) 2009 goto out; 2010 ei = PROC_I(inode); 2011 ei->fd = fd; 2012 files = get_files_struct(task); 2013 if (!files) 2014 goto out_iput; 2015 inode->i_mode = S_IFLNK; 2016 2017 /* 2018 * We are not taking a ref to the file structure, so we must 2019 * hold ->file_lock. 2020 */ 2021 spin_lock(&files->file_lock); 2022 file = fcheck_files(files, fd); 2023 if (!file) 2024 goto out_unlock; 2025 if (file->f_mode & FMODE_READ) 2026 inode->i_mode |= S_IRUSR | S_IXUSR; 2027 if (file->f_mode & FMODE_WRITE) 2028 inode->i_mode |= S_IWUSR | S_IXUSR; 2029 spin_unlock(&files->file_lock); 2030 put_files_struct(files); 2031 2032 inode->i_op = &proc_pid_link_inode_operations; 2033 inode->i_size = 64; 2034 ei->op.proc_get_link = proc_fd_link; 2035 d_set_d_op(dentry, &tid_fd_dentry_operations); 2036 d_add(dentry, inode); 2037 /* Close the race of the process dying before we return the dentry */ 2038 if (tid_fd_revalidate(dentry, NULL)) 2039 error = NULL; 2040 2041 out: 2042 return error; 2043 out_unlock: 2044 spin_unlock(&files->file_lock); 2045 put_files_struct(files); 2046 out_iput: 2047 iput(inode); 2048 goto out; 2049 } 2050 2051 static struct dentry *proc_lookupfd_common(struct inode *dir, 2052 struct dentry *dentry, 2053 instantiate_t instantiate) 2054 { 2055 struct task_struct *task = get_proc_task(dir); 2056 unsigned fd = name_to_int(dentry); 2057 struct dentry *result = ERR_PTR(-ENOENT); 2058 2059 if (!task) 2060 goto out_no_task; 2061 if (fd == ~0U) 2062 goto out; 2063 2064 result = instantiate(dir, dentry, task, &fd); 2065 out: 2066 put_task_struct(task); 2067 out_no_task: 2068 return result; 2069 } 2070 2071 static int 
proc_readfd_common(struct file * filp, void * dirent, 2072 filldir_t filldir, instantiate_t instantiate) 2073 { 2074 struct dentry *dentry = filp->f_path.dentry; 2075 struct inode *inode = dentry->d_inode; 2076 struct task_struct *p = get_proc_task(inode); 2077 unsigned int fd, ino; 2078 int retval; 2079 struct files_struct * files; 2080 2081 retval = -ENOENT; 2082 if (!p) 2083 goto out_no_task; 2084 retval = 0; 2085 2086 fd = filp->f_pos; 2087 switch (fd) { 2088 case 0: 2089 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) 2090 goto out; 2091 filp->f_pos++; 2092 case 1: 2093 ino = parent_ino(dentry); 2094 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 2095 goto out; 2096 filp->f_pos++; 2097 default: 2098 files = get_files_struct(p); 2099 if (!files) 2100 goto out; 2101 rcu_read_lock(); 2102 for (fd = filp->f_pos-2; 2103 fd < files_fdtable(files)->max_fds; 2104 fd++, filp->f_pos++) { 2105 char name[PROC_NUMBUF]; 2106 int len; 2107 2108 if (!fcheck_files(files, fd)) 2109 continue; 2110 rcu_read_unlock(); 2111 2112 len = snprintf(name, sizeof(name), "%d", fd); 2113 if (proc_fill_cache(filp, dirent, filldir, 2114 name, len, instantiate, 2115 p, &fd) < 0) { 2116 rcu_read_lock(); 2117 break; 2118 } 2119 rcu_read_lock(); 2120 } 2121 rcu_read_unlock(); 2122 put_files_struct(files); 2123 } 2124 out: 2125 put_task_struct(p); 2126 out_no_task: 2127 return retval; 2128 } 2129 2130 static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, 2131 struct nameidata *nd) 2132 { 2133 return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); 2134 } 2135 2136 static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) 2137 { 2138 return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); 2139 } 2140 2141 static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, 2142 size_t len, loff_t *ppos) 2143 { 2144 char tmp[PROC_FDINFO_MAX]; 2145 int err = proc_fd_info(file->f_path.dentry->d_inode, NULL, tmp); 2146 if 
(!err) 2147 err = simple_read_from_buffer(buf, len, ppos, tmp, strlen(tmp)); 2148 return err; 2149 } 2150 2151 static const struct file_operations proc_fdinfo_file_operations = { 2152 .open = nonseekable_open, 2153 .read = proc_fdinfo_read, 2154 .llseek = no_llseek, 2155 }; 2156 2157 static const struct file_operations proc_fd_operations = { 2158 .read = generic_read_dir, 2159 .readdir = proc_readfd, 2160 .llseek = default_llseek, 2161 }; 2162 2163 /* 2164 * /proc/pid/fd needs a special permission handler so that a process can still 2165 * access /proc/self/fd after it has executed a setuid(). 2166 */ 2167 static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags) 2168 { 2169 int rv; 2170 2171 if (flags & IPERM_FLAG_RCU) 2172 return -ECHILD; 2173 rv = generic_permission(inode, mask, flags, NULL); 2174 if (rv == 0) 2175 return 0; 2176 if (task_pid(current) == proc_pid(inode)) 2177 rv = 0; 2178 return rv; 2179 } 2180 2181 /* 2182 * proc directories can do almost nothing.. 
2183 */ 2184 static const struct inode_operations proc_fd_inode_operations = { 2185 .lookup = proc_lookupfd, 2186 .permission = proc_fd_permission, 2187 .setattr = proc_setattr, 2188 }; 2189 2190 static struct dentry *proc_fdinfo_instantiate(struct inode *dir, 2191 struct dentry *dentry, struct task_struct *task, const void *ptr) 2192 { 2193 unsigned fd = *(unsigned *)ptr; 2194 struct inode *inode; 2195 struct proc_inode *ei; 2196 struct dentry *error = ERR_PTR(-ENOENT); 2197 2198 inode = proc_pid_make_inode(dir->i_sb, task); 2199 if (!inode) 2200 goto out; 2201 ei = PROC_I(inode); 2202 ei->fd = fd; 2203 inode->i_mode = S_IFREG | S_IRUSR; 2204 inode->i_fop = &proc_fdinfo_file_operations; 2205 d_set_d_op(dentry, &tid_fd_dentry_operations); 2206 d_add(dentry, inode); 2207 /* Close the race of the process dying before we return the dentry */ 2208 if (tid_fd_revalidate(dentry, NULL)) 2209 error = NULL; 2210 2211 out: 2212 return error; 2213 } 2214 2215 static struct dentry *proc_lookupfdinfo(struct inode *dir, 2216 struct dentry *dentry, 2217 struct nameidata *nd) 2218 { 2219 return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); 2220 } 2221 2222 static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) 2223 { 2224 return proc_readfd_common(filp, dirent, filldir, 2225 proc_fdinfo_instantiate); 2226 } 2227 2228 static const struct file_operations proc_fdinfo_operations = { 2229 .read = generic_read_dir, 2230 .readdir = proc_readfdinfo, 2231 .llseek = default_llseek, 2232 }; 2233 2234 /* 2235 * proc directories can do almost nothing.. 
2236 */ 2237 static const struct inode_operations proc_fdinfo_inode_operations = { 2238 .lookup = proc_lookupfdinfo, 2239 .setattr = proc_setattr, 2240 }; 2241 2242 2243 static struct dentry *proc_pident_instantiate(struct inode *dir, 2244 struct dentry *dentry, struct task_struct *task, const void *ptr) 2245 { 2246 const struct pid_entry *p = ptr; 2247 struct inode *inode; 2248 struct proc_inode *ei; 2249 struct dentry *error = ERR_PTR(-ENOENT); 2250 2251 inode = proc_pid_make_inode(dir->i_sb, task); 2252 if (!inode) 2253 goto out; 2254 2255 ei = PROC_I(inode); 2256 inode->i_mode = p->mode; 2257 if (S_ISDIR(inode->i_mode)) 2258 inode->i_nlink = 2; /* Use getattr to fix if necessary */ 2259 if (p->iop) 2260 inode->i_op = p->iop; 2261 if (p->fop) 2262 inode->i_fop = p->fop; 2263 ei->op = p->op; 2264 d_set_d_op(dentry, &pid_dentry_operations); 2265 d_add(dentry, inode); 2266 /* Close the race of the process dying before we return the dentry */ 2267 if (pid_revalidate(dentry, NULL)) 2268 error = NULL; 2269 out: 2270 return error; 2271 } 2272 2273 static struct dentry *proc_pident_lookup(struct inode *dir, 2274 struct dentry *dentry, 2275 const struct pid_entry *ents, 2276 unsigned int nents) 2277 { 2278 struct dentry *error; 2279 struct task_struct *task = get_proc_task(dir); 2280 const struct pid_entry *p, *last; 2281 2282 error = ERR_PTR(-ENOENT); 2283 2284 if (!task) 2285 goto out_no_task; 2286 2287 /* 2288 * Yes, it does not scale. And it should not. Don't add 2289 * new entries into /proc/<tgid>/ without very good reasons. 
2290 */ 2291 last = &ents[nents - 1]; 2292 for (p = ents; p <= last; p++) { 2293 if (p->len != dentry->d_name.len) 2294 continue; 2295 if (!memcmp(dentry->d_name.name, p->name, p->len)) 2296 break; 2297 } 2298 if (p > last) 2299 goto out; 2300 2301 error = proc_pident_instantiate(dir, dentry, task, p); 2302 out: 2303 put_task_struct(task); 2304 out_no_task: 2305 return error; 2306 } 2307 2308 static int proc_pident_fill_cache(struct file *filp, void *dirent, 2309 filldir_t filldir, struct task_struct *task, const struct pid_entry *p) 2310 { 2311 return proc_fill_cache(filp, dirent, filldir, p->name, p->len, 2312 proc_pident_instantiate, task, p); 2313 } 2314 2315 static int proc_pident_readdir(struct file *filp, 2316 void *dirent, filldir_t filldir, 2317 const struct pid_entry *ents, unsigned int nents) 2318 { 2319 int i; 2320 struct dentry *dentry = filp->f_path.dentry; 2321 struct inode *inode = dentry->d_inode; 2322 struct task_struct *task = get_proc_task(inode); 2323 const struct pid_entry *p, *last; 2324 ino_t ino; 2325 int ret; 2326 2327 ret = -ENOENT; 2328 if (!task) 2329 goto out_no_task; 2330 2331 ret = 0; 2332 i = filp->f_pos; 2333 switch (i) { 2334 case 0: 2335 ino = inode->i_ino; 2336 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 2337 goto out; 2338 i++; 2339 filp->f_pos++; 2340 /* fall through */ 2341 case 1: 2342 ino = parent_ino(dentry); 2343 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) 2344 goto out; 2345 i++; 2346 filp->f_pos++; 2347 /* fall through */ 2348 default: 2349 i -= 2; 2350 if (i >= nents) { 2351 ret = 1; 2352 goto out; 2353 } 2354 p = ents + i; 2355 last = &ents[nents - 1]; 2356 while (p <= last) { 2357 if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0) 2358 goto out; 2359 filp->f_pos++; 2360 p++; 2361 } 2362 } 2363 2364 ret = 1; 2365 out: 2366 put_task_struct(task); 2367 out_no_task: 2368 return ret; 2369 } 2370 2371 #ifdef CONFIG_SECURITY 2372 static ssize_t proc_pid_attr_read(struct file * file, char __user * 
buf, 2373 size_t count, loff_t *ppos) 2374 { 2375 struct inode * inode = file->f_path.dentry->d_inode; 2376 char *p = NULL; 2377 ssize_t length; 2378 struct task_struct *task = get_proc_task(inode); 2379 2380 if (!task) 2381 return -ESRCH; 2382 2383 length = security_getprocattr(task, 2384 (char*)file->f_path.dentry->d_name.name, 2385 &p); 2386 put_task_struct(task); 2387 if (length > 0) 2388 length = simple_read_from_buffer(buf, count, ppos, p, length); 2389 kfree(p); 2390 return length; 2391 } 2392 2393 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 2394 size_t count, loff_t *ppos) 2395 { 2396 struct inode * inode = file->f_path.dentry->d_inode; 2397 char *page; 2398 ssize_t length; 2399 struct task_struct *task = get_proc_task(inode); 2400 2401 length = -ESRCH; 2402 if (!task) 2403 goto out_no_task; 2404 if (count > PAGE_SIZE) 2405 count = PAGE_SIZE; 2406 2407 /* No partial writes. */ 2408 length = -EINVAL; 2409 if (*ppos != 0) 2410 goto out; 2411 2412 length = -ENOMEM; 2413 page = (char*)__get_free_page(GFP_TEMPORARY); 2414 if (!page) 2415 goto out; 2416 2417 length = -EFAULT; 2418 if (copy_from_user(page, buf, count)) 2419 goto out_free; 2420 2421 /* Guard against adverse ptrace interaction */ 2422 length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); 2423 if (length < 0) 2424 goto out_free; 2425 2426 length = security_setprocattr(task, 2427 (char*)file->f_path.dentry->d_name.name, 2428 (void*)page, count); 2429 mutex_unlock(&task->signal->cred_guard_mutex); 2430 out_free: 2431 free_page((unsigned long) page); 2432 out: 2433 put_task_struct(task); 2434 out_no_task: 2435 return length; 2436 } 2437 2438 static const struct file_operations proc_pid_attr_operations = { 2439 .read = proc_pid_attr_read, 2440 .write = proc_pid_attr_write, 2441 .llseek = generic_file_llseek, 2442 }; 2443 2444 static const struct pid_entry attr_dir_stuff[] = { 2445 REG("current", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2446 REG("prev", 
S_IRUGO, proc_pid_attr_operations), 2447 REG("exec", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2448 REG("fscreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2449 REG("keycreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2450 REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), 2451 }; 2452 2453 static int proc_attr_dir_readdir(struct file * filp, 2454 void * dirent, filldir_t filldir) 2455 { 2456 return proc_pident_readdir(filp,dirent,filldir, 2457 attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff)); 2458 } 2459 2460 static const struct file_operations proc_attr_dir_operations = { 2461 .read = generic_read_dir, 2462 .readdir = proc_attr_dir_readdir, 2463 .llseek = default_llseek, 2464 }; 2465 2466 static struct dentry *proc_attr_dir_lookup(struct inode *dir, 2467 struct dentry *dentry, struct nameidata *nd) 2468 { 2469 return proc_pident_lookup(dir, dentry, 2470 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); 2471 } 2472 2473 static const struct inode_operations proc_attr_dir_inode_operations = { 2474 .lookup = proc_attr_dir_lookup, 2475 .getattr = pid_getattr, 2476 .setattr = proc_setattr, 2477 }; 2478 2479 #endif 2480 2481 #ifdef CONFIG_ELF_CORE 2482 static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, 2483 size_t count, loff_t *ppos) 2484 { 2485 struct task_struct *task = get_proc_task(file->f_dentry->d_inode); 2486 struct mm_struct *mm; 2487 char buffer[PROC_NUMBUF]; 2488 size_t len; 2489 int ret; 2490 2491 if (!task) 2492 return -ESRCH; 2493 2494 ret = 0; 2495 mm = get_task_mm(task); 2496 if (mm) { 2497 len = snprintf(buffer, sizeof(buffer), "%08lx\n", 2498 ((mm->flags & MMF_DUMP_FILTER_MASK) >> 2499 MMF_DUMP_FILTER_SHIFT)); 2500 mmput(mm); 2501 ret = simple_read_from_buffer(buf, count, ppos, buffer, len); 2502 } 2503 2504 put_task_struct(task); 2505 2506 return ret; 2507 } 2508 2509 static ssize_t proc_coredump_filter_write(struct file *file, 2510 const char __user *buf, 2511 size_t count, 2512 loff_t *ppos) 2513 { 2514 struct 
task_struct *task; 2515 struct mm_struct *mm; 2516 char buffer[PROC_NUMBUF], *end; 2517 unsigned int val; 2518 int ret; 2519 int i; 2520 unsigned long mask; 2521 2522 ret = -EFAULT; 2523 memset(buffer, 0, sizeof(buffer)); 2524 if (count > sizeof(buffer) - 1) 2525 count = sizeof(buffer) - 1; 2526 if (copy_from_user(buffer, buf, count)) 2527 goto out_no_task; 2528 2529 ret = -EINVAL; 2530 val = (unsigned int)simple_strtoul(buffer, &end, 0); 2531 if (*end == '\n') 2532 end++; 2533 if (end - buffer == 0) 2534 goto out_no_task; 2535 2536 ret = -ESRCH; 2537 task = get_proc_task(file->f_dentry->d_inode); 2538 if (!task) 2539 goto out_no_task; 2540 2541 ret = end - buffer; 2542 mm = get_task_mm(task); 2543 if (!mm) 2544 goto out_no_mm; 2545 2546 for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { 2547 if (val & mask) 2548 set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2549 else 2550 clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); 2551 } 2552 2553 mmput(mm); 2554 out_no_mm: 2555 put_task_struct(task); 2556 out_no_task: 2557 return ret; 2558 } 2559 2560 static const struct file_operations proc_coredump_filter_operations = { 2561 .read = proc_coredump_filter_read, 2562 .write = proc_coredump_filter_write, 2563 .llseek = generic_file_llseek, 2564 }; 2565 #endif 2566 2567 /* 2568 * /proc/self: 2569 */ 2570 static int proc_self_readlink(struct dentry *dentry, char __user *buffer, 2571 int buflen) 2572 { 2573 struct pid_namespace *ns = dentry->d_sb->s_fs_info; 2574 pid_t tgid = task_tgid_nr_ns(current, ns); 2575 char tmp[PROC_NUMBUF]; 2576 if (!tgid) 2577 return -ENOENT; 2578 sprintf(tmp, "%d", tgid); 2579 return vfs_readlink(dentry,buffer,buflen,tmp); 2580 } 2581 2582 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 2583 { 2584 struct pid_namespace *ns = dentry->d_sb->s_fs_info; 2585 pid_t tgid = task_tgid_nr_ns(current, ns); 2586 char *name = ERR_PTR(-ENOENT); 2587 if (tgid) { 2588 name = __getname(); 2589 if (!name) 2590 name = 
ERR_PTR(-ENOMEM); 2591 else 2592 sprintf(name, "%d", tgid); 2593 } 2594 nd_set_link(nd, name); 2595 return NULL; 2596 } 2597 2598 static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, 2599 void *cookie) 2600 { 2601 char *s = nd_get_link(nd); 2602 if (!IS_ERR(s)) 2603 __putname(s); 2604 } 2605 2606 static const struct inode_operations proc_self_inode_operations = { 2607 .readlink = proc_self_readlink, 2608 .follow_link = proc_self_follow_link, 2609 .put_link = proc_self_put_link, 2610 }; 2611 2612 /* 2613 * proc base 2614 * 2615 * These are the directory entries in the root directory of /proc 2616 * that properly belong to the /proc filesystem, as they describe 2617 * describe something that is process related. 2618 */ 2619 static const struct pid_entry proc_base_stuff[] = { 2620 NOD("self", S_IFLNK|S_IRWXUGO, 2621 &proc_self_inode_operations, NULL, {}), 2622 }; 2623 2624 static struct dentry *proc_base_instantiate(struct inode *dir, 2625 struct dentry *dentry, struct task_struct *task, const void *ptr) 2626 { 2627 const struct pid_entry *p = ptr; 2628 struct inode *inode; 2629 struct proc_inode *ei; 2630 struct dentry *error; 2631 2632 /* Allocate the inode */ 2633 error = ERR_PTR(-ENOMEM); 2634 inode = new_inode(dir->i_sb); 2635 if (!inode) 2636 goto out; 2637 2638 /* Initialize the inode */ 2639 ei = PROC_I(inode); 2640 inode->i_ino = get_next_ino(); 2641 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 2642 2643 /* 2644 * grab the reference to the task. 
2645 */ 2646 ei->pid = get_task_pid(task, PIDTYPE_PID); 2647 if (!ei->pid) 2648 goto out_iput; 2649 2650 inode->i_mode = p->mode; 2651 if (S_ISDIR(inode->i_mode)) 2652 inode->i_nlink = 2; 2653 if (S_ISLNK(inode->i_mode)) 2654 inode->i_size = 64; 2655 if (p->iop) 2656 inode->i_op = p->iop; 2657 if (p->fop) 2658 inode->i_fop = p->fop; 2659 ei->op = p->op; 2660 d_add(dentry, inode); 2661 error = NULL; 2662 out: 2663 return error; 2664 out_iput: 2665 iput(inode); 2666 goto out; 2667 } 2668 2669 static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) 2670 { 2671 struct dentry *error; 2672 struct task_struct *task = get_proc_task(dir); 2673 const struct pid_entry *p, *last; 2674 2675 error = ERR_PTR(-ENOENT); 2676 2677 if (!task) 2678 goto out_no_task; 2679 2680 /* Lookup the directory entry */ 2681 last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1]; 2682 for (p = proc_base_stuff; p <= last; p++) { 2683 if (p->len != dentry->d_name.len) 2684 continue; 2685 if (!memcmp(dentry->d_name.name, p->name, p->len)) 2686 break; 2687 } 2688 if (p > last) 2689 goto out; 2690 2691 error = proc_base_instantiate(dir, dentry, task, p); 2692 2693 out: 2694 put_task_struct(task); 2695 out_no_task: 2696 return error; 2697 } 2698 2699 static int proc_base_fill_cache(struct file *filp, void *dirent, 2700 filldir_t filldir, struct task_struct *task, const struct pid_entry *p) 2701 { 2702 return proc_fill_cache(filp, dirent, filldir, p->name, p->len, 2703 proc_base_instantiate, task, p); 2704 } 2705 2706 #ifdef CONFIG_TASK_IO_ACCOUNTING 2707 static int do_io_accounting(struct task_struct *task, char *buffer, int whole) 2708 { 2709 struct task_io_accounting acct = task->ioac; 2710 unsigned long flags; 2711 2712 if (whole && lock_task_sighand(task, &flags)) { 2713 struct task_struct *t = task; 2714 2715 task_io_accounting_add(&acct, &task->signal->ioac); 2716 while_each_thread(task, t) 2717 task_io_accounting_add(&acct, &t->ioac); 2718 2719 
unlock_task_sighand(task, &flags); 2720 } 2721 return sprintf(buffer, 2722 "rchar: %llu\n" 2723 "wchar: %llu\n" 2724 "syscr: %llu\n" 2725 "syscw: %llu\n" 2726 "read_bytes: %llu\n" 2727 "write_bytes: %llu\n" 2728 "cancelled_write_bytes: %llu\n", 2729 (unsigned long long)acct.rchar, 2730 (unsigned long long)acct.wchar, 2731 (unsigned long long)acct.syscr, 2732 (unsigned long long)acct.syscw, 2733 (unsigned long long)acct.read_bytes, 2734 (unsigned long long)acct.write_bytes, 2735 (unsigned long long)acct.cancelled_write_bytes); 2736 } 2737 2738 static int proc_tid_io_accounting(struct task_struct *task, char *buffer) 2739 { 2740 return do_io_accounting(task, buffer, 0); 2741 } 2742 2743 static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) 2744 { 2745 return do_io_accounting(task, buffer, 1); 2746 } 2747 #endif /* CONFIG_TASK_IO_ACCOUNTING */ 2748 2749 static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2750 struct pid *pid, struct task_struct *task) 2751 { 2752 int err = lock_trace(task); 2753 if (!err) { 2754 seq_printf(m, "%08x\n", task->personality); 2755 unlock_trace(task); 2756 } 2757 return err; 2758 } 2759 2760 /* 2761 * Thread groups 2762 */ 2763 static const struct file_operations proc_task_operations; 2764 static const struct inode_operations proc_task_inode_operations; 2765 2766 static const struct pid_entry tgid_base_stuff[] = { 2767 DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), 2768 DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), 2769 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 2770 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 2771 #ifdef CONFIG_NET 2772 DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), 2773 #endif 2774 REG("environ", S_IRUSR, proc_environ_operations), 2775 INF("auxv", S_IRUSR, proc_pid_auxv), 2776 ONE("status", S_IRUGO, 
proc_pid_status), 2777 ONE("personality", S_IRUGO, proc_pid_personality), 2778 INF("limits", S_IRUGO, proc_pid_limits), 2779 #ifdef CONFIG_SCHED_DEBUG 2780 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2781 #endif 2782 #ifdef CONFIG_SCHED_AUTOGROUP 2783 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), 2784 #endif 2785 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2786 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2787 INF("syscall", S_IRUGO, proc_pid_syscall), 2788 #endif 2789 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2790 ONE("stat", S_IRUGO, proc_tgid_stat), 2791 ONE("statm", S_IRUGO, proc_pid_statm), 2792 REG("maps", S_IRUGO, proc_maps_operations), 2793 #ifdef CONFIG_NUMA 2794 REG("numa_maps", S_IRUGO, proc_numa_maps_operations), 2795 #endif 2796 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 2797 LNK("cwd", proc_cwd_link), 2798 LNK("root", proc_root_link), 2799 LNK("exe", proc_exe_link), 2800 REG("mounts", S_IRUGO, proc_mounts_operations), 2801 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 2802 REG("mountstats", S_IRUSR, proc_mountstats_operations), 2803 #ifdef CONFIG_PROC_PAGE_MONITOR 2804 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2805 REG("smaps", S_IRUGO, proc_smaps_operations), 2806 REG("pagemap", S_IRUGO, proc_pagemap_operations), 2807 #endif 2808 #ifdef CONFIG_SECURITY 2809 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2810 #endif 2811 #ifdef CONFIG_KALLSYMS 2812 INF("wchan", S_IRUGO, proc_pid_wchan), 2813 #endif 2814 #ifdef CONFIG_STACKTRACE 2815 ONE("stack", S_IRUGO, proc_pid_stack), 2816 #endif 2817 #ifdef CONFIG_SCHEDSTATS 2818 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2819 #endif 2820 #ifdef CONFIG_LATENCYTOP 2821 REG("latency", S_IRUGO, proc_lstats_operations), 2822 #endif 2823 #ifdef CONFIG_PROC_PID_CPUSET 2824 REG("cpuset", S_IRUGO, proc_cpuset_operations), 2825 #endif 2826 #ifdef CONFIG_CGROUPS 2827 REG("cgroup", S_IRUGO, 
proc_cgroup_operations), 2828 #endif 2829 INF("oom_score", S_IRUGO, proc_oom_score), 2830 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), 2831 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 2832 #ifdef CONFIG_AUDITSYSCALL 2833 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 2834 REG("sessionid", S_IRUGO, proc_sessionid_operations), 2835 #endif 2836 #ifdef CONFIG_FAULT_INJECTION 2837 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 2838 #endif 2839 #ifdef CONFIG_ELF_CORE 2840 REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), 2841 #endif 2842 #ifdef CONFIG_TASK_IO_ACCOUNTING 2843 INF("io", S_IRUGO, proc_tgid_io_accounting), 2844 #endif 2845 }; 2846 2847 static int proc_tgid_base_readdir(struct file * filp, 2848 void * dirent, filldir_t filldir) 2849 { 2850 return proc_pident_readdir(filp,dirent,filldir, 2851 tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); 2852 } 2853 2854 static const struct file_operations proc_tgid_base_operations = { 2855 .read = generic_read_dir, 2856 .readdir = proc_tgid_base_readdir, 2857 .llseek = default_llseek, 2858 }; 2859 2860 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 2861 return proc_pident_lookup(dir, dentry, 2862 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 2863 } 2864 2865 static const struct inode_operations proc_tgid_base_inode_operations = { 2866 .lookup = proc_tgid_base_lookup, 2867 .getattr = pid_getattr, 2868 .setattr = proc_setattr, 2869 }; 2870 2871 static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) 2872 { 2873 struct dentry *dentry, *leader, *dir; 2874 char buf[PROC_NUMBUF]; 2875 struct qstr name; 2876 2877 name.name = buf; 2878 name.len = snprintf(buf, sizeof(buf), "%d", pid); 2879 dentry = d_hash_and_lookup(mnt->mnt_root, &name); 2880 if (dentry) { 2881 shrink_dcache_parent(dentry); 2882 d_drop(dentry); 2883 dput(dentry); 2884 } 2885 2886 
name.name = buf; 2887 name.len = snprintf(buf, sizeof(buf), "%d", tgid); 2888 leader = d_hash_and_lookup(mnt->mnt_root, &name); 2889 if (!leader) 2890 goto out; 2891 2892 name.name = "task"; 2893 name.len = strlen(name.name); 2894 dir = d_hash_and_lookup(leader, &name); 2895 if (!dir) 2896 goto out_put_leader; 2897 2898 name.name = buf; 2899 name.len = snprintf(buf, sizeof(buf), "%d", pid); 2900 dentry = d_hash_and_lookup(dir, &name); 2901 if (dentry) { 2902 shrink_dcache_parent(dentry); 2903 d_drop(dentry); 2904 dput(dentry); 2905 } 2906 2907 dput(dir); 2908 out_put_leader: 2909 dput(leader); 2910 out: 2911 return; 2912 } 2913 2914 /** 2915 * proc_flush_task - Remove dcache entries for @task from the /proc dcache. 2916 * @task: task that should be flushed. 2917 * 2918 * When flushing dentries from proc, one needs to flush them from global 2919 * proc (proc_mnt) and from all the namespaces' procs this task was seen 2920 * in. This call is supposed to do all of this job. 2921 * 2922 * Looks in the dcache for 2923 * /proc/@pid 2924 * /proc/@tgid/task/@pid 2925 * if either directory is present flushes it and all of it'ts children 2926 * from the dcache. 2927 * 2928 * It is safe and reasonable to cache /proc entries for a task until 2929 * that task exits. After that they just clog up the dcache with 2930 * useless entries, possibly causing useful dcache entries to be 2931 * flushed instead. This routine is proved to flush those useless 2932 * dcache entries at process exit time. 2933 * 2934 * NOTE: This routine is just an optimization so it does not guarantee 2935 * that no dcache entries will exist at process exit time it 2936 * just makes it very unlikely that any will persist. 
2937 */ 2938 2939 void proc_flush_task(struct task_struct *task) 2940 { 2941 int i; 2942 struct pid *pid, *tgid; 2943 struct upid *upid; 2944 2945 pid = task_pid(task); 2946 tgid = task_tgid(task); 2947 2948 for (i = 0; i <= pid->level; i++) { 2949 upid = &pid->numbers[i]; 2950 proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, 2951 tgid->numbers[i].nr); 2952 } 2953 2954 upid = &pid->numbers[pid->level]; 2955 if (upid->nr == 1) 2956 pid_ns_release_proc(upid->ns); 2957 } 2958 2959 static struct dentry *proc_pid_instantiate(struct inode *dir, 2960 struct dentry * dentry, 2961 struct task_struct *task, const void *ptr) 2962 { 2963 struct dentry *error = ERR_PTR(-ENOENT); 2964 struct inode *inode; 2965 2966 inode = proc_pid_make_inode(dir->i_sb, task); 2967 if (!inode) 2968 goto out; 2969 2970 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2971 inode->i_op = &proc_tgid_base_inode_operations; 2972 inode->i_fop = &proc_tgid_base_operations; 2973 inode->i_flags|=S_IMMUTABLE; 2974 2975 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, 2976 ARRAY_SIZE(tgid_base_stuff)); 2977 2978 d_set_d_op(dentry, &pid_dentry_operations); 2979 2980 d_add(dentry, inode); 2981 /* Close the race of the process dying before we return the dentry */ 2982 if (pid_revalidate(dentry, NULL)) 2983 error = NULL; 2984 out: 2985 return error; 2986 } 2987 2988 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 2989 { 2990 struct dentry *result; 2991 struct task_struct *task; 2992 unsigned tgid; 2993 struct pid_namespace *ns; 2994 2995 result = proc_base_lookup(dir, dentry); 2996 if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) 2997 goto out; 2998 2999 tgid = name_to_int(dentry); 3000 if (tgid == ~0U) 3001 goto out; 3002 3003 ns = dentry->d_sb->s_fs_info; 3004 rcu_read_lock(); 3005 task = find_task_by_pid_ns(tgid, ns); 3006 if (task) 3007 get_task_struct(task); 3008 rcu_read_unlock(); 3009 if (!task) 3010 goto out; 3011 3012 result = 
proc_pid_instantiate(dir, dentry, task, NULL); 3013 put_task_struct(task); 3014 out: 3015 return result; 3016 } 3017 3018 /* 3019 * Find the first task with tgid >= tgid 3020 * 3021 */ 3022 struct tgid_iter { 3023 unsigned int tgid; 3024 struct task_struct *task; 3025 }; 3026 static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter) 3027 { 3028 struct pid *pid; 3029 3030 if (iter.task) 3031 put_task_struct(iter.task); 3032 rcu_read_lock(); 3033 retry: 3034 iter.task = NULL; 3035 pid = find_ge_pid(iter.tgid, ns); 3036 if (pid) { 3037 iter.tgid = pid_nr_ns(pid, ns); 3038 iter.task = pid_task(pid, PIDTYPE_PID); 3039 /* What we to know is if the pid we have find is the 3040 * pid of a thread_group_leader. Testing for task 3041 * being a thread_group_leader is the obvious thing 3042 * todo but there is a window when it fails, due to 3043 * the pid transfer logic in de_thread. 3044 * 3045 * So we perform the straight forward test of seeing 3046 * if the pid we have found is the pid of a thread 3047 * group leader, and don't worry if the task we have 3048 * found doesn't happen to be a thread group leader. 3049 * As we don't care in the case of readdir. 
3050 */ 3051 if (!iter.task || !has_group_leader_pid(iter.task)) { 3052 iter.tgid += 1; 3053 goto retry; 3054 } 3055 get_task_struct(iter.task); 3056 } 3057 rcu_read_unlock(); 3058 return iter; 3059 } 3060 3061 #define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) 3062 3063 static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 3064 struct tgid_iter iter) 3065 { 3066 char name[PROC_NUMBUF]; 3067 int len = snprintf(name, sizeof(name), "%d", iter.tgid); 3068 return proc_fill_cache(filp, dirent, filldir, name, len, 3069 proc_pid_instantiate, iter.task, NULL); 3070 } 3071 3072 /* for the /proc/ directory itself, after non-process stuff has been done */ 3073 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 3074 { 3075 unsigned int nr; 3076 struct task_struct *reaper; 3077 struct tgid_iter iter; 3078 struct pid_namespace *ns; 3079 3080 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) 3081 goto out_no_task; 3082 nr = filp->f_pos - FIRST_PROCESS_ENTRY; 3083 3084 reaper = get_proc_task(filp->f_path.dentry->d_inode); 3085 if (!reaper) 3086 goto out_no_task; 3087 3088 for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { 3089 const struct pid_entry *p = &proc_base_stuff[nr]; 3090 if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) 3091 goto out; 3092 } 3093 3094 ns = filp->f_dentry->d_sb->s_fs_info; 3095 iter.task = NULL; 3096 iter.tgid = filp->f_pos - TGID_OFFSET; 3097 for (iter = next_tgid(ns, iter); 3098 iter.task; 3099 iter.tgid += 1, iter = next_tgid(ns, iter)) { 3100 filp->f_pos = iter.tgid + TGID_OFFSET; 3101 if (proc_pid_fill_cache(filp, dirent, filldir, iter) < 0) { 3102 put_task_struct(iter.task); 3103 goto out; 3104 } 3105 } 3106 filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; 3107 out: 3108 put_task_struct(reaper); 3109 out_no_task: 3110 return 0; 3111 } 3112 3113 /* 3114 * Tasks 3115 */ 3116 static const struct pid_entry tid_base_stuff[] = { 3117 DIR("fd", S_IRUSR|S_IXUSR, 
proc_fd_inode_operations, proc_fd_operations), 3118 DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), 3119 DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), 3120 REG("environ", S_IRUSR, proc_environ_operations), 3121 INF("auxv", S_IRUSR, proc_pid_auxv), 3122 ONE("status", S_IRUGO, proc_pid_status), 3123 ONE("personality", S_IRUGO, proc_pid_personality), 3124 INF("limits", S_IRUGO, proc_pid_limits), 3125 #ifdef CONFIG_SCHED_DEBUG 3126 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 3127 #endif 3128 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 3129 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK 3130 INF("syscall", S_IRUGO, proc_pid_syscall), 3131 #endif 3132 INF("cmdline", S_IRUGO, proc_pid_cmdline), 3133 ONE("stat", S_IRUGO, proc_tid_stat), 3134 ONE("statm", S_IRUGO, proc_pid_statm), 3135 REG("maps", S_IRUGO, proc_maps_operations), 3136 #ifdef CONFIG_NUMA 3137 REG("numa_maps", S_IRUGO, proc_numa_maps_operations), 3138 #endif 3139 REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), 3140 LNK("cwd", proc_cwd_link), 3141 LNK("root", proc_root_link), 3142 LNK("exe", proc_exe_link), 3143 REG("mounts", S_IRUGO, proc_mounts_operations), 3144 REG("mountinfo", S_IRUGO, proc_mountinfo_operations), 3145 #ifdef CONFIG_PROC_PAGE_MONITOR 3146 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 3147 REG("smaps", S_IRUGO, proc_smaps_operations), 3148 REG("pagemap", S_IRUGO, proc_pagemap_operations), 3149 #endif 3150 #ifdef CONFIG_SECURITY 3151 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 3152 #endif 3153 #ifdef CONFIG_KALLSYMS 3154 INF("wchan", S_IRUGO, proc_pid_wchan), 3155 #endif 3156 #ifdef CONFIG_STACKTRACE 3157 ONE("stack", S_IRUGO, proc_pid_stack), 3158 #endif 3159 #ifdef CONFIG_SCHEDSTATS 3160 INF("schedstat", S_IRUGO, proc_pid_schedstat), 3161 #endif 3162 #ifdef CONFIG_LATENCYTOP 3163 REG("latency", S_IRUGO, proc_lstats_operations), 3164 #endif 3165 
#ifdef CONFIG_PROC_PID_CPUSET 3166 REG("cpuset", S_IRUGO, proc_cpuset_operations), 3167 #endif 3168 #ifdef CONFIG_CGROUPS 3169 REG("cgroup", S_IRUGO, proc_cgroup_operations), 3170 #endif 3171 INF("oom_score", S_IRUGO, proc_oom_score), 3172 REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adjust_operations), 3173 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3174 #ifdef CONFIG_AUDITSYSCALL 3175 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3176 REG("sessionid", S_IRUGO, proc_sessionid_operations), 3177 #endif 3178 #ifdef CONFIG_FAULT_INJECTION 3179 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 3180 #endif 3181 #ifdef CONFIG_TASK_IO_ACCOUNTING 3182 INF("io", S_IRUGO, proc_tid_io_accounting), 3183 #endif 3184 }; 3185 3186 static int proc_tid_base_readdir(struct file * filp, 3187 void * dirent, filldir_t filldir) 3188 { 3189 return proc_pident_readdir(filp,dirent,filldir, 3190 tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); 3191 } 3192 3193 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 3194 return proc_pident_lookup(dir, dentry, 3195 tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3196 } 3197 3198 static const struct file_operations proc_tid_base_operations = { 3199 .read = generic_read_dir, 3200 .readdir = proc_tid_base_readdir, 3201 .llseek = default_llseek, 3202 }; 3203 3204 static const struct inode_operations proc_tid_base_inode_operations = { 3205 .lookup = proc_tid_base_lookup, 3206 .getattr = pid_getattr, 3207 .setattr = proc_setattr, 3208 }; 3209 3210 static struct dentry *proc_task_instantiate(struct inode *dir, 3211 struct dentry *dentry, struct task_struct *task, const void *ptr) 3212 { 3213 struct dentry *error = ERR_PTR(-ENOENT); 3214 struct inode *inode; 3215 inode = proc_pid_make_inode(dir->i_sb, task); 3216 3217 if (!inode) 3218 goto out; 3219 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 3220 inode->i_op = &proc_tid_base_inode_operations; 3221 
inode->i_fop = &proc_tid_base_operations; 3222 inode->i_flags|=S_IMMUTABLE; 3223 3224 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, 3225 ARRAY_SIZE(tid_base_stuff)); 3226 3227 d_set_d_op(dentry, &pid_dentry_operations); 3228 3229 d_add(dentry, inode); 3230 /* Close the race of the process dying before we return the dentry */ 3231 if (pid_revalidate(dentry, NULL)) 3232 error = NULL; 3233 out: 3234 return error; 3235 } 3236 3237 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 3238 { 3239 struct dentry *result = ERR_PTR(-ENOENT); 3240 struct task_struct *task; 3241 struct task_struct *leader = get_proc_task(dir); 3242 unsigned tid; 3243 struct pid_namespace *ns; 3244 3245 if (!leader) 3246 goto out_no_task; 3247 3248 tid = name_to_int(dentry); 3249 if (tid == ~0U) 3250 goto out; 3251 3252 ns = dentry->d_sb->s_fs_info; 3253 rcu_read_lock(); 3254 task = find_task_by_pid_ns(tid, ns); 3255 if (task) 3256 get_task_struct(task); 3257 rcu_read_unlock(); 3258 if (!task) 3259 goto out; 3260 if (!same_thread_group(leader, task)) 3261 goto out_drop_task; 3262 3263 result = proc_task_instantiate(dir, dentry, task, NULL); 3264 out_drop_task: 3265 put_task_struct(task); 3266 out: 3267 put_task_struct(leader); 3268 out_no_task: 3269 return result; 3270 } 3271 3272 /* 3273 * Find the first tid of a thread group to return to user space. 3274 * 3275 * Usually this is just the thread group leader, but if the users 3276 * buffer was too small or there was a seek into the middle of the 3277 * directory we have more work todo. 3278 * 3279 * In the case of a short read we start with find_task_by_pid. 3280 * 3281 * In the case of a seek we start with the leader and walk nr 3282 * threads past it. 
3283 */ 3284 static struct task_struct *first_tid(struct task_struct *leader, 3285 int tid, int nr, struct pid_namespace *ns) 3286 { 3287 struct task_struct *pos; 3288 3289 rcu_read_lock(); 3290 /* Attempt to start with the pid of a thread */ 3291 if (tid && (nr > 0)) { 3292 pos = find_task_by_pid_ns(tid, ns); 3293 if (pos && (pos->group_leader == leader)) 3294 goto found; 3295 } 3296 3297 /* If nr exceeds the number of threads there is nothing todo */ 3298 pos = NULL; 3299 if (nr && nr >= get_nr_threads(leader)) 3300 goto out; 3301 3302 /* If we haven't found our starting place yet start 3303 * with the leader and walk nr threads forward. 3304 */ 3305 for (pos = leader; nr > 0; --nr) { 3306 pos = next_thread(pos); 3307 if (pos == leader) { 3308 pos = NULL; 3309 goto out; 3310 } 3311 } 3312 found: 3313 get_task_struct(pos); 3314 out: 3315 rcu_read_unlock(); 3316 return pos; 3317 } 3318 3319 /* 3320 * Find the next thread in the thread list. 3321 * Return NULL if there is an error or no next thread. 3322 * 3323 * The reference to the input task_struct is released. 
3324 */ 3325 static struct task_struct *next_tid(struct task_struct *start) 3326 { 3327 struct task_struct *pos = NULL; 3328 rcu_read_lock(); 3329 if (pid_alive(start)) { 3330 pos = next_thread(start); 3331 if (thread_group_leader(pos)) 3332 pos = NULL; 3333 else 3334 get_task_struct(pos); 3335 } 3336 rcu_read_unlock(); 3337 put_task_struct(start); 3338 return pos; 3339 } 3340 3341 static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 3342 struct task_struct *task, int tid) 3343 { 3344 char name[PROC_NUMBUF]; 3345 int len = snprintf(name, sizeof(name), "%d", tid); 3346 return proc_fill_cache(filp, dirent, filldir, name, len, 3347 proc_task_instantiate, task, NULL); 3348 } 3349 3350 /* for the /proc/TGID/task/ directories */ 3351 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) 3352 { 3353 struct dentry *dentry = filp->f_path.dentry; 3354 struct inode *inode = dentry->d_inode; 3355 struct task_struct *leader = NULL; 3356 struct task_struct *task; 3357 int retval = -ENOENT; 3358 ino_t ino; 3359 int tid; 3360 struct pid_namespace *ns; 3361 3362 task = get_proc_task(inode); 3363 if (!task) 3364 goto out_no_task; 3365 rcu_read_lock(); 3366 if (pid_alive(task)) { 3367 leader = task->group_leader; 3368 get_task_struct(leader); 3369 } 3370 rcu_read_unlock(); 3371 put_task_struct(task); 3372 if (!leader) 3373 goto out_no_task; 3374 retval = 0; 3375 3376 switch ((unsigned long)filp->f_pos) { 3377 case 0: 3378 ino = inode->i_ino; 3379 if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0) 3380 goto out; 3381 filp->f_pos++; 3382 /* fall through */ 3383 case 1: 3384 ino = parent_ino(dentry); 3385 if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0) 3386 goto out; 3387 filp->f_pos++; 3388 /* fall through */ 3389 } 3390 3391 /* f_version caches the tgid value that the last readdir call couldn't 3392 * return. lseek aka telldir automagically resets f_version to 0. 
3393 */ 3394 ns = filp->f_dentry->d_sb->s_fs_info; 3395 tid = (int)filp->f_version; 3396 filp->f_version = 0; 3397 for (task = first_tid(leader, tid, filp->f_pos - 2, ns); 3398 task; 3399 task = next_tid(task), filp->f_pos++) { 3400 tid = task_pid_nr_ns(task, ns); 3401 if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { 3402 /* returning this tgid failed, save it as the first 3403 * pid for the next readir call */ 3404 filp->f_version = (u64)tid; 3405 put_task_struct(task); 3406 break; 3407 } 3408 } 3409 out: 3410 put_task_struct(leader); 3411 out_no_task: 3412 return retval; 3413 } 3414 3415 static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 3416 { 3417 struct inode *inode = dentry->d_inode; 3418 struct task_struct *p = get_proc_task(inode); 3419 generic_fillattr(inode, stat); 3420 3421 if (p) { 3422 stat->nlink += get_nr_threads(p); 3423 put_task_struct(p); 3424 } 3425 3426 return 0; 3427 } 3428 3429 static const struct inode_operations proc_task_inode_operations = { 3430 .lookup = proc_task_lookup, 3431 .getattr = proc_task_getattr, 3432 .setattr = proc_setattr, 3433 }; 3434 3435 static const struct file_operations proc_task_operations = { 3436 .read = generic_read_dir, 3437 .readdir = proc_task_readdir, 3438 .llseek = default_llseek, 3439 }; 3440