/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own. We don't use the
 *  rest of procfs anymore.
 *
 *
 *  Changelog:
 *  17-Jan-2005
 *  Allan Bezerra
 *  Bruna Moreira <bruna.moreira@indt.org.br>
 *  Edjard Mota <edjard.mota@indt.org.br>
 *  Ilias Biris <ilias.biris@indt.org.br>
 *  Mauricio Lin <mauricio.lin@indt.org.br>
 *
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *
 *  A new process specific entry (smaps) included in /proc. It shows the
 *  size of rss for each memory area. The maps entry lacks information
 *  about physical memory size (rss) for each mapped file, i.e.,
 *  rss information for executables and library files.
 *  This additional information is useful for any tools that need to know
 *  about physical memory consumption for a process specific library.
 *
 *  Changelog:
 *  21-Feb-2005
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *  Pud inclusion in the page table walking.
 *
 *  ChangeLog:
 *  10-Mar-2005
 *  10LE Instituto Nokia de Tecnologia - INdT:
 *  A better way to walk through the page table as suggested by Hugh Dickins.
 *
 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
 *  Smaps information related to shared, private, clean and dirty pages.
 *
 *  Paul Mundt <paul.mundt@nokia.com>:
 *  Overall revision about smaps.
 */

#include <asm/uaccess.h>

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/string.h>
#include <linux/seq_file.h>
#include <linux/namei.h>
#include <linux/namespace.h>
#include <linux/mm.h>
#include <linux/smp_lock.h>
#include <linux/rcupdate.h>
#include <linux/kallsyms.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/seccomp.h>
#include <linux/cpuset.h>
#include <linux/audit.h>
#include "internal.h"

/*
 * For hysterical raisins we keep the same inumbers as in the old procfs.
 * Feel free to change the macro below - just keep the range distinct from
 * inumbers of the rest of procfs (currently those are in 0x0000--0xffff).
 * As soon as we'll get a separate superblock we will be able to forget
 * about magical ranges too.
81 */ 82 83 #define fake_ino(pid,ino) (((pid)<<16)|(ino)) 84 85 enum pid_directory_inos { 86 PROC_TGID_INO = 2, 87 PROC_TGID_TASK, 88 PROC_TGID_STATUS, 89 PROC_TGID_MEM, 90 #ifdef CONFIG_SECCOMP 91 PROC_TGID_SECCOMP, 92 #endif 93 PROC_TGID_CWD, 94 PROC_TGID_ROOT, 95 PROC_TGID_EXE, 96 PROC_TGID_FD, 97 PROC_TGID_ENVIRON, 98 PROC_TGID_AUXV, 99 PROC_TGID_CMDLINE, 100 PROC_TGID_STAT, 101 PROC_TGID_STATM, 102 PROC_TGID_MAPS, 103 PROC_TGID_NUMA_MAPS, 104 PROC_TGID_MOUNTS, 105 PROC_TGID_WCHAN, 106 PROC_TGID_SMAPS, 107 #ifdef CONFIG_SCHEDSTATS 108 PROC_TGID_SCHEDSTAT, 109 #endif 110 #ifdef CONFIG_CPUSETS 111 PROC_TGID_CPUSET, 112 #endif 113 #ifdef CONFIG_SECURITY 114 PROC_TGID_ATTR, 115 PROC_TGID_ATTR_CURRENT, 116 PROC_TGID_ATTR_PREV, 117 PROC_TGID_ATTR_EXEC, 118 PROC_TGID_ATTR_FSCREATE, 119 #endif 120 #ifdef CONFIG_AUDITSYSCALL 121 PROC_TGID_LOGINUID, 122 #endif 123 PROC_TGID_OOM_SCORE, 124 PROC_TGID_OOM_ADJUST, 125 PROC_TID_INO, 126 PROC_TID_STATUS, 127 PROC_TID_MEM, 128 #ifdef CONFIG_SECCOMP 129 PROC_TID_SECCOMP, 130 #endif 131 PROC_TID_CWD, 132 PROC_TID_ROOT, 133 PROC_TID_EXE, 134 PROC_TID_FD, 135 PROC_TID_ENVIRON, 136 PROC_TID_AUXV, 137 PROC_TID_CMDLINE, 138 PROC_TID_STAT, 139 PROC_TID_STATM, 140 PROC_TID_MAPS, 141 PROC_TID_NUMA_MAPS, 142 PROC_TID_MOUNTS, 143 PROC_TID_WCHAN, 144 PROC_TID_SMAPS, 145 #ifdef CONFIG_SCHEDSTATS 146 PROC_TID_SCHEDSTAT, 147 #endif 148 #ifdef CONFIG_CPUSETS 149 PROC_TID_CPUSET, 150 #endif 151 #ifdef CONFIG_SECURITY 152 PROC_TID_ATTR, 153 PROC_TID_ATTR_CURRENT, 154 PROC_TID_ATTR_PREV, 155 PROC_TID_ATTR_EXEC, 156 PROC_TID_ATTR_FSCREATE, 157 #endif 158 #ifdef CONFIG_AUDITSYSCALL 159 PROC_TID_LOGINUID, 160 #endif 161 PROC_TID_OOM_SCORE, 162 PROC_TID_OOM_ADJUST, 163 164 /* Add new entries before this */ 165 PROC_TID_FD_DIR = 0x8000, /* 0x8000-0xffff */ 166 }; 167 168 struct pid_entry { 169 int type; 170 int len; 171 char *name; 172 mode_t mode; 173 }; 174 175 #define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} 176 177 static struct 
pid_entry tgid_base_stuff[] = { 178 E(PROC_TGID_TASK, "task", S_IFDIR|S_IRUGO|S_IXUGO), 179 E(PROC_TGID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), 180 E(PROC_TGID_ENVIRON, "environ", S_IFREG|S_IRUSR), 181 E(PROC_TGID_AUXV, "auxv", S_IFREG|S_IRUSR), 182 E(PROC_TGID_STATUS, "status", S_IFREG|S_IRUGO), 183 E(PROC_TGID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), 184 E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), 185 E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), 186 E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), 187 #ifdef CONFIG_NUMA 188 E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), 189 #endif 190 E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 191 #ifdef CONFIG_SECCOMP 192 E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 193 #endif 194 E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO), 195 E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO), 196 E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO), 197 E(PROC_TGID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 198 E(PROC_TGID_SMAPS, "smaps", S_IFREG|S_IRUGO), 199 #ifdef CONFIG_SECURITY 200 E(PROC_TGID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 201 #endif 202 #ifdef CONFIG_KALLSYMS 203 E(PROC_TGID_WCHAN, "wchan", S_IFREG|S_IRUGO), 204 #endif 205 #ifdef CONFIG_SCHEDSTATS 206 E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO), 207 #endif 208 #ifdef CONFIG_CPUSETS 209 E(PROC_TGID_CPUSET, "cpuset", S_IFREG|S_IRUGO), 210 #endif 211 E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), 212 E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR), 213 #ifdef CONFIG_AUDITSYSCALL 214 E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), 215 #endif 216 {0,0,NULL,0} 217 }; 218 static struct pid_entry tid_base_stuff[] = { 219 E(PROC_TID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), 220 E(PROC_TID_ENVIRON, "environ", S_IFREG|S_IRUSR), 221 E(PROC_TID_AUXV, "auxv", S_IFREG|S_IRUSR), 222 E(PROC_TID_STATUS, "status", S_IFREG|S_IRUGO), 223 E(PROC_TID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), 224 E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), 225 E(PROC_TID_STATM, "statm", 
S_IFREG|S_IRUGO), 226 E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), 227 #ifdef CONFIG_NUMA 228 E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), 229 #endif 230 E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), 231 #ifdef CONFIG_SECCOMP 232 E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), 233 #endif 234 E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO), 235 E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO), 236 E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO), 237 E(PROC_TID_MOUNTS, "mounts", S_IFREG|S_IRUGO), 238 E(PROC_TID_SMAPS, "smaps", S_IFREG|S_IRUGO), 239 #ifdef CONFIG_SECURITY 240 E(PROC_TID_ATTR, "attr", S_IFDIR|S_IRUGO|S_IXUGO), 241 #endif 242 #ifdef CONFIG_KALLSYMS 243 E(PROC_TID_WCHAN, "wchan", S_IFREG|S_IRUGO), 244 #endif 245 #ifdef CONFIG_SCHEDSTATS 246 E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO), 247 #endif 248 #ifdef CONFIG_CPUSETS 249 E(PROC_TID_CPUSET, "cpuset", S_IFREG|S_IRUGO), 250 #endif 251 E(PROC_TID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO), 252 E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR), 253 #ifdef CONFIG_AUDITSYSCALL 254 E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO), 255 #endif 256 {0,0,NULL,0} 257 }; 258 259 #ifdef CONFIG_SECURITY 260 static struct pid_entry tgid_attr_stuff[] = { 261 E(PROC_TGID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), 262 E(PROC_TGID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 263 E(PROC_TGID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 264 E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 265 {0,0,NULL,0} 266 }; 267 static struct pid_entry tid_attr_stuff[] = { 268 E(PROC_TID_ATTR_CURRENT, "current", S_IFREG|S_IRUGO|S_IWUGO), 269 E(PROC_TID_ATTR_PREV, "prev", S_IFREG|S_IRUGO), 270 E(PROC_TID_ATTR_EXEC, "exec", S_IFREG|S_IRUGO|S_IWUGO), 271 E(PROC_TID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO), 272 {0,0,NULL,0} 273 }; 274 #endif 275 276 #undef E 277 278 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 279 { 280 struct task_struct *task 
= proc_task(inode); 281 struct files_struct *files; 282 struct file *file; 283 int fd = proc_type(inode) - PROC_TID_FD_DIR; 284 285 files = get_files_struct(task); 286 if (files) { 287 rcu_read_lock(); 288 file = fcheck_files(files, fd); 289 if (file) { 290 *mnt = mntget(file->f_vfsmnt); 291 *dentry = dget(file->f_dentry); 292 rcu_read_unlock(); 293 put_files_struct(files); 294 return 0; 295 } 296 rcu_read_unlock(); 297 put_files_struct(files); 298 } 299 return -ENOENT; 300 } 301 302 static struct fs_struct *get_fs_struct(struct task_struct *task) 303 { 304 struct fs_struct *fs; 305 task_lock(task); 306 fs = task->fs; 307 if(fs) 308 atomic_inc(&fs->count); 309 task_unlock(task); 310 return fs; 311 } 312 313 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 314 { 315 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 316 int result = -ENOENT; 317 if (fs) { 318 read_lock(&fs->lock); 319 *mnt = mntget(fs->pwdmnt); 320 *dentry = dget(fs->pwd); 321 read_unlock(&fs->lock); 322 result = 0; 323 put_fs_struct(fs); 324 } 325 return result; 326 } 327 328 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) 329 { 330 struct fs_struct *fs = get_fs_struct(proc_task(inode)); 331 int result = -ENOENT; 332 if (fs) { 333 read_lock(&fs->lock); 334 *mnt = mntget(fs->rootmnt); 335 *dentry = dget(fs->root); 336 read_unlock(&fs->lock); 337 result = 0; 338 put_fs_struct(fs); 339 } 340 return result; 341 } 342 343 344 /* Same as proc_root_link, but this addionally tries to get fs from other 345 * threads in the group */ 346 static int proc_task_root_link(struct inode *inode, struct dentry **dentry, 347 struct vfsmount **mnt) 348 { 349 struct fs_struct *fs; 350 int result = -ENOENT; 351 struct task_struct *leader = proc_task(inode); 352 353 task_lock(leader); 354 fs = leader->fs; 355 if (fs) { 356 atomic_inc(&fs->count); 357 task_unlock(leader); 358 } else { 359 /* Try to get fs from other threads */ 
360 task_unlock(leader); 361 read_lock(&tasklist_lock); 362 if (pid_alive(leader)) { 363 struct task_struct *task = leader; 364 365 while ((task = next_thread(task)) != leader) { 366 task_lock(task); 367 fs = task->fs; 368 if (fs) { 369 atomic_inc(&fs->count); 370 task_unlock(task); 371 break; 372 } 373 task_unlock(task); 374 } 375 } 376 read_unlock(&tasklist_lock); 377 } 378 379 if (fs) { 380 read_lock(&fs->lock); 381 *mnt = mntget(fs->rootmnt); 382 *dentry = dget(fs->root); 383 read_unlock(&fs->lock); 384 result = 0; 385 put_fs_struct(fs); 386 } 387 return result; 388 } 389 390 391 #define MAY_PTRACE(task) \ 392 (task == current || \ 393 (task->parent == current && \ 394 (task->ptrace & PT_PTRACED) && \ 395 (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ 396 security_ptrace(current,task) == 0)) 397 398 static int proc_pid_environ(struct task_struct *task, char * buffer) 399 { 400 int res = 0; 401 struct mm_struct *mm = get_task_mm(task); 402 if (mm) { 403 unsigned int len = mm->env_end - mm->env_start; 404 if (len > PAGE_SIZE) 405 len = PAGE_SIZE; 406 res = access_process_vm(task, mm->env_start, buffer, len, 0); 407 if (!ptrace_may_attach(task)) 408 res = -ESRCH; 409 mmput(mm); 410 } 411 return res; 412 } 413 414 static int proc_pid_cmdline(struct task_struct *task, char * buffer) 415 { 416 int res = 0; 417 unsigned int len; 418 struct mm_struct *mm = get_task_mm(task); 419 if (!mm) 420 goto out; 421 if (!mm->arg_end) 422 goto out_mm; /* Shh! No looking before we're done */ 423 424 len = mm->arg_end - mm->arg_start; 425 426 if (len > PAGE_SIZE) 427 len = PAGE_SIZE; 428 429 res = access_process_vm(task, mm->arg_start, buffer, len, 0); 430 431 // If the nul at the end of args has been overwritten, then 432 // assume application is using setproctitle(3). 
433 if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) { 434 len = strnlen(buffer, res); 435 if (len < res) { 436 res = len; 437 } else { 438 len = mm->env_end - mm->env_start; 439 if (len > PAGE_SIZE - res) 440 len = PAGE_SIZE - res; 441 res += access_process_vm(task, mm->env_start, buffer+res, len, 0); 442 res = strnlen(buffer, res); 443 } 444 } 445 out_mm: 446 mmput(mm); 447 out: 448 return res; 449 } 450 451 static int proc_pid_auxv(struct task_struct *task, char *buffer) 452 { 453 int res = 0; 454 struct mm_struct *mm = get_task_mm(task); 455 if (mm) { 456 unsigned int nwords = 0; 457 do 458 nwords += 2; 459 while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ 460 res = nwords * sizeof(mm->saved_auxv[0]); 461 if (res > PAGE_SIZE) 462 res = PAGE_SIZE; 463 memcpy(buffer, mm->saved_auxv, res); 464 mmput(mm); 465 } 466 return res; 467 } 468 469 470 #ifdef CONFIG_KALLSYMS 471 /* 472 * Provides a wchan file via kallsyms in a proper one-value-per-file format. 473 * Returns the resolved symbol. If that fails, simply return the address. 
474 */ 475 static int proc_pid_wchan(struct task_struct *task, char *buffer) 476 { 477 char *modname; 478 const char *sym_name; 479 unsigned long wchan, size, offset; 480 char namebuf[KSYM_NAME_LEN+1]; 481 482 wchan = get_wchan(task); 483 484 sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf); 485 if (sym_name) 486 return sprintf(buffer, "%s", sym_name); 487 return sprintf(buffer, "%lu", wchan); 488 } 489 #endif /* CONFIG_KALLSYMS */ 490 491 #ifdef CONFIG_SCHEDSTATS 492 /* 493 * Provides /proc/PID/schedstat 494 */ 495 static int proc_pid_schedstat(struct task_struct *task, char *buffer) 496 { 497 return sprintf(buffer, "%lu %lu %lu\n", 498 task->sched_info.cpu_time, 499 task->sched_info.run_delay, 500 task->sched_info.pcnt); 501 } 502 #endif 503 504 /* The badness from the OOM killer */ 505 unsigned long badness(struct task_struct *p, unsigned long uptime); 506 static int proc_oom_score(struct task_struct *task, char *buffer) 507 { 508 unsigned long points; 509 struct timespec uptime; 510 511 do_posix_clock_monotonic_gettime(&uptime); 512 points = badness(task, uptime.tv_sec); 513 return sprintf(buffer, "%lu\n", points); 514 } 515 516 /************************************************************************/ 517 /* Here the fs part begins */ 518 /************************************************************************/ 519 520 /* permission checks */ 521 522 /* If the process being read is separated by chroot from the reading process, 523 * don't let the reader access the threads. 
524 */ 525 static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt) 526 { 527 struct dentry *de, *base; 528 struct vfsmount *our_vfsmnt, *mnt; 529 int res = 0; 530 read_lock(¤t->fs->lock); 531 our_vfsmnt = mntget(current->fs->rootmnt); 532 base = dget(current->fs->root); 533 read_unlock(¤t->fs->lock); 534 535 spin_lock(&vfsmount_lock); 536 de = root; 537 mnt = vfsmnt; 538 539 while (vfsmnt != our_vfsmnt) { 540 if (vfsmnt == vfsmnt->mnt_parent) 541 goto out; 542 de = vfsmnt->mnt_mountpoint; 543 vfsmnt = vfsmnt->mnt_parent; 544 } 545 546 if (!is_subdir(de, base)) 547 goto out; 548 spin_unlock(&vfsmount_lock); 549 550 exit: 551 dput(base); 552 mntput(our_vfsmnt); 553 dput(root); 554 mntput(mnt); 555 return res; 556 out: 557 spin_unlock(&vfsmount_lock); 558 res = -EACCES; 559 goto exit; 560 } 561 562 static int proc_check_root(struct inode *inode) 563 { 564 struct dentry *root; 565 struct vfsmount *vfsmnt; 566 567 if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ 568 return -ENOENT; 569 return proc_check_chroot(root, vfsmnt); 570 } 571 572 static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) 573 { 574 if (generic_permission(inode, mask, NULL) != 0) 575 return -EACCES; 576 return proc_check_root(inode); 577 } 578 579 static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd) 580 { 581 struct dentry *root; 582 struct vfsmount *vfsmnt; 583 584 if (generic_permission(inode, mask, NULL) != 0) 585 return -EACCES; 586 587 if (proc_task_root_link(inode, &root, &vfsmnt)) 588 return -ENOENT; 589 590 return proc_check_chroot(root, vfsmnt); 591 } 592 593 extern struct seq_operations proc_pid_maps_op; 594 static int maps_open(struct inode *inode, struct file *file) 595 { 596 struct task_struct *task = proc_task(inode); 597 int ret = seq_open(file, &proc_pid_maps_op); 598 if (!ret) { 599 struct seq_file *m = file->private_data; 600 m->private = task; 601 } 602 return ret; 603 } 604 605 static struct 
file_operations proc_maps_operations = { 606 .open = maps_open, 607 .read = seq_read, 608 .llseek = seq_lseek, 609 .release = seq_release, 610 }; 611 612 #ifdef CONFIG_NUMA 613 extern struct seq_operations proc_pid_numa_maps_op; 614 static int numa_maps_open(struct inode *inode, struct file *file) 615 { 616 struct task_struct *task = proc_task(inode); 617 int ret = seq_open(file, &proc_pid_numa_maps_op); 618 if (!ret) { 619 struct seq_file *m = file->private_data; 620 m->private = task; 621 } 622 return ret; 623 } 624 625 static struct file_operations proc_numa_maps_operations = { 626 .open = numa_maps_open, 627 .read = seq_read, 628 .llseek = seq_lseek, 629 .release = seq_release, 630 }; 631 #endif 632 633 extern struct seq_operations proc_pid_smaps_op; 634 static int smaps_open(struct inode *inode, struct file *file) 635 { 636 struct task_struct *task = proc_task(inode); 637 int ret = seq_open(file, &proc_pid_smaps_op); 638 if (!ret) { 639 struct seq_file *m = file->private_data; 640 m->private = task; 641 } 642 return ret; 643 } 644 645 static struct file_operations proc_smaps_operations = { 646 .open = smaps_open, 647 .read = seq_read, 648 .llseek = seq_lseek, 649 .release = seq_release, 650 }; 651 652 extern struct seq_operations mounts_op; 653 static int mounts_open(struct inode *inode, struct file *file) 654 { 655 struct task_struct *task = proc_task(inode); 656 int ret = seq_open(file, &mounts_op); 657 658 if (!ret) { 659 struct seq_file *m = file->private_data; 660 struct namespace *namespace; 661 task_lock(task); 662 namespace = task->namespace; 663 if (namespace) 664 get_namespace(namespace); 665 task_unlock(task); 666 667 if (namespace) 668 m->private = namespace; 669 else { 670 seq_release(inode, file); 671 ret = -EINVAL; 672 } 673 } 674 return ret; 675 } 676 677 static int mounts_release(struct inode *inode, struct file *file) 678 { 679 struct seq_file *m = file->private_data; 680 struct namespace *namespace = m->private; 681 put_namespace(namespace); 
682 return seq_release(inode, file); 683 } 684 685 static struct file_operations proc_mounts_operations = { 686 .open = mounts_open, 687 .read = seq_read, 688 .llseek = seq_lseek, 689 .release = mounts_release, 690 }; 691 692 #define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ 693 694 static ssize_t proc_info_read(struct file * file, char __user * buf, 695 size_t count, loff_t *ppos) 696 { 697 struct inode * inode = file->f_dentry->d_inode; 698 unsigned long page; 699 ssize_t length; 700 struct task_struct *task = proc_task(inode); 701 702 if (count > PROC_BLOCK_SIZE) 703 count = PROC_BLOCK_SIZE; 704 if (!(page = __get_free_page(GFP_KERNEL))) 705 return -ENOMEM; 706 707 length = PROC_I(inode)->op.proc_read(task, (char*)page); 708 709 if (length >= 0) 710 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 711 free_page(page); 712 return length; 713 } 714 715 static struct file_operations proc_info_file_operations = { 716 .read = proc_info_read, 717 }; 718 719 static int mem_open(struct inode* inode, struct file* file) 720 { 721 file->private_data = (void*)((long)current->self_exec_id); 722 return 0; 723 } 724 725 static ssize_t mem_read(struct file * file, char __user * buf, 726 size_t count, loff_t *ppos) 727 { 728 struct task_struct *task = proc_task(file->f_dentry->d_inode); 729 char *page; 730 unsigned long src = *ppos; 731 int ret = -ESRCH; 732 struct mm_struct *mm; 733 734 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 735 goto out; 736 737 ret = -ENOMEM; 738 page = (char *)__get_free_page(GFP_USER); 739 if (!page) 740 goto out; 741 742 ret = 0; 743 744 mm = get_task_mm(task); 745 if (!mm) 746 goto out_free; 747 748 ret = -EIO; 749 750 if (file->private_data != (void*)((long)current->self_exec_id)) 751 goto out_put; 752 753 ret = 0; 754 755 while (count > 0) { 756 int this_len, retval; 757 758 this_len = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; 759 retval = access_process_vm(task, src, page, this_len, 0); 760 if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { 761 if (!ret) 762 ret = -EIO; 763 break; 764 } 765 766 if (copy_to_user(buf, page, retval)) { 767 ret = -EFAULT; 768 break; 769 } 770 771 ret += retval; 772 src += retval; 773 buf += retval; 774 count -= retval; 775 } 776 *ppos = src; 777 778 out_put: 779 mmput(mm); 780 out_free: 781 free_page((unsigned long) page); 782 out: 783 return ret; 784 } 785 786 #define mem_write NULL 787 788 #ifndef mem_write 789 /* This is a security hazard */ 790 static ssize_t mem_write(struct file * file, const char * buf, 791 size_t count, loff_t *ppos) 792 { 793 int copied = 0; 794 char *page; 795 struct task_struct *task = proc_task(file->f_dentry->d_inode); 796 unsigned long dst = *ppos; 797 798 if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) 799 return -ESRCH; 800 801 page = (char *)__get_free_page(GFP_USER); 802 if (!page) 803 return -ENOMEM; 804 805 while (count > 0) { 806 int this_len, retval; 807 808 this_len = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; 809 if (copy_from_user(page, buf, this_len)) { 810 copied = -EFAULT; 811 break; 812 } 813 retval = access_process_vm(task, dst, page, this_len, 1); 814 if (!retval) { 815 if (!copied) 816 copied = -EIO; 817 break; 818 } 819 copied += retval; 820 buf += retval; 821 dst += retval; 822 count -= retval; 823 } 824 *ppos = dst; 825 free_page((unsigned long) page); 826 return copied; 827 } 828 #endif 829 830 static loff_t mem_lseek(struct file * file, loff_t offset, int orig) 831 { 832 switch (orig) { 833 case 0: 834 file->f_pos = offset; 835 break; 836 case 1: 837 file->f_pos += offset; 838 break; 839 default: 840 return -EINVAL; 841 } 842 force_successful_syscall_return(); 843 return file->f_pos; 844 } 845 846 static struct file_operations proc_mem_operations = { 847 .llseek = mem_lseek, 848 .read = mem_read, 849 .write = mem_write, 850 .open = mem_open, 851 }; 852 853 static ssize_t oom_adjust_read(struct file *file, char __user *buf, 854 size_t count, loff_t *ppos) 855 { 856 struct task_struct *task = proc_task(file->f_dentry->d_inode); 857 char buffer[8]; 858 size_t len; 859 int oom_adjust = task->oomkilladj; 860 loff_t __ppos = *ppos; 861 862 len = sprintf(buffer, "%i\n", oom_adjust); 863 if (__ppos >= len) 864 return 0; 865 if (count > len-__ppos) 866 count = len-__ppos; 867 if (copy_to_user(buf, buffer + __ppos, count)) 868 return -EFAULT; 869 *ppos = __ppos + count; 870 return count; 871 } 872 873 static ssize_t oom_adjust_write(struct file *file, const char __user *buf, 874 size_t count, loff_t *ppos) 875 { 876 struct task_struct *task = proc_task(file->f_dentry->d_inode); 877 char buffer[8], *end; 878 int oom_adjust; 879 880 if (!capable(CAP_SYS_RESOURCE)) 881 return -EPERM; 882 memset(buffer, 0, 8); 883 if (count > 6) 884 count = 6; 885 if (copy_from_user(buffer, buf, count)) 886 return -EFAULT; 887 oom_adjust = simple_strtol(buffer, &end, 0); 888 if ((oom_adjust < -16 || oom_adjust > 15) && oom_adjust != OOM_DISABLE) 889 return -EINVAL; 890 
if (*end == '\n') 891 end++; 892 task->oomkilladj = oom_adjust; 893 if (end - buffer == 0) 894 return -EIO; 895 return end - buffer; 896 } 897 898 static struct file_operations proc_oom_adjust_operations = { 899 .read = oom_adjust_read, 900 .write = oom_adjust_write, 901 }; 902 903 static struct inode_operations proc_mem_inode_operations = { 904 .permission = proc_permission, 905 }; 906 907 #ifdef CONFIG_AUDITSYSCALL 908 #define TMPBUFLEN 21 909 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, 910 size_t count, loff_t *ppos) 911 { 912 struct inode * inode = file->f_dentry->d_inode; 913 struct task_struct *task = proc_task(inode); 914 ssize_t length; 915 char tmpbuf[TMPBUFLEN]; 916 917 length = scnprintf(tmpbuf, TMPBUFLEN, "%u", 918 audit_get_loginuid(task->audit_context)); 919 return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); 920 } 921 922 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, 923 size_t count, loff_t *ppos) 924 { 925 struct inode * inode = file->f_dentry->d_inode; 926 char *page, *tmp; 927 ssize_t length; 928 struct task_struct *task = proc_task(inode); 929 uid_t loginuid; 930 931 if (!capable(CAP_AUDIT_CONTROL)) 932 return -EPERM; 933 934 if (current != task) 935 return -EPERM; 936 937 if (count > PAGE_SIZE) 938 count = PAGE_SIZE; 939 940 if (*ppos != 0) { 941 /* No partial writes. 
*/ 942 return -EINVAL; 943 } 944 page = (char*)__get_free_page(GFP_USER); 945 if (!page) 946 return -ENOMEM; 947 length = -EFAULT; 948 if (copy_from_user(page, buf, count)) 949 goto out_free_page; 950 951 loginuid = simple_strtoul(page, &tmp, 10); 952 if (tmp == page) { 953 length = -EINVAL; 954 goto out_free_page; 955 956 } 957 length = audit_set_loginuid(task, loginuid); 958 if (likely(length == 0)) 959 length = count; 960 961 out_free_page: 962 free_page((unsigned long) page); 963 return length; 964 } 965 966 static struct file_operations proc_loginuid_operations = { 967 .read = proc_loginuid_read, 968 .write = proc_loginuid_write, 969 }; 970 #endif 971 972 #ifdef CONFIG_SECCOMP 973 static ssize_t seccomp_read(struct file *file, char __user *buf, 974 size_t count, loff_t *ppos) 975 { 976 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 977 char __buf[20]; 978 loff_t __ppos = *ppos; 979 size_t len; 980 981 /* no need to print the trailing zero, so use only len */ 982 len = sprintf(__buf, "%u\n", tsk->seccomp.mode); 983 if (__ppos >= len) 984 return 0; 985 if (count > len - __ppos) 986 count = len - __ppos; 987 if (copy_to_user(buf, __buf + __ppos, count)) 988 return -EFAULT; 989 *ppos = __ppos + count; 990 return count; 991 } 992 993 static ssize_t seccomp_write(struct file *file, const char __user *buf, 994 size_t count, loff_t *ppos) 995 { 996 struct task_struct *tsk = proc_task(file->f_dentry->d_inode); 997 char __buf[20], *end; 998 unsigned int seccomp_mode; 999 1000 /* can set it only once to be even more secure */ 1001 if (unlikely(tsk->seccomp.mode)) 1002 return -EPERM; 1003 1004 memset(__buf, 0, sizeof(__buf)); 1005 count = min(count, sizeof(__buf) - 1); 1006 if (copy_from_user(__buf, buf, count)) 1007 return -EFAULT; 1008 seccomp_mode = simple_strtoul(__buf, &end, 0); 1009 if (*end == '\n') 1010 end++; 1011 if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { 1012 tsk->seccomp.mode = seccomp_mode; 1013 set_tsk_thread_flag(tsk, 
TIF_SECCOMP); 1014 } else 1015 return -EINVAL; 1016 if (unlikely(!(end - __buf))) 1017 return -EIO; 1018 return end - __buf; 1019 } 1020 1021 static struct file_operations proc_seccomp_operations = { 1022 .read = seccomp_read, 1023 .write = seccomp_write, 1024 }; 1025 #endif /* CONFIG_SECCOMP */ 1026 1027 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1028 { 1029 struct inode *inode = dentry->d_inode; 1030 int error = -EACCES; 1031 1032 /* We don't need a base pointer in the /proc filesystem */ 1033 path_release(nd); 1034 1035 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) 1036 goto out; 1037 error = proc_check_root(inode); 1038 if (error) 1039 goto out; 1040 1041 error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt); 1042 nd->last_type = LAST_BIND; 1043 out: 1044 return ERR_PTR(error); 1045 } 1046 1047 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, 1048 char __user *buffer, int buflen) 1049 { 1050 struct inode * inode; 1051 char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; 1052 int len; 1053 1054 if (!tmp) 1055 return -ENOMEM; 1056 1057 inode = dentry->d_inode; 1058 path = d_path(dentry, mnt, tmp, PAGE_SIZE); 1059 len = PTR_ERR(path); 1060 if (IS_ERR(path)) 1061 goto out; 1062 len = tmp + PAGE_SIZE - 1 - path; 1063 1064 if (len > buflen) 1065 len = buflen; 1066 if (copy_to_user(buffer, path, len)) 1067 len = -EFAULT; 1068 out: 1069 free_page((unsigned long)tmp); 1070 return len; 1071 } 1072 1073 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) 1074 { 1075 int error = -EACCES; 1076 struct inode *inode = dentry->d_inode; 1077 struct dentry *de; 1078 struct vfsmount *mnt = NULL; 1079 1080 lock_kernel(); 1081 1082 if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) 1083 goto out; 1084 error = proc_check_root(inode); 1085 if (error) 1086 goto out; 1087 1088 error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt); 1089 
if (error) 1090 goto out; 1091 1092 error = do_proc_readlink(de, mnt, buffer, buflen); 1093 dput(de); 1094 mntput(mnt); 1095 out: 1096 unlock_kernel(); 1097 return error; 1098 } 1099 1100 static struct inode_operations proc_pid_link_inode_operations = { 1101 .readlink = proc_pid_readlink, 1102 .follow_link = proc_pid_follow_link 1103 }; 1104 1105 #define NUMBUF 10 1106 1107 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) 1108 { 1109 struct inode *inode = filp->f_dentry->d_inode; 1110 struct task_struct *p = proc_task(inode); 1111 unsigned int fd, tid, ino; 1112 int retval; 1113 char buf[NUMBUF]; 1114 struct files_struct * files; 1115 struct fdtable *fdt; 1116 1117 retval = -ENOENT; 1118 if (!pid_alive(p)) 1119 goto out; 1120 retval = 0; 1121 tid = p->pid; 1122 1123 fd = filp->f_pos; 1124 switch (fd) { 1125 case 0: 1126 if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) 1127 goto out; 1128 filp->f_pos++; 1129 case 1: 1130 ino = fake_ino(tid, PROC_TID_INO); 1131 if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) 1132 goto out; 1133 filp->f_pos++; 1134 default: 1135 files = get_files_struct(p); 1136 if (!files) 1137 goto out; 1138 rcu_read_lock(); 1139 fdt = files_fdtable(files); 1140 for (fd = filp->f_pos-2; 1141 fd < fdt->max_fds; 1142 fd++, filp->f_pos++) { 1143 unsigned int i,j; 1144 1145 if (!fcheck_files(files, fd)) 1146 continue; 1147 rcu_read_unlock(); 1148 1149 j = NUMBUF; 1150 i = fd; 1151 do { 1152 j--; 1153 buf[j] = '0' + (i % 10); 1154 i /= 10; 1155 } while (i); 1156 1157 ino = fake_ino(tid, PROC_TID_FD_DIR + fd); 1158 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) { 1159 rcu_read_lock(); 1160 break; 1161 } 1162 rcu_read_lock(); 1163 } 1164 rcu_read_unlock(); 1165 put_files_struct(files); 1166 } 1167 out: 1168 return retval; 1169 } 1170 1171 static int proc_pident_readdir(struct file *filp, 1172 void *dirent, filldir_t filldir, 1173 struct pid_entry *ents, unsigned int nents) 1174 { 1175 int i; 1176 int 
pid; 1177 struct dentry *dentry = filp->f_dentry; 1178 struct inode *inode = dentry->d_inode; 1179 struct pid_entry *p; 1180 ino_t ino; 1181 int ret; 1182 1183 ret = -ENOENT; 1184 if (!pid_alive(proc_task(inode))) 1185 goto out; 1186 1187 ret = 0; 1188 pid = proc_task(inode)->pid; 1189 i = filp->f_pos; 1190 switch (i) { 1191 case 0: 1192 ino = inode->i_ino; 1193 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 1194 goto out; 1195 i++; 1196 filp->f_pos++; 1197 /* fall through */ 1198 case 1: 1199 ino = parent_ino(dentry); 1200 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) 1201 goto out; 1202 i++; 1203 filp->f_pos++; 1204 /* fall through */ 1205 default: 1206 i -= 2; 1207 if (i >= nents) { 1208 ret = 1; 1209 goto out; 1210 } 1211 p = ents + i; 1212 while (p->name) { 1213 if (filldir(dirent, p->name, p->len, filp->f_pos, 1214 fake_ino(pid, p->type), p->mode >> 12) < 0) 1215 goto out; 1216 filp->f_pos++; 1217 p++; 1218 } 1219 } 1220 1221 ret = 1; 1222 out: 1223 return ret; 1224 } 1225 1226 static int proc_tgid_base_readdir(struct file * filp, 1227 void * dirent, filldir_t filldir) 1228 { 1229 return proc_pident_readdir(filp,dirent,filldir, 1230 tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); 1231 } 1232 1233 static int proc_tid_base_readdir(struct file * filp, 1234 void * dirent, filldir_t filldir) 1235 { 1236 return proc_pident_readdir(filp,dirent,filldir, 1237 tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); 1238 } 1239 1240 /* building an inode */ 1241 1242 static int task_dumpable(struct task_struct *task) 1243 { 1244 int dumpable = 0; 1245 struct mm_struct *mm; 1246 1247 task_lock(task); 1248 mm = task->mm; 1249 if (mm) 1250 dumpable = mm->dumpable; 1251 task_unlock(task); 1252 if(dumpable == 1) 1253 return 1; 1254 return 0; 1255 } 1256 1257 1258 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino) 1259 { 1260 struct inode * inode; 1261 struct proc_inode *ei; 1262 1263 /* We need a new inode */ 1264 1265 inode = 
new_inode(sb); 1266 if (!inode) 1267 goto out; 1268 1269 /* Common stuff */ 1270 ei = PROC_I(inode); 1271 ei->task = NULL; 1272 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1273 inode->i_ino = fake_ino(task->pid, ino); 1274 1275 if (!pid_alive(task)) 1276 goto out_unlock; 1277 1278 /* 1279 * grab the reference to task. 1280 */ 1281 get_task_struct(task); 1282 ei->task = task; 1283 ei->type = ino; 1284 inode->i_uid = 0; 1285 inode->i_gid = 0; 1286 if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) { 1287 inode->i_uid = task->euid; 1288 inode->i_gid = task->egid; 1289 } 1290 security_task_to_inode(task, inode); 1291 1292 out: 1293 return inode; 1294 1295 out_unlock: 1296 ei->pde = NULL; 1297 iput(inode); 1298 return NULL; 1299 } 1300 1301 /* dentry stuff */ 1302 1303 /* 1304 * Exceptional case: normally we are not allowed to unhash a busy 1305 * directory. In this case, however, we can do it - no aliasing problems 1306 * due to the way we treat inodes. 1307 * 1308 * Rewrite the inode's ownerships here because the owning task may have 1309 * performed a setuid(), etc. 
1310 */ 1311 static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1312 { 1313 struct inode *inode = dentry->d_inode; 1314 struct task_struct *task = proc_task(inode); 1315 if (pid_alive(task)) { 1316 if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) { 1317 inode->i_uid = task->euid; 1318 inode->i_gid = task->egid; 1319 } else { 1320 inode->i_uid = 0; 1321 inode->i_gid = 0; 1322 } 1323 security_task_to_inode(task, inode); 1324 return 1; 1325 } 1326 d_drop(dentry); 1327 return 0; 1328 } 1329 1330 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1331 { 1332 struct inode *inode = dentry->d_inode; 1333 struct task_struct *task = proc_task(inode); 1334 int fd = proc_type(inode) - PROC_TID_FD_DIR; 1335 struct files_struct *files; 1336 1337 files = get_files_struct(task); 1338 if (files) { 1339 rcu_read_lock(); 1340 if (fcheck_files(files, fd)) { 1341 rcu_read_unlock(); 1342 put_files_struct(files); 1343 if (task_dumpable(task)) { 1344 inode->i_uid = task->euid; 1345 inode->i_gid = task->egid; 1346 } else { 1347 inode->i_uid = 0; 1348 inode->i_gid = 0; 1349 } 1350 security_task_to_inode(task, inode); 1351 return 1; 1352 } 1353 rcu_read_unlock(); 1354 put_files_struct(files); 1355 } 1356 d_drop(dentry); 1357 return 0; 1358 } 1359 1360 static void pid_base_iput(struct dentry *dentry, struct inode *inode) 1361 { 1362 struct task_struct *task = proc_task(inode); 1363 spin_lock(&task->proc_lock); 1364 if (task->proc_dentry == dentry) 1365 task->proc_dentry = NULL; 1366 spin_unlock(&task->proc_lock); 1367 iput(inode); 1368 } 1369 1370 static int pid_delete_dentry(struct dentry * dentry) 1371 { 1372 /* Is the task we represent dead? 1373 * If so, then don't put the dentry on the lru list, 1374 * kill it immediately. 
1375 */ 1376 return !pid_alive(proc_task(dentry->d_inode)); 1377 } 1378 1379 static struct dentry_operations tid_fd_dentry_operations = 1380 { 1381 .d_revalidate = tid_fd_revalidate, 1382 .d_delete = pid_delete_dentry, 1383 }; 1384 1385 static struct dentry_operations pid_dentry_operations = 1386 { 1387 .d_revalidate = pid_revalidate, 1388 .d_delete = pid_delete_dentry, 1389 }; 1390 1391 static struct dentry_operations pid_base_dentry_operations = 1392 { 1393 .d_revalidate = pid_revalidate, 1394 .d_iput = pid_base_iput, 1395 .d_delete = pid_delete_dentry, 1396 }; 1397 1398 /* Lookups */ 1399 1400 static unsigned name_to_int(struct dentry *dentry) 1401 { 1402 const char *name = dentry->d_name.name; 1403 int len = dentry->d_name.len; 1404 unsigned n = 0; 1405 1406 if (len > 1 && *name == '0') 1407 goto out; 1408 while (len-- > 0) { 1409 unsigned c = *name++ - '0'; 1410 if (c > 9) 1411 goto out; 1412 if (n >= (~0U-9)/10) 1413 goto out; 1414 n *= 10; 1415 n += c; 1416 } 1417 return n; 1418 out: 1419 return ~0U; 1420 } 1421 1422 /* SMP-safe */ 1423 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 1424 { 1425 struct task_struct *task = proc_task(dir); 1426 unsigned fd = name_to_int(dentry); 1427 struct file * file; 1428 struct files_struct * files; 1429 struct inode *inode; 1430 struct proc_inode *ei; 1431 1432 if (fd == ~0U) 1433 goto out; 1434 if (!pid_alive(task)) 1435 goto out; 1436 1437 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd); 1438 if (!inode) 1439 goto out; 1440 ei = PROC_I(inode); 1441 files = get_files_struct(task); 1442 if (!files) 1443 goto out_unlock; 1444 inode->i_mode = S_IFLNK; 1445 rcu_read_lock(); 1446 file = fcheck_files(files, fd); 1447 if (!file) 1448 goto out_unlock2; 1449 if (file->f_mode & 1) 1450 inode->i_mode |= S_IRUSR | S_IXUSR; 1451 if (file->f_mode & 2) 1452 inode->i_mode |= S_IWUSR | S_IXUSR; 1453 rcu_read_unlock(); 1454 put_files_struct(files); 1455 inode->i_op = 
&proc_pid_link_inode_operations; 1456 inode->i_size = 64; 1457 ei->op.proc_get_link = proc_fd_link; 1458 dentry->d_op = &tid_fd_dentry_operations; 1459 d_add(dentry, inode); 1460 return NULL; 1461 1462 out_unlock2: 1463 rcu_read_unlock(); 1464 put_files_struct(files); 1465 out_unlock: 1466 iput(inode); 1467 out: 1468 return ERR_PTR(-ENOENT); 1469 } 1470 1471 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir); 1472 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd); 1473 1474 static struct file_operations proc_fd_operations = { 1475 .read = generic_read_dir, 1476 .readdir = proc_readfd, 1477 }; 1478 1479 static struct file_operations proc_task_operations = { 1480 .read = generic_read_dir, 1481 .readdir = proc_task_readdir, 1482 }; 1483 1484 /* 1485 * proc directories can do almost nothing.. 1486 */ 1487 static struct inode_operations proc_fd_inode_operations = { 1488 .lookup = proc_lookupfd, 1489 .permission = proc_permission, 1490 }; 1491 1492 static struct inode_operations proc_task_inode_operations = { 1493 .lookup = proc_task_lookup, 1494 .permission = proc_task_permission, 1495 }; 1496 1497 #ifdef CONFIG_SECURITY 1498 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, 1499 size_t count, loff_t *ppos) 1500 { 1501 struct inode * inode = file->f_dentry->d_inode; 1502 unsigned long page; 1503 ssize_t length; 1504 struct task_struct *task = proc_task(inode); 1505 1506 if (count > PAGE_SIZE) 1507 count = PAGE_SIZE; 1508 if (!(page = __get_free_page(GFP_KERNEL))) 1509 return -ENOMEM; 1510 1511 length = security_getprocattr(task, 1512 (char*)file->f_dentry->d_name.name, 1513 (void*)page, count); 1514 if (length >= 0) 1515 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length); 1516 free_page(page); 1517 return length; 1518 } 1519 1520 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, 1521 size_t count, loff_t *ppos) 
1522 { 1523 struct inode * inode = file->f_dentry->d_inode; 1524 char *page; 1525 ssize_t length; 1526 struct task_struct *task = proc_task(inode); 1527 1528 if (count > PAGE_SIZE) 1529 count = PAGE_SIZE; 1530 if (*ppos != 0) { 1531 /* No partial writes. */ 1532 return -EINVAL; 1533 } 1534 page = (char*)__get_free_page(GFP_USER); 1535 if (!page) 1536 return -ENOMEM; 1537 length = -EFAULT; 1538 if (copy_from_user(page, buf, count)) 1539 goto out; 1540 1541 length = security_setprocattr(task, 1542 (char*)file->f_dentry->d_name.name, 1543 (void*)page, count); 1544 out: 1545 free_page((unsigned long) page); 1546 return length; 1547 } 1548 1549 static struct file_operations proc_pid_attr_operations = { 1550 .read = proc_pid_attr_read, 1551 .write = proc_pid_attr_write, 1552 }; 1553 1554 static struct file_operations proc_tid_attr_operations; 1555 static struct inode_operations proc_tid_attr_inode_operations; 1556 static struct file_operations proc_tgid_attr_operations; 1557 static struct inode_operations proc_tgid_attr_inode_operations; 1558 #endif 1559 1560 static int get_tid_list(int index, unsigned int *tids, struct inode *dir); 1561 1562 /* SMP-safe */ 1563 static struct dentry *proc_pident_lookup(struct inode *dir, 1564 struct dentry *dentry, 1565 struct pid_entry *ents) 1566 { 1567 struct inode *inode; 1568 int error; 1569 struct task_struct *task = proc_task(dir); 1570 struct pid_entry *p; 1571 struct proc_inode *ei; 1572 1573 error = -ENOENT; 1574 inode = NULL; 1575 1576 if (!pid_alive(task)) 1577 goto out; 1578 1579 for (p = ents; p->name; p++) { 1580 if (p->len != dentry->d_name.len) 1581 continue; 1582 if (!memcmp(dentry->d_name.name, p->name, p->len)) 1583 break; 1584 } 1585 if (!p->name) 1586 goto out; 1587 1588 error = -EINVAL; 1589 inode = proc_pid_make_inode(dir->i_sb, task, p->type); 1590 if (!inode) 1591 goto out; 1592 1593 ei = PROC_I(inode); 1594 inode->i_mode = p->mode; 1595 /* 1596 * Yes, it does not scale. And it should not. 
Don't add 1597 * new entries into /proc/<tgid>/ without very good reasons. 1598 */ 1599 switch(p->type) { 1600 case PROC_TGID_TASK: 1601 inode->i_nlink = 2 + get_tid_list(2, NULL, dir); 1602 inode->i_op = &proc_task_inode_operations; 1603 inode->i_fop = &proc_task_operations; 1604 break; 1605 case PROC_TID_FD: 1606 case PROC_TGID_FD: 1607 inode->i_nlink = 2; 1608 inode->i_op = &proc_fd_inode_operations; 1609 inode->i_fop = &proc_fd_operations; 1610 break; 1611 case PROC_TID_EXE: 1612 case PROC_TGID_EXE: 1613 inode->i_op = &proc_pid_link_inode_operations; 1614 ei->op.proc_get_link = proc_exe_link; 1615 break; 1616 case PROC_TID_CWD: 1617 case PROC_TGID_CWD: 1618 inode->i_op = &proc_pid_link_inode_operations; 1619 ei->op.proc_get_link = proc_cwd_link; 1620 break; 1621 case PROC_TID_ROOT: 1622 case PROC_TGID_ROOT: 1623 inode->i_op = &proc_pid_link_inode_operations; 1624 ei->op.proc_get_link = proc_root_link; 1625 break; 1626 case PROC_TID_ENVIRON: 1627 case PROC_TGID_ENVIRON: 1628 inode->i_fop = &proc_info_file_operations; 1629 ei->op.proc_read = proc_pid_environ; 1630 break; 1631 case PROC_TID_AUXV: 1632 case PROC_TGID_AUXV: 1633 inode->i_fop = &proc_info_file_operations; 1634 ei->op.proc_read = proc_pid_auxv; 1635 break; 1636 case PROC_TID_STATUS: 1637 case PROC_TGID_STATUS: 1638 inode->i_fop = &proc_info_file_operations; 1639 ei->op.proc_read = proc_pid_status; 1640 break; 1641 case PROC_TID_STAT: 1642 inode->i_fop = &proc_info_file_operations; 1643 ei->op.proc_read = proc_tid_stat; 1644 break; 1645 case PROC_TGID_STAT: 1646 inode->i_fop = &proc_info_file_operations; 1647 ei->op.proc_read = proc_tgid_stat; 1648 break; 1649 case PROC_TID_CMDLINE: 1650 case PROC_TGID_CMDLINE: 1651 inode->i_fop = &proc_info_file_operations; 1652 ei->op.proc_read = proc_pid_cmdline; 1653 break; 1654 case PROC_TID_STATM: 1655 case PROC_TGID_STATM: 1656 inode->i_fop = &proc_info_file_operations; 1657 ei->op.proc_read = proc_pid_statm; 1658 break; 1659 case PROC_TID_MAPS: 1660 case 
PROC_TGID_MAPS: 1661 inode->i_fop = &proc_maps_operations; 1662 break; 1663 #ifdef CONFIG_NUMA 1664 case PROC_TID_NUMA_MAPS: 1665 case PROC_TGID_NUMA_MAPS: 1666 inode->i_fop = &proc_numa_maps_operations; 1667 break; 1668 #endif 1669 case PROC_TID_MEM: 1670 case PROC_TGID_MEM: 1671 inode->i_op = &proc_mem_inode_operations; 1672 inode->i_fop = &proc_mem_operations; 1673 break; 1674 #ifdef CONFIG_SECCOMP 1675 case PROC_TID_SECCOMP: 1676 case PROC_TGID_SECCOMP: 1677 inode->i_fop = &proc_seccomp_operations; 1678 break; 1679 #endif /* CONFIG_SECCOMP */ 1680 case PROC_TID_MOUNTS: 1681 case PROC_TGID_MOUNTS: 1682 inode->i_fop = &proc_mounts_operations; 1683 break; 1684 case PROC_TID_SMAPS: 1685 case PROC_TGID_SMAPS: 1686 inode->i_fop = &proc_smaps_operations; 1687 break; 1688 #ifdef CONFIG_SECURITY 1689 case PROC_TID_ATTR: 1690 inode->i_nlink = 2; 1691 inode->i_op = &proc_tid_attr_inode_operations; 1692 inode->i_fop = &proc_tid_attr_operations; 1693 break; 1694 case PROC_TGID_ATTR: 1695 inode->i_nlink = 2; 1696 inode->i_op = &proc_tgid_attr_inode_operations; 1697 inode->i_fop = &proc_tgid_attr_operations; 1698 break; 1699 case PROC_TID_ATTR_CURRENT: 1700 case PROC_TGID_ATTR_CURRENT: 1701 case PROC_TID_ATTR_PREV: 1702 case PROC_TGID_ATTR_PREV: 1703 case PROC_TID_ATTR_EXEC: 1704 case PROC_TGID_ATTR_EXEC: 1705 case PROC_TID_ATTR_FSCREATE: 1706 case PROC_TGID_ATTR_FSCREATE: 1707 inode->i_fop = &proc_pid_attr_operations; 1708 break; 1709 #endif 1710 #ifdef CONFIG_KALLSYMS 1711 case PROC_TID_WCHAN: 1712 case PROC_TGID_WCHAN: 1713 inode->i_fop = &proc_info_file_operations; 1714 ei->op.proc_read = proc_pid_wchan; 1715 break; 1716 #endif 1717 #ifdef CONFIG_SCHEDSTATS 1718 case PROC_TID_SCHEDSTAT: 1719 case PROC_TGID_SCHEDSTAT: 1720 inode->i_fop = &proc_info_file_operations; 1721 ei->op.proc_read = proc_pid_schedstat; 1722 break; 1723 #endif 1724 #ifdef CONFIG_CPUSETS 1725 case PROC_TID_CPUSET: 1726 case PROC_TGID_CPUSET: 1727 inode->i_fop = &proc_cpuset_operations; 1728 break; 1729 
#endif 1730 case PROC_TID_OOM_SCORE: 1731 case PROC_TGID_OOM_SCORE: 1732 inode->i_fop = &proc_info_file_operations; 1733 ei->op.proc_read = proc_oom_score; 1734 break; 1735 case PROC_TID_OOM_ADJUST: 1736 case PROC_TGID_OOM_ADJUST: 1737 inode->i_fop = &proc_oom_adjust_operations; 1738 break; 1739 #ifdef CONFIG_AUDITSYSCALL 1740 case PROC_TID_LOGINUID: 1741 case PROC_TGID_LOGINUID: 1742 inode->i_fop = &proc_loginuid_operations; 1743 break; 1744 #endif 1745 default: 1746 printk("procfs: impossible type (%d)",p->type); 1747 iput(inode); 1748 return ERR_PTR(-EINVAL); 1749 } 1750 dentry->d_op = &pid_dentry_operations; 1751 d_add(dentry, inode); 1752 return NULL; 1753 1754 out: 1755 return ERR_PTR(error); 1756 } 1757 1758 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1759 return proc_pident_lookup(dir, dentry, tgid_base_stuff); 1760 } 1761 1762 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 1763 return proc_pident_lookup(dir, dentry, tid_base_stuff); 1764 } 1765 1766 static struct file_operations proc_tgid_base_operations = { 1767 .read = generic_read_dir, 1768 .readdir = proc_tgid_base_readdir, 1769 }; 1770 1771 static struct file_operations proc_tid_base_operations = { 1772 .read = generic_read_dir, 1773 .readdir = proc_tid_base_readdir, 1774 }; 1775 1776 static struct inode_operations proc_tgid_base_inode_operations = { 1777 .lookup = proc_tgid_base_lookup, 1778 }; 1779 1780 static struct inode_operations proc_tid_base_inode_operations = { 1781 .lookup = proc_tid_base_lookup, 1782 }; 1783 1784 #ifdef CONFIG_SECURITY 1785 static int proc_tgid_attr_readdir(struct file * filp, 1786 void * dirent, filldir_t filldir) 1787 { 1788 return proc_pident_readdir(filp,dirent,filldir, 1789 tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff)); 1790 } 1791 1792 static int proc_tid_attr_readdir(struct file * filp, 1793 void * dirent, filldir_t filldir) 1794 { 1795 return 
proc_pident_readdir(filp,dirent,filldir, 1796 tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff)); 1797 } 1798 1799 static struct file_operations proc_tgid_attr_operations = { 1800 .read = generic_read_dir, 1801 .readdir = proc_tgid_attr_readdir, 1802 }; 1803 1804 static struct file_operations proc_tid_attr_operations = { 1805 .read = generic_read_dir, 1806 .readdir = proc_tid_attr_readdir, 1807 }; 1808 1809 static struct dentry *proc_tgid_attr_lookup(struct inode *dir, 1810 struct dentry *dentry, struct nameidata *nd) 1811 { 1812 return proc_pident_lookup(dir, dentry, tgid_attr_stuff); 1813 } 1814 1815 static struct dentry *proc_tid_attr_lookup(struct inode *dir, 1816 struct dentry *dentry, struct nameidata *nd) 1817 { 1818 return proc_pident_lookup(dir, dentry, tid_attr_stuff); 1819 } 1820 1821 static struct inode_operations proc_tgid_attr_inode_operations = { 1822 .lookup = proc_tgid_attr_lookup, 1823 }; 1824 1825 static struct inode_operations proc_tid_attr_inode_operations = { 1826 .lookup = proc_tid_attr_lookup, 1827 }; 1828 #endif 1829 1830 /* 1831 * /proc/self: 1832 */ 1833 static int proc_self_readlink(struct dentry *dentry, char __user *buffer, 1834 int buflen) 1835 { 1836 char tmp[30]; 1837 sprintf(tmp, "%d", current->tgid); 1838 return vfs_readlink(dentry,buffer,buflen,tmp); 1839 } 1840 1841 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 1842 { 1843 char tmp[30]; 1844 sprintf(tmp, "%d", current->tgid); 1845 return ERR_PTR(vfs_follow_link(nd,tmp)); 1846 } 1847 1848 static struct inode_operations proc_self_inode_operations = { 1849 .readlink = proc_self_readlink, 1850 .follow_link = proc_self_follow_link, 1851 }; 1852 1853 /** 1854 * proc_pid_unhash - Unhash /proc/@pid entry from the dcache. 1855 * @p: task that should be flushed. 1856 * 1857 * Drops the /proc/@pid dcache entry from the hash chains. 1858 * 1859 * Dropping /proc/@pid entries and detach_pid must be synchroneous, 1860 * otherwise e.g. 
/proc/@pid/exe might point to the wrong executable, 1861 * if the pid value is immediately reused. This is enforced by 1862 * - caller must acquire spin_lock(p->proc_lock) 1863 * - must be called before detach_pid() 1864 * - proc_pid_lookup acquires proc_lock, and checks that 1865 * the target is not dead by looking at the attach count 1866 * of PIDTYPE_PID. 1867 */ 1868 1869 struct dentry *proc_pid_unhash(struct task_struct *p) 1870 { 1871 struct dentry *proc_dentry; 1872 1873 proc_dentry = p->proc_dentry; 1874 if (proc_dentry != NULL) { 1875 1876 spin_lock(&dcache_lock); 1877 spin_lock(&proc_dentry->d_lock); 1878 if (!d_unhashed(proc_dentry)) { 1879 dget_locked(proc_dentry); 1880 __d_drop(proc_dentry); 1881 spin_unlock(&proc_dentry->d_lock); 1882 } else { 1883 spin_unlock(&proc_dentry->d_lock); 1884 proc_dentry = NULL; 1885 } 1886 spin_unlock(&dcache_lock); 1887 } 1888 return proc_dentry; 1889 } 1890 1891 /** 1892 * proc_pid_flush - recover memory used by stale /proc/@pid/x entries 1893 * @proc_dentry: directoy to prune. 1894 * 1895 * Shrink the /proc directory that was used by the just killed thread. 
1896 */ 1897 1898 void proc_pid_flush(struct dentry *proc_dentry) 1899 { 1900 might_sleep(); 1901 if(proc_dentry != NULL) { 1902 shrink_dcache_parent(proc_dentry); 1903 dput(proc_dentry); 1904 } 1905 } 1906 1907 /* SMP-safe */ 1908 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 1909 { 1910 struct task_struct *task; 1911 struct inode *inode; 1912 struct proc_inode *ei; 1913 unsigned tgid; 1914 int died; 1915 1916 if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) { 1917 inode = new_inode(dir->i_sb); 1918 if (!inode) 1919 return ERR_PTR(-ENOMEM); 1920 ei = PROC_I(inode); 1921 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 1922 inode->i_ino = fake_ino(0, PROC_TGID_INO); 1923 ei->pde = NULL; 1924 inode->i_mode = S_IFLNK|S_IRWXUGO; 1925 inode->i_uid = inode->i_gid = 0; 1926 inode->i_size = 64; 1927 inode->i_op = &proc_self_inode_operations; 1928 d_add(dentry, inode); 1929 return NULL; 1930 } 1931 tgid = name_to_int(dentry); 1932 if (tgid == ~0U) 1933 goto out; 1934 1935 read_lock(&tasklist_lock); 1936 task = find_task_by_pid(tgid); 1937 if (task) 1938 get_task_struct(task); 1939 read_unlock(&tasklist_lock); 1940 if (!task) 1941 goto out; 1942 1943 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO); 1944 1945 1946 if (!inode) { 1947 put_task_struct(task); 1948 goto out; 1949 } 1950 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 1951 inode->i_op = &proc_tgid_base_inode_operations; 1952 inode->i_fop = &proc_tgid_base_operations; 1953 inode->i_flags|=S_IMMUTABLE; 1954 #ifdef CONFIG_SECURITY 1955 inode->i_nlink = 5; 1956 #else 1957 inode->i_nlink = 4; 1958 #endif 1959 1960 dentry->d_op = &pid_base_dentry_operations; 1961 1962 died = 0; 1963 d_add(dentry, inode); 1964 spin_lock(&task->proc_lock); 1965 task->proc_dentry = dentry; 1966 if (!pid_alive(task)) { 1967 dentry = proc_pid_unhash(task); 1968 died = 1; 1969 } 1970 spin_unlock(&task->proc_lock); 1971 1972 put_task_struct(task); 1973 
if (died) { 1974 proc_pid_flush(dentry); 1975 goto out; 1976 } 1977 return NULL; 1978 out: 1979 return ERR_PTR(-ENOENT); 1980 } 1981 1982 /* SMP-safe */ 1983 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 1984 { 1985 struct task_struct *task; 1986 struct task_struct *leader = proc_task(dir); 1987 struct inode *inode; 1988 unsigned tid; 1989 1990 tid = name_to_int(dentry); 1991 if (tid == ~0U) 1992 goto out; 1993 1994 read_lock(&tasklist_lock); 1995 task = find_task_by_pid(tid); 1996 if (task) 1997 get_task_struct(task); 1998 read_unlock(&tasklist_lock); 1999 if (!task) 2000 goto out; 2001 if (leader->tgid != task->tgid) 2002 goto out_drop_task; 2003 2004 inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO); 2005 2006 2007 if (!inode) 2008 goto out_drop_task; 2009 inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; 2010 inode->i_op = &proc_tid_base_inode_operations; 2011 inode->i_fop = &proc_tid_base_operations; 2012 inode->i_flags|=S_IMMUTABLE; 2013 #ifdef CONFIG_SECURITY 2014 inode->i_nlink = 4; 2015 #else 2016 inode->i_nlink = 3; 2017 #endif 2018 2019 dentry->d_op = &pid_base_dentry_operations; 2020 2021 d_add(dentry, inode); 2022 2023 put_task_struct(task); 2024 return NULL; 2025 out_drop_task: 2026 put_task_struct(task); 2027 out: 2028 return ERR_PTR(-ENOENT); 2029 } 2030 2031 #define PROC_NUMBUF 10 2032 #define PROC_MAXPIDS 20 2033 2034 /* 2035 * Get a few tgid's to return for filldir - we need to hold the 2036 * tasklist lock while doing this, and we must release it before 2037 * we actually do the filldir itself, so we use a temp buffer.. 
2038 */ 2039 static int get_tgid_list(int index, unsigned long version, unsigned int *tgids) 2040 { 2041 struct task_struct *p; 2042 int nr_tgids = 0; 2043 2044 index--; 2045 read_lock(&tasklist_lock); 2046 p = NULL; 2047 if (version) { 2048 p = find_task_by_pid(version); 2049 if (p && !thread_group_leader(p)) 2050 p = NULL; 2051 } 2052 2053 if (p) 2054 index = 0; 2055 else 2056 p = next_task(&init_task); 2057 2058 for ( ; p != &init_task; p = next_task(p)) { 2059 int tgid = p->pid; 2060 if (!pid_alive(p)) 2061 continue; 2062 if (--index >= 0) 2063 continue; 2064 tgids[nr_tgids] = tgid; 2065 nr_tgids++; 2066 if (nr_tgids >= PROC_MAXPIDS) 2067 break; 2068 } 2069 read_unlock(&tasklist_lock); 2070 return nr_tgids; 2071 } 2072 2073 /* 2074 * Get a few tid's to return for filldir - we need to hold the 2075 * tasklist lock while doing this, and we must release it before 2076 * we actually do the filldir itself, so we use a temp buffer.. 2077 */ 2078 static int get_tid_list(int index, unsigned int *tids, struct inode *dir) 2079 { 2080 struct task_struct *leader_task = proc_task(dir); 2081 struct task_struct *task = leader_task; 2082 int nr_tids = 0; 2083 2084 index -= 2; 2085 read_lock(&tasklist_lock); 2086 /* 2087 * The starting point task (leader_task) might be an already 2088 * unlinked task, which cannot be used to access the task-list 2089 * via next_thread(). 
2090 */ 2091 if (pid_alive(task)) do { 2092 int tid = task->pid; 2093 2094 if (--index >= 0) 2095 continue; 2096 if (tids != NULL) 2097 tids[nr_tids] = tid; 2098 nr_tids++; 2099 if (nr_tids >= PROC_MAXPIDS) 2100 break; 2101 } while ((task = next_thread(task)) != leader_task); 2102 read_unlock(&tasklist_lock); 2103 return nr_tids; 2104 } 2105 2106 /* for the /proc/ directory itself, after non-process stuff has been done */ 2107 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) 2108 { 2109 unsigned int tgid_array[PROC_MAXPIDS]; 2110 char buf[PROC_NUMBUF]; 2111 unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; 2112 unsigned int nr_tgids, i; 2113 int next_tgid; 2114 2115 if (!nr) { 2116 ino_t ino = fake_ino(0,PROC_TGID_INO); 2117 if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0) 2118 return 0; 2119 filp->f_pos++; 2120 nr++; 2121 } 2122 2123 /* f_version caches the tgid value that the last readdir call couldn't 2124 * return. lseek aka telldir automagically resets f_version to 0. 2125 */ 2126 next_tgid = filp->f_version; 2127 filp->f_version = 0; 2128 for (;;) { 2129 nr_tgids = get_tgid_list(nr, next_tgid, tgid_array); 2130 if (!nr_tgids) { 2131 /* no more entries ! 
*/ 2132 break; 2133 } 2134 next_tgid = 0; 2135 2136 /* do not use the last found pid, reserve it for next_tgid */ 2137 if (nr_tgids == PROC_MAXPIDS) { 2138 nr_tgids--; 2139 next_tgid = tgid_array[nr_tgids]; 2140 } 2141 2142 for (i=0;i<nr_tgids;i++) { 2143 int tgid = tgid_array[i]; 2144 ino_t ino = fake_ino(tgid,PROC_TGID_INO); 2145 unsigned long j = PROC_NUMBUF; 2146 2147 do 2148 buf[--j] = '0' + (tgid % 10); 2149 while ((tgid /= 10) != 0); 2150 2151 if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) { 2152 /* returning this tgid failed, save it as the first 2153 * pid for the next readir call */ 2154 filp->f_version = tgid_array[i]; 2155 goto out; 2156 } 2157 filp->f_pos++; 2158 nr++; 2159 } 2160 } 2161 out: 2162 return 0; 2163 } 2164 2165 /* for the /proc/TGID/task/ directories */ 2166 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) 2167 { 2168 unsigned int tid_array[PROC_MAXPIDS]; 2169 char buf[PROC_NUMBUF]; 2170 unsigned int nr_tids, i; 2171 struct dentry *dentry = filp->f_dentry; 2172 struct inode *inode = dentry->d_inode; 2173 int retval = -ENOENT; 2174 ino_t ino; 2175 unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ 2176 2177 if (!pid_alive(proc_task(inode))) 2178 goto out; 2179 retval = 0; 2180 2181 switch (pos) { 2182 case 0: 2183 ino = inode->i_ino; 2184 if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0) 2185 goto out; 2186 pos++; 2187 /* fall through */ 2188 case 1: 2189 ino = parent_ino(dentry); 2190 if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0) 2191 goto out; 2192 pos++; 2193 /* fall through */ 2194 } 2195 2196 nr_tids = get_tid_list(pos, tid_array, inode); 2197 inode->i_nlink = pos + nr_tids; 2198 2199 for (i = 0; i < nr_tids; i++) { 2200 unsigned long j = PROC_NUMBUF; 2201 int tid = tid_array[i]; 2202 2203 ino = fake_ino(tid,PROC_TID_INO); 2204 2205 do 2206 buf[--j] = '0' + (tid % 10); 2207 while ((tid /= 10) != 0); 2208 2209 if (filldir(dirent, buf+j, 
PROC_NUMBUF-j, pos, ino, DT_DIR) < 0) 2210 break; 2211 pos++; 2212 } 2213 out: 2214 filp->f_pos = pos; 2215 return retval; 2216 } 2217