/*
 * proc/fs/generic.c --- generic routines for the proc-fs
 *
 * This file contains generic proc-fs routines for handling
 * directories and files.
 *
 * Copyright (C) 1991, 1992 Linus Torvalds.
 * Copyright (C) 1997 Theodore Ts'o
 */

#include <linux/errno.h>
#include <linux/time.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/idr.h>
#include <linux/namei.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <asm/uaccess.h>

#include "internal.h"

static ssize_t proc_file_read(struct file *file, char __user *buf,
			      size_t nbytes, loff_t *ppos);
static ssize_t proc_file_write(struct file *file, const char __user *buffer,
			       size_t count, loff_t *ppos);
static loff_t proc_file_lseek(struct file *, loff_t, int);

/* Protects the subdir/next sibling chains hanging off every proc_dir_entry. */
DEFINE_SPINLOCK(proc_subdir_lock);

/*
 * proc_match - compare a name against a proc_dir_entry's name.
 * Returns non-zero on an exact (length and bytes) match.
 */
static int proc_match(int len, const char *name, struct proc_dir_entry *de)
{
	if (de->namelen != len)
		return 0;
	return !memcmp(name, de->name, len);
}

static const struct file_operations proc_file_operations = {
	.llseek		= proc_file_lseek,
	.read		= proc_file_read,
	.write		= proc_file_write,
};

/* buffer size is one page but our output routines use some slack for overruns */
#define PROC_BLOCK_SIZE	(PAGE_SIZE - 1024)

/*
 * proc_file_read - read method for the legacy ->get_info/->read_proc
 * interfaces.  Repeatedly asks the entry's callback to fill a scratch
 * page and copies the produced data to user space until @nbytes are
 * satisfied, EOF is signalled, or an error occurs.
 */
static ssize_t
proc_file_read(struct file *file, char __user *buf, size_t nbytes,
	       loff_t *ppos)
{
	struct inode * inode = file->f_path.dentry->d_inode;
	char	*page;
	ssize_t	retval=0;
	int	eof=0;
	ssize_t	n, count;
	char	*start;
	struct proc_dir_entry * dp;
	unsigned long long pos;

	/*
	 * Gaah, please just use "seq_file" instead. The legacy /proc
	 * interfaces cut loff_t down to off_t for reads, and ignore
	 * the offset entirely for writes..
	 */
	pos = *ppos;
	if (pos > MAX_NON_LFS)
		return 0;
	if (nbytes > MAX_NON_LFS - pos)
		nbytes = MAX_NON_LFS - pos;

	dp = PDE(inode);
	if (!(page = (char*) __get_free_page(GFP_KERNEL)))
		return -ENOMEM;

	while ((nbytes > 0) && !eof) {
		/* Never ask the callback for more than the page can hold. */
		count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);

		start = NULL;
		if (dp->get_info) {
			/* Handle old net routines */
			n = dp->get_info(page, &start, *ppos, count);
			if (n < count)
				eof = 1;
		} else if (dp->read_proc) {
			/*
			 * How to be a proc read function
			 * ------------------------------
			 * Prototype:
			 *    int f(char *buffer, char **start, off_t offset,
			 *          int count, int *peof, void *dat)
			 *
			 * Assume that the buffer is "count" bytes in size.
			 *
			 * If you know you have supplied all the data you
			 * have, set *peof.
			 *
			 * You have three ways to return data:
			 * 0) Leave *start = NULL.  (This is the default.)
			 *    Put the data of the requested offset at that
			 *    offset within the buffer.  Return the number (n)
			 *    of bytes there are from the beginning of the
			 *    buffer up to the last byte of data.  If the
			 *    number of supplied bytes (= n - offset) is
			 *    greater than zero and you didn't signal eof
			 *    and the reader is prepared to take more data
			 *    you will be called again with the requested
			 *    offset advanced by the number of bytes
			 *    absorbed.  This interface is useful for files
			 *    no larger than the buffer.
			 * 1) Set *start = an unsigned long value less than
			 *    the buffer address but greater than zero.
			 *    Put the data of the requested offset at the
			 *    beginning of the buffer.  Return the number of
			 *    bytes of data placed there.  If this number is
			 *    greater than zero and you didn't signal eof
			 *    and the reader is prepared to take more data
			 *    you will be called again with the requested
			 *    offset advanced by *start.  This interface is
			 *    useful when you have a large file consisting
			 *    of a series of blocks which you want to count
			 *    and return as wholes.
			 *    (Hack by Paul.Russell@rustcorp.com.au)
			 * 2) Set *start = an address within the buffer.
			 *    Put the data of the requested offset at *start.
			 *    Return the number of bytes of data placed there.
			 *    If this number is greater than zero and you
			 *    didn't signal eof and the reader is prepared to
			 *    take more data you will be called again with the
			 *    requested offset advanced by the number of bytes
			 *    absorbed.
			 */
			n = dp->read_proc(page, &start, *ppos,
					  count, &eof, dp->data);
		} else
			break;

		if (n == 0)   /* end of file */
			break;
		if (n < 0) {  /* error */
			if (retval == 0)
				retval = n;
			break;
		}

		if (start == NULL) {
			/* Method 0: data placed at its file offset. */
			if (n > PAGE_SIZE) {
				printk(KERN_ERR
				       "proc_file_read: Apparent buffer overflow!\n");
				n = PAGE_SIZE;
			}
			n -= *ppos;	/* bytes actually new at this offset */
			if (n <= 0)
				break;
			if (n > count)
				n = count;
			start = page + *ppos;
		} else if (start < page) {
			/* Method 1: *start is a byte count, data at page[0]. */
			if (n > PAGE_SIZE) {
				printk(KERN_ERR
				       "proc_file_read: Apparent buffer overflow!\n");
				n = PAGE_SIZE;
			}
			if (n > count) {
				/*
				 * Don't reduce n because doing so might
				 * cut off part of a data block.
				 */
				printk(KERN_WARNING
				       "proc_file_read: Read count exceeded\n");
			}
		} else /* start >= page */ {
			/* Method 2: data placed at *start inside the page. */
			unsigned long startoff = (unsigned long)(start - page);
			if (n > (PAGE_SIZE - startoff)) {
				printk(KERN_ERR
				       "proc_file_read: Apparent buffer overflow!\n");
				n = PAGE_SIZE - startoff;
			}
			if (n > count)
				n = count;
		}

		n -= copy_to_user(buf, start < page ? page : start, n);
		if (n == 0) {
			if (retval == 0)
				retval = -EFAULT;
			break;
		}

		/* Method 1 advances by *start (a count); others by bytes copied. */
		*ppos += start < page ? (unsigned long)start : n;
		nbytes -= n;
		buf += n;
		retval += n;
	}
	free_page((unsigned long) page);
	return retval;
}

/*
 * proc_file_write - write method for the legacy ->write_proc interface.
 * Simply forwards the user buffer to the callback; the file position is
 * not consulted (see the FIXME below).
 */
static ssize_t
proc_file_write(struct file *file, const char __user *buffer,
		size_t count, loff_t *ppos)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct proc_dir_entry * dp;

	dp = PDE(inode);

	if (!dp->write_proc)
		return -EIO;

	/* FIXME: does this routine need ppos?  probably... */
	return dp->write_proc(file, buffer, count, dp->data);
}

/*
 * proc_file_lseek - seek within a legacy proc file.
 * Only SEEK_SET (0) and SEEK_CUR (1) are supported, and the resulting
 * offset is clamped to the non-LFS range; anything else yields -EINVAL.
 */
static loff_t
proc_file_lseek(struct file *file, loff_t offset, int orig)
{
	loff_t retval = -EINVAL;
	switch (orig) {
	case 1:
		offset += file->f_pos;
	/* fallthrough */
	case 0:
		if (offset < 0 || offset > MAX_NON_LFS)
			break;
		file->f_pos = retval = offset;
	}
	return retval;
}

/*
 * proc_notify_change - setattr method for proc entries.
 * After the generic checks and inode update succeed, mirror the new
 * uid/gid/mode back into the proc_dir_entry so freshly created inodes
 * pick them up too.
 */
static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
{
	struct inode *inode = dentry->d_inode;
	struct proc_dir_entry *de = PDE(inode);
	int error;

	error = inode_change_ok(inode, iattr);
	if (error)
		goto out;

	error = inode_setattr(inode, iattr);
	if (error)
		goto out;

	de->uid = inode->i_uid;
	de->gid = inode->i_gid;
	de->mode = inode->i_mode;
out:
	return error;
}

/*
 * proc_getattr - getattr method for proc entries.
 * Refreshes i_nlink from the proc_dir_entry (it changes as children are
 * added/removed) before filling in the kstat.
 */
static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
			struct kstat *stat)
{
	struct inode *inode = dentry->d_inode;
	struct proc_dir_entry *de = PROC_I(inode)->pde;
	if (de && de->nlink)
		inode->i_nlink = de->nlink;

	generic_fillattr(inode, stat);
	return 0;
}

static const struct inode_operations proc_file_inode_operations = {
	.setattr	= proc_notify_change,
};

/*
 * This function parses a name such as "tty/driver/serial", and
 * returns the struct proc_dir_entry for "/proc/tty/driver", and
 * returns "serial" in residual.
 */
static int xlate_proc_name(const char *name,
			   struct proc_dir_entry **ret, const char **residual)
{
	const char     		*cp = name, *next;
	struct proc_dir_entry	*de;
	int			len;
	int 			rtn = 0;

	spin_lock(&proc_subdir_lock);
	de = &proc_root;
	while (1) {
		/* Stop at the last path component; it is left in *residual. */
		next = strchr(cp, '/');
		if (!next)
			break;

		len = next - cp;
		for (de = de->subdir; de ; de = de->next) {
			if (proc_match(len, cp, de))
				break;
		}
		if (!de) {
			rtn = -ENOENT;
			goto out;
		}
		cp += len + 1;
	}
	*residual = cp;
	*ret = de;
out:
	spin_unlock(&proc_subdir_lock);
	return rtn;
}

static DEFINE_IDR(proc_inum_idr);
static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */

#define PROC_DYNAMIC_FIRST 0xF0000000UL

/*
 * Return an inode number between PROC_DYNAMIC_FIRST and
 * 0xffffffff, or zero on failure.
 */
static unsigned int get_inode_number(void)
{
	int i, inum = 0;
	int error;

retry:
	/* Pre-load the IDR outside the lock; GFP_KERNEL may sleep. */
	if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0)
		return 0;

	spin_lock(&proc_inum_lock);
	error = idr_get_new(&proc_inum_idr, NULL, &i);
	spin_unlock(&proc_inum_lock);
	if (error == -EAGAIN)
		goto retry;	/* raced with another allocator; reload */
	else if (error)
		return 0;

	inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST;

	/* inum will never be more than 0xf0ffffff, so no check
	 * for overflow.
	 */

	return inum;
}

/*
 * release_inode_number - return a dynamic inode number to the IDR pool.
 * Inverse of the mapping applied in get_inode_number().
 */
static void release_inode_number(unsigned int inum)
{
	int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK;

	spin_lock(&proc_inum_lock);
	idr_remove(&proc_inum_idr, id);
	spin_unlock(&proc_inum_lock);
}

/*
 * proc_follow_link - follow_link method for proc symlinks.
 * The link target string is stored in the entry's ->data.
 */
static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	nd_set_link(nd, PDE(dentry->d_inode)->data);
	return NULL;
}

static const struct inode_operations proc_link_inode_operations = {
	.readlink	= generic_readlink,
	.follow_link	= proc_follow_link,
};

/*
 * As some entries in /proc are volatile, we want to 
 * get rid of unused dentries.  This could be made 
 * smarter: we could keep a "volatile" flag in the 
 * inode to indicate which ones to keep.
 */
static int proc_delete_dentry(struct dentry * dentry)
{
	/* Always delete unused dentries so stale entries don't linger. */
	return 1;
}

static struct dentry_operations proc_dentry_operations =
{
	.d_delete	= proc_delete_dentry,
};

/*
 * Don't create negative dentries here, return -ENOENT by hand
 * instead.
 */
struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
{
	struct inode *inode = NULL;
	struct proc_dir_entry * de;
	int error = -ENOENT;

	lock_kernel();
	spin_lock(&proc_subdir_lock);
	de = PDE(dir);
	if (de) {
		/* Linear scan of the directory's children for a name match. */
		for (de = de->subdir; de ; de = de->next) {
			if (de->namelen != dentry->d_name.len)
				continue;
			if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
				unsigned int ino = de->low_ino;

				/*
				 * Pin the entry across the unlocked
				 * proc_get_inode() call.
				 */
				de_get(de);
				spin_unlock(&proc_subdir_lock);
				error = -EINVAL;
				inode = proc_get_inode(dir->i_sb, ino, de);
				spin_lock(&proc_subdir_lock);
				break;
			}
		}
	}
	spin_unlock(&proc_subdir_lock);
	unlock_kernel();

	if (inode) {
		dentry->d_op = &proc_dentry_operations;
		d_add(dentry, inode);
		return NULL;
	}
	/* proc_get_inode() failed (or nothing found): drop the reference. */
	de_put(de);
	return ERR_PTR(error);
}

/*
 * This returns non-zero if at EOF, so that the /proc
 * root directory can use this and check if it should
 * continue with the <pid> entries..
 *
 * Note that the VFS-layer doesn't care about the return
 * value of the readdir() call, as long as it's non-negative
 * for success..
 */
int proc_readdir(struct file * filp,
	void * dirent, filldir_t filldir)
{
	struct proc_dir_entry * de;
	unsigned int ino;
	int i;
	struct inode *inode = filp->f_path.dentry->d_inode;
	int ret = 0;

	lock_kernel();

	ino = inode->i_ino;
	de = PDE(inode);
	if (!de) {
		ret = -EINVAL;
		goto out;
	}
	i = filp->f_pos;
	switch (i) {
		case 0:
			/* f_pos 0: emit "." */
			if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
				goto out;
			i++;
			filp->f_pos++;
			/* fall through */
		case 1:
			/* f_pos 1: emit ".." */
			if (filldir(dirent, "..", 2, i,
				    parent_ino(filp->f_path.dentry),
				    DT_DIR) < 0)
				goto out;
			i++;
			filp->f_pos++;
			/* fall through */
		default:
			spin_lock(&proc_subdir_lock);
			de = de->subdir;
			i -= 2;
			/* Skip the entries already returned on earlier calls. */
			for (;;) {
				if (!de) {
					ret = 1;
					spin_unlock(&proc_subdir_lock);
					goto out;
				}
				if (!i)
					break;
				de = de->next;
				i--;
			}

			do {
				struct proc_dir_entry *next;

				/* filldir passes info to user space */
				de_get(de);
				spin_unlock(&proc_subdir_lock);
				if (filldir(dirent, de->name, de->namelen, filp->f_pos,
					    de->low_ino, de->mode >> 12) < 0) {
					de_put(de);
					goto out;
				}
				spin_lock(&proc_subdir_lock);
				filp->f_pos++;
				next = de->next;
				de_put(de);
				de = next;
			} while (de);
			spin_unlock(&proc_subdir_lock);
	}
	ret = 1;
out:	unlock_kernel();
	return ret;	
}

/*
 * These are the generic /proc directory operations. They
 * use the in-memory "struct proc_dir_entry" tree to parse
 * the /proc directory.
 */
static const struct file_operations proc_dir_operations = {
	.read			= generic_read_dir,
	.readdir		= proc_readdir,
};

/*
 * proc directories can do almost nothing..
 */
static const struct inode_operations proc_dir_inode_operations = {
	.lookup		= proc_lookup,
	.getattr	= proc_getattr,
	.setattr	= proc_notify_change,
};

/*
 * proc_register - assign an inode number and default operations to @dp,
 * then link it into @dir's child list.  Returns 0 or -EAGAIN if no
 * dynamic inode number could be allocated.
 */
static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
{
	unsigned int i;
	
	i = get_inode_number();
	if (i == 0)
		return -EAGAIN;
	dp->low_ino = i;

	/* Fill in default ops for whichever file type this is. */
	if (S_ISDIR(dp->mode)) {
		if (dp->proc_iops == NULL) {
			dp->proc_fops = &proc_dir_operations;
			dp->proc_iops = &proc_dir_inode_operations;
		}
		dir->nlink++;
	} else if (S_ISLNK(dp->mode)) {
		if (dp->proc_iops == NULL)
			dp->proc_iops = &proc_link_inode_operations;
	} else if (S_ISREG(dp->mode)) {
		if (dp->proc_fops == NULL)
			dp->proc_fops = &proc_file_operations;
		if (dp->proc_iops == NULL)
			dp->proc_iops = &proc_file_inode_operations;
	}

	spin_lock(&proc_subdir_lock);
	dp->next = dir->subdir;
	dp->parent = dir;
	dir->subdir = dp;
	spin_unlock(&proc_subdir_lock);

	return 0;
}

/*
 * Kill an inode that got unregistered..
 */
static void proc_kill_inodes(struct proc_dir_entry *de)
{
	struct list_head *p;
	struct super_block *sb = proc_mnt->mnt_sb;

	/*
	 * Actually it's a partial revoke().  Any open file whose
	 * inode belongs to @de has its f_op stripped so further
	 * operations on it fail.
	 */
	file_list_lock();
	list_for_each(p, &sb->s_files) {
		struct file * filp = list_entry(p, struct file, f_u.fu_list);
		struct dentry * dentry = filp->f_path.dentry;
		struct inode * inode;
		const struct file_operations *fops;

		if (dentry->d_op != &proc_dentry_operations)
			continue;
		inode = dentry->d_inode;
		if (PDE(inode) != de)
			continue;
		fops = filp->f_op;
		filp->f_op = NULL;
		fops_put(fops);
	}
	file_list_unlock();
}

/*
 * proc_create - allocate and partially initialise a proc_dir_entry.
 * The name is copied into the same allocation, directly after the
 * struct.  The entry is NOT yet registered; *parent is resolved from
 * @name via xlate_proc_name() when the caller passed NULL.
 * Returns NULL on any failure.
 */
static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent,
					  const char *name,
					  mode_t mode,
					  nlink_t nlink)
{
	struct proc_dir_entry *ent = NULL;
	const char *fn = name;
	int len;

	/* make sure name is valid */
	if (!name || !strlen(name)) goto out;

	if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0)
		goto out;

	/* At this point there must not be any '/' characters beyond *fn */
	if (strchr(fn, '/'))
		goto out;

	len = strlen(fn);

	ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL);
	if (!ent) goto out;

	memset(ent, 0, sizeof(struct proc_dir_entry));
	memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1);
	ent->name = ((char *) ent) + sizeof(*ent);
	ent->namelen = len;
	ent->mode = mode;
	ent->nlink = nlink;
	ent->pde_users = 0;
	spin_lock_init(&ent->pde_unload_lock);
	ent->pde_unload_completion = NULL;
 out:
	return ent;
}

/*
 * proc_symlink - create a /proc symlink @name under @parent pointing
 * at @dest.  The target string is kmalloc'd and owned by the entry
 * (freed by free_proc_entry()).  Returns the new entry or NULL.
 */
struct proc_dir_entry *proc_symlink(const char *name,
		struct proc_dir_entry *parent, const char *dest)
{
	struct proc_dir_entry *ent;

	ent = proc_create(&parent,name,
			  (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1);

	if (ent) {
		ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL);
		if (ent->data) {
			strcpy((char*)ent->data,dest);
			if (proc_register(parent, ent) < 0) {
				kfree(ent->data);
				kfree(ent);
				ent = NULL;
			}
		} else {
			kfree(ent);
			ent = NULL;
		}
	}
	return ent;
}

/*
 * proc_mkdir_mode - create a /proc directory @name with the given
 * permission bits under @parent.  Returns the new entry or NULL.
 */
struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
		struct proc_dir_entry *parent)
{
	struct proc_dir_entry *ent;

	ent = proc_create(&parent, name, S_IFDIR | mode, 2);
	if (ent) {
		if (proc_register(parent, ent) < 0) {
			kfree(ent);
			ent = NULL;
		}
	}
	return ent;
}

/*
 * proc_mkdir - create a /proc directory with the default
 * r-xr-xr-x permissions.
 */
struct proc_dir_entry *proc_mkdir(const char *name,
		struct proc_dir_entry *parent)
{
	return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
}

/*
 * create_proc_entry - create a generic /proc entry, supplying sane
 * defaults for missing type and permission bits (regular file,
 * readable).  Returns the new entry or NULL.
 */
struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
					 struct proc_dir_entry *parent)
{
	struct proc_dir_entry *ent;
	nlink_t nlink;

	if (S_ISDIR(mode)) {
		if ((mode & S_IALLUGO) == 0)
			mode |= S_IRUGO | S_IXUGO;
		nlink = 2;
	} else {
		if ((mode & S_IFMT) == 0)
			mode |= S_IFREG;
		if ((mode & S_IALLUGO) == 0)
			mode |= S_IRUGO;
		nlink = 1;
	}

	ent = proc_create(&parent,name,mode,nlink);
	if (ent) {
		if (proc_register(parent, ent) < 0) {
			kfree(ent);
			ent = NULL;
		}
	}
	return ent;
}

/*
 * free_proc_entry - release a proc_dir_entry and its dynamic inode
 * number.  Entries below PROC_DYNAMIC_FIRST are static and never freed.
 * A symlink's kmalloc'd target (->data) is freed along with it.
 */
void free_proc_entry(struct proc_dir_entry *de)
{
	unsigned int ino = de->low_ino;

	if (ino < PROC_DYNAMIC_FIRST)
		return;

	release_inode_number(ino);

	if (S_ISLNK(de->mode) && de->data)
		kfree(de->data);
	kfree(de);
}

/*
 * Remove a /proc entry and free it if it's not currently in use.
 * If it is in use, we set the 'deleted' flag.
 */
void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
{
	struct proc_dir_entry **p;
	struct proc_dir_entry *de;
	const char *fn = name;
	int len;

	if (!parent && xlate_proc_name(name, &parent, &fn) != 0)
		goto out;
	len = strlen(fn);

	spin_lock(&proc_subdir_lock);
	for (p = &parent->subdir; *p; p=&(*p)->next ) {
		if (!proc_match(len, fn, *p))
			continue;
		/* Unhook the entry from its parent's child list. */
		de = *p;
		*p = de->next;
		de->next = NULL;

		spin_lock(&de->pde_unload_lock);
		/*
		 * Stop accepting new callers into module. If you're
		 * dynamically allocating ->proc_fops, save a pointer somewhere.
		 */
		de->proc_fops = NULL;
		/* Wait until all existing callers into module are done. */
		if (de->pde_users > 0) {
			DECLARE_COMPLETION_ONSTACK(c);

			if (!de->pde_unload_completion)
				de->pde_unload_completion = &c;

			/* Drop both locks: the last user completes us. */
			spin_unlock(&de->pde_unload_lock);
			spin_unlock(&proc_subdir_lock);

			wait_for_completion(de->pde_unload_completion);

			spin_lock(&proc_subdir_lock);
			goto continue_removing;
		}
		spin_unlock(&de->pde_unload_lock);

continue_removing:
		if (S_ISDIR(de->mode))
			parent->nlink--;
		if (!S_ISREG(de->mode))
			proc_kill_inodes(de);
		de->nlink = 0;
		WARN_ON(de->subdir);
		if (!atomic_read(&de->count))
			free_proc_entry(de);
		else {
			/* Still referenced: mark deleted, freed on last de_put(). */
			de->deleted = 1;
			printk("remove_proc_entry: %s/%s busy, count=%d\n",
				parent->name, de->name, atomic_read(&de->count));
		}
		break;
	}
	spin_unlock(&proc_subdir_lock);
out:
	return;
}