1 /* 2 * proc/fs/generic.c --- generic routines for the proc-fs 3 * 4 * This file contains generic proc-fs routines for handling 5 * directories and files. 6 * 7 * Copyright (C) 1991, 1992 Linus Torvalds. 8 * Copyright (C) 1997 Theodore Ts'o 9 */ 10 11 #include <linux/errno.h> 12 #include <linux/time.h> 13 #include <linux/proc_fs.h> 14 #include <linux/stat.h> 15 #include <linux/module.h> 16 #include <linux/mount.h> 17 #include <linux/smp_lock.h> 18 #include <linux/init.h> 19 #include <linux/idr.h> 20 #include <linux/namei.h> 21 #include <linux/bitops.h> 22 #include <linux/spinlock.h> 23 #include <linux/completion.h> 24 #include <asm/uaccess.h> 25 26 #include "internal.h" 27 28 DEFINE_SPINLOCK(proc_subdir_lock); 29 30 static int proc_match(int len, const char *name, struct proc_dir_entry *de) 31 { 32 if (de->namelen != len) 33 return 0; 34 return !memcmp(name, de->name, len); 35 } 36 37 /* buffer size is one page but our output routines use some slack for overruns */ 38 #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) 39 40 static ssize_t 41 proc_file_read(struct file *file, char __user *buf, size_t nbytes, 42 loff_t *ppos) 43 { 44 struct inode * inode = file->f_path.dentry->d_inode; 45 char *page; 46 ssize_t retval=0; 47 int eof=0; 48 ssize_t n, count; 49 char *start; 50 struct proc_dir_entry * dp; 51 unsigned long long pos; 52 53 /* 54 * Gaah, please just use "seq_file" instead. The legacy /proc 55 * interfaces cut loff_t down to off_t for reads, and ignore 56 * the offset entirely for writes.. 57 */ 58 pos = *ppos; 59 if (pos > MAX_NON_LFS) 60 return 0; 61 if (nbytes > MAX_NON_LFS - pos) 62 nbytes = MAX_NON_LFS - pos; 63 64 dp = PDE(inode); 65 if (!(page = (char*) __get_free_page(GFP_TEMPORARY))) 66 return -ENOMEM; 67 68 while ((nbytes > 0) && !eof) { 69 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); 70 71 start = NULL; 72 if (dp->get_info) { 73 /* Handle old net routines */ 74 n = dp->get_info(page, &start, *ppos, count); 75 if (n < count) 76 eof = 1; 77 } else if (dp->read_proc) { 78 /* 79 * How to be a proc read function 80 * ------------------------------ 81 * Prototype: 82 * int f(char *buffer, char **start, off_t offset, 83 * int count, int *peof, void *dat) 84 * 85 * Assume that the buffer is "count" bytes in size. 86 * 87 * If you know you have supplied all the data you 88 * have, set *peof. 89 * 90 * You have three ways to return data: 91 * 0) Leave *start = NULL. (This is the default.) 92 * Put the data of the requested offset at that 93 * offset within the buffer. Return the number (n) 94 * of bytes there are from the beginning of the 95 * buffer up to the last byte of data. If the 96 * number of supplied bytes (= n - offset) is 97 * greater than zero and you didn't signal eof 98 * and the reader is prepared to take more data 99 * you will be called again with the requested 100 * offset advanced by the number of bytes 101 * absorbed. This interface is useful for files 102 * no larger than the buffer. 103 * 1) Set *start = an unsigned long value less than 104 * the buffer address but greater than zero. 105 * Put the data of the requested offset at the 106 * beginning of the buffer. Return the number of 107 * bytes of data placed there. If this number is 108 * greater than zero and you didn't signal eof 109 * and the reader is prepared to take more data 110 * you will be called again with the requested 111 * offset advanced by *start. This interface is 112 * useful when you have a large file consisting 113 * of a series of blocks which you want to count 114 * and return as wholes. 115 * (Hack by Paul.Russell@rustcorp.com.au) 116 * 2) Set *start = an address within the buffer. 117 * Put the data of the requested offset at *start. 118 * Return the number of bytes of data placed there. 119 * If this number is greater than zero and you 120 * didn't signal eof and the reader is prepared to 121 * take more data you will be called again with the 122 * requested offset advanced by the number of bytes 123 * absorbed. 124 */ 125 n = dp->read_proc(page, &start, *ppos, 126 count, &eof, dp->data); 127 } else 128 break; 129 130 if (n == 0) /* end of file */ 131 break; 132 if (n < 0) { /* error */ 133 if (retval == 0) 134 retval = n; 135 break; 136 } 137 138 if (start == NULL) { 139 if (n > PAGE_SIZE) { 140 printk(KERN_ERR 141 "proc_file_read: Apparent buffer overflow!\n"); 142 n = PAGE_SIZE; 143 } 144 n -= *ppos; 145 if (n <= 0) 146 break; 147 if (n > count) 148 n = count; 149 start = page + *ppos; 150 } else if (start < page) { 151 if (n > PAGE_SIZE) { 152 printk(KERN_ERR 153 "proc_file_read: Apparent buffer overflow!\n"); 154 n = PAGE_SIZE; 155 } 156 if (n > count) { 157 /* 158 * Don't reduce n because doing so might 159 * cut off part of a data block. 160 */ 161 printk(KERN_WARNING 162 "proc_file_read: Read count exceeded\n"); 163 } 164 } else /* start >= page */ { 165 unsigned long startoff = (unsigned long)(start - page); 166 if (n > (PAGE_SIZE - startoff)) { 167 printk(KERN_ERR 168 "proc_file_read: Apparent buffer overflow!\n"); 169 n = PAGE_SIZE - startoff; 170 } 171 if (n > count) 172 n = count; 173 } 174 175 n -= copy_to_user(buf, start < page ? page : start, n); 176 if (n == 0) { 177 if (retval == 0) 178 retval = -EFAULT; 179 break; 180 } 181 182 *ppos += start < page ? (unsigned long)start : n; 183 nbytes -= n; 184 buf += n; 185 retval += n; 186 } 187 free_page((unsigned long) page); 188 return retval; 189 } 190 191 static ssize_t 192 proc_file_write(struct file *file, const char __user *buffer, 193 size_t count, loff_t *ppos) 194 { 195 struct inode *inode = file->f_path.dentry->d_inode; 196 struct proc_dir_entry * dp; 197 198 dp = PDE(inode); 199 200 if (!dp->write_proc) 201 return -EIO; 202 203 /* FIXME: does this routine need ppos? probably... */ 204 return dp->write_proc(file, buffer, count, dp->data); 205 } 206 207 208 static loff_t 209 proc_file_lseek(struct file *file, loff_t offset, int orig) 210 { 211 loff_t retval = -EINVAL; 212 switch (orig) { 213 case 1: 214 offset += file->f_pos; 215 /* fallthrough */ 216 case 0: 217 if (offset < 0 || offset > MAX_NON_LFS) 218 break; 219 file->f_pos = retval = offset; 220 } 221 return retval; 222 } 223 224 static const struct file_operations proc_file_operations = { 225 .llseek = proc_file_lseek, 226 .read = proc_file_read, 227 .write = proc_file_write, 228 }; 229 230 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) 231 { 232 struct inode *inode = dentry->d_inode; 233 struct proc_dir_entry *de = PDE(inode); 234 int error; 235 236 error = inode_change_ok(inode, iattr); 237 if (error) 238 goto out; 239 240 error = inode_setattr(inode, iattr); 241 if (error) 242 goto out; 243 244 de->uid = inode->i_uid; 245 de->gid = inode->i_gid; 246 de->mode = inode->i_mode; 247 out: 248 return error; 249 } 250 251 static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, 252 struct kstat *stat) 253 { 254 struct inode *inode = dentry->d_inode; 255 struct proc_dir_entry *de = PROC_I(inode)->pde; 256 if (de && de->nlink) 257 inode->i_nlink = de->nlink; 258 259 generic_fillattr(inode, stat); 260 return 0; 261 } 262 263 static const struct inode_operations proc_file_inode_operations = { 264 .setattr = proc_notify_change, 265 }; 266 267 /* 268 * This function parses a name such as "tty/driver/serial", and 269 * returns the struct proc_dir_entry for "/proc/tty/driver", and 270 * returns "serial" in residual. 271 */ 272 static int xlate_proc_name(const char *name, 273 struct proc_dir_entry **ret, const char **residual) 274 { 275 const char *cp = name, *next; 276 struct proc_dir_entry *de; 277 int len; 278 int rtn = 0; 279 280 spin_lock(&proc_subdir_lock); 281 de = &proc_root; 282 while (1) { 283 next = strchr(cp, '/'); 284 if (!next) 285 break; 286 287 len = next - cp; 288 for (de = de->subdir; de ; de = de->next) { 289 if (proc_match(len, cp, de)) 290 break; 291 } 292 if (!de) { 293 rtn = -ENOENT; 294 goto out; 295 } 296 cp += len + 1; 297 } 298 *residual = cp; 299 *ret = de; 300 out: 301 spin_unlock(&proc_subdir_lock); 302 return rtn; 303 } 304 305 static DEFINE_IDR(proc_inum_idr); 306 static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 307 308 #define PROC_DYNAMIC_FIRST 0xF0000000UL 309 310 /* 311 * Return an inode number between PROC_DYNAMIC_FIRST and 312 * 0xffffffff, or zero on failure. 313 */ 314 static unsigned int get_inode_number(void) 315 { 316 int i, inum = 0; 317 int error; 318 319 retry: 320 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 321 return 0; 322 323 spin_lock(&proc_inum_lock); 324 error = idr_get_new(&proc_inum_idr, NULL, &i); 325 spin_unlock(&proc_inum_lock); 326 if (error == -EAGAIN) 327 goto retry; 328 else if (error) 329 return 0; 330 331 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 332 333 /* inum will never be more than 0xf0ffffff, so no check 334 * for overflow. 335 */ 336 337 return inum; 338 } 339 340 static void release_inode_number(unsigned int inum) 341 { 342 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; 343 344 spin_lock(&proc_inum_lock); 345 idr_remove(&proc_inum_idr, id); 346 spin_unlock(&proc_inum_lock); 347 } 348 349 static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) 350 { 351 nd_set_link(nd, PDE(dentry->d_inode)->data); 352 return NULL; 353 } 354 355 static const struct inode_operations proc_link_inode_operations = { 356 .readlink = generic_readlink, 357 .follow_link = proc_follow_link, 358 }; 359 360 /* 361 * As some entries in /proc are volatile, we want to 362 * get rid of unused dentries. This could be made 363 * smarter: we could keep a "volatile" flag in the 364 * inode to indicate which ones to keep. 365 */ 366 static int proc_delete_dentry(struct dentry * dentry) 367 { 368 return 1; 369 } 370 371 static struct dentry_operations proc_dentry_operations = 372 { 373 .d_delete = proc_delete_dentry, 374 }; 375 376 /* 377 * Don't create negative dentries here, return -ENOENT by hand 378 * instead. 379 */ 380 struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 381 { 382 struct inode *inode = NULL; 383 struct proc_dir_entry * de; 384 int error = -ENOENT; 385 386 lock_kernel(); 387 spin_lock(&proc_subdir_lock); 388 de = PDE(dir); 389 if (de) { 390 for (de = de->subdir; de ; de = de->next) { 391 if (de->namelen != dentry->d_name.len) 392 continue; 393 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 394 unsigned int ino; 395 396 if (de->shadow_proc) 397 de = de->shadow_proc(current, de); 398 ino = de->low_ino; 399 de_get(de); 400 spin_unlock(&proc_subdir_lock); 401 error = -EINVAL; 402 inode = proc_get_inode(dir->i_sb, ino, de); 403 goto out_unlock; 404 } 405 } 406 } 407 spin_unlock(&proc_subdir_lock); 408 out_unlock: 409 unlock_kernel(); 410 411 if (inode) { 412 dentry->d_op = &proc_dentry_operations; 413 d_add(dentry, inode); 414 return NULL; 415 } 416 de_put(de); 417 return ERR_PTR(error); 418 } 419 420 /* 421 * This returns non-zero if at EOF, so that the /proc 422 * root directory can use this and check if it should 423 * continue with the <pid> entries.. 424 * 425 * Note that the VFS-layer doesn't care about the return 426 * value of the readdir() call, as long as it's non-negative 427 * for success.. 428 */ 429 int proc_readdir(struct file * filp, 430 void * dirent, filldir_t filldir) 431 { 432 struct proc_dir_entry * de; 433 unsigned int ino; 434 int i; 435 struct inode *inode = filp->f_path.dentry->d_inode; 436 int ret = 0; 437 438 lock_kernel(); 439 440 ino = inode->i_ino; 441 de = PDE(inode); 442 if (!de) { 443 ret = -EINVAL; 444 goto out; 445 } 446 i = filp->f_pos; 447 switch (i) { 448 case 0: 449 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 450 goto out; 451 i++; 452 filp->f_pos++; 453 /* fall through */ 454 case 1: 455 if (filldir(dirent, "..", 2, i, 456 parent_ino(filp->f_path.dentry), 457 DT_DIR) < 0) 458 goto out; 459 i++; 460 filp->f_pos++; 461 /* fall through */ 462 default: 463 spin_lock(&proc_subdir_lock); 464 de = de->subdir; 465 i -= 2; 466 for (;;) { 467 if (!de) { 468 ret = 1; 469 spin_unlock(&proc_subdir_lock); 470 goto out; 471 } 472 if (!i) 473 break; 474 de = de->next; 475 i--; 476 } 477 478 do { 479 struct proc_dir_entry *next; 480 481 /* filldir passes info to user space */ 482 de_get(de); 483 spin_unlock(&proc_subdir_lock); 484 if (filldir(dirent, de->name, de->namelen, filp->f_pos, 485 de->low_ino, de->mode >> 12) < 0) { 486 de_put(de); 487 goto out; 488 } 489 spin_lock(&proc_subdir_lock); 490 filp->f_pos++; 491 next = de->next; 492 de_put(de); 493 de = next; 494 } while (de); 495 spin_unlock(&proc_subdir_lock); 496 } 497 ret = 1; 498 out: unlock_kernel(); 499 return ret; 500 } 501 502 /* 503 * These are the generic /proc directory operations. They 504 * use the in-memory "struct proc_dir_entry" tree to parse 505 * the /proc directory. 506 */ 507 static const struct file_operations proc_dir_operations = { 508 .read = generic_read_dir, 509 .readdir = proc_readdir, 510 }; 511 512 /* 513 * proc directories can do almost nothing.. 514 */ 515 static const struct inode_operations proc_dir_inode_operations = { 516 .lookup = proc_lookup, 517 .getattr = proc_getattr, 518 .setattr = proc_notify_change, 519 }; 520 521 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) 522 { 523 unsigned int i; 524 struct proc_dir_entry *tmp; 525 526 i = get_inode_number(); 527 if (i == 0) 528 return -EAGAIN; 529 dp->low_ino = i; 530 531 if (S_ISDIR(dp->mode)) { 532 if (dp->proc_iops == NULL) { 533 dp->proc_fops = &proc_dir_operations; 534 dp->proc_iops = &proc_dir_inode_operations; 535 } 536 dir->nlink++; 537 } else if (S_ISLNK(dp->mode)) { 538 if (dp->proc_iops == NULL) 539 dp->proc_iops = &proc_link_inode_operations; 540 } else if (S_ISREG(dp->mode)) { 541 if (dp->proc_fops == NULL) 542 dp->proc_fops = &proc_file_operations; 543 if (dp->proc_iops == NULL) 544 dp->proc_iops = &proc_file_inode_operations; 545 } 546 547 spin_lock(&proc_subdir_lock); 548 549 for (tmp = dir->subdir; tmp; tmp = tmp->next) 550 if (strcmp(tmp->name, dp->name) == 0) { 551 printk(KERN_WARNING "proc_dir_entry '%s' already " 552 "registered\n", dp->name); 553 dump_stack(); 554 break; 555 } 556 557 dp->next = dir->subdir; 558 dp->parent = dir; 559 dir->subdir = dp; 560 spin_unlock(&proc_subdir_lock); 561 562 return 0; 563 } 564 565 static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, 566 const char *name, 567 mode_t mode, 568 nlink_t nlink) 569 { 570 struct proc_dir_entry *ent = NULL; 571 const char *fn = name; 572 int len; 573 574 /* make sure name is valid */ 575 if (!name || !strlen(name)) goto out; 576 577 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) 578 goto out; 579 580 /* At this point there must not be any '/' characters beyond *fn */ 581 if (strchr(fn, '/')) 582 goto out; 583 584 len = strlen(fn); 585 586 ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); 587 if (!ent) goto out; 588 589 memset(ent, 0, sizeof(struct proc_dir_entry)); 590 memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); 591 ent->name = ((char *) ent) + sizeof(*ent); 592 ent->namelen = len; 593 ent->mode = mode; 594 ent->nlink = nlink; 595 atomic_set(&ent->count, 1); 596 ent->pde_users = 0; 597 spin_lock_init(&ent->pde_unload_lock); 598 ent->pde_unload_completion = NULL; 599 out: 600 return ent; 601 } 602 603 struct proc_dir_entry *proc_symlink(const char *name, 604 struct proc_dir_entry *parent, const char *dest) 605 { 606 struct proc_dir_entry *ent; 607 608 ent = __proc_create(&parent, name, 609 (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); 610 611 if (ent) { 612 ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); 613 if (ent->data) { 614 strcpy((char*)ent->data,dest); 615 if (proc_register(parent, ent) < 0) { 616 kfree(ent->data); 617 kfree(ent); 618 ent = NULL; 619 } 620 } else { 621 kfree(ent); 622 ent = NULL; 623 } 624 } 625 return ent; 626 } 627 628 struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, 629 struct proc_dir_entry *parent) 630 { 631 struct proc_dir_entry *ent; 632 633 ent = __proc_create(&parent, name, S_IFDIR | mode, 2); 634 if (ent) { 635 if (proc_register(parent, ent) < 0) { 636 kfree(ent); 637 ent = NULL; 638 } 639 } 640 return ent; 641 } 642 643 struct proc_dir_entry *proc_mkdir(const char *name, 644 struct proc_dir_entry *parent) 645 { 646 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); 647 } 648 649 struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, 650 struct proc_dir_entry *parent) 651 { 652 struct proc_dir_entry *ent; 653 nlink_t nlink; 654 655 if (S_ISDIR(mode)) { 656 if ((mode & S_IALLUGO) == 0) 657 mode |= S_IRUGO | S_IXUGO; 658 nlink = 2; 659 } else { 660 if ((mode & S_IFMT) == 0) 661 mode |= S_IFREG; 662 if ((mode & S_IALLUGO) == 0) 663 mode |= S_IRUGO; 664 nlink = 1; 665 } 666 667 ent = __proc_create(&parent, name, mode, nlink); 668 if (ent) { 669 if (proc_register(parent, ent) < 0) { 670 kfree(ent); 671 ent = NULL; 672 } 673 } 674 return ent; 675 } 676 677 struct proc_dir_entry *proc_create(const char *name, mode_t mode, 678 struct proc_dir_entry *parent, 679 const struct file_operations *proc_fops) 680 { 681 struct proc_dir_entry *pde; 682 nlink_t nlink; 683 684 if (S_ISDIR(mode)) { 685 if ((mode & S_IALLUGO) == 0) 686 mode |= S_IRUGO | S_IXUGO; 687 nlink = 2; 688 } else { 689 if ((mode & S_IFMT) == 0) 690 mode |= S_IFREG; 691 if ((mode & S_IALLUGO) == 0) 692 mode |= S_IRUGO; 693 nlink = 1; 694 } 695 696 pde = __proc_create(&parent, name, mode, nlink); 697 if (!pde) 698 goto out; 699 pde->proc_fops = proc_fops; 700 if (proc_register(parent, pde) < 0) 701 goto out_free; 702 return pde; 703 out_free: 704 kfree(pde); 705 out: 706 return NULL; 707 } 708 709 void free_proc_entry(struct proc_dir_entry *de) 710 { 711 unsigned int ino = de->low_ino; 712 713 if (ino < PROC_DYNAMIC_FIRST) 714 return; 715 716 release_inode_number(ino); 717 718 if (S_ISLNK(de->mode)) 719 kfree(de->data); 720 kfree(de); 721 } 722 723 /* 724 * Remove a /proc entry and free it if it's not currently in use. 725 */ 726 void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 727 { 728 struct proc_dir_entry **p; 729 struct proc_dir_entry *de; 730 const char *fn = name; 731 int len; 732 733 if (!parent && xlate_proc_name(name, &parent, &fn) != 0) 734 goto out; 735 len = strlen(fn); 736 737 spin_lock(&proc_subdir_lock); 738 for (p = &parent->subdir; *p; p=&(*p)->next ) { 739 if (!proc_match(len, fn, *p)) 740 continue; 741 de = *p; 742 *p = de->next; 743 de->next = NULL; 744 745 spin_lock(&de->pde_unload_lock); 746 /* 747 * Stop accepting new callers into module. If you're 748 * dynamically allocating ->proc_fops, save a pointer somewhere. 749 */ 750 de->proc_fops = NULL; 751 /* Wait until all existing callers into module are done. */ 752 if (de->pde_users > 0) { 753 DECLARE_COMPLETION_ONSTACK(c); 754 755 if (!de->pde_unload_completion) 756 de->pde_unload_completion = &c; 757 758 spin_unlock(&de->pde_unload_lock); 759 spin_unlock(&proc_subdir_lock); 760 761 wait_for_completion(de->pde_unload_completion); 762 763 spin_lock(&proc_subdir_lock); 764 goto continue_removing; 765 } 766 spin_unlock(&de->pde_unload_lock); 767 768 continue_removing: 769 if (S_ISDIR(de->mode)) 770 parent->nlink--; 771 de->nlink = 0; 772 WARN_ON(de->subdir); 773 if (atomic_dec_and_test(&de->count)) 774 free_proc_entry(de); 775 break; 776 } 777 spin_unlock(&proc_subdir_lock); 778 out: 779 return; 780 } 781