1 /* 2 * proc/fs/generic.c --- generic routines for the proc-fs 3 * 4 * This file contains generic proc-fs routines for handling 5 * directories and files. 6 * 7 * Copyright (C) 1991, 1992 Linus Torvalds. 8 * Copyright (C) 1997 Theodore Ts'o 9 */ 10 11 #include <linux/errno.h> 12 #include <linux/time.h> 13 #include <linux/proc_fs.h> 14 #include <linux/stat.h> 15 #include <linux/module.h> 16 #include <linux/mount.h> 17 #include <linux/smp_lock.h> 18 #include <linux/init.h> 19 #include <linux/idr.h> 20 #include <linux/namei.h> 21 #include <linux/bitops.h> 22 #include <linux/spinlock.h> 23 #include <asm/uaccess.h> 24 25 #include "internal.h" 26 27 static ssize_t proc_file_read(struct file *file, char __user *buf, 28 size_t nbytes, loff_t *ppos); 29 static ssize_t proc_file_write(struct file *file, const char __user *buffer, 30 size_t count, loff_t *ppos); 31 static loff_t proc_file_lseek(struct file *, loff_t, int); 32 33 DEFINE_SPINLOCK(proc_subdir_lock); 34 35 int proc_match(int len, const char *name, struct proc_dir_entry *de) 36 { 37 if (de->namelen != len) 38 return 0; 39 return !memcmp(name, de->name, len); 40 } 41 42 static struct file_operations proc_file_operations = { 43 .llseek = proc_file_lseek, 44 .read = proc_file_read, 45 .write = proc_file_write, 46 }; 47 48 /* buffer size is one page but our output routines use some slack for overruns */ 49 #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) 50 51 static ssize_t 52 proc_file_read(struct file *file, char __user *buf, size_t nbytes, 53 loff_t *ppos) 54 { 55 struct inode * inode = file->f_dentry->d_inode; 56 char *page; 57 ssize_t retval=0; 58 int eof=0; 59 ssize_t n, count; 60 char *start; 61 struct proc_dir_entry * dp; 62 unsigned long long pos; 63 64 /* 65 * Gaah, please just use "seq_file" instead. The legacy /proc 66 * interfaces cut loff_t down to off_t for reads, and ignore 67 * the offset entirely for writes.. 68 */ 69 pos = *ppos; 70 if (pos > MAX_NON_LFS) 71 return 0; 72 if (nbytes > MAX_NON_LFS - pos) 73 nbytes = MAX_NON_LFS - pos; 74 75 dp = PDE(inode); 76 if (!(page = (char*) __get_free_page(GFP_KERNEL))) 77 return -ENOMEM; 78 79 while ((nbytes > 0) && !eof) { 80 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); 81 82 start = NULL; 83 if (dp->get_info) { 84 /* Handle old net routines */ 85 n = dp->get_info(page, &start, *ppos, count); 86 if (n < count) 87 eof = 1; 88 } else if (dp->read_proc) { 89 /* 90 * How to be a proc read function 91 * ------------------------------ 92 * Prototype: 93 * int f(char *buffer, char **start, off_t offset, 94 * int count, int *peof, void *dat) 95 * 96 * Assume that the buffer is "count" bytes in size. 97 * 98 * If you know you have supplied all the data you 99 * have, set *peof. 100 * 101 * You have three ways to return data: 102 * 0) Leave *start = NULL. (This is the default.) 103 * Put the data of the requested offset at that 104 * offset within the buffer. Return the number (n) 105 * of bytes there are from the beginning of the 106 * buffer up to the last byte of data. If the 107 * number of supplied bytes (= n - offset) is 108 * greater than zero and you didn't signal eof 109 * and the reader is prepared to take more data 110 * you will be called again with the requested 111 * offset advanced by the number of bytes 112 * absorbed. This interface is useful for files 113 * no larger than the buffer. 114 * 1) Set *start = an unsigned long value less than 115 * the buffer address but greater than zero. 116 * Put the data of the requested offset at the 117 * beginning of the buffer. Return the number of 118 * bytes of data placed there. If this number is 119 * greater than zero and you didn't signal eof 120 * and the reader is prepared to take more data 121 * you will be called again with the requested 122 * offset advanced by *start. This interface is 123 * useful when you have a large file consisting 124 * of a series of blocks which you want to count 125 * and return as wholes. 126 * (Hack by Paul.Russell@rustcorp.com.au) 127 * 2) Set *start = an address within the buffer. 128 * Put the data of the requested offset at *start. 129 * Return the number of bytes of data placed there. 130 * If this number is greater than zero and you 131 * didn't signal eof and the reader is prepared to 132 * take more data you will be called again with the 133 * requested offset advanced by the number of bytes 134 * absorbed. 135 */ 136 n = dp->read_proc(page, &start, *ppos, 137 count, &eof, dp->data); 138 } else 139 break; 140 141 if (n == 0) /* end of file */ 142 break; 143 if (n < 0) { /* error */ 144 if (retval == 0) 145 retval = n; 146 break; 147 } 148 149 if (start == NULL) { 150 if (n > PAGE_SIZE) { 151 printk(KERN_ERR 152 "proc_file_read: Apparent buffer overflow!\n"); 153 n = PAGE_SIZE; 154 } 155 n -= *ppos; 156 if (n <= 0) 157 break; 158 if (n > count) 159 n = count; 160 start = page + *ppos; 161 } else if (start < page) { 162 if (n > PAGE_SIZE) { 163 printk(KERN_ERR 164 "proc_file_read: Apparent buffer overflow!\n"); 165 n = PAGE_SIZE; 166 } 167 if (n > count) { 168 /* 169 * Don't reduce n because doing so might 170 * cut off part of a data block. 171 */ 172 printk(KERN_WARNING 173 "proc_file_read: Read count exceeded\n"); 174 } 175 } else /* start >= page */ { 176 unsigned long startoff = (unsigned long)(start - page); 177 if (n > (PAGE_SIZE - startoff)) { 178 printk(KERN_ERR 179 "proc_file_read: Apparent buffer overflow!\n"); 180 n = PAGE_SIZE - startoff; 181 } 182 if (n > count) 183 n = count; 184 } 185 186 n -= copy_to_user(buf, start < page ? page : start, n); 187 if (n == 0) { 188 if (retval == 0) 189 retval = -EFAULT; 190 break; 191 } 192 193 *ppos += start < page ? (unsigned long)start : n; 194 nbytes -= n; 195 buf += n; 196 retval += n; 197 } 198 free_page((unsigned long) page); 199 return retval; 200 } 201 202 static ssize_t 203 proc_file_write(struct file *file, const char __user *buffer, 204 size_t count, loff_t *ppos) 205 { 206 struct inode *inode = file->f_dentry->d_inode; 207 struct proc_dir_entry * dp; 208 209 dp = PDE(inode); 210 211 if (!dp->write_proc) 212 return -EIO; 213 214 /* FIXME: does this routine need ppos? probably... */ 215 return dp->write_proc(file, buffer, count, dp->data); 216 } 217 218 219 static loff_t 220 proc_file_lseek(struct file *file, loff_t offset, int orig) 221 { 222 loff_t retval = -EINVAL; 223 switch (orig) { 224 case 1: 225 offset += file->f_pos; 226 /* fallthrough */ 227 case 0: 228 if (offset < 0 || offset > MAX_NON_LFS) 229 break; 230 file->f_pos = retval = offset; 231 } 232 return retval; 233 } 234 235 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) 236 { 237 struct inode *inode = dentry->d_inode; 238 struct proc_dir_entry *de = PDE(inode); 239 int error; 240 241 error = inode_change_ok(inode, iattr); 242 if (error) 243 goto out; 244 245 error = inode_setattr(inode, iattr); 246 if (error) 247 goto out; 248 249 de->uid = inode->i_uid; 250 de->gid = inode->i_gid; 251 de->mode = inode->i_mode; 252 out: 253 return error; 254 } 255 256 static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, 257 struct kstat *stat) 258 { 259 struct inode *inode = dentry->d_inode; 260 struct proc_dir_entry *de = PROC_I(inode)->pde; 261 if (de && de->nlink) 262 inode->i_nlink = de->nlink; 263 264 generic_fillattr(inode, stat); 265 return 0; 266 } 267 268 static struct inode_operations proc_file_inode_operations = { 269 .setattr = proc_notify_change, 270 }; 271 272 /* 273 * This function parses a name such as "tty/driver/serial", and 274 * returns the struct proc_dir_entry for "/proc/tty/driver", and 275 * returns "serial" in residual. 276 */ 277 static int xlate_proc_name(const char *name, 278 struct proc_dir_entry **ret, const char **residual) 279 { 280 const char *cp = name, *next; 281 struct proc_dir_entry *de; 282 int len; 283 int rtn = 0; 284 285 spin_lock(&proc_subdir_lock); 286 de = &proc_root; 287 while (1) { 288 next = strchr(cp, '/'); 289 if (!next) 290 break; 291 292 len = next - cp; 293 for (de = de->subdir; de ; de = de->next) { 294 if (proc_match(len, cp, de)) 295 break; 296 } 297 if (!de) { 298 rtn = -ENOENT; 299 goto out; 300 } 301 cp += len + 1; 302 } 303 *residual = cp; 304 *ret = de; 305 out: 306 spin_unlock(&proc_subdir_lock); 307 return rtn; 308 } 309 310 static DEFINE_IDR(proc_inum_idr); 311 static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 312 313 #define PROC_DYNAMIC_FIRST 0xF0000000UL 314 315 /* 316 * Return an inode number between PROC_DYNAMIC_FIRST and 317 * 0xffffffff, or zero on failure. 318 */ 319 static unsigned int get_inode_number(void) 320 { 321 int i, inum = 0; 322 int error; 323 324 retry: 325 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 326 return 0; 327 328 spin_lock(&proc_inum_lock); 329 error = idr_get_new(&proc_inum_idr, NULL, &i); 330 spin_unlock(&proc_inum_lock); 331 if (error == -EAGAIN) 332 goto retry; 333 else if (error) 334 return 0; 335 336 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 337 338 /* inum will never be more than 0xf0ffffff, so no check 339 * for overflow. 340 */ 341 342 return inum; 343 } 344 345 static void release_inode_number(unsigned int inum) 346 { 347 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; 348 349 spin_lock(&proc_inum_lock); 350 idr_remove(&proc_inum_idr, id); 351 spin_unlock(&proc_inum_lock); 352 } 353 354 static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) 355 { 356 nd_set_link(nd, PDE(dentry->d_inode)->data); 357 return NULL; 358 } 359 360 static struct inode_operations proc_link_inode_operations = { 361 .readlink = generic_readlink, 362 .follow_link = proc_follow_link, 363 }; 364 365 /* 366 * As some entries in /proc are volatile, we want to 367 * get rid of unused dentries. This could be made 368 * smarter: we could keep a "volatile" flag in the 369 * inode to indicate which ones to keep. 370 */ 371 static int proc_delete_dentry(struct dentry * dentry) 372 { 373 return 1; 374 } 375 376 static struct dentry_operations proc_dentry_operations = 377 { 378 .d_delete = proc_delete_dentry, 379 }; 380 381 /* 382 * Don't create negative dentries here, return -ENOENT by hand 383 * instead. 384 */ 385 struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 386 { 387 struct inode *inode = NULL; 388 struct proc_dir_entry * de; 389 int error = -ENOENT; 390 391 lock_kernel(); 392 spin_lock(&proc_subdir_lock); 393 de = PDE(dir); 394 if (de) { 395 for (de = de->subdir; de ; de = de->next) { 396 if (de->namelen != dentry->d_name.len) 397 continue; 398 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 399 unsigned int ino = de->low_ino; 400 401 spin_unlock(&proc_subdir_lock); 402 error = -EINVAL; 403 inode = proc_get_inode(dir->i_sb, ino, de); 404 spin_lock(&proc_subdir_lock); 405 break; 406 } 407 } 408 } 409 spin_unlock(&proc_subdir_lock); 410 unlock_kernel(); 411 412 if (inode) { 413 dentry->d_op = &proc_dentry_operations; 414 d_add(dentry, inode); 415 return NULL; 416 } 417 return ERR_PTR(error); 418 } 419 420 /* 421 * This returns non-zero if at EOF, so that the /proc 422 * root directory can use this and check if it should 423 * continue with the <pid> entries.. 424 * 425 * Note that the VFS-layer doesn't care about the return 426 * value of the readdir() call, as long as it's non-negative 427 * for success.. 428 */ 429 int proc_readdir(struct file * filp, 430 void * dirent, filldir_t filldir) 431 { 432 struct proc_dir_entry * de; 433 unsigned int ino; 434 int i; 435 struct inode *inode = filp->f_dentry->d_inode; 436 int ret = 0; 437 438 lock_kernel(); 439 440 ino = inode->i_ino; 441 de = PDE(inode); 442 if (!de) { 443 ret = -EINVAL; 444 goto out; 445 } 446 i = filp->f_pos; 447 switch (i) { 448 case 0: 449 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 450 goto out; 451 i++; 452 filp->f_pos++; 453 /* fall through */ 454 case 1: 455 if (filldir(dirent, "..", 2, i, 456 parent_ino(filp->f_dentry), 457 DT_DIR) < 0) 458 goto out; 459 i++; 460 filp->f_pos++; 461 /* fall through */ 462 default: 463 spin_lock(&proc_subdir_lock); 464 de = de->subdir; 465 i -= 2; 466 for (;;) { 467 if (!de) { 468 ret = 1; 469 spin_unlock(&proc_subdir_lock); 470 goto out; 471 } 472 if (!i) 473 break; 474 de = de->next; 475 i--; 476 } 477 478 do { 479 /* filldir passes info to user space */ 480 spin_unlock(&proc_subdir_lock); 481 if (filldir(dirent, de->name, de->namelen, filp->f_pos, 482 de->low_ino, de->mode >> 12) < 0) 483 goto out; 484 spin_lock(&proc_subdir_lock); 485 filp->f_pos++; 486 de = de->next; 487 } while (de); 488 spin_unlock(&proc_subdir_lock); 489 } 490 ret = 1; 491 out: unlock_kernel(); 492 return ret; 493 } 494 495 /* 496 * These are the generic /proc directory operations. They 497 * use the in-memory "struct proc_dir_entry" tree to parse 498 * the /proc directory. 499 */ 500 static struct file_operations proc_dir_operations = { 501 .read = generic_read_dir, 502 .readdir = proc_readdir, 503 }; 504 505 /* 506 * proc directories can do almost nothing.. 507 */ 508 static struct inode_operations proc_dir_inode_operations = { 509 .lookup = proc_lookup, 510 .getattr = proc_getattr, 511 .setattr = proc_notify_change, 512 }; 513 514 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) 515 { 516 unsigned int i; 517 518 i = get_inode_number(); 519 if (i == 0) 520 return -EAGAIN; 521 dp->low_ino = i; 522 523 spin_lock(&proc_subdir_lock); 524 dp->next = dir->subdir; 525 dp->parent = dir; 526 dir->subdir = dp; 527 spin_unlock(&proc_subdir_lock); 528 529 if (S_ISDIR(dp->mode)) { 530 if (dp->proc_iops == NULL) { 531 dp->proc_fops = &proc_dir_operations; 532 dp->proc_iops = &proc_dir_inode_operations; 533 } 534 dir->nlink++; 535 } else if (S_ISLNK(dp->mode)) { 536 if (dp->proc_iops == NULL) 537 dp->proc_iops = &proc_link_inode_operations; 538 } else if (S_ISREG(dp->mode)) { 539 if (dp->proc_fops == NULL) 540 dp->proc_fops = &proc_file_operations; 541 if (dp->proc_iops == NULL) 542 dp->proc_iops = &proc_file_inode_operations; 543 } 544 return 0; 545 } 546 547 /* 548 * Kill an inode that got unregistered.. 549 */ 550 static void proc_kill_inodes(struct proc_dir_entry *de) 551 { 552 struct list_head *p; 553 struct super_block *sb = proc_mnt->mnt_sb; 554 555 /* 556 * Actually it's a partial revoke(). 557 */ 558 file_list_lock(); 559 list_for_each(p, &sb->s_files) { 560 struct file * filp = list_entry(p, struct file, f_u.fu_list); 561 struct dentry * dentry = filp->f_dentry; 562 struct inode * inode; 563 const struct file_operations *fops; 564 565 if (dentry->d_op != &proc_dentry_operations) 566 continue; 567 inode = dentry->d_inode; 568 if (PDE(inode) != de) 569 continue; 570 fops = filp->f_op; 571 filp->f_op = NULL; 572 fops_put(fops); 573 } 574 file_list_unlock(); 575 } 576 577 static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, 578 const char *name, 579 mode_t mode, 580 nlink_t nlink) 581 { 582 struct proc_dir_entry *ent = NULL; 583 const char *fn = name; 584 int len; 585 586 /* make sure name is valid */ 587 if (!name || !strlen(name)) goto out; 588 589 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) 590 goto out; 591 592 /* At this point there must not be any '/' characters beyond *fn */ 593 if (strchr(fn, '/')) 594 goto out; 595 596 len = strlen(fn); 597 598 ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); 599 if (!ent) goto out; 600 601 memset(ent, 0, sizeof(struct proc_dir_entry)); 602 memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); 603 ent->name = ((char *) ent) + sizeof(*ent); 604 ent->namelen = len; 605 ent->mode = mode; 606 ent->nlink = nlink; 607 out: 608 return ent; 609 } 610 611 struct proc_dir_entry *proc_symlink(const char *name, 612 struct proc_dir_entry *parent, const char *dest) 613 { 614 struct proc_dir_entry *ent; 615 616 ent = proc_create(&parent,name, 617 (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); 618 619 if (ent) { 620 ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); 621 if (ent->data) { 622 strcpy((char*)ent->data,dest); 623 if (proc_register(parent, ent) < 0) { 624 kfree(ent->data); 625 kfree(ent); 626 ent = NULL; 627 } 628 } else { 629 kfree(ent); 630 ent = NULL; 631 } 632 } 633 return ent; 634 } 635 636 struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, 637 struct proc_dir_entry *parent) 638 { 639 struct proc_dir_entry *ent; 640 641 ent = proc_create(&parent, name, S_IFDIR | mode, 2); 642 if (ent) { 643 ent->proc_fops = &proc_dir_operations; 644 ent->proc_iops = &proc_dir_inode_operations; 645 646 if (proc_register(parent, ent) < 0) { 647 kfree(ent); 648 ent = NULL; 649 } 650 } 651 return ent; 652 } 653 654 struct proc_dir_entry *proc_mkdir(const char *name, 655 struct proc_dir_entry *parent) 656 { 657 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); 658 } 659 660 struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, 661 struct proc_dir_entry *parent) 662 { 663 struct proc_dir_entry *ent; 664 nlink_t nlink; 665 666 if (S_ISDIR(mode)) { 667 if ((mode & S_IALLUGO) == 0) 668 mode |= S_IRUGO | S_IXUGO; 669 nlink = 2; 670 } else { 671 if ((mode & S_IFMT) == 0) 672 mode |= S_IFREG; 673 if ((mode & S_IALLUGO) == 0) 674 mode |= S_IRUGO; 675 nlink = 1; 676 } 677 678 ent = proc_create(&parent,name,mode,nlink); 679 if (ent) { 680 if (S_ISDIR(mode)) { 681 ent->proc_fops = &proc_dir_operations; 682 ent->proc_iops = &proc_dir_inode_operations; 683 } 684 if (proc_register(parent, ent) < 0) { 685 kfree(ent); 686 ent = NULL; 687 } 688 } 689 return ent; 690 } 691 692 void free_proc_entry(struct proc_dir_entry *de) 693 { 694 unsigned int ino = de->low_ino; 695 696 if (ino < PROC_DYNAMIC_FIRST) 697 return; 698 699 release_inode_number(ino); 700 701 if (S_ISLNK(de->mode) && de->data) 702 kfree(de->data); 703 kfree(de); 704 } 705 706 /* 707 * Remove a /proc entry and free it if it's not currently in use. 708 * If it is in use, we set the 'deleted' flag. 709 */ 710 void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 711 { 712 struct proc_dir_entry **p; 713 struct proc_dir_entry *de; 714 const char *fn = name; 715 int len; 716 717 if (!parent && xlate_proc_name(name, &parent, &fn) != 0) 718 goto out; 719 len = strlen(fn); 720 721 spin_lock(&proc_subdir_lock); 722 for (p = &parent->subdir; *p; p=&(*p)->next ) { 723 if (!proc_match(len, fn, *p)) 724 continue; 725 de = *p; 726 *p = de->next; 727 de->next = NULL; 728 if (S_ISDIR(de->mode)) 729 parent->nlink--; 730 proc_kill_inodes(de); 731 de->nlink = 0; 732 WARN_ON(de->subdir); 733 if (!atomic_read(&de->count)) 734 free_proc_entry(de); 735 else { 736 de->deleted = 1; 737 printk("remove_proc_entry: %s/%s busy, count=%d\n", 738 parent->name, de->name, atomic_read(&de->count)); 739 } 740 break; 741 } 742 spin_unlock(&proc_subdir_lock); 743 out: 744 return; 745 } 746