1 /* 2 * proc/fs/generic.c --- generic routines for the proc-fs 3 * 4 * This file contains generic proc-fs routines for handling 5 * directories and files. 6 * 7 * Copyright (C) 1991, 1992 Linus Torvalds. 8 * Copyright (C) 1997 Theodore Ts'o 9 */ 10 11 #include <linux/errno.h> 12 #include <linux/time.h> 13 #include <linux/proc_fs.h> 14 #include <linux/stat.h> 15 #include <linux/module.h> 16 #include <linux/mount.h> 17 #include <linux/smp_lock.h> 18 #include <linux/init.h> 19 #include <linux/idr.h> 20 #include <linux/namei.h> 21 #include <linux/bitops.h> 22 #include <linux/spinlock.h> 23 #include <linux/completion.h> 24 #include <asm/uaccess.h> 25 26 #include "internal.h" 27 28 static ssize_t proc_file_read(struct file *file, char __user *buf, 29 size_t nbytes, loff_t *ppos); 30 static ssize_t proc_file_write(struct file *file, const char __user *buffer, 31 size_t count, loff_t *ppos); 32 static loff_t proc_file_lseek(struct file *, loff_t, int); 33 34 DEFINE_SPINLOCK(proc_subdir_lock); 35 36 static int proc_match(int len, const char *name, struct proc_dir_entry *de) 37 { 38 if (de->namelen != len) 39 return 0; 40 return !memcmp(name, de->name, len); 41 } 42 43 static const struct file_operations proc_file_operations = { 44 .llseek = proc_file_lseek, 45 .read = proc_file_read, 46 .write = proc_file_write, 47 }; 48 49 /* buffer size is one page but our output routines use some slack for overruns */ 50 #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) 51 52 static ssize_t 53 proc_file_read(struct file *file, char __user *buf, size_t nbytes, 54 loff_t *ppos) 55 { 56 struct inode * inode = file->f_path.dentry->d_inode; 57 char *page; 58 ssize_t retval=0; 59 int eof=0; 60 ssize_t n, count; 61 char *start; 62 struct proc_dir_entry * dp; 63 unsigned long long pos; 64 65 /* 66 * Gaah, please just use "seq_file" instead. The legacy /proc 67 * interfaces cut loff_t down to off_t for reads, and ignore 68 * the offset entirely for writes.. 69 */ 70 pos = *ppos; 71 if (pos > MAX_NON_LFS) 72 return 0; 73 if (nbytes > MAX_NON_LFS - pos) 74 nbytes = MAX_NON_LFS - pos; 75 76 dp = PDE(inode); 77 if (!(page = (char*) __get_free_page(GFP_TEMPORARY))) 78 return -ENOMEM; 79 80 while ((nbytes > 0) && !eof) { 81 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); 82 83 start = NULL; 84 if (dp->get_info) { 85 /* Handle old net routines */ 86 n = dp->get_info(page, &start, *ppos, count); 87 if (n < count) 88 eof = 1; 89 } else if (dp->read_proc) { 90 /* 91 * How to be a proc read function 92 * ------------------------------ 93 * Prototype: 94 * int f(char *buffer, char **start, off_t offset, 95 * int count, int *peof, void *dat) 96 * 97 * Assume that the buffer is "count" bytes in size. 98 * 99 * If you know you have supplied all the data you 100 * have, set *peof. 101 * 102 * You have three ways to return data: 103 * 0) Leave *start = NULL. (This is the default.) 104 * Put the data of the requested offset at that 105 * offset within the buffer. Return the number (n) 106 * of bytes there are from the beginning of the 107 * buffer up to the last byte of data. If the 108 * number of supplied bytes (= n - offset) is 109 * greater than zero and you didn't signal eof 110 * and the reader is prepared to take more data 111 * you will be called again with the requested 112 * offset advanced by the number of bytes 113 * absorbed. This interface is useful for files 114 * no larger than the buffer. 115 * 1) Set *start = an unsigned long value less than 116 * the buffer address but greater than zero. 117 * Put the data of the requested offset at the 118 * beginning of the buffer. Return the number of 119 * bytes of data placed there. If this number is 120 * greater than zero and you didn't signal eof 121 * and the reader is prepared to take more data 122 * you will be called again with the requested 123 * offset advanced by *start. This interface is 124 * useful when you have a large file consisting 125 * of a series of blocks which you want to count 126 * and return as wholes. 127 * (Hack by Paul.Russell@rustcorp.com.au) 128 * 2) Set *start = an address within the buffer. 129 * Put the data of the requested offset at *start. 130 * Return the number of bytes of data placed there. 131 * If this number is greater than zero and you 132 * didn't signal eof and the reader is prepared to 133 * take more data you will be called again with the 134 * requested offset advanced by the number of bytes 135 * absorbed. 136 */ 137 n = dp->read_proc(page, &start, *ppos, 138 count, &eof, dp->data); 139 } else 140 break; 141 142 if (n == 0) /* end of file */ 143 break; 144 if (n < 0) { /* error */ 145 if (retval == 0) 146 retval = n; 147 break; 148 } 149 150 if (start == NULL) { 151 if (n > PAGE_SIZE) { 152 printk(KERN_ERR 153 "proc_file_read: Apparent buffer overflow!\n"); 154 n = PAGE_SIZE; 155 } 156 n -= *ppos; 157 if (n <= 0) 158 break; 159 if (n > count) 160 n = count; 161 start = page + *ppos; 162 } else if (start < page) { 163 if (n > PAGE_SIZE) { 164 printk(KERN_ERR 165 "proc_file_read: Apparent buffer overflow!\n"); 166 n = PAGE_SIZE; 167 } 168 if (n > count) { 169 /* 170 * Don't reduce n because doing so might 171 * cut off part of a data block. 172 */ 173 printk(KERN_WARNING 174 "proc_file_read: Read count exceeded\n"); 175 } 176 } else /* start >= page */ { 177 unsigned long startoff = (unsigned long)(start - page); 178 if (n > (PAGE_SIZE - startoff)) { 179 printk(KERN_ERR 180 "proc_file_read: Apparent buffer overflow!\n"); 181 n = PAGE_SIZE - startoff; 182 } 183 if (n > count) 184 n = count; 185 } 186 187 n -= copy_to_user(buf, start < page ? page : start, n); 188 if (n == 0) { 189 if (retval == 0) 190 retval = -EFAULT; 191 break; 192 } 193 194 *ppos += start < page ? (unsigned long)start : n; 195 nbytes -= n; 196 buf += n; 197 retval += n; 198 } 199 free_page((unsigned long) page); 200 return retval; 201 } 202 203 static ssize_t 204 proc_file_write(struct file *file, const char __user *buffer, 205 size_t count, loff_t *ppos) 206 { 207 struct inode *inode = file->f_path.dentry->d_inode; 208 struct proc_dir_entry * dp; 209 210 dp = PDE(inode); 211 212 if (!dp->write_proc) 213 return -EIO; 214 215 /* FIXME: does this routine need ppos? probably... */ 216 return dp->write_proc(file, buffer, count, dp->data); 217 } 218 219 220 static loff_t 221 proc_file_lseek(struct file *file, loff_t offset, int orig) 222 { 223 loff_t retval = -EINVAL; 224 switch (orig) { 225 case 1: 226 offset += file->f_pos; 227 /* fallthrough */ 228 case 0: 229 if (offset < 0 || offset > MAX_NON_LFS) 230 break; 231 file->f_pos = retval = offset; 232 } 233 return retval; 234 } 235 236 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) 237 { 238 struct inode *inode = dentry->d_inode; 239 struct proc_dir_entry *de = PDE(inode); 240 int error; 241 242 error = inode_change_ok(inode, iattr); 243 if (error) 244 goto out; 245 246 error = inode_setattr(inode, iattr); 247 if (error) 248 goto out; 249 250 de->uid = inode->i_uid; 251 de->gid = inode->i_gid; 252 de->mode = inode->i_mode; 253 out: 254 return error; 255 } 256 257 static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, 258 struct kstat *stat) 259 { 260 struct inode *inode = dentry->d_inode; 261 struct proc_dir_entry *de = PROC_I(inode)->pde; 262 if (de && de->nlink) 263 inode->i_nlink = de->nlink; 264 265 generic_fillattr(inode, stat); 266 return 0; 267 } 268 269 static const struct inode_operations proc_file_inode_operations = { 270 .setattr = proc_notify_change, 271 }; 272 273 /* 274 * This function parses a name such as "tty/driver/serial", and 275 * returns the struct proc_dir_entry for "/proc/tty/driver", and 276 * returns "serial" in residual. 277 */ 278 static int xlate_proc_name(const char *name, 279 struct proc_dir_entry **ret, const char **residual) 280 { 281 const char *cp = name, *next; 282 struct proc_dir_entry *de; 283 int len; 284 int rtn = 0; 285 286 spin_lock(&proc_subdir_lock); 287 de = &proc_root; 288 while (1) { 289 next = strchr(cp, '/'); 290 if (!next) 291 break; 292 293 len = next - cp; 294 for (de = de->subdir; de ; de = de->next) { 295 if (proc_match(len, cp, de)) 296 break; 297 } 298 if (!de) { 299 rtn = -ENOENT; 300 goto out; 301 } 302 cp += len + 1; 303 } 304 *residual = cp; 305 *ret = de; 306 out: 307 spin_unlock(&proc_subdir_lock); 308 return rtn; 309 } 310 311 static DEFINE_IDR(proc_inum_idr); 312 static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 313 314 #define PROC_DYNAMIC_FIRST 0xF0000000UL 315 316 /* 317 * Return an inode number between PROC_DYNAMIC_FIRST and 318 * 0xffffffff, or zero on failure. 319 */ 320 static unsigned int get_inode_number(void) 321 { 322 int i, inum = 0; 323 int error; 324 325 retry: 326 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 327 return 0; 328 329 spin_lock(&proc_inum_lock); 330 error = idr_get_new(&proc_inum_idr, NULL, &i); 331 spin_unlock(&proc_inum_lock); 332 if (error == -EAGAIN) 333 goto retry; 334 else if (error) 335 return 0; 336 337 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 338 339 /* inum will never be more than 0xf0ffffff, so no check 340 * for overflow. 341 */ 342 343 return inum; 344 } 345 346 static void release_inode_number(unsigned int inum) 347 { 348 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; 349 350 spin_lock(&proc_inum_lock); 351 idr_remove(&proc_inum_idr, id); 352 spin_unlock(&proc_inum_lock); 353 } 354 355 static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) 356 { 357 nd_set_link(nd, PDE(dentry->d_inode)->data); 358 return NULL; 359 } 360 361 static const struct inode_operations proc_link_inode_operations = { 362 .readlink = generic_readlink, 363 .follow_link = proc_follow_link, 364 }; 365 366 /* 367 * As some entries in /proc are volatile, we want to 368 * get rid of unused dentries. This could be made 369 * smarter: we could keep a "volatile" flag in the 370 * inode to indicate which ones to keep. 371 */ 372 static int proc_delete_dentry(struct dentry * dentry) 373 { 374 return 1; 375 } 376 377 static struct dentry_operations proc_dentry_operations = 378 { 379 .d_delete = proc_delete_dentry, 380 }; 381 382 /* 383 * Don't create negative dentries here, return -ENOENT by hand 384 * instead. 385 */ 386 struct dentry *proc_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 387 { 388 struct inode *inode = NULL; 389 struct proc_dir_entry * de; 390 int error = -ENOENT; 391 392 lock_kernel(); 393 spin_lock(&proc_subdir_lock); 394 de = PDE(dir); 395 if (de) { 396 for (de = de->subdir; de ; de = de->next) { 397 if (de->namelen != dentry->d_name.len) 398 continue; 399 if (!memcmp(dentry->d_name.name, de->name, de->namelen)) { 400 unsigned int ino; 401 402 if (de->shadow_proc) 403 de = de->shadow_proc(current, de); 404 ino = de->low_ino; 405 de_get(de); 406 spin_unlock(&proc_subdir_lock); 407 error = -EINVAL; 408 inode = proc_get_inode(dir->i_sb, ino, de); 409 spin_lock(&proc_subdir_lock); 410 break; 411 } 412 } 413 } 414 spin_unlock(&proc_subdir_lock); 415 unlock_kernel(); 416 417 if (inode) { 418 dentry->d_op = &proc_dentry_operations; 419 d_add(dentry, inode); 420 return NULL; 421 } 422 de_put(de); 423 return ERR_PTR(error); 424 } 425 426 /* 427 * This returns non-zero if at EOF, so that the /proc 428 * root directory can use this and check if it should 429 * continue with the <pid> entries.. 430 * 431 * Note that the VFS-layer doesn't care about the return 432 * value of the readdir() call, as long as it's non-negative 433 * for success.. 434 */ 435 int proc_readdir(struct file * filp, 436 void * dirent, filldir_t filldir) 437 { 438 struct proc_dir_entry * de; 439 unsigned int ino; 440 int i; 441 struct inode *inode = filp->f_path.dentry->d_inode; 442 int ret = 0; 443 444 lock_kernel(); 445 446 ino = inode->i_ino; 447 de = PDE(inode); 448 if (!de) { 449 ret = -EINVAL; 450 goto out; 451 } 452 i = filp->f_pos; 453 switch (i) { 454 case 0: 455 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) 456 goto out; 457 i++; 458 filp->f_pos++; 459 /* fall through */ 460 case 1: 461 if (filldir(dirent, "..", 2, i, 462 parent_ino(filp->f_path.dentry), 463 DT_DIR) < 0) 464 goto out; 465 i++; 466 filp->f_pos++; 467 /* fall through */ 468 default: 469 spin_lock(&proc_subdir_lock); 470 de = de->subdir; 471 i -= 2; 472 for (;;) { 473 if (!de) { 474 ret = 1; 475 spin_unlock(&proc_subdir_lock); 476 goto out; 477 } 478 if (!i) 479 break; 480 de = de->next; 481 i--; 482 } 483 484 do { 485 struct proc_dir_entry *next; 486 487 /* filldir passes info to user space */ 488 de_get(de); 489 spin_unlock(&proc_subdir_lock); 490 if (filldir(dirent, de->name, de->namelen, filp->f_pos, 491 de->low_ino, de->mode >> 12) < 0) { 492 de_put(de); 493 goto out; 494 } 495 spin_lock(&proc_subdir_lock); 496 filp->f_pos++; 497 next = de->next; 498 de_put(de); 499 de = next; 500 } while (de); 501 spin_unlock(&proc_subdir_lock); 502 } 503 ret = 1; 504 out: unlock_kernel(); 505 return ret; 506 } 507 508 /* 509 * These are the generic /proc directory operations. They 510 * use the in-memory "struct proc_dir_entry" tree to parse 511 * the /proc directory. 512 */ 513 static const struct file_operations proc_dir_operations = { 514 .read = generic_read_dir, 515 .readdir = proc_readdir, 516 }; 517 518 /* 519 * proc directories can do almost nothing.. 520 */ 521 static const struct inode_operations proc_dir_inode_operations = { 522 .lookup = proc_lookup, 523 .getattr = proc_getattr, 524 .setattr = proc_notify_change, 525 }; 526 527 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) 528 { 529 unsigned int i; 530 531 i = get_inode_number(); 532 if (i == 0) 533 return -EAGAIN; 534 dp->low_ino = i; 535 536 if (S_ISDIR(dp->mode)) { 537 if (dp->proc_iops == NULL) { 538 dp->proc_fops = &proc_dir_operations; 539 dp->proc_iops = &proc_dir_inode_operations; 540 } 541 dir->nlink++; 542 } else if (S_ISLNK(dp->mode)) { 543 if (dp->proc_iops == NULL) 544 dp->proc_iops = &proc_link_inode_operations; 545 } else if (S_ISREG(dp->mode)) { 546 if (dp->proc_fops == NULL) 547 dp->proc_fops = &proc_file_operations; 548 if (dp->proc_iops == NULL) 549 dp->proc_iops = &proc_file_inode_operations; 550 } 551 552 spin_lock(&proc_subdir_lock); 553 dp->next = dir->subdir; 554 dp->parent = dir; 555 dir->subdir = dp; 556 spin_unlock(&proc_subdir_lock); 557 558 return 0; 559 } 560 561 static struct proc_dir_entry *proc_create(struct proc_dir_entry **parent, 562 const char *name, 563 mode_t mode, 564 nlink_t nlink) 565 { 566 struct proc_dir_entry *ent = NULL; 567 const char *fn = name; 568 int len; 569 570 /* make sure name is valid */ 571 if (!name || !strlen(name)) goto out; 572 573 if (!(*parent) && xlate_proc_name(name, parent, &fn) != 0) 574 goto out; 575 576 /* At this point there must not be any '/' characters beyond *fn */ 577 if (strchr(fn, '/')) 578 goto out; 579 580 len = strlen(fn); 581 582 ent = kmalloc(sizeof(struct proc_dir_entry) + len + 1, GFP_KERNEL); 583 if (!ent) goto out; 584 585 memset(ent, 0, sizeof(struct proc_dir_entry)); 586 memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); 587 ent->name = ((char *) ent) + sizeof(*ent); 588 ent->namelen = len; 589 ent->mode = mode; 590 ent->nlink = nlink; 591 atomic_set(&ent->count, 1); 592 ent->pde_users = 0; 593 spin_lock_init(&ent->pde_unload_lock); 594 ent->pde_unload_completion = NULL; 595 out: 596 return ent; 597 } 598 599 struct proc_dir_entry *proc_symlink(const char *name, 600 struct proc_dir_entry *parent, const char *dest) 601 { 602 struct proc_dir_entry *ent; 603 604 ent = proc_create(&parent,name, 605 (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); 606 607 if (ent) { 608 ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); 609 if (ent->data) { 610 strcpy((char*)ent->data,dest); 611 if (proc_register(parent, ent) < 0) { 612 kfree(ent->data); 613 kfree(ent); 614 ent = NULL; 615 } 616 } else { 617 kfree(ent); 618 ent = NULL; 619 } 620 } 621 return ent; 622 } 623 624 struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, 625 struct proc_dir_entry *parent) 626 { 627 struct proc_dir_entry *ent; 628 629 ent = proc_create(&parent, name, S_IFDIR | mode, 2); 630 if (ent) { 631 if (proc_register(parent, ent) < 0) { 632 kfree(ent); 633 ent = NULL; 634 } 635 } 636 return ent; 637 } 638 639 struct proc_dir_entry *proc_mkdir(const char *name, 640 struct proc_dir_entry *parent) 641 { 642 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); 643 } 644 645 struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, 646 struct proc_dir_entry *parent) 647 { 648 struct proc_dir_entry *ent; 649 nlink_t nlink; 650 651 if (S_ISDIR(mode)) { 652 if ((mode & S_IALLUGO) == 0) 653 mode |= S_IRUGO | S_IXUGO; 654 nlink = 2; 655 } else { 656 if ((mode & S_IFMT) == 0) 657 mode |= S_IFREG; 658 if ((mode & S_IALLUGO) == 0) 659 mode |= S_IRUGO; 660 nlink = 1; 661 } 662 663 ent = proc_create(&parent,name,mode,nlink); 664 if (ent) { 665 if (proc_register(parent, ent) < 0) { 666 kfree(ent); 667 ent = NULL; 668 } 669 } 670 return ent; 671 } 672 673 void free_proc_entry(struct proc_dir_entry *de) 674 { 675 unsigned int ino = de->low_ino; 676 677 if (ino < PROC_DYNAMIC_FIRST) 678 return; 679 680 release_inode_number(ino); 681 682 if (S_ISLNK(de->mode) && de->data) 683 kfree(de->data); 684 kfree(de); 685 } 686 687 /* 688 * Remove a /proc entry and free it if it's not currently in use. 689 */ 690 void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 691 { 692 struct proc_dir_entry **p; 693 struct proc_dir_entry *de; 694 const char *fn = name; 695 int len; 696 697 if (!parent && xlate_proc_name(name, &parent, &fn) != 0) 698 goto out; 699 len = strlen(fn); 700 701 spin_lock(&proc_subdir_lock); 702 for (p = &parent->subdir; *p; p=&(*p)->next ) { 703 if (!proc_match(len, fn, *p)) 704 continue; 705 de = *p; 706 *p = de->next; 707 de->next = NULL; 708 709 spin_lock(&de->pde_unload_lock); 710 /* 711 * Stop accepting new callers into module. If you're 712 * dynamically allocating ->proc_fops, save a pointer somewhere. 713 */ 714 de->proc_fops = NULL; 715 /* Wait until all existing callers into module are done. */ 716 if (de->pde_users > 0) { 717 DECLARE_COMPLETION_ONSTACK(c); 718 719 if (!de->pde_unload_completion) 720 de->pde_unload_completion = &c; 721 722 spin_unlock(&de->pde_unload_lock); 723 spin_unlock(&proc_subdir_lock); 724 725 wait_for_completion(de->pde_unload_completion); 726 727 spin_lock(&proc_subdir_lock); 728 goto continue_removing; 729 } 730 spin_unlock(&de->pde_unload_lock); 731 732 continue_removing: 733 if (S_ISDIR(de->mode)) 734 parent->nlink--; 735 de->nlink = 0; 736 WARN_ON(de->subdir); 737 if (atomic_dec_and_test(&de->count)) 738 free_proc_entry(de); 739 break; 740 } 741 spin_unlock(&proc_subdir_lock); 742 out: 743 return; 744 } 745