1 /* 2 * proc/fs/generic.c --- generic routines for the proc-fs 3 * 4 * This file contains generic proc-fs routines for handling 5 * directories and files. 6 * 7 * Copyright (C) 1991, 1992 Linus Torvalds. 8 * Copyright (C) 1997 Theodore Ts'o 9 */ 10 11 #include <linux/cache.h> 12 #include <linux/errno.h> 13 #include <linux/time.h> 14 #include <linux/proc_fs.h> 15 #include <linux/stat.h> 16 #include <linux/mm.h> 17 #include <linux/module.h> 18 #include <linux/namei.h> 19 #include <linux/slab.h> 20 #include <linux/printk.h> 21 #include <linux/mount.h> 22 #include <linux/init.h> 23 #include <linux/idr.h> 24 #include <linux/bitops.h> 25 #include <linux/spinlock.h> 26 #include <linux/completion.h> 27 #include <linux/uaccess.h> 28 #include <linux/seq_file.h> 29 30 #include "internal.h" 31 32 static DEFINE_RWLOCK(proc_subdir_lock); 33 34 struct kmem_cache *proc_dir_entry_cache __ro_after_init; 35 36 void pde_free(struct proc_dir_entry *pde) 37 { 38 if (S_ISLNK(pde->mode)) 39 kfree(pde->data); 40 if (pde->name != pde->inline_name) 41 kfree(pde->name); 42 kmem_cache_free(proc_dir_entry_cache, pde); 43 } 44 45 static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len) 46 { 47 if (len < de->namelen) 48 return -1; 49 if (len > de->namelen) 50 return 1; 51 52 return memcmp(name, de->name, len); 53 } 54 55 static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir) 56 { 57 return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry, 58 subdir_node); 59 } 60 61 static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir) 62 { 63 return rb_entry_safe(rb_next(&dir->subdir_node), struct proc_dir_entry, 64 subdir_node); 65 } 66 67 static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, 68 const char *name, 69 unsigned int len) 70 { 71 struct rb_node *node = dir->subdir.rb_node; 72 73 while (node) { 74 struct proc_dir_entry *de = rb_entry(node, 75 struct proc_dir_entry, 76 subdir_node); 77 int result = proc_match(name, de, len); 78 79 if (result < 0) 80 node = node->rb_left; 81 else if (result > 0) 82 node = node->rb_right; 83 else 84 return de; 85 } 86 return NULL; 87 } 88 89 static bool pde_subdir_insert(struct proc_dir_entry *dir, 90 struct proc_dir_entry *de) 91 { 92 struct rb_root *root = &dir->subdir; 93 struct rb_node **new = &root->rb_node, *parent = NULL; 94 95 /* Figure out where to put new node */ 96 while (*new) { 97 struct proc_dir_entry *this = rb_entry(*new, 98 struct proc_dir_entry, 99 subdir_node); 100 int result = proc_match(de->name, this, de->namelen); 101 102 parent = *new; 103 if (result < 0) 104 new = &(*new)->rb_left; 105 else if (result > 0) 106 new = &(*new)->rb_right; 107 else 108 return false; 109 } 110 111 /* Add new node and rebalance tree. */ 112 rb_link_node(&de->subdir_node, parent, new); 113 rb_insert_color(&de->subdir_node, root); 114 return true; 115 } 116 117 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) 118 { 119 struct inode *inode = d_inode(dentry); 120 struct proc_dir_entry *de = PDE(inode); 121 int error; 122 123 error = setattr_prepare(dentry, iattr); 124 if (error) 125 return error; 126 127 setattr_copy(inode, iattr); 128 mark_inode_dirty(inode); 129 130 proc_set_user(de, inode->i_uid, inode->i_gid); 131 de->mode = inode->i_mode; 132 return 0; 133 } 134 135 static int proc_getattr(const struct path *path, struct kstat *stat, 136 u32 request_mask, unsigned int query_flags) 137 { 138 struct inode *inode = d_inode(path->dentry); 139 struct proc_dir_entry *de = PDE(inode); 140 if (de && de->nlink) 141 set_nlink(inode, de->nlink); 142 143 generic_fillattr(inode, stat); 144 return 0; 145 } 146 147 static const struct inode_operations proc_file_inode_operations = { 148 .setattr = proc_notify_change, 149 }; 150 151 /* 152 * This function parses a name such as "tty/driver/serial", and 153 * returns the struct proc_dir_entry for "/proc/tty/driver", and 154 * returns "serial" in residual. 155 */ 156 static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret, 157 const char **residual) 158 { 159 const char *cp = name, *next; 160 struct proc_dir_entry *de; 161 unsigned int len; 162 163 de = *ret; 164 if (!de) 165 de = &proc_root; 166 167 while (1) { 168 next = strchr(cp, '/'); 169 if (!next) 170 break; 171 172 len = next - cp; 173 de = pde_subdir_find(de, cp, len); 174 if (!de) { 175 WARN(1, "name '%s'\n", name); 176 return -ENOENT; 177 } 178 cp += len + 1; 179 } 180 *residual = cp; 181 *ret = de; 182 return 0; 183 } 184 185 static int xlate_proc_name(const char *name, struct proc_dir_entry **ret, 186 const char **residual) 187 { 188 int rv; 189 190 read_lock(&proc_subdir_lock); 191 rv = __xlate_proc_name(name, ret, residual); 192 read_unlock(&proc_subdir_lock); 193 return rv; 194 } 195 196 static DEFINE_IDA(proc_inum_ida); 197 198 #define PROC_DYNAMIC_FIRST 0xF0000000U 199 200 /* 201 * Return an inode number between PROC_DYNAMIC_FIRST and 202 * 0xffffffff, or zero on failure. 203 */ 204 int proc_alloc_inum(unsigned int *inum) 205 { 206 int i; 207 208 i = ida_simple_get(&proc_inum_ida, 0, UINT_MAX - PROC_DYNAMIC_FIRST + 1, 209 GFP_KERNEL); 210 if (i < 0) 211 return i; 212 213 *inum = PROC_DYNAMIC_FIRST + (unsigned int)i; 214 return 0; 215 } 216 217 void proc_free_inum(unsigned int inum) 218 { 219 ida_simple_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); 220 } 221 222 static int proc_misc_d_revalidate(struct dentry *dentry, unsigned int flags) 223 { 224 if (flags & LOOKUP_RCU) 225 return -ECHILD; 226 227 if (atomic_read(&PDE(d_inode(dentry))->in_use) < 0) 228 return 0; /* revalidate */ 229 return 1; 230 } 231 232 static int proc_misc_d_delete(const struct dentry *dentry) 233 { 234 return atomic_read(&PDE(d_inode(dentry))->in_use) < 0; 235 } 236 237 static const struct dentry_operations proc_misc_dentry_ops = { 238 .d_revalidate = proc_misc_d_revalidate, 239 .d_delete = proc_misc_d_delete, 240 }; 241 242 /* 243 * Don't create negative dentries here, return -ENOENT by hand 244 * instead. 245 */ 246 struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, 247 struct proc_dir_entry *de) 248 { 249 struct inode *inode; 250 251 read_lock(&proc_subdir_lock); 252 de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len); 253 if (de) { 254 pde_get(de); 255 read_unlock(&proc_subdir_lock); 256 inode = proc_get_inode(dir->i_sb, de); 257 if (!inode) 258 return ERR_PTR(-ENOMEM); 259 d_set_d_op(dentry, &proc_misc_dentry_ops); 260 return d_splice_alias(inode, dentry); 261 } 262 read_unlock(&proc_subdir_lock); 263 return ERR_PTR(-ENOENT); 264 } 265 266 struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, 267 unsigned int flags) 268 { 269 return proc_lookup_de(dir, dentry, PDE(dir)); 270 } 271 272 /* 273 * This returns non-zero if at EOF, so that the /proc 274 * root directory can use this and check if it should 275 * continue with the <pid> entries.. 276 * 277 * Note that the VFS-layer doesn't care about the return 278 * value of the readdir() call, as long as it's non-negative 279 * for success.. 280 */ 281 int proc_readdir_de(struct file *file, struct dir_context *ctx, 282 struct proc_dir_entry *de) 283 { 284 int i; 285 286 if (!dir_emit_dots(file, ctx)) 287 return 0; 288 289 i = ctx->pos - 2; 290 read_lock(&proc_subdir_lock); 291 de = pde_subdir_first(de); 292 for (;;) { 293 if (!de) { 294 read_unlock(&proc_subdir_lock); 295 return 0; 296 } 297 if (!i) 298 break; 299 de = pde_subdir_next(de); 300 i--; 301 } 302 303 do { 304 struct proc_dir_entry *next; 305 pde_get(de); 306 read_unlock(&proc_subdir_lock); 307 if (!dir_emit(ctx, de->name, de->namelen, 308 de->low_ino, de->mode >> 12)) { 309 pde_put(de); 310 return 0; 311 } 312 ctx->pos++; 313 read_lock(&proc_subdir_lock); 314 next = pde_subdir_next(de); 315 pde_put(de); 316 de = next; 317 } while (de); 318 read_unlock(&proc_subdir_lock); 319 return 1; 320 } 321 322 int proc_readdir(struct file *file, struct dir_context *ctx) 323 { 324 struct inode *inode = file_inode(file); 325 326 return proc_readdir_de(file, ctx, PDE(inode)); 327 } 328 329 /* 330 * These are the generic /proc directory operations. They 331 * use the in-memory "struct proc_dir_entry" tree to parse 332 * the /proc directory. 333 */ 334 static const struct file_operations proc_dir_operations = { 335 .llseek = generic_file_llseek, 336 .read = generic_read_dir, 337 .iterate_shared = proc_readdir, 338 }; 339 340 /* 341 * proc directories can do almost nothing.. 342 */ 343 static const struct inode_operations proc_dir_inode_operations = { 344 .lookup = proc_lookup, 345 .getattr = proc_getattr, 346 .setattr = proc_notify_change, 347 }; 348 349 /* returns the registered entry, or frees dp and returns NULL on failure */ 350 struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, 351 struct proc_dir_entry *dp) 352 { 353 if (proc_alloc_inum(&dp->low_ino)) 354 goto out_free_entry; 355 356 write_lock(&proc_subdir_lock); 357 dp->parent = dir; 358 if (pde_subdir_insert(dir, dp) == false) { 359 WARN(1, "proc_dir_entry '%s/%s' already registered\n", 360 dir->name, dp->name); 361 write_unlock(&proc_subdir_lock); 362 goto out_free_inum; 363 } 364 write_unlock(&proc_subdir_lock); 365 366 return dp; 367 out_free_inum: 368 proc_free_inum(dp->low_ino); 369 out_free_entry: 370 pde_free(dp); 371 return NULL; 372 } 373 374 static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, 375 const char *name, 376 umode_t mode, 377 nlink_t nlink) 378 { 379 struct proc_dir_entry *ent = NULL; 380 const char *fn; 381 struct qstr qstr; 382 383 if (xlate_proc_name(name, parent, &fn) != 0) 384 goto out; 385 qstr.name = fn; 386 qstr.len = strlen(fn); 387 if (qstr.len == 0 || qstr.len >= 256) { 388 WARN(1, "name len %u\n", qstr.len); 389 return NULL; 390 } 391 if (qstr.len == 1 && fn[0] == '.') { 392 WARN(1, "name '.'\n"); 393 return NULL; 394 } 395 if (qstr.len == 2 && fn[0] == '.' && fn[1] == '.') { 396 WARN(1, "name '..'\n"); 397 return NULL; 398 } 399 if (*parent == &proc_root && name_to_int(&qstr) != ~0U) { 400 WARN(1, "create '/proc/%s' by hand\n", qstr.name); 401 return NULL; 402 } 403 if (is_empty_pde(*parent)) { 404 WARN(1, "attempt to add to permanently empty directory"); 405 return NULL; 406 } 407 408 ent = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL); 409 if (!ent) 410 goto out; 411 412 if (qstr.len + 1 <= SIZEOF_PDE_INLINE_NAME) { 413 ent->name = ent->inline_name; 414 } else { 415 ent->name = kmalloc(qstr.len + 1, GFP_KERNEL); 416 if (!ent->name) { 417 pde_free(ent); 418 return NULL; 419 } 420 } 421 422 memcpy(ent->name, fn, qstr.len + 1); 423 ent->namelen = qstr.len; 424 ent->mode = mode; 425 ent->nlink = nlink; 426 ent->subdir = RB_ROOT; 427 refcount_set(&ent->refcnt, 1); 428 spin_lock_init(&ent->pde_unload_lock); 429 INIT_LIST_HEAD(&ent->pde_openers); 430 proc_set_user(ent, (*parent)->uid, (*parent)->gid); 431 432 out: 433 return ent; 434 } 435 436 struct proc_dir_entry *proc_symlink(const char *name, 437 struct proc_dir_entry *parent, const char *dest) 438 { 439 struct proc_dir_entry *ent; 440 441 ent = __proc_create(&parent, name, 442 (S_IFLNK | S_IRUGO | S_IWUGO | S_IXUGO),1); 443 444 if (ent) { 445 ent->data = kmalloc((ent->size=strlen(dest))+1, GFP_KERNEL); 446 if (ent->data) { 447 strcpy((char*)ent->data,dest); 448 ent->proc_iops = &proc_link_inode_operations; 449 ent = proc_register(parent, ent); 450 } else { 451 pde_free(ent); 452 ent = NULL; 453 } 454 } 455 return ent; 456 } 457 EXPORT_SYMBOL(proc_symlink); 458 459 struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, 460 struct proc_dir_entry *parent, void *data) 461 { 462 struct proc_dir_entry *ent; 463 464 if (mode == 0) 465 mode = S_IRUGO | S_IXUGO; 466 467 ent = __proc_create(&parent, name, S_IFDIR | mode, 2); 468 if (ent) { 469 ent->data = data; 470 ent->proc_fops = &proc_dir_operations; 471 ent->proc_iops = &proc_dir_inode_operations; 472 parent->nlink++; 473 ent = proc_register(parent, ent); 474 if (!ent) 475 parent->nlink--; 476 } 477 return ent; 478 } 479 EXPORT_SYMBOL_GPL(proc_mkdir_data); 480 481 struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode, 482 struct proc_dir_entry *parent) 483 { 484 return proc_mkdir_data(name, mode, parent, NULL); 485 } 486 EXPORT_SYMBOL(proc_mkdir_mode); 487 488 struct proc_dir_entry *proc_mkdir(const char *name, 489 struct proc_dir_entry *parent) 490 { 491 return proc_mkdir_data(name, 0, parent, NULL); 492 } 493 EXPORT_SYMBOL(proc_mkdir); 494 495 struct proc_dir_entry *proc_create_mount_point(const char *name) 496 { 497 umode_t mode = S_IFDIR | S_IRUGO | S_IXUGO; 498 struct proc_dir_entry *ent, *parent = NULL; 499 500 ent = __proc_create(&parent, name, mode, 2); 501 if (ent) { 502 ent->data = NULL; 503 ent->proc_fops = NULL; 504 ent->proc_iops = NULL; 505 parent->nlink++; 506 ent = proc_register(parent, ent); 507 if (!ent) 508 parent->nlink--; 509 } 510 return ent; 511 } 512 EXPORT_SYMBOL(proc_create_mount_point); 513 514 struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, 515 struct proc_dir_entry **parent, void *data) 516 { 517 struct proc_dir_entry *p; 518 519 if ((mode & S_IFMT) == 0) 520 mode |= S_IFREG; 521 if ((mode & S_IALLUGO) == 0) 522 mode |= S_IRUGO; 523 if (WARN_ON_ONCE(!S_ISREG(mode))) 524 return NULL; 525 526 p = __proc_create(parent, name, mode, 1); 527 if (p) { 528 p->proc_iops = &proc_file_inode_operations; 529 p->data = data; 530 } 531 return p; 532 } 533 534 struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, 535 struct proc_dir_entry *parent, 536 const struct file_operations *proc_fops, void *data) 537 { 538 struct proc_dir_entry *p; 539 540 BUG_ON(proc_fops == NULL); 541 542 p = proc_create_reg(name, mode, &parent, data); 543 if (!p) 544 return NULL; 545 p->proc_fops = proc_fops; 546 return proc_register(parent, p); 547 } 548 EXPORT_SYMBOL(proc_create_data); 549 550 struct proc_dir_entry *proc_create(const char *name, umode_t mode, 551 struct proc_dir_entry *parent, 552 const struct file_operations *proc_fops) 553 { 554 return proc_create_data(name, mode, parent, proc_fops, NULL); 555 } 556 EXPORT_SYMBOL(proc_create); 557 558 static int proc_seq_open(struct inode *inode, struct file *file) 559 { 560 struct proc_dir_entry *de = PDE(inode); 561 562 if (de->state_size) 563 return seq_open_private(file, de->seq_ops, de->state_size); 564 return seq_open(file, de->seq_ops); 565 } 566 567 static int proc_seq_release(struct inode *inode, struct file *file) 568 { 569 struct proc_dir_entry *de = PDE(inode); 570 571 if (de->state_size) 572 return seq_release_private(inode, file); 573 return seq_release(inode, file); 574 } 575 576 static const struct file_operations proc_seq_fops = { 577 .open = proc_seq_open, 578 .read = seq_read, 579 .llseek = seq_lseek, 580 .release = proc_seq_release, 581 }; 582 583 struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, 584 struct proc_dir_entry *parent, const struct seq_operations *ops, 585 unsigned int state_size, void *data) 586 { 587 struct proc_dir_entry *p; 588 589 p = proc_create_reg(name, mode, &parent, data); 590 if (!p) 591 return NULL; 592 p->proc_fops = &proc_seq_fops; 593 p->seq_ops = ops; 594 p->state_size = state_size; 595 return proc_register(parent, p); 596 } 597 EXPORT_SYMBOL(proc_create_seq_private); 598 599 static int proc_single_open(struct inode *inode, struct file *file) 600 { 601 struct proc_dir_entry *de = PDE(inode); 602 603 return single_open(file, de->single_show, de->data); 604 } 605 606 static const struct file_operations proc_single_fops = { 607 .open = proc_single_open, 608 .read = seq_read, 609 .llseek = seq_lseek, 610 .release = single_release, 611 }; 612 613 struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode, 614 struct proc_dir_entry *parent, 615 int (*show)(struct seq_file *, void *), void *data) 616 { 617 struct proc_dir_entry *p; 618 619 p = proc_create_reg(name, mode, &parent, data); 620 if (!p) 621 return NULL; 622 p->proc_fops = &proc_single_fops; 623 p->single_show = show; 624 return proc_register(parent, p); 625 } 626 EXPORT_SYMBOL(proc_create_single_data); 627 628 void proc_set_size(struct proc_dir_entry *de, loff_t size) 629 { 630 de->size = size; 631 } 632 EXPORT_SYMBOL(proc_set_size); 633 634 void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) 635 { 636 de->uid = uid; 637 de->gid = gid; 638 } 639 EXPORT_SYMBOL(proc_set_user); 640 641 void pde_put(struct proc_dir_entry *pde) 642 { 643 if (refcount_dec_and_test(&pde->refcnt)) { 644 proc_free_inum(pde->low_ino); 645 pde_free(pde); 646 } 647 } 648 649 /* 650 * Remove a /proc entry and free it if it's not currently in use. 651 */ 652 void remove_proc_entry(const char *name, struct proc_dir_entry *parent) 653 { 654 struct proc_dir_entry *de = NULL; 655 const char *fn = name; 656 unsigned int len; 657 658 write_lock(&proc_subdir_lock); 659 if (__xlate_proc_name(name, &parent, &fn) != 0) { 660 write_unlock(&proc_subdir_lock); 661 return; 662 } 663 len = strlen(fn); 664 665 de = pde_subdir_find(parent, fn, len); 666 if (de) 667 rb_erase(&de->subdir_node, &parent->subdir); 668 write_unlock(&proc_subdir_lock); 669 if (!de) { 670 WARN(1, "name '%s'\n", name); 671 return; 672 } 673 674 proc_entry_rundown(de); 675 676 if (S_ISDIR(de->mode)) 677 parent->nlink--; 678 de->nlink = 0; 679 WARN(pde_subdir_first(de), 680 "%s: removing non-empty directory '%s/%s', leaking at least '%s'\n", 681 __func__, de->parent->name, de->name, pde_subdir_first(de)->name); 682 pde_put(de); 683 } 684 EXPORT_SYMBOL(remove_proc_entry); 685 686 int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) 687 { 688 struct proc_dir_entry *root = NULL, *de, *next; 689 const char *fn = name; 690 unsigned int len; 691 692 write_lock(&proc_subdir_lock); 693 if (__xlate_proc_name(name, &parent, &fn) != 0) { 694 write_unlock(&proc_subdir_lock); 695 return -ENOENT; 696 } 697 len = strlen(fn); 698 699 root = pde_subdir_find(parent, fn, len); 700 if (!root) { 701 write_unlock(&proc_subdir_lock); 702 return -ENOENT; 703 } 704 rb_erase(&root->subdir_node, &parent->subdir); 705 706 de = root; 707 while (1) { 708 next = pde_subdir_first(de); 709 if (next) { 710 rb_erase(&next->subdir_node, &de->subdir); 711 de = next; 712 continue; 713 } 714 write_unlock(&proc_subdir_lock); 715 716 proc_entry_rundown(de); 717 next = de->parent; 718 if (S_ISDIR(de->mode)) 719 next->nlink--; 720 de->nlink = 0; 721 if (de == root) 722 break; 723 pde_put(de); 724 725 write_lock(&proc_subdir_lock); 726 de = next; 727 } 728 pde_put(root); 729 return 0; 730 } 731 EXPORT_SYMBOL(remove_proc_subtree); 732 733 void *proc_get_parent_data(const struct inode *inode) 734 { 735 struct proc_dir_entry *de = PDE(inode); 736 return de->parent->data; 737 } 738 EXPORT_SYMBOL_GPL(proc_get_parent_data); 739 740 void proc_remove(struct proc_dir_entry *de) 741 { 742 if (de) 743 remove_proc_subtree(de->name, de->parent); 744 } 745 EXPORT_SYMBOL(proc_remove); 746 747 void *PDE_DATA(const struct inode *inode) 748 { 749 return __PDE_DATA(inode); 750 } 751 EXPORT_SYMBOL(PDE_DATA); 752 753 /* 754 * Pull a user buffer into memory and pass it to the file's write handler if 755 * one is supplied. The ->write() method is permitted to modify the 756 * kernel-side buffer. 757 */ 758 ssize_t proc_simple_write(struct file *f, const char __user *ubuf, size_t size, 759 loff_t *_pos) 760 { 761 struct proc_dir_entry *pde = PDE(file_inode(f)); 762 char *buf; 763 int ret; 764 765 if (!pde->write) 766 return -EACCES; 767 if (size == 0 || size > PAGE_SIZE - 1) 768 return -EINVAL; 769 buf = memdup_user_nul(ubuf, size); 770 if (IS_ERR(buf)) 771 return PTR_ERR(buf); 772 ret = pde->write(f, buf, size); 773 kfree(buf); 774 return ret == 0 ? size : ret; 775 } 776