1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * inode.c - part of tracefs, a pseudo file system for activating tracing 4 * 5 * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com> 6 * 7 * Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com> 8 * 9 * tracefs is the file system that is used by the tracing infrastructure. 10 */ 11 12 #include <linux/module.h> 13 #include <linux/fs.h> 14 #include <linux/mount.h> 15 #include <linux/kobject.h> 16 #include <linux/namei.h> 17 #include <linux/tracefs.h> 18 #include <linux/fsnotify.h> 19 #include <linux/security.h> 20 #include <linux/seq_file.h> 21 #include <linux/parser.h> 22 #include <linux/magic.h> 23 #include <linux/slab.h> 24 #include "internal.h" 25 26 #define TRACEFS_DEFAULT_MODE 0700 27 static struct kmem_cache *tracefs_inode_cachep __ro_after_init; 28 29 static struct vfsmount *tracefs_mount; 30 static int tracefs_mount_count; 31 static bool tracefs_registered; 32 33 static struct inode *tracefs_alloc_inode(struct super_block *sb) 34 { 35 struct tracefs_inode *ti; 36 37 ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL); 38 if (!ti) 39 return NULL; 40 41 return &ti->vfs_inode; 42 } 43 44 static void tracefs_free_inode(struct inode *inode) 45 { 46 kmem_cache_free(tracefs_inode_cachep, get_tracefs(inode)); 47 } 48 49 static ssize_t default_read_file(struct file *file, char __user *buf, 50 size_t count, loff_t *ppos) 51 { 52 return 0; 53 } 54 55 static ssize_t default_write_file(struct file *file, const char __user *buf, 56 size_t count, loff_t *ppos) 57 { 58 return count; 59 } 60 61 static const struct file_operations tracefs_file_operations = { 62 .read = default_read_file, 63 .write = default_write_file, 64 .open = simple_open, 65 .llseek = noop_llseek, 66 }; 67 68 static struct tracefs_dir_ops { 69 int (*mkdir)(const char *name); 70 int (*rmdir)(const char *name); 71 } tracefs_ops __ro_after_init; 72 73 static char *get_dname(struct dentry *dentry) 74 { 75 const char *dname; 76 char *name; 77 int len = dentry->d_name.len; 78 79 dname = dentry->d_name.name; 80 name = kmalloc(len + 1, GFP_KERNEL); 81 if (!name) 82 return NULL; 83 memcpy(name, dname, len); 84 name[len] = 0; 85 return name; 86 } 87 88 static int tracefs_syscall_mkdir(struct mnt_idmap *idmap, 89 struct inode *inode, struct dentry *dentry, 90 umode_t mode) 91 { 92 struct tracefs_inode *ti; 93 char *name; 94 int ret; 95 96 name = get_dname(dentry); 97 if (!name) 98 return -ENOMEM; 99 100 /* 101 * This is a new directory that does not take the default of 102 * the rootfs. It becomes the default permissions for all the 103 * files and directories underneath it. 104 */ 105 ti = get_tracefs(inode); 106 ti->flags |= TRACEFS_INSTANCE_INODE; 107 ti->private = inode; 108 109 /* 110 * The mkdir call can call the generic functions that create 111 * the files within the tracefs system. It is up to the individual 112 * mkdir routine to handle races. 113 */ 114 inode_unlock(inode); 115 ret = tracefs_ops.mkdir(name); 116 inode_lock(inode); 117 118 kfree(name); 119 120 return ret; 121 } 122 123 static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry) 124 { 125 char *name; 126 int ret; 127 128 name = get_dname(dentry); 129 if (!name) 130 return -ENOMEM; 131 132 /* 133 * The rmdir call can call the generic functions that create 134 * the files within the tracefs system. It is up to the individual 135 * rmdir routine to handle races. 136 * This time we need to unlock not only the parent (inode) but 137 * also the directory that is being deleted. 138 */ 139 inode_unlock(inode); 140 inode_unlock(d_inode(dentry)); 141 142 ret = tracefs_ops.rmdir(name); 143 144 inode_lock_nested(inode, I_MUTEX_PARENT); 145 inode_lock(d_inode(dentry)); 146 147 kfree(name); 148 149 return ret; 150 } 151 152 static void set_tracefs_inode_owner(struct inode *inode) 153 { 154 struct tracefs_inode *ti = get_tracefs(inode); 155 struct inode *root_inode = ti->private; 156 157 /* 158 * If this inode has never been referenced, then update 159 * the permissions to the superblock. 160 */ 161 if (!(ti->flags & TRACEFS_UID_PERM_SET)) 162 inode->i_uid = root_inode->i_uid; 163 164 if (!(ti->flags & TRACEFS_GID_PERM_SET)) 165 inode->i_gid = root_inode->i_gid; 166 } 167 168 static int tracefs_permission(struct mnt_idmap *idmap, 169 struct inode *inode, int mask) 170 { 171 set_tracefs_inode_owner(inode); 172 return generic_permission(idmap, inode, mask); 173 } 174 175 static int tracefs_getattr(struct mnt_idmap *idmap, 176 const struct path *path, struct kstat *stat, 177 u32 request_mask, unsigned int flags) 178 { 179 struct inode *inode = d_backing_inode(path->dentry); 180 181 set_tracefs_inode_owner(inode); 182 generic_fillattr(idmap, request_mask, inode, stat); 183 return 0; 184 } 185 186 static int tracefs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 187 struct iattr *attr) 188 { 189 unsigned int ia_valid = attr->ia_valid; 190 struct inode *inode = d_inode(dentry); 191 struct tracefs_inode *ti = get_tracefs(inode); 192 193 if (ia_valid & ATTR_UID) 194 ti->flags |= TRACEFS_UID_PERM_SET; 195 196 if (ia_valid & ATTR_GID) 197 ti->flags |= TRACEFS_GID_PERM_SET; 198 199 return simple_setattr(idmap, dentry, attr); 200 } 201 202 static const struct inode_operations tracefs_instance_dir_inode_operations = { 203 .lookup = simple_lookup, 204 .mkdir = tracefs_syscall_mkdir, 205 .rmdir = tracefs_syscall_rmdir, 206 .permission = tracefs_permission, 207 .getattr = tracefs_getattr, 208 .setattr = tracefs_setattr, 209 }; 210 211 static const struct inode_operations tracefs_dir_inode_operations = { 212 .lookup = simple_lookup, 213 .permission = tracefs_permission, 214 .getattr = tracefs_getattr, 215 .setattr = tracefs_setattr, 216 }; 217 218 static const struct inode_operations tracefs_file_inode_operations = { 219 .permission = tracefs_permission, 220 .getattr = tracefs_getattr, 221 .setattr = tracefs_setattr, 222 }; 223 224 struct inode *tracefs_get_inode(struct super_block *sb) 225 { 226 struct inode *inode = new_inode(sb); 227 if (inode) { 228 inode->i_ino = get_next_ino(); 229 simple_inode_init_ts(inode); 230 } 231 return inode; 232 } 233 234 struct tracefs_mount_opts { 235 kuid_t uid; 236 kgid_t gid; 237 umode_t mode; 238 /* Opt_* bitfield. */ 239 unsigned int opts; 240 }; 241 242 enum { 243 Opt_uid, 244 Opt_gid, 245 Opt_mode, 246 Opt_err 247 }; 248 249 static const match_table_t tokens = { 250 {Opt_uid, "uid=%u"}, 251 {Opt_gid, "gid=%u"}, 252 {Opt_mode, "mode=%o"}, 253 {Opt_err, NULL} 254 }; 255 256 struct tracefs_fs_info { 257 struct tracefs_mount_opts mount_opts; 258 }; 259 260 static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) 261 { 262 substring_t args[MAX_OPT_ARGS]; 263 int option; 264 int token; 265 kuid_t uid; 266 kgid_t gid; 267 char *p; 268 269 opts->opts = 0; 270 opts->mode = TRACEFS_DEFAULT_MODE; 271 272 while ((p = strsep(&data, ",")) != NULL) { 273 if (!*p) 274 continue; 275 276 token = match_token(p, tokens, args); 277 switch (token) { 278 case Opt_uid: 279 if (match_int(&args[0], &option)) 280 return -EINVAL; 281 uid = make_kuid(current_user_ns(), option); 282 if (!uid_valid(uid)) 283 return -EINVAL; 284 opts->uid = uid; 285 break; 286 case Opt_gid: 287 if (match_int(&args[0], &option)) 288 return -EINVAL; 289 gid = make_kgid(current_user_ns(), option); 290 if (!gid_valid(gid)) 291 return -EINVAL; 292 opts->gid = gid; 293 break; 294 case Opt_mode: 295 if (match_octal(&args[0], &option)) 296 return -EINVAL; 297 opts->mode = option & S_IALLUGO; 298 break; 299 /* 300 * We might like to report bad mount options here; 301 * but traditionally tracefs has ignored all mount options 302 */ 303 } 304 305 opts->opts |= BIT(token); 306 } 307 308 return 0; 309 } 310 311 static int tracefs_apply_options(struct super_block *sb, bool remount) 312 { 313 struct tracefs_fs_info *fsi = sb->s_fs_info; 314 struct inode *inode = d_inode(sb->s_root); 315 struct tracefs_mount_opts *opts = &fsi->mount_opts; 316 umode_t tmp_mode; 317 318 /* 319 * On remount, only reset mode/uid/gid if they were provided as mount 320 * options. 321 */ 322 323 if (!remount || opts->opts & BIT(Opt_mode)) { 324 tmp_mode = READ_ONCE(inode->i_mode) & ~S_IALLUGO; 325 tmp_mode |= opts->mode; 326 WRITE_ONCE(inode->i_mode, tmp_mode); 327 } 328 329 if (!remount || opts->opts & BIT(Opt_uid)) 330 inode->i_uid = opts->uid; 331 332 if (!remount || opts->opts & BIT(Opt_gid)) 333 inode->i_gid = opts->gid; 334 335 return 0; 336 } 337 338 static int tracefs_remount(struct super_block *sb, int *flags, char *data) 339 { 340 int err; 341 struct tracefs_fs_info *fsi = sb->s_fs_info; 342 343 sync_filesystem(sb); 344 err = tracefs_parse_options(data, &fsi->mount_opts); 345 if (err) 346 goto fail; 347 348 tracefs_apply_options(sb, true); 349 350 fail: 351 return err; 352 } 353 354 static int tracefs_show_options(struct seq_file *m, struct dentry *root) 355 { 356 struct tracefs_fs_info *fsi = root->d_sb->s_fs_info; 357 struct tracefs_mount_opts *opts = &fsi->mount_opts; 358 359 if (!uid_eq(opts->uid, GLOBAL_ROOT_UID)) 360 seq_printf(m, ",uid=%u", 361 from_kuid_munged(&init_user_ns, opts->uid)); 362 if (!gid_eq(opts->gid, GLOBAL_ROOT_GID)) 363 seq_printf(m, ",gid=%u", 364 from_kgid_munged(&init_user_ns, opts->gid)); 365 if (opts->mode != TRACEFS_DEFAULT_MODE) 366 seq_printf(m, ",mode=%o", opts->mode); 367 368 return 0; 369 } 370 371 static const struct super_operations tracefs_super_operations = { 372 .alloc_inode = tracefs_alloc_inode, 373 .free_inode = tracefs_free_inode, 374 .drop_inode = generic_delete_inode, 375 .statfs = simple_statfs, 376 .remount_fs = tracefs_remount, 377 .show_options = tracefs_show_options, 378 }; 379 380 /* 381 * It would be cleaner if eventfs had its own dentry ops. 382 * 383 * Note that d_revalidate is called potentially under RCU, 384 * so it can't take the eventfs mutex etc. It's fine - if 385 * we open a file just as it's marked dead, things will 386 * still work just fine, and just see the old stale case. 387 */ 388 static void tracefs_d_release(struct dentry *dentry) 389 { 390 if (dentry->d_fsdata) 391 eventfs_d_release(dentry); 392 } 393 394 static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags) 395 { 396 struct eventfs_inode *ei = dentry->d_fsdata; 397 398 return !(ei && ei->is_freed); 399 } 400 401 static const struct dentry_operations tracefs_dentry_operations = { 402 .d_revalidate = tracefs_d_revalidate, 403 .d_release = tracefs_d_release, 404 }; 405 406 static int trace_fill_super(struct super_block *sb, void *data, int silent) 407 { 408 static const struct tree_descr trace_files[] = {{""}}; 409 struct tracefs_fs_info *fsi; 410 int err; 411 412 fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL); 413 sb->s_fs_info = fsi; 414 if (!fsi) { 415 err = -ENOMEM; 416 goto fail; 417 } 418 419 err = tracefs_parse_options(data, &fsi->mount_opts); 420 if (err) 421 goto fail; 422 423 err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files); 424 if (err) 425 goto fail; 426 427 sb->s_op = &tracefs_super_operations; 428 sb->s_d_op = &tracefs_dentry_operations; 429 430 tracefs_apply_options(sb, false); 431 432 return 0; 433 434 fail: 435 kfree(fsi); 436 sb->s_fs_info = NULL; 437 return err; 438 } 439 440 static struct dentry *trace_mount(struct file_system_type *fs_type, 441 int flags, const char *dev_name, 442 void *data) 443 { 444 return mount_single(fs_type, flags, data, trace_fill_super); 445 } 446 447 static struct file_system_type trace_fs_type = { 448 .owner = THIS_MODULE, 449 .name = "tracefs", 450 .mount = trace_mount, 451 .kill_sb = kill_litter_super, 452 }; 453 MODULE_ALIAS_FS("tracefs"); 454 455 struct dentry *tracefs_start_creating(const char *name, struct dentry *parent) 456 { 457 struct dentry *dentry; 458 int error; 459 460 pr_debug("tracefs: creating file '%s'\n",name); 461 462 error = simple_pin_fs(&trace_fs_type, &tracefs_mount, 463 &tracefs_mount_count); 464 if (error) 465 return ERR_PTR(error); 466 467 /* If the parent is not specified, we create it in the root. 468 * We need the root dentry to do this, which is in the super 469 * block. A pointer to that is in the struct vfsmount that we 470 * have around. 471 */ 472 if (!parent) 473 parent = tracefs_mount->mnt_root; 474 475 inode_lock(d_inode(parent)); 476 if (unlikely(IS_DEADDIR(d_inode(parent)))) 477 dentry = ERR_PTR(-ENOENT); 478 else 479 dentry = lookup_one_len(name, parent, strlen(name)); 480 if (!IS_ERR(dentry) && d_inode(dentry)) { 481 dput(dentry); 482 dentry = ERR_PTR(-EEXIST); 483 } 484 485 if (IS_ERR(dentry)) { 486 inode_unlock(d_inode(parent)); 487 simple_release_fs(&tracefs_mount, &tracefs_mount_count); 488 } 489 490 return dentry; 491 } 492 493 struct dentry *tracefs_failed_creating(struct dentry *dentry) 494 { 495 inode_unlock(d_inode(dentry->d_parent)); 496 dput(dentry); 497 simple_release_fs(&tracefs_mount, &tracefs_mount_count); 498 return NULL; 499 } 500 501 struct dentry *tracefs_end_creating(struct dentry *dentry) 502 { 503 inode_unlock(d_inode(dentry->d_parent)); 504 return dentry; 505 } 506 507 /* Find the inode that this will use for default */ 508 static struct inode *instance_inode(struct dentry *parent, struct inode *inode) 509 { 510 struct tracefs_inode *ti; 511 512 /* If parent is NULL then use root inode */ 513 if (!parent) 514 return d_inode(inode->i_sb->s_root); 515 516 /* Find the inode that is flagged as an instance or the root inode */ 517 while (!IS_ROOT(parent)) { 518 ti = get_tracefs(d_inode(parent)); 519 if (ti->flags & TRACEFS_INSTANCE_INODE) 520 break; 521 parent = parent->d_parent; 522 } 523 524 return d_inode(parent); 525 } 526 527 /** 528 * tracefs_create_file - create a file in the tracefs filesystem 529 * @name: a pointer to a string containing the name of the file to create. 530 * @mode: the permission that the file should have. 531 * @parent: a pointer to the parent dentry for this file. This should be a 532 * directory dentry if set. If this parameter is NULL, then the 533 * file will be created in the root of the tracefs filesystem. 534 * @data: a pointer to something that the caller will want to get to later 535 * on. The inode.i_private pointer will point to this value on 536 * the open() call. 537 * @fops: a pointer to a struct file_operations that should be used for 538 * this file. 539 * 540 * This is the basic "create a file" function for tracefs. It allows for a 541 * wide range of flexibility in creating a file, or a directory (if you want 542 * to create a directory, the tracefs_create_dir() function is 543 * recommended to be used instead.) 544 * 545 * This function will return a pointer to a dentry if it succeeds. This 546 * pointer must be passed to the tracefs_remove() function when the file is 547 * to be removed (no automatic cleanup happens if your module is unloaded, 548 * you are responsible here.) If an error occurs, %NULL will be returned. 549 * 550 * If tracefs is not enabled in the kernel, the value -%ENODEV will be 551 * returned. 552 */ 553 struct dentry *tracefs_create_file(const char *name, umode_t mode, 554 struct dentry *parent, void *data, 555 const struct file_operations *fops) 556 { 557 struct tracefs_inode *ti; 558 struct dentry *dentry; 559 struct inode *inode; 560 561 if (security_locked_down(LOCKDOWN_TRACEFS)) 562 return NULL; 563 564 if (!(mode & S_IFMT)) 565 mode |= S_IFREG; 566 BUG_ON(!S_ISREG(mode)); 567 dentry = tracefs_start_creating(name, parent); 568 569 if (IS_ERR(dentry)) 570 return NULL; 571 572 inode = tracefs_get_inode(dentry->d_sb); 573 if (unlikely(!inode)) 574 return tracefs_failed_creating(dentry); 575 576 ti = get_tracefs(inode); 577 ti->private = instance_inode(parent, inode); 578 579 inode->i_mode = mode; 580 inode->i_op = &tracefs_file_inode_operations; 581 inode->i_fop = fops ? fops : &tracefs_file_operations; 582 inode->i_private = data; 583 inode->i_uid = d_inode(dentry->d_parent)->i_uid; 584 inode->i_gid = d_inode(dentry->d_parent)->i_gid; 585 d_instantiate(dentry, inode); 586 fsnotify_create(d_inode(dentry->d_parent), dentry); 587 return tracefs_end_creating(dentry); 588 } 589 590 static struct dentry *__create_dir(const char *name, struct dentry *parent, 591 const struct inode_operations *ops) 592 { 593 struct tracefs_inode *ti; 594 struct dentry *dentry = tracefs_start_creating(name, parent); 595 struct inode *inode; 596 597 if (IS_ERR(dentry)) 598 return NULL; 599 600 inode = tracefs_get_inode(dentry->d_sb); 601 if (unlikely(!inode)) 602 return tracefs_failed_creating(dentry); 603 604 /* Do not set bits for OTH */ 605 inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP; 606 inode->i_op = ops; 607 inode->i_fop = &simple_dir_operations; 608 inode->i_uid = d_inode(dentry->d_parent)->i_uid; 609 inode->i_gid = d_inode(dentry->d_parent)->i_gid; 610 611 ti = get_tracefs(inode); 612 ti->private = instance_inode(parent, inode); 613 614 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 615 inc_nlink(inode); 616 d_instantiate(dentry, inode); 617 inc_nlink(d_inode(dentry->d_parent)); 618 fsnotify_mkdir(d_inode(dentry->d_parent), dentry); 619 return tracefs_end_creating(dentry); 620 } 621 622 /** 623 * tracefs_create_dir - create a directory in the tracefs filesystem 624 * @name: a pointer to a string containing the name of the directory to 625 * create. 626 * @parent: a pointer to the parent dentry for this file. This should be a 627 * directory dentry if set. If this parameter is NULL, then the 628 * directory will be created in the root of the tracefs filesystem. 629 * 630 * This function creates a directory in tracefs with the given name. 631 * 632 * This function will return a pointer to a dentry if it succeeds. This 633 * pointer must be passed to the tracefs_remove() function when the file is 634 * to be removed. If an error occurs, %NULL will be returned. 635 * 636 * If tracing is not enabled in the kernel, the value -%ENODEV will be 637 * returned. 638 */ 639 struct dentry *tracefs_create_dir(const char *name, struct dentry *parent) 640 { 641 if (security_locked_down(LOCKDOWN_TRACEFS)) 642 return NULL; 643 644 return __create_dir(name, parent, &tracefs_dir_inode_operations); 645 } 646 647 /** 648 * tracefs_create_instance_dir - create the tracing instances directory 649 * @name: The name of the instances directory to create 650 * @parent: The parent directory that the instances directory will exist 651 * @mkdir: The function to call when a mkdir is performed. 652 * @rmdir: The function to call when a rmdir is performed. 653 * 654 * Only one instances directory is allowed. 655 * 656 * The instances directory is special as it allows for mkdir and rmdir 657 * to be done by userspace. When a mkdir or rmdir is performed, the inode 658 * locks are released and the methods passed in (@mkdir and @rmdir) are 659 * called without locks and with the name of the directory being created 660 * within the instances directory. 661 * 662 * Returns the dentry of the instances directory. 663 */ 664 __init struct dentry *tracefs_create_instance_dir(const char *name, 665 struct dentry *parent, 666 int (*mkdir)(const char *name), 667 int (*rmdir)(const char *name)) 668 { 669 struct dentry *dentry; 670 671 /* Only allow one instance of the instances directory. */ 672 if (WARN_ON(tracefs_ops.mkdir || tracefs_ops.rmdir)) 673 return NULL; 674 675 dentry = __create_dir(name, parent, &tracefs_instance_dir_inode_operations); 676 if (!dentry) 677 return NULL; 678 679 tracefs_ops.mkdir = mkdir; 680 tracefs_ops.rmdir = rmdir; 681 682 return dentry; 683 } 684 685 static void remove_one(struct dentry *victim) 686 { 687 simple_release_fs(&tracefs_mount, &tracefs_mount_count); 688 } 689 690 /** 691 * tracefs_remove - recursively removes a directory 692 * @dentry: a pointer to a the dentry of the directory to be removed. 693 * 694 * This function recursively removes a directory tree in tracefs that 695 * was previously created with a call to another tracefs function 696 * (like tracefs_create_file() or variants thereof.) 697 */ 698 void tracefs_remove(struct dentry *dentry) 699 { 700 if (IS_ERR_OR_NULL(dentry)) 701 return; 702 703 simple_pin_fs(&trace_fs_type, &tracefs_mount, &tracefs_mount_count); 704 simple_recursive_removal(dentry, remove_one); 705 simple_release_fs(&tracefs_mount, &tracefs_mount_count); 706 } 707 708 /** 709 * tracefs_initialized - Tells whether tracefs has been registered 710 */ 711 bool tracefs_initialized(void) 712 { 713 return tracefs_registered; 714 } 715 716 static void init_once(void *foo) 717 { 718 struct tracefs_inode *ti = (struct tracefs_inode *) foo; 719 720 /* inode_init_once() calls memset() on the vfs_inode portion */ 721 inode_init_once(&ti->vfs_inode); 722 723 /* Zero out the rest */ 724 memset_after(ti, 0, vfs_inode); 725 } 726 727 static int __init tracefs_init(void) 728 { 729 int retval; 730 731 tracefs_inode_cachep = kmem_cache_create("tracefs_inode_cache", 732 sizeof(struct tracefs_inode), 733 0, (SLAB_RECLAIM_ACCOUNT| 734 SLAB_ACCOUNT), 735 init_once); 736 if (!tracefs_inode_cachep) 737 return -ENOMEM; 738 739 retval = sysfs_create_mount_point(kernel_kobj, "tracing"); 740 if (retval) 741 return -EINVAL; 742 743 retval = register_filesystem(&trace_fs_type); 744 if (!retval) 745 tracefs_registered = true; 746 747 return retval; 748 } 749 core_initcall(tracefs_init); 750