1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * event_inode.c - part of tracefs, a pseudo file system for activating tracing 4 * 5 * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org> 6 * Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com> 7 * Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org> 8 * 9 * eventfs is used to dynamically create inodes and dentries based on the 10 * meta data provided by the tracing system. 11 * 12 * eventfs stores the meta-data of files/dirs and holds off on creating 13 * inodes/dentries of the files. When accessed, the eventfs will create the 14 * inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up 15 * and delete the inodes/dentries when they are no longer referenced. 16 */ 17 #include <linux/fsnotify.h> 18 #include <linux/fs.h> 19 #include <linux/namei.h> 20 #include <linux/workqueue.h> 21 #include <linux/security.h> 22 #include <linux/tracefs.h> 23 #include <linux/kref.h> 24 #include <linux/delay.h> 25 #include "internal.h" 26 27 /* 28 * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access 29 * to the ei->dentry must be done under this mutex and after checking 30 * if ei->is_freed is not set. When ei->is_freed is set, the dentry 31 * is on its way to being freed after the last dput() is made on it. 32 */ 33 static DEFINE_MUTEX(eventfs_mutex); 34 35 /* Choose something "unique" ;-) */ 36 #define EVENTFS_FILE_INODE_INO 0x12c4e37 37 38 /* Just try to make something consistent and unique */ 39 static int eventfs_dir_ino(struct eventfs_inode *ei) 40 { 41 if (!ei->ino) 42 ei->ino = get_next_ino(); 43 44 return ei->ino; 45 } 46 47 /* 48 * The eventfs_inode (ei) itself is protected by SRCU. It is released from 49 * its parent's list and will have is_freed set (under eventfs_mutex). 50 * After the SRCU grace period is over and the last dput() is called 51 * the ei is freed. 52 */ 53 DEFINE_STATIC_SRCU(eventfs_srcu); 54 55 /* Mode is unsigned short, use the upper bits for flags */ 56 enum { 57 EVENTFS_SAVE_MODE = BIT(16), 58 EVENTFS_SAVE_UID = BIT(17), 59 EVENTFS_SAVE_GID = BIT(18), 60 EVENTFS_TOPLEVEL = BIT(19), 61 }; 62 63 #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) 64 65 static struct dentry *eventfs_root_lookup(struct inode *dir, 66 struct dentry *dentry, 67 unsigned int flags); 68 static int eventfs_iterate(struct file *file, struct dir_context *ctx); 69 70 static void update_attr(struct eventfs_attr *attr, struct iattr *iattr) 71 { 72 unsigned int ia_valid = iattr->ia_valid; 73 74 if (ia_valid & ATTR_MODE) { 75 attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) | 76 (iattr->ia_mode & EVENTFS_MODE_MASK) | 77 EVENTFS_SAVE_MODE; 78 } 79 if (ia_valid & ATTR_UID) { 80 attr->mode |= EVENTFS_SAVE_UID; 81 attr->uid = iattr->ia_uid; 82 } 83 if (ia_valid & ATTR_GID) { 84 attr->mode |= EVENTFS_SAVE_GID; 85 attr->gid = iattr->ia_gid; 86 } 87 } 88 89 static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, 90 struct iattr *iattr) 91 { 92 const struct eventfs_entry *entry; 93 struct eventfs_inode *ei; 94 const char *name; 95 int ret; 96 97 mutex_lock(&eventfs_mutex); 98 ei = dentry->d_fsdata; 99 if (ei->is_freed) { 100 /* Do not allow changes if the event is about to be removed. */ 101 mutex_unlock(&eventfs_mutex); 102 return -ENODEV; 103 } 104 105 /* Preallocate the children mode array if necessary */ 106 if (!(dentry->d_inode->i_mode & S_IFDIR)) { 107 if (!ei->entry_attrs) { 108 ei->entry_attrs = kcalloc(ei->nr_entries, sizeof(*ei->entry_attrs), 109 GFP_NOFS); 110 if (!ei->entry_attrs) { 111 ret = -ENOMEM; 112 goto out; 113 } 114 } 115 } 116 117 ret = simple_setattr(idmap, dentry, iattr); 118 if (ret < 0) 119 goto out; 120 121 /* 122 * If this is a dir, then update the ei cache, only the file 123 * mode is saved in the ei->m_children, and the ownership is 124 * determined by the parent directory. 125 */ 126 if (dentry->d_inode->i_mode & S_IFDIR) { 127 /* 128 * The events directory dentry is never freed, unless its 129 * part of an instance that is deleted. It's attr is the 130 * default for its child files and directories. 131 * Do not update it. It's not used for its own mode or ownership. 132 */ 133 if (ei->is_events) { 134 /* But it still needs to know if it was modified */ 135 if (iattr->ia_valid & ATTR_UID) 136 ei->attr.mode |= EVENTFS_SAVE_UID; 137 if (iattr->ia_valid & ATTR_GID) 138 ei->attr.mode |= EVENTFS_SAVE_GID; 139 } else { 140 update_attr(&ei->attr, iattr); 141 } 142 143 } else { 144 name = dentry->d_name.name; 145 146 for (int i = 0; i < ei->nr_entries; i++) { 147 entry = &ei->entries[i]; 148 if (strcmp(name, entry->name) == 0) { 149 update_attr(&ei->entry_attrs[i], iattr); 150 break; 151 } 152 } 153 } 154 out: 155 mutex_unlock(&eventfs_mutex); 156 return ret; 157 } 158 159 static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry) 160 { 161 struct inode *inode; 162 163 /* Only update if the "events" was on the top level */ 164 if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL)) 165 return; 166 167 /* Get the tracefs root inode. */ 168 inode = d_inode(dentry->d_sb->s_root); 169 ei->attr.uid = inode->i_uid; 170 ei->attr.gid = inode->i_gid; 171 } 172 173 static void set_top_events_ownership(struct inode *inode) 174 { 175 struct tracefs_inode *ti = get_tracefs(inode); 176 struct eventfs_inode *ei = ti->private; 177 struct dentry *dentry; 178 179 /* The top events directory doesn't get automatically updated */ 180 if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL)) 181 return; 182 183 dentry = ei->dentry; 184 185 update_top_events_attr(ei, dentry); 186 187 if (!(ei->attr.mode & EVENTFS_SAVE_UID)) 188 inode->i_uid = ei->attr.uid; 189 190 if (!(ei->attr.mode & EVENTFS_SAVE_GID)) 191 inode->i_gid = ei->attr.gid; 192 } 193 194 static int eventfs_get_attr(struct mnt_idmap *idmap, 195 const struct path *path, struct kstat *stat, 196 u32 request_mask, unsigned int flags) 197 { 198 struct dentry *dentry = path->dentry; 199 struct inode *inode = d_backing_inode(dentry); 200 201 set_top_events_ownership(inode); 202 203 generic_fillattr(idmap, request_mask, inode, stat); 204 return 0; 205 } 206 207 static int eventfs_permission(struct mnt_idmap *idmap, 208 struct inode *inode, int mask) 209 { 210 set_top_events_ownership(inode); 211 return generic_permission(idmap, inode, mask); 212 } 213 214 static const struct inode_operations eventfs_root_dir_inode_operations = { 215 .lookup = eventfs_root_lookup, 216 .setattr = eventfs_set_attr, 217 .getattr = eventfs_get_attr, 218 .permission = eventfs_permission, 219 }; 220 221 static const struct inode_operations eventfs_file_inode_operations = { 222 .setattr = eventfs_set_attr, 223 }; 224 225 static const struct file_operations eventfs_file_operations = { 226 .read = generic_read_dir, 227 .iterate_shared = eventfs_iterate, 228 .llseek = generic_file_llseek, 229 }; 230 231 /* Return the evenfs_inode of the "events" directory */ 232 static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) 233 { 234 struct eventfs_inode *ei; 235 236 mutex_lock(&eventfs_mutex); 237 do { 238 /* The parent always has an ei, except for events itself */ 239 ei = dentry->d_parent->d_fsdata; 240 241 /* 242 * If the ei is being freed, the ownership of the children 243 * doesn't matter. 244 */ 245 if (ei->is_freed) { 246 ei = NULL; 247 break; 248 } 249 250 dentry = ei->dentry; 251 } while (!ei->is_events); 252 mutex_unlock(&eventfs_mutex); 253 254 update_top_events_attr(ei, dentry); 255 256 return ei; 257 } 258 259 static void update_inode_attr(struct dentry *dentry, struct inode *inode, 260 struct eventfs_attr *attr, umode_t mode) 261 { 262 struct eventfs_inode *events_ei = eventfs_find_events(dentry); 263 264 if (!events_ei) 265 return; 266 267 inode->i_mode = mode; 268 inode->i_uid = events_ei->attr.uid; 269 inode->i_gid = events_ei->attr.gid; 270 271 if (!attr) 272 return; 273 274 if (attr->mode & EVENTFS_SAVE_MODE) 275 inode->i_mode = attr->mode & EVENTFS_MODE_MASK; 276 277 if (attr->mode & EVENTFS_SAVE_UID) 278 inode->i_uid = attr->uid; 279 280 if (attr->mode & EVENTFS_SAVE_GID) 281 inode->i_gid = attr->gid; 282 } 283 284 static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level) 285 { 286 struct eventfs_inode *ei_child; 287 288 /* at most we have events/system/event */ 289 if (WARN_ON_ONCE(level > 3)) 290 return; 291 292 ei->attr.gid = gid; 293 294 if (ei->entry_attrs) { 295 for (int i = 0; i < ei->nr_entries; i++) { 296 ei->entry_attrs[i].gid = gid; 297 } 298 } 299 300 /* 301 * Only eventfs_inode with dentries are updated, make sure 302 * all eventfs_inodes are updated. If one of the children 303 * do not have a dentry, this function must traverse it. 304 */ 305 list_for_each_entry_srcu(ei_child, &ei->children, list, 306 srcu_read_lock_held(&eventfs_srcu)) { 307 if (!ei_child->dentry) 308 update_gid(ei_child, gid, level + 1); 309 } 310 } 311 312 void eventfs_update_gid(struct dentry *dentry, kgid_t gid) 313 { 314 struct eventfs_inode *ei = dentry->d_fsdata; 315 int idx; 316 317 idx = srcu_read_lock(&eventfs_srcu); 318 update_gid(ei, gid, 0); 319 srcu_read_unlock(&eventfs_srcu, idx); 320 } 321 322 /** 323 * create_file - create a file in the tracefs filesystem 324 * @name: the name of the file to create. 325 * @mode: the permission that the file should have. 326 * @attr: saved attributes changed by user 327 * @parent: parent dentry for this file. 328 * @data: something that the caller will want to get to later on. 329 * @fop: struct file_operations that should be used for this file. 330 * 331 * This function creates a dentry that represents a file in the eventsfs_inode 332 * directory. The inode.i_private pointer will point to @data in the open() 333 * call. 334 */ 335 static struct dentry *create_file(const char *name, umode_t mode, 336 struct eventfs_attr *attr, 337 struct dentry *parent, void *data, 338 const struct file_operations *fop) 339 { 340 struct tracefs_inode *ti; 341 struct dentry *dentry; 342 struct inode *inode; 343 344 if (!(mode & S_IFMT)) 345 mode |= S_IFREG; 346 347 if (WARN_ON_ONCE(!S_ISREG(mode))) 348 return NULL; 349 350 WARN_ON_ONCE(!parent); 351 dentry = eventfs_start_creating(name, parent); 352 353 if (IS_ERR(dentry)) 354 return dentry; 355 356 inode = tracefs_get_inode(dentry->d_sb); 357 if (unlikely(!inode)) 358 return eventfs_failed_creating(dentry); 359 360 /* If the user updated the directory's attributes, use them */ 361 update_inode_attr(dentry, inode, attr, mode); 362 363 inode->i_op = &eventfs_file_inode_operations; 364 inode->i_fop = fop; 365 inode->i_private = data; 366 367 /* All files will have the same inode number */ 368 inode->i_ino = EVENTFS_FILE_INODE_INO; 369 370 ti = get_tracefs(inode); 371 ti->flags |= TRACEFS_EVENT_INODE; 372 d_instantiate(dentry, inode); 373 fsnotify_create(dentry->d_parent->d_inode, dentry); 374 return eventfs_end_creating(dentry); 375 }; 376 377 /** 378 * create_dir - create a dir in the tracefs filesystem 379 * @ei: the eventfs_inode that represents the directory to create 380 * @parent: parent dentry for this file. 381 * 382 * This function will create a dentry for a directory represented by 383 * a eventfs_inode. 384 */ 385 static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent) 386 { 387 struct tracefs_inode *ti; 388 struct dentry *dentry; 389 struct inode *inode; 390 391 dentry = eventfs_start_creating(ei->name, parent); 392 if (IS_ERR(dentry)) 393 return dentry; 394 395 inode = tracefs_get_inode(dentry->d_sb); 396 if (unlikely(!inode)) 397 return eventfs_failed_creating(dentry); 398 399 /* If the user updated the directory's attributes, use them */ 400 update_inode_attr(dentry, inode, &ei->attr, 401 S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO); 402 403 inode->i_op = &eventfs_root_dir_inode_operations; 404 inode->i_fop = &eventfs_file_operations; 405 406 /* All directories will have the same inode number */ 407 inode->i_ino = eventfs_dir_ino(ei); 408 409 ti = get_tracefs(inode); 410 ti->flags |= TRACEFS_EVENT_INODE; 411 412 inc_nlink(inode); 413 d_instantiate(dentry, inode); 414 inc_nlink(dentry->d_parent->d_inode); 415 fsnotify_mkdir(dentry->d_parent->d_inode, dentry); 416 return eventfs_end_creating(dentry); 417 } 418 419 static void free_ei(struct eventfs_inode *ei) 420 { 421 kfree_const(ei->name); 422 kfree(ei->d_children); 423 kfree(ei->entry_attrs); 424 kfree(ei); 425 } 426 427 /** 428 * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode 429 * @ti: the tracefs_inode of the dentry 430 * @dentry: dentry which has the reference to remove. 431 * 432 * Remove the association between a dentry from an eventfs_inode. 433 */ 434 void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) 435 { 436 struct eventfs_inode *ei; 437 int i; 438 439 mutex_lock(&eventfs_mutex); 440 441 ei = dentry->d_fsdata; 442 if (!ei) 443 goto out; 444 445 /* This could belong to one of the files of the ei */ 446 if (ei->dentry != dentry) { 447 for (i = 0; i < ei->nr_entries; i++) { 448 if (ei->d_children[i] == dentry) 449 break; 450 } 451 if (WARN_ON_ONCE(i == ei->nr_entries)) 452 goto out; 453 ei->d_children[i] = NULL; 454 } else if (ei->is_freed) { 455 free_ei(ei); 456 } else { 457 ei->dentry = NULL; 458 } 459 460 dentry->d_fsdata = NULL; 461 out: 462 mutex_unlock(&eventfs_mutex); 463 } 464 465 /** 466 * create_file_dentry - create a dentry for a file of an eventfs_inode 467 * @ei: the eventfs_inode that the file will be created under 468 * @idx: the index into the d_children[] of the @ei 469 * @parent: The parent dentry of the created file. 470 * @name: The name of the file to create 471 * @mode: The mode of the file. 472 * @data: The data to use to set the inode of the file with on open() 473 * @fops: The fops of the file to be created. 474 * 475 * Create a dentry for a file of an eventfs_inode @ei and place it into the 476 * address located at @e_dentry. 477 */ 478 static struct dentry * 479 create_file_dentry(struct eventfs_inode *ei, int idx, 480 struct dentry *parent, const char *name, umode_t mode, void *data, 481 const struct file_operations *fops) 482 { 483 struct eventfs_attr *attr = NULL; 484 struct dentry **e_dentry = &ei->d_children[idx]; 485 struct dentry *dentry; 486 487 WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); 488 489 mutex_lock(&eventfs_mutex); 490 if (ei->is_freed) { 491 mutex_unlock(&eventfs_mutex); 492 return NULL; 493 } 494 /* If the e_dentry already has a dentry, use it */ 495 if (*e_dentry) { 496 dget(*e_dentry); 497 mutex_unlock(&eventfs_mutex); 498 return *e_dentry; 499 } 500 501 /* ei->entry_attrs are protected by SRCU */ 502 if (ei->entry_attrs) 503 attr = &ei->entry_attrs[idx]; 504 505 mutex_unlock(&eventfs_mutex); 506 507 dentry = create_file(name, mode, attr, parent, data, fops); 508 509 mutex_lock(&eventfs_mutex); 510 511 if (IS_ERR_OR_NULL(dentry)) { 512 /* 513 * When the mutex was released, something else could have 514 * created the dentry for this e_dentry. In which case 515 * use that one. 516 * 517 * If ei->is_freed is set, the e_dentry is currently on its 518 * way to being freed, don't return it. If e_dentry is NULL 519 * it means it was already freed. 520 */ 521 if (ei->is_freed) { 522 dentry = NULL; 523 } else { 524 dentry = *e_dentry; 525 dget(dentry); 526 } 527 mutex_unlock(&eventfs_mutex); 528 return dentry; 529 } 530 531 if (!*e_dentry && !ei->is_freed) { 532 *e_dentry = dentry; 533 dentry->d_fsdata = ei; 534 } else { 535 /* 536 * Should never happen unless we get here due to being freed. 537 * Otherwise it means two dentries exist with the same name. 538 */ 539 WARN_ON_ONCE(!ei->is_freed); 540 dentry = NULL; 541 } 542 mutex_unlock(&eventfs_mutex); 543 544 return dentry; 545 } 546 547 /** 548 * eventfs_post_create_dir - post create dir routine 549 * @ei: eventfs_inode of recently created dir 550 * 551 * Map the meta-data of files within an eventfs dir to their parent dentry 552 */ 553 static void eventfs_post_create_dir(struct eventfs_inode *ei) 554 { 555 struct eventfs_inode *ei_child; 556 struct tracefs_inode *ti; 557 558 lockdep_assert_held(&eventfs_mutex); 559 560 /* srcu lock already held */ 561 /* fill parent-child relation */ 562 list_for_each_entry_srcu(ei_child, &ei->children, list, 563 srcu_read_lock_held(&eventfs_srcu)) { 564 ei_child->d_parent = ei->dentry; 565 } 566 567 ti = get_tracefs(ei->dentry->d_inode); 568 ti->private = ei; 569 } 570 571 /** 572 * create_dir_dentry - Create a directory dentry for the eventfs_inode 573 * @pei: The eventfs_inode parent of ei. 574 * @ei: The eventfs_inode to create the directory for 575 * @parent: The dentry of the parent of this directory 576 * 577 * This creates and attaches a directory dentry to the eventfs_inode @ei. 578 */ 579 static struct dentry * 580 create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, 581 struct dentry *parent) 582 { 583 struct dentry *dentry = NULL; 584 585 WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); 586 587 mutex_lock(&eventfs_mutex); 588 if (pei->is_freed || ei->is_freed) { 589 mutex_unlock(&eventfs_mutex); 590 return NULL; 591 } 592 if (ei->dentry) { 593 /* If the eventfs_inode already has a dentry, use it */ 594 dentry = ei->dentry; 595 dget(dentry); 596 mutex_unlock(&eventfs_mutex); 597 return dentry; 598 } 599 mutex_unlock(&eventfs_mutex); 600 601 dentry = create_dir(ei, parent); 602 603 mutex_lock(&eventfs_mutex); 604 605 if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { 606 /* 607 * When the mutex was released, something else could have 608 * created the dentry for this e_dentry. In which case 609 * use that one. 610 * 611 * If ei->is_freed is set, the e_dentry is currently on its 612 * way to being freed. 613 */ 614 dentry = ei->dentry; 615 if (dentry) 616 dget(dentry); 617 mutex_unlock(&eventfs_mutex); 618 return dentry; 619 } 620 621 if (!ei->dentry && !ei->is_freed) { 622 ei->dentry = dentry; 623 eventfs_post_create_dir(ei); 624 dentry->d_fsdata = ei; 625 } else { 626 /* 627 * Should never happen unless we get here due to being freed. 628 * Otherwise it means two dentries exist with the same name. 629 */ 630 WARN_ON_ONCE(!ei->is_freed); 631 dentry = NULL; 632 } 633 mutex_unlock(&eventfs_mutex); 634 635 return dentry; 636 } 637 638 /** 639 * eventfs_root_lookup - lookup routine to create file/dir 640 * @dir: in which a lookup is being done 641 * @dentry: file/dir dentry 642 * @flags: Just passed to simple_lookup() 643 * 644 * Used to create dynamic file/dir with-in @dir, search with-in @ei 645 * list, if @dentry found go ahead and create the file/dir 646 */ 647 648 static struct dentry *eventfs_root_lookup(struct inode *dir, 649 struct dentry *dentry, 650 unsigned int flags) 651 { 652 const struct file_operations *fops; 653 const struct eventfs_entry *entry; 654 struct eventfs_inode *ei_child; 655 struct tracefs_inode *ti; 656 struct eventfs_inode *ei; 657 struct dentry *ei_dentry = NULL; 658 struct dentry *ret = NULL; 659 struct dentry *d; 660 const char *name = dentry->d_name.name; 661 umode_t mode; 662 void *data; 663 int idx; 664 int i; 665 int r; 666 667 ti = get_tracefs(dir); 668 if (!(ti->flags & TRACEFS_EVENT_INODE)) 669 return NULL; 670 671 /* Grab srcu to prevent the ei from going away */ 672 idx = srcu_read_lock(&eventfs_srcu); 673 674 /* 675 * Grab the eventfs_mutex to consistent value from ti->private. 676 * This s 677 */ 678 mutex_lock(&eventfs_mutex); 679 ei = READ_ONCE(ti->private); 680 if (ei && !ei->is_freed) 681 ei_dentry = READ_ONCE(ei->dentry); 682 mutex_unlock(&eventfs_mutex); 683 684 if (!ei || !ei_dentry) 685 goto out; 686 687 data = ei->data; 688 689 list_for_each_entry_srcu(ei_child, &ei->children, list, 690 srcu_read_lock_held(&eventfs_srcu)) { 691 if (strcmp(ei_child->name, name) != 0) 692 continue; 693 ret = simple_lookup(dir, dentry, flags); 694 if (IS_ERR(ret)) 695 goto out; 696 d = create_dir_dentry(ei, ei_child, ei_dentry); 697 dput(d); 698 goto out; 699 } 700 701 for (i = 0; i < ei->nr_entries; i++) { 702 entry = &ei->entries[i]; 703 if (strcmp(name, entry->name) == 0) { 704 void *cdata = data; 705 mutex_lock(&eventfs_mutex); 706 /* If ei->is_freed, then the event itself may be too */ 707 if (!ei->is_freed) 708 r = entry->callback(name, &mode, &cdata, &fops); 709 else 710 r = -1; 711 mutex_unlock(&eventfs_mutex); 712 if (r <= 0) 713 continue; 714 ret = simple_lookup(dir, dentry, flags); 715 if (IS_ERR(ret)) 716 goto out; 717 d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); 718 dput(d); 719 break; 720 } 721 } 722 out: 723 srcu_read_unlock(&eventfs_srcu, idx); 724 return ret; 725 } 726 727 /* 728 * Walk the children of a eventfs_inode to fill in getdents(). 729 */ 730 static int eventfs_iterate(struct file *file, struct dir_context *ctx) 731 { 732 const struct file_operations *fops; 733 struct inode *f_inode = file_inode(file); 734 const struct eventfs_entry *entry; 735 struct eventfs_inode *ei_child; 736 struct tracefs_inode *ti; 737 struct eventfs_inode *ei; 738 const char *name; 739 umode_t mode; 740 int idx; 741 int ret = -EINVAL; 742 int ino; 743 int i, r, c; 744 745 if (!dir_emit_dots(file, ctx)) 746 return 0; 747 748 ti = get_tracefs(f_inode); 749 if (!(ti->flags & TRACEFS_EVENT_INODE)) 750 return -EINVAL; 751 752 c = ctx->pos - 2; 753 754 idx = srcu_read_lock(&eventfs_srcu); 755 756 mutex_lock(&eventfs_mutex); 757 ei = READ_ONCE(ti->private); 758 if (ei && ei->is_freed) 759 ei = NULL; 760 mutex_unlock(&eventfs_mutex); 761 762 if (!ei) 763 goto out; 764 765 /* 766 * Need to create the dentries and inodes to have a consistent 767 * inode number. 768 */ 769 ret = 0; 770 771 /* Start at 'c' to jump over already read entries */ 772 for (i = c; i < ei->nr_entries; i++, ctx->pos++) { 773 void *cdata = ei->data; 774 775 entry = &ei->entries[i]; 776 name = entry->name; 777 778 mutex_lock(&eventfs_mutex); 779 /* If ei->is_freed then just bail here, nothing more to do */ 780 if (ei->is_freed) { 781 mutex_unlock(&eventfs_mutex); 782 goto out; 783 } 784 r = entry->callback(name, &mode, &cdata, &fops); 785 mutex_unlock(&eventfs_mutex); 786 if (r <= 0) 787 continue; 788 789 ino = EVENTFS_FILE_INODE_INO; 790 791 if (!dir_emit(ctx, name, strlen(name), ino, DT_REG)) 792 goto out; 793 } 794 795 /* Subtract the skipped entries above */ 796 c -= min((unsigned int)c, (unsigned int)ei->nr_entries); 797 798 list_for_each_entry_srcu(ei_child, &ei->children, list, 799 srcu_read_lock_held(&eventfs_srcu)) { 800 801 if (c > 0) { 802 c--; 803 continue; 804 } 805 806 ctx->pos++; 807 808 if (ei_child->is_freed) 809 continue; 810 811 name = ei_child->name; 812 813 ino = eventfs_dir_ino(ei_child); 814 815 if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR)) 816 goto out_dec; 817 } 818 ret = 1; 819 out: 820 srcu_read_unlock(&eventfs_srcu, idx); 821 822 return ret; 823 824 out_dec: 825 /* Incremented ctx->pos without adding something, reset it */ 826 ctx->pos--; 827 goto out; 828 } 829 830 /** 831 * eventfs_create_dir - Create the eventfs_inode for this directory 832 * @name: The name of the directory to create. 833 * @parent: The eventfs_inode of the parent directory. 834 * @entries: A list of entries that represent the files under this directory 835 * @size: The number of @entries 836 * @data: The default data to pass to the files (an entry may override it). 837 * 838 * This function creates the descriptor to represent a directory in the 839 * eventfs. This descriptor is an eventfs_inode, and it is returned to be 840 * used to create other children underneath. 841 * 842 * The @entries is an array of eventfs_entry structures which has: 843 * const char *name 844 * eventfs_callback callback; 845 * 846 * The name is the name of the file, and the callback is a pointer to a function 847 * that will be called when the file is reference (either by lookup or by 848 * reading a directory). The callback is of the prototype: 849 * 850 * int callback(const char *name, umode_t *mode, void **data, 851 * const struct file_operations **fops); 852 * 853 * When a file needs to be created, this callback will be called with 854 * name = the name of the file being created (so that the same callback 855 * may be used for multiple files). 856 * mode = a place to set the file's mode 857 * data = A pointer to @data, and the callback may replace it, which will 858 * cause the file created to pass the new data to the open() call. 859 * fops = the fops to use for the created file. 860 * 861 * NB. @callback is called while holding internal locks of the eventfs 862 * system. The callback must not call any code that might also call into 863 * the tracefs or eventfs system or it will risk creating a deadlock. 864 */ 865 struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, 866 const struct eventfs_entry *entries, 867 int size, void *data) 868 { 869 struct eventfs_inode *ei; 870 871 if (!parent) 872 return ERR_PTR(-EINVAL); 873 874 ei = kzalloc(sizeof(*ei), GFP_KERNEL); 875 if (!ei) 876 return ERR_PTR(-ENOMEM); 877 878 ei->name = kstrdup_const(name, GFP_KERNEL); 879 if (!ei->name) { 880 kfree(ei); 881 return ERR_PTR(-ENOMEM); 882 } 883 884 if (size) { 885 ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); 886 if (!ei->d_children) { 887 kfree_const(ei->name); 888 kfree(ei); 889 return ERR_PTR(-ENOMEM); 890 } 891 } 892 893 ei->entries = entries; 894 ei->nr_entries = size; 895 ei->data = data; 896 INIT_LIST_HEAD(&ei->children); 897 INIT_LIST_HEAD(&ei->list); 898 899 mutex_lock(&eventfs_mutex); 900 if (!parent->is_freed) { 901 list_add_tail(&ei->list, &parent->children); 902 ei->d_parent = parent->dentry; 903 } 904 mutex_unlock(&eventfs_mutex); 905 906 /* Was the parent freed? */ 907 if (list_empty(&ei->list)) { 908 free_ei(ei); 909 ei = NULL; 910 } 911 return ei; 912 } 913 914 /** 915 * eventfs_create_events_dir - create the top level events directory 916 * @name: The name of the top level directory to create. 917 * @parent: Parent dentry for this file in the tracefs directory. 918 * @entries: A list of entries that represent the files under this directory 919 * @size: The number of @entries 920 * @data: The default data to pass to the files (an entry may override it). 921 * 922 * This function creates the top of the trace event directory. 923 * 924 * See eventfs_create_dir() for use of @entries. 925 */ 926 struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, 927 const struct eventfs_entry *entries, 928 int size, void *data) 929 { 930 struct dentry *dentry = tracefs_start_creating(name, parent); 931 struct eventfs_inode *ei; 932 struct tracefs_inode *ti; 933 struct inode *inode; 934 kuid_t uid; 935 kgid_t gid; 936 937 if (security_locked_down(LOCKDOWN_TRACEFS)) 938 return NULL; 939 940 if (IS_ERR(dentry)) 941 return ERR_CAST(dentry); 942 943 ei = kzalloc(sizeof(*ei), GFP_KERNEL); 944 if (!ei) 945 goto fail_ei; 946 947 inode = tracefs_get_inode(dentry->d_sb); 948 if (unlikely(!inode)) 949 goto fail; 950 951 if (size) { 952 ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); 953 if (!ei->d_children) 954 goto fail; 955 } 956 957 ei->dentry = dentry; 958 ei->entries = entries; 959 ei->nr_entries = size; 960 ei->is_events = 1; 961 ei->data = data; 962 ei->name = kstrdup_const(name, GFP_KERNEL); 963 if (!ei->name) 964 goto fail; 965 966 /* Save the ownership of this directory */ 967 uid = d_inode(dentry->d_parent)->i_uid; 968 gid = d_inode(dentry->d_parent)->i_gid; 969 970 /* 971 * If the events directory is of the top instance, then parent 972 * is NULL. Set the attr.mode to reflect this and its permissions will 973 * default to the tracefs root dentry. 974 */ 975 if (!parent) 976 ei->attr.mode = EVENTFS_TOPLEVEL; 977 978 /* This is used as the default ownership of the files and directories */ 979 ei->attr.uid = uid; 980 ei->attr.gid = gid; 981 982 INIT_LIST_HEAD(&ei->children); 983 INIT_LIST_HEAD(&ei->list); 984 985 ti = get_tracefs(inode); 986 ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE; 987 ti->private = ei; 988 989 inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; 990 inode->i_uid = uid; 991 inode->i_gid = gid; 992 inode->i_op = &eventfs_root_dir_inode_operations; 993 inode->i_fop = &eventfs_file_operations; 994 995 dentry->d_fsdata = ei; 996 997 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 998 inc_nlink(inode); 999 d_instantiate(dentry, inode); 1000 inc_nlink(dentry->d_parent->d_inode); 1001 fsnotify_mkdir(dentry->d_parent->d_inode, dentry); 1002 tracefs_end_creating(dentry); 1003 1004 return ei; 1005 1006 fail: 1007 kfree(ei->d_children); 1008 kfree(ei); 1009 fail_ei: 1010 tracefs_failed_creating(dentry); 1011 return ERR_PTR(-ENOMEM); 1012 } 1013 1014 static LLIST_HEAD(free_list); 1015 1016 static void eventfs_workfn(struct work_struct *work) 1017 { 1018 struct eventfs_inode *ei, *tmp; 1019 struct llist_node *llnode; 1020 1021 llnode = llist_del_all(&free_list); 1022 llist_for_each_entry_safe(ei, tmp, llnode, llist) { 1023 /* This dput() matches the dget() from unhook_dentry() */ 1024 for (int i = 0; i < ei->nr_entries; i++) { 1025 if (ei->d_children[i]) 1026 dput(ei->d_children[i]); 1027 } 1028 /* This should only get here if it had a dentry */ 1029 if (!WARN_ON_ONCE(!ei->dentry)) 1030 dput(ei->dentry); 1031 } 1032 } 1033 1034 static DECLARE_WORK(eventfs_work, eventfs_workfn); 1035 1036 static void free_rcu_ei(struct rcu_head *head) 1037 { 1038 struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu); 1039 1040 if (ei->dentry) { 1041 /* Do not free the ei until all references of dentry are gone */ 1042 if (llist_add(&ei->llist, &free_list)) 1043 queue_work(system_unbound_wq, &eventfs_work); 1044 return; 1045 } 1046 1047 /* If the ei doesn't have a dentry, neither should its children */ 1048 for (int i = 0; i < ei->nr_entries; i++) { 1049 WARN_ON_ONCE(ei->d_children[i]); 1050 } 1051 1052 free_ei(ei); 1053 } 1054 1055 static void unhook_dentry(struct dentry *dentry) 1056 { 1057 if (!dentry) 1058 return; 1059 /* 1060 * Need to add a reference to the dentry that is expected by 1061 * simple_recursive_removal(), which will include a dput(). 1062 */ 1063 dget(dentry); 1064 1065 /* 1066 * Also add a reference for the dput() in eventfs_workfn(). 1067 * That is required as that dput() will free the ei after 1068 * the SRCU grace period is over. 1069 */ 1070 dget(dentry); 1071 } 1072 1073 /** 1074 * eventfs_remove_rec - remove eventfs dir or file from list 1075 * @ei: eventfs_inode to be removed. 1076 * @level: prevent recursion from going more than 3 levels deep. 1077 * 1078 * This function recursively removes eventfs_inodes which 1079 * contains info of files and/or directories. 1080 */ 1081 static void eventfs_remove_rec(struct eventfs_inode *ei, int level) 1082 { 1083 struct eventfs_inode *ei_child; 1084 1085 if (!ei) 1086 return; 1087 /* 1088 * Check recursion depth. It should never be greater than 3: 1089 * 0 - events/ 1090 * 1 - events/group/ 1091 * 2 - events/group/event/ 1092 * 3 - events/group/event/file 1093 */ 1094 if (WARN_ON_ONCE(level > 3)) 1095 return; 1096 1097 /* search for nested folders or files */ 1098 list_for_each_entry_srcu(ei_child, &ei->children, list, 1099 lockdep_is_held(&eventfs_mutex)) { 1100 /* Children only have dentry if parent does */ 1101 WARN_ON_ONCE(ei_child->dentry && !ei->dentry); 1102 eventfs_remove_rec(ei_child, level + 1); 1103 } 1104 1105 1106 ei->is_freed = 1; 1107 1108 for (int i = 0; i < ei->nr_entries; i++) { 1109 if (ei->d_children[i]) { 1110 /* Children only have dentry if parent does */ 1111 WARN_ON_ONCE(!ei->dentry); 1112 unhook_dentry(ei->d_children[i]); 1113 } 1114 } 1115 1116 unhook_dentry(ei->dentry); 1117 1118 list_del_rcu(&ei->list); 1119 call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei); 1120 } 1121 1122 /** 1123 * eventfs_remove_dir - remove eventfs dir or file from list 1124 * @ei: eventfs_inode to be removed. 1125 * 1126 * This function acquire the eventfs_mutex lock and call eventfs_remove_rec() 1127 */ 1128 void eventfs_remove_dir(struct eventfs_inode *ei) 1129 { 1130 struct dentry *dentry; 1131 1132 if (!ei) 1133 return; 1134 1135 mutex_lock(&eventfs_mutex); 1136 dentry = ei->dentry; 1137 eventfs_remove_rec(ei, 0); 1138 mutex_unlock(&eventfs_mutex); 1139 1140 /* 1141 * If any of the ei children has a dentry, then the ei itself 1142 * must have a dentry. 1143 */ 1144 if (dentry) 1145 simple_recursive_removal(dentry, NULL); 1146 } 1147 1148 /** 1149 * eventfs_remove_events_dir - remove the top level eventfs directory 1150 * @ei: the event_inode returned by eventfs_create_events_dir(). 1151 * 1152 * This function removes the events main directory 1153 */ 1154 void eventfs_remove_events_dir(struct eventfs_inode *ei) 1155 { 1156 struct dentry *dentry; 1157 1158 dentry = ei->dentry; 1159 eventfs_remove_dir(ei); 1160 1161 /* 1162 * Matches the dget() done by tracefs_start_creating() 1163 * in eventfs_create_events_dir() when it the dentry was 1164 * created. In other words, it's a normal dentry that 1165 * sticks around while the other ei->dentry are created 1166 * and destroyed dynamically. 1167 */ 1168 dput(dentry); 1169 } 1170