1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * event_inode.c - part of tracefs, a pseudo file system for activating tracing 4 * 5 * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org> 6 * Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com> 7 * Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org> 8 * 9 * eventfs is used to dynamically create inodes and dentries based on the 10 * meta data provided by the tracing system. 11 * 12 * eventfs stores the meta-data of files/dirs and holds off on creating 13 * inodes/dentries of the files. When accessed, the eventfs will create the 14 * inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up 15 * and delete the inodes/dentries when they are no longer referenced. 16 */ 17 #include <linux/fsnotify.h> 18 #include <linux/fs.h> 19 #include <linux/namei.h> 20 #include <linux/workqueue.h> 21 #include <linux/security.h> 22 #include <linux/tracefs.h> 23 #include <linux/kref.h> 24 #include <linux/delay.h> 25 #include "internal.h" 26 27 /* 28 * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access 29 * to the ei->dentry must be done under this mutex and after checking 30 * if ei->is_freed is not set. When ei->is_freed is set, the dentry 31 * is on its way to being freed after the last dput() is made on it. 32 */ 33 static DEFINE_MUTEX(eventfs_mutex); 34 35 /* 36 * The eventfs_inode (ei) itself is protected by SRCU. It is released from 37 * its parent's list and will have is_freed set (under eventfs_mutex). 38 * After the SRCU grace period is over and the last dput() is called 39 * the ei is freed. 
40 */ 41 DEFINE_STATIC_SRCU(eventfs_srcu); 42 43 /* Mode is unsigned short, use the upper bits for flags */ 44 enum { 45 EVENTFS_SAVE_MODE = BIT(16), 46 EVENTFS_SAVE_UID = BIT(17), 47 EVENTFS_SAVE_GID = BIT(18), 48 }; 49 50 #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) 51 52 static struct dentry *eventfs_root_lookup(struct inode *dir, 53 struct dentry *dentry, 54 unsigned int flags); 55 static int dcache_dir_open_wrapper(struct inode *inode, struct file *file); 56 static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx); 57 static int eventfs_release(struct inode *inode, struct file *file); 58 59 static void update_attr(struct eventfs_attr *attr, struct iattr *iattr) 60 { 61 unsigned int ia_valid = iattr->ia_valid; 62 63 if (ia_valid & ATTR_MODE) { 64 attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) | 65 (iattr->ia_mode & EVENTFS_MODE_MASK) | 66 EVENTFS_SAVE_MODE; 67 } 68 if (ia_valid & ATTR_UID) { 69 attr->mode |= EVENTFS_SAVE_UID; 70 attr->uid = iattr->ia_uid; 71 } 72 if (ia_valid & ATTR_GID) { 73 attr->mode |= EVENTFS_SAVE_GID; 74 attr->gid = iattr->ia_gid; 75 } 76 } 77 78 static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, 79 struct iattr *iattr) 80 { 81 const struct eventfs_entry *entry; 82 struct eventfs_inode *ei; 83 const char *name; 84 int ret; 85 86 mutex_lock(&eventfs_mutex); 87 ei = dentry->d_fsdata; 88 if (ei->is_freed) { 89 /* Do not allow changes if the event is about to be removed. 
*/ 90 mutex_unlock(&eventfs_mutex); 91 return -ENODEV; 92 } 93 94 /* Preallocate the children mode array if necessary */ 95 if (!(dentry->d_inode->i_mode & S_IFDIR)) { 96 if (!ei->entry_attrs) { 97 ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries, 98 GFP_NOFS); 99 if (!ei->entry_attrs) { 100 ret = -ENOMEM; 101 goto out; 102 } 103 } 104 } 105 106 ret = simple_setattr(idmap, dentry, iattr); 107 if (ret < 0) 108 goto out; 109 110 /* 111 * If this is a dir, then update the ei cache, only the file 112 * mode is saved in the ei->m_children, and the ownership is 113 * determined by the parent directory. 114 */ 115 if (dentry->d_inode->i_mode & S_IFDIR) { 116 /* 117 * The events directory dentry is never freed, unless its 118 * part of an instance that is deleted. It's attr is the 119 * default for its child files and directories. 120 * Do not update it. It's not used for its own mode or ownership 121 */ 122 if (!ei->is_events) 123 update_attr(&ei->attr, iattr); 124 125 } else { 126 name = dentry->d_name.name; 127 128 for (int i = 0; i < ei->nr_entries; i++) { 129 entry = &ei->entries[i]; 130 if (strcmp(name, entry->name) == 0) { 131 update_attr(&ei->entry_attrs[i], iattr); 132 break; 133 } 134 } 135 } 136 out: 137 mutex_unlock(&eventfs_mutex); 138 return ret; 139 } 140 141 static const struct inode_operations eventfs_root_dir_inode_operations = { 142 .lookup = eventfs_root_lookup, 143 .setattr = eventfs_set_attr, 144 }; 145 146 static const struct inode_operations eventfs_file_inode_operations = { 147 .setattr = eventfs_set_attr, 148 }; 149 150 static const struct file_operations eventfs_file_operations = { 151 .open = dcache_dir_open_wrapper, 152 .read = generic_read_dir, 153 .iterate_shared = dcache_readdir_wrapper, 154 .llseek = generic_file_llseek, 155 .release = eventfs_release, 156 }; 157 158 /* Return the evenfs_inode of the "events" directory */ 159 static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) 160 { 161 struct 
eventfs_inode *ei; 162 163 mutex_lock(&eventfs_mutex); 164 do { 165 /* The parent always has an ei, except for events itself */ 166 ei = dentry->d_parent->d_fsdata; 167 168 /* 169 * If the ei is being freed, the ownership of the children 170 * doesn't matter. 171 */ 172 if (ei->is_freed) { 173 ei = NULL; 174 break; 175 } 176 177 dentry = ei->dentry; 178 } while (!ei->is_events); 179 mutex_unlock(&eventfs_mutex); 180 181 return ei; 182 } 183 184 static void update_inode_attr(struct dentry *dentry, struct inode *inode, 185 struct eventfs_attr *attr, umode_t mode) 186 { 187 struct eventfs_inode *events_ei = eventfs_find_events(dentry); 188 189 if (!events_ei) 190 return; 191 192 inode->i_mode = mode; 193 inode->i_uid = events_ei->attr.uid; 194 inode->i_gid = events_ei->attr.gid; 195 196 if (!attr) 197 return; 198 199 if (attr->mode & EVENTFS_SAVE_MODE) 200 inode->i_mode = attr->mode & EVENTFS_MODE_MASK; 201 202 if (attr->mode & EVENTFS_SAVE_UID) 203 inode->i_uid = attr->uid; 204 205 if (attr->mode & EVENTFS_SAVE_GID) 206 inode->i_gid = attr->gid; 207 } 208 209 static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level) 210 { 211 struct eventfs_inode *ei_child; 212 213 /* at most we have events/system/event */ 214 if (WARN_ON_ONCE(level > 3)) 215 return; 216 217 ei->attr.gid = gid; 218 219 if (ei->entry_attrs) { 220 for (int i = 0; i < ei->nr_entries; i++) { 221 ei->entry_attrs[i].gid = gid; 222 } 223 } 224 225 /* 226 * Only eventfs_inode with dentries are updated, make sure 227 * all eventfs_inodes are updated. If one of the children 228 * do not have a dentry, this function must traverse it. 
229 */ 230 list_for_each_entry_srcu(ei_child, &ei->children, list, 231 srcu_read_lock_held(&eventfs_srcu)) { 232 if (!ei_child->dentry) 233 update_gid(ei_child, gid, level + 1); 234 } 235 } 236 237 void eventfs_update_gid(struct dentry *dentry, kgid_t gid) 238 { 239 struct eventfs_inode *ei = dentry->d_fsdata; 240 int idx; 241 242 idx = srcu_read_lock(&eventfs_srcu); 243 update_gid(ei, gid, 0); 244 srcu_read_unlock(&eventfs_srcu, idx); 245 } 246 247 /** 248 * create_file - create a file in the tracefs filesystem 249 * @name: the name of the file to create. 250 * @mode: the permission that the file should have. 251 * @attr: saved attributes changed by user 252 * @parent: parent dentry for this file. 253 * @data: something that the caller will want to get to later on. 254 * @fop: struct file_operations that should be used for this file. 255 * 256 * This function creates a dentry that represents a file in the eventsfs_inode 257 * directory. The inode.i_private pointer will point to @data in the open() 258 * call. 
259 */ 260 static struct dentry *create_file(const char *name, umode_t mode, 261 struct eventfs_attr *attr, 262 struct dentry *parent, void *data, 263 const struct file_operations *fop) 264 { 265 struct tracefs_inode *ti; 266 struct dentry *dentry; 267 struct inode *inode; 268 269 if (!(mode & S_IFMT)) 270 mode |= S_IFREG; 271 272 if (WARN_ON_ONCE(!S_ISREG(mode))) 273 return NULL; 274 275 WARN_ON_ONCE(!parent); 276 dentry = eventfs_start_creating(name, parent); 277 278 if (IS_ERR(dentry)) 279 return dentry; 280 281 inode = tracefs_get_inode(dentry->d_sb); 282 if (unlikely(!inode)) 283 return eventfs_failed_creating(dentry); 284 285 /* If the user updated the directory's attributes, use them */ 286 update_inode_attr(dentry, inode, attr, mode); 287 288 inode->i_op = &eventfs_file_inode_operations; 289 inode->i_fop = fop; 290 inode->i_private = data; 291 292 ti = get_tracefs(inode); 293 ti->flags |= TRACEFS_EVENT_INODE; 294 d_instantiate(dentry, inode); 295 fsnotify_create(dentry->d_parent->d_inode, dentry); 296 return eventfs_end_creating(dentry); 297 }; 298 299 /** 300 * create_dir - create a dir in the tracefs filesystem 301 * @ei: the eventfs_inode that represents the directory to create 302 * @parent: parent dentry for this file. 303 * 304 * This function will create a dentry for a directory represented by 305 * a eventfs_inode. 
 *
 * Returns the new dentry on success, or whatever eventfs_start_creating()
 * or eventfs_failed_creating() return on failure.
 */
static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent)
{
	struct tracefs_inode *ti;
	struct dentry *dentry;
	struct inode *inode;

	dentry = eventfs_start_creating(ei->name, parent);
	if (IS_ERR(dentry))
		return dentry;

	inode = tracefs_get_inode(dentry->d_sb);
	if (unlikely(!inode))
		return eventfs_failed_creating(dentry);

	/* If the user updated the directory's attributes, use them */
	update_inode_attr(dentry, inode, &ei->attr,
			  S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);

	/* Directories get the lookup-capable inode ops and the readdir fops */
	inode->i_op = &eventfs_root_dir_inode_operations;
	inode->i_fop = &eventfs_file_operations;

	/* Mark the inode as belonging to eventfs */
	ti = get_tracefs(inode);
	ti->flags |= TRACEFS_EVENT_INODE;

	/* Extra link on the new directory's own inode */
	inc_nlink(inode);
	d_instantiate(dentry, inode);
	/* The new subdirectory also adds a link to its parent */
	inc_nlink(dentry->d_parent->d_inode);
	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
	return eventfs_end_creating(dentry);
}

/*
 * Free an eventfs_inode together with its name, its d_children[] array
 * and its entry_attrs[] array.
 */
static void free_ei(struct eventfs_inode *ei)
{
	kfree_const(ei->name);
	kfree(ei->d_children);
	kfree(ei->entry_attrs);
	kfree(ei);
}

/**
 * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
 * @ti: the tracefs_inode of the dentry
 * @dentry: dentry which has the reference to remove.
 *
 * Remove the association between a dentry from an eventfs_inode.
352 */ 353 void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) 354 { 355 struct eventfs_inode *ei; 356 int i; 357 358 mutex_lock(&eventfs_mutex); 359 360 ei = dentry->d_fsdata; 361 if (!ei) 362 goto out; 363 364 /* This could belong to one of the files of the ei */ 365 if (ei->dentry != dentry) { 366 for (i = 0; i < ei->nr_entries; i++) { 367 if (ei->d_children[i] == dentry) 368 break; 369 } 370 if (WARN_ON_ONCE(i == ei->nr_entries)) 371 goto out; 372 ei->d_children[i] = NULL; 373 } else if (ei->is_freed) { 374 free_ei(ei); 375 } else { 376 ei->dentry = NULL; 377 } 378 379 dentry->d_fsdata = NULL; 380 out: 381 mutex_unlock(&eventfs_mutex); 382 } 383 384 /** 385 * create_file_dentry - create a dentry for a file of an eventfs_inode 386 * @ei: the eventfs_inode that the file will be created under 387 * @idx: the index into the d_children[] of the @ei 388 * @parent: The parent dentry of the created file. 389 * @name: The name of the file to create 390 * @mode: The mode of the file. 391 * @data: The data to use to set the inode of the file with on open() 392 * @fops: The fops of the file to be created. 393 * @lookup: If called by the lookup routine, in which case, dput() the created dentry. 394 * 395 * Create a dentry for a file of an eventfs_inode @ei and place it into the 396 * address located at @e_dentry. If the @e_dentry already has a dentry, then 397 * just do a dget() on it and return. Otherwise create the dentry and attach it. 
398 */ 399 static struct dentry * 400 create_file_dentry(struct eventfs_inode *ei, int idx, 401 struct dentry *parent, const char *name, umode_t mode, void *data, 402 const struct file_operations *fops, bool lookup) 403 { 404 struct eventfs_attr *attr = NULL; 405 struct dentry **e_dentry = &ei->d_children[idx]; 406 struct dentry *dentry; 407 408 WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); 409 410 mutex_lock(&eventfs_mutex); 411 if (ei->is_freed) { 412 mutex_unlock(&eventfs_mutex); 413 return NULL; 414 } 415 /* If the e_dentry already has a dentry, use it */ 416 if (*e_dentry) { 417 /* lookup does not need to up the ref count */ 418 if (!lookup) 419 dget(*e_dentry); 420 mutex_unlock(&eventfs_mutex); 421 return *e_dentry; 422 } 423 424 /* ei->entry_attrs are protected by SRCU */ 425 if (ei->entry_attrs) 426 attr = &ei->entry_attrs[idx]; 427 428 mutex_unlock(&eventfs_mutex); 429 430 dentry = create_file(name, mode, attr, parent, data, fops); 431 432 mutex_lock(&eventfs_mutex); 433 434 if (IS_ERR_OR_NULL(dentry)) { 435 /* 436 * When the mutex was released, something else could have 437 * created the dentry for this e_dentry. In which case 438 * use that one. 439 * 440 * If ei->is_freed is set, the e_dentry is currently on its 441 * way to being freed, don't return it. If e_dentry is NULL 442 * it means it was already freed. 443 */ 444 if (ei->is_freed) 445 dentry = NULL; 446 else 447 dentry = *e_dentry; 448 /* The lookup does not need to up the dentry refcount */ 449 if (dentry && !lookup) 450 dget(dentry); 451 mutex_unlock(&eventfs_mutex); 452 return dentry; 453 } 454 455 if (!*e_dentry && !ei->is_freed) { 456 *e_dentry = dentry; 457 dentry->d_fsdata = ei; 458 } else { 459 /* 460 * Should never happen unless we get here due to being freed. 461 * Otherwise it means two dentries exist with the same name. 
462 */ 463 WARN_ON_ONCE(!ei->is_freed); 464 dentry = NULL; 465 } 466 mutex_unlock(&eventfs_mutex); 467 468 if (lookup) 469 dput(dentry); 470 471 return dentry; 472 } 473 474 /** 475 * eventfs_post_create_dir - post create dir routine 476 * @ei: eventfs_inode of recently created dir 477 * 478 * Map the meta-data of files within an eventfs dir to their parent dentry 479 */ 480 static void eventfs_post_create_dir(struct eventfs_inode *ei) 481 { 482 struct eventfs_inode *ei_child; 483 struct tracefs_inode *ti; 484 485 lockdep_assert_held(&eventfs_mutex); 486 487 /* srcu lock already held */ 488 /* fill parent-child relation */ 489 list_for_each_entry_srcu(ei_child, &ei->children, list, 490 srcu_read_lock_held(&eventfs_srcu)) { 491 ei_child->d_parent = ei->dentry; 492 } 493 494 ti = get_tracefs(ei->dentry->d_inode); 495 ti->private = ei; 496 } 497 498 /** 499 * create_dir_dentry - Create a directory dentry for the eventfs_inode 500 * @pei: The eventfs_inode parent of ei. 501 * @ei: The eventfs_inode to create the directory for 502 * @parent: The dentry of the parent of this directory 503 * @lookup: True if this is called by the lookup code 504 * 505 * This creates and attaches a directory dentry to the eventfs_inode @ei. 
506 */ 507 static struct dentry * 508 create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, 509 struct dentry *parent, bool lookup) 510 { 511 struct dentry *dentry = NULL; 512 513 WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); 514 515 mutex_lock(&eventfs_mutex); 516 if (pei->is_freed || ei->is_freed) { 517 mutex_unlock(&eventfs_mutex); 518 return NULL; 519 } 520 if (ei->dentry) { 521 /* If the dentry already has a dentry, use it */ 522 dentry = ei->dentry; 523 /* lookup does not need to up the ref count */ 524 if (!lookup) 525 dget(dentry); 526 mutex_unlock(&eventfs_mutex); 527 return dentry; 528 } 529 mutex_unlock(&eventfs_mutex); 530 531 dentry = create_dir(ei, parent); 532 533 mutex_lock(&eventfs_mutex); 534 535 if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { 536 /* 537 * When the mutex was released, something else could have 538 * created the dentry for this e_dentry. In which case 539 * use that one. 540 * 541 * If ei->is_freed is set, the e_dentry is currently on its 542 * way to being freed. 543 */ 544 dentry = ei->dentry; 545 if (dentry && !lookup) 546 dget(dentry); 547 mutex_unlock(&eventfs_mutex); 548 return dentry; 549 } 550 551 if (!ei->dentry && !ei->is_freed) { 552 ei->dentry = dentry; 553 eventfs_post_create_dir(ei); 554 dentry->d_fsdata = ei; 555 } else { 556 /* 557 * Should never happen unless we get here due to being freed. 558 * Otherwise it means two dentries exist with the same name. 
559 */ 560 WARN_ON_ONCE(!ei->is_freed); 561 dentry = NULL; 562 } 563 mutex_unlock(&eventfs_mutex); 564 565 if (lookup) 566 dput(dentry); 567 568 return dentry; 569 } 570 571 /** 572 * eventfs_root_lookup - lookup routine to create file/dir 573 * @dir: in which a lookup is being done 574 * @dentry: file/dir dentry 575 * @flags: Just passed to simple_lookup() 576 * 577 * Used to create dynamic file/dir with-in @dir, search with-in @ei 578 * list, if @dentry found go ahead and create the file/dir 579 */ 580 581 static struct dentry *eventfs_root_lookup(struct inode *dir, 582 struct dentry *dentry, 583 unsigned int flags) 584 { 585 const struct file_operations *fops; 586 const struct eventfs_entry *entry; 587 struct eventfs_inode *ei_child; 588 struct tracefs_inode *ti; 589 struct eventfs_inode *ei; 590 struct dentry *ei_dentry = NULL; 591 struct dentry *ret = NULL; 592 const char *name = dentry->d_name.name; 593 bool created = false; 594 umode_t mode; 595 void *data; 596 int idx; 597 int i; 598 int r; 599 600 ti = get_tracefs(dir); 601 if (!(ti->flags & TRACEFS_EVENT_INODE)) 602 return NULL; 603 604 /* Grab srcu to prevent the ei from going away */ 605 idx = srcu_read_lock(&eventfs_srcu); 606 607 /* 608 * Grab the eventfs_mutex to consistent value from ti->private. 
609 * This s 610 */ 611 mutex_lock(&eventfs_mutex); 612 ei = READ_ONCE(ti->private); 613 if (ei && !ei->is_freed) 614 ei_dentry = READ_ONCE(ei->dentry); 615 mutex_unlock(&eventfs_mutex); 616 617 if (!ei || !ei_dentry) 618 goto out; 619 620 data = ei->data; 621 622 list_for_each_entry_srcu(ei_child, &ei->children, list, 623 srcu_read_lock_held(&eventfs_srcu)) { 624 if (strcmp(ei_child->name, name) != 0) 625 continue; 626 ret = simple_lookup(dir, dentry, flags); 627 if (IS_ERR(ret)) 628 goto out; 629 create_dir_dentry(ei, ei_child, ei_dentry, true); 630 created = true; 631 break; 632 } 633 634 if (created) 635 goto out; 636 637 for (i = 0; i < ei->nr_entries; i++) { 638 entry = &ei->entries[i]; 639 if (strcmp(name, entry->name) == 0) { 640 void *cdata = data; 641 mutex_lock(&eventfs_mutex); 642 /* If ei->is_freed, then the event itself may be too */ 643 if (!ei->is_freed) 644 r = entry->callback(name, &mode, &cdata, &fops); 645 else 646 r = -1; 647 mutex_unlock(&eventfs_mutex); 648 if (r <= 0) 649 continue; 650 ret = simple_lookup(dir, dentry, flags); 651 if (IS_ERR(ret)) 652 goto out; 653 create_file_dentry(ei, i, ei_dentry, name, mode, cdata, 654 fops, true); 655 break; 656 } 657 } 658 out: 659 srcu_read_unlock(&eventfs_srcu, idx); 660 return ret; 661 } 662 663 struct dentry_list { 664 void *cursor; 665 struct dentry **dentries; 666 }; 667 668 /** 669 * eventfs_release - called to release eventfs file/dir 670 * @inode: inode to be released 671 * @file: file to be released (not used) 672 */ 673 static int eventfs_release(struct inode *inode, struct file *file) 674 { 675 struct tracefs_inode *ti; 676 struct dentry_list *dlist = file->private_data; 677 void *cursor; 678 int i; 679 680 ti = get_tracefs(inode); 681 if (!(ti->flags & TRACEFS_EVENT_INODE)) 682 return -EINVAL; 683 684 if (WARN_ON_ONCE(!dlist)) 685 return -EINVAL; 686 687 for (i = 0; dlist->dentries && dlist->dentries[i]; i++) { 688 dput(dlist->dentries[i]); 689 } 690 691 cursor = dlist->cursor; 692 
kfree(dlist->dentries); 693 kfree(dlist); 694 file->private_data = cursor; 695 return dcache_dir_close(inode, file); 696 } 697 698 static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt) 699 { 700 struct dentry **tmp; 701 702 tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS); 703 if (!tmp) 704 return -1; 705 tmp[cnt] = d; 706 tmp[cnt + 1] = NULL; 707 *dentries = tmp; 708 return 0; 709 } 710 711 /** 712 * dcache_dir_open_wrapper - eventfs open wrapper 713 * @inode: not used 714 * @file: dir to be opened (to create it's children) 715 * 716 * Used to dynamic create file/dir with-in @file, all the 717 * file/dir will be created. If already created then references 718 * will be increased 719 */ 720 static int dcache_dir_open_wrapper(struct inode *inode, struct file *file) 721 { 722 const struct file_operations *fops; 723 const struct eventfs_entry *entry; 724 struct eventfs_inode *ei_child; 725 struct tracefs_inode *ti; 726 struct eventfs_inode *ei; 727 struct dentry_list *dlist; 728 struct dentry **dentries = NULL; 729 struct dentry *parent = file_dentry(file); 730 struct dentry *d; 731 struct inode *f_inode = file_inode(file); 732 const char *name = parent->d_name.name; 733 umode_t mode; 734 void *data; 735 int cnt = 0; 736 int idx; 737 int ret; 738 int i; 739 int r; 740 741 ti = get_tracefs(f_inode); 742 if (!(ti->flags & TRACEFS_EVENT_INODE)) 743 return -EINVAL; 744 745 if (WARN_ON_ONCE(file->private_data)) 746 return -EINVAL; 747 748 idx = srcu_read_lock(&eventfs_srcu); 749 750 mutex_lock(&eventfs_mutex); 751 ei = READ_ONCE(ti->private); 752 mutex_unlock(&eventfs_mutex); 753 754 if (!ei) { 755 srcu_read_unlock(&eventfs_srcu, idx); 756 return -EINVAL; 757 } 758 759 760 data = ei->data; 761 762 dlist = kmalloc(sizeof(*dlist), GFP_KERNEL); 763 if (!dlist) { 764 srcu_read_unlock(&eventfs_srcu, idx); 765 return -ENOMEM; 766 } 767 768 inode_lock(parent->d_inode); 769 list_for_each_entry_srcu(ei_child, &ei->children, list, 770 
srcu_read_lock_held(&eventfs_srcu)) { 771 d = create_dir_dentry(ei, ei_child, parent, false); 772 if (d) { 773 ret = add_dentries(&dentries, d, cnt); 774 if (ret < 0) 775 break; 776 cnt++; 777 } 778 } 779 780 for (i = 0; i < ei->nr_entries; i++) { 781 void *cdata = data; 782 entry = &ei->entries[i]; 783 name = entry->name; 784 mutex_lock(&eventfs_mutex); 785 /* If ei->is_freed, then the event itself may be too */ 786 if (!ei->is_freed) 787 r = entry->callback(name, &mode, &cdata, &fops); 788 else 789 r = -1; 790 mutex_unlock(&eventfs_mutex); 791 if (r <= 0) 792 continue; 793 d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false); 794 if (d) { 795 ret = add_dentries(&dentries, d, cnt); 796 if (ret < 0) 797 break; 798 cnt++; 799 } 800 } 801 inode_unlock(parent->d_inode); 802 srcu_read_unlock(&eventfs_srcu, idx); 803 ret = dcache_dir_open(inode, file); 804 805 /* 806 * dcache_dir_open() sets file->private_data to a dentry cursor. 807 * Need to save that but also save all the dentries that were 808 * opened by this function. 809 */ 810 dlist->cursor = file->private_data; 811 dlist->dentries = dentries; 812 file->private_data = dlist; 813 return ret; 814 } 815 816 /* 817 * This just sets the file->private_data back to the cursor and back. 818 */ 819 static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx) 820 { 821 struct dentry_list *dlist = file->private_data; 822 int ret; 823 824 file->private_data = dlist->cursor; 825 ret = dcache_readdir(file, ctx); 826 dlist->cursor = file->private_data; 827 file->private_data = dlist; 828 return ret; 829 } 830 831 /** 832 * eventfs_create_dir - Create the eventfs_inode for this directory 833 * @name: The name of the directory to create. 834 * @parent: The eventfs_inode of the parent directory. 835 * @entries: A list of entries that represent the files under this directory 836 * @size: The number of @entries 837 * @data: The default data to pass to the files (an entry may override it). 
838 * 839 * This function creates the descriptor to represent a directory in the 840 * eventfs. This descriptor is an eventfs_inode, and it is returned to be 841 * used to create other children underneath. 842 * 843 * The @entries is an array of eventfs_entry structures which has: 844 * const char *name 845 * eventfs_callback callback; 846 * 847 * The name is the name of the file, and the callback is a pointer to a function 848 * that will be called when the file is reference (either by lookup or by 849 * reading a directory). The callback is of the prototype: 850 * 851 * int callback(const char *name, umode_t *mode, void **data, 852 * const struct file_operations **fops); 853 * 854 * When a file needs to be created, this callback will be called with 855 * name = the name of the file being created (so that the same callback 856 * may be used for multiple files). 857 * mode = a place to set the file's mode 858 * data = A pointer to @data, and the callback may replace it, which will 859 * cause the file created to pass the new data to the open() call. 860 * fops = the fops to use for the created file. 861 * 862 * NB. @callback is called while holding internal locks of the eventfs 863 * system. The callback must not call any code that might also call into 864 * the tracefs or eventfs system or it will risk creating a deadlock. 
865 */ 866 struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, 867 const struct eventfs_entry *entries, 868 int size, void *data) 869 { 870 struct eventfs_inode *ei; 871 872 if (!parent) 873 return ERR_PTR(-EINVAL); 874 875 ei = kzalloc(sizeof(*ei), GFP_KERNEL); 876 if (!ei) 877 return ERR_PTR(-ENOMEM); 878 879 ei->name = kstrdup_const(name, GFP_KERNEL); 880 if (!ei->name) { 881 kfree(ei); 882 return ERR_PTR(-ENOMEM); 883 } 884 885 if (size) { 886 ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL); 887 if (!ei->d_children) { 888 kfree_const(ei->name); 889 kfree(ei); 890 return ERR_PTR(-ENOMEM); 891 } 892 } 893 894 ei->entries = entries; 895 ei->nr_entries = size; 896 ei->data = data; 897 INIT_LIST_HEAD(&ei->children); 898 INIT_LIST_HEAD(&ei->list); 899 900 mutex_lock(&eventfs_mutex); 901 if (!parent->is_freed) { 902 list_add_tail(&ei->list, &parent->children); 903 ei->d_parent = parent->dentry; 904 } 905 mutex_unlock(&eventfs_mutex); 906 907 /* Was the parent freed? */ 908 if (list_empty(&ei->list)) { 909 free_ei(ei); 910 ei = NULL; 911 } 912 return ei; 913 } 914 915 /** 916 * eventfs_create_events_dir - create the top level events directory 917 * @name: The name of the top level directory to create. 918 * @parent: Parent dentry for this file in the tracefs directory. 919 * @entries: A list of entries that represent the files under this directory 920 * @size: The number of @entries 921 * @data: The default data to pass to the files (an entry may override it). 922 * 923 * This function creates the top of the trace event directory. 924 * 925 * See eventfs_create_dir() for use of @entries. 
926 */ 927 struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, 928 const struct eventfs_entry *entries, 929 int size, void *data) 930 { 931 struct dentry *dentry = tracefs_start_creating(name, parent); 932 struct eventfs_inode *ei; 933 struct tracefs_inode *ti; 934 struct inode *inode; 935 kuid_t uid; 936 kgid_t gid; 937 938 if (security_locked_down(LOCKDOWN_TRACEFS)) 939 return NULL; 940 941 if (IS_ERR(dentry)) 942 return ERR_CAST(dentry); 943 944 ei = kzalloc(sizeof(*ei), GFP_KERNEL); 945 if (!ei) 946 goto fail_ei; 947 948 inode = tracefs_get_inode(dentry->d_sb); 949 if (unlikely(!inode)) 950 goto fail; 951 952 if (size) { 953 ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL); 954 if (!ei->d_children) 955 goto fail; 956 } 957 958 ei->dentry = dentry; 959 ei->entries = entries; 960 ei->nr_entries = size; 961 ei->is_events = 1; 962 ei->data = data; 963 ei->name = kstrdup_const(name, GFP_KERNEL); 964 if (!ei->name) 965 goto fail; 966 967 /* Save the ownership of this directory */ 968 uid = d_inode(dentry->d_parent)->i_uid; 969 gid = d_inode(dentry->d_parent)->i_gid; 970 971 /* This is used as the default ownership of the files and directories */ 972 ei->attr.uid = uid; 973 ei->attr.gid = gid; 974 975 INIT_LIST_HEAD(&ei->children); 976 INIT_LIST_HEAD(&ei->list); 977 978 ti = get_tracefs(inode); 979 ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE; 980 ti->private = ei; 981 982 inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; 983 inode->i_uid = uid; 984 inode->i_gid = gid; 985 inode->i_op = &eventfs_root_dir_inode_operations; 986 inode->i_fop = &eventfs_file_operations; 987 988 dentry->d_fsdata = ei; 989 990 /* directory inodes start off with i_nlink == 2 (for "." 
entry) */ 991 inc_nlink(inode); 992 d_instantiate(dentry, inode); 993 inc_nlink(dentry->d_parent->d_inode); 994 fsnotify_mkdir(dentry->d_parent->d_inode, dentry); 995 tracefs_end_creating(dentry); 996 997 return ei; 998 999 fail: 1000 kfree(ei->d_children); 1001 kfree(ei); 1002 fail_ei: 1003 tracefs_failed_creating(dentry); 1004 return ERR_PTR(-ENOMEM); 1005 } 1006 1007 static LLIST_HEAD(free_list); 1008 1009 static void eventfs_workfn(struct work_struct *work) 1010 { 1011 struct eventfs_inode *ei, *tmp; 1012 struct llist_node *llnode; 1013 1014 llnode = llist_del_all(&free_list); 1015 llist_for_each_entry_safe(ei, tmp, llnode, llist) { 1016 /* This dput() matches the dget() from unhook_dentry() */ 1017 for (int i = 0; i < ei->nr_entries; i++) { 1018 if (ei->d_children[i]) 1019 dput(ei->d_children[i]); 1020 } 1021 /* This should only get here if it had a dentry */ 1022 if (!WARN_ON_ONCE(!ei->dentry)) 1023 dput(ei->dentry); 1024 } 1025 } 1026 1027 static DECLARE_WORK(eventfs_work, eventfs_workfn); 1028 1029 static void free_rcu_ei(struct rcu_head *head) 1030 { 1031 struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu); 1032 1033 if (ei->dentry) { 1034 /* Do not free the ei until all references of dentry are gone */ 1035 if (llist_add(&ei->llist, &free_list)) 1036 queue_work(system_unbound_wq, &eventfs_work); 1037 return; 1038 } 1039 1040 /* If the ei doesn't have a dentry, neither should its children */ 1041 for (int i = 0; i < ei->nr_entries; i++) { 1042 WARN_ON_ONCE(ei->d_children[i]); 1043 } 1044 1045 free_ei(ei); 1046 } 1047 1048 static void unhook_dentry(struct dentry *dentry) 1049 { 1050 if (!dentry) 1051 return; 1052 /* 1053 * Need to add a reference to the dentry that is expected by 1054 * simple_recursive_removal(), which will include a dput(). 1055 */ 1056 dget(dentry); 1057 1058 /* 1059 * Also add a reference for the dput() in eventfs_workfn(). 
1060 * That is required as that dput() will free the ei after 1061 * the SRCU grace period is over. 1062 */ 1063 dget(dentry); 1064 } 1065 1066 /** 1067 * eventfs_remove_rec - remove eventfs dir or file from list 1068 * @ei: eventfs_inode to be removed. 1069 * @level: prevent recursion from going more than 3 levels deep. 1070 * 1071 * This function recursively removes eventfs_inodes which 1072 * contains info of files and/or directories. 1073 */ 1074 static void eventfs_remove_rec(struct eventfs_inode *ei, int level) 1075 { 1076 struct eventfs_inode *ei_child; 1077 1078 if (!ei) 1079 return; 1080 /* 1081 * Check recursion depth. It should never be greater than 3: 1082 * 0 - events/ 1083 * 1 - events/group/ 1084 * 2 - events/group/event/ 1085 * 3 - events/group/event/file 1086 */ 1087 if (WARN_ON_ONCE(level > 3)) 1088 return; 1089 1090 /* search for nested folders or files */ 1091 list_for_each_entry_srcu(ei_child, &ei->children, list, 1092 lockdep_is_held(&eventfs_mutex)) { 1093 /* Children only have dentry if parent does */ 1094 WARN_ON_ONCE(ei_child->dentry && !ei->dentry); 1095 eventfs_remove_rec(ei_child, level + 1); 1096 } 1097 1098 1099 ei->is_freed = 1; 1100 1101 for (int i = 0; i < ei->nr_entries; i++) { 1102 if (ei->d_children[i]) { 1103 /* Children only have dentry if parent does */ 1104 WARN_ON_ONCE(!ei->dentry); 1105 unhook_dentry(ei->d_children[i]); 1106 } 1107 } 1108 1109 unhook_dentry(ei->dentry); 1110 1111 list_del_rcu(&ei->list); 1112 call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei); 1113 } 1114 1115 /** 1116 * eventfs_remove_dir - remove eventfs dir or file from list 1117 * @ei: eventfs_inode to be removed. 
1118 * 1119 * This function acquire the eventfs_mutex lock and call eventfs_remove_rec() 1120 */ 1121 void eventfs_remove_dir(struct eventfs_inode *ei) 1122 { 1123 struct dentry *dentry; 1124 1125 if (!ei) 1126 return; 1127 1128 mutex_lock(&eventfs_mutex); 1129 dentry = ei->dentry; 1130 eventfs_remove_rec(ei, 0); 1131 mutex_unlock(&eventfs_mutex); 1132 1133 /* 1134 * If any of the ei children has a dentry, then the ei itself 1135 * must have a dentry. 1136 */ 1137 if (dentry) 1138 simple_recursive_removal(dentry, NULL); 1139 } 1140 1141 /** 1142 * eventfs_remove_events_dir - remove the top level eventfs directory 1143 * @ei: the event_inode returned by eventfs_create_events_dir(). 1144 * 1145 * This function removes the events main directory 1146 */ 1147 void eventfs_remove_events_dir(struct eventfs_inode *ei) 1148 { 1149 struct dentry *dentry; 1150 1151 dentry = ei->dentry; 1152 eventfs_remove_dir(ei); 1153 1154 /* 1155 * Matches the dget() done by tracefs_start_creating() 1156 * in eventfs_create_events_dir() when it the dentry was 1157 * created. In other words, it's a normal dentry that 1158 * sticks around while the other ei->dentry are created 1159 * and destroyed dynamically. 1160 */ 1161 dput(dentry); 1162 } 1163