1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * event_inode.c - part of tracefs, a pseudo file system for activating tracing 4 * 5 * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org> 6 * Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com> 7 * Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org> 8 * 9 * eventfs is used to dynamically create inodes and dentries based on the 10 * meta data provided by the tracing system. 11 * 12 * eventfs stores the meta-data of files/dirs and holds off on creating 13 * inodes/dentries of the files. When accessed, the eventfs will create the 14 * inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up 15 * and delete the inodes/dentries when they are no longer referenced. 16 */ 17 #include <linux/fsnotify.h> 18 #include <linux/fs.h> 19 #include <linux/namei.h> 20 #include <linux/workqueue.h> 21 #include <linux/security.h> 22 #include <linux/tracefs.h> 23 #include <linux/kref.h> 24 #include <linux/delay.h> 25 #include "internal.h" 26 27 /* 28 * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access 29 * to the ei->dentry must be done under this mutex and after checking 30 * if ei->is_freed is not set. The ei->dentry is released under the 31 * mutex at the same time ei->is_freed is set. If ei->is_freed is set 32 * then the ei->dentry is invalid. 33 */ 34 static DEFINE_MUTEX(eventfs_mutex); 35 36 /* 37 * The eventfs_inode (ei) itself is protected by SRCU. It is released from 38 * its parent's list and will have is_freed set (under eventfs_mutex). 39 * After the SRCU grace period is over, the ei may be freed. 40 */ 41 DEFINE_STATIC_SRCU(eventfs_srcu); 42 43 /* Mode is unsigned short, use the upper bits for flags */ 44 enum { 45 EVENTFS_SAVE_MODE = BIT(16), 46 EVENTFS_SAVE_UID = BIT(17), 47 EVENTFS_SAVE_GID = BIT(18), 48 }; 49 50 #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) 51 52 static struct dentry *eventfs_root_lookup(struct inode *dir, 53 struct dentry *dentry, 54 unsigned int flags); 55 static int dcache_dir_open_wrapper(struct inode *inode, struct file *file); 56 static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx); 57 static int eventfs_release(struct inode *inode, struct file *file); 58 59 static void update_attr(struct eventfs_attr *attr, struct iattr *iattr) 60 { 61 unsigned int ia_valid = iattr->ia_valid; 62 63 if (ia_valid & ATTR_MODE) { 64 attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) | 65 (iattr->ia_mode & EVENTFS_MODE_MASK) | 66 EVENTFS_SAVE_MODE; 67 } 68 if (ia_valid & ATTR_UID) { 69 attr->mode |= EVENTFS_SAVE_UID; 70 attr->uid = iattr->ia_uid; 71 } 72 if (ia_valid & ATTR_GID) { 73 attr->mode |= EVENTFS_SAVE_GID; 74 attr->gid = iattr->ia_gid; 75 } 76 } 77 78 static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, 79 struct iattr *iattr) 80 { 81 const struct eventfs_entry *entry; 82 struct eventfs_inode *ei; 83 const char *name; 84 int ret; 85 86 mutex_lock(&eventfs_mutex); 87 ei = dentry->d_fsdata; 88 if (ei->is_freed) { 89 /* Do not allow changes if the event is about to be removed. */ 90 mutex_unlock(&eventfs_mutex); 91 return -ENODEV; 92 } 93 94 /* Preallocate the children mode array if necessary */ 95 if (!(dentry->d_inode->i_mode & S_IFDIR)) { 96 if (!ei->entry_attrs) { 97 ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries, 98 GFP_KERNEL); 99 if (!ei->entry_attrs) { 100 ret = -ENOMEM; 101 goto out; 102 } 103 } 104 } 105 106 ret = simple_setattr(idmap, dentry, iattr); 107 if (ret < 0) 108 goto out; 109 110 /* 111 * If this is a dir, then update the ei cache, only the file 112 * mode is saved in the ei->m_children, and the ownership is 113 * determined by the parent directory. 114 */ 115 if (dentry->d_inode->i_mode & S_IFDIR) { 116 update_attr(&ei->attr, iattr); 117 118 } else { 119 name = dentry->d_name.name; 120 121 for (int i = 0; i < ei->nr_entries; i++) { 122 entry = &ei->entries[i]; 123 if (strcmp(name, entry->name) == 0) { 124 update_attr(&ei->entry_attrs[i], iattr); 125 break; 126 } 127 } 128 } 129 out: 130 mutex_unlock(&eventfs_mutex); 131 return ret; 132 } 133 134 static const struct inode_operations eventfs_root_dir_inode_operations = { 135 .lookup = eventfs_root_lookup, 136 .setattr = eventfs_set_attr, 137 }; 138 139 static const struct inode_operations eventfs_file_inode_operations = { 140 .setattr = eventfs_set_attr, 141 }; 142 143 static const struct file_operations eventfs_file_operations = { 144 .open = dcache_dir_open_wrapper, 145 .read = generic_read_dir, 146 .iterate_shared = dcache_readdir_wrapper, 147 .llseek = generic_file_llseek, 148 .release = eventfs_release, 149 }; 150 151 static void update_inode_attr(struct inode *inode, struct eventfs_attr *attr, umode_t mode) 152 { 153 if (!attr) { 154 inode->i_mode = mode; 155 return; 156 } 157 158 if (attr->mode & EVENTFS_SAVE_MODE) 159 inode->i_mode = attr->mode & EVENTFS_MODE_MASK; 160 else 161 inode->i_mode = mode; 162 163 if (attr->mode & EVENTFS_SAVE_UID) 164 inode->i_uid = attr->uid; 165 166 if (attr->mode & EVENTFS_SAVE_GID) 167 inode->i_gid = attr->gid; 168 } 169 170 /** 171 * create_file - create a file in the tracefs filesystem 172 * @name: the name of the file to create. 173 * @mode: the permission that the file should have. 174 * @attr: saved attributes changed by user 175 * @parent: parent dentry for this file. 176 * @data: something that the caller will want to get to later on. 177 * @fop: struct file_operations that should be used for this file. 178 * 179 * This function creates a dentry that represents a file in the eventsfs_inode 180 * directory. The inode.i_private pointer will point to @data in the open() 181 * call. 182 */ 183 static struct dentry *create_file(const char *name, umode_t mode, 184 struct eventfs_attr *attr, 185 struct dentry *parent, void *data, 186 const struct file_operations *fop) 187 { 188 struct tracefs_inode *ti; 189 struct dentry *dentry; 190 struct inode *inode; 191 192 if (!(mode & S_IFMT)) 193 mode |= S_IFREG; 194 195 if (WARN_ON_ONCE(!S_ISREG(mode))) 196 return NULL; 197 198 WARN_ON_ONCE(!parent); 199 dentry = eventfs_start_creating(name, parent); 200 201 if (IS_ERR(dentry)) 202 return dentry; 203 204 inode = tracefs_get_inode(dentry->d_sb); 205 if (unlikely(!inode)) 206 return eventfs_failed_creating(dentry); 207 208 /* If the user updated the directory's attributes, use them */ 209 update_inode_attr(inode, attr, mode); 210 211 inode->i_op = &eventfs_file_inode_operations; 212 inode->i_fop = fop; 213 inode->i_private = data; 214 215 ti = get_tracefs(inode); 216 ti->flags |= TRACEFS_EVENT_INODE; 217 d_instantiate(dentry, inode); 218 fsnotify_create(dentry->d_parent->d_inode, dentry); 219 return eventfs_end_creating(dentry); 220 }; 221 222 /** 223 * create_dir - create a dir in the tracefs filesystem 224 * @ei: the eventfs_inode that represents the directory to create 225 * @parent: parent dentry for this file. 226 * 227 * This function will create a dentry for a directory represented by 228 * a eventfs_inode. 229 */ 230 static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent) 231 { 232 struct tracefs_inode *ti; 233 struct dentry *dentry; 234 struct inode *inode; 235 236 dentry = eventfs_start_creating(ei->name, parent); 237 if (IS_ERR(dentry)) 238 return dentry; 239 240 inode = tracefs_get_inode(dentry->d_sb); 241 if (unlikely(!inode)) 242 return eventfs_failed_creating(dentry); 243 244 /* If the user updated the directory's attributes, use them */ 245 update_inode_attr(inode, &ei->attr, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO); 246 247 inode->i_op = &eventfs_root_dir_inode_operations; 248 inode->i_fop = &eventfs_file_operations; 249 250 ti = get_tracefs(inode); 251 ti->flags |= TRACEFS_EVENT_INODE; 252 253 inc_nlink(inode); 254 d_instantiate(dentry, inode); 255 inc_nlink(dentry->d_parent->d_inode); 256 fsnotify_mkdir(dentry->d_parent->d_inode, dentry); 257 return eventfs_end_creating(dentry); 258 } 259 260 static void free_ei(struct eventfs_inode *ei) 261 { 262 kfree_const(ei->name); 263 kfree(ei->d_children); 264 kfree(ei->entry_attrs); 265 kfree(ei); 266 } 267 268 /** 269 * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode 270 * @ti: the tracefs_inode of the dentry 271 * @dentry: dentry which has the reference to remove. 272 * 273 * Remove the association between a dentry from an eventfs_inode. 274 */ 275 void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) 276 { 277 struct eventfs_inode *ei; 278 int i; 279 280 mutex_lock(&eventfs_mutex); 281 282 ei = dentry->d_fsdata; 283 if (!ei) 284 goto out; 285 286 /* This could belong to one of the files of the ei */ 287 if (ei->dentry != dentry) { 288 for (i = 0; i < ei->nr_entries; i++) { 289 if (ei->d_children[i] == dentry) 290 break; 291 } 292 if (WARN_ON_ONCE(i == ei->nr_entries)) 293 goto out; 294 ei->d_children[i] = NULL; 295 } else if (ei->is_freed) { 296 free_ei(ei); 297 } else { 298 ei->dentry = NULL; 299 } 300 301 dentry->d_fsdata = NULL; 302 out: 303 mutex_unlock(&eventfs_mutex); 304 } 305 306 /** 307 * create_file_dentry - create a dentry for a file of an eventfs_inode 308 * @ei: the eventfs_inode that the file will be created under 309 * @idx: the index into the d_children[] of the @ei 310 * @parent: The parent dentry of the created file. 311 * @name: The name of the file to create 312 * @mode: The mode of the file. 313 * @data: The data to use to set the inode of the file with on open() 314 * @fops: The fops of the file to be created. 315 * @lookup: If called by the lookup routine, in which case, dput() the created dentry. 316 * 317 * Create a dentry for a file of an eventfs_inode @ei and place it into the 318 * address located at @e_dentry. If the @e_dentry already has a dentry, then 319 * just do a dget() on it and return. Otherwise create the dentry and attach it. 320 */ 321 static struct dentry * 322 create_file_dentry(struct eventfs_inode *ei, int idx, 323 struct dentry *parent, const char *name, umode_t mode, void *data, 324 const struct file_operations *fops, bool lookup) 325 { 326 struct eventfs_attr *attr = NULL; 327 struct dentry **e_dentry = &ei->d_children[idx]; 328 struct dentry *dentry; 329 bool invalidate = false; 330 331 mutex_lock(&eventfs_mutex); 332 if (ei->is_freed) { 333 mutex_unlock(&eventfs_mutex); 334 return NULL; 335 } 336 /* If the e_dentry already has a dentry, use it */ 337 if (*e_dentry) { 338 /* lookup does not need to up the ref count */ 339 if (!lookup) 340 dget(*e_dentry); 341 mutex_unlock(&eventfs_mutex); 342 return *e_dentry; 343 } 344 345 /* ei->entry_attrs are protected by SRCU */ 346 if (ei->entry_attrs) 347 attr = &ei->entry_attrs[idx]; 348 349 mutex_unlock(&eventfs_mutex); 350 351 /* The lookup already has the parent->d_inode locked */ 352 if (!lookup) 353 inode_lock(parent->d_inode); 354 355 dentry = create_file(name, mode, attr, parent, data, fops); 356 357 if (!lookup) 358 inode_unlock(parent->d_inode); 359 360 mutex_lock(&eventfs_mutex); 361 362 if (IS_ERR_OR_NULL(dentry)) { 363 /* 364 * When the mutex was released, something else could have 365 * created the dentry for this e_dentry. In which case 366 * use that one. 367 * 368 * Note, with the mutex held, the e_dentry cannot have content 369 * and the ei->is_freed be true at the same time. 370 */ 371 dentry = *e_dentry; 372 if (WARN_ON_ONCE(dentry && ei->is_freed)) 373 dentry = NULL; 374 /* The lookup does not need to up the dentry refcount */ 375 if (dentry && !lookup) 376 dget(dentry); 377 mutex_unlock(&eventfs_mutex); 378 return dentry; 379 } 380 381 if (!*e_dentry && !ei->is_freed) { 382 *e_dentry = dentry; 383 dentry->d_fsdata = ei; 384 } else { 385 /* 386 * Should never happen unless we get here due to being freed. 387 * Otherwise it means two dentries exist with the same name. 388 */ 389 WARN_ON_ONCE(!ei->is_freed); 390 invalidate = true; 391 } 392 mutex_unlock(&eventfs_mutex); 393 394 if (invalidate) 395 d_invalidate(dentry); 396 397 if (lookup || invalidate) 398 dput(dentry); 399 400 return invalidate ? NULL : dentry; 401 } 402 403 /** 404 * eventfs_post_create_dir - post create dir routine 405 * @ei: eventfs_inode of recently created dir 406 * 407 * Map the meta-data of files within an eventfs dir to their parent dentry 408 */ 409 static void eventfs_post_create_dir(struct eventfs_inode *ei) 410 { 411 struct eventfs_inode *ei_child; 412 struct tracefs_inode *ti; 413 414 lockdep_assert_held(&eventfs_mutex); 415 416 /* srcu lock already held */ 417 /* fill parent-child relation */ 418 list_for_each_entry_srcu(ei_child, &ei->children, list, 419 srcu_read_lock_held(&eventfs_srcu)) { 420 ei_child->d_parent = ei->dentry; 421 } 422 423 ti = get_tracefs(ei->dentry->d_inode); 424 ti->private = ei; 425 } 426 427 /** 428 * create_dir_dentry - Create a directory dentry for the eventfs_inode 429 * @pei: The eventfs_inode parent of ei. 430 * @ei: The eventfs_inode to create the directory for 431 * @parent: The dentry of the parent of this directory 432 * @lookup: True if this is called by the lookup code 433 * 434 * This creates and attaches a directory dentry to the eventfs_inode @ei. 435 */ 436 static struct dentry * 437 create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, 438 struct dentry *parent, bool lookup) 439 { 440 bool invalidate = false; 441 struct dentry *dentry = NULL; 442 443 mutex_lock(&eventfs_mutex); 444 if (pei->is_freed || ei->is_freed) { 445 mutex_unlock(&eventfs_mutex); 446 return NULL; 447 } 448 if (ei->dentry) { 449 /* If the dentry already has a dentry, use it */ 450 dentry = ei->dentry; 451 /* lookup does not need to up the ref count */ 452 if (!lookup) 453 dget(dentry); 454 mutex_unlock(&eventfs_mutex); 455 return dentry; 456 } 457 mutex_unlock(&eventfs_mutex); 458 459 /* The lookup already has the parent->d_inode locked */ 460 if (!lookup) 461 inode_lock(parent->d_inode); 462 463 dentry = create_dir(ei, parent); 464 465 if (!lookup) 466 inode_unlock(parent->d_inode); 467 468 mutex_lock(&eventfs_mutex); 469 470 if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { 471 /* 472 * When the mutex was released, something else could have 473 * created the dentry for this e_dentry. In which case 474 * use that one. 475 * 476 * Note, with the mutex held, the e_dentry cannot have content 477 * and the ei->is_freed be true at the same time. 478 */ 479 dentry = ei->dentry; 480 if (dentry && !lookup) 481 dget(dentry); 482 mutex_unlock(&eventfs_mutex); 483 return dentry; 484 } 485 486 if (!ei->dentry && !ei->is_freed) { 487 ei->dentry = dentry; 488 eventfs_post_create_dir(ei); 489 dentry->d_fsdata = ei; 490 } else { 491 /* 492 * Should never happen unless we get here due to being freed. 493 * Otherwise it means two dentries exist with the same name. 494 */ 495 WARN_ON_ONCE(!ei->is_freed); 496 invalidate = true; 497 } 498 mutex_unlock(&eventfs_mutex); 499 if (invalidate) 500 d_invalidate(dentry); 501 502 if (lookup || invalidate) 503 dput(dentry); 504 505 return invalidate ? NULL : dentry; 506 } 507 508 /** 509 * eventfs_root_lookup - lookup routine to create file/dir 510 * @dir: in which a lookup is being done 511 * @dentry: file/dir dentry 512 * @flags: Just passed to simple_lookup() 513 * 514 * Used to create dynamic file/dir with-in @dir, search with-in @ei 515 * list, if @dentry found go ahead and create the file/dir 516 */ 517 518 static struct dentry *eventfs_root_lookup(struct inode *dir, 519 struct dentry *dentry, 520 unsigned int flags) 521 { 522 const struct file_operations *fops; 523 const struct eventfs_entry *entry; 524 struct eventfs_inode *ei_child; 525 struct tracefs_inode *ti; 526 struct eventfs_inode *ei; 527 struct dentry *ei_dentry = NULL; 528 struct dentry *ret = NULL; 529 const char *name = dentry->d_name.name; 530 bool created = false; 531 umode_t mode; 532 void *data; 533 int idx; 534 int i; 535 int r; 536 537 ti = get_tracefs(dir); 538 if (!(ti->flags & TRACEFS_EVENT_INODE)) 539 return NULL; 540 541 /* Grab srcu to prevent the ei from going away */ 542 idx = srcu_read_lock(&eventfs_srcu); 543 544 /* 545 * Grab the eventfs_mutex to consistent value from ti->private. 546 * This s 547 */ 548 mutex_lock(&eventfs_mutex); 549 ei = READ_ONCE(ti->private); 550 if (ei && !ei->is_freed) 551 ei_dentry = READ_ONCE(ei->dentry); 552 mutex_unlock(&eventfs_mutex); 553 554 if (!ei || !ei_dentry) 555 goto out; 556 557 data = ei->data; 558 559 list_for_each_entry_srcu(ei_child, &ei->children, list, 560 srcu_read_lock_held(&eventfs_srcu)) { 561 if (strcmp(ei_child->name, name) != 0) 562 continue; 563 ret = simple_lookup(dir, dentry, flags); 564 create_dir_dentry(ei, ei_child, ei_dentry, true); 565 created = true; 566 break; 567 } 568 569 if (created) 570 goto out; 571 572 for (i = 0; i < ei->nr_entries; i++) { 573 entry = &ei->entries[i]; 574 if (strcmp(name, entry->name) == 0) { 575 void *cdata = data; 576 mutex_lock(&eventfs_mutex); 577 /* If ei->is_freed, then the event itself may be too */ 578 if (!ei->is_freed) 579 r = entry->callback(name, &mode, &cdata, &fops); 580 else 581 r = -1; 582 mutex_unlock(&eventfs_mutex); 583 if (r <= 0) 584 continue; 585 ret = simple_lookup(dir, dentry, flags); 586 create_file_dentry(ei, i, ei_dentry, name, mode, cdata, 587 fops, true); 588 break; 589 } 590 } 591 out: 592 srcu_read_unlock(&eventfs_srcu, idx); 593 return ret; 594 } 595 596 struct dentry_list { 597 void *cursor; 598 struct dentry **dentries; 599 }; 600 601 /** 602 * eventfs_release - called to release eventfs file/dir 603 * @inode: inode to be released 604 * @file: file to be released (not used) 605 */ 606 static int eventfs_release(struct inode *inode, struct file *file) 607 { 608 struct tracefs_inode *ti; 609 struct dentry_list *dlist = file->private_data; 610 void *cursor; 611 int i; 612 613 ti = get_tracefs(inode); 614 if (!(ti->flags & TRACEFS_EVENT_INODE)) 615 return -EINVAL; 616 617 if (WARN_ON_ONCE(!dlist)) 618 return -EINVAL; 619 620 for (i = 0; dlist->dentries && dlist->dentries[i]; i++) { 621 dput(dlist->dentries[i]); 622 } 623 624 cursor = dlist->cursor; 625 kfree(dlist->dentries); 626 kfree(dlist); 627 file->private_data = cursor; 628 return dcache_dir_close(inode, file); 629 } 630 631 static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt) 632 { 633 struct dentry **tmp; 634 635 tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_KERNEL); 636 if (!tmp) 637 return -1; 638 tmp[cnt] = d; 639 tmp[cnt + 1] = NULL; 640 *dentries = tmp; 641 return 0; 642 } 643 644 /** 645 * dcache_dir_open_wrapper - eventfs open wrapper 646 * @inode: not used 647 * @file: dir to be opened (to create it's children) 648 * 649 * Used to dynamic create file/dir with-in @file, all the 650 * file/dir will be created. If already created then references 651 * will be increased 652 */ 653 static int dcache_dir_open_wrapper(struct inode *inode, struct file *file) 654 { 655 const struct file_operations *fops; 656 const struct eventfs_entry *entry; 657 struct eventfs_inode *ei_child; 658 struct tracefs_inode *ti; 659 struct eventfs_inode *ei; 660 struct dentry_list *dlist; 661 struct dentry **dentries = NULL; 662 struct dentry *parent = file_dentry(file); 663 struct dentry *d; 664 struct inode *f_inode = file_inode(file); 665 const char *name = parent->d_name.name; 666 umode_t mode; 667 void *data; 668 int cnt = 0; 669 int idx; 670 int ret; 671 int i; 672 int r; 673 674 ti = get_tracefs(f_inode); 675 if (!(ti->flags & TRACEFS_EVENT_INODE)) 676 return -EINVAL; 677 678 if (WARN_ON_ONCE(file->private_data)) 679 return -EINVAL; 680 681 idx = srcu_read_lock(&eventfs_srcu); 682 683 mutex_lock(&eventfs_mutex); 684 ei = READ_ONCE(ti->private); 685 mutex_unlock(&eventfs_mutex); 686 687 if (!ei) { 688 srcu_read_unlock(&eventfs_srcu, idx); 689 return -EINVAL; 690 } 691 692 693 data = ei->data; 694 695 dlist = kmalloc(sizeof(*dlist), GFP_KERNEL); 696 if (!dlist) { 697 srcu_read_unlock(&eventfs_srcu, idx); 698 return -ENOMEM; 699 } 700 701 list_for_each_entry_srcu(ei_child, &ei->children, list, 702 srcu_read_lock_held(&eventfs_srcu)) { 703 d = create_dir_dentry(ei, ei_child, parent, false); 704 if (d) { 705 ret = add_dentries(&dentries, d, cnt); 706 if (ret < 0) 707 break; 708 cnt++; 709 } 710 } 711 712 for (i = 0; i < ei->nr_entries; i++) { 713 void *cdata = data; 714 entry = &ei->entries[i]; 715 name = entry->name; 716 mutex_lock(&eventfs_mutex); 717 /* If ei->is_freed, then the event itself may be too */ 718 if (!ei->is_freed) 719 r = entry->callback(name, &mode, &cdata, &fops); 720 else 721 r = -1; 722 mutex_unlock(&eventfs_mutex); 723 if (r <= 0) 724 continue; 725 d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false); 726 if (d) { 727 ret = add_dentries(&dentries, d, cnt); 728 if (ret < 0) 729 break; 730 cnt++; 731 } 732 } 733 srcu_read_unlock(&eventfs_srcu, idx); 734 ret = dcache_dir_open(inode, file); 735 736 /* 737 * dcache_dir_open() sets file->private_data to a dentry cursor. 738 * Need to save that but also save all the dentries that were 739 * opened by this function. 740 */ 741 dlist->cursor = file->private_data; 742 dlist->dentries = dentries; 743 file->private_data = dlist; 744 return ret; 745 } 746 747 /* 748 * This just sets the file->private_data back to the cursor and back. 749 */ 750 static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx) 751 { 752 struct dentry_list *dlist = file->private_data; 753 int ret; 754 755 file->private_data = dlist->cursor; 756 ret = dcache_readdir(file, ctx); 757 dlist->cursor = file->private_data; 758 file->private_data = dlist; 759 return ret; 760 } 761 762 /** 763 * eventfs_create_dir - Create the eventfs_inode for this directory 764 * @name: The name of the directory to create. 765 * @parent: The eventfs_inode of the parent directory. 766 * @entries: A list of entries that represent the files under this directory 767 * @size: The number of @entries 768 * @data: The default data to pass to the files (an entry may override it). 769 * 770 * This function creates the descriptor to represent a directory in the 771 * eventfs. This descriptor is an eventfs_inode, and it is returned to be 772 * used to create other children underneath. 773 * 774 * The @entries is an array of eventfs_entry structures which has: 775 * const char *name 776 * eventfs_callback callback; 777 * 778 * The name is the name of the file, and the callback is a pointer to a function 779 * that will be called when the file is reference (either by lookup or by 780 * reading a directory). The callback is of the prototype: 781 * 782 * int callback(const char *name, umode_t *mode, void **data, 783 * const struct file_operations **fops); 784 * 785 * When a file needs to be created, this callback will be called with 786 * name = the name of the file being created (so that the same callback 787 * may be used for multiple files). 788 * mode = a place to set the file's mode 789 * data = A pointer to @data, and the callback may replace it, which will 790 * cause the file created to pass the new data to the open() call. 791 * fops = the fops to use for the created file. 792 * 793 * NB. @callback is called while holding internal locks of the eventfs 794 * system. The callback must not call any code that might also call into 795 * the tracefs or eventfs system or it will risk creating a deadlock. 796 */ 797 struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, 798 const struct eventfs_entry *entries, 799 int size, void *data) 800 { 801 struct eventfs_inode *ei; 802 803 if (!parent) 804 return ERR_PTR(-EINVAL); 805 806 ei = kzalloc(sizeof(*ei), GFP_KERNEL); 807 if (!ei) 808 return ERR_PTR(-ENOMEM); 809 810 ei->name = kstrdup_const(name, GFP_KERNEL); 811 if (!ei->name) { 812 kfree(ei); 813 return ERR_PTR(-ENOMEM); 814 } 815 816 if (size) { 817 ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL); 818 if (!ei->d_children) { 819 kfree_const(ei->name); 820 kfree(ei); 821 return ERR_PTR(-ENOMEM); 822 } 823 } 824 825 ei->entries = entries; 826 ei->nr_entries = size; 827 ei->data = data; 828 INIT_LIST_HEAD(&ei->children); 829 INIT_LIST_HEAD(&ei->list); 830 831 mutex_lock(&eventfs_mutex); 832 if (!parent->is_freed) { 833 list_add_tail(&ei->list, &parent->children); 834 ei->d_parent = parent->dentry; 835 } 836 mutex_unlock(&eventfs_mutex); 837 838 /* Was the parent freed? */ 839 if (list_empty(&ei->list)) { 840 free_ei(ei); 841 ei = NULL; 842 } 843 return ei; 844 } 845 846 /** 847 * eventfs_create_events_dir - create the top level events directory 848 * @name: The name of the top level directory to create. 849 * @parent: Parent dentry for this file in the tracefs directory. 850 * @entries: A list of entries that represent the files under this directory 851 * @size: The number of @entries 852 * @data: The default data to pass to the files (an entry may override it). 853 * 854 * This function creates the top of the trace event directory. 855 * 856 * See eventfs_create_dir() for use of @entries. 857 */ 858 struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, 859 const struct eventfs_entry *entries, 860 int size, void *data) 861 { 862 struct dentry *dentry = tracefs_start_creating(name, parent); 863 struct eventfs_inode *ei; 864 struct tracefs_inode *ti; 865 struct inode *inode; 866 867 if (security_locked_down(LOCKDOWN_TRACEFS)) 868 return NULL; 869 870 if (IS_ERR(dentry)) 871 return ERR_CAST(dentry); 872 873 ei = kzalloc(sizeof(*ei), GFP_KERNEL); 874 if (!ei) 875 goto fail_ei; 876 877 inode = tracefs_get_inode(dentry->d_sb); 878 if (unlikely(!inode)) 879 goto fail; 880 881 if (size) { 882 ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL); 883 if (!ei->d_children) 884 goto fail; 885 } 886 887 ei->dentry = dentry; 888 ei->entries = entries; 889 ei->nr_entries = size; 890 ei->data = data; 891 ei->name = kstrdup_const(name, GFP_KERNEL); 892 if (!ei->name) 893 goto fail; 894 895 INIT_LIST_HEAD(&ei->children); 896 INIT_LIST_HEAD(&ei->list); 897 898 ti = get_tracefs(inode); 899 ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE; 900 ti->private = ei; 901 902 inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; 903 inode->i_op = &eventfs_root_dir_inode_operations; 904 inode->i_fop = &eventfs_file_operations; 905 906 dentry->d_fsdata = ei; 907 908 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 909 inc_nlink(inode); 910 d_instantiate(dentry, inode); 911 inc_nlink(dentry->d_parent->d_inode); 912 fsnotify_mkdir(dentry->d_parent->d_inode, dentry); 913 tracefs_end_creating(dentry); 914 915 return ei; 916 917 fail: 918 kfree(ei->d_children); 919 kfree(ei); 920 fail_ei: 921 tracefs_failed_creating(dentry); 922 return ERR_PTR(-ENOMEM); 923 } 924 925 static LLIST_HEAD(free_list); 926 927 static void eventfs_workfn(struct work_struct *work) 928 { 929 struct eventfs_inode *ei, *tmp; 930 struct llist_node *llnode; 931 932 llnode = llist_del_all(&free_list); 933 llist_for_each_entry_safe(ei, tmp, llnode, llist) { 934 /* This dput() matches the dget() from unhook_dentry() */ 935 for (int i = 0; i < ei->nr_entries; i++) { 936 if (ei->d_children[i]) 937 dput(ei->d_children[i]); 938 } 939 /* This should only get here if it had a dentry */ 940 if (!WARN_ON_ONCE(!ei->dentry)) 941 dput(ei->dentry); 942 } 943 } 944 945 static DECLARE_WORK(eventfs_work, eventfs_workfn); 946 947 static void free_rcu_ei(struct rcu_head *head) 948 { 949 struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu); 950 951 if (ei->dentry) { 952 /* Do not free the ei until all references of dentry are gone */ 953 if (llist_add(&ei->llist, &free_list)) 954 queue_work(system_unbound_wq, &eventfs_work); 955 return; 956 } 957 958 /* If the ei doesn't have a dentry, neither should its children */ 959 for (int i = 0; i < ei->nr_entries; i++) { 960 WARN_ON_ONCE(ei->d_children[i]); 961 } 962 963 free_ei(ei); 964 } 965 966 static void unhook_dentry(struct dentry *dentry) 967 { 968 if (!dentry) 969 return; 970 /* 971 * Need to add a reference to the dentry that is expected by 972 * simple_recursive_removal(), which will include a dput(). 973 */ 974 dget(dentry); 975 976 /* 977 * Also add a reference for the dput() in eventfs_workfn(). 978 * That is required as that dput() will free the ei after 979 * the SRCU grace period is over. 980 */ 981 dget(dentry); 982 } 983 984 /** 985 * eventfs_remove_rec - remove eventfs dir or file from list 986 * @ei: eventfs_inode to be removed. 987 * @level: prevent recursion from going more than 3 levels deep. 988 * 989 * This function recursively removes eventfs_inodes which 990 * contains info of files and/or directories. 991 */ 992 static void eventfs_remove_rec(struct eventfs_inode *ei, int level) 993 { 994 struct eventfs_inode *ei_child; 995 996 if (!ei) 997 return; 998 /* 999 * Check recursion depth. It should never be greater than 3: 1000 * 0 - events/ 1001 * 1 - events/group/ 1002 * 2 - events/group/event/ 1003 * 3 - events/group/event/file 1004 */ 1005 if (WARN_ON_ONCE(level > 3)) 1006 return; 1007 1008 /* search for nested folders or files */ 1009 list_for_each_entry_srcu(ei_child, &ei->children, list, 1010 lockdep_is_held(&eventfs_mutex)) { 1011 /* Children only have dentry if parent does */ 1012 WARN_ON_ONCE(ei_child->dentry && !ei->dentry); 1013 eventfs_remove_rec(ei_child, level + 1); 1014 } 1015 1016 1017 ei->is_freed = 1; 1018 1019 for (int i = 0; i < ei->nr_entries; i++) { 1020 if (ei->d_children[i]) { 1021 /* Children only have dentry if parent does */ 1022 WARN_ON_ONCE(!ei->dentry); 1023 unhook_dentry(ei->d_children[i]); 1024 } 1025 } 1026 1027 unhook_dentry(ei->dentry); 1028 1029 list_del_rcu(&ei->list); 1030 call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei); 1031 } 1032 1033 /** 1034 * eventfs_remove_dir - remove eventfs dir or file from list 1035 * @ei: eventfs_inode to be removed. 1036 * 1037 * This function acquire the eventfs_mutex lock and call eventfs_remove_rec() 1038 */ 1039 void eventfs_remove_dir(struct eventfs_inode *ei) 1040 { 1041 struct dentry *dentry; 1042 1043 if (!ei) 1044 return; 1045 1046 mutex_lock(&eventfs_mutex); 1047 dentry = ei->dentry; 1048 eventfs_remove_rec(ei, 0); 1049 mutex_unlock(&eventfs_mutex); 1050 1051 /* 1052 * If any of the ei children has a dentry, then the ei itself 1053 * must have a dentry. 1054 */ 1055 if (dentry) 1056 simple_recursive_removal(dentry, NULL); 1057 } 1058 1059 /** 1060 * eventfs_remove_events_dir - remove the top level eventfs directory 1061 * @ei: the event_inode returned by eventfs_create_events_dir(). 1062 * 1063 * This function removes the events main directory 1064 */ 1065 void eventfs_remove_events_dir(struct eventfs_inode *ei) 1066 { 1067 struct dentry *dentry; 1068 1069 dentry = ei->dentry; 1070 eventfs_remove_dir(ei); 1071 1072 /* 1073 * Matches the dget() done by tracefs_start_creating() 1074 * in eventfs_create_events_dir() when it the dentry was 1075 * created. In other words, it's a normal dentry that 1076 * sticks around while the other ei->dentry are created 1077 * and destroyed dynamically. 1078 */ 1079 dput(dentry); 1080 } 1081