1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * event_inode.c - part of tracefs, a pseudo file system for activating tracing 4 * 5 * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org> 6 * Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com> 7 * Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org> 8 * 9 * eventfs is used to dynamically create inodes and dentries based on the 10 * meta data provided by the tracing system. 11 * 12 * eventfs stores the meta-data of files/dirs and holds off on creating 13 * inodes/dentries of the files. When accessed, the eventfs will create the 14 * inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up 15 * and delete the inodes/dentries when they are no longer referenced. 16 */ 17 #include <linux/fsnotify.h> 18 #include <linux/fs.h> 19 #include <linux/namei.h> 20 #include <linux/workqueue.h> 21 #include <linux/security.h> 22 #include <linux/tracefs.h> 23 #include <linux/kref.h> 24 #include <linux/delay.h> 25 #include "internal.h" 26 27 /* 28 * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access 29 * to the ei->dentry must be done under this mutex and after checking 30 * if ei->is_freed is not set. When ei->is_freed is set, the dentry 31 * is on its way to being freed after the last dput() is made on it. 32 */ 33 static DEFINE_MUTEX(eventfs_mutex); 34 35 /* 36 * The eventfs_inode (ei) itself is protected by SRCU. It is released from 37 * its parent's list and will have is_freed set (under eventfs_mutex). 38 * After the SRCU grace period is over and the last dput() is called 39 * the ei is freed. 40 */ 41 DEFINE_STATIC_SRCU(eventfs_srcu); 42 43 /* Mode is unsigned short, use the upper bits for flags */ 44 enum { 45 EVENTFS_SAVE_MODE = BIT(16), 46 EVENTFS_SAVE_UID = BIT(17), 47 EVENTFS_SAVE_GID = BIT(18), 48 }; 49 50 #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) 51 52 static struct dentry *eventfs_root_lookup(struct inode *dir, 53 struct dentry *dentry, 54 unsigned int flags); 55 static int dcache_dir_open_wrapper(struct inode *inode, struct file *file); 56 static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx); 57 static int eventfs_release(struct inode *inode, struct file *file); 58 59 static void update_attr(struct eventfs_attr *attr, struct iattr *iattr) 60 { 61 unsigned int ia_valid = iattr->ia_valid; 62 63 if (ia_valid & ATTR_MODE) { 64 attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) | 65 (iattr->ia_mode & EVENTFS_MODE_MASK) | 66 EVENTFS_SAVE_MODE; 67 } 68 if (ia_valid & ATTR_UID) { 69 attr->mode |= EVENTFS_SAVE_UID; 70 attr->uid = iattr->ia_uid; 71 } 72 if (ia_valid & ATTR_GID) { 73 attr->mode |= EVENTFS_SAVE_GID; 74 attr->gid = iattr->ia_gid; 75 } 76 } 77 78 static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, 79 struct iattr *iattr) 80 { 81 const struct eventfs_entry *entry; 82 struct eventfs_inode *ei; 83 const char *name; 84 int ret; 85 86 mutex_lock(&eventfs_mutex); 87 ei = dentry->d_fsdata; 88 if (ei->is_freed) { 89 /* Do not allow changes if the event is about to be removed. */ 90 mutex_unlock(&eventfs_mutex); 91 return -ENODEV; 92 } 93 94 /* Preallocate the children mode array if necessary */ 95 if (!(dentry->d_inode->i_mode & S_IFDIR)) { 96 if (!ei->entry_attrs) { 97 ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries, 98 GFP_NOFS); 99 if (!ei->entry_attrs) { 100 ret = -ENOMEM; 101 goto out; 102 } 103 } 104 } 105 106 ret = simple_setattr(idmap, dentry, iattr); 107 if (ret < 0) 108 goto out; 109 110 /* 111 * If this is a dir, then update the ei cache, only the file 112 * mode is saved in the ei->m_children, and the ownership is 113 * determined by the parent directory. 114 */ 115 if (dentry->d_inode->i_mode & S_IFDIR) { 116 update_attr(&ei->attr, iattr); 117 118 } else { 119 name = dentry->d_name.name; 120 121 for (int i = 0; i < ei->nr_entries; i++) { 122 entry = &ei->entries[i]; 123 if (strcmp(name, entry->name) == 0) { 124 update_attr(&ei->entry_attrs[i], iattr); 125 break; 126 } 127 } 128 } 129 out: 130 mutex_unlock(&eventfs_mutex); 131 return ret; 132 } 133 134 static const struct inode_operations eventfs_root_dir_inode_operations = { 135 .lookup = eventfs_root_lookup, 136 .setattr = eventfs_set_attr, 137 }; 138 139 static const struct inode_operations eventfs_file_inode_operations = { 140 .setattr = eventfs_set_attr, 141 }; 142 143 static const struct file_operations eventfs_file_operations = { 144 .open = dcache_dir_open_wrapper, 145 .read = generic_read_dir, 146 .iterate_shared = dcache_readdir_wrapper, 147 .llseek = generic_file_llseek, 148 .release = eventfs_release, 149 }; 150 151 static void update_inode_attr(struct inode *inode, struct eventfs_attr *attr, umode_t mode) 152 { 153 if (!attr) { 154 inode->i_mode = mode; 155 return; 156 } 157 158 if (attr->mode & EVENTFS_SAVE_MODE) 159 inode->i_mode = attr->mode & EVENTFS_MODE_MASK; 160 else 161 inode->i_mode = mode; 162 163 if (attr->mode & EVENTFS_SAVE_UID) 164 inode->i_uid = attr->uid; 165 166 if (attr->mode & EVENTFS_SAVE_GID) 167 inode->i_gid = attr->gid; 168 } 169 170 /** 171 * create_file - create a file in the tracefs filesystem 172 * @name: the name of the file to create. 173 * @mode: the permission that the file should have. 174 * @attr: saved attributes changed by user 175 * @parent: parent dentry for this file. 176 * @data: something that the caller will want to get to later on. 177 * @fop: struct file_operations that should be used for this file. 178 * 179 * This function creates a dentry that represents a file in the eventsfs_inode 180 * directory. The inode.i_private pointer will point to @data in the open() 181 * call. 182 */ 183 static struct dentry *create_file(const char *name, umode_t mode, 184 struct eventfs_attr *attr, 185 struct dentry *parent, void *data, 186 const struct file_operations *fop) 187 { 188 struct tracefs_inode *ti; 189 struct dentry *dentry; 190 struct inode *inode; 191 192 if (!(mode & S_IFMT)) 193 mode |= S_IFREG; 194 195 if (WARN_ON_ONCE(!S_ISREG(mode))) 196 return NULL; 197 198 WARN_ON_ONCE(!parent); 199 dentry = eventfs_start_creating(name, parent); 200 201 if (IS_ERR(dentry)) 202 return dentry; 203 204 inode = tracefs_get_inode(dentry->d_sb); 205 if (unlikely(!inode)) 206 return eventfs_failed_creating(dentry); 207 208 /* If the user updated the directory's attributes, use them */ 209 update_inode_attr(inode, attr, mode); 210 211 inode->i_op = &eventfs_file_inode_operations; 212 inode->i_fop = fop; 213 inode->i_private = data; 214 215 ti = get_tracefs(inode); 216 ti->flags |= TRACEFS_EVENT_INODE; 217 d_instantiate(dentry, inode); 218 fsnotify_create(dentry->d_parent->d_inode, dentry); 219 return eventfs_end_creating(dentry); 220 }; 221 222 /** 223 * create_dir - create a dir in the tracefs filesystem 224 * @ei: the eventfs_inode that represents the directory to create 225 * @parent: parent dentry for this file. 226 * 227 * This function will create a dentry for a directory represented by 228 * a eventfs_inode. 229 */ 230 static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent) 231 { 232 struct tracefs_inode *ti; 233 struct dentry *dentry; 234 struct inode *inode; 235 236 dentry = eventfs_start_creating(ei->name, parent); 237 if (IS_ERR(dentry)) 238 return dentry; 239 240 inode = tracefs_get_inode(dentry->d_sb); 241 if (unlikely(!inode)) 242 return eventfs_failed_creating(dentry); 243 244 /* If the user updated the directory's attributes, use them */ 245 update_inode_attr(inode, &ei->attr, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO); 246 247 inode->i_op = &eventfs_root_dir_inode_operations; 248 inode->i_fop = &eventfs_file_operations; 249 250 ti = get_tracefs(inode); 251 ti->flags |= TRACEFS_EVENT_INODE; 252 253 inc_nlink(inode); 254 d_instantiate(dentry, inode); 255 inc_nlink(dentry->d_parent->d_inode); 256 fsnotify_mkdir(dentry->d_parent->d_inode, dentry); 257 return eventfs_end_creating(dentry); 258 } 259 260 static void free_ei(struct eventfs_inode *ei) 261 { 262 kfree_const(ei->name); 263 kfree(ei->d_children); 264 kfree(ei->entry_attrs); 265 kfree(ei); 266 } 267 268 /** 269 * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode 270 * @ti: the tracefs_inode of the dentry 271 * @dentry: dentry which has the reference to remove. 272 * 273 * Remove the association between a dentry from an eventfs_inode. 274 */ 275 void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) 276 { 277 struct eventfs_inode *ei; 278 int i; 279 280 mutex_lock(&eventfs_mutex); 281 282 ei = dentry->d_fsdata; 283 if (!ei) 284 goto out; 285 286 /* This could belong to one of the files of the ei */ 287 if (ei->dentry != dentry) { 288 for (i = 0; i < ei->nr_entries; i++) { 289 if (ei->d_children[i] == dentry) 290 break; 291 } 292 if (WARN_ON_ONCE(i == ei->nr_entries)) 293 goto out; 294 ei->d_children[i] = NULL; 295 } else if (ei->is_freed) { 296 free_ei(ei); 297 } else { 298 ei->dentry = NULL; 299 } 300 301 dentry->d_fsdata = NULL; 302 out: 303 mutex_unlock(&eventfs_mutex); 304 } 305 306 /** 307 * create_file_dentry - create a dentry for a file of an eventfs_inode 308 * @ei: the eventfs_inode that the file will be created under 309 * @idx: the index into the d_children[] of the @ei 310 * @parent: The parent dentry of the created file. 311 * @name: The name of the file to create 312 * @mode: The mode of the file. 313 * @data: The data to use to set the inode of the file with on open() 314 * @fops: The fops of the file to be created. 315 * @lookup: If called by the lookup routine, in which case, dput() the created dentry. 316 * 317 * Create a dentry for a file of an eventfs_inode @ei and place it into the 318 * address located at @e_dentry. If the @e_dentry already has a dentry, then 319 * just do a dget() on it and return. Otherwise create the dentry and attach it. 320 */ 321 static struct dentry * 322 create_file_dentry(struct eventfs_inode *ei, int idx, 323 struct dentry *parent, const char *name, umode_t mode, void *data, 324 const struct file_operations *fops, bool lookup) 325 { 326 struct eventfs_attr *attr = NULL; 327 struct dentry **e_dentry = &ei->d_children[idx]; 328 struct dentry *dentry; 329 330 WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); 331 332 mutex_lock(&eventfs_mutex); 333 if (ei->is_freed) { 334 mutex_unlock(&eventfs_mutex); 335 return NULL; 336 } 337 /* If the e_dentry already has a dentry, use it */ 338 if (*e_dentry) { 339 /* lookup does not need to up the ref count */ 340 if (!lookup) 341 dget(*e_dentry); 342 mutex_unlock(&eventfs_mutex); 343 return *e_dentry; 344 } 345 346 /* ei->entry_attrs are protected by SRCU */ 347 if (ei->entry_attrs) 348 attr = &ei->entry_attrs[idx]; 349 350 mutex_unlock(&eventfs_mutex); 351 352 dentry = create_file(name, mode, attr, parent, data, fops); 353 354 mutex_lock(&eventfs_mutex); 355 356 if (IS_ERR_OR_NULL(dentry)) { 357 /* 358 * When the mutex was released, something else could have 359 * created the dentry for this e_dentry. In which case 360 * use that one. 361 * 362 * If ei->is_freed is set, the e_dentry is currently on its 363 * way to being freed, don't return it. If e_dentry is NULL 364 * it means it was already freed. 365 */ 366 if (ei->is_freed) 367 dentry = NULL; 368 else 369 dentry = *e_dentry; 370 /* The lookup does not need to up the dentry refcount */ 371 if (dentry && !lookup) 372 dget(dentry); 373 mutex_unlock(&eventfs_mutex); 374 return dentry; 375 } 376 377 if (!*e_dentry && !ei->is_freed) { 378 *e_dentry = dentry; 379 dentry->d_fsdata = ei; 380 } else { 381 /* 382 * Should never happen unless we get here due to being freed. 383 * Otherwise it means two dentries exist with the same name. 384 */ 385 WARN_ON_ONCE(!ei->is_freed); 386 dentry = NULL; 387 } 388 mutex_unlock(&eventfs_mutex); 389 390 if (lookup) 391 dput(dentry); 392 393 return dentry; 394 } 395 396 /** 397 * eventfs_post_create_dir - post create dir routine 398 * @ei: eventfs_inode of recently created dir 399 * 400 * Map the meta-data of files within an eventfs dir to their parent dentry 401 */ 402 static void eventfs_post_create_dir(struct eventfs_inode *ei) 403 { 404 struct eventfs_inode *ei_child; 405 struct tracefs_inode *ti; 406 407 lockdep_assert_held(&eventfs_mutex); 408 409 /* srcu lock already held */ 410 /* fill parent-child relation */ 411 list_for_each_entry_srcu(ei_child, &ei->children, list, 412 srcu_read_lock_held(&eventfs_srcu)) { 413 ei_child->d_parent = ei->dentry; 414 } 415 416 ti = get_tracefs(ei->dentry->d_inode); 417 ti->private = ei; 418 } 419 420 /** 421 * create_dir_dentry - Create a directory dentry for the eventfs_inode 422 * @pei: The eventfs_inode parent of ei. 423 * @ei: The eventfs_inode to create the directory for 424 * @parent: The dentry of the parent of this directory 425 * @lookup: True if this is called by the lookup code 426 * 427 * This creates and attaches a directory dentry to the eventfs_inode @ei. 428 */ 429 static struct dentry * 430 create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, 431 struct dentry *parent, bool lookup) 432 { 433 struct dentry *dentry = NULL; 434 435 WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); 436 437 mutex_lock(&eventfs_mutex); 438 if (pei->is_freed || ei->is_freed) { 439 mutex_unlock(&eventfs_mutex); 440 return NULL; 441 } 442 if (ei->dentry) { 443 /* If the dentry already has a dentry, use it */ 444 dentry = ei->dentry; 445 /* lookup does not need to up the ref count */ 446 if (!lookup) 447 dget(dentry); 448 mutex_unlock(&eventfs_mutex); 449 return dentry; 450 } 451 mutex_unlock(&eventfs_mutex); 452 453 dentry = create_dir(ei, parent); 454 455 mutex_lock(&eventfs_mutex); 456 457 if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { 458 /* 459 * When the mutex was released, something else could have 460 * created the dentry for this e_dentry. In which case 461 * use that one. 462 * 463 * If ei->is_freed is set, the e_dentry is currently on its 464 * way to being freed. 465 */ 466 dentry = ei->dentry; 467 if (dentry && !lookup) 468 dget(dentry); 469 mutex_unlock(&eventfs_mutex); 470 return dentry; 471 } 472 473 if (!ei->dentry && !ei->is_freed) { 474 ei->dentry = dentry; 475 eventfs_post_create_dir(ei); 476 dentry->d_fsdata = ei; 477 } else { 478 /* 479 * Should never happen unless we get here due to being freed. 480 * Otherwise it means two dentries exist with the same name. 481 */ 482 WARN_ON_ONCE(!ei->is_freed); 483 dentry = NULL; 484 } 485 mutex_unlock(&eventfs_mutex); 486 487 if (lookup) 488 dput(dentry); 489 490 return dentry; 491 } 492 493 /** 494 * eventfs_root_lookup - lookup routine to create file/dir 495 * @dir: in which a lookup is being done 496 * @dentry: file/dir dentry 497 * @flags: Just passed to simple_lookup() 498 * 499 * Used to create dynamic file/dir with-in @dir, search with-in @ei 500 * list, if @dentry found go ahead and create the file/dir 501 */ 502 503 static struct dentry *eventfs_root_lookup(struct inode *dir, 504 struct dentry *dentry, 505 unsigned int flags) 506 { 507 const struct file_operations *fops; 508 const struct eventfs_entry *entry; 509 struct eventfs_inode *ei_child; 510 struct tracefs_inode *ti; 511 struct eventfs_inode *ei; 512 struct dentry *ei_dentry = NULL; 513 struct dentry *ret = NULL; 514 const char *name = dentry->d_name.name; 515 bool created = false; 516 umode_t mode; 517 void *data; 518 int idx; 519 int i; 520 int r; 521 522 ti = get_tracefs(dir); 523 if (!(ti->flags & TRACEFS_EVENT_INODE)) 524 return NULL; 525 526 /* Grab srcu to prevent the ei from going away */ 527 idx = srcu_read_lock(&eventfs_srcu); 528 529 /* 530 * Grab the eventfs_mutex to consistent value from ti->private. 531 * This s 532 */ 533 mutex_lock(&eventfs_mutex); 534 ei = READ_ONCE(ti->private); 535 if (ei && !ei->is_freed) 536 ei_dentry = READ_ONCE(ei->dentry); 537 mutex_unlock(&eventfs_mutex); 538 539 if (!ei || !ei_dentry) 540 goto out; 541 542 data = ei->data; 543 544 list_for_each_entry_srcu(ei_child, &ei->children, list, 545 srcu_read_lock_held(&eventfs_srcu)) { 546 if (strcmp(ei_child->name, name) != 0) 547 continue; 548 ret = simple_lookup(dir, dentry, flags); 549 if (IS_ERR(ret)) 550 goto out; 551 create_dir_dentry(ei, ei_child, ei_dentry, true); 552 created = true; 553 break; 554 } 555 556 if (created) 557 goto out; 558 559 for (i = 0; i < ei->nr_entries; i++) { 560 entry = &ei->entries[i]; 561 if (strcmp(name, entry->name) == 0) { 562 void *cdata = data; 563 mutex_lock(&eventfs_mutex); 564 /* If ei->is_freed, then the event itself may be too */ 565 if (!ei->is_freed) 566 r = entry->callback(name, &mode, &cdata, &fops); 567 else 568 r = -1; 569 mutex_unlock(&eventfs_mutex); 570 if (r <= 0) 571 continue; 572 ret = simple_lookup(dir, dentry, flags); 573 if (IS_ERR(ret)) 574 goto out; 575 create_file_dentry(ei, i, ei_dentry, name, mode, cdata, 576 fops, true); 577 break; 578 } 579 } 580 out: 581 srcu_read_unlock(&eventfs_srcu, idx); 582 return ret; 583 } 584 585 struct dentry_list { 586 void *cursor; 587 struct dentry **dentries; 588 }; 589 590 /** 591 * eventfs_release - called to release eventfs file/dir 592 * @inode: inode to be released 593 * @file: file to be released (not used) 594 */ 595 static int eventfs_release(struct inode *inode, struct file *file) 596 { 597 struct tracefs_inode *ti; 598 struct dentry_list *dlist = file->private_data; 599 void *cursor; 600 int i; 601 602 ti = get_tracefs(inode); 603 if (!(ti->flags & TRACEFS_EVENT_INODE)) 604 return -EINVAL; 605 606 if (WARN_ON_ONCE(!dlist)) 607 return -EINVAL; 608 609 for (i = 0; dlist->dentries && dlist->dentries[i]; i++) { 610 dput(dlist->dentries[i]); 611 } 612 613 cursor = dlist->cursor; 614 kfree(dlist->dentries); 615 kfree(dlist); 616 file->private_data = cursor; 617 return dcache_dir_close(inode, file); 618 } 619 620 static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt) 621 { 622 struct dentry **tmp; 623 624 tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS); 625 if (!tmp) 626 return -1; 627 tmp[cnt] = d; 628 tmp[cnt + 1] = NULL; 629 *dentries = tmp; 630 return 0; 631 } 632 633 /** 634 * dcache_dir_open_wrapper - eventfs open wrapper 635 * @inode: not used 636 * @file: dir to be opened (to create it's children) 637 * 638 * Used to dynamic create file/dir with-in @file, all the 639 * file/dir will be created. If already created then references 640 * will be increased 641 */ 642 static int dcache_dir_open_wrapper(struct inode *inode, struct file *file) 643 { 644 const struct file_operations *fops; 645 const struct eventfs_entry *entry; 646 struct eventfs_inode *ei_child; 647 struct tracefs_inode *ti; 648 struct eventfs_inode *ei; 649 struct dentry_list *dlist; 650 struct dentry **dentries = NULL; 651 struct dentry *parent = file_dentry(file); 652 struct dentry *d; 653 struct inode *f_inode = file_inode(file); 654 const char *name = parent->d_name.name; 655 umode_t mode; 656 void *data; 657 int cnt = 0; 658 int idx; 659 int ret; 660 int i; 661 int r; 662 663 ti = get_tracefs(f_inode); 664 if (!(ti->flags & TRACEFS_EVENT_INODE)) 665 return -EINVAL; 666 667 if (WARN_ON_ONCE(file->private_data)) 668 return -EINVAL; 669 670 idx = srcu_read_lock(&eventfs_srcu); 671 672 mutex_lock(&eventfs_mutex); 673 ei = READ_ONCE(ti->private); 674 mutex_unlock(&eventfs_mutex); 675 676 if (!ei) { 677 srcu_read_unlock(&eventfs_srcu, idx); 678 return -EINVAL; 679 } 680 681 682 data = ei->data; 683 684 dlist = kmalloc(sizeof(*dlist), GFP_KERNEL); 685 if (!dlist) { 686 srcu_read_unlock(&eventfs_srcu, idx); 687 return -ENOMEM; 688 } 689 690 inode_lock(parent->d_inode); 691 list_for_each_entry_srcu(ei_child, &ei->children, list, 692 srcu_read_lock_held(&eventfs_srcu)) { 693 d = create_dir_dentry(ei, ei_child, parent, false); 694 if (d) { 695 ret = add_dentries(&dentries, d, cnt); 696 if (ret < 0) 697 break; 698 cnt++; 699 } 700 } 701 702 for (i = 0; i < ei->nr_entries; i++) { 703 void *cdata = data; 704 entry = &ei->entries[i]; 705 name = entry->name; 706 mutex_lock(&eventfs_mutex); 707 /* If ei->is_freed, then the event itself may be too */ 708 if (!ei->is_freed) 709 r = entry->callback(name, &mode, &cdata, &fops); 710 else 711 r = -1; 712 mutex_unlock(&eventfs_mutex); 713 if (r <= 0) 714 continue; 715 d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false); 716 if (d) { 717 ret = add_dentries(&dentries, d, cnt); 718 if (ret < 0) 719 break; 720 cnt++; 721 } 722 } 723 inode_unlock(parent->d_inode); 724 srcu_read_unlock(&eventfs_srcu, idx); 725 ret = dcache_dir_open(inode, file); 726 727 /* 728 * dcache_dir_open() sets file->private_data to a dentry cursor. 729 * Need to save that but also save all the dentries that were 730 * opened by this function. 731 */ 732 dlist->cursor = file->private_data; 733 dlist->dentries = dentries; 734 file->private_data = dlist; 735 return ret; 736 } 737 738 /* 739 * This just sets the file->private_data back to the cursor and back. 740 */ 741 static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx) 742 { 743 struct dentry_list *dlist = file->private_data; 744 int ret; 745 746 file->private_data = dlist->cursor; 747 ret = dcache_readdir(file, ctx); 748 dlist->cursor = file->private_data; 749 file->private_data = dlist; 750 return ret; 751 } 752 753 /** 754 * eventfs_create_dir - Create the eventfs_inode for this directory 755 * @name: The name of the directory to create. 756 * @parent: The eventfs_inode of the parent directory. 757 * @entries: A list of entries that represent the files under this directory 758 * @size: The number of @entries 759 * @data: The default data to pass to the files (an entry may override it). 760 * 761 * This function creates the descriptor to represent a directory in the 762 * eventfs. This descriptor is an eventfs_inode, and it is returned to be 763 * used to create other children underneath. 764 * 765 * The @entries is an array of eventfs_entry structures which has: 766 * const char *name 767 * eventfs_callback callback; 768 * 769 * The name is the name of the file, and the callback is a pointer to a function 770 * that will be called when the file is reference (either by lookup or by 771 * reading a directory). The callback is of the prototype: 772 * 773 * int callback(const char *name, umode_t *mode, void **data, 774 * const struct file_operations **fops); 775 * 776 * When a file needs to be created, this callback will be called with 777 * name = the name of the file being created (so that the same callback 778 * may be used for multiple files). 779 * mode = a place to set the file's mode 780 * data = A pointer to @data, and the callback may replace it, which will 781 * cause the file created to pass the new data to the open() call. 782 * fops = the fops to use for the created file. 783 * 784 * NB. @callback is called while holding internal locks of the eventfs 785 * system. The callback must not call any code that might also call into 786 * the tracefs or eventfs system or it will risk creating a deadlock. 787 */ 788 struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, 789 const struct eventfs_entry *entries, 790 int size, void *data) 791 { 792 struct eventfs_inode *ei; 793 794 if (!parent) 795 return ERR_PTR(-EINVAL); 796 797 ei = kzalloc(sizeof(*ei), GFP_KERNEL); 798 if (!ei) 799 return ERR_PTR(-ENOMEM); 800 801 ei->name = kstrdup_const(name, GFP_KERNEL); 802 if (!ei->name) { 803 kfree(ei); 804 return ERR_PTR(-ENOMEM); 805 } 806 807 if (size) { 808 ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL); 809 if (!ei->d_children) { 810 kfree_const(ei->name); 811 kfree(ei); 812 return ERR_PTR(-ENOMEM); 813 } 814 } 815 816 ei->entries = entries; 817 ei->nr_entries = size; 818 ei->data = data; 819 INIT_LIST_HEAD(&ei->children); 820 INIT_LIST_HEAD(&ei->list); 821 822 mutex_lock(&eventfs_mutex); 823 if (!parent->is_freed) { 824 list_add_tail(&ei->list, &parent->children); 825 ei->d_parent = parent->dentry; 826 } 827 mutex_unlock(&eventfs_mutex); 828 829 /* Was the parent freed? */ 830 if (list_empty(&ei->list)) { 831 free_ei(ei); 832 ei = NULL; 833 } 834 return ei; 835 } 836 837 /** 838 * eventfs_create_events_dir - create the top level events directory 839 * @name: The name of the top level directory to create. 840 * @parent: Parent dentry for this file in the tracefs directory. 841 * @entries: A list of entries that represent the files under this directory 842 * @size: The number of @entries 843 * @data: The default data to pass to the files (an entry may override it). 844 * 845 * This function creates the top of the trace event directory. 846 * 847 * See eventfs_create_dir() for use of @entries. 848 */ 849 struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, 850 const struct eventfs_entry *entries, 851 int size, void *data) 852 { 853 struct dentry *dentry = tracefs_start_creating(name, parent); 854 struct eventfs_inode *ei; 855 struct tracefs_inode *ti; 856 struct inode *inode; 857 858 if (security_locked_down(LOCKDOWN_TRACEFS)) 859 return NULL; 860 861 if (IS_ERR(dentry)) 862 return ERR_CAST(dentry); 863 864 ei = kzalloc(sizeof(*ei), GFP_KERNEL); 865 if (!ei) 866 goto fail_ei; 867 868 inode = tracefs_get_inode(dentry->d_sb); 869 if (unlikely(!inode)) 870 goto fail; 871 872 if (size) { 873 ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL); 874 if (!ei->d_children) 875 goto fail; 876 } 877 878 ei->dentry = dentry; 879 ei->entries = entries; 880 ei->nr_entries = size; 881 ei->data = data; 882 ei->name = kstrdup_const(name, GFP_KERNEL); 883 if (!ei->name) 884 goto fail; 885 886 INIT_LIST_HEAD(&ei->children); 887 INIT_LIST_HEAD(&ei->list); 888 889 ti = get_tracefs(inode); 890 ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE; 891 ti->private = ei; 892 893 inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; 894 inode->i_op = &eventfs_root_dir_inode_operations; 895 inode->i_fop = &eventfs_file_operations; 896 897 dentry->d_fsdata = ei; 898 899 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 900 inc_nlink(inode); 901 d_instantiate(dentry, inode); 902 inc_nlink(dentry->d_parent->d_inode); 903 fsnotify_mkdir(dentry->d_parent->d_inode, dentry); 904 tracefs_end_creating(dentry); 905 906 return ei; 907 908 fail: 909 kfree(ei->d_children); 910 kfree(ei); 911 fail_ei: 912 tracefs_failed_creating(dentry); 913 return ERR_PTR(-ENOMEM); 914 } 915 916 static LLIST_HEAD(free_list); 917 918 static void eventfs_workfn(struct work_struct *work) 919 { 920 struct eventfs_inode *ei, *tmp; 921 struct llist_node *llnode; 922 923 llnode = llist_del_all(&free_list); 924 llist_for_each_entry_safe(ei, tmp, llnode, llist) { 925 /* This dput() matches the dget() from unhook_dentry() */ 926 for (int i = 0; i < ei->nr_entries; i++) { 927 if (ei->d_children[i]) 928 dput(ei->d_children[i]); 929 } 930 /* This should only get here if it had a dentry */ 931 if (!WARN_ON_ONCE(!ei->dentry)) 932 dput(ei->dentry); 933 } 934 } 935 936 static DECLARE_WORK(eventfs_work, eventfs_workfn); 937 938 static void free_rcu_ei(struct rcu_head *head) 939 { 940 struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu); 941 942 if (ei->dentry) { 943 /* Do not free the ei until all references of dentry are gone */ 944 if (llist_add(&ei->llist, &free_list)) 945 queue_work(system_unbound_wq, &eventfs_work); 946 return; 947 } 948 949 /* If the ei doesn't have a dentry, neither should its children */ 950 for (int i = 0; i < ei->nr_entries; i++) { 951 WARN_ON_ONCE(ei->d_children[i]); 952 } 953 954 free_ei(ei); 955 } 956 957 static void unhook_dentry(struct dentry *dentry) 958 { 959 if (!dentry) 960 return; 961 /* 962 * Need to add a reference to the dentry that is expected by 963 * simple_recursive_removal(), which will include a dput(). 964 */ 965 dget(dentry); 966 967 /* 968 * Also add a reference for the dput() in eventfs_workfn(). 969 * That is required as that dput() will free the ei after 970 * the SRCU grace period is over. 971 */ 972 dget(dentry); 973 } 974 975 /** 976 * eventfs_remove_rec - remove eventfs dir or file from list 977 * @ei: eventfs_inode to be removed. 978 * @level: prevent recursion from going more than 3 levels deep. 979 * 980 * This function recursively removes eventfs_inodes which 981 * contains info of files and/or directories. 982 */ 983 static void eventfs_remove_rec(struct eventfs_inode *ei, int level) 984 { 985 struct eventfs_inode *ei_child; 986 987 if (!ei) 988 return; 989 /* 990 * Check recursion depth. It should never be greater than 3: 991 * 0 - events/ 992 * 1 - events/group/ 993 * 2 - events/group/event/ 994 * 3 - events/group/event/file 995 */ 996 if (WARN_ON_ONCE(level > 3)) 997 return; 998 999 /* search for nested folders or files */ 1000 list_for_each_entry_srcu(ei_child, &ei->children, list, 1001 lockdep_is_held(&eventfs_mutex)) { 1002 /* Children only have dentry if parent does */ 1003 WARN_ON_ONCE(ei_child->dentry && !ei->dentry); 1004 eventfs_remove_rec(ei_child, level + 1); 1005 } 1006 1007 1008 ei->is_freed = 1; 1009 1010 for (int i = 0; i < ei->nr_entries; i++) { 1011 if (ei->d_children[i]) { 1012 /* Children only have dentry if parent does */ 1013 WARN_ON_ONCE(!ei->dentry); 1014 unhook_dentry(ei->d_children[i]); 1015 } 1016 } 1017 1018 unhook_dentry(ei->dentry); 1019 1020 list_del_rcu(&ei->list); 1021 call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei); 1022 } 1023 1024 /** 1025 * eventfs_remove_dir - remove eventfs dir or file from list 1026 * @ei: eventfs_inode to be removed. 1027 * 1028 * This function acquire the eventfs_mutex lock and call eventfs_remove_rec() 1029 */ 1030 void eventfs_remove_dir(struct eventfs_inode *ei) 1031 { 1032 struct dentry *dentry; 1033 1034 if (!ei) 1035 return; 1036 1037 mutex_lock(&eventfs_mutex); 1038 dentry = ei->dentry; 1039 eventfs_remove_rec(ei, 0); 1040 mutex_unlock(&eventfs_mutex); 1041 1042 /* 1043 * If any of the ei children has a dentry, then the ei itself 1044 * must have a dentry. 1045 */ 1046 if (dentry) 1047 simple_recursive_removal(dentry, NULL); 1048 } 1049 1050 /** 1051 * eventfs_remove_events_dir - remove the top level eventfs directory 1052 * @ei: the event_inode returned by eventfs_create_events_dir(). 1053 * 1054 * This function removes the events main directory 1055 */ 1056 void eventfs_remove_events_dir(struct eventfs_inode *ei) 1057 { 1058 struct dentry *dentry; 1059 1060 dentry = ei->dentry; 1061 eventfs_remove_dir(ei); 1062 1063 /* 1064 * Matches the dget() done by tracefs_start_creating() 1065 * in eventfs_create_events_dir() when it the dentry was 1066 * created. In other words, it's a normal dentry that 1067 * sticks around while the other ei->dentry are created 1068 * and destroyed dynamically. 1069 */ 1070 dput(dentry); 1071 } 1072