1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * event_inode.c - part of tracefs, a pseudo file system for activating tracing 4 * 5 * Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org> 6 * Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com> 7 * Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org> 8 * 9 * eventfs is used to dynamically create inodes and dentries based on the 10 * meta data provided by the tracing system. 11 * 12 * eventfs stores the meta-data of files/dirs and holds off on creating 13 * inodes/dentries of the files. When accessed, the eventfs will create the 14 * inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up 15 * and delete the inodes/dentries when they are no longer referenced. 16 */ 17 #include <linux/fsnotify.h> 18 #include <linux/fs.h> 19 #include <linux/namei.h> 20 #include <linux/workqueue.h> 21 #include <linux/security.h> 22 #include <linux/tracefs.h> 23 #include <linux/kref.h> 24 #include <linux/delay.h> 25 #include "internal.h" 26 27 /* 28 * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access 29 * to the ei->dentry must be done under this mutex and after checking 30 * if ei->is_freed is not set. When ei->is_freed is set, the dentry 31 * is on its way to being freed after the last dput() is made on it. 32 */ 33 static DEFINE_MUTEX(eventfs_mutex); 34 35 /* Choose something "unique" ;-) */ 36 #define EVENTFS_FILE_INODE_INO 0x12c4e37 37 38 /* Just try to make something consistent and unique */ 39 static int eventfs_dir_ino(struct eventfs_inode *ei) 40 { 41 if (!ei->ino) 42 ei->ino = get_next_ino(); 43 44 return ei->ino; 45 } 46 47 /* 48 * The eventfs_inode (ei) itself is protected by SRCU. It is released from 49 * its parent's list and will have is_freed set (under eventfs_mutex). 50 * After the SRCU grace period is over and the last dput() is called 51 * the ei is freed. 52 */ 53 DEFINE_STATIC_SRCU(eventfs_srcu); 54 55 /* Mode is unsigned short, use the upper bits for flags */ 56 enum { 57 EVENTFS_SAVE_MODE = BIT(16), 58 EVENTFS_SAVE_UID = BIT(17), 59 EVENTFS_SAVE_GID = BIT(18), 60 EVENTFS_TOPLEVEL = BIT(19), 61 }; 62 63 #define EVENTFS_MODE_MASK (EVENTFS_SAVE_MODE - 1) 64 65 static struct dentry *eventfs_root_lookup(struct inode *dir, 66 struct dentry *dentry, 67 unsigned int flags); 68 static int eventfs_iterate(struct file *file, struct dir_context *ctx); 69 70 static void update_attr(struct eventfs_attr *attr, struct iattr *iattr) 71 { 72 unsigned int ia_valid = iattr->ia_valid; 73 74 if (ia_valid & ATTR_MODE) { 75 attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) | 76 (iattr->ia_mode & EVENTFS_MODE_MASK) | 77 EVENTFS_SAVE_MODE; 78 } 79 if (ia_valid & ATTR_UID) { 80 attr->mode |= EVENTFS_SAVE_UID; 81 attr->uid = iattr->ia_uid; 82 } 83 if (ia_valid & ATTR_GID) { 84 attr->mode |= EVENTFS_SAVE_GID; 85 attr->gid = iattr->ia_gid; 86 } 87 } 88 89 static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, 90 struct iattr *iattr) 91 { 92 const struct eventfs_entry *entry; 93 struct eventfs_inode *ei; 94 const char *name; 95 int ret; 96 97 mutex_lock(&eventfs_mutex); 98 ei = dentry->d_fsdata; 99 if (ei->is_freed) { 100 /* Do not allow changes if the event is about to be removed. */ 101 mutex_unlock(&eventfs_mutex); 102 return -ENODEV; 103 } 104 105 /* Preallocate the children mode array if necessary */ 106 if (!(dentry->d_inode->i_mode & S_IFDIR)) { 107 if (!ei->entry_attrs) { 108 ei->entry_attrs = kcalloc(ei->nr_entries, sizeof(*ei->entry_attrs), 109 GFP_NOFS); 110 if (!ei->entry_attrs) { 111 ret = -ENOMEM; 112 goto out; 113 } 114 } 115 } 116 117 ret = simple_setattr(idmap, dentry, iattr); 118 if (ret < 0) 119 goto out; 120 121 /* 122 * If this is a dir, then update the ei cache, only the file 123 * mode is saved in the ei->m_children, and the ownership is 124 * determined by the parent directory. 125 */ 126 if (dentry->d_inode->i_mode & S_IFDIR) { 127 /* 128 * The events directory dentry is never freed, unless its 129 * part of an instance that is deleted. It's attr is the 130 * default for its child files and directories. 131 * Do not update it. It's not used for its own mode or ownership. 132 */ 133 if (ei->is_events) { 134 /* But it still needs to know if it was modified */ 135 if (iattr->ia_valid & ATTR_UID) 136 ei->attr.mode |= EVENTFS_SAVE_UID; 137 if (iattr->ia_valid & ATTR_GID) 138 ei->attr.mode |= EVENTFS_SAVE_GID; 139 } else { 140 update_attr(&ei->attr, iattr); 141 } 142 143 } else { 144 name = dentry->d_name.name; 145 146 for (int i = 0; i < ei->nr_entries; i++) { 147 entry = &ei->entries[i]; 148 if (strcmp(name, entry->name) == 0) { 149 update_attr(&ei->entry_attrs[i], iattr); 150 break; 151 } 152 } 153 } 154 out: 155 mutex_unlock(&eventfs_mutex); 156 return ret; 157 } 158 159 static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry) 160 { 161 struct inode *inode; 162 163 /* Only update if the "events" was on the top level */ 164 if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL)) 165 return; 166 167 /* Get the tracefs root inode. */ 168 inode = d_inode(dentry->d_sb->s_root); 169 ei->attr.uid = inode->i_uid; 170 ei->attr.gid = inode->i_gid; 171 } 172 173 static void set_top_events_ownership(struct inode *inode) 174 { 175 struct tracefs_inode *ti = get_tracefs(inode); 176 struct eventfs_inode *ei = ti->private; 177 struct dentry *dentry; 178 179 /* The top events directory doesn't get automatically updated */ 180 if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL)) 181 return; 182 183 dentry = ei->dentry; 184 185 update_top_events_attr(ei, dentry); 186 187 if (!(ei->attr.mode & EVENTFS_SAVE_UID)) 188 inode->i_uid = ei->attr.uid; 189 190 if (!(ei->attr.mode & EVENTFS_SAVE_GID)) 191 inode->i_gid = ei->attr.gid; 192 } 193 194 static int eventfs_get_attr(struct mnt_idmap *idmap, 195 const struct path *path, struct kstat *stat, 196 u32 request_mask, unsigned int flags) 197 { 198 struct dentry *dentry = path->dentry; 199 struct inode *inode = d_backing_inode(dentry); 200 201 set_top_events_ownership(inode); 202 203 generic_fillattr(idmap, request_mask, inode, stat); 204 return 0; 205 } 206 207 static int eventfs_permission(struct mnt_idmap *idmap, 208 struct inode *inode, int mask) 209 { 210 set_top_events_ownership(inode); 211 return generic_permission(idmap, inode, mask); 212 } 213 214 static const struct inode_operations eventfs_root_dir_inode_operations = { 215 .lookup = eventfs_root_lookup, 216 .setattr = eventfs_set_attr, 217 .getattr = eventfs_get_attr, 218 .permission = eventfs_permission, 219 }; 220 221 static const struct inode_operations eventfs_file_inode_operations = { 222 .setattr = eventfs_set_attr, 223 }; 224 225 static const struct file_operations eventfs_file_operations = { 226 .read = generic_read_dir, 227 .iterate_shared = eventfs_iterate, 228 .llseek = generic_file_llseek, 229 }; 230 231 /* Return the evenfs_inode of the "events" directory */ 232 static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) 233 { 234 struct eventfs_inode *ei; 235 236 mutex_lock(&eventfs_mutex); 237 do { 238 /* The parent always has an ei, except for events itself */ 239 ei = dentry->d_parent->d_fsdata; 240 241 /* 242 * If the ei is being freed, the ownership of the children 243 * doesn't matter. 244 */ 245 if (ei->is_freed) { 246 ei = NULL; 247 break; 248 } 249 250 dentry = ei->dentry; 251 } while (!ei->is_events); 252 mutex_unlock(&eventfs_mutex); 253 254 update_top_events_attr(ei, dentry); 255 256 return ei; 257 } 258 259 static void update_inode_attr(struct dentry *dentry, struct inode *inode, 260 struct eventfs_attr *attr, umode_t mode) 261 { 262 struct eventfs_inode *events_ei = eventfs_find_events(dentry); 263 264 if (!events_ei) 265 return; 266 267 inode->i_mode = mode; 268 inode->i_uid = events_ei->attr.uid; 269 inode->i_gid = events_ei->attr.gid; 270 271 if (!attr) 272 return; 273 274 if (attr->mode & EVENTFS_SAVE_MODE) 275 inode->i_mode = attr->mode & EVENTFS_MODE_MASK; 276 277 if (attr->mode & EVENTFS_SAVE_UID) 278 inode->i_uid = attr->uid; 279 280 if (attr->mode & EVENTFS_SAVE_GID) 281 inode->i_gid = attr->gid; 282 } 283 284 /** 285 * create_file - create a file in the tracefs filesystem 286 * @name: the name of the file to create. 287 * @mode: the permission that the file should have. 288 * @attr: saved attributes changed by user 289 * @parent: parent dentry for this file. 290 * @data: something that the caller will want to get to later on. 291 * @fop: struct file_operations that should be used for this file. 292 * 293 * This function creates a dentry that represents a file in the eventsfs_inode 294 * directory. The inode.i_private pointer will point to @data in the open() 295 * call. 296 */ 297 static struct dentry *create_file(const char *name, umode_t mode, 298 struct eventfs_attr *attr, 299 struct dentry *parent, void *data, 300 const struct file_operations *fop) 301 { 302 struct tracefs_inode *ti; 303 struct dentry *dentry; 304 struct inode *inode; 305 306 if (!(mode & S_IFMT)) 307 mode |= S_IFREG; 308 309 if (WARN_ON_ONCE(!S_ISREG(mode))) 310 return NULL; 311 312 WARN_ON_ONCE(!parent); 313 dentry = eventfs_start_creating(name, parent); 314 315 if (IS_ERR(dentry)) 316 return dentry; 317 318 inode = tracefs_get_inode(dentry->d_sb); 319 if (unlikely(!inode)) 320 return eventfs_failed_creating(dentry); 321 322 /* If the user updated the directory's attributes, use them */ 323 update_inode_attr(dentry, inode, attr, mode); 324 325 inode->i_op = &eventfs_file_inode_operations; 326 inode->i_fop = fop; 327 inode->i_private = data; 328 329 /* All files will have the same inode number */ 330 inode->i_ino = EVENTFS_FILE_INODE_INO; 331 332 ti = get_tracefs(inode); 333 ti->flags |= TRACEFS_EVENT_INODE; 334 d_instantiate(dentry, inode); 335 fsnotify_create(dentry->d_parent->d_inode, dentry); 336 return eventfs_end_creating(dentry); 337 }; 338 339 /** 340 * create_dir - create a dir in the tracefs filesystem 341 * @ei: the eventfs_inode that represents the directory to create 342 * @parent: parent dentry for this file. 343 * 344 * This function will create a dentry for a directory represented by 345 * a eventfs_inode. 346 */ 347 static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent) 348 { 349 struct tracefs_inode *ti; 350 struct dentry *dentry; 351 struct inode *inode; 352 353 dentry = eventfs_start_creating(ei->name, parent); 354 if (IS_ERR(dentry)) 355 return dentry; 356 357 inode = tracefs_get_inode(dentry->d_sb); 358 if (unlikely(!inode)) 359 return eventfs_failed_creating(dentry); 360 361 /* If the user updated the directory's attributes, use them */ 362 update_inode_attr(dentry, inode, &ei->attr, 363 S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO); 364 365 inode->i_op = &eventfs_root_dir_inode_operations; 366 inode->i_fop = &eventfs_file_operations; 367 368 /* All directories will have the same inode number */ 369 inode->i_ino = eventfs_dir_ino(ei); 370 371 ti = get_tracefs(inode); 372 ti->flags |= TRACEFS_EVENT_INODE; 373 374 inc_nlink(inode); 375 d_instantiate(dentry, inode); 376 inc_nlink(dentry->d_parent->d_inode); 377 fsnotify_mkdir(dentry->d_parent->d_inode, dentry); 378 return eventfs_end_creating(dentry); 379 } 380 381 static void free_ei(struct eventfs_inode *ei) 382 { 383 kfree_const(ei->name); 384 kfree(ei->d_children); 385 kfree(ei->entry_attrs); 386 kfree(ei); 387 } 388 389 /** 390 * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode 391 * @ti: the tracefs_inode of the dentry 392 * @dentry: dentry which has the reference to remove. 393 * 394 * Remove the association between a dentry from an eventfs_inode. 395 */ 396 void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry) 397 { 398 struct eventfs_inode *ei; 399 int i; 400 401 mutex_lock(&eventfs_mutex); 402 403 ei = dentry->d_fsdata; 404 if (!ei) 405 goto out; 406 407 /* This could belong to one of the files of the ei */ 408 if (ei->dentry != dentry) { 409 for (i = 0; i < ei->nr_entries; i++) { 410 if (ei->d_children[i] == dentry) 411 break; 412 } 413 if (WARN_ON_ONCE(i == ei->nr_entries)) 414 goto out; 415 ei->d_children[i] = NULL; 416 } else if (ei->is_freed) { 417 free_ei(ei); 418 } else { 419 ei->dentry = NULL; 420 } 421 422 dentry->d_fsdata = NULL; 423 out: 424 mutex_unlock(&eventfs_mutex); 425 } 426 427 /** 428 * create_file_dentry - create a dentry for a file of an eventfs_inode 429 * @ei: the eventfs_inode that the file will be created under 430 * @idx: the index into the d_children[] of the @ei 431 * @parent: The parent dentry of the created file. 432 * @name: The name of the file to create 433 * @mode: The mode of the file. 434 * @data: The data to use to set the inode of the file with on open() 435 * @fops: The fops of the file to be created. 436 * 437 * Create a dentry for a file of an eventfs_inode @ei and place it into the 438 * address located at @e_dentry. 439 */ 440 static struct dentry * 441 create_file_dentry(struct eventfs_inode *ei, int idx, 442 struct dentry *parent, const char *name, umode_t mode, void *data, 443 const struct file_operations *fops) 444 { 445 struct eventfs_attr *attr = NULL; 446 struct dentry **e_dentry = &ei->d_children[idx]; 447 struct dentry *dentry; 448 449 WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); 450 451 mutex_lock(&eventfs_mutex); 452 if (ei->is_freed) { 453 mutex_unlock(&eventfs_mutex); 454 return NULL; 455 } 456 /* If the e_dentry already has a dentry, use it */ 457 if (*e_dentry) { 458 dget(*e_dentry); 459 mutex_unlock(&eventfs_mutex); 460 return *e_dentry; 461 } 462 463 /* ei->entry_attrs are protected by SRCU */ 464 if (ei->entry_attrs) 465 attr = &ei->entry_attrs[idx]; 466 467 mutex_unlock(&eventfs_mutex); 468 469 dentry = create_file(name, mode, attr, parent, data, fops); 470 471 mutex_lock(&eventfs_mutex); 472 473 if (IS_ERR_OR_NULL(dentry)) { 474 /* 475 * When the mutex was released, something else could have 476 * created the dentry for this e_dentry. In which case 477 * use that one. 478 * 479 * If ei->is_freed is set, the e_dentry is currently on its 480 * way to being freed, don't return it. If e_dentry is NULL 481 * it means it was already freed. 482 */ 483 if (ei->is_freed) { 484 dentry = NULL; 485 } else { 486 dentry = *e_dentry; 487 dget(dentry); 488 } 489 mutex_unlock(&eventfs_mutex); 490 return dentry; 491 } 492 493 if (!*e_dentry && !ei->is_freed) { 494 *e_dentry = dentry; 495 dentry->d_fsdata = ei; 496 } else { 497 /* 498 * Should never happen unless we get here due to being freed. 499 * Otherwise it means two dentries exist with the same name. 500 */ 501 WARN_ON_ONCE(!ei->is_freed); 502 dentry = NULL; 503 } 504 mutex_unlock(&eventfs_mutex); 505 506 return dentry; 507 } 508 509 /** 510 * eventfs_post_create_dir - post create dir routine 511 * @ei: eventfs_inode of recently created dir 512 * 513 * Map the meta-data of files within an eventfs dir to their parent dentry 514 */ 515 static void eventfs_post_create_dir(struct eventfs_inode *ei) 516 { 517 struct eventfs_inode *ei_child; 518 struct tracefs_inode *ti; 519 520 lockdep_assert_held(&eventfs_mutex); 521 522 /* srcu lock already held */ 523 /* fill parent-child relation */ 524 list_for_each_entry_srcu(ei_child, &ei->children, list, 525 srcu_read_lock_held(&eventfs_srcu)) { 526 ei_child->d_parent = ei->dentry; 527 } 528 529 ti = get_tracefs(ei->dentry->d_inode); 530 ti->private = ei; 531 } 532 533 /** 534 * create_dir_dentry - Create a directory dentry for the eventfs_inode 535 * @pei: The eventfs_inode parent of ei. 536 * @ei: The eventfs_inode to create the directory for 537 * @parent: The dentry of the parent of this directory 538 * 539 * This creates and attaches a directory dentry to the eventfs_inode @ei. 540 */ 541 static struct dentry * 542 create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei, 543 struct dentry *parent) 544 { 545 struct dentry *dentry = NULL; 546 547 WARN_ON_ONCE(!inode_is_locked(parent->d_inode)); 548 549 mutex_lock(&eventfs_mutex); 550 if (pei->is_freed || ei->is_freed) { 551 mutex_unlock(&eventfs_mutex); 552 return NULL; 553 } 554 if (ei->dentry) { 555 /* If the eventfs_inode already has a dentry, use it */ 556 dentry = ei->dentry; 557 dget(dentry); 558 mutex_unlock(&eventfs_mutex); 559 return dentry; 560 } 561 mutex_unlock(&eventfs_mutex); 562 563 dentry = create_dir(ei, parent); 564 565 mutex_lock(&eventfs_mutex); 566 567 if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) { 568 /* 569 * When the mutex was released, something else could have 570 * created the dentry for this e_dentry. In which case 571 * use that one. 572 * 573 * If ei->is_freed is set, the e_dentry is currently on its 574 * way to being freed. 575 */ 576 dentry = ei->dentry; 577 if (dentry) 578 dget(dentry); 579 mutex_unlock(&eventfs_mutex); 580 return dentry; 581 } 582 583 if (!ei->dentry && !ei->is_freed) { 584 ei->dentry = dentry; 585 eventfs_post_create_dir(ei); 586 dentry->d_fsdata = ei; 587 } else { 588 /* 589 * Should never happen unless we get here due to being freed. 590 * Otherwise it means two dentries exist with the same name. 591 */ 592 WARN_ON_ONCE(!ei->is_freed); 593 dentry = NULL; 594 } 595 mutex_unlock(&eventfs_mutex); 596 597 return dentry; 598 } 599 600 /** 601 * eventfs_root_lookup - lookup routine to create file/dir 602 * @dir: in which a lookup is being done 603 * @dentry: file/dir dentry 604 * @flags: Just passed to simple_lookup() 605 * 606 * Used to create dynamic file/dir with-in @dir, search with-in @ei 607 * list, if @dentry found go ahead and create the file/dir 608 */ 609 610 static struct dentry *eventfs_root_lookup(struct inode *dir, 611 struct dentry *dentry, 612 unsigned int flags) 613 { 614 const struct file_operations *fops; 615 const struct eventfs_entry *entry; 616 struct eventfs_inode *ei_child; 617 struct tracefs_inode *ti; 618 struct eventfs_inode *ei; 619 struct dentry *ei_dentry = NULL; 620 struct dentry *ret = NULL; 621 struct dentry *d; 622 const char *name = dentry->d_name.name; 623 umode_t mode; 624 void *data; 625 int idx; 626 int i; 627 int r; 628 629 ti = get_tracefs(dir); 630 if (!(ti->flags & TRACEFS_EVENT_INODE)) 631 return NULL; 632 633 /* Grab srcu to prevent the ei from going away */ 634 idx = srcu_read_lock(&eventfs_srcu); 635 636 /* 637 * Grab the eventfs_mutex to consistent value from ti->private. 638 * This s 639 */ 640 mutex_lock(&eventfs_mutex); 641 ei = READ_ONCE(ti->private); 642 if (ei && !ei->is_freed) 643 ei_dentry = READ_ONCE(ei->dentry); 644 mutex_unlock(&eventfs_mutex); 645 646 if (!ei || !ei_dentry) 647 goto out; 648 649 data = ei->data; 650 651 list_for_each_entry_srcu(ei_child, &ei->children, list, 652 srcu_read_lock_held(&eventfs_srcu)) { 653 if (strcmp(ei_child->name, name) != 0) 654 continue; 655 ret = simple_lookup(dir, dentry, flags); 656 if (IS_ERR(ret)) 657 goto out; 658 d = create_dir_dentry(ei, ei_child, ei_dentry); 659 dput(d); 660 goto out; 661 } 662 663 for (i = 0; i < ei->nr_entries; i++) { 664 entry = &ei->entries[i]; 665 if (strcmp(name, entry->name) == 0) { 666 void *cdata = data; 667 mutex_lock(&eventfs_mutex); 668 /* If ei->is_freed, then the event itself may be too */ 669 if (!ei->is_freed) 670 r = entry->callback(name, &mode, &cdata, &fops); 671 else 672 r = -1; 673 mutex_unlock(&eventfs_mutex); 674 if (r <= 0) 675 continue; 676 ret = simple_lookup(dir, dentry, flags); 677 if (IS_ERR(ret)) 678 goto out; 679 d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops); 680 dput(d); 681 break; 682 } 683 } 684 out: 685 srcu_read_unlock(&eventfs_srcu, idx); 686 return ret; 687 } 688 689 /* 690 * Walk the children of a eventfs_inode to fill in getdents(). 691 */ 692 static int eventfs_iterate(struct file *file, struct dir_context *ctx) 693 { 694 const struct file_operations *fops; 695 struct inode *f_inode = file_inode(file); 696 const struct eventfs_entry *entry; 697 struct eventfs_inode *ei_child; 698 struct tracefs_inode *ti; 699 struct eventfs_inode *ei; 700 const char *name; 701 umode_t mode; 702 int idx; 703 int ret = -EINVAL; 704 int ino; 705 int i, r, c; 706 707 if (!dir_emit_dots(file, ctx)) 708 return 0; 709 710 ti = get_tracefs(f_inode); 711 if (!(ti->flags & TRACEFS_EVENT_INODE)) 712 return -EINVAL; 713 714 c = ctx->pos - 2; 715 716 idx = srcu_read_lock(&eventfs_srcu); 717 718 mutex_lock(&eventfs_mutex); 719 ei = READ_ONCE(ti->private); 720 if (ei && ei->is_freed) 721 ei = NULL; 722 mutex_unlock(&eventfs_mutex); 723 724 if (!ei) 725 goto out; 726 727 /* 728 * Need to create the dentries and inodes to have a consistent 729 * inode number. 730 */ 731 ret = 0; 732 733 /* Start at 'c' to jump over already read entries */ 734 for (i = c; i < ei->nr_entries; i++, ctx->pos++) { 735 void *cdata = ei->data; 736 737 entry = &ei->entries[i]; 738 name = entry->name; 739 740 mutex_lock(&eventfs_mutex); 741 /* If ei->is_freed then just bail here, nothing more to do */ 742 if (ei->is_freed) { 743 mutex_unlock(&eventfs_mutex); 744 goto out; 745 } 746 r = entry->callback(name, &mode, &cdata, &fops); 747 mutex_unlock(&eventfs_mutex); 748 if (r <= 0) 749 continue; 750 751 ino = EVENTFS_FILE_INODE_INO; 752 753 if (!dir_emit(ctx, name, strlen(name), ino, DT_REG)) 754 goto out; 755 } 756 757 /* Subtract the skipped entries above */ 758 c -= min((unsigned int)c, (unsigned int)ei->nr_entries); 759 760 list_for_each_entry_srcu(ei_child, &ei->children, list, 761 srcu_read_lock_held(&eventfs_srcu)) { 762 763 if (c > 0) { 764 c--; 765 continue; 766 } 767 768 ctx->pos++; 769 770 if (ei_child->is_freed) 771 continue; 772 773 name = ei_child->name; 774 775 ino = eventfs_dir_ino(ei_child); 776 777 if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR)) 778 goto out_dec; 779 } 780 ret = 1; 781 out: 782 srcu_read_unlock(&eventfs_srcu, idx); 783 784 return ret; 785 786 out_dec: 787 /* Incremented ctx->pos without adding something, reset it */ 788 ctx->pos--; 789 goto out; 790 } 791 792 /** 793 * eventfs_create_dir - Create the eventfs_inode for this directory 794 * @name: The name of the directory to create. 795 * @parent: The eventfs_inode of the parent directory. 796 * @entries: A list of entries that represent the files under this directory 797 * @size: The number of @entries 798 * @data: The default data to pass to the files (an entry may override it). 799 * 800 * This function creates the descriptor to represent a directory in the 801 * eventfs. This descriptor is an eventfs_inode, and it is returned to be 802 * used to create other children underneath. 803 * 804 * The @entries is an array of eventfs_entry structures which has: 805 * const char *name 806 * eventfs_callback callback; 807 * 808 * The name is the name of the file, and the callback is a pointer to a function 809 * that will be called when the file is reference (either by lookup or by 810 * reading a directory). The callback is of the prototype: 811 * 812 * int callback(const char *name, umode_t *mode, void **data, 813 * const struct file_operations **fops); 814 * 815 * When a file needs to be created, this callback will be called with 816 * name = the name of the file being created (so that the same callback 817 * may be used for multiple files). 818 * mode = a place to set the file's mode 819 * data = A pointer to @data, and the callback may replace it, which will 820 * cause the file created to pass the new data to the open() call. 821 * fops = the fops to use for the created file. 822 * 823 * NB. @callback is called while holding internal locks of the eventfs 824 * system. The callback must not call any code that might also call into 825 * the tracefs or eventfs system or it will risk creating a deadlock. 826 */ 827 struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent, 828 const struct eventfs_entry *entries, 829 int size, void *data) 830 { 831 struct eventfs_inode *ei; 832 833 if (!parent) 834 return ERR_PTR(-EINVAL); 835 836 ei = kzalloc(sizeof(*ei), GFP_KERNEL); 837 if (!ei) 838 return ERR_PTR(-ENOMEM); 839 840 ei->name = kstrdup_const(name, GFP_KERNEL); 841 if (!ei->name) { 842 kfree(ei); 843 return ERR_PTR(-ENOMEM); 844 } 845 846 if (size) { 847 ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); 848 if (!ei->d_children) { 849 kfree_const(ei->name); 850 kfree(ei); 851 return ERR_PTR(-ENOMEM); 852 } 853 } 854 855 ei->entries = entries; 856 ei->nr_entries = size; 857 ei->data = data; 858 INIT_LIST_HEAD(&ei->children); 859 INIT_LIST_HEAD(&ei->list); 860 861 mutex_lock(&eventfs_mutex); 862 if (!parent->is_freed) { 863 list_add_tail(&ei->list, &parent->children); 864 ei->d_parent = parent->dentry; 865 } 866 mutex_unlock(&eventfs_mutex); 867 868 /* Was the parent freed? */ 869 if (list_empty(&ei->list)) { 870 free_ei(ei); 871 ei = NULL; 872 } 873 return ei; 874 } 875 876 /** 877 * eventfs_create_events_dir - create the top level events directory 878 * @name: The name of the top level directory to create. 879 * @parent: Parent dentry for this file in the tracefs directory. 880 * @entries: A list of entries that represent the files under this directory 881 * @size: The number of @entries 882 * @data: The default data to pass to the files (an entry may override it). 883 * 884 * This function creates the top of the trace event directory. 885 * 886 * See eventfs_create_dir() for use of @entries. 887 */ 888 struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent, 889 const struct eventfs_entry *entries, 890 int size, void *data) 891 { 892 struct dentry *dentry = tracefs_start_creating(name, parent); 893 struct eventfs_inode *ei; 894 struct tracefs_inode *ti; 895 struct inode *inode; 896 kuid_t uid; 897 kgid_t gid; 898 899 if (security_locked_down(LOCKDOWN_TRACEFS)) 900 return NULL; 901 902 if (IS_ERR(dentry)) 903 return ERR_CAST(dentry); 904 905 ei = kzalloc(sizeof(*ei), GFP_KERNEL); 906 if (!ei) 907 goto fail_ei; 908 909 inode = tracefs_get_inode(dentry->d_sb); 910 if (unlikely(!inode)) 911 goto fail; 912 913 if (size) { 914 ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL); 915 if (!ei->d_children) 916 goto fail; 917 } 918 919 ei->dentry = dentry; 920 ei->entries = entries; 921 ei->nr_entries = size; 922 ei->is_events = 1; 923 ei->data = data; 924 ei->name = kstrdup_const(name, GFP_KERNEL); 925 if (!ei->name) 926 goto fail; 927 928 /* Save the ownership of this directory */ 929 uid = d_inode(dentry->d_parent)->i_uid; 930 gid = d_inode(dentry->d_parent)->i_gid; 931 932 /* 933 * If the events directory is of the top instance, then parent 934 * is NULL. Set the attr.mode to reflect this and its permissions will 935 * default to the tracefs root dentry. 936 */ 937 if (!parent) 938 ei->attr.mode = EVENTFS_TOPLEVEL; 939 940 /* This is used as the default ownership of the files and directories */ 941 ei->attr.uid = uid; 942 ei->attr.gid = gid; 943 944 INIT_LIST_HEAD(&ei->children); 945 INIT_LIST_HEAD(&ei->list); 946 947 ti = get_tracefs(inode); 948 ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE; 949 ti->private = ei; 950 951 inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; 952 inode->i_uid = uid; 953 inode->i_gid = gid; 954 inode->i_op = &eventfs_root_dir_inode_operations; 955 inode->i_fop = &eventfs_file_operations; 956 957 dentry->d_fsdata = ei; 958 959 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 960 inc_nlink(inode); 961 d_instantiate(dentry, inode); 962 inc_nlink(dentry->d_parent->d_inode); 963 fsnotify_mkdir(dentry->d_parent->d_inode, dentry); 964 tracefs_end_creating(dentry); 965 966 return ei; 967 968 fail: 969 kfree(ei->d_children); 970 kfree(ei); 971 fail_ei: 972 tracefs_failed_creating(dentry); 973 return ERR_PTR(-ENOMEM); 974 } 975 976 static LLIST_HEAD(free_list); 977 978 static void eventfs_workfn(struct work_struct *work) 979 { 980 struct eventfs_inode *ei, *tmp; 981 struct llist_node *llnode; 982 983 llnode = llist_del_all(&free_list); 984 llist_for_each_entry_safe(ei, tmp, llnode, llist) { 985 /* This dput() matches the dget() from unhook_dentry() */ 986 for (int i = 0; i < ei->nr_entries; i++) { 987 if (ei->d_children[i]) 988 dput(ei->d_children[i]); 989 } 990 /* This should only get here if it had a dentry */ 991 if (!WARN_ON_ONCE(!ei->dentry)) 992 dput(ei->dentry); 993 } 994 } 995 996 static DECLARE_WORK(eventfs_work, eventfs_workfn); 997 998 static void free_rcu_ei(struct rcu_head *head) 999 { 1000 struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu); 1001 1002 if (ei->dentry) { 1003 /* Do not free the ei until all references of dentry are gone */ 1004 if (llist_add(&ei->llist, &free_list)) 1005 queue_work(system_unbound_wq, &eventfs_work); 1006 return; 1007 } 1008 1009 /* If the ei doesn't have a dentry, neither should its children */ 1010 for (int i = 0; i < ei->nr_entries; i++) { 1011 WARN_ON_ONCE(ei->d_children[i]); 1012 } 1013 1014 free_ei(ei); 1015 } 1016 1017 static void unhook_dentry(struct dentry *dentry) 1018 { 1019 if (!dentry) 1020 return; 1021 /* 1022 * Need to add a reference to the dentry that is expected by 1023 * simple_recursive_removal(), which will include a dput(). 1024 */ 1025 dget(dentry); 1026 1027 /* 1028 * Also add a reference for the dput() in eventfs_workfn(). 1029 * That is required as that dput() will free the ei after 1030 * the SRCU grace period is over. 1031 */ 1032 dget(dentry); 1033 } 1034 1035 /** 1036 * eventfs_remove_rec - remove eventfs dir or file from list 1037 * @ei: eventfs_inode to be removed. 1038 * @level: prevent recursion from going more than 3 levels deep. 1039 * 1040 * This function recursively removes eventfs_inodes which 1041 * contains info of files and/or directories. 1042 */ 1043 static void eventfs_remove_rec(struct eventfs_inode *ei, int level) 1044 { 1045 struct eventfs_inode *ei_child; 1046 1047 if (!ei) 1048 return; 1049 /* 1050 * Check recursion depth. It should never be greater than 3: 1051 * 0 - events/ 1052 * 1 - events/group/ 1053 * 2 - events/group/event/ 1054 * 3 - events/group/event/file 1055 */ 1056 if (WARN_ON_ONCE(level > 3)) 1057 return; 1058 1059 /* search for nested folders or files */ 1060 list_for_each_entry_srcu(ei_child, &ei->children, list, 1061 lockdep_is_held(&eventfs_mutex)) { 1062 /* Children only have dentry if parent does */ 1063 WARN_ON_ONCE(ei_child->dentry && !ei->dentry); 1064 eventfs_remove_rec(ei_child, level + 1); 1065 } 1066 1067 1068 ei->is_freed = 1; 1069 1070 for (int i = 0; i < ei->nr_entries; i++) { 1071 if (ei->d_children[i]) { 1072 /* Children only have dentry if parent does */ 1073 WARN_ON_ONCE(!ei->dentry); 1074 unhook_dentry(ei->d_children[i]); 1075 } 1076 } 1077 1078 unhook_dentry(ei->dentry); 1079 1080 list_del_rcu(&ei->list); 1081 call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei); 1082 } 1083 1084 /** 1085 * eventfs_remove_dir - remove eventfs dir or file from list 1086 * @ei: eventfs_inode to be removed. 1087 * 1088 * This function acquire the eventfs_mutex lock and call eventfs_remove_rec() 1089 */ 1090 void eventfs_remove_dir(struct eventfs_inode *ei) 1091 { 1092 struct dentry *dentry; 1093 1094 if (!ei) 1095 return; 1096 1097 mutex_lock(&eventfs_mutex); 1098 dentry = ei->dentry; 1099 eventfs_remove_rec(ei, 0); 1100 mutex_unlock(&eventfs_mutex); 1101 1102 /* 1103 * If any of the ei children has a dentry, then the ei itself 1104 * must have a dentry. 1105 */ 1106 if (dentry) 1107 simple_recursive_removal(dentry, NULL); 1108 } 1109 1110 /** 1111 * eventfs_remove_events_dir - remove the top level eventfs directory 1112 * @ei: the event_inode returned by eventfs_create_events_dir(). 1113 * 1114 * This function removes the events main directory 1115 */ 1116 void eventfs_remove_events_dir(struct eventfs_inode *ei) 1117 { 1118 struct dentry *dentry; 1119 1120 dentry = ei->dentry; 1121 eventfs_remove_dir(ei); 1122 1123 /* 1124 * Matches the dget() done by tracefs_start_creating() 1125 * in eventfs_create_events_dir() when it the dentry was 1126 * created. In other words, it's a normal dentry that 1127 * sticks around while the other ei->dentry are created 1128 * and destroyed dynamically. 1129 */ 1130 dput(dentry); 1131 } 1132