xref: /linux/fs/tracefs/event_inode.c (revision c4bbe83d27c2446a033cc0381c3fb6be5e8c41c7)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  event_inode.c - part of tracefs, a pseudo file system for activating tracing
4  *
5  *  Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org>
6  *  Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com>
7  *  Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org>
8  *
9  *  eventfs is used to dynamically create inodes and dentries based on the
10  *  meta data provided by the tracing system.
11  *
12  *  eventfs stores the meta-data of files/dirs and holds off on creating
13  *  inodes/dentries of the files. When accessed, the eventfs will create the
14  *  inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up
15  *  and delete the inodes/dentries when they are no longer referenced.
16  */
17 #include <linux/fsnotify.h>
18 #include <linux/fs.h>
19 #include <linux/namei.h>
20 #include <linux/workqueue.h>
21 #include <linux/security.h>
22 #include <linux/tracefs.h>
23 #include <linux/kref.h>
24 #include <linux/delay.h>
25 #include "internal.h"
26 
/*
 * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access
 * to the ei->dentry must be done under this mutex and after checking
 * if ei->is_freed is not set. When ei->is_freed is set, the dentry
 * is on its way to being freed after the last dput() is made on it.
 */
static DEFINE_MUTEX(eventfs_mutex);

/*
 * Choose something "unique" ;-)
 *
 * create_file() assigns EVENTFS_FILE_INODE_INO to every file inode it
 * creates and create_dir() assigns EVENTFS_DIR_INODE_INO to every
 * directory inode it creates, so these inode numbers are shared.
 */
#define EVENTFS_FILE_INODE_INO		0x12c4e37
#define EVENTFS_DIR_INODE_INO		0x134b2f5

/*
 * The eventfs_inode (ei) itself is protected by SRCU. It is released from
 * its parent's list and will have is_freed set (under eventfs_mutex).
 * After the SRCU grace period is over and the last dput() is called
 * the ei is freed.
 */
DEFINE_STATIC_SRCU(eventfs_srcu);

/*
 * Mode is unsigned short, use the upper bits for flags.
 *
 * The flags live in eventfs_attr.mode above the 16 mode bits:
 *  EVENTFS_SAVE_MODE - the mode bits were explicitly set (see update_attr())
 *  EVENTFS_SAVE_UID  - the uid was explicitly set
 *  EVENTFS_SAVE_GID  - the gid was explicitly set
 *  EVENTFS_TOPLEVEL  - set by eventfs_create_events_dir() when it is
 *                      called with a NULL parent
 */
enum {
	EVENTFS_SAVE_MODE	= BIT(16),
	EVENTFS_SAVE_UID	= BIT(17),
	EVENTFS_SAVE_GID	= BIT(18),
	EVENTFS_TOPLEVEL	= BIT(19),
};

/* Mask selecting the bits of eventfs_attr.mode below the flag bits */
#define EVENTFS_MODE_MASK	(EVENTFS_SAVE_MODE - 1)

/*
 * Forward declarations: both functions are referenced by the operations
 * tables that are defined before the functions themselves.
 */
static struct dentry *eventfs_root_lookup(struct inode *dir,
					  struct dentry *dentry,
					  unsigned int flags);
static int eventfs_iterate(struct file *file, struct dir_context *ctx);
61 
62 static void update_attr(struct eventfs_attr *attr, struct iattr *iattr)
63 {
64 	unsigned int ia_valid = iattr->ia_valid;
65 
66 	if (ia_valid & ATTR_MODE) {
67 		attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) |
68 			(iattr->ia_mode & EVENTFS_MODE_MASK) |
69 			EVENTFS_SAVE_MODE;
70 	}
71 	if (ia_valid & ATTR_UID) {
72 		attr->mode |= EVENTFS_SAVE_UID;
73 		attr->uid = iattr->ia_uid;
74 	}
75 	if (ia_valid & ATTR_GID) {
76 		attr->mode |= EVENTFS_SAVE_GID;
77 		attr->gid = iattr->ia_gid;
78 	}
79 }
80 
81 static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
82 			    struct iattr *iattr)
83 {
84 	const struct eventfs_entry *entry;
85 	struct eventfs_inode *ei;
86 	const char *name;
87 	int ret;
88 
89 	mutex_lock(&eventfs_mutex);
90 	ei = dentry->d_fsdata;
91 	if (ei->is_freed) {
92 		/* Do not allow changes if the event is about to be removed. */
93 		mutex_unlock(&eventfs_mutex);
94 		return -ENODEV;
95 	}
96 
97 	/* Preallocate the children mode array if necessary */
98 	if (!(dentry->d_inode->i_mode & S_IFDIR)) {
99 		if (!ei->entry_attrs) {
100 			ei->entry_attrs = kcalloc(ei->nr_entries, sizeof(*ei->entry_attrs),
101 						  GFP_NOFS);
102 			if (!ei->entry_attrs) {
103 				ret = -ENOMEM;
104 				goto out;
105 			}
106 		}
107 	}
108 
109 	ret = simple_setattr(idmap, dentry, iattr);
110 	if (ret < 0)
111 		goto out;
112 
113 	/*
114 	 * If this is a dir, then update the ei cache, only the file
115 	 * mode is saved in the ei->m_children, and the ownership is
116 	 * determined by the parent directory.
117 	 */
118 	if (dentry->d_inode->i_mode & S_IFDIR) {
119 		/*
120 		 * The events directory dentry is never freed, unless its
121 		 * part of an instance that is deleted. It's attr is the
122 		 * default for its child files and directories.
123 		 * Do not update it. It's not used for its own mode or ownership.
124 		 */
125 		if (ei->is_events) {
126 			/* But it still needs to know if it was modified */
127 			if (iattr->ia_valid & ATTR_UID)
128 				ei->attr.mode |= EVENTFS_SAVE_UID;
129 			if (iattr->ia_valid & ATTR_GID)
130 				ei->attr.mode |= EVENTFS_SAVE_GID;
131 		} else {
132 			update_attr(&ei->attr, iattr);
133 		}
134 
135 	} else {
136 		name = dentry->d_name.name;
137 
138 		for (int i = 0; i < ei->nr_entries; i++) {
139 			entry = &ei->entries[i];
140 			if (strcmp(name, entry->name) == 0) {
141 				update_attr(&ei->entry_attrs[i], iattr);
142 				break;
143 			}
144 		}
145 	}
146  out:
147 	mutex_unlock(&eventfs_mutex);
148 	return ret;
149 }
150 
151 static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry)
152 {
153 	struct inode *inode;
154 
155 	/* Only update if the "events" was on the top level */
156 	if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL))
157 		return;
158 
159 	/* Get the tracefs root inode. */
160 	inode = d_inode(dentry->d_sb->s_root);
161 	ei->attr.uid = inode->i_uid;
162 	ei->attr.gid = inode->i_gid;
163 }
164 
165 static void set_top_events_ownership(struct inode *inode)
166 {
167 	struct tracefs_inode *ti = get_tracefs(inode);
168 	struct eventfs_inode *ei = ti->private;
169 	struct dentry *dentry;
170 
171 	/* The top events directory doesn't get automatically updated */
172 	if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL))
173 		return;
174 
175 	dentry = ei->dentry;
176 
177 	update_top_events_attr(ei, dentry);
178 
179 	if (!(ei->attr.mode & EVENTFS_SAVE_UID))
180 		inode->i_uid = ei->attr.uid;
181 
182 	if (!(ei->attr.mode & EVENTFS_SAVE_GID))
183 		inode->i_gid = ei->attr.gid;
184 }
185 
186 static int eventfs_get_attr(struct mnt_idmap *idmap,
187 			    const struct path *path, struct kstat *stat,
188 			    u32 request_mask, unsigned int flags)
189 {
190 	struct dentry *dentry = path->dentry;
191 	struct inode *inode = d_backing_inode(dentry);
192 
193 	set_top_events_ownership(inode);
194 
195 	generic_fillattr(idmap, request_mask, inode, stat);
196 	return 0;
197 }
198 
/*
 * The ->permission handler of eventfs directories.
 *
 * The ownership of the top level events directory is refreshed first so
 * that generic_permission() checks against current values.
 */
static int eventfs_permission(struct mnt_idmap *idmap,
			      struct inode *inode, int mask)
{
	int ret;

	set_top_events_ownership(inode);
	ret = generic_permission(idmap, inode, mask);

	return ret;
}
205 
/*
 * Inode operations of eventfs directories. Installed as i_op by both
 * create_dir() and eventfs_create_events_dir().
 */
static const struct inode_operations eventfs_root_dir_inode_operations = {
	.lookup		= eventfs_root_lookup,
	.setattr	= eventfs_set_attr,
	.getattr	= eventfs_get_attr,
	.permission	= eventfs_permission,
};
212 
/*
 * Inode operations of eventfs files. Installed as i_op by create_file();
 * only attribute changes need to be intercepted so they can be saved.
 */
static const struct inode_operations eventfs_file_inode_operations = {
	.setattr	= eventfs_set_attr,
};
216 
/*
 * File operations of eventfs directories. Despite the name, this table is
 * installed as i_fop by create_dir() and eventfs_create_events_dir();
 * files created by create_file() use the fops supplied by their entry.
 */
static const struct file_operations eventfs_file_operations = {
	.read		= generic_read_dir,
	.iterate_shared	= eventfs_iterate,
	.llseek		= generic_file_llseek,
};
222 
223 /* Return the evenfs_inode of the "events" directory */
224 static struct eventfs_inode *eventfs_find_events(struct dentry *dentry)
225 {
226 	struct eventfs_inode *ei;
227 
228 	mutex_lock(&eventfs_mutex);
229 	do {
230 		/* The parent always has an ei, except for events itself */
231 		ei = dentry->d_parent->d_fsdata;
232 
233 		/*
234 		 * If the ei is being freed, the ownership of the children
235 		 * doesn't matter.
236 		 */
237 		if (ei->is_freed) {
238 			ei = NULL;
239 			break;
240 		}
241 
242 		dentry = ei->dentry;
243 	} while (!ei->is_events);
244 	mutex_unlock(&eventfs_mutex);
245 
246 	update_top_events_attr(ei, dentry);
247 
248 	return ei;
249 }
250 
251 static void update_inode_attr(struct dentry *dentry, struct inode *inode,
252 			      struct eventfs_attr *attr, umode_t mode)
253 {
254 	struct eventfs_inode *events_ei = eventfs_find_events(dentry);
255 
256 	if (!events_ei)
257 		return;
258 
259 	inode->i_mode = mode;
260 	inode->i_uid = events_ei->attr.uid;
261 	inode->i_gid = events_ei->attr.gid;
262 
263 	if (!attr)
264 		return;
265 
266 	if (attr->mode & EVENTFS_SAVE_MODE)
267 		inode->i_mode = attr->mode & EVENTFS_MODE_MASK;
268 
269 	if (attr->mode & EVENTFS_SAVE_UID)
270 		inode->i_uid = attr->uid;
271 
272 	if (attr->mode & EVENTFS_SAVE_GID)
273 		inode->i_gid = attr->gid;
274 }
275 
276 static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level)
277 {
278 	struct eventfs_inode *ei_child;
279 
280 	/* at most we have events/system/event */
281 	if (WARN_ON_ONCE(level > 3))
282 		return;
283 
284 	ei->attr.gid = gid;
285 
286 	if (ei->entry_attrs) {
287 		for (int i = 0; i < ei->nr_entries; i++) {
288 			ei->entry_attrs[i].gid = gid;
289 		}
290 	}
291 
292 	/*
293 	 * Only eventfs_inode with dentries are updated, make sure
294 	 * all eventfs_inodes are updated. If one of the children
295 	 * do not have a dentry, this function must traverse it.
296 	 */
297 	list_for_each_entry_srcu(ei_child, &ei->children, list,
298 				 srcu_read_lock_held(&eventfs_srcu)) {
299 		if (!ei_child->dentry)
300 			update_gid(ei_child, gid, level + 1);
301 	}
302 }
303 
304 void eventfs_update_gid(struct dentry *dentry, kgid_t gid)
305 {
306 	struct eventfs_inode *ei = dentry->d_fsdata;
307 	int idx;
308 
309 	idx = srcu_read_lock(&eventfs_srcu);
310 	update_gid(ei, gid, 0);
311 	srcu_read_unlock(&eventfs_srcu, idx);
312 }
313 
314 /**
315  * create_file - create a file in the tracefs filesystem
316  * @name: the name of the file to create.
317  * @mode: the permission that the file should have.
318  * @attr: saved attributes changed by user
319  * @parent: parent dentry for this file.
320  * @data: something that the caller will want to get to later on.
321  * @fop: struct file_operations that should be used for this file.
322  *
323  * This function creates a dentry that represents a file in the eventsfs_inode
324  * directory. The inode.i_private pointer will point to @data in the open()
325  * call.
326  */
327 static struct dentry *create_file(const char *name, umode_t mode,
328 				  struct eventfs_attr *attr,
329 				  struct dentry *parent, void *data,
330 				  const struct file_operations *fop)
331 {
332 	struct tracefs_inode *ti;
333 	struct dentry *dentry;
334 	struct inode *inode;
335 
336 	if (!(mode & S_IFMT))
337 		mode |= S_IFREG;
338 
339 	if (WARN_ON_ONCE(!S_ISREG(mode)))
340 		return NULL;
341 
342 	WARN_ON_ONCE(!parent);
343 	dentry = eventfs_start_creating(name, parent);
344 
345 	if (IS_ERR(dentry))
346 		return dentry;
347 
348 	inode = tracefs_get_inode(dentry->d_sb);
349 	if (unlikely(!inode))
350 		return eventfs_failed_creating(dentry);
351 
352 	/* If the user updated the directory's attributes, use them */
353 	update_inode_attr(dentry, inode, attr, mode);
354 
355 	inode->i_op = &eventfs_file_inode_operations;
356 	inode->i_fop = fop;
357 	inode->i_private = data;
358 
359 	/* All files will have the same inode number */
360 	inode->i_ino = EVENTFS_FILE_INODE_INO;
361 
362 	ti = get_tracefs(inode);
363 	ti->flags |= TRACEFS_EVENT_INODE;
364 	d_instantiate(dentry, inode);
365 	fsnotify_create(dentry->d_parent->d_inode, dentry);
366 	return eventfs_end_creating(dentry);
367 };
368 
369 /**
370  * create_dir - create a dir in the tracefs filesystem
371  * @ei: the eventfs_inode that represents the directory to create
372  * @parent: parent dentry for this file.
373  *
374  * This function will create a dentry for a directory represented by
375  * a eventfs_inode.
376  */
377 static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent)
378 {
379 	struct tracefs_inode *ti;
380 	struct dentry *dentry;
381 	struct inode *inode;
382 
383 	dentry = eventfs_start_creating(ei->name, parent);
384 	if (IS_ERR(dentry))
385 		return dentry;
386 
387 	inode = tracefs_get_inode(dentry->d_sb);
388 	if (unlikely(!inode))
389 		return eventfs_failed_creating(dentry);
390 
391 	/* If the user updated the directory's attributes, use them */
392 	update_inode_attr(dentry, inode, &ei->attr,
393 			  S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
394 
395 	inode->i_op = &eventfs_root_dir_inode_operations;
396 	inode->i_fop = &eventfs_file_operations;
397 
398 	/* All directories will have the same inode number */
399 	inode->i_ino = EVENTFS_DIR_INODE_INO;
400 
401 	ti = get_tracefs(inode);
402 	ti->flags |= TRACEFS_EVENT_INODE;
403 
404 	inc_nlink(inode);
405 	d_instantiate(dentry, inode);
406 	inc_nlink(dentry->d_parent->d_inode);
407 	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
408 	return eventfs_end_creating(dentry);
409 }
410 
411 static void free_ei(struct eventfs_inode *ei)
412 {
413 	kfree_const(ei->name);
414 	kfree(ei->d_children);
415 	kfree(ei->entry_attrs);
416 	kfree(ei);
417 }
418 
419 /**
420  * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
421  * @ti: the tracefs_inode of the dentry
422  * @dentry: dentry which has the reference to remove.
423  *
424  * Remove the association between a dentry from an eventfs_inode.
425  */
426 void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
427 {
428 	struct eventfs_inode *ei;
429 	int i;
430 
431 	mutex_lock(&eventfs_mutex);
432 
433 	ei = dentry->d_fsdata;
434 	if (!ei)
435 		goto out;
436 
437 	/* This could belong to one of the files of the ei */
438 	if (ei->dentry != dentry) {
439 		for (i = 0; i < ei->nr_entries; i++) {
440 			if (ei->d_children[i] == dentry)
441 				break;
442 		}
443 		if (WARN_ON_ONCE(i == ei->nr_entries))
444 			goto out;
445 		ei->d_children[i] = NULL;
446 	} else if (ei->is_freed) {
447 		free_ei(ei);
448 	} else {
449 		ei->dentry = NULL;
450 	}
451 
452 	dentry->d_fsdata = NULL;
453  out:
454 	mutex_unlock(&eventfs_mutex);
455 }
456 
457 /**
458  * create_file_dentry - create a dentry for a file of an eventfs_inode
459  * @ei: the eventfs_inode that the file will be created under
460  * @idx: the index into the d_children[] of the @ei
461  * @parent: The parent dentry of the created file.
462  * @name: The name of the file to create
463  * @mode: The mode of the file.
464  * @data: The data to use to set the inode of the file with on open()
465  * @fops: The fops of the file to be created.
466  *
467  * Create a dentry for a file of an eventfs_inode @ei and place it into the
468  * address located at @e_dentry.
469  */
470 static struct dentry *
471 create_file_dentry(struct eventfs_inode *ei, int idx,
472 		   struct dentry *parent, const char *name, umode_t mode, void *data,
473 		   const struct file_operations *fops)
474 {
475 	struct eventfs_attr *attr = NULL;
476 	struct dentry **e_dentry = &ei->d_children[idx];
477 	struct dentry *dentry;
478 
479 	WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
480 
481 	mutex_lock(&eventfs_mutex);
482 	if (ei->is_freed) {
483 		mutex_unlock(&eventfs_mutex);
484 		return NULL;
485 	}
486 	/* If the e_dentry already has a dentry, use it */
487 	if (*e_dentry) {
488 		dget(*e_dentry);
489 		mutex_unlock(&eventfs_mutex);
490 		return *e_dentry;
491 	}
492 
493 	/* ei->entry_attrs are protected by SRCU */
494 	if (ei->entry_attrs)
495 		attr = &ei->entry_attrs[idx];
496 
497 	mutex_unlock(&eventfs_mutex);
498 
499 	dentry = create_file(name, mode, attr, parent, data, fops);
500 
501 	mutex_lock(&eventfs_mutex);
502 
503 	if (IS_ERR_OR_NULL(dentry)) {
504 		/*
505 		 * When the mutex was released, something else could have
506 		 * created the dentry for this e_dentry. In which case
507 		 * use that one.
508 		 *
509 		 * If ei->is_freed is set, the e_dentry is currently on its
510 		 * way to being freed, don't return it. If e_dentry is NULL
511 		 * it means it was already freed.
512 		 */
513 		if (ei->is_freed) {
514 			dentry = NULL;
515 		} else {
516 			dentry = *e_dentry;
517 			dget(dentry);
518 		}
519 		mutex_unlock(&eventfs_mutex);
520 		return dentry;
521 	}
522 
523 	if (!*e_dentry && !ei->is_freed) {
524 		*e_dentry = dentry;
525 		dentry->d_fsdata = ei;
526 	} else {
527 		/*
528 		 * Should never happen unless we get here due to being freed.
529 		 * Otherwise it means two dentries exist with the same name.
530 		 */
531 		WARN_ON_ONCE(!ei->is_freed);
532 		dentry = NULL;
533 	}
534 	mutex_unlock(&eventfs_mutex);
535 
536 	return dentry;
537 }
538 
539 /**
540  * eventfs_post_create_dir - post create dir routine
541  * @ei: eventfs_inode of recently created dir
542  *
543  * Map the meta-data of files within an eventfs dir to their parent dentry
544  */
545 static void eventfs_post_create_dir(struct eventfs_inode *ei)
546 {
547 	struct eventfs_inode *ei_child;
548 	struct tracefs_inode *ti;
549 
550 	lockdep_assert_held(&eventfs_mutex);
551 
552 	/* srcu lock already held */
553 	/* fill parent-child relation */
554 	list_for_each_entry_srcu(ei_child, &ei->children, list,
555 				 srcu_read_lock_held(&eventfs_srcu)) {
556 		ei_child->d_parent = ei->dentry;
557 	}
558 
559 	ti = get_tracefs(ei->dentry->d_inode);
560 	ti->private = ei;
561 }
562 
563 /**
564  * create_dir_dentry - Create a directory dentry for the eventfs_inode
565  * @pei: The eventfs_inode parent of ei.
566  * @ei: The eventfs_inode to create the directory for
567  * @parent: The dentry of the parent of this directory
568  *
569  * This creates and attaches a directory dentry to the eventfs_inode @ei.
570  */
571 static struct dentry *
572 create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
573 		  struct dentry *parent)
574 {
575 	struct dentry *dentry = NULL;
576 
577 	WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
578 
579 	mutex_lock(&eventfs_mutex);
580 	if (pei->is_freed || ei->is_freed) {
581 		mutex_unlock(&eventfs_mutex);
582 		return NULL;
583 	}
584 	if (ei->dentry) {
585 		/* If the eventfs_inode already has a dentry, use it */
586 		dentry = ei->dentry;
587 		dget(dentry);
588 		mutex_unlock(&eventfs_mutex);
589 		return dentry;
590 	}
591 	mutex_unlock(&eventfs_mutex);
592 
593 	dentry = create_dir(ei, parent);
594 
595 	mutex_lock(&eventfs_mutex);
596 
597 	if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
598 		/*
599 		 * When the mutex was released, something else could have
600 		 * created the dentry for this e_dentry. In which case
601 		 * use that one.
602 		 *
603 		 * If ei->is_freed is set, the e_dentry is currently on its
604 		 * way to being freed.
605 		 */
606 		dentry = ei->dentry;
607 		if (dentry)
608 			dget(dentry);
609 		mutex_unlock(&eventfs_mutex);
610 		return dentry;
611 	}
612 
613 	if (!ei->dentry && !ei->is_freed) {
614 		ei->dentry = dentry;
615 		eventfs_post_create_dir(ei);
616 		dentry->d_fsdata = ei;
617 	} else {
618 		/*
619 		 * Should never happen unless we get here due to being freed.
620 		 * Otherwise it means two dentries exist with the same name.
621 		 */
622 		WARN_ON_ONCE(!ei->is_freed);
623 		dentry = NULL;
624 	}
625 	mutex_unlock(&eventfs_mutex);
626 
627 	return dentry;
628 }
629 
630 /**
631  * eventfs_root_lookup - lookup routine to create file/dir
632  * @dir: in which a lookup is being done
633  * @dentry: file/dir dentry
634  * @flags: Just passed to simple_lookup()
635  *
636  * Used to create dynamic file/dir with-in @dir, search with-in @ei
637  * list, if @dentry found go ahead and create the file/dir
638  */
639 
640 static struct dentry *eventfs_root_lookup(struct inode *dir,
641 					  struct dentry *dentry,
642 					  unsigned int flags)
643 {
644 	const struct file_operations *fops;
645 	const struct eventfs_entry *entry;
646 	struct eventfs_inode *ei_child;
647 	struct tracefs_inode *ti;
648 	struct eventfs_inode *ei;
649 	struct dentry *ei_dentry = NULL;
650 	struct dentry *ret = NULL;
651 	struct dentry *d;
652 	const char *name = dentry->d_name.name;
653 	umode_t mode;
654 	void *data;
655 	int idx;
656 	int i;
657 	int r;
658 
659 	ti = get_tracefs(dir);
660 	if (!(ti->flags & TRACEFS_EVENT_INODE))
661 		return NULL;
662 
663 	/* Grab srcu to prevent the ei from going away */
664 	idx = srcu_read_lock(&eventfs_srcu);
665 
666 	/*
667 	 * Grab the eventfs_mutex to consistent value from ti->private.
668 	 * This s
669 	 */
670 	mutex_lock(&eventfs_mutex);
671 	ei = READ_ONCE(ti->private);
672 	if (ei && !ei->is_freed)
673 		ei_dentry = READ_ONCE(ei->dentry);
674 	mutex_unlock(&eventfs_mutex);
675 
676 	if (!ei || !ei_dentry)
677 		goto out;
678 
679 	data = ei->data;
680 
681 	list_for_each_entry_srcu(ei_child, &ei->children, list,
682 				 srcu_read_lock_held(&eventfs_srcu)) {
683 		if (strcmp(ei_child->name, name) != 0)
684 			continue;
685 		ret = simple_lookup(dir, dentry, flags);
686 		if (IS_ERR(ret))
687 			goto out;
688 		d = create_dir_dentry(ei, ei_child, ei_dentry);
689 		dput(d);
690 		goto out;
691 	}
692 
693 	for (i = 0; i < ei->nr_entries; i++) {
694 		entry = &ei->entries[i];
695 		if (strcmp(name, entry->name) == 0) {
696 			void *cdata = data;
697 			mutex_lock(&eventfs_mutex);
698 			/* If ei->is_freed, then the event itself may be too */
699 			if (!ei->is_freed)
700 				r = entry->callback(name, &mode, &cdata, &fops);
701 			else
702 				r = -1;
703 			mutex_unlock(&eventfs_mutex);
704 			if (r <= 0)
705 				continue;
706 			ret = simple_lookup(dir, dentry, flags);
707 			if (IS_ERR(ret))
708 				goto out;
709 			d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
710 			dput(d);
711 			break;
712 		}
713 	}
714  out:
715 	srcu_read_unlock(&eventfs_srcu, idx);
716 	return ret;
717 }
718 
719 /*
720  * Walk the children of a eventfs_inode to fill in getdents().
721  */
722 static int eventfs_iterate(struct file *file, struct dir_context *ctx)
723 {
724 	const struct file_operations *fops;
725 	struct inode *f_inode = file_inode(file);
726 	const struct eventfs_entry *entry;
727 	struct eventfs_inode *ei_child;
728 	struct tracefs_inode *ti;
729 	struct eventfs_inode *ei;
730 	const char *name;
731 	umode_t mode;
732 	int idx;
733 	int ret = -EINVAL;
734 	int ino;
735 	int i, r, c;
736 
737 	if (!dir_emit_dots(file, ctx))
738 		return 0;
739 
740 	ti = get_tracefs(f_inode);
741 	if (!(ti->flags & TRACEFS_EVENT_INODE))
742 		return -EINVAL;
743 
744 	c = ctx->pos - 2;
745 
746 	idx = srcu_read_lock(&eventfs_srcu);
747 
748 	mutex_lock(&eventfs_mutex);
749 	ei = READ_ONCE(ti->private);
750 	if (ei && ei->is_freed)
751 		ei = NULL;
752 	mutex_unlock(&eventfs_mutex);
753 
754 	if (!ei)
755 		goto out;
756 
757 	/*
758 	 * Need to create the dentries and inodes to have a consistent
759 	 * inode number.
760 	 */
761 	ret = 0;
762 
763 	/* Start at 'c' to jump over already read entries */
764 	for (i = c; i < ei->nr_entries; i++, ctx->pos++) {
765 		void *cdata = ei->data;
766 
767 		entry = &ei->entries[i];
768 		name = entry->name;
769 
770 		mutex_lock(&eventfs_mutex);
771 		/* If ei->is_freed then just bail here, nothing more to do */
772 		if (ei->is_freed) {
773 			mutex_unlock(&eventfs_mutex);
774 			goto out;
775 		}
776 		r = entry->callback(name, &mode, &cdata, &fops);
777 		mutex_unlock(&eventfs_mutex);
778 		if (r <= 0)
779 			continue;
780 
781 		ino = EVENTFS_FILE_INODE_INO;
782 
783 		if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
784 			goto out;
785 	}
786 
787 	/* Subtract the skipped entries above */
788 	c -= min((unsigned int)c, (unsigned int)ei->nr_entries);
789 
790 	list_for_each_entry_srcu(ei_child, &ei->children, list,
791 				 srcu_read_lock_held(&eventfs_srcu)) {
792 
793 		if (c > 0) {
794 			c--;
795 			continue;
796 		}
797 
798 		ctx->pos++;
799 
800 		if (ei_child->is_freed)
801 			continue;
802 
803 		name = ei_child->name;
804 
805 		ino = EVENTFS_DIR_INODE_INO;
806 
807 		if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
808 			goto out_dec;
809 	}
810 	ret = 1;
811  out:
812 	srcu_read_unlock(&eventfs_srcu, idx);
813 
814 	return ret;
815 
816  out_dec:
817 	/* Incremented ctx->pos without adding something, reset it */
818 	ctx->pos--;
819 	goto out;
820 }
821 
822 /**
823  * eventfs_create_dir - Create the eventfs_inode for this directory
824  * @name: The name of the directory to create.
825  * @parent: The eventfs_inode of the parent directory.
826  * @entries: A list of entries that represent the files under this directory
827  * @size: The number of @entries
828  * @data: The default data to pass to the files (an entry may override it).
829  *
830  * This function creates the descriptor to represent a directory in the
831  * eventfs. This descriptor is an eventfs_inode, and it is returned to be
832  * used to create other children underneath.
833  *
834  * The @entries is an array of eventfs_entry structures which has:
835  *	const char		 *name
836  *	eventfs_callback	callback;
837  *
838  * The name is the name of the file, and the callback is a pointer to a function
839  * that will be called when the file is reference (either by lookup or by
840  * reading a directory). The callback is of the prototype:
841  *
842  *    int callback(const char *name, umode_t *mode, void **data,
843  *		   const struct file_operations **fops);
844  *
845  * When a file needs to be created, this callback will be called with
846  *   name = the name of the file being created (so that the same callback
847  *          may be used for multiple files).
848  *   mode = a place to set the file's mode
849  *   data = A pointer to @data, and the callback may replace it, which will
850  *         cause the file created to pass the new data to the open() call.
851  *   fops = the fops to use for the created file.
852  *
853  * NB. @callback is called while holding internal locks of the eventfs
854  *     system. The callback must not call any code that might also call into
855  *     the tracefs or eventfs system or it will risk creating a deadlock.
856  */
857 struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
858 					 const struct eventfs_entry *entries,
859 					 int size, void *data)
860 {
861 	struct eventfs_inode *ei;
862 
863 	if (!parent)
864 		return ERR_PTR(-EINVAL);
865 
866 	ei = kzalloc(sizeof(*ei), GFP_KERNEL);
867 	if (!ei)
868 		return ERR_PTR(-ENOMEM);
869 
870 	ei->name = kstrdup_const(name, GFP_KERNEL);
871 	if (!ei->name) {
872 		kfree(ei);
873 		return ERR_PTR(-ENOMEM);
874 	}
875 
876 	if (size) {
877 		ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
878 		if (!ei->d_children) {
879 			kfree_const(ei->name);
880 			kfree(ei);
881 			return ERR_PTR(-ENOMEM);
882 		}
883 	}
884 
885 	ei->entries = entries;
886 	ei->nr_entries = size;
887 	ei->data = data;
888 	INIT_LIST_HEAD(&ei->children);
889 	INIT_LIST_HEAD(&ei->list);
890 
891 	mutex_lock(&eventfs_mutex);
892 	if (!parent->is_freed) {
893 		list_add_tail(&ei->list, &parent->children);
894 		ei->d_parent = parent->dentry;
895 	}
896 	mutex_unlock(&eventfs_mutex);
897 
898 	/* Was the parent freed? */
899 	if (list_empty(&ei->list)) {
900 		free_ei(ei);
901 		ei = NULL;
902 	}
903 	return ei;
904 }
905 
906 /**
907  * eventfs_create_events_dir - create the top level events directory
908  * @name: The name of the top level directory to create.
909  * @parent: Parent dentry for this file in the tracefs directory.
910  * @entries: A list of entries that represent the files under this directory
911  * @size: The number of @entries
912  * @data: The default data to pass to the files (an entry may override it).
913  *
914  * This function creates the top of the trace event directory.
915  *
916  * See eventfs_create_dir() for use of @entries.
917  */
918 struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
919 						const struct eventfs_entry *entries,
920 						int size, void *data)
921 {
922 	struct dentry *dentry = tracefs_start_creating(name, parent);
923 	struct eventfs_inode *ei;
924 	struct tracefs_inode *ti;
925 	struct inode *inode;
926 	kuid_t uid;
927 	kgid_t gid;
928 
929 	if (security_locked_down(LOCKDOWN_TRACEFS))
930 		return NULL;
931 
932 	if (IS_ERR(dentry))
933 		return ERR_CAST(dentry);
934 
935 	ei = kzalloc(sizeof(*ei), GFP_KERNEL);
936 	if (!ei)
937 		goto fail_ei;
938 
939 	inode = tracefs_get_inode(dentry->d_sb);
940 	if (unlikely(!inode))
941 		goto fail;
942 
943 	if (size) {
944 		ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
945 		if (!ei->d_children)
946 			goto fail;
947 	}
948 
949 	ei->dentry = dentry;
950 	ei->entries = entries;
951 	ei->nr_entries = size;
952 	ei->is_events = 1;
953 	ei->data = data;
954 	ei->name = kstrdup_const(name, GFP_KERNEL);
955 	if (!ei->name)
956 		goto fail;
957 
958 	/* Save the ownership of this directory */
959 	uid = d_inode(dentry->d_parent)->i_uid;
960 	gid = d_inode(dentry->d_parent)->i_gid;
961 
962 	/*
963 	 * If the events directory is of the top instance, then parent
964 	 * is NULL. Set the attr.mode to reflect this and its permissions will
965 	 * default to the tracefs root dentry.
966 	 */
967 	if (!parent)
968 		ei->attr.mode = EVENTFS_TOPLEVEL;
969 
970 	/* This is used as the default ownership of the files and directories */
971 	ei->attr.uid = uid;
972 	ei->attr.gid = gid;
973 
974 	INIT_LIST_HEAD(&ei->children);
975 	INIT_LIST_HEAD(&ei->list);
976 
977 	ti = get_tracefs(inode);
978 	ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE;
979 	ti->private = ei;
980 
981 	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
982 	inode->i_uid = uid;
983 	inode->i_gid = gid;
984 	inode->i_op = &eventfs_root_dir_inode_operations;
985 	inode->i_fop = &eventfs_file_operations;
986 
987 	dentry->d_fsdata = ei;
988 
989 	/* directory inodes start off with i_nlink == 2 (for "." entry) */
990 	inc_nlink(inode);
991 	d_instantiate(dentry, inode);
992 	inc_nlink(dentry->d_parent->d_inode);
993 	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
994 	tracefs_end_creating(dentry);
995 
996 	return ei;
997 
998  fail:
999 	kfree(ei->d_children);
1000 	kfree(ei);
1001  fail_ei:
1002 	tracefs_failed_creating(dentry);
1003 	return ERR_PTR(-ENOMEM);
1004 }
1005 
1006 static LLIST_HEAD(free_list);
1007 
1008 static void eventfs_workfn(struct work_struct *work)
1009 {
1010         struct eventfs_inode *ei, *tmp;
1011         struct llist_node *llnode;
1012 
1013 	llnode = llist_del_all(&free_list);
1014         llist_for_each_entry_safe(ei, tmp, llnode, llist) {
1015 		/* This dput() matches the dget() from unhook_dentry() */
1016 		for (int i = 0; i < ei->nr_entries; i++) {
1017 			if (ei->d_children[i])
1018 				dput(ei->d_children[i]);
1019 		}
1020 		/* This should only get here if it had a dentry */
1021 		if (!WARN_ON_ONCE(!ei->dentry))
1022 			dput(ei->dentry);
1023         }
1024 }
1025 
1026 static DECLARE_WORK(eventfs_work, eventfs_workfn);
1027 
1028 static void free_rcu_ei(struct rcu_head *head)
1029 {
1030 	struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
1031 
1032 	if (ei->dentry) {
1033 		/* Do not free the ei until all references of dentry are gone */
1034 		if (llist_add(&ei->llist, &free_list))
1035 			queue_work(system_unbound_wq, &eventfs_work);
1036 		return;
1037 	}
1038 
1039 	/* If the ei doesn't have a dentry, neither should its children */
1040 	for (int i = 0; i < ei->nr_entries; i++) {
1041 		WARN_ON_ONCE(ei->d_children[i]);
1042 	}
1043 
1044 	free_ei(ei);
1045 }
1046 
/*
 * Take the two extra references on @dentry (which may be NULL) that the
 * removal of an eventfs_inode relies on.
 */
static void unhook_dentry(struct dentry *dentry)
{
	if (dentry) {
		/*
		 * Need to add a reference to the dentry that is expected by
		 * simple_recursive_removal(), which will include a dput().
		 */
		dget(dentry);

		/*
		 * Also add a reference for the dput() in eventfs_workfn().
		 * That is required as that dput() will free the ei after
		 * the SRCU grace period is over.
		 */
		dget(dentry);
	}
}
1064 
1065 /**
1066  * eventfs_remove_rec - remove eventfs dir or file from list
1067  * @ei: eventfs_inode to be removed.
1068  * @level: prevent recursion from going more than 3 levels deep.
1069  *
1070  * This function recursively removes eventfs_inodes which
1071  * contains info of files and/or directories.
1072  */
1073 static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
1074 {
1075 	struct eventfs_inode *ei_child;
1076 
1077 	if (!ei)
1078 		return;
1079 	/*
1080 	 * Check recursion depth. It should never be greater than 3:
1081 	 * 0 - events/
1082 	 * 1 - events/group/
1083 	 * 2 - events/group/event/
1084 	 * 3 - events/group/event/file
1085 	 */
1086 	if (WARN_ON_ONCE(level > 3))
1087 		return;
1088 
1089 	/* search for nested folders or files */
1090 	list_for_each_entry_srcu(ei_child, &ei->children, list,
1091 				 lockdep_is_held(&eventfs_mutex)) {
1092 		/* Children only have dentry if parent does */
1093 		WARN_ON_ONCE(ei_child->dentry && !ei->dentry);
1094 		eventfs_remove_rec(ei_child, level + 1);
1095 	}
1096 
1097 
1098 	ei->is_freed = 1;
1099 
1100 	for (int i = 0; i < ei->nr_entries; i++) {
1101 		if (ei->d_children[i]) {
1102 			/* Children only have dentry if parent does */
1103 			WARN_ON_ONCE(!ei->dentry);
1104 			unhook_dentry(ei->d_children[i]);
1105 		}
1106 	}
1107 
1108 	unhook_dentry(ei->dentry);
1109 
1110 	list_del_rcu(&ei->list);
1111 	call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
1112 }
1113 
1114 /**
1115  * eventfs_remove_dir - remove eventfs dir or file from list
1116  * @ei: eventfs_inode to be removed.
1117  *
1118  * This function acquire the eventfs_mutex lock and call eventfs_remove_rec()
1119  */
1120 void eventfs_remove_dir(struct eventfs_inode *ei)
1121 {
1122 	struct dentry *dentry;
1123 
1124 	if (!ei)
1125 		return;
1126 
1127 	mutex_lock(&eventfs_mutex);
1128 	dentry = ei->dentry;
1129 	eventfs_remove_rec(ei, 0);
1130 	mutex_unlock(&eventfs_mutex);
1131 
1132 	/*
1133 	 * If any of the ei children has a dentry, then the ei itself
1134 	 * must have a dentry.
1135 	 */
1136 	if (dentry)
1137 		simple_recursive_removal(dentry, NULL);
1138 }
1139 
1140 /**
1141  * eventfs_remove_events_dir - remove the top level eventfs directory
1142  * @ei: the event_inode returned by eventfs_create_events_dir().
1143  *
1144  * This function removes the events main directory
1145  */
1146 void eventfs_remove_events_dir(struct eventfs_inode *ei)
1147 {
1148 	struct dentry *dentry;
1149 
1150 	dentry = ei->dentry;
1151 	eventfs_remove_dir(ei);
1152 
1153 	/*
1154 	 * Matches the dget() done by tracefs_start_creating()
1155 	 * in eventfs_create_events_dir() when it the dentry was
1156 	 * created. In other words, it's a normal dentry that
1157 	 * sticks around while the other ei->dentry are created
1158 	 * and destroyed dynamically.
1159 	 */
1160 	dput(dentry);
1161 }
1162