xref: /linux/fs/tracefs/event_inode.c (revision bdce82e960d1205d118662f575cec39379984e34)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  event_inode.c - part of tracefs, a pseudo file system for activating tracing
4  *
5  *  Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org>
6  *  Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com>
7  *  Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org>
8  *
9  *  eventfs is used to dynamically create inodes and dentries based on the
10  *  meta data provided by the tracing system.
11  *
12  *  eventfs stores the meta-data of files/dirs and holds off on creating
13  *  inodes/dentries of the files. When accessed, the eventfs will create the
14  *  inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up
15  *  and delete the inodes/dentries when they are no longer referenced.
16  */
17 #include <linux/fsnotify.h>
18 #include <linux/fs.h>
19 #include <linux/namei.h>
20 #include <linux/workqueue.h>
21 #include <linux/security.h>
22 #include <linux/tracefs.h>
23 #include <linux/kref.h>
24 #include <linux/delay.h>
25 #include "internal.h"
26 
/*
 * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access
 * to the ei->dentry must be done under this mutex and after checking
 * that ei->is_freed is not set. When ei->is_freed is set, the dentry
 * is on its way to being freed after the last dput() is made on it.
 *
 * The same mutex is also held around the eventfs_entry callbacks and
 * while the children lists are modified.
 */
static DEFINE_MUTEX(eventfs_mutex);

/*
 * Choose something "unique" ;-)
 *
 * Every file created by create_file() and every file reported by
 * eventfs_iterate() uses this one inode number.
 */
#define EVENTFS_FILE_INODE_INO		0x12c4e37
38 /* Just try to make something consistent and unique */
39 static int eventfs_dir_ino(struct eventfs_inode *ei)
40 {
41 	if (!ei->ino)
42 		ei->ino = get_next_ino();
43 
44 	return ei->ino;
45 }
46 
/*
 * The eventfs_inode (ei) itself is protected by SRCU. On removal it is
 * unlinked from its parent's list and has is_freed set (both under
 * eventfs_mutex). The ei is only freed after the SRCU grace period is
 * over and the last dput() has been called on its dentry.
 *
 * Readers (eventfs_root_lookup() and eventfs_iterate()) walk the
 * children lists inside srcu_read_lock()/srcu_read_unlock().
 */
DEFINE_STATIC_SRCU(eventfs_srcu);
54 
/*
 * Mode is unsigned short, use the upper bits for flags.
 *
 * These flags live in eventfs_attr.mode, above the real mode bits:
 *  EVENTFS_SAVE_MODE - the mode was changed by setattr and must be kept
 *  EVENTFS_SAVE_UID  - the uid was changed by setattr and must be kept
 *  EVENTFS_SAVE_GID  - the gid was changed by setattr and must be kept
 *  EVENTFS_TOPLEVEL  - set on the "events" ei that was created with a
 *                      NULL parent (see eventfs_create_events_dir())
 */
enum {
	EVENTFS_SAVE_MODE	= BIT(16),
	EVENTFS_SAVE_UID	= BIT(17),
	EVENTFS_SAVE_GID	= BIT(18),
	EVENTFS_TOPLEVEL	= BIT(19),
};

/* Everything below the first flag bit is the actual file mode */
#define EVENTFS_MODE_MASK	(EVENTFS_SAVE_MODE - 1)

/* Needed by the operation tables that are defined before these functions */
static struct dentry *eventfs_root_lookup(struct inode *dir,
					  struct dentry *dentry,
					  unsigned int flags);
static int eventfs_iterate(struct file *file, struct dir_context *ctx);
69 
70 static void update_attr(struct eventfs_attr *attr, struct iattr *iattr)
71 {
72 	unsigned int ia_valid = iattr->ia_valid;
73 
74 	if (ia_valid & ATTR_MODE) {
75 		attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) |
76 			(iattr->ia_mode & EVENTFS_MODE_MASK) |
77 			EVENTFS_SAVE_MODE;
78 	}
79 	if (ia_valid & ATTR_UID) {
80 		attr->mode |= EVENTFS_SAVE_UID;
81 		attr->uid = iattr->ia_uid;
82 	}
83 	if (ia_valid & ATTR_GID) {
84 		attr->mode |= EVENTFS_SAVE_GID;
85 		attr->gid = iattr->ia_gid;
86 	}
87 }
88 
89 static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
90 			    struct iattr *iattr)
91 {
92 	const struct eventfs_entry *entry;
93 	struct eventfs_inode *ei;
94 	const char *name;
95 	int ret;
96 
97 	mutex_lock(&eventfs_mutex);
98 	ei = dentry->d_fsdata;
99 	if (ei->is_freed) {
100 		/* Do not allow changes if the event is about to be removed. */
101 		mutex_unlock(&eventfs_mutex);
102 		return -ENODEV;
103 	}
104 
105 	/* Preallocate the children mode array if necessary */
106 	if (!(dentry->d_inode->i_mode & S_IFDIR)) {
107 		if (!ei->entry_attrs) {
108 			ei->entry_attrs = kcalloc(ei->nr_entries, sizeof(*ei->entry_attrs),
109 						  GFP_NOFS);
110 			if (!ei->entry_attrs) {
111 				ret = -ENOMEM;
112 				goto out;
113 			}
114 		}
115 	}
116 
117 	ret = simple_setattr(idmap, dentry, iattr);
118 	if (ret < 0)
119 		goto out;
120 
121 	/*
122 	 * If this is a dir, then update the ei cache, only the file
123 	 * mode is saved in the ei->m_children, and the ownership is
124 	 * determined by the parent directory.
125 	 */
126 	if (dentry->d_inode->i_mode & S_IFDIR) {
127 		/*
128 		 * The events directory dentry is never freed, unless its
129 		 * part of an instance that is deleted. It's attr is the
130 		 * default for its child files and directories.
131 		 * Do not update it. It's not used for its own mode or ownership.
132 		 */
133 		if (ei->is_events) {
134 			/* But it still needs to know if it was modified */
135 			if (iattr->ia_valid & ATTR_UID)
136 				ei->attr.mode |= EVENTFS_SAVE_UID;
137 			if (iattr->ia_valid & ATTR_GID)
138 				ei->attr.mode |= EVENTFS_SAVE_GID;
139 		} else {
140 			update_attr(&ei->attr, iattr);
141 		}
142 
143 	} else {
144 		name = dentry->d_name.name;
145 
146 		for (int i = 0; i < ei->nr_entries; i++) {
147 			entry = &ei->entries[i];
148 			if (strcmp(name, entry->name) == 0) {
149 				update_attr(&ei->entry_attrs[i], iattr);
150 				break;
151 			}
152 		}
153 	}
154  out:
155 	mutex_unlock(&eventfs_mutex);
156 	return ret;
157 }
158 
159 static void update_top_events_attr(struct eventfs_inode *ei, struct dentry *dentry)
160 {
161 	struct inode *inode;
162 
163 	/* Only update if the "events" was on the top level */
164 	if (!ei || !(ei->attr.mode & EVENTFS_TOPLEVEL))
165 		return;
166 
167 	/* Get the tracefs root inode. */
168 	inode = d_inode(dentry->d_sb->s_root);
169 	ei->attr.uid = inode->i_uid;
170 	ei->attr.gid = inode->i_gid;
171 }
172 
173 static void set_top_events_ownership(struct inode *inode)
174 {
175 	struct tracefs_inode *ti = get_tracefs(inode);
176 	struct eventfs_inode *ei = ti->private;
177 	struct dentry *dentry;
178 
179 	/* The top events directory doesn't get automatically updated */
180 	if (!ei || !ei->is_events || !(ei->attr.mode & EVENTFS_TOPLEVEL))
181 		return;
182 
183 	dentry = ei->dentry;
184 
185 	update_top_events_attr(ei, dentry);
186 
187 	if (!(ei->attr.mode & EVENTFS_SAVE_UID))
188 		inode->i_uid = ei->attr.uid;
189 
190 	if (!(ei->attr.mode & EVENTFS_SAVE_GID))
191 		inode->i_gid = ei->attr.gid;
192 }
193 
194 static int eventfs_get_attr(struct mnt_idmap *idmap,
195 			    const struct path *path, struct kstat *stat,
196 			    u32 request_mask, unsigned int flags)
197 {
198 	struct dentry *dentry = path->dentry;
199 	struct inode *inode = d_backing_inode(dentry);
200 
201 	set_top_events_ownership(inode);
202 
203 	generic_fillattr(idmap, request_mask, inode, stat);
204 	return 0;
205 }
206 
/*
 * ->permission() for eventfs directories. Refreshes the ownership of the
 * top level "events" directory first, so that generic_permission() checks
 * against up to date uid/gid values.
 */
static int eventfs_permission(struct mnt_idmap *idmap,
			      struct inode *inode, int mask)
{
	set_top_events_ownership(inode);
	return generic_permission(idmap, inode, mask);
}
213 
/* Inode operations of every eventfs directory (see create_dir()) */
static const struct inode_operations eventfs_root_dir_inode_operations = {
	.lookup		= eventfs_root_lookup,
	.setattr	= eventfs_set_attr,
	.getattr	= eventfs_get_attr,
	.permission	= eventfs_permission,
};

/* Inode operations of the files inside an eventfs directory (see create_file()) */
static const struct inode_operations eventfs_file_inode_operations = {
	.setattr	= eventfs_set_attr,
};

/*
 * Despite the name, these are the file operations of eventfs directories:
 * create_dir() and eventfs_create_events_dir() install them as i_fop.
 */
static const struct file_operations eventfs_file_operations = {
	.read		= generic_read_dir,
	.iterate_shared	= eventfs_iterate,
	.llseek		= generic_file_llseek,
};
230 
231 /* Return the evenfs_inode of the "events" directory */
232 static struct eventfs_inode *eventfs_find_events(struct dentry *dentry)
233 {
234 	struct eventfs_inode *ei;
235 
236 	mutex_lock(&eventfs_mutex);
237 	do {
238 		/* The parent always has an ei, except for events itself */
239 		ei = dentry->d_parent->d_fsdata;
240 
241 		/*
242 		 * If the ei is being freed, the ownership of the children
243 		 * doesn't matter.
244 		 */
245 		if (ei->is_freed) {
246 			ei = NULL;
247 			break;
248 		}
249 
250 		dentry = ei->dentry;
251 	} while (!ei->is_events);
252 	mutex_unlock(&eventfs_mutex);
253 
254 	update_top_events_attr(ei, dentry);
255 
256 	return ei;
257 }
258 
259 static void update_inode_attr(struct dentry *dentry, struct inode *inode,
260 			      struct eventfs_attr *attr, umode_t mode)
261 {
262 	struct eventfs_inode *events_ei = eventfs_find_events(dentry);
263 
264 	if (!events_ei)
265 		return;
266 
267 	inode->i_mode = mode;
268 	inode->i_uid = events_ei->attr.uid;
269 	inode->i_gid = events_ei->attr.gid;
270 
271 	if (!attr)
272 		return;
273 
274 	if (attr->mode & EVENTFS_SAVE_MODE)
275 		inode->i_mode = attr->mode & EVENTFS_MODE_MASK;
276 
277 	if (attr->mode & EVENTFS_SAVE_UID)
278 		inode->i_uid = attr->uid;
279 
280 	if (attr->mode & EVENTFS_SAVE_GID)
281 		inode->i_gid = attr->gid;
282 }
283 
284 /**
285  * create_file - create a file in the tracefs filesystem
286  * @name: the name of the file to create.
287  * @mode: the permission that the file should have.
288  * @attr: saved attributes changed by user
289  * @parent: parent dentry for this file.
290  * @data: something that the caller will want to get to later on.
291  * @fop: struct file_operations that should be used for this file.
292  *
293  * This function creates a dentry that represents a file in the eventsfs_inode
294  * directory. The inode.i_private pointer will point to @data in the open()
295  * call.
296  */
297 static struct dentry *create_file(const char *name, umode_t mode,
298 				  struct eventfs_attr *attr,
299 				  struct dentry *parent, void *data,
300 				  const struct file_operations *fop)
301 {
302 	struct tracefs_inode *ti;
303 	struct dentry *dentry;
304 	struct inode *inode;
305 
306 	if (!(mode & S_IFMT))
307 		mode |= S_IFREG;
308 
309 	if (WARN_ON_ONCE(!S_ISREG(mode)))
310 		return NULL;
311 
312 	WARN_ON_ONCE(!parent);
313 	dentry = eventfs_start_creating(name, parent);
314 
315 	if (IS_ERR(dentry))
316 		return dentry;
317 
318 	inode = tracefs_get_inode(dentry->d_sb);
319 	if (unlikely(!inode))
320 		return eventfs_failed_creating(dentry);
321 
322 	/* If the user updated the directory's attributes, use them */
323 	update_inode_attr(dentry, inode, attr, mode);
324 
325 	inode->i_op = &eventfs_file_inode_operations;
326 	inode->i_fop = fop;
327 	inode->i_private = data;
328 
329 	/* All files will have the same inode number */
330 	inode->i_ino = EVENTFS_FILE_INODE_INO;
331 
332 	ti = get_tracefs(inode);
333 	ti->flags |= TRACEFS_EVENT_INODE;
334 	d_instantiate(dentry, inode);
335 	fsnotify_create(dentry->d_parent->d_inode, dentry);
336 	return eventfs_end_creating(dentry);
337 };
338 
339 /**
340  * create_dir - create a dir in the tracefs filesystem
341  * @ei: the eventfs_inode that represents the directory to create
342  * @parent: parent dentry for this file.
343  *
344  * This function will create a dentry for a directory represented by
345  * a eventfs_inode.
346  */
347 static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent)
348 {
349 	struct tracefs_inode *ti;
350 	struct dentry *dentry;
351 	struct inode *inode;
352 
353 	dentry = eventfs_start_creating(ei->name, parent);
354 	if (IS_ERR(dentry))
355 		return dentry;
356 
357 	inode = tracefs_get_inode(dentry->d_sb);
358 	if (unlikely(!inode))
359 		return eventfs_failed_creating(dentry);
360 
361 	/* If the user updated the directory's attributes, use them */
362 	update_inode_attr(dentry, inode, &ei->attr,
363 			  S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
364 
365 	inode->i_op = &eventfs_root_dir_inode_operations;
366 	inode->i_fop = &eventfs_file_operations;
367 
368 	/* All directories will have the same inode number */
369 	inode->i_ino = eventfs_dir_ino(ei);
370 
371 	ti = get_tracefs(inode);
372 	ti->flags |= TRACEFS_EVENT_INODE;
373 
374 	inc_nlink(inode);
375 	d_instantiate(dentry, inode);
376 	inc_nlink(dentry->d_parent->d_inode);
377 	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
378 	return eventfs_end_creating(dentry);
379 }
380 
/*
 * Free an eventfs_inode together with the memory it owns: its name, the
 * d_children[] dentry pointer array and the entry_attrs[] array. The
 * dentries that d_children[] points to are not touched here.
 */
static void free_ei(struct eventfs_inode *ei)
{
	kfree_const(ei->name);
	kfree(ei->d_children);
	kfree(ei->entry_attrs);
	/* Must be last, the members above are reached through ei */
	kfree(ei);
}
388 
389 /**
390  * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
391  * @ti: the tracefs_inode of the dentry
392  * @dentry: dentry which has the reference to remove.
393  *
394  * Remove the association between a dentry from an eventfs_inode.
395  */
396 void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
397 {
398 	struct eventfs_inode *ei;
399 	int i;
400 
401 	mutex_lock(&eventfs_mutex);
402 
403 	ei = dentry->d_fsdata;
404 	if (!ei)
405 		goto out;
406 
407 	/* This could belong to one of the files of the ei */
408 	if (ei->dentry != dentry) {
409 		for (i = 0; i < ei->nr_entries; i++) {
410 			if (ei->d_children[i] == dentry)
411 				break;
412 		}
413 		if (WARN_ON_ONCE(i == ei->nr_entries))
414 			goto out;
415 		ei->d_children[i] = NULL;
416 	} else if (ei->is_freed) {
417 		free_ei(ei);
418 	} else {
419 		ei->dentry = NULL;
420 	}
421 
422 	dentry->d_fsdata = NULL;
423  out:
424 	mutex_unlock(&eventfs_mutex);
425 }
426 
/**
 * create_file_dentry - create a dentry for a file of an eventfs_inode
 * @ei: the eventfs_inode that the file will be created under
 * @idx: the index into the d_children[] of the @ei
 * @parent: The parent dentry of the created file.
 * @name: The name of the file to create
 * @mode: The mode of the file.
 * @data: The data to use to set the inode of the file with on open()
 * @fops: The fops of the file to be created.
 *
 * Create a dentry for a file of an eventfs_inode @ei and store it in
 * @ei->d_children[@idx]. If that slot already holds a dentry, a new
 * reference to the existing dentry is returned instead.
 *
 * The inode of @parent is expected to be locked by the caller.
 *
 * Return: the dentry of the file, or NULL if @ei is being freed or no
 * dentry could be created or found.
 */
static struct dentry *
create_file_dentry(struct eventfs_inode *ei, int idx,
		   struct dentry *parent, const char *name, umode_t mode, void *data,
		   const struct file_operations *fops)
{
	struct eventfs_attr *attr = NULL;
	/* Slot of @ei that caches the dentry of this file */
	struct dentry **e_dentry = &ei->d_children[idx];
	struct dentry *dentry;

	WARN_ON_ONCE(!inode_is_locked(parent->d_inode));

	mutex_lock(&eventfs_mutex);
	if (ei->is_freed) {
		mutex_unlock(&eventfs_mutex);
		return NULL;
	}
	/* If the e_dentry already has a dentry, use it */
	if (*e_dentry) {
		dget(*e_dentry);
		mutex_unlock(&eventfs_mutex);
		return *e_dentry;
	}

	/* ei->entry_attrs are protected by SRCU */
	if (ei->entry_attrs)
		attr = &ei->entry_attrs[idx];

	/* create_file() is called without eventfs_mutex held */
	mutex_unlock(&eventfs_mutex);

	dentry = create_file(name, mode, attr, parent, data, fops);

	/* Retake the mutex to publish (or look up) the result */
	mutex_lock(&eventfs_mutex);

	if (IS_ERR_OR_NULL(dentry)) {
		/*
		 * When the mutex was released, something else could have
		 * created the dentry for this e_dentry. In which case
		 * use that one.
		 *
		 * If ei->is_freed is set, the e_dentry is currently on its
		 * way to being freed, don't return it. If e_dentry is NULL
		 * it means it was already freed.
		 */
		if (ei->is_freed) {
			dentry = NULL;
		} else {
			dentry = *e_dentry;
			dget(dentry);
		}
		mutex_unlock(&eventfs_mutex);
		return dentry;
	}

	if (!*e_dentry && !ei->is_freed) {
		/* Publish the new dentry and link it back to its ei */
		*e_dentry = dentry;
		dentry->d_fsdata = ei;
	} else {
		/*
		 * Should never happen unless we get here due to being freed.
		 * Otherwise it means two dentries exist with the same name.
		 */
		WARN_ON_ONCE(!ei->is_freed);
		dentry = NULL;
	}
	mutex_unlock(&eventfs_mutex);

	return dentry;
}
508 
509 /**
510  * eventfs_post_create_dir - post create dir routine
511  * @ei: eventfs_inode of recently created dir
512  *
513  * Map the meta-data of files within an eventfs dir to their parent dentry
514  */
515 static void eventfs_post_create_dir(struct eventfs_inode *ei)
516 {
517 	struct eventfs_inode *ei_child;
518 	struct tracefs_inode *ti;
519 
520 	lockdep_assert_held(&eventfs_mutex);
521 
522 	/* srcu lock already held */
523 	/* fill parent-child relation */
524 	list_for_each_entry_srcu(ei_child, &ei->children, list,
525 				 srcu_read_lock_held(&eventfs_srcu)) {
526 		ei_child->d_parent = ei->dentry;
527 	}
528 
529 	ti = get_tracefs(ei->dentry->d_inode);
530 	ti->private = ei;
531 }
532 
/**
 * create_dir_dentry - Create a directory dentry for the eventfs_inode
 * @pei: The eventfs_inode parent of ei.
 * @ei: The eventfs_inode to create the directory for
 * @parent: The dentry of the parent of this directory
 *
 * This creates and attaches a directory dentry to the eventfs_inode @ei.
 * If @ei already has a dentry, a new reference to it is returned instead.
 *
 * The inode of @parent is expected to be locked by the caller.
 *
 * Return: the dentry of the directory, or NULL if @pei or @ei is being
 * freed or no dentry could be created or found.
 */
static struct dentry *
create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
		  struct dentry *parent)
{
	struct dentry *dentry = NULL;

	WARN_ON_ONCE(!inode_is_locked(parent->d_inode));

	mutex_lock(&eventfs_mutex);
	if (pei->is_freed || ei->is_freed) {
		mutex_unlock(&eventfs_mutex);
		return NULL;
	}
	if (ei->dentry) {
		/* If the eventfs_inode already has a dentry, use it */
		dentry = ei->dentry;
		dget(dentry);
		mutex_unlock(&eventfs_mutex);
		return dentry;
	}
	/* create_dir() is called without eventfs_mutex held */
	mutex_unlock(&eventfs_mutex);

	dentry = create_dir(ei, parent);

	/* Retake the mutex to publish (or look up) the result */
	mutex_lock(&eventfs_mutex);

	if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
		/*
		 * When the mutex was released, something else could have
		 * created the dentry for this e_dentry. In which case
		 * use that one.
		 *
		 * If ei->is_freed is set, the e_dentry is currently on its
		 * way to being freed.
		 */
		dentry = ei->dentry;
		if (dentry)
			dget(dentry);
		mutex_unlock(&eventfs_mutex);
		return dentry;
	}

	if (!ei->dentry && !ei->is_freed) {
		/* Publish the new dentry and link it back to its ei */
		ei->dentry = dentry;
		eventfs_post_create_dir(ei);
		dentry->d_fsdata = ei;
	} else {
		/*
		 * Should never happen unless we get here due to being freed.
		 * Otherwise it means two dentries exist with the same name.
		 */
		WARN_ON_ONCE(!ei->is_freed);
		dentry = NULL;
	}
	mutex_unlock(&eventfs_mutex);

	return dentry;
}
599 
/**
 * eventfs_root_lookup - lookup routine to create file/dir
 * @dir: in which a lookup is being done
 * @dentry: file/dir dentry
 * @flags: Just passed to simple_lookup()
 *
 * Used to create a dynamic file/dir within @dir. The eventfs_inode of
 * @dir is searched for the name of @dentry, first among its child
 * directories and then among its file entries. If the name is found,
 * the file/dir is created.
 *
 * Return: the result of simple_lookup() if the name was found, otherwise
 * NULL.
 */

static struct dentry *eventfs_root_lookup(struct inode *dir,
					  struct dentry *dentry,
					  unsigned int flags)
{
	const struct file_operations *fops;
	const struct eventfs_entry *entry;
	struct eventfs_inode *ei_child;
	struct tracefs_inode *ti;
	struct eventfs_inode *ei;
	struct dentry *ei_dentry = NULL;
	struct dentry *ret = NULL;
	struct dentry *d;
	const char *name = dentry->d_name.name;
	umode_t mode;
	void *data;
	int idx;
	int i;
	int r;

	ti = get_tracefs(dir);
	if (!(ti->flags & TRACEFS_EVENT_INODE))
		return NULL;

	/* Grab srcu to prevent the ei from going away */
	idx = srcu_read_lock(&eventfs_srcu);

	/*
	 * Grab the eventfs_mutex to get a consistent value from ti->private
	 * and, if the ei is not being freed, from its ei->dentry.
	 */
	mutex_lock(&eventfs_mutex);
	ei = READ_ONCE(ti->private);
	if (ei && !ei->is_freed)
		ei_dentry = READ_ONCE(ei->dentry);
	mutex_unlock(&eventfs_mutex);

	if (!ei || !ei_dentry)
		goto out;

	data = ei->data;

	/* First see if the name matches one of the child directories */
	list_for_each_entry_srcu(ei_child, &ei->children, list,
				 srcu_read_lock_held(&eventfs_srcu)) {
		if (strcmp(ei_child->name, name) != 0)
			continue;
		ret = simple_lookup(dir, dentry, flags);
		if (IS_ERR(ret))
			goto out;
		d = create_dir_dentry(ei, ei_child, ei_dentry);
		/* Drop the reference that create_dir_dentry() returned */
		dput(d);
		goto out;
	}

	/* Otherwise see if the name matches one of the files */
	for (i = 0; i < ei->nr_entries; i++) {
		entry = &ei->entries[i];
		if (strcmp(name, entry->name) == 0) {
			void *cdata = data;
			mutex_lock(&eventfs_mutex);
			/* If ei->is_freed, then the event itself may be too */
			if (!ei->is_freed)
				r = entry->callback(name, &mode, &cdata, &fops);
			else
				r = -1;
			mutex_unlock(&eventfs_mutex);
			/* The callback result says not to create this file */
			if (r <= 0)
				continue;
			ret = simple_lookup(dir, dentry, flags);
			if (IS_ERR(ret))
				goto out;
			d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
			/* Drop the reference that create_file_dentry() returned */
			dput(d);
			break;
		}
	}
 out:
	srcu_read_unlock(&eventfs_srcu, idx);
	return ret;
}
688 
689 /*
690  * Walk the children of a eventfs_inode to fill in getdents().
691  */
692 static int eventfs_iterate(struct file *file, struct dir_context *ctx)
693 {
694 	const struct file_operations *fops;
695 	struct inode *f_inode = file_inode(file);
696 	const struct eventfs_entry *entry;
697 	struct eventfs_inode *ei_child;
698 	struct tracefs_inode *ti;
699 	struct eventfs_inode *ei;
700 	const char *name;
701 	umode_t mode;
702 	int idx;
703 	int ret = -EINVAL;
704 	int ino;
705 	int i, r, c;
706 
707 	if (!dir_emit_dots(file, ctx))
708 		return 0;
709 
710 	ti = get_tracefs(f_inode);
711 	if (!(ti->flags & TRACEFS_EVENT_INODE))
712 		return -EINVAL;
713 
714 	c = ctx->pos - 2;
715 
716 	idx = srcu_read_lock(&eventfs_srcu);
717 
718 	mutex_lock(&eventfs_mutex);
719 	ei = READ_ONCE(ti->private);
720 	if (ei && ei->is_freed)
721 		ei = NULL;
722 	mutex_unlock(&eventfs_mutex);
723 
724 	if (!ei)
725 		goto out;
726 
727 	/*
728 	 * Need to create the dentries and inodes to have a consistent
729 	 * inode number.
730 	 */
731 	ret = 0;
732 
733 	/* Start at 'c' to jump over already read entries */
734 	for (i = c; i < ei->nr_entries; i++, ctx->pos++) {
735 		void *cdata = ei->data;
736 
737 		entry = &ei->entries[i];
738 		name = entry->name;
739 
740 		mutex_lock(&eventfs_mutex);
741 		/* If ei->is_freed then just bail here, nothing more to do */
742 		if (ei->is_freed) {
743 			mutex_unlock(&eventfs_mutex);
744 			goto out;
745 		}
746 		r = entry->callback(name, &mode, &cdata, &fops);
747 		mutex_unlock(&eventfs_mutex);
748 		if (r <= 0)
749 			continue;
750 
751 		ino = EVENTFS_FILE_INODE_INO;
752 
753 		if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
754 			goto out;
755 	}
756 
757 	/* Subtract the skipped entries above */
758 	c -= min((unsigned int)c, (unsigned int)ei->nr_entries);
759 
760 	list_for_each_entry_srcu(ei_child, &ei->children, list,
761 				 srcu_read_lock_held(&eventfs_srcu)) {
762 
763 		if (c > 0) {
764 			c--;
765 			continue;
766 		}
767 
768 		ctx->pos++;
769 
770 		if (ei_child->is_freed)
771 			continue;
772 
773 		name = ei_child->name;
774 
775 		ino = eventfs_dir_ino(ei_child);
776 
777 		if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
778 			goto out_dec;
779 	}
780 	ret = 1;
781  out:
782 	srcu_read_unlock(&eventfs_srcu, idx);
783 
784 	return ret;
785 
786  out_dec:
787 	/* Incremented ctx->pos without adding something, reset it */
788 	ctx->pos--;
789 	goto out;
790 }
791 
792 /**
793  * eventfs_create_dir - Create the eventfs_inode for this directory
794  * @name: The name of the directory to create.
795  * @parent: The eventfs_inode of the parent directory.
796  * @entries: A list of entries that represent the files under this directory
797  * @size: The number of @entries
798  * @data: The default data to pass to the files (an entry may override it).
799  *
800  * This function creates the descriptor to represent a directory in the
801  * eventfs. This descriptor is an eventfs_inode, and it is returned to be
802  * used to create other children underneath.
803  *
804  * The @entries is an array of eventfs_entry structures which has:
805  *	const char		 *name
806  *	eventfs_callback	callback;
807  *
808  * The name is the name of the file, and the callback is a pointer to a function
809  * that will be called when the file is reference (either by lookup or by
810  * reading a directory). The callback is of the prototype:
811  *
812  *    int callback(const char *name, umode_t *mode, void **data,
813  *		   const struct file_operations **fops);
814  *
815  * When a file needs to be created, this callback will be called with
816  *   name = the name of the file being created (so that the same callback
817  *          may be used for multiple files).
818  *   mode = a place to set the file's mode
819  *   data = A pointer to @data, and the callback may replace it, which will
820  *         cause the file created to pass the new data to the open() call.
821  *   fops = the fops to use for the created file.
822  *
823  * NB. @callback is called while holding internal locks of the eventfs
824  *     system. The callback must not call any code that might also call into
825  *     the tracefs or eventfs system or it will risk creating a deadlock.
826  */
827 struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
828 					 const struct eventfs_entry *entries,
829 					 int size, void *data)
830 {
831 	struct eventfs_inode *ei;
832 
833 	if (!parent)
834 		return ERR_PTR(-EINVAL);
835 
836 	ei = kzalloc(sizeof(*ei), GFP_KERNEL);
837 	if (!ei)
838 		return ERR_PTR(-ENOMEM);
839 
840 	ei->name = kstrdup_const(name, GFP_KERNEL);
841 	if (!ei->name) {
842 		kfree(ei);
843 		return ERR_PTR(-ENOMEM);
844 	}
845 
846 	if (size) {
847 		ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
848 		if (!ei->d_children) {
849 			kfree_const(ei->name);
850 			kfree(ei);
851 			return ERR_PTR(-ENOMEM);
852 		}
853 	}
854 
855 	ei->entries = entries;
856 	ei->nr_entries = size;
857 	ei->data = data;
858 	INIT_LIST_HEAD(&ei->children);
859 	INIT_LIST_HEAD(&ei->list);
860 
861 	mutex_lock(&eventfs_mutex);
862 	if (!parent->is_freed) {
863 		list_add_tail(&ei->list, &parent->children);
864 		ei->d_parent = parent->dentry;
865 	}
866 	mutex_unlock(&eventfs_mutex);
867 
868 	/* Was the parent freed? */
869 	if (list_empty(&ei->list)) {
870 		free_ei(ei);
871 		ei = NULL;
872 	}
873 	return ei;
874 }
875 
876 /**
877  * eventfs_create_events_dir - create the top level events directory
878  * @name: The name of the top level directory to create.
879  * @parent: Parent dentry for this file in the tracefs directory.
880  * @entries: A list of entries that represent the files under this directory
881  * @size: The number of @entries
882  * @data: The default data to pass to the files (an entry may override it).
883  *
884  * This function creates the top of the trace event directory.
885  *
886  * See eventfs_create_dir() for use of @entries.
887  */
888 struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
889 						const struct eventfs_entry *entries,
890 						int size, void *data)
891 {
892 	struct dentry *dentry = tracefs_start_creating(name, parent);
893 	struct eventfs_inode *ei;
894 	struct tracefs_inode *ti;
895 	struct inode *inode;
896 	kuid_t uid;
897 	kgid_t gid;
898 
899 	if (security_locked_down(LOCKDOWN_TRACEFS))
900 		return NULL;
901 
902 	if (IS_ERR(dentry))
903 		return ERR_CAST(dentry);
904 
905 	ei = kzalloc(sizeof(*ei), GFP_KERNEL);
906 	if (!ei)
907 		goto fail_ei;
908 
909 	inode = tracefs_get_inode(dentry->d_sb);
910 	if (unlikely(!inode))
911 		goto fail;
912 
913 	if (size) {
914 		ei->d_children = kcalloc(size, sizeof(*ei->d_children), GFP_KERNEL);
915 		if (!ei->d_children)
916 			goto fail;
917 	}
918 
919 	ei->dentry = dentry;
920 	ei->entries = entries;
921 	ei->nr_entries = size;
922 	ei->is_events = 1;
923 	ei->data = data;
924 	ei->name = kstrdup_const(name, GFP_KERNEL);
925 	if (!ei->name)
926 		goto fail;
927 
928 	/* Save the ownership of this directory */
929 	uid = d_inode(dentry->d_parent)->i_uid;
930 	gid = d_inode(dentry->d_parent)->i_gid;
931 
932 	/*
933 	 * If the events directory is of the top instance, then parent
934 	 * is NULL. Set the attr.mode to reflect this and its permissions will
935 	 * default to the tracefs root dentry.
936 	 */
937 	if (!parent)
938 		ei->attr.mode = EVENTFS_TOPLEVEL;
939 
940 	/* This is used as the default ownership of the files and directories */
941 	ei->attr.uid = uid;
942 	ei->attr.gid = gid;
943 
944 	INIT_LIST_HEAD(&ei->children);
945 	INIT_LIST_HEAD(&ei->list);
946 
947 	ti = get_tracefs(inode);
948 	ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE;
949 	ti->private = ei;
950 
951 	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
952 	inode->i_uid = uid;
953 	inode->i_gid = gid;
954 	inode->i_op = &eventfs_root_dir_inode_operations;
955 	inode->i_fop = &eventfs_file_operations;
956 
957 	dentry->d_fsdata = ei;
958 
959 	/* directory inodes start off with i_nlink == 2 (for "." entry) */
960 	inc_nlink(inode);
961 	d_instantiate(dentry, inode);
962 	inc_nlink(dentry->d_parent->d_inode);
963 	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
964 	tracefs_end_creating(dentry);
965 
966 	return ei;
967 
968  fail:
969 	kfree(ei->d_children);
970 	kfree(ei);
971  fail_ei:
972 	tracefs_failed_creating(dentry);
973 	return ERR_PTR(-ENOMEM);
974 }
975 
976 static LLIST_HEAD(free_list);
977 
978 static void eventfs_workfn(struct work_struct *work)
979 {
980         struct eventfs_inode *ei, *tmp;
981         struct llist_node *llnode;
982 
983 	llnode = llist_del_all(&free_list);
984         llist_for_each_entry_safe(ei, tmp, llnode, llist) {
985 		/* This dput() matches the dget() from unhook_dentry() */
986 		for (int i = 0; i < ei->nr_entries; i++) {
987 			if (ei->d_children[i])
988 				dput(ei->d_children[i]);
989 		}
990 		/* This should only get here if it had a dentry */
991 		if (!WARN_ON_ONCE(!ei->dentry))
992 			dput(ei->dentry);
993         }
994 }
995 
996 static DECLARE_WORK(eventfs_work, eventfs_workfn);
997 
998 static void free_rcu_ei(struct rcu_head *head)
999 {
1000 	struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
1001 
1002 	if (ei->dentry) {
1003 		/* Do not free the ei until all references of dentry are gone */
1004 		if (llist_add(&ei->llist, &free_list))
1005 			queue_work(system_unbound_wq, &eventfs_work);
1006 		return;
1007 	}
1008 
1009 	/* If the ei doesn't have a dentry, neither should its children */
1010 	for (int i = 0; i < ei->nr_entries; i++) {
1011 		WARN_ON_ONCE(ei->d_children[i]);
1012 	}
1013 
1014 	free_ei(ei);
1015 }
1016 
/*
 * Take the two extra references on @dentry that the removal path will
 * drop later. A NULL @dentry is ignored.
 */
static void unhook_dentry(struct dentry *dentry)
{
	if (dentry) {
		/*
		 * One reference is expected by simple_recursive_removal(),
		 * which will include a dput().
		 */
		dget(dentry);

		/*
		 * The other is for the dput() in eventfs_workfn(). That is
		 * required as that dput() will free the ei after the SRCU
		 * grace period is over.
		 */
		dget(dentry);
	}
}
1034 
/**
 * eventfs_remove_rec - remove eventfs dir or file from list
 * @ei: eventfs_inode to be removed.
 * @level: prevent recursion from going more than 3 levels deep.
 *
 * This function recursively removes eventfs_inodes which
 * contains info of files and/or directories.
 *
 * Each eventfs_inode is marked freed, unlinked from its parent's list and
 * handed to free_rcu_ei() through call_srcu(). The children are handled
 * before their parent. The caller is expected to hold eventfs_mutex.
 */
static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
{
	struct eventfs_inode *ei_child;

	if (!ei)
		return;
	/*
	 * Check recursion depth. It should never be greater than 3:
	 * 0 - events/
	 * 1 - events/group/
	 * 2 - events/group/event/
	 * 3 - events/group/event/file
	 */
	if (WARN_ON_ONCE(level > 3))
		return;

	/* search for nested folders or files */
	list_for_each_entry_srcu(ei_child, &ei->children, list,
				 lockdep_is_held(&eventfs_mutex)) {
		/* Children only have dentry if parent does */
		WARN_ON_ONCE(ei_child->dentry && !ei->dentry);
		eventfs_remove_rec(ei_child, level + 1);
	}


	/* From here on, lookups and readdir treat this ei as gone */
	ei->is_freed = 1;

	/* Take the extra references on the dentries of the files */
	for (int i = 0; i < ei->nr_entries; i++) {
		if (ei->d_children[i]) {
			/* Children only have dentry if parent does */
			WARN_ON_ONCE(!ei->dentry);
			unhook_dentry(ei->d_children[i]);
		}
	}

	/* And on the dentry of the directory itself (may be NULL) */
	unhook_dentry(ei->dentry);

	/* Unlink now, free only after the SRCU readers are done */
	list_del_rcu(&ei->list);
	call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
}
1083 
1084 /**
1085  * eventfs_remove_dir - remove eventfs dir or file from list
1086  * @ei: eventfs_inode to be removed.
1087  *
1088  * This function acquire the eventfs_mutex lock and call eventfs_remove_rec()
1089  */
1090 void eventfs_remove_dir(struct eventfs_inode *ei)
1091 {
1092 	struct dentry *dentry;
1093 
1094 	if (!ei)
1095 		return;
1096 
1097 	mutex_lock(&eventfs_mutex);
1098 	dentry = ei->dentry;
1099 	eventfs_remove_rec(ei, 0);
1100 	mutex_unlock(&eventfs_mutex);
1101 
1102 	/*
1103 	 * If any of the ei children has a dentry, then the ei itself
1104 	 * must have a dentry.
1105 	 */
1106 	if (dentry)
1107 		simple_recursive_removal(dentry, NULL);
1108 }
1109 
1110 /**
1111  * eventfs_remove_events_dir - remove the top level eventfs directory
1112  * @ei: the event_inode returned by eventfs_create_events_dir().
1113  *
1114  * This function removes the events main directory
1115  */
1116 void eventfs_remove_events_dir(struct eventfs_inode *ei)
1117 {
1118 	struct dentry *dentry;
1119 
1120 	dentry = ei->dentry;
1121 	eventfs_remove_dir(ei);
1122 
1123 	/*
1124 	 * Matches the dget() done by tracefs_start_creating()
1125 	 * in eventfs_create_events_dir() when it the dentry was
1126 	 * created. In other words, it's a normal dentry that
1127 	 * sticks around while the other ei->dentry are created
1128 	 * and destroyed dynamically.
1129 	 */
1130 	dput(dentry);
1131 }
1132