xref: /linux/fs/tracefs/event_inode.c (revision cff9c565e65f3622e8dc1dcc21c1520a083dff35)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  event_inode.c - part of tracefs, a pseudo file system for activating tracing
4  *
5  *  Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org>
6  *  Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com>
7  *  Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org>
8  *
9  *  eventfs is used to dynamically create inodes and dentries based on the
10  *  meta data provided by the tracing system.
11  *
12  *  eventfs stores the meta-data of files/dirs and holds off on creating
13  *  inodes/dentries of the files. When accessed, the eventfs will create the
14  *  inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up
15  *  and delete the inodes/dentries when they are no longer referenced.
16  */
17 #include <linux/fsnotify.h>
18 #include <linux/fs.h>
19 #include <linux/namei.h>
20 #include <linux/workqueue.h>
21 #include <linux/security.h>
22 #include <linux/tracefs.h>
23 #include <linux/kref.h>
24 #include <linux/delay.h>
25 #include "internal.h"
26 
27 /*
28  * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access
29  * to the ei->dentry must be done under this mutex and after checking
30  * if ei->is_freed is not set. When ei->is_freed is set, the dentry
31  * is on its way to being freed after the last dput() is made on it.
32  */
33 static DEFINE_MUTEX(eventfs_mutex);
34 
35 /*
36  * The eventfs_inode (ei) itself is protected by SRCU. It is released from
37  * its parent's list and will have is_freed set (under eventfs_mutex).
38  * After the SRCU grace period is over and the last dput() is called
39  * the ei is freed.
40  */
41 DEFINE_STATIC_SRCU(eventfs_srcu);
42 
43 /* Mode is unsigned short, use the upper bits for flags */
44 enum {
45 	EVENTFS_SAVE_MODE	= BIT(16),
46 	EVENTFS_SAVE_UID	= BIT(17),
47 	EVENTFS_SAVE_GID	= BIT(18),
48 };
49 
50 #define EVENTFS_MODE_MASK	(EVENTFS_SAVE_MODE - 1)
51 
52 static struct dentry *eventfs_root_lookup(struct inode *dir,
53 					  struct dentry *dentry,
54 					  unsigned int flags);
55 static int dcache_dir_open_wrapper(struct inode *inode, struct file *file);
56 static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx);
57 static int eventfs_release(struct inode *inode, struct file *file);
58 
59 static void update_attr(struct eventfs_attr *attr, struct iattr *iattr)
60 {
61 	unsigned int ia_valid = iattr->ia_valid;
62 
63 	if (ia_valid & ATTR_MODE) {
64 		attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) |
65 			(iattr->ia_mode & EVENTFS_MODE_MASK) |
66 			EVENTFS_SAVE_MODE;
67 	}
68 	if (ia_valid & ATTR_UID) {
69 		attr->mode |= EVENTFS_SAVE_UID;
70 		attr->uid = iattr->ia_uid;
71 	}
72 	if (ia_valid & ATTR_GID) {
73 		attr->mode |= EVENTFS_SAVE_GID;
74 		attr->gid = iattr->ia_gid;
75 	}
76 }
77 
78 static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
79 			    struct iattr *iattr)
80 {
81 	const struct eventfs_entry *entry;
82 	struct eventfs_inode *ei;
83 	const char *name;
84 	int ret;
85 
86 	mutex_lock(&eventfs_mutex);
87 	ei = dentry->d_fsdata;
88 	if (ei->is_freed) {
89 		/* Do not allow changes if the event is about to be removed. */
90 		mutex_unlock(&eventfs_mutex);
91 		return -ENODEV;
92 	}
93 
94 	/* Preallocate the children mode array if necessary */
95 	if (!(dentry->d_inode->i_mode & S_IFDIR)) {
96 		if (!ei->entry_attrs) {
97 			ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries,
98 						  GFP_NOFS);
99 			if (!ei->entry_attrs) {
100 				ret = -ENOMEM;
101 				goto out;
102 			}
103 		}
104 	}
105 
106 	ret = simple_setattr(idmap, dentry, iattr);
107 	if (ret < 0)
108 		goto out;
109 
110 	/*
111 	 * If this is a dir, then update the ei cache, only the file
112 	 * mode is saved in the ei->m_children, and the ownership is
113 	 * determined by the parent directory.
114 	 */
115 	if (dentry->d_inode->i_mode & S_IFDIR) {
116 		update_attr(&ei->attr, iattr);
117 
118 	} else {
119 		name = dentry->d_name.name;
120 
121 		for (int i = 0; i < ei->nr_entries; i++) {
122 			entry = &ei->entries[i];
123 			if (strcmp(name, entry->name) == 0) {
124 				update_attr(&ei->entry_attrs[i], iattr);
125 				break;
126 			}
127 		}
128 	}
129  out:
130 	mutex_unlock(&eventfs_mutex);
131 	return ret;
132 }
133 
134 static const struct inode_operations eventfs_root_dir_inode_operations = {
135 	.lookup		= eventfs_root_lookup,
136 	.setattr	= eventfs_set_attr,
137 };
138 
139 static const struct inode_operations eventfs_file_inode_operations = {
140 	.setattr	= eventfs_set_attr,
141 };
142 
143 static const struct file_operations eventfs_file_operations = {
144 	.open		= dcache_dir_open_wrapper,
145 	.read		= generic_read_dir,
146 	.iterate_shared	= dcache_readdir_wrapper,
147 	.llseek		= generic_file_llseek,
148 	.release	= eventfs_release,
149 };
150 
151 static void update_inode_attr(struct inode *inode, struct eventfs_attr *attr, umode_t mode)
152 {
153 	if (!attr) {
154 		inode->i_mode = mode;
155 		return;
156 	}
157 
158 	if (attr->mode & EVENTFS_SAVE_MODE)
159 		inode->i_mode = attr->mode & EVENTFS_MODE_MASK;
160 	else
161 		inode->i_mode = mode;
162 
163 	if (attr->mode & EVENTFS_SAVE_UID)
164 		inode->i_uid = attr->uid;
165 
166 	if (attr->mode & EVENTFS_SAVE_GID)
167 		inode->i_gid = attr->gid;
168 }
169 
170 /**
171  * create_file - create a file in the tracefs filesystem
172  * @name: the name of the file to create.
173  * @mode: the permission that the file should have.
174  * @attr: saved attributes changed by user
175  * @parent: parent dentry for this file.
176  * @data: something that the caller will want to get to later on.
177  * @fop: struct file_operations that should be used for this file.
178  *
179  * This function creates a dentry that represents a file in the eventsfs_inode
180  * directory. The inode.i_private pointer will point to @data in the open()
181  * call.
182  */
183 static struct dentry *create_file(const char *name, umode_t mode,
184 				  struct eventfs_attr *attr,
185 				  struct dentry *parent, void *data,
186 				  const struct file_operations *fop)
187 {
188 	struct tracefs_inode *ti;
189 	struct dentry *dentry;
190 	struct inode *inode;
191 
192 	if (!(mode & S_IFMT))
193 		mode |= S_IFREG;
194 
195 	if (WARN_ON_ONCE(!S_ISREG(mode)))
196 		return NULL;
197 
198 	WARN_ON_ONCE(!parent);
199 	dentry = eventfs_start_creating(name, parent);
200 
201 	if (IS_ERR(dentry))
202 		return dentry;
203 
204 	inode = tracefs_get_inode(dentry->d_sb);
205 	if (unlikely(!inode))
206 		return eventfs_failed_creating(dentry);
207 
208 	/* If the user updated the directory's attributes, use them */
209 	update_inode_attr(inode, attr, mode);
210 
211 	inode->i_op = &eventfs_file_inode_operations;
212 	inode->i_fop = fop;
213 	inode->i_private = data;
214 
215 	ti = get_tracefs(inode);
216 	ti->flags |= TRACEFS_EVENT_INODE;
217 	d_instantiate(dentry, inode);
218 	fsnotify_create(dentry->d_parent->d_inode, dentry);
219 	return eventfs_end_creating(dentry);
220 };
221 
222 /**
223  * create_dir - create a dir in the tracefs filesystem
224  * @ei: the eventfs_inode that represents the directory to create
225  * @parent: parent dentry for this file.
226  *
227  * This function will create a dentry for a directory represented by
228  * a eventfs_inode.
229  */
230 static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent)
231 {
232 	struct tracefs_inode *ti;
233 	struct dentry *dentry;
234 	struct inode *inode;
235 
236 	dentry = eventfs_start_creating(ei->name, parent);
237 	if (IS_ERR(dentry))
238 		return dentry;
239 
240 	inode = tracefs_get_inode(dentry->d_sb);
241 	if (unlikely(!inode))
242 		return eventfs_failed_creating(dentry);
243 
244 	/* If the user updated the directory's attributes, use them */
245 	update_inode_attr(inode, &ei->attr, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
246 
247 	inode->i_op = &eventfs_root_dir_inode_operations;
248 	inode->i_fop = &eventfs_file_operations;
249 
250 	ti = get_tracefs(inode);
251 	ti->flags |= TRACEFS_EVENT_INODE;
252 
253 	inc_nlink(inode);
254 	d_instantiate(dentry, inode);
255 	inc_nlink(dentry->d_parent->d_inode);
256 	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
257 	return eventfs_end_creating(dentry);
258 }
259 
260 static void free_ei(struct eventfs_inode *ei)
261 {
262 	kfree_const(ei->name);
263 	kfree(ei->d_children);
264 	kfree(ei->entry_attrs);
265 	kfree(ei);
266 }
267 
268 /**
269  * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
270  * @ti: the tracefs_inode of the dentry
271  * @dentry: dentry which has the reference to remove.
272  *
273  * Remove the association between a dentry from an eventfs_inode.
274  */
275 void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
276 {
277 	struct eventfs_inode *ei;
278 	int i;
279 
280 	mutex_lock(&eventfs_mutex);
281 
282 	ei = dentry->d_fsdata;
283 	if (!ei)
284 		goto out;
285 
286 	/* This could belong to one of the files of the ei */
287 	if (ei->dentry != dentry) {
288 		for (i = 0; i < ei->nr_entries; i++) {
289 			if (ei->d_children[i] == dentry)
290 				break;
291 		}
292 		if (WARN_ON_ONCE(i == ei->nr_entries))
293 			goto out;
294 		ei->d_children[i] = NULL;
295 	} else if (ei->is_freed) {
296 		free_ei(ei);
297 	} else {
298 		ei->dentry = NULL;
299 	}
300 
301 	dentry->d_fsdata = NULL;
302  out:
303 	mutex_unlock(&eventfs_mutex);
304 }
305 
306 /**
307  * create_file_dentry - create a dentry for a file of an eventfs_inode
308  * @ei: the eventfs_inode that the file will be created under
309  * @idx: the index into the d_children[] of the @ei
310  * @parent: The parent dentry of the created file.
311  * @name: The name of the file to create
312  * @mode: The mode of the file.
313  * @data: The data to use to set the inode of the file with on open()
314  * @fops: The fops of the file to be created.
315  * @lookup: If called by the lookup routine, in which case, dput() the created dentry.
316  *
317  * Create a dentry for a file of an eventfs_inode @ei and place it into the
318  * address located at @e_dentry. If the @e_dentry already has a dentry, then
319  * just do a dget() on it and return. Otherwise create the dentry and attach it.
320  */
321 static struct dentry *
322 create_file_dentry(struct eventfs_inode *ei, int idx,
323 		   struct dentry *parent, const char *name, umode_t mode, void *data,
324 		   const struct file_operations *fops, bool lookup)
325 {
326 	struct eventfs_attr *attr = NULL;
327 	struct dentry **e_dentry = &ei->d_children[idx];
328 	struct dentry *dentry;
329 
330 	WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
331 
332 	mutex_lock(&eventfs_mutex);
333 	if (ei->is_freed) {
334 		mutex_unlock(&eventfs_mutex);
335 		return NULL;
336 	}
337 	/* If the e_dentry already has a dentry, use it */
338 	if (*e_dentry) {
339 		/* lookup does not need to up the ref count */
340 		if (!lookup)
341 			dget(*e_dentry);
342 		mutex_unlock(&eventfs_mutex);
343 		return *e_dentry;
344 	}
345 
346 	/* ei->entry_attrs are protected by SRCU */
347 	if (ei->entry_attrs)
348 		attr = &ei->entry_attrs[idx];
349 
350 	mutex_unlock(&eventfs_mutex);
351 
352 	dentry = create_file(name, mode, attr, parent, data, fops);
353 
354 	mutex_lock(&eventfs_mutex);
355 
356 	if (IS_ERR_OR_NULL(dentry)) {
357 		/*
358 		 * When the mutex was released, something else could have
359 		 * created the dentry for this e_dentry. In which case
360 		 * use that one.
361 		 *
362 		 * If ei->is_freed is set, the e_dentry is currently on its
363 		 * way to being freed, don't return it. If e_dentry is NULL
364 		 * it means it was already freed.
365 		 */
366 		if (ei->is_freed)
367 			dentry = NULL;
368 		else
369 			dentry = *e_dentry;
370 		/* The lookup does not need to up the dentry refcount */
371 		if (dentry && !lookup)
372 			dget(dentry);
373 		mutex_unlock(&eventfs_mutex);
374 		return dentry;
375 	}
376 
377 	if (!*e_dentry && !ei->is_freed) {
378 		*e_dentry = dentry;
379 		dentry->d_fsdata = ei;
380 	} else {
381 		/*
382 		 * Should never happen unless we get here due to being freed.
383 		 * Otherwise it means two dentries exist with the same name.
384 		 */
385 		WARN_ON_ONCE(!ei->is_freed);
386 		dentry = NULL;
387 	}
388 	mutex_unlock(&eventfs_mutex);
389 
390 	if (lookup)
391 		dput(dentry);
392 
393 	return dentry;
394 }
395 
396 /**
397  * eventfs_post_create_dir - post create dir routine
398  * @ei: eventfs_inode of recently created dir
399  *
400  * Map the meta-data of files within an eventfs dir to their parent dentry
401  */
402 static void eventfs_post_create_dir(struct eventfs_inode *ei)
403 {
404 	struct eventfs_inode *ei_child;
405 	struct tracefs_inode *ti;
406 
407 	lockdep_assert_held(&eventfs_mutex);
408 
409 	/* srcu lock already held */
410 	/* fill parent-child relation */
411 	list_for_each_entry_srcu(ei_child, &ei->children, list,
412 				 srcu_read_lock_held(&eventfs_srcu)) {
413 		ei_child->d_parent = ei->dentry;
414 	}
415 
416 	ti = get_tracefs(ei->dentry->d_inode);
417 	ti->private = ei;
418 }
419 
420 /**
421  * create_dir_dentry - Create a directory dentry for the eventfs_inode
422  * @pei: The eventfs_inode parent of ei.
423  * @ei: The eventfs_inode to create the directory for
424  * @parent: The dentry of the parent of this directory
425  * @lookup: True if this is called by the lookup code
426  *
427  * This creates and attaches a directory dentry to the eventfs_inode @ei.
428  */
429 static struct dentry *
430 create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
431 		  struct dentry *parent, bool lookup)
432 {
433 	struct dentry *dentry = NULL;
434 
435 	WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
436 
437 	mutex_lock(&eventfs_mutex);
438 	if (pei->is_freed || ei->is_freed) {
439 		mutex_unlock(&eventfs_mutex);
440 		return NULL;
441 	}
442 	if (ei->dentry) {
443 		/* If the dentry already has a dentry, use it */
444 		dentry = ei->dentry;
445 		/* lookup does not need to up the ref count */
446 		if (!lookup)
447 			dget(dentry);
448 		mutex_unlock(&eventfs_mutex);
449 		return dentry;
450 	}
451 	mutex_unlock(&eventfs_mutex);
452 
453 	dentry = create_dir(ei, parent);
454 
455 	mutex_lock(&eventfs_mutex);
456 
457 	if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
458 		/*
459 		 * When the mutex was released, something else could have
460 		 * created the dentry for this e_dentry. In which case
461 		 * use that one.
462 		 *
463 		 * If ei->is_freed is set, the e_dentry is currently on its
464 		 * way to being freed.
465 		 */
466 		dentry = ei->dentry;
467 		if (dentry && !lookup)
468 			dget(dentry);
469 		mutex_unlock(&eventfs_mutex);
470 		return dentry;
471 	}
472 
473 	if (!ei->dentry && !ei->is_freed) {
474 		ei->dentry = dentry;
475 		eventfs_post_create_dir(ei);
476 		dentry->d_fsdata = ei;
477 	} else {
478 		/*
479 		 * Should never happen unless we get here due to being freed.
480 		 * Otherwise it means two dentries exist with the same name.
481 		 */
482 		WARN_ON_ONCE(!ei->is_freed);
483 		dentry = NULL;
484 	}
485 	mutex_unlock(&eventfs_mutex);
486 
487 	if (lookup)
488 		dput(dentry);
489 
490 	return dentry;
491 }
492 
493 /**
494  * eventfs_root_lookup - lookup routine to create file/dir
495  * @dir: in which a lookup is being done
496  * @dentry: file/dir dentry
497  * @flags: Just passed to simple_lookup()
498  *
499  * Used to create dynamic file/dir with-in @dir, search with-in @ei
500  * list, if @dentry found go ahead and create the file/dir
501  */
502 
503 static struct dentry *eventfs_root_lookup(struct inode *dir,
504 					  struct dentry *dentry,
505 					  unsigned int flags)
506 {
507 	const struct file_operations *fops;
508 	const struct eventfs_entry *entry;
509 	struct eventfs_inode *ei_child;
510 	struct tracefs_inode *ti;
511 	struct eventfs_inode *ei;
512 	struct dentry *ei_dentry = NULL;
513 	struct dentry *ret = NULL;
514 	const char *name = dentry->d_name.name;
515 	bool created = false;
516 	umode_t mode;
517 	void *data;
518 	int idx;
519 	int i;
520 	int r;
521 
522 	ti = get_tracefs(dir);
523 	if (!(ti->flags & TRACEFS_EVENT_INODE))
524 		return NULL;
525 
526 	/* Grab srcu to prevent the ei from going away */
527 	idx = srcu_read_lock(&eventfs_srcu);
528 
529 	/*
530 	 * Grab the eventfs_mutex to consistent value from ti->private.
531 	 * This s
532 	 */
533 	mutex_lock(&eventfs_mutex);
534 	ei = READ_ONCE(ti->private);
535 	if (ei && !ei->is_freed)
536 		ei_dentry = READ_ONCE(ei->dentry);
537 	mutex_unlock(&eventfs_mutex);
538 
539 	if (!ei || !ei_dentry)
540 		goto out;
541 
542 	data = ei->data;
543 
544 	list_for_each_entry_srcu(ei_child, &ei->children, list,
545 				 srcu_read_lock_held(&eventfs_srcu)) {
546 		if (strcmp(ei_child->name, name) != 0)
547 			continue;
548 		ret = simple_lookup(dir, dentry, flags);
549 		if (IS_ERR(ret))
550 			goto out;
551 		create_dir_dentry(ei, ei_child, ei_dentry, true);
552 		created = true;
553 		break;
554 	}
555 
556 	if (created)
557 		goto out;
558 
559 	for (i = 0; i < ei->nr_entries; i++) {
560 		entry = &ei->entries[i];
561 		if (strcmp(name, entry->name) == 0) {
562 			void *cdata = data;
563 			mutex_lock(&eventfs_mutex);
564 			/* If ei->is_freed, then the event itself may be too */
565 			if (!ei->is_freed)
566 				r = entry->callback(name, &mode, &cdata, &fops);
567 			else
568 				r = -1;
569 			mutex_unlock(&eventfs_mutex);
570 			if (r <= 0)
571 				continue;
572 			ret = simple_lookup(dir, dentry, flags);
573 			if (IS_ERR(ret))
574 				goto out;
575 			create_file_dentry(ei, i, ei_dentry, name, mode, cdata,
576 					   fops, true);
577 			break;
578 		}
579 	}
580  out:
581 	srcu_read_unlock(&eventfs_srcu, idx);
582 	return ret;
583 }
584 
/*
 * Per-open state of an eventfs directory, kept in file->private_data.
 *
 * @cursor:   the private_data value that dcache_dir_open() installed; it is
 *            swapped back in around dcache_readdir() and dcache_dir_close()
 * @dentries: NULL-terminated array of the dentries referenced by the open,
 *            or NULL if there are none
 */
struct dentry_list {
	void *cursor;
	struct dentry **dentries;
};
589 
590 /**
591  * eventfs_release - called to release eventfs file/dir
592  * @inode: inode to be released
593  * @file: file to be released (not used)
594  */
595 static int eventfs_release(struct inode *inode, struct file *file)
596 {
597 	struct tracefs_inode *ti;
598 	struct dentry_list *dlist = file->private_data;
599 	void *cursor;
600 	int i;
601 
602 	ti = get_tracefs(inode);
603 	if (!(ti->flags & TRACEFS_EVENT_INODE))
604 		return -EINVAL;
605 
606 	if (WARN_ON_ONCE(!dlist))
607 		return -EINVAL;
608 
609 	for (i = 0; dlist->dentries && dlist->dentries[i]; i++) {
610 		dput(dlist->dentries[i]);
611 	}
612 
613 	cursor = dlist->cursor;
614 	kfree(dlist->dentries);
615 	kfree(dlist);
616 	file->private_data = cursor;
617 	return dcache_dir_close(inode, file);
618 }
619 
620 static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt)
621 {
622 	struct dentry **tmp;
623 
624 	tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS);
625 	if (!tmp)
626 		return -1;
627 	tmp[cnt] = d;
628 	tmp[cnt + 1] = NULL;
629 	*dentries = tmp;
630 	return 0;
631 }
632 
633 /**
634  * dcache_dir_open_wrapper - eventfs open wrapper
635  * @inode: not used
636  * @file: dir to be opened (to create it's children)
637  *
638  * Used to dynamic create file/dir with-in @file, all the
639  * file/dir will be created. If already created then references
640  * will be increased
641  */
642 static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
643 {
644 	const struct file_operations *fops;
645 	const struct eventfs_entry *entry;
646 	struct eventfs_inode *ei_child;
647 	struct tracefs_inode *ti;
648 	struct eventfs_inode *ei;
649 	struct dentry_list *dlist;
650 	struct dentry **dentries = NULL;
651 	struct dentry *parent = file_dentry(file);
652 	struct dentry *d;
653 	struct inode *f_inode = file_inode(file);
654 	const char *name = parent->d_name.name;
655 	umode_t mode;
656 	void *data;
657 	int cnt = 0;
658 	int idx;
659 	int ret;
660 	int i;
661 	int r;
662 
663 	ti = get_tracefs(f_inode);
664 	if (!(ti->flags & TRACEFS_EVENT_INODE))
665 		return -EINVAL;
666 
667 	if (WARN_ON_ONCE(file->private_data))
668 		return -EINVAL;
669 
670 	idx = srcu_read_lock(&eventfs_srcu);
671 
672 	mutex_lock(&eventfs_mutex);
673 	ei = READ_ONCE(ti->private);
674 	mutex_unlock(&eventfs_mutex);
675 
676 	if (!ei) {
677 		srcu_read_unlock(&eventfs_srcu, idx);
678 		return -EINVAL;
679 	}
680 
681 
682 	data = ei->data;
683 
684 	dlist = kmalloc(sizeof(*dlist), GFP_KERNEL);
685 	if (!dlist) {
686 		srcu_read_unlock(&eventfs_srcu, idx);
687 		return -ENOMEM;
688 	}
689 
690 	inode_lock(parent->d_inode);
691 	list_for_each_entry_srcu(ei_child, &ei->children, list,
692 				 srcu_read_lock_held(&eventfs_srcu)) {
693 		d = create_dir_dentry(ei, ei_child, parent, false);
694 		if (d) {
695 			ret = add_dentries(&dentries, d, cnt);
696 			if (ret < 0)
697 				break;
698 			cnt++;
699 		}
700 	}
701 
702 	for (i = 0; i < ei->nr_entries; i++) {
703 		void *cdata = data;
704 		entry = &ei->entries[i];
705 		name = entry->name;
706 		mutex_lock(&eventfs_mutex);
707 		/* If ei->is_freed, then the event itself may be too */
708 		if (!ei->is_freed)
709 			r = entry->callback(name, &mode, &cdata, &fops);
710 		else
711 			r = -1;
712 		mutex_unlock(&eventfs_mutex);
713 		if (r <= 0)
714 			continue;
715 		d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false);
716 		if (d) {
717 			ret = add_dentries(&dentries, d, cnt);
718 			if (ret < 0)
719 				break;
720 			cnt++;
721 		}
722 	}
723 	inode_unlock(parent->d_inode);
724 	srcu_read_unlock(&eventfs_srcu, idx);
725 	ret = dcache_dir_open(inode, file);
726 
727 	/*
728 	 * dcache_dir_open() sets file->private_data to a dentry cursor.
729 	 * Need to save that but also save all the dentries that were
730 	 * opened by this function.
731 	 */
732 	dlist->cursor = file->private_data;
733 	dlist->dentries = dentries;
734 	file->private_data = dlist;
735 	return ret;
736 }
737 
738 /*
739  * This just sets the file->private_data back to the cursor and back.
740  */
741 static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx)
742 {
743 	struct dentry_list *dlist = file->private_data;
744 	int ret;
745 
746 	file->private_data = dlist->cursor;
747 	ret = dcache_readdir(file, ctx);
748 	dlist->cursor = file->private_data;
749 	file->private_data = dlist;
750 	return ret;
751 }
752 
753 /**
754  * eventfs_create_dir - Create the eventfs_inode for this directory
755  * @name: The name of the directory to create.
756  * @parent: The eventfs_inode of the parent directory.
757  * @entries: A list of entries that represent the files under this directory
758  * @size: The number of @entries
759  * @data: The default data to pass to the files (an entry may override it).
760  *
761  * This function creates the descriptor to represent a directory in the
762  * eventfs. This descriptor is an eventfs_inode, and it is returned to be
763  * used to create other children underneath.
764  *
765  * The @entries is an array of eventfs_entry structures which has:
766  *	const char		 *name
767  *	eventfs_callback	callback;
768  *
769  * The name is the name of the file, and the callback is a pointer to a function
770  * that will be called when the file is reference (either by lookup or by
771  * reading a directory). The callback is of the prototype:
772  *
773  *    int callback(const char *name, umode_t *mode, void **data,
774  *		   const struct file_operations **fops);
775  *
776  * When a file needs to be created, this callback will be called with
777  *   name = the name of the file being created (so that the same callback
778  *          may be used for multiple files).
779  *   mode = a place to set the file's mode
780  *   data = A pointer to @data, and the callback may replace it, which will
781  *         cause the file created to pass the new data to the open() call.
782  *   fops = the fops to use for the created file.
783  *
784  * NB. @callback is called while holding internal locks of the eventfs
785  *     system. The callback must not call any code that might also call into
786  *     the tracefs or eventfs system or it will risk creating a deadlock.
787  */
788 struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
789 					 const struct eventfs_entry *entries,
790 					 int size, void *data)
791 {
792 	struct eventfs_inode *ei;
793 
794 	if (!parent)
795 		return ERR_PTR(-EINVAL);
796 
797 	ei = kzalloc(sizeof(*ei), GFP_KERNEL);
798 	if (!ei)
799 		return ERR_PTR(-ENOMEM);
800 
801 	ei->name = kstrdup_const(name, GFP_KERNEL);
802 	if (!ei->name) {
803 		kfree(ei);
804 		return ERR_PTR(-ENOMEM);
805 	}
806 
807 	if (size) {
808 		ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
809 		if (!ei->d_children) {
810 			kfree_const(ei->name);
811 			kfree(ei);
812 			return ERR_PTR(-ENOMEM);
813 		}
814 	}
815 
816 	ei->entries = entries;
817 	ei->nr_entries = size;
818 	ei->data = data;
819 	INIT_LIST_HEAD(&ei->children);
820 	INIT_LIST_HEAD(&ei->list);
821 
822 	mutex_lock(&eventfs_mutex);
823 	if (!parent->is_freed) {
824 		list_add_tail(&ei->list, &parent->children);
825 		ei->d_parent = parent->dentry;
826 	}
827 	mutex_unlock(&eventfs_mutex);
828 
829 	/* Was the parent freed? */
830 	if (list_empty(&ei->list)) {
831 		free_ei(ei);
832 		ei = NULL;
833 	}
834 	return ei;
835 }
836 
837 /**
838  * eventfs_create_events_dir - create the top level events directory
839  * @name: The name of the top level directory to create.
840  * @parent: Parent dentry for this file in the tracefs directory.
841  * @entries: A list of entries that represent the files under this directory
842  * @size: The number of @entries
843  * @data: The default data to pass to the files (an entry may override it).
844  *
845  * This function creates the top of the trace event directory.
846  *
847  * See eventfs_create_dir() for use of @entries.
848  */
849 struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
850 						const struct eventfs_entry *entries,
851 						int size, void *data)
852 {
853 	struct dentry *dentry = tracefs_start_creating(name, parent);
854 	struct eventfs_inode *ei;
855 	struct tracefs_inode *ti;
856 	struct inode *inode;
857 
858 	if (security_locked_down(LOCKDOWN_TRACEFS))
859 		return NULL;
860 
861 	if (IS_ERR(dentry))
862 		return ERR_CAST(dentry);
863 
864 	ei = kzalloc(sizeof(*ei), GFP_KERNEL);
865 	if (!ei)
866 		goto fail_ei;
867 
868 	inode = tracefs_get_inode(dentry->d_sb);
869 	if (unlikely(!inode))
870 		goto fail;
871 
872 	if (size) {
873 		ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
874 		if (!ei->d_children)
875 			goto fail;
876 	}
877 
878 	ei->dentry = dentry;
879 	ei->entries = entries;
880 	ei->nr_entries = size;
881 	ei->data = data;
882 	ei->name = kstrdup_const(name, GFP_KERNEL);
883 	if (!ei->name)
884 		goto fail;
885 
886 	INIT_LIST_HEAD(&ei->children);
887 	INIT_LIST_HEAD(&ei->list);
888 
889 	ti = get_tracefs(inode);
890 	ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE;
891 	ti->private = ei;
892 
893 	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
894 	inode->i_op = &eventfs_root_dir_inode_operations;
895 	inode->i_fop = &eventfs_file_operations;
896 
897 	dentry->d_fsdata = ei;
898 
899 	/* directory inodes start off with i_nlink == 2 (for "." entry) */
900 	inc_nlink(inode);
901 	d_instantiate(dentry, inode);
902 	inc_nlink(dentry->d_parent->d_inode);
903 	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
904 	tracefs_end_creating(dentry);
905 
906 	return ei;
907 
908  fail:
909 	kfree(ei->d_children);
910 	kfree(ei);
911  fail_ei:
912 	tracefs_failed_creating(dentry);
913 	return ERR_PTR(-ENOMEM);
914 }
915 
916 static LLIST_HEAD(free_list);
917 
918 static void eventfs_workfn(struct work_struct *work)
919 {
920         struct eventfs_inode *ei, *tmp;
921         struct llist_node *llnode;
922 
923 	llnode = llist_del_all(&free_list);
924         llist_for_each_entry_safe(ei, tmp, llnode, llist) {
925 		/* This dput() matches the dget() from unhook_dentry() */
926 		for (int i = 0; i < ei->nr_entries; i++) {
927 			if (ei->d_children[i])
928 				dput(ei->d_children[i]);
929 		}
930 		/* This should only get here if it had a dentry */
931 		if (!WARN_ON_ONCE(!ei->dentry))
932 			dput(ei->dentry);
933         }
934 }
935 
936 static DECLARE_WORK(eventfs_work, eventfs_workfn);
937 
938 static void free_rcu_ei(struct rcu_head *head)
939 {
940 	struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
941 
942 	if (ei->dentry) {
943 		/* Do not free the ei until all references of dentry are gone */
944 		if (llist_add(&ei->llist, &free_list))
945 			queue_work(system_unbound_wq, &eventfs_work);
946 		return;
947 	}
948 
949 	/* If the ei doesn't have a dentry, neither should its children */
950 	for (int i = 0; i < ei->nr_entries; i++) {
951 		WARN_ON_ONCE(ei->d_children[i]);
952 	}
953 
954 	free_ei(ei);
955 }
956 
/*
 * Take the two references on @dentry that the removal path relies on.
 * A NULL @dentry is ignored.
 */
static void unhook_dentry(struct dentry *dentry)
{
	if (dentry) {
		/*
		 * First reference: the one expected by
		 * simple_recursive_removal(), which will include a dput().
		 */
		dget(dentry);

		/*
		 * Second reference: for the dput() in eventfs_workfn().
		 * That is required as that dput() will free the ei after
		 * the SRCU grace period is over.
		 */
		dget(dentry);
	}
}
974 
975 /**
976  * eventfs_remove_rec - remove eventfs dir or file from list
977  * @ei: eventfs_inode to be removed.
978  * @level: prevent recursion from going more than 3 levels deep.
979  *
980  * This function recursively removes eventfs_inodes which
981  * contains info of files and/or directories.
982  */
983 static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
984 {
985 	struct eventfs_inode *ei_child;
986 
987 	if (!ei)
988 		return;
989 	/*
990 	 * Check recursion depth. It should never be greater than 3:
991 	 * 0 - events/
992 	 * 1 - events/group/
993 	 * 2 - events/group/event/
994 	 * 3 - events/group/event/file
995 	 */
996 	if (WARN_ON_ONCE(level > 3))
997 		return;
998 
999 	/* search for nested folders or files */
1000 	list_for_each_entry_srcu(ei_child, &ei->children, list,
1001 				 lockdep_is_held(&eventfs_mutex)) {
1002 		/* Children only have dentry if parent does */
1003 		WARN_ON_ONCE(ei_child->dentry && !ei->dentry);
1004 		eventfs_remove_rec(ei_child, level + 1);
1005 	}
1006 
1007 
1008 	ei->is_freed = 1;
1009 
1010 	for (int i = 0; i < ei->nr_entries; i++) {
1011 		if (ei->d_children[i]) {
1012 			/* Children only have dentry if parent does */
1013 			WARN_ON_ONCE(!ei->dentry);
1014 			unhook_dentry(ei->d_children[i]);
1015 		}
1016 	}
1017 
1018 	unhook_dentry(ei->dentry);
1019 
1020 	list_del_rcu(&ei->list);
1021 	call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
1022 }
1023 
1024 /**
1025  * eventfs_remove_dir - remove eventfs dir or file from list
1026  * @ei: eventfs_inode to be removed.
1027  *
1028  * This function acquire the eventfs_mutex lock and call eventfs_remove_rec()
1029  */
1030 void eventfs_remove_dir(struct eventfs_inode *ei)
1031 {
1032 	struct dentry *dentry;
1033 
1034 	if (!ei)
1035 		return;
1036 
1037 	mutex_lock(&eventfs_mutex);
1038 	dentry = ei->dentry;
1039 	eventfs_remove_rec(ei, 0);
1040 	mutex_unlock(&eventfs_mutex);
1041 
1042 	/*
1043 	 * If any of the ei children has a dentry, then the ei itself
1044 	 * must have a dentry.
1045 	 */
1046 	if (dentry)
1047 		simple_recursive_removal(dentry, NULL);
1048 }
1049 
1050 /**
1051  * eventfs_remove_events_dir - remove the top level eventfs directory
1052  * @ei: the event_inode returned by eventfs_create_events_dir().
1053  *
1054  * This function removes the events main directory
1055  */
1056 void eventfs_remove_events_dir(struct eventfs_inode *ei)
1057 {
1058 	struct dentry *dentry;
1059 
1060 	dentry = ei->dentry;
1061 	eventfs_remove_dir(ei);
1062 
1063 	/*
1064 	 * Matches the dget() done by tracefs_start_creating()
1065 	 * in eventfs_create_events_dir() when it the dentry was
1066 	 * created. In other words, it's a normal dentry that
1067 	 * sticks around while the other ei->dentry are created
1068 	 * and destroyed dynamically.
1069 	 */
1070 	dput(dentry);
1071 }
1072