xref: /linux/fs/tracefs/event_inode.c (revision 3d0fe49454652117522f60bfbefb978ba0e5300b)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  event_inode.c - part of tracefs, a pseudo file system for activating tracing
4  *
5  *  Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org>
6  *  Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com>
7  *  Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org>
8  *
9  *  eventfs is used to dynamically create inodes and dentries based on the
10  *  meta data provided by the tracing system.
11  *
12  *  eventfs stores the meta-data of files/dirs and holds off on creating
13  *  inodes/dentries of the files. When accessed, the eventfs will create the
14  *  inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up
15  *  and delete the inodes/dentries when they are no longer referenced.
16  */
17 #include <linux/fsnotify.h>
18 #include <linux/fs.h>
19 #include <linux/namei.h>
20 #include <linux/workqueue.h>
21 #include <linux/security.h>
22 #include <linux/tracefs.h>
23 #include <linux/kref.h>
24 #include <linux/delay.h>
25 #include "internal.h"
26 
/*
 * eventfs_mutex protects the eventfs_inode (ei) dentry. Any access
 * to the ei->dentry must be done under this mutex and after checking
 * if ei->is_freed is not set. The ei->dentry is released under the
 * mutex at the same time ei->is_freed is set. If ei->is_freed is set
 * then the ei->dentry is invalid.
 *
 * In this file the mutex is also held while updating ei->d_children[],
 * while allocating ei->entry_attrs, and while adding to or removing
 * from an ei->children list.
 */
static DEFINE_MUTEX(eventfs_mutex);
35 
/*
 * The eventfs_inode (ei) itself is protected by SRCU. It is released from
 * its parent's list and will have is_freed set (under eventfs_mutex).
 * After the SRCU grace period is over, the ei may be freed.
 *
 * Readers (lookup and directory open) walk ei->children between
 * srcu_read_lock() and srcu_read_unlock() on this domain.
 */
DEFINE_STATIC_SRCU(eventfs_srcu);
42 
/*
 * Mode is unsigned short, use the upper bits for flags.
 *
 * The flags live in eventfs_attr.mode next to the saved mode bits. They are
 * set by update_attr() when the matching attribute is changed, and tested
 * by update_inode_attr() to decide which saved values to apply to an inode.
 */
enum {
	EVENTFS_SAVE_MODE	= BIT(16),	/* a mode was saved in attr->mode */
	EVENTFS_SAVE_UID	= BIT(17),	/* attr->uid holds a saved owner */
	EVENTFS_SAVE_GID	= BIT(18),	/* attr->gid holds a saved group */
};

/* Selects the mode bits of eventfs_attr.mode, i.e. everything below the flags */
#define EVENTFS_MODE_MASK	(EVENTFS_SAVE_MODE - 1)
51 
/*
 * Forward declarations: these handlers are referenced by the operation
 * tables below before their definitions appear in this file.
 */
static struct dentry *eventfs_root_lookup(struct inode *dir,
					  struct dentry *dentry,
					  unsigned int flags);
static int dcache_dir_open_wrapper(struct inode *inode, struct file *file);
static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx);
static int eventfs_release(struct inode *inode, struct file *file);
58 
59 static void update_attr(struct eventfs_attr *attr, struct iattr *iattr)
60 {
61 	unsigned int ia_valid = iattr->ia_valid;
62 
63 	if (ia_valid & ATTR_MODE) {
64 		attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) |
65 			(iattr->ia_mode & EVENTFS_MODE_MASK) |
66 			EVENTFS_SAVE_MODE;
67 	}
68 	if (ia_valid & ATTR_UID) {
69 		attr->mode |= EVENTFS_SAVE_UID;
70 		attr->uid = iattr->ia_uid;
71 	}
72 	if (ia_valid & ATTR_GID) {
73 		attr->mode |= EVENTFS_SAVE_GID;
74 		attr->gid = iattr->ia_gid;
75 	}
76 }
77 
78 static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
79 			    struct iattr *iattr)
80 {
81 	const struct eventfs_entry *entry;
82 	struct eventfs_inode *ei;
83 	const char *name;
84 	int ret;
85 
86 	mutex_lock(&eventfs_mutex);
87 	ei = dentry->d_fsdata;
88 	if (ei->is_freed) {
89 		/* Do not allow changes if the event is about to be removed. */
90 		mutex_unlock(&eventfs_mutex);
91 		return -ENODEV;
92 	}
93 
94 	/* Preallocate the children mode array if necessary */
95 	if (!(dentry->d_inode->i_mode & S_IFDIR)) {
96 		if (!ei->entry_attrs) {
97 			ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries,
98 						  GFP_KERNEL);
99 			if (!ei->entry_attrs) {
100 				ret = -ENOMEM;
101 				goto out;
102 			}
103 		}
104 	}
105 
106 	ret = simple_setattr(idmap, dentry, iattr);
107 	if (ret < 0)
108 		goto out;
109 
110 	/*
111 	 * If this is a dir, then update the ei cache, only the file
112 	 * mode is saved in the ei->m_children, and the ownership is
113 	 * determined by the parent directory.
114 	 */
115 	if (dentry->d_inode->i_mode & S_IFDIR) {
116 		update_attr(&ei->attr, iattr);
117 
118 	} else {
119 		name = dentry->d_name.name;
120 
121 		for (int i = 0; i < ei->nr_entries; i++) {
122 			entry = &ei->entries[i];
123 			if (strcmp(name, entry->name) == 0) {
124 				update_attr(&ei->entry_attrs[i], iattr);
125 				break;
126 			}
127 		}
128 	}
129  out:
130 	mutex_unlock(&eventfs_mutex);
131 	return ret;
132 }
133 
/*
 * Inode operations for eventfs directories. Installed by create_dir() and
 * eventfs_create_events_dir(); lookup creates the children on demand.
 */
static const struct inode_operations eventfs_root_dir_inode_operations = {
	.lookup		= eventfs_root_lookup,
	.setattr	= eventfs_set_attr,
};

/* Inode operations for eventfs files. Installed by create_file(). */
static const struct inode_operations eventfs_file_inode_operations = {
	.setattr	= eventfs_set_attr,
};

/*
 * File operations for eventfs directories (despite the name, this table is
 * assigned to directory inodes). Open creates the dentries of the children,
 * release drops the references that open took.
 */
static const struct file_operations eventfs_file_operations = {
	.open		= dcache_dir_open_wrapper,
	.read		= generic_read_dir,
	.iterate_shared	= dcache_readdir_wrapper,
	.llseek		= generic_file_llseek,
	.release	= eventfs_release,
};
150 
151 static void update_inode_attr(struct inode *inode, struct eventfs_attr *attr, umode_t mode)
152 {
153 	if (!attr) {
154 		inode->i_mode = mode;
155 		return;
156 	}
157 
158 	if (attr->mode & EVENTFS_SAVE_MODE)
159 		inode->i_mode = attr->mode & EVENTFS_MODE_MASK;
160 	else
161 		inode->i_mode = mode;
162 
163 	if (attr->mode & EVENTFS_SAVE_UID)
164 		inode->i_uid = attr->uid;
165 
166 	if (attr->mode & EVENTFS_SAVE_GID)
167 		inode->i_gid = attr->gid;
168 }
169 
170 /**
171  * create_file - create a file in the tracefs filesystem
172  * @name: the name of the file to create.
173  * @mode: the permission that the file should have.
174  * @attr: saved attributes changed by user
175  * @parent: parent dentry for this file.
176  * @data: something that the caller will want to get to later on.
177  * @fop: struct file_operations that should be used for this file.
178  *
179  * This function creates a dentry that represents a file in the eventsfs_inode
180  * directory. The inode.i_private pointer will point to @data in the open()
181  * call.
182  */
183 static struct dentry *create_file(const char *name, umode_t mode,
184 				  struct eventfs_attr *attr,
185 				  struct dentry *parent, void *data,
186 				  const struct file_operations *fop)
187 {
188 	struct tracefs_inode *ti;
189 	struct dentry *dentry;
190 	struct inode *inode;
191 
192 	if (!(mode & S_IFMT))
193 		mode |= S_IFREG;
194 
195 	if (WARN_ON_ONCE(!S_ISREG(mode)))
196 		return NULL;
197 
198 	WARN_ON_ONCE(!parent);
199 	dentry = eventfs_start_creating(name, parent);
200 
201 	if (IS_ERR(dentry))
202 		return dentry;
203 
204 	inode = tracefs_get_inode(dentry->d_sb);
205 	if (unlikely(!inode))
206 		return eventfs_failed_creating(dentry);
207 
208 	/* If the user updated the directory's attributes, use them */
209 	update_inode_attr(inode, attr, mode);
210 
211 	inode->i_op = &eventfs_file_inode_operations;
212 	inode->i_fop = fop;
213 	inode->i_private = data;
214 
215 	ti = get_tracefs(inode);
216 	ti->flags |= TRACEFS_EVENT_INODE;
217 	d_instantiate(dentry, inode);
218 	fsnotify_create(dentry->d_parent->d_inode, dentry);
219 	return eventfs_end_creating(dentry);
220 };
221 
222 /**
223  * create_dir - create a dir in the tracefs filesystem
224  * @ei: the eventfs_inode that represents the directory to create
225  * @parent: parent dentry for this file.
226  *
227  * This function will create a dentry for a directory represented by
228  * a eventfs_inode.
229  */
230 static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent)
231 {
232 	struct tracefs_inode *ti;
233 	struct dentry *dentry;
234 	struct inode *inode;
235 
236 	dentry = eventfs_start_creating(ei->name, parent);
237 	if (IS_ERR(dentry))
238 		return dentry;
239 
240 	inode = tracefs_get_inode(dentry->d_sb);
241 	if (unlikely(!inode))
242 		return eventfs_failed_creating(dentry);
243 
244 	/* If the user updated the directory's attributes, use them */
245 	update_inode_attr(inode, &ei->attr, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
246 
247 	inode->i_op = &eventfs_root_dir_inode_operations;
248 	inode->i_fop = &eventfs_file_operations;
249 
250 	ti = get_tracefs(inode);
251 	ti->flags |= TRACEFS_EVENT_INODE;
252 
253 	inc_nlink(inode);
254 	d_instantiate(dentry, inode);
255 	inc_nlink(dentry->d_parent->d_inode);
256 	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
257 	return eventfs_end_creating(dentry);
258 }
259 
260 static void free_ei(struct eventfs_inode *ei)
261 {
262 	kfree_const(ei->name);
263 	kfree(ei->d_children);
264 	kfree(ei->entry_attrs);
265 	kfree(ei);
266 }
267 
268 /**
269  * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
270  * @ti: the tracefs_inode of the dentry
271  * @dentry: dentry which has the reference to remove.
272  *
273  * Remove the association between a dentry from an eventfs_inode.
274  */
275 void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
276 {
277 	struct eventfs_inode *ei;
278 	int i;
279 
280 	mutex_lock(&eventfs_mutex);
281 
282 	ei = dentry->d_fsdata;
283 	if (!ei)
284 		goto out;
285 
286 	/* This could belong to one of the files of the ei */
287 	if (ei->dentry != dentry) {
288 		for (i = 0; i < ei->nr_entries; i++) {
289 			if (ei->d_children[i] == dentry)
290 				break;
291 		}
292 		if (WARN_ON_ONCE(i == ei->nr_entries))
293 			goto out;
294 		ei->d_children[i] = NULL;
295 	} else if (ei->is_freed) {
296 		free_ei(ei);
297 	} else {
298 		ei->dentry = NULL;
299 	}
300 
301 	dentry->d_fsdata = NULL;
302  out:
303 	mutex_unlock(&eventfs_mutex);
304 }
305 
306 /**
307  * create_file_dentry - create a dentry for a file of an eventfs_inode
308  * @ei: the eventfs_inode that the file will be created under
309  * @idx: the index into the d_children[] of the @ei
310  * @parent: The parent dentry of the created file.
311  * @name: The name of the file to create
312  * @mode: The mode of the file.
313  * @data: The data to use to set the inode of the file with on open()
314  * @fops: The fops of the file to be created.
315  * @lookup: If called by the lookup routine, in which case, dput() the created dentry.
316  *
317  * Create a dentry for a file of an eventfs_inode @ei and place it into the
318  * address located at @e_dentry. If the @e_dentry already has a dentry, then
319  * just do a dget() on it and return. Otherwise create the dentry and attach it.
320  */
321 static struct dentry *
322 create_file_dentry(struct eventfs_inode *ei, int idx,
323 		   struct dentry *parent, const char *name, umode_t mode, void *data,
324 		   const struct file_operations *fops, bool lookup)
325 {
326 	struct eventfs_attr *attr = NULL;
327 	struct dentry **e_dentry = &ei->d_children[idx];
328 	struct dentry *dentry;
329 	bool invalidate = false;
330 
331 	mutex_lock(&eventfs_mutex);
332 	if (ei->is_freed) {
333 		mutex_unlock(&eventfs_mutex);
334 		return NULL;
335 	}
336 	/* If the e_dentry already has a dentry, use it */
337 	if (*e_dentry) {
338 		/* lookup does not need to up the ref count */
339 		if (!lookup)
340 			dget(*e_dentry);
341 		mutex_unlock(&eventfs_mutex);
342 		return *e_dentry;
343 	}
344 
345 	/* ei->entry_attrs are protected by SRCU */
346 	if (ei->entry_attrs)
347 		attr = &ei->entry_attrs[idx];
348 
349 	mutex_unlock(&eventfs_mutex);
350 
351 	/* The lookup already has the parent->d_inode locked */
352 	if (!lookup)
353 		inode_lock(parent->d_inode);
354 
355 	dentry = create_file(name, mode, attr, parent, data, fops);
356 
357 	if (!lookup)
358 		inode_unlock(parent->d_inode);
359 
360 	mutex_lock(&eventfs_mutex);
361 
362 	if (IS_ERR_OR_NULL(dentry)) {
363 		/*
364 		 * When the mutex was released, something else could have
365 		 * created the dentry for this e_dentry. In which case
366 		 * use that one.
367 		 *
368 		 * Note, with the mutex held, the e_dentry cannot have content
369 		 * and the ei->is_freed be true at the same time.
370 		 */
371 		dentry = *e_dentry;
372 		if (WARN_ON_ONCE(dentry && ei->is_freed))
373 			dentry = NULL;
374 		/* The lookup does not need to up the dentry refcount */
375 		if (dentry && !lookup)
376 			dget(dentry);
377 		mutex_unlock(&eventfs_mutex);
378 		return dentry;
379 	}
380 
381 	if (!*e_dentry && !ei->is_freed) {
382 		*e_dentry = dentry;
383 		dentry->d_fsdata = ei;
384 	} else {
385 		/*
386 		 * Should never happen unless we get here due to being freed.
387 		 * Otherwise it means two dentries exist with the same name.
388 		 */
389 		WARN_ON_ONCE(!ei->is_freed);
390 		invalidate = true;
391 	}
392 	mutex_unlock(&eventfs_mutex);
393 
394 	if (invalidate)
395 		d_invalidate(dentry);
396 
397 	if (lookup || invalidate)
398 		dput(dentry);
399 
400 	return invalidate ? NULL : dentry;
401 }
402 
403 /**
404  * eventfs_post_create_dir - post create dir routine
405  * @ei: eventfs_inode of recently created dir
406  *
407  * Map the meta-data of files within an eventfs dir to their parent dentry
408  */
409 static void eventfs_post_create_dir(struct eventfs_inode *ei)
410 {
411 	struct eventfs_inode *ei_child;
412 	struct tracefs_inode *ti;
413 
414 	lockdep_assert_held(&eventfs_mutex);
415 
416 	/* srcu lock already held */
417 	/* fill parent-child relation */
418 	list_for_each_entry_srcu(ei_child, &ei->children, list,
419 				 srcu_read_lock_held(&eventfs_srcu)) {
420 		ei_child->d_parent = ei->dentry;
421 	}
422 
423 	ti = get_tracefs(ei->dentry->d_inode);
424 	ti->private = ei;
425 }
426 
427 /**
428  * create_dir_dentry - Create a directory dentry for the eventfs_inode
429  * @pei: The eventfs_inode parent of ei.
430  * @ei: The eventfs_inode to create the directory for
431  * @parent: The dentry of the parent of this directory
432  * @lookup: True if this is called by the lookup code
433  *
434  * This creates and attaches a directory dentry to the eventfs_inode @ei.
435  */
436 static struct dentry *
437 create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
438 		  struct dentry *parent, bool lookup)
439 {
440 	bool invalidate = false;
441 	struct dentry *dentry = NULL;
442 
443 	mutex_lock(&eventfs_mutex);
444 	if (pei->is_freed || ei->is_freed) {
445 		mutex_unlock(&eventfs_mutex);
446 		return NULL;
447 	}
448 	if (ei->dentry) {
449 		/* If the dentry already has a dentry, use it */
450 		dentry = ei->dentry;
451 		/* lookup does not need to up the ref count */
452 		if (!lookup)
453 			dget(dentry);
454 		mutex_unlock(&eventfs_mutex);
455 		return dentry;
456 	}
457 	mutex_unlock(&eventfs_mutex);
458 
459 	/* The lookup already has the parent->d_inode locked */
460 	if (!lookup)
461 		inode_lock(parent->d_inode);
462 
463 	dentry = create_dir(ei, parent);
464 
465 	if (!lookup)
466 		inode_unlock(parent->d_inode);
467 
468 	mutex_lock(&eventfs_mutex);
469 
470 	if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
471 		/*
472 		 * When the mutex was released, something else could have
473 		 * created the dentry for this e_dentry. In which case
474 		 * use that one.
475 		 *
476 		 * Note, with the mutex held, the e_dentry cannot have content
477 		 * and the ei->is_freed be true at the same time.
478 		 */
479 		dentry = ei->dentry;
480 		if (dentry && !lookup)
481 			dget(dentry);
482 		mutex_unlock(&eventfs_mutex);
483 		return dentry;
484 	}
485 
486 	if (!ei->dentry && !ei->is_freed) {
487 		ei->dentry = dentry;
488 		eventfs_post_create_dir(ei);
489 		dentry->d_fsdata = ei;
490 	} else {
491 		/*
492 		 * Should never happen unless we get here due to being freed.
493 		 * Otherwise it means two dentries exist with the same name.
494 		 */
495 		WARN_ON_ONCE(!ei->is_freed);
496 		invalidate = true;
497 	}
498 	mutex_unlock(&eventfs_mutex);
499 	if (invalidate)
500 		d_invalidate(dentry);
501 
502 	if (lookup || invalidate)
503 		dput(dentry);
504 
505 	return invalidate ? NULL : dentry;
506 }
507 
508 /**
509  * eventfs_root_lookup - lookup routine to create file/dir
510  * @dir: in which a lookup is being done
511  * @dentry: file/dir dentry
512  * @flags: Just passed to simple_lookup()
513  *
514  * Used to create dynamic file/dir with-in @dir, search with-in @ei
515  * list, if @dentry found go ahead and create the file/dir
516  */
517 
518 static struct dentry *eventfs_root_lookup(struct inode *dir,
519 					  struct dentry *dentry,
520 					  unsigned int flags)
521 {
522 	const struct file_operations *fops;
523 	const struct eventfs_entry *entry;
524 	struct eventfs_inode *ei_child;
525 	struct tracefs_inode *ti;
526 	struct eventfs_inode *ei;
527 	struct dentry *ei_dentry = NULL;
528 	struct dentry *ret = NULL;
529 	const char *name = dentry->d_name.name;
530 	bool created = false;
531 	umode_t mode;
532 	void *data;
533 	int idx;
534 	int i;
535 	int r;
536 
537 	ti = get_tracefs(dir);
538 	if (!(ti->flags & TRACEFS_EVENT_INODE))
539 		return NULL;
540 
541 	/* Grab srcu to prevent the ei from going away */
542 	idx = srcu_read_lock(&eventfs_srcu);
543 
544 	/*
545 	 * Grab the eventfs_mutex to consistent value from ti->private.
546 	 * This s
547 	 */
548 	mutex_lock(&eventfs_mutex);
549 	ei = READ_ONCE(ti->private);
550 	if (ei && !ei->is_freed)
551 		ei_dentry = READ_ONCE(ei->dentry);
552 	mutex_unlock(&eventfs_mutex);
553 
554 	if (!ei || !ei_dentry)
555 		goto out;
556 
557 	data = ei->data;
558 
559 	list_for_each_entry_srcu(ei_child, &ei->children, list,
560 				 srcu_read_lock_held(&eventfs_srcu)) {
561 		if (strcmp(ei_child->name, name) != 0)
562 			continue;
563 		ret = simple_lookup(dir, dentry, flags);
564 		create_dir_dentry(ei, ei_child, ei_dentry, true);
565 		created = true;
566 		break;
567 	}
568 
569 	if (created)
570 		goto out;
571 
572 	for (i = 0; i < ei->nr_entries; i++) {
573 		entry = &ei->entries[i];
574 		if (strcmp(name, entry->name) == 0) {
575 			void *cdata = data;
576 			mutex_lock(&eventfs_mutex);
577 			/* If ei->is_freed, then the event itself may be too */
578 			if (!ei->is_freed)
579 				r = entry->callback(name, &mode, &cdata, &fops);
580 			else
581 				r = -1;
582 			mutex_unlock(&eventfs_mutex);
583 			if (r <= 0)
584 				continue;
585 			ret = simple_lookup(dir, dentry, flags);
586 			create_file_dentry(ei, i, ei_dentry, name, mode, cdata,
587 					   fops, true);
588 			break;
589 		}
590 	}
591  out:
592 	srcu_read_unlock(&eventfs_srcu, idx);
593 	return ret;
594 }
595 
/*
 * Stored in file->private_data of an open eventfs directory by
 * dcache_dir_open_wrapper() and torn down by eventfs_release().
 */
struct dentry_list {
	/* the private_data value set by dcache_dir_open() */
	void			*cursor;
	/* NULL terminated array of the dentries referenced by the open, or NULL */
	struct dentry		**dentries;
};
600 
601 /**
602  * eventfs_release - called to release eventfs file/dir
603  * @inode: inode to be released
604  * @file: file to be released (not used)
605  */
606 static int eventfs_release(struct inode *inode, struct file *file)
607 {
608 	struct tracefs_inode *ti;
609 	struct dentry_list *dlist = file->private_data;
610 	void *cursor;
611 	int i;
612 
613 	ti = get_tracefs(inode);
614 	if (!(ti->flags & TRACEFS_EVENT_INODE))
615 		return -EINVAL;
616 
617 	if (WARN_ON_ONCE(!dlist))
618 		return -EINVAL;
619 
620 	for (i = 0; dlist->dentries && dlist->dentries[i]; i++) {
621 		dput(dlist->dentries[i]);
622 	}
623 
624 	cursor = dlist->cursor;
625 	kfree(dlist->dentries);
626 	kfree(dlist);
627 	file->private_data = cursor;
628 	return dcache_dir_close(inode, file);
629 }
630 
631 static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt)
632 {
633 	struct dentry **tmp;
634 
635 	tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_KERNEL);
636 	if (!tmp)
637 		return -1;
638 	tmp[cnt] = d;
639 	tmp[cnt + 1] = NULL;
640 	*dentries = tmp;
641 	return 0;
642 }
643 
644 /**
645  * dcache_dir_open_wrapper - eventfs open wrapper
646  * @inode: not used
647  * @file: dir to be opened (to create it's children)
648  *
649  * Used to dynamic create file/dir with-in @file, all the
650  * file/dir will be created. If already created then references
651  * will be increased
652  */
653 static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
654 {
655 	const struct file_operations *fops;
656 	const struct eventfs_entry *entry;
657 	struct eventfs_inode *ei_child;
658 	struct tracefs_inode *ti;
659 	struct eventfs_inode *ei;
660 	struct dentry_list *dlist;
661 	struct dentry **dentries = NULL;
662 	struct dentry *parent = file_dentry(file);
663 	struct dentry *d;
664 	struct inode *f_inode = file_inode(file);
665 	const char *name = parent->d_name.name;
666 	umode_t mode;
667 	void *data;
668 	int cnt = 0;
669 	int idx;
670 	int ret;
671 	int i;
672 	int r;
673 
674 	ti = get_tracefs(f_inode);
675 	if (!(ti->flags & TRACEFS_EVENT_INODE))
676 		return -EINVAL;
677 
678 	if (WARN_ON_ONCE(file->private_data))
679 		return -EINVAL;
680 
681 	idx = srcu_read_lock(&eventfs_srcu);
682 
683 	mutex_lock(&eventfs_mutex);
684 	ei = READ_ONCE(ti->private);
685 	mutex_unlock(&eventfs_mutex);
686 
687 	if (!ei) {
688 		srcu_read_unlock(&eventfs_srcu, idx);
689 		return -EINVAL;
690 	}
691 
692 
693 	data = ei->data;
694 
695 	dlist = kmalloc(sizeof(*dlist), GFP_KERNEL);
696 	if (!dlist) {
697 		srcu_read_unlock(&eventfs_srcu, idx);
698 		return -ENOMEM;
699 	}
700 
701 	list_for_each_entry_srcu(ei_child, &ei->children, list,
702 				 srcu_read_lock_held(&eventfs_srcu)) {
703 		d = create_dir_dentry(ei, ei_child, parent, false);
704 		if (d) {
705 			ret = add_dentries(&dentries, d, cnt);
706 			if (ret < 0)
707 				break;
708 			cnt++;
709 		}
710 	}
711 
712 	for (i = 0; i < ei->nr_entries; i++) {
713 		void *cdata = data;
714 		entry = &ei->entries[i];
715 		name = entry->name;
716 		mutex_lock(&eventfs_mutex);
717 		/* If ei->is_freed, then the event itself may be too */
718 		if (!ei->is_freed)
719 			r = entry->callback(name, &mode, &cdata, &fops);
720 		else
721 			r = -1;
722 		mutex_unlock(&eventfs_mutex);
723 		if (r <= 0)
724 			continue;
725 		d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false);
726 		if (d) {
727 			ret = add_dentries(&dentries, d, cnt);
728 			if (ret < 0)
729 				break;
730 			cnt++;
731 		}
732 	}
733 	srcu_read_unlock(&eventfs_srcu, idx);
734 	ret = dcache_dir_open(inode, file);
735 
736 	/*
737 	 * dcache_dir_open() sets file->private_data to a dentry cursor.
738 	 * Need to save that but also save all the dentries that were
739 	 * opened by this function.
740 	 */
741 	dlist->cursor = file->private_data;
742 	dlist->dentries = dentries;
743 	file->private_data = dlist;
744 	return ret;
745 }
746 
747 /*
748  * This just sets the file->private_data back to the cursor and back.
749  */
750 static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx)
751 {
752 	struct dentry_list *dlist = file->private_data;
753 	int ret;
754 
755 	file->private_data = dlist->cursor;
756 	ret = dcache_readdir(file, ctx);
757 	dlist->cursor = file->private_data;
758 	file->private_data = dlist;
759 	return ret;
760 }
761 
762 /**
763  * eventfs_create_dir - Create the eventfs_inode for this directory
764  * @name: The name of the directory to create.
765  * @parent: The eventfs_inode of the parent directory.
766  * @entries: A list of entries that represent the files under this directory
767  * @size: The number of @entries
768  * @data: The default data to pass to the files (an entry may override it).
769  *
770  * This function creates the descriptor to represent a directory in the
771  * eventfs. This descriptor is an eventfs_inode, and it is returned to be
772  * used to create other children underneath.
773  *
774  * The @entries is an array of eventfs_entry structures which has:
775  *	const char		 *name
776  *	eventfs_callback	callback;
777  *
778  * The name is the name of the file, and the callback is a pointer to a function
779  * that will be called when the file is reference (either by lookup or by
780  * reading a directory). The callback is of the prototype:
781  *
782  *    int callback(const char *name, umode_t *mode, void **data,
783  *		   const struct file_operations **fops);
784  *
785  * When a file needs to be created, this callback will be called with
786  *   name = the name of the file being created (so that the same callback
787  *          may be used for multiple files).
788  *   mode = a place to set the file's mode
789  *   data = A pointer to @data, and the callback may replace it, which will
790  *         cause the file created to pass the new data to the open() call.
791  *   fops = the fops to use for the created file.
792  *
793  * NB. @callback is called while holding internal locks of the eventfs
794  *     system. The callback must not call any code that might also call into
795  *     the tracefs or eventfs system or it will risk creating a deadlock.
796  */
797 struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
798 					 const struct eventfs_entry *entries,
799 					 int size, void *data)
800 {
801 	struct eventfs_inode *ei;
802 
803 	if (!parent)
804 		return ERR_PTR(-EINVAL);
805 
806 	ei = kzalloc(sizeof(*ei), GFP_KERNEL);
807 	if (!ei)
808 		return ERR_PTR(-ENOMEM);
809 
810 	ei->name = kstrdup_const(name, GFP_KERNEL);
811 	if (!ei->name) {
812 		kfree(ei);
813 		return ERR_PTR(-ENOMEM);
814 	}
815 
816 	if (size) {
817 		ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
818 		if (!ei->d_children) {
819 			kfree_const(ei->name);
820 			kfree(ei);
821 			return ERR_PTR(-ENOMEM);
822 		}
823 	}
824 
825 	ei->entries = entries;
826 	ei->nr_entries = size;
827 	ei->data = data;
828 	INIT_LIST_HEAD(&ei->children);
829 	INIT_LIST_HEAD(&ei->list);
830 
831 	mutex_lock(&eventfs_mutex);
832 	if (!parent->is_freed) {
833 		list_add_tail(&ei->list, &parent->children);
834 		ei->d_parent = parent->dentry;
835 	}
836 	mutex_unlock(&eventfs_mutex);
837 
838 	/* Was the parent freed? */
839 	if (list_empty(&ei->list)) {
840 		free_ei(ei);
841 		ei = NULL;
842 	}
843 	return ei;
844 }
845 
846 /**
847  * eventfs_create_events_dir - create the top level events directory
848  * @name: The name of the top level directory to create.
849  * @parent: Parent dentry for this file in the tracefs directory.
850  * @entries: A list of entries that represent the files under this directory
851  * @size: The number of @entries
852  * @data: The default data to pass to the files (an entry may override it).
853  *
854  * This function creates the top of the trace event directory.
855  *
856  * See eventfs_create_dir() for use of @entries.
857  */
858 struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
859 						const struct eventfs_entry *entries,
860 						int size, void *data)
861 {
862 	struct dentry *dentry = tracefs_start_creating(name, parent);
863 	struct eventfs_inode *ei;
864 	struct tracefs_inode *ti;
865 	struct inode *inode;
866 
867 	if (security_locked_down(LOCKDOWN_TRACEFS))
868 		return NULL;
869 
870 	if (IS_ERR(dentry))
871 		return ERR_CAST(dentry);
872 
873 	ei = kzalloc(sizeof(*ei), GFP_KERNEL);
874 	if (!ei)
875 		goto fail_ei;
876 
877 	inode = tracefs_get_inode(dentry->d_sb);
878 	if (unlikely(!inode))
879 		goto fail;
880 
881 	if (size) {
882 		ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
883 		if (!ei->d_children)
884 			goto fail;
885 	}
886 
887 	ei->dentry = dentry;
888 	ei->entries = entries;
889 	ei->nr_entries = size;
890 	ei->data = data;
891 	ei->name = kstrdup_const(name, GFP_KERNEL);
892 	if (!ei->name)
893 		goto fail;
894 
895 	INIT_LIST_HEAD(&ei->children);
896 	INIT_LIST_HEAD(&ei->list);
897 
898 	ti = get_tracefs(inode);
899 	ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE;
900 	ti->private = ei;
901 
902 	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
903 	inode->i_op = &eventfs_root_dir_inode_operations;
904 	inode->i_fop = &eventfs_file_operations;
905 
906 	dentry->d_fsdata = ei;
907 
908 	/* directory inodes start off with i_nlink == 2 (for "." entry) */
909 	inc_nlink(inode);
910 	d_instantiate(dentry, inode);
911 	inc_nlink(dentry->d_parent->d_inode);
912 	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
913 	tracefs_end_creating(dentry);
914 
915 	return ei;
916 
917  fail:
918 	kfree(ei->d_children);
919 	kfree(ei);
920  fail_ei:
921 	tracefs_failed_creating(dentry);
922 	return ERR_PTR(-ENOMEM);
923 }
924 
925 static LLIST_HEAD(free_list);
926 
927 static void eventfs_workfn(struct work_struct *work)
928 {
929         struct eventfs_inode *ei, *tmp;
930         struct llist_node *llnode;
931 
932 	llnode = llist_del_all(&free_list);
933         llist_for_each_entry_safe(ei, tmp, llnode, llist) {
934 		/* This dput() matches the dget() from unhook_dentry() */
935 		for (int i = 0; i < ei->nr_entries; i++) {
936 			if (ei->d_children[i])
937 				dput(ei->d_children[i]);
938 		}
939 		/* This should only get here if it had a dentry */
940 		if (!WARN_ON_ONCE(!ei->dentry))
941 			dput(ei->dentry);
942         }
943 }
944 
945 static DECLARE_WORK(eventfs_work, eventfs_workfn);
946 
947 static void free_rcu_ei(struct rcu_head *head)
948 {
949 	struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
950 
951 	if (ei->dentry) {
952 		/* Do not free the ei until all references of dentry are gone */
953 		if (llist_add(&ei->llist, &free_list))
954 			queue_work(system_unbound_wq, &eventfs_work);
955 		return;
956 	}
957 
958 	/* If the ei doesn't have a dentry, neither should its children */
959 	for (int i = 0; i < ei->nr_entries; i++) {
960 		WARN_ON_ONCE(ei->d_children[i]);
961 	}
962 
963 	free_ei(ei);
964 }
965 
/*
 * Take the two references on @dentry that the removal path will drop
 * later. A NULL @dentry is ignored.
 */
static void unhook_dentry(struct dentry *dentry)
{
	if (dentry) {
		/*
		 * The first reference is expected by
		 * simple_recursive_removal(), which will include a dput().
		 */
		dget(dentry);

		/*
		 * The second reference is for the dput() in eventfs_workfn().
		 * That is required as that dput() will free the ei after
		 * the SRCU grace period is over.
		 */
		dget(dentry);
	}
}
983 
984 /**
985  * eventfs_remove_rec - remove eventfs dir or file from list
986  * @ei: eventfs_inode to be removed.
987  * @level: prevent recursion from going more than 3 levels deep.
988  *
989  * This function recursively removes eventfs_inodes which
990  * contains info of files and/or directories.
991  */
992 static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
993 {
994 	struct eventfs_inode *ei_child;
995 
996 	if (!ei)
997 		return;
998 	/*
999 	 * Check recursion depth. It should never be greater than 3:
1000 	 * 0 - events/
1001 	 * 1 - events/group/
1002 	 * 2 - events/group/event/
1003 	 * 3 - events/group/event/file
1004 	 */
1005 	if (WARN_ON_ONCE(level > 3))
1006 		return;
1007 
1008 	/* search for nested folders or files */
1009 	list_for_each_entry_srcu(ei_child, &ei->children, list,
1010 				 lockdep_is_held(&eventfs_mutex)) {
1011 		/* Children only have dentry if parent does */
1012 		WARN_ON_ONCE(ei_child->dentry && !ei->dentry);
1013 		eventfs_remove_rec(ei_child, level + 1);
1014 	}
1015 
1016 
1017 	ei->is_freed = 1;
1018 
1019 	for (int i = 0; i < ei->nr_entries; i++) {
1020 		if (ei->d_children[i]) {
1021 			/* Children only have dentry if parent does */
1022 			WARN_ON_ONCE(!ei->dentry);
1023 			unhook_dentry(ei->d_children[i]);
1024 		}
1025 	}
1026 
1027 	unhook_dentry(ei->dentry);
1028 
1029 	list_del_rcu(&ei->list);
1030 	call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
1031 }
1032 
1033 /**
1034  * eventfs_remove_dir - remove eventfs dir or file from list
1035  * @ei: eventfs_inode to be removed.
1036  *
1037  * This function acquire the eventfs_mutex lock and call eventfs_remove_rec()
1038  */
1039 void eventfs_remove_dir(struct eventfs_inode *ei)
1040 {
1041 	struct dentry *dentry;
1042 
1043 	if (!ei)
1044 		return;
1045 
1046 	mutex_lock(&eventfs_mutex);
1047 	dentry = ei->dentry;
1048 	eventfs_remove_rec(ei, 0);
1049 	mutex_unlock(&eventfs_mutex);
1050 
1051 	/*
1052 	 * If any of the ei children has a dentry, then the ei itself
1053 	 * must have a dentry.
1054 	 */
1055 	if (dentry)
1056 		simple_recursive_removal(dentry, NULL);
1057 }
1058 
1059 /**
1060  * eventfs_remove_events_dir - remove the top level eventfs directory
1061  * @ei: the event_inode returned by eventfs_create_events_dir().
1062  *
1063  * This function removes the events main directory
1064  */
1065 void eventfs_remove_events_dir(struct eventfs_inode *ei)
1066 {
1067 	struct dentry *dentry;
1068 
1069 	dentry = ei->dentry;
1070 	eventfs_remove_dir(ei);
1071 
1072 	/*
1073 	 * Matches the dget() done by tracefs_start_creating()
1074 	 * in eventfs_create_events_dir() when it the dentry was
1075 	 * created. In other words, it's a normal dentry that
1076 	 * sticks around while the other ei->dentry are created
1077 	 * and destroyed dynamically.
1078 	 */
1079 	dput(dentry);
1080 }
1081