xref: /linux/fs/tracefs/event_inode.c (revision 5b9b41617bf3e1282cc60f07d3d52e62399aa4ba)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  event_inode.c - part of tracefs, a pseudo file system for activating tracing
4  *
5  *  Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org>
6  *  Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com>
7  *  Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org>
8  *
9  *  eventfs is used to dynamically create inodes and dentries based on the
10  *  meta data provided by the tracing system.
11  *
12  *  eventfs stores the meta-data of files/dirs and holds off on creating
13  *  inodes/dentries of the files. When accessed, the eventfs will create the
14  *  inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up
15  *  and delete the inodes/dentries when they are no longer referenced.
16  */
17 #include <linux/fsnotify.h>
18 #include <linux/fs.h>
19 #include <linux/namei.h>
20 #include <linux/workqueue.h>
21 #include <linux/security.h>
22 #include <linux/tracefs.h>
23 #include <linux/kref.h>
24 #include <linux/delay.h>
25 #include "internal.h"
26 
/*
 * eventfs_mutex guards the dentry pointer stored in an eventfs_inode (ei).
 * Every access to ei->dentry has to be made with this mutex held, and only
 * after checking that ei->is_freed is clear. Once ei->is_freed is set, the
 * dentry is being torn down and goes away with the final dput() on it.
 */
static DEFINE_MUTEX(eventfs_mutex);

/*
 * The eventfs_inode (ei) structure itself is covered by SRCU. On removal
 * it is unlinked from its parent's list and gets is_freed set (both under
 * eventfs_mutex). The ei is freed only once the SRCU grace period has
 * elapsed and the last dput() has been called.
 */
DEFINE_STATIC_SRCU(eventfs_srcu);
42 
43 /* Mode is unsigned short, use the upper bits for flags */
44 enum {
45 	EVENTFS_SAVE_MODE	= BIT(16),
46 	EVENTFS_SAVE_UID	= BIT(17),
47 	EVENTFS_SAVE_GID	= BIT(18),
48 };
49 
50 #define EVENTFS_MODE_MASK	(EVENTFS_SAVE_MODE - 1)
51 
/* Forward declarations of the handlers used by the operation tables below */
static struct dentry *eventfs_root_lookup(struct inode *dir, struct dentry *dentry,
					  unsigned int flags);
static int dcache_dir_open_wrapper(struct inode *inode, struct file *file);
static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx);
static int eventfs_release(struct inode *inode, struct file *file);
58 
59 static void update_attr(struct eventfs_attr *attr, struct iattr *iattr)
60 {
61 	unsigned int ia_valid = iattr->ia_valid;
62 
63 	if (ia_valid & ATTR_MODE) {
64 		attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) |
65 			(iattr->ia_mode & EVENTFS_MODE_MASK) |
66 			EVENTFS_SAVE_MODE;
67 	}
68 	if (ia_valid & ATTR_UID) {
69 		attr->mode |= EVENTFS_SAVE_UID;
70 		attr->uid = iattr->ia_uid;
71 	}
72 	if (ia_valid & ATTR_GID) {
73 		attr->mode |= EVENTFS_SAVE_GID;
74 		attr->gid = iattr->ia_gid;
75 	}
76 }
77 
78 static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
79 			    struct iattr *iattr)
80 {
81 	const struct eventfs_entry *entry;
82 	struct eventfs_inode *ei;
83 	const char *name;
84 	int ret;
85 
86 	mutex_lock(&eventfs_mutex);
87 	ei = dentry->d_fsdata;
88 	if (ei->is_freed) {
89 		/* Do not allow changes if the event is about to be removed. */
90 		mutex_unlock(&eventfs_mutex);
91 		return -ENODEV;
92 	}
93 
94 	/* Preallocate the children mode array if necessary */
95 	if (!(dentry->d_inode->i_mode & S_IFDIR)) {
96 		if (!ei->entry_attrs) {
97 			ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries,
98 						  GFP_NOFS);
99 			if (!ei->entry_attrs) {
100 				ret = -ENOMEM;
101 				goto out;
102 			}
103 		}
104 	}
105 
106 	ret = simple_setattr(idmap, dentry, iattr);
107 	if (ret < 0)
108 		goto out;
109 
110 	/*
111 	 * If this is a dir, then update the ei cache, only the file
112 	 * mode is saved in the ei->m_children, and the ownership is
113 	 * determined by the parent directory.
114 	 */
115 	if (dentry->d_inode->i_mode & S_IFDIR) {
116 		/*
117 		 * The events directory dentry is never freed, unless its
118 		 * part of an instance that is deleted. It's attr is the
119 		 * default for its child files and directories.
120 		 * Do not update it. It's not used for its own mode or ownership
121 		 */
122 		if (!ei->is_events)
123 			update_attr(&ei->attr, iattr);
124 
125 	} else {
126 		name = dentry->d_name.name;
127 
128 		for (int i = 0; i < ei->nr_entries; i++) {
129 			entry = &ei->entries[i];
130 			if (strcmp(name, entry->name) == 0) {
131 				update_attr(&ei->entry_attrs[i], iattr);
132 				break;
133 			}
134 		}
135 	}
136  out:
137 	mutex_unlock(&eventfs_mutex);
138 	return ret;
139 }
140 
141 static const struct inode_operations eventfs_root_dir_inode_operations = {
142 	.lookup		= eventfs_root_lookup,
143 	.setattr	= eventfs_set_attr,
144 };
145 
146 static const struct inode_operations eventfs_file_inode_operations = {
147 	.setattr	= eventfs_set_attr,
148 };
149 
150 static const struct file_operations eventfs_file_operations = {
151 	.open		= dcache_dir_open_wrapper,
152 	.read		= generic_read_dir,
153 	.iterate_shared	= dcache_readdir_wrapper,
154 	.llseek		= generic_file_llseek,
155 	.release	= eventfs_release,
156 };
157 
158 /* Return the evenfs_inode of the "events" directory */
159 static struct eventfs_inode *eventfs_find_events(struct dentry *dentry)
160 {
161 	struct eventfs_inode *ei;
162 
163 	mutex_lock(&eventfs_mutex);
164 	do {
165 		/* The parent always has an ei, except for events itself */
166 		ei = dentry->d_parent->d_fsdata;
167 
168 		/*
169 		 * If the ei is being freed, the ownership of the children
170 		 * doesn't matter.
171 		 */
172 		if (ei->is_freed) {
173 			ei = NULL;
174 			break;
175 		}
176 
177 		dentry = ei->dentry;
178 	} while (!ei->is_events);
179 	mutex_unlock(&eventfs_mutex);
180 
181 	return ei;
182 }
183 
184 static void update_inode_attr(struct dentry *dentry, struct inode *inode,
185 			      struct eventfs_attr *attr, umode_t mode)
186 {
187 	struct eventfs_inode *events_ei = eventfs_find_events(dentry);
188 
189 	if (!events_ei)
190 		return;
191 
192 	inode->i_mode = mode;
193 	inode->i_uid = events_ei->attr.uid;
194 	inode->i_gid = events_ei->attr.gid;
195 
196 	if (!attr)
197 		return;
198 
199 	if (attr->mode & EVENTFS_SAVE_MODE)
200 		inode->i_mode = attr->mode & EVENTFS_MODE_MASK;
201 
202 	if (attr->mode & EVENTFS_SAVE_UID)
203 		inode->i_uid = attr->uid;
204 
205 	if (attr->mode & EVENTFS_SAVE_GID)
206 		inode->i_gid = attr->gid;
207 }
208 
209 static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level)
210 {
211 	struct eventfs_inode *ei_child;
212 
213 	/* at most we have events/system/event */
214 	if (WARN_ON_ONCE(level > 3))
215 		return;
216 
217 	ei->attr.gid = gid;
218 
219 	if (ei->entry_attrs) {
220 		for (int i = 0; i < ei->nr_entries; i++) {
221 			ei->entry_attrs[i].gid = gid;
222 		}
223 	}
224 
225 	/*
226 	 * Only eventfs_inode with dentries are updated, make sure
227 	 * all eventfs_inodes are updated. If one of the children
228 	 * do not have a dentry, this function must traverse it.
229 	 */
230 	list_for_each_entry_srcu(ei_child, &ei->children, list,
231 				 srcu_read_lock_held(&eventfs_srcu)) {
232 		if (!ei_child->dentry)
233 			update_gid(ei_child, gid, level + 1);
234 	}
235 }
236 
237 void eventfs_update_gid(struct dentry *dentry, kgid_t gid)
238 {
239 	struct eventfs_inode *ei = dentry->d_fsdata;
240 	int idx;
241 
242 	idx = srcu_read_lock(&eventfs_srcu);
243 	update_gid(ei, gid, 0);
244 	srcu_read_unlock(&eventfs_srcu, idx);
245 }
246 
247 /**
248  * create_file - create a file in the tracefs filesystem
249  * @name: the name of the file to create.
250  * @mode: the permission that the file should have.
251  * @attr: saved attributes changed by user
252  * @parent: parent dentry for this file.
253  * @data: something that the caller will want to get to later on.
254  * @fop: struct file_operations that should be used for this file.
255  *
256  * This function creates a dentry that represents a file in the eventsfs_inode
257  * directory. The inode.i_private pointer will point to @data in the open()
258  * call.
259  */
260 static struct dentry *create_file(const char *name, umode_t mode,
261 				  struct eventfs_attr *attr,
262 				  struct dentry *parent, void *data,
263 				  const struct file_operations *fop)
264 {
265 	struct tracefs_inode *ti;
266 	struct dentry *dentry;
267 	struct inode *inode;
268 
269 	if (!(mode & S_IFMT))
270 		mode |= S_IFREG;
271 
272 	if (WARN_ON_ONCE(!S_ISREG(mode)))
273 		return NULL;
274 
275 	WARN_ON_ONCE(!parent);
276 	dentry = eventfs_start_creating(name, parent);
277 
278 	if (IS_ERR(dentry))
279 		return dentry;
280 
281 	inode = tracefs_get_inode(dentry->d_sb);
282 	if (unlikely(!inode))
283 		return eventfs_failed_creating(dentry);
284 
285 	/* If the user updated the directory's attributes, use them */
286 	update_inode_attr(dentry, inode, attr, mode);
287 
288 	inode->i_op = &eventfs_file_inode_operations;
289 	inode->i_fop = fop;
290 	inode->i_private = data;
291 
292 	ti = get_tracefs(inode);
293 	ti->flags |= TRACEFS_EVENT_INODE;
294 	d_instantiate(dentry, inode);
295 	fsnotify_create(dentry->d_parent->d_inode, dentry);
296 	return eventfs_end_creating(dentry);
297 };
298 
299 /**
300  * create_dir - create a dir in the tracefs filesystem
301  * @ei: the eventfs_inode that represents the directory to create
302  * @parent: parent dentry for this file.
303  *
304  * This function will create a dentry for a directory represented by
305  * a eventfs_inode.
306  */
307 static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent)
308 {
309 	struct tracefs_inode *ti;
310 	struct dentry *dentry;
311 	struct inode *inode;
312 
313 	dentry = eventfs_start_creating(ei->name, parent);
314 	if (IS_ERR(dentry))
315 		return dentry;
316 
317 	inode = tracefs_get_inode(dentry->d_sb);
318 	if (unlikely(!inode))
319 		return eventfs_failed_creating(dentry);
320 
321 	/* If the user updated the directory's attributes, use them */
322 	update_inode_attr(dentry, inode, &ei->attr,
323 			  S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
324 
325 	inode->i_op = &eventfs_root_dir_inode_operations;
326 	inode->i_fop = &eventfs_file_operations;
327 
328 	ti = get_tracefs(inode);
329 	ti->flags |= TRACEFS_EVENT_INODE;
330 
331 	inc_nlink(inode);
332 	d_instantiate(dentry, inode);
333 	inc_nlink(dentry->d_parent->d_inode);
334 	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
335 	return eventfs_end_creating(dentry);
336 }
337 
338 static void free_ei(struct eventfs_inode *ei)
339 {
340 	kfree_const(ei->name);
341 	kfree(ei->d_children);
342 	kfree(ei->entry_attrs);
343 	kfree(ei);
344 }
345 
346 /**
347  * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
348  * @ti: the tracefs_inode of the dentry
349  * @dentry: dentry which has the reference to remove.
350  *
351  * Remove the association between a dentry from an eventfs_inode.
352  */
353 void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
354 {
355 	struct eventfs_inode *ei;
356 	int i;
357 
358 	mutex_lock(&eventfs_mutex);
359 
360 	ei = dentry->d_fsdata;
361 	if (!ei)
362 		goto out;
363 
364 	/* This could belong to one of the files of the ei */
365 	if (ei->dentry != dentry) {
366 		for (i = 0; i < ei->nr_entries; i++) {
367 			if (ei->d_children[i] == dentry)
368 				break;
369 		}
370 		if (WARN_ON_ONCE(i == ei->nr_entries))
371 			goto out;
372 		ei->d_children[i] = NULL;
373 	} else if (ei->is_freed) {
374 		free_ei(ei);
375 	} else {
376 		ei->dentry = NULL;
377 	}
378 
379 	dentry->d_fsdata = NULL;
380  out:
381 	mutex_unlock(&eventfs_mutex);
382 }
383 
384 /**
385  * create_file_dentry - create a dentry for a file of an eventfs_inode
386  * @ei: the eventfs_inode that the file will be created under
387  * @idx: the index into the d_children[] of the @ei
388  * @parent: The parent dentry of the created file.
389  * @name: The name of the file to create
390  * @mode: The mode of the file.
391  * @data: The data to use to set the inode of the file with on open()
392  * @fops: The fops of the file to be created.
393  * @lookup: If called by the lookup routine, in which case, dput() the created dentry.
394  *
395  * Create a dentry for a file of an eventfs_inode @ei and place it into the
396  * address located at @e_dentry. If the @e_dentry already has a dentry, then
397  * just do a dget() on it and return. Otherwise create the dentry and attach it.
398  */
399 static struct dentry *
400 create_file_dentry(struct eventfs_inode *ei, int idx,
401 		   struct dentry *parent, const char *name, umode_t mode, void *data,
402 		   const struct file_operations *fops, bool lookup)
403 {
404 	struct eventfs_attr *attr = NULL;
405 	struct dentry **e_dentry = &ei->d_children[idx];
406 	struct dentry *dentry;
407 
408 	WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
409 
410 	mutex_lock(&eventfs_mutex);
411 	if (ei->is_freed) {
412 		mutex_unlock(&eventfs_mutex);
413 		return NULL;
414 	}
415 	/* If the e_dentry already has a dentry, use it */
416 	if (*e_dentry) {
417 		/* lookup does not need to up the ref count */
418 		if (!lookup)
419 			dget(*e_dentry);
420 		mutex_unlock(&eventfs_mutex);
421 		return *e_dentry;
422 	}
423 
424 	/* ei->entry_attrs are protected by SRCU */
425 	if (ei->entry_attrs)
426 		attr = &ei->entry_attrs[idx];
427 
428 	mutex_unlock(&eventfs_mutex);
429 
430 	dentry = create_file(name, mode, attr, parent, data, fops);
431 
432 	mutex_lock(&eventfs_mutex);
433 
434 	if (IS_ERR_OR_NULL(dentry)) {
435 		/*
436 		 * When the mutex was released, something else could have
437 		 * created the dentry for this e_dentry. In which case
438 		 * use that one.
439 		 *
440 		 * If ei->is_freed is set, the e_dentry is currently on its
441 		 * way to being freed, don't return it. If e_dentry is NULL
442 		 * it means it was already freed.
443 		 */
444 		if (ei->is_freed)
445 			dentry = NULL;
446 		else
447 			dentry = *e_dentry;
448 		/* The lookup does not need to up the dentry refcount */
449 		if (dentry && !lookup)
450 			dget(dentry);
451 		mutex_unlock(&eventfs_mutex);
452 		return dentry;
453 	}
454 
455 	if (!*e_dentry && !ei->is_freed) {
456 		*e_dentry = dentry;
457 		dentry->d_fsdata = ei;
458 	} else {
459 		/*
460 		 * Should never happen unless we get here due to being freed.
461 		 * Otherwise it means two dentries exist with the same name.
462 		 */
463 		WARN_ON_ONCE(!ei->is_freed);
464 		dentry = NULL;
465 	}
466 	mutex_unlock(&eventfs_mutex);
467 
468 	if (lookup)
469 		dput(dentry);
470 
471 	return dentry;
472 }
473 
474 /**
475  * eventfs_post_create_dir - post create dir routine
476  * @ei: eventfs_inode of recently created dir
477  *
478  * Map the meta-data of files within an eventfs dir to their parent dentry
479  */
480 static void eventfs_post_create_dir(struct eventfs_inode *ei)
481 {
482 	struct eventfs_inode *ei_child;
483 	struct tracefs_inode *ti;
484 
485 	lockdep_assert_held(&eventfs_mutex);
486 
487 	/* srcu lock already held */
488 	/* fill parent-child relation */
489 	list_for_each_entry_srcu(ei_child, &ei->children, list,
490 				 srcu_read_lock_held(&eventfs_srcu)) {
491 		ei_child->d_parent = ei->dentry;
492 	}
493 
494 	ti = get_tracefs(ei->dentry->d_inode);
495 	ti->private = ei;
496 }
497 
498 /**
499  * create_dir_dentry - Create a directory dentry for the eventfs_inode
500  * @pei: The eventfs_inode parent of ei.
501  * @ei: The eventfs_inode to create the directory for
502  * @parent: The dentry of the parent of this directory
503  * @lookup: True if this is called by the lookup code
504  *
505  * This creates and attaches a directory dentry to the eventfs_inode @ei.
506  */
507 static struct dentry *
508 create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
509 		  struct dentry *parent, bool lookup)
510 {
511 	struct dentry *dentry = NULL;
512 
513 	WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
514 
515 	mutex_lock(&eventfs_mutex);
516 	if (pei->is_freed || ei->is_freed) {
517 		mutex_unlock(&eventfs_mutex);
518 		return NULL;
519 	}
520 	if (ei->dentry) {
521 		/* If the dentry already has a dentry, use it */
522 		dentry = ei->dentry;
523 		/* lookup does not need to up the ref count */
524 		if (!lookup)
525 			dget(dentry);
526 		mutex_unlock(&eventfs_mutex);
527 		return dentry;
528 	}
529 	mutex_unlock(&eventfs_mutex);
530 
531 	dentry = create_dir(ei, parent);
532 
533 	mutex_lock(&eventfs_mutex);
534 
535 	if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
536 		/*
537 		 * When the mutex was released, something else could have
538 		 * created the dentry for this e_dentry. In which case
539 		 * use that one.
540 		 *
541 		 * If ei->is_freed is set, the e_dentry is currently on its
542 		 * way to being freed.
543 		 */
544 		dentry = ei->dentry;
545 		if (dentry && !lookup)
546 			dget(dentry);
547 		mutex_unlock(&eventfs_mutex);
548 		return dentry;
549 	}
550 
551 	if (!ei->dentry && !ei->is_freed) {
552 		ei->dentry = dentry;
553 		eventfs_post_create_dir(ei);
554 		dentry->d_fsdata = ei;
555 	} else {
556 		/*
557 		 * Should never happen unless we get here due to being freed.
558 		 * Otherwise it means two dentries exist with the same name.
559 		 */
560 		WARN_ON_ONCE(!ei->is_freed);
561 		dentry = NULL;
562 	}
563 	mutex_unlock(&eventfs_mutex);
564 
565 	if (lookup)
566 		dput(dentry);
567 
568 	return dentry;
569 }
570 
571 /**
572  * eventfs_root_lookup - lookup routine to create file/dir
573  * @dir: in which a lookup is being done
574  * @dentry: file/dir dentry
575  * @flags: Just passed to simple_lookup()
576  *
577  * Used to create dynamic file/dir with-in @dir, search with-in @ei
578  * list, if @dentry found go ahead and create the file/dir
579  */
580 
581 static struct dentry *eventfs_root_lookup(struct inode *dir,
582 					  struct dentry *dentry,
583 					  unsigned int flags)
584 {
585 	const struct file_operations *fops;
586 	const struct eventfs_entry *entry;
587 	struct eventfs_inode *ei_child;
588 	struct tracefs_inode *ti;
589 	struct eventfs_inode *ei;
590 	struct dentry *ei_dentry = NULL;
591 	struct dentry *ret = NULL;
592 	const char *name = dentry->d_name.name;
593 	bool created = false;
594 	umode_t mode;
595 	void *data;
596 	int idx;
597 	int i;
598 	int r;
599 
600 	ti = get_tracefs(dir);
601 	if (!(ti->flags & TRACEFS_EVENT_INODE))
602 		return NULL;
603 
604 	/* Grab srcu to prevent the ei from going away */
605 	idx = srcu_read_lock(&eventfs_srcu);
606 
607 	/*
608 	 * Grab the eventfs_mutex to consistent value from ti->private.
609 	 * This s
610 	 */
611 	mutex_lock(&eventfs_mutex);
612 	ei = READ_ONCE(ti->private);
613 	if (ei && !ei->is_freed)
614 		ei_dentry = READ_ONCE(ei->dentry);
615 	mutex_unlock(&eventfs_mutex);
616 
617 	if (!ei || !ei_dentry)
618 		goto out;
619 
620 	data = ei->data;
621 
622 	list_for_each_entry_srcu(ei_child, &ei->children, list,
623 				 srcu_read_lock_held(&eventfs_srcu)) {
624 		if (strcmp(ei_child->name, name) != 0)
625 			continue;
626 		ret = simple_lookup(dir, dentry, flags);
627 		if (IS_ERR(ret))
628 			goto out;
629 		create_dir_dentry(ei, ei_child, ei_dentry, true);
630 		created = true;
631 		break;
632 	}
633 
634 	if (created)
635 		goto out;
636 
637 	for (i = 0; i < ei->nr_entries; i++) {
638 		entry = &ei->entries[i];
639 		if (strcmp(name, entry->name) == 0) {
640 			void *cdata = data;
641 			mutex_lock(&eventfs_mutex);
642 			/* If ei->is_freed, then the event itself may be too */
643 			if (!ei->is_freed)
644 				r = entry->callback(name, &mode, &cdata, &fops);
645 			else
646 				r = -1;
647 			mutex_unlock(&eventfs_mutex);
648 			if (r <= 0)
649 				continue;
650 			ret = simple_lookup(dir, dentry, flags);
651 			if (IS_ERR(ret))
652 				goto out;
653 			create_file_dentry(ei, i, ei_dentry, name, mode, cdata,
654 					   fops, true);
655 			break;
656 		}
657 	}
658  out:
659 	srcu_read_unlock(&eventfs_srcu, idx);
660 	return ret;
661 }
662 
/*
 * Per-open state of an eventfs directory, kept in file->private_data.
 *
 * @cursor:   the private_data value that dcache_dir_open() installed
 * @dentries: NULL terminated array of the dentries referenced by the
 *            open; may be NULL
 */
struct dentry_list {
	void			*cursor;
	struct dentry		**dentries;
};
667 
668 /**
669  * eventfs_release - called to release eventfs file/dir
670  * @inode: inode to be released
671  * @file: file to be released (not used)
672  */
673 static int eventfs_release(struct inode *inode, struct file *file)
674 {
675 	struct tracefs_inode *ti;
676 	struct dentry_list *dlist = file->private_data;
677 	void *cursor;
678 	int i;
679 
680 	ti = get_tracefs(inode);
681 	if (!(ti->flags & TRACEFS_EVENT_INODE))
682 		return -EINVAL;
683 
684 	if (WARN_ON_ONCE(!dlist))
685 		return -EINVAL;
686 
687 	for (i = 0; dlist->dentries && dlist->dentries[i]; i++) {
688 		dput(dlist->dentries[i]);
689 	}
690 
691 	cursor = dlist->cursor;
692 	kfree(dlist->dentries);
693 	kfree(dlist);
694 	file->private_data = cursor;
695 	return dcache_dir_close(inode, file);
696 }
697 
698 static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt)
699 {
700 	struct dentry **tmp;
701 
702 	tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS);
703 	if (!tmp)
704 		return -1;
705 	tmp[cnt] = d;
706 	tmp[cnt + 1] = NULL;
707 	*dentries = tmp;
708 	return 0;
709 }
710 
711 /**
712  * dcache_dir_open_wrapper - eventfs open wrapper
713  * @inode: not used
714  * @file: dir to be opened (to create it's children)
715  *
716  * Used to dynamic create file/dir with-in @file, all the
717  * file/dir will be created. If already created then references
718  * will be increased
719  */
720 static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
721 {
722 	const struct file_operations *fops;
723 	const struct eventfs_entry *entry;
724 	struct eventfs_inode *ei_child;
725 	struct tracefs_inode *ti;
726 	struct eventfs_inode *ei;
727 	struct dentry_list *dlist;
728 	struct dentry **dentries = NULL;
729 	struct dentry *parent = file_dentry(file);
730 	struct dentry *d;
731 	struct inode *f_inode = file_inode(file);
732 	const char *name = parent->d_name.name;
733 	umode_t mode;
734 	void *data;
735 	int cnt = 0;
736 	int idx;
737 	int ret;
738 	int i;
739 	int r;
740 
741 	ti = get_tracefs(f_inode);
742 	if (!(ti->flags & TRACEFS_EVENT_INODE))
743 		return -EINVAL;
744 
745 	if (WARN_ON_ONCE(file->private_data))
746 		return -EINVAL;
747 
748 	idx = srcu_read_lock(&eventfs_srcu);
749 
750 	mutex_lock(&eventfs_mutex);
751 	ei = READ_ONCE(ti->private);
752 	mutex_unlock(&eventfs_mutex);
753 
754 	if (!ei) {
755 		srcu_read_unlock(&eventfs_srcu, idx);
756 		return -EINVAL;
757 	}
758 
759 
760 	data = ei->data;
761 
762 	dlist = kmalloc(sizeof(*dlist), GFP_KERNEL);
763 	if (!dlist) {
764 		srcu_read_unlock(&eventfs_srcu, idx);
765 		return -ENOMEM;
766 	}
767 
768 	inode_lock(parent->d_inode);
769 	list_for_each_entry_srcu(ei_child, &ei->children, list,
770 				 srcu_read_lock_held(&eventfs_srcu)) {
771 		d = create_dir_dentry(ei, ei_child, parent, false);
772 		if (d) {
773 			ret = add_dentries(&dentries, d, cnt);
774 			if (ret < 0)
775 				break;
776 			cnt++;
777 		}
778 	}
779 
780 	for (i = 0; i < ei->nr_entries; i++) {
781 		void *cdata = data;
782 		entry = &ei->entries[i];
783 		name = entry->name;
784 		mutex_lock(&eventfs_mutex);
785 		/* If ei->is_freed, then the event itself may be too */
786 		if (!ei->is_freed)
787 			r = entry->callback(name, &mode, &cdata, &fops);
788 		else
789 			r = -1;
790 		mutex_unlock(&eventfs_mutex);
791 		if (r <= 0)
792 			continue;
793 		d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false);
794 		if (d) {
795 			ret = add_dentries(&dentries, d, cnt);
796 			if (ret < 0)
797 				break;
798 			cnt++;
799 		}
800 	}
801 	inode_unlock(parent->d_inode);
802 	srcu_read_unlock(&eventfs_srcu, idx);
803 	ret = dcache_dir_open(inode, file);
804 
805 	/*
806 	 * dcache_dir_open() sets file->private_data to a dentry cursor.
807 	 * Need to save that but also save all the dentries that were
808 	 * opened by this function.
809 	 */
810 	dlist->cursor = file->private_data;
811 	dlist->dentries = dentries;
812 	file->private_data = dlist;
813 	return ret;
814 }
815 
816 /*
817  * This just sets the file->private_data back to the cursor and back.
818  */
819 static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx)
820 {
821 	struct dentry_list *dlist = file->private_data;
822 	int ret;
823 
824 	file->private_data = dlist->cursor;
825 	ret = dcache_readdir(file, ctx);
826 	dlist->cursor = file->private_data;
827 	file->private_data = dlist;
828 	return ret;
829 }
830 
831 /**
832  * eventfs_create_dir - Create the eventfs_inode for this directory
833  * @name: The name of the directory to create.
834  * @parent: The eventfs_inode of the parent directory.
835  * @entries: A list of entries that represent the files under this directory
836  * @size: The number of @entries
837  * @data: The default data to pass to the files (an entry may override it).
838  *
839  * This function creates the descriptor to represent a directory in the
840  * eventfs. This descriptor is an eventfs_inode, and it is returned to be
841  * used to create other children underneath.
842  *
843  * The @entries is an array of eventfs_entry structures which has:
844  *	const char		 *name
845  *	eventfs_callback	callback;
846  *
847  * The name is the name of the file, and the callback is a pointer to a function
848  * that will be called when the file is reference (either by lookup or by
849  * reading a directory). The callback is of the prototype:
850  *
851  *    int callback(const char *name, umode_t *mode, void **data,
852  *		   const struct file_operations **fops);
853  *
854  * When a file needs to be created, this callback will be called with
855  *   name = the name of the file being created (so that the same callback
856  *          may be used for multiple files).
857  *   mode = a place to set the file's mode
858  *   data = A pointer to @data, and the callback may replace it, which will
859  *         cause the file created to pass the new data to the open() call.
860  *   fops = the fops to use for the created file.
861  *
862  * NB. @callback is called while holding internal locks of the eventfs
863  *     system. The callback must not call any code that might also call into
864  *     the tracefs or eventfs system or it will risk creating a deadlock.
865  */
866 struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
867 					 const struct eventfs_entry *entries,
868 					 int size, void *data)
869 {
870 	struct eventfs_inode *ei;
871 
872 	if (!parent)
873 		return ERR_PTR(-EINVAL);
874 
875 	ei = kzalloc(sizeof(*ei), GFP_KERNEL);
876 	if (!ei)
877 		return ERR_PTR(-ENOMEM);
878 
879 	ei->name = kstrdup_const(name, GFP_KERNEL);
880 	if (!ei->name) {
881 		kfree(ei);
882 		return ERR_PTR(-ENOMEM);
883 	}
884 
885 	if (size) {
886 		ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
887 		if (!ei->d_children) {
888 			kfree_const(ei->name);
889 			kfree(ei);
890 			return ERR_PTR(-ENOMEM);
891 		}
892 	}
893 
894 	ei->entries = entries;
895 	ei->nr_entries = size;
896 	ei->data = data;
897 	INIT_LIST_HEAD(&ei->children);
898 	INIT_LIST_HEAD(&ei->list);
899 
900 	mutex_lock(&eventfs_mutex);
901 	if (!parent->is_freed) {
902 		list_add_tail(&ei->list, &parent->children);
903 		ei->d_parent = parent->dentry;
904 	}
905 	mutex_unlock(&eventfs_mutex);
906 
907 	/* Was the parent freed? */
908 	if (list_empty(&ei->list)) {
909 		free_ei(ei);
910 		ei = NULL;
911 	}
912 	return ei;
913 }
914 
915 /**
916  * eventfs_create_events_dir - create the top level events directory
917  * @name: The name of the top level directory to create.
918  * @parent: Parent dentry for this file in the tracefs directory.
919  * @entries: A list of entries that represent the files under this directory
920  * @size: The number of @entries
921  * @data: The default data to pass to the files (an entry may override it).
922  *
923  * This function creates the top of the trace event directory.
924  *
925  * See eventfs_create_dir() for use of @entries.
926  */
927 struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
928 						const struct eventfs_entry *entries,
929 						int size, void *data)
930 {
931 	struct dentry *dentry = tracefs_start_creating(name, parent);
932 	struct eventfs_inode *ei;
933 	struct tracefs_inode *ti;
934 	struct inode *inode;
935 	kuid_t uid;
936 	kgid_t gid;
937 
938 	if (security_locked_down(LOCKDOWN_TRACEFS))
939 		return NULL;
940 
941 	if (IS_ERR(dentry))
942 		return ERR_CAST(dentry);
943 
944 	ei = kzalloc(sizeof(*ei), GFP_KERNEL);
945 	if (!ei)
946 		goto fail_ei;
947 
948 	inode = tracefs_get_inode(dentry->d_sb);
949 	if (unlikely(!inode))
950 		goto fail;
951 
952 	if (size) {
953 		ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
954 		if (!ei->d_children)
955 			goto fail;
956 	}
957 
958 	ei->dentry = dentry;
959 	ei->entries = entries;
960 	ei->nr_entries = size;
961 	ei->is_events = 1;
962 	ei->data = data;
963 	ei->name = kstrdup_const(name, GFP_KERNEL);
964 	if (!ei->name)
965 		goto fail;
966 
967 	/* Save the ownership of this directory */
968 	uid = d_inode(dentry->d_parent)->i_uid;
969 	gid = d_inode(dentry->d_parent)->i_gid;
970 
971 	/* This is used as the default ownership of the files and directories */
972 	ei->attr.uid = uid;
973 	ei->attr.gid = gid;
974 
975 	INIT_LIST_HEAD(&ei->children);
976 	INIT_LIST_HEAD(&ei->list);
977 
978 	ti = get_tracefs(inode);
979 	ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE;
980 	ti->private = ei;
981 
982 	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
983 	inode->i_uid = uid;
984 	inode->i_gid = gid;
985 	inode->i_op = &eventfs_root_dir_inode_operations;
986 	inode->i_fop = &eventfs_file_operations;
987 
988 	dentry->d_fsdata = ei;
989 
990 	/* directory inodes start off with i_nlink == 2 (for "." entry) */
991 	inc_nlink(inode);
992 	d_instantiate(dentry, inode);
993 	inc_nlink(dentry->d_parent->d_inode);
994 	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
995 	tracefs_end_creating(dentry);
996 
997 	return ei;
998 
999  fail:
1000 	kfree(ei->d_children);
1001 	kfree(ei);
1002  fail_ei:
1003 	tracefs_failed_creating(dentry);
1004 	return ERR_PTR(-ENOMEM);
1005 }
1006 
1007 static LLIST_HEAD(free_list);
1008 
1009 static void eventfs_workfn(struct work_struct *work)
1010 {
1011         struct eventfs_inode *ei, *tmp;
1012         struct llist_node *llnode;
1013 
1014 	llnode = llist_del_all(&free_list);
1015         llist_for_each_entry_safe(ei, tmp, llnode, llist) {
1016 		/* This dput() matches the dget() from unhook_dentry() */
1017 		for (int i = 0; i < ei->nr_entries; i++) {
1018 			if (ei->d_children[i])
1019 				dput(ei->d_children[i]);
1020 		}
1021 		/* This should only get here if it had a dentry */
1022 		if (!WARN_ON_ONCE(!ei->dentry))
1023 			dput(ei->dentry);
1024         }
1025 }
1026 
1027 static DECLARE_WORK(eventfs_work, eventfs_workfn);
1028 
1029 static void free_rcu_ei(struct rcu_head *head)
1030 {
1031 	struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
1032 
1033 	if (ei->dentry) {
1034 		/* Do not free the ei until all references of dentry are gone */
1035 		if (llist_add(&ei->llist, &free_list))
1036 			queue_work(system_unbound_wq, &eventfs_work);
1037 		return;
1038 	}
1039 
1040 	/* If the ei doesn't have a dentry, neither should its children */
1041 	for (int i = 0; i < ei->nr_entries; i++) {
1042 		WARN_ON_ONCE(ei->d_children[i]);
1043 	}
1044 
1045 	free_ei(ei);
1046 }
1047 
/*
 * Take the two references on @dentry that the removal path gives back
 * later. A NULL @dentry is ignored.
 */
static void unhook_dentry(struct dentry *dentry)
{
	if (dentry) {
		/*
		 * The first reference is the one expected by
		 * simple_recursive_removal(), which will include a dput().
		 */
		dget(dentry);

		/*
		 * The second reference is for the dput() in
		 * eventfs_workfn(). That is required as that dput() will
		 * free the ei after the SRCU grace period is over.
		 */
		dget(dentry);
	}
}
1065 
1066 /**
1067  * eventfs_remove_rec - remove eventfs dir or file from list
1068  * @ei: eventfs_inode to be removed.
1069  * @level: prevent recursion from going more than 3 levels deep.
1070  *
1071  * This function recursively removes eventfs_inodes which
1072  * contains info of files and/or directories.
1073  */
1074 static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
1075 {
1076 	struct eventfs_inode *ei_child;
1077 
1078 	if (!ei)
1079 		return;
1080 	/*
1081 	 * Check recursion depth. It should never be greater than 3:
1082 	 * 0 - events/
1083 	 * 1 - events/group/
1084 	 * 2 - events/group/event/
1085 	 * 3 - events/group/event/file
1086 	 */
1087 	if (WARN_ON_ONCE(level > 3))
1088 		return;
1089 
1090 	/* search for nested folders or files */
1091 	list_for_each_entry_srcu(ei_child, &ei->children, list,
1092 				 lockdep_is_held(&eventfs_mutex)) {
1093 		/* Children only have dentry if parent does */
1094 		WARN_ON_ONCE(ei_child->dentry && !ei->dentry);
1095 		eventfs_remove_rec(ei_child, level + 1);
1096 	}
1097 
1098 
1099 	ei->is_freed = 1;
1100 
1101 	for (int i = 0; i < ei->nr_entries; i++) {
1102 		if (ei->d_children[i]) {
1103 			/* Children only have dentry if parent does */
1104 			WARN_ON_ONCE(!ei->dentry);
1105 			unhook_dentry(ei->d_children[i]);
1106 		}
1107 	}
1108 
1109 	unhook_dentry(ei->dentry);
1110 
1111 	list_del_rcu(&ei->list);
1112 	call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
1113 }
1114 
1115 /**
1116  * eventfs_remove_dir - remove eventfs dir or file from list
1117  * @ei: eventfs_inode to be removed.
1118  *
1119  * This function acquire the eventfs_mutex lock and call eventfs_remove_rec()
1120  */
1121 void eventfs_remove_dir(struct eventfs_inode *ei)
1122 {
1123 	struct dentry *dentry;
1124 
1125 	if (!ei)
1126 		return;
1127 
1128 	mutex_lock(&eventfs_mutex);
1129 	dentry = ei->dentry;
1130 	eventfs_remove_rec(ei, 0);
1131 	mutex_unlock(&eventfs_mutex);
1132 
1133 	/*
1134 	 * If any of the ei children has a dentry, then the ei itself
1135 	 * must have a dentry.
1136 	 */
1137 	if (dentry)
1138 		simple_recursive_removal(dentry, NULL);
1139 }
1140 
1141 /**
1142  * eventfs_remove_events_dir - remove the top level eventfs directory
1143  * @ei: the event_inode returned by eventfs_create_events_dir().
1144  *
1145  * This function removes the events main directory
1146  */
1147 void eventfs_remove_events_dir(struct eventfs_inode *ei)
1148 {
1149 	struct dentry *dentry;
1150 
1151 	dentry = ei->dentry;
1152 	eventfs_remove_dir(ei);
1153 
1154 	/*
1155 	 * Matches the dget() done by tracefs_start_creating()
1156 	 * in eventfs_create_events_dir() when it the dentry was
1157 	 * created. In other words, it's a normal dentry that
1158 	 * sticks around while the other ei->dentry are created
1159 	 * and destroyed dynamically.
1160 	 */
1161 	dput(dentry);
1162 }
1163