xref: /linux/fs/tracefs/event_inode.c (revision a55719847da0a780baa84d0baee745358f144c39)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  *  event_inode.c - part of tracefs, a pseudo file system for activating tracing
4  *
5  *  Copyright (C) 2020-23 VMware Inc, author: Steven Rostedt <rostedt@goodmis.org>
6  *  Copyright (C) 2020-23 VMware Inc, author: Ajay Kaher <akaher@vmware.com>
7  *  Copyright (C) 2023 Google, author: Steven Rostedt <rostedt@goodmis.org>
8  *
9  *  eventfs is used to dynamically create inodes and dentries based on the
10  *  meta data provided by the tracing system.
11  *
12  *  eventfs stores the meta-data of files/dirs and holds off on creating
13  *  inodes/dentries of the files. When accessed, the eventfs will create the
14  *  inodes/dentries in a just-in-time (JIT) manner. The eventfs will clean up
15  *  and delete the inodes/dentries when they are no longer referenced.
16  */
17 #include <linux/fsnotify.h>
18 #include <linux/fs.h>
19 #include <linux/namei.h>
20 #include <linux/workqueue.h>
21 #include <linux/security.h>
22 #include <linux/tracefs.h>
23 #include <linux/kref.h>
24 #include <linux/delay.h>
25 #include "internal.h"
26 
/*
 * The eventfs_inode (ei) structure itself is guarded by SRCU. Removing an
 * ei unlinks it from its parent's list and sets is_freed, both while
 * eventfs_mutex is held. The ei is only freed once the SRCU grace period
 * has passed and the final dput() has been called.
 */
DEFINE_STATIC_SRCU(eventfs_srcu);

/*
 * eventfs_mutex guards the dentry of an eventfs_inode (ei). Every access
 * to ei->dentry has to happen with this mutex held, and only after
 * checking that ei->is_freed is clear. Once ei->is_freed is set, the
 * dentry is merely waiting for its last dput() before it goes away.
 */
static DEFINE_MUTEX(eventfs_mutex);
42 
43 /* Mode is unsigned short, use the upper bits for flags */
44 enum {
45 	EVENTFS_SAVE_MODE	= BIT(16),
46 	EVENTFS_SAVE_UID	= BIT(17),
47 	EVENTFS_SAVE_GID	= BIT(18),
48 };
49 
50 #define EVENTFS_MODE_MASK	(EVENTFS_SAVE_MODE - 1)
51 
/* Forward declarations for the operation tables defined further down */
static int eventfs_release(struct inode *inode, struct file *file);
static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx);
static int dcache_dir_open_wrapper(struct inode *inode, struct file *file);
static struct dentry *eventfs_root_lookup(struct inode *dir,
					  struct dentry *dentry,
					  unsigned int flags);
58 
59 static void update_attr(struct eventfs_attr *attr, struct iattr *iattr)
60 {
61 	unsigned int ia_valid = iattr->ia_valid;
62 
63 	if (ia_valid & ATTR_MODE) {
64 		attr->mode = (attr->mode & ~EVENTFS_MODE_MASK) |
65 			(iattr->ia_mode & EVENTFS_MODE_MASK) |
66 			EVENTFS_SAVE_MODE;
67 	}
68 	if (ia_valid & ATTR_UID) {
69 		attr->mode |= EVENTFS_SAVE_UID;
70 		attr->uid = iattr->ia_uid;
71 	}
72 	if (ia_valid & ATTR_GID) {
73 		attr->mode |= EVENTFS_SAVE_GID;
74 		attr->gid = iattr->ia_gid;
75 	}
76 }
77 
78 static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry,
79 			    struct iattr *iattr)
80 {
81 	const struct eventfs_entry *entry;
82 	struct eventfs_inode *ei;
83 	const char *name;
84 	int ret;
85 
86 	mutex_lock(&eventfs_mutex);
87 	ei = dentry->d_fsdata;
88 	if (ei->is_freed) {
89 		/* Do not allow changes if the event is about to be removed. */
90 		mutex_unlock(&eventfs_mutex);
91 		return -ENODEV;
92 	}
93 
94 	/* Preallocate the children mode array if necessary */
95 	if (!(dentry->d_inode->i_mode & S_IFDIR)) {
96 		if (!ei->entry_attrs) {
97 			ei->entry_attrs = kzalloc(sizeof(*ei->entry_attrs) * ei->nr_entries,
98 						  GFP_NOFS);
99 			if (!ei->entry_attrs) {
100 				ret = -ENOMEM;
101 				goto out;
102 			}
103 		}
104 	}
105 
106 	ret = simple_setattr(idmap, dentry, iattr);
107 	if (ret < 0)
108 		goto out;
109 
110 	/*
111 	 * If this is a dir, then update the ei cache, only the file
112 	 * mode is saved in the ei->m_children, and the ownership is
113 	 * determined by the parent directory.
114 	 */
115 	if (dentry->d_inode->i_mode & S_IFDIR) {
116 		update_attr(&ei->attr, iattr);
117 
118 	} else {
119 		name = dentry->d_name.name;
120 
121 		for (int i = 0; i < ei->nr_entries; i++) {
122 			entry = &ei->entries[i];
123 			if (strcmp(name, entry->name) == 0) {
124 				update_attr(&ei->entry_attrs[i], iattr);
125 				break;
126 			}
127 		}
128 	}
129  out:
130 	mutex_unlock(&eventfs_mutex);
131 	return ret;
132 }
133 
134 static const struct inode_operations eventfs_root_dir_inode_operations = {
135 	.lookup		= eventfs_root_lookup,
136 	.setattr	= eventfs_set_attr,
137 };
138 
139 static const struct inode_operations eventfs_file_inode_operations = {
140 	.setattr	= eventfs_set_attr,
141 };
142 
143 static const struct file_operations eventfs_file_operations = {
144 	.open		= dcache_dir_open_wrapper,
145 	.read		= generic_read_dir,
146 	.iterate_shared	= dcache_readdir_wrapper,
147 	.llseek		= generic_file_llseek,
148 	.release	= eventfs_release,
149 };
150 
151 static void update_inode_attr(struct dentry *dentry, struct inode *inode,
152 			      struct eventfs_attr *attr, umode_t mode)
153 {
154 	if (!attr) {
155 		inode->i_mode = mode;
156 		return;
157 	}
158 
159 	if (attr->mode & EVENTFS_SAVE_MODE)
160 		inode->i_mode = attr->mode & EVENTFS_MODE_MASK;
161 	else
162 		inode->i_mode = mode;
163 
164 	if (attr->mode & EVENTFS_SAVE_UID)
165 		inode->i_uid = attr->uid;
166 	else
167 		inode->i_uid = d_inode(dentry->d_parent)->i_uid;
168 
169 	if (attr->mode & EVENTFS_SAVE_GID)
170 		inode->i_gid = attr->gid;
171 	else
172 		inode->i_gid = d_inode(dentry->d_parent)->i_gid;
173 }
174 
175 /**
176  * create_file - create a file in the tracefs filesystem
177  * @name: the name of the file to create.
178  * @mode: the permission that the file should have.
179  * @attr: saved attributes changed by user
180  * @parent: parent dentry for this file.
181  * @data: something that the caller will want to get to later on.
182  * @fop: struct file_operations that should be used for this file.
183  *
184  * This function creates a dentry that represents a file in the eventsfs_inode
185  * directory. The inode.i_private pointer will point to @data in the open()
186  * call.
187  */
188 static struct dentry *create_file(const char *name, umode_t mode,
189 				  struct eventfs_attr *attr,
190 				  struct dentry *parent, void *data,
191 				  const struct file_operations *fop)
192 {
193 	struct tracefs_inode *ti;
194 	struct dentry *dentry;
195 	struct inode *inode;
196 
197 	if (!(mode & S_IFMT))
198 		mode |= S_IFREG;
199 
200 	if (WARN_ON_ONCE(!S_ISREG(mode)))
201 		return NULL;
202 
203 	WARN_ON_ONCE(!parent);
204 	dentry = eventfs_start_creating(name, parent);
205 
206 	if (IS_ERR(dentry))
207 		return dentry;
208 
209 	inode = tracefs_get_inode(dentry->d_sb);
210 	if (unlikely(!inode))
211 		return eventfs_failed_creating(dentry);
212 
213 	/* If the user updated the directory's attributes, use them */
214 	update_inode_attr(dentry, inode, attr, mode);
215 
216 	inode->i_op = &eventfs_file_inode_operations;
217 	inode->i_fop = fop;
218 	inode->i_private = data;
219 
220 	ti = get_tracefs(inode);
221 	ti->flags |= TRACEFS_EVENT_INODE;
222 	d_instantiate(dentry, inode);
223 	fsnotify_create(dentry->d_parent->d_inode, dentry);
224 	return eventfs_end_creating(dentry);
225 };
226 
227 /**
228  * create_dir - create a dir in the tracefs filesystem
229  * @ei: the eventfs_inode that represents the directory to create
230  * @parent: parent dentry for this file.
231  *
232  * This function will create a dentry for a directory represented by
233  * a eventfs_inode.
234  */
235 static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent)
236 {
237 	struct tracefs_inode *ti;
238 	struct dentry *dentry;
239 	struct inode *inode;
240 
241 	dentry = eventfs_start_creating(ei->name, parent);
242 	if (IS_ERR(dentry))
243 		return dentry;
244 
245 	inode = tracefs_get_inode(dentry->d_sb);
246 	if (unlikely(!inode))
247 		return eventfs_failed_creating(dentry);
248 
249 	/* If the user updated the directory's attributes, use them */
250 	update_inode_attr(dentry, inode, &ei->attr,
251 			  S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
252 
253 	inode->i_op = &eventfs_root_dir_inode_operations;
254 	inode->i_fop = &eventfs_file_operations;
255 
256 	ti = get_tracefs(inode);
257 	ti->flags |= TRACEFS_EVENT_INODE;
258 
259 	inc_nlink(inode);
260 	d_instantiate(dentry, inode);
261 	inc_nlink(dentry->d_parent->d_inode);
262 	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
263 	return eventfs_end_creating(dentry);
264 }
265 
266 static void free_ei(struct eventfs_inode *ei)
267 {
268 	kfree_const(ei->name);
269 	kfree(ei->d_children);
270 	kfree(ei->entry_attrs);
271 	kfree(ei);
272 }
273 
274 /**
275  * eventfs_set_ei_status_free - remove the dentry reference from an eventfs_inode
276  * @ti: the tracefs_inode of the dentry
277  * @dentry: dentry which has the reference to remove.
278  *
279  * Remove the association between a dentry from an eventfs_inode.
280  */
281 void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
282 {
283 	struct eventfs_inode *ei;
284 	int i;
285 
286 	mutex_lock(&eventfs_mutex);
287 
288 	ei = dentry->d_fsdata;
289 	if (!ei)
290 		goto out;
291 
292 	/* This could belong to one of the files of the ei */
293 	if (ei->dentry != dentry) {
294 		for (i = 0; i < ei->nr_entries; i++) {
295 			if (ei->d_children[i] == dentry)
296 				break;
297 		}
298 		if (WARN_ON_ONCE(i == ei->nr_entries))
299 			goto out;
300 		ei->d_children[i] = NULL;
301 	} else if (ei->is_freed) {
302 		free_ei(ei);
303 	} else {
304 		ei->dentry = NULL;
305 	}
306 
307 	dentry->d_fsdata = NULL;
308  out:
309 	mutex_unlock(&eventfs_mutex);
310 }
311 
312 /**
313  * create_file_dentry - create a dentry for a file of an eventfs_inode
314  * @ei: the eventfs_inode that the file will be created under
315  * @idx: the index into the d_children[] of the @ei
316  * @parent: The parent dentry of the created file.
317  * @name: The name of the file to create
318  * @mode: The mode of the file.
319  * @data: The data to use to set the inode of the file with on open()
320  * @fops: The fops of the file to be created.
321  * @lookup: If called by the lookup routine, in which case, dput() the created dentry.
322  *
323  * Create a dentry for a file of an eventfs_inode @ei and place it into the
324  * address located at @e_dentry. If the @e_dentry already has a dentry, then
325  * just do a dget() on it and return. Otherwise create the dentry and attach it.
326  */
327 static struct dentry *
328 create_file_dentry(struct eventfs_inode *ei, int idx,
329 		   struct dentry *parent, const char *name, umode_t mode, void *data,
330 		   const struct file_operations *fops, bool lookup)
331 {
332 	struct eventfs_attr *attr = NULL;
333 	struct dentry **e_dentry = &ei->d_children[idx];
334 	struct dentry *dentry;
335 
336 	WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
337 
338 	mutex_lock(&eventfs_mutex);
339 	if (ei->is_freed) {
340 		mutex_unlock(&eventfs_mutex);
341 		return NULL;
342 	}
343 	/* If the e_dentry already has a dentry, use it */
344 	if (*e_dentry) {
345 		/* lookup does not need to up the ref count */
346 		if (!lookup)
347 			dget(*e_dentry);
348 		mutex_unlock(&eventfs_mutex);
349 		return *e_dentry;
350 	}
351 
352 	/* ei->entry_attrs are protected by SRCU */
353 	if (ei->entry_attrs)
354 		attr = &ei->entry_attrs[idx];
355 
356 	mutex_unlock(&eventfs_mutex);
357 
358 	dentry = create_file(name, mode, attr, parent, data, fops);
359 
360 	mutex_lock(&eventfs_mutex);
361 
362 	if (IS_ERR_OR_NULL(dentry)) {
363 		/*
364 		 * When the mutex was released, something else could have
365 		 * created the dentry for this e_dentry. In which case
366 		 * use that one.
367 		 *
368 		 * If ei->is_freed is set, the e_dentry is currently on its
369 		 * way to being freed, don't return it. If e_dentry is NULL
370 		 * it means it was already freed.
371 		 */
372 		if (ei->is_freed)
373 			dentry = NULL;
374 		else
375 			dentry = *e_dentry;
376 		/* The lookup does not need to up the dentry refcount */
377 		if (dentry && !lookup)
378 			dget(dentry);
379 		mutex_unlock(&eventfs_mutex);
380 		return dentry;
381 	}
382 
383 	if (!*e_dentry && !ei->is_freed) {
384 		*e_dentry = dentry;
385 		dentry->d_fsdata = ei;
386 	} else {
387 		/*
388 		 * Should never happen unless we get here due to being freed.
389 		 * Otherwise it means two dentries exist with the same name.
390 		 */
391 		WARN_ON_ONCE(!ei->is_freed);
392 		dentry = NULL;
393 	}
394 	mutex_unlock(&eventfs_mutex);
395 
396 	if (lookup)
397 		dput(dentry);
398 
399 	return dentry;
400 }
401 
402 /**
403  * eventfs_post_create_dir - post create dir routine
404  * @ei: eventfs_inode of recently created dir
405  *
406  * Map the meta-data of files within an eventfs dir to their parent dentry
407  */
408 static void eventfs_post_create_dir(struct eventfs_inode *ei)
409 {
410 	struct eventfs_inode *ei_child;
411 	struct tracefs_inode *ti;
412 
413 	lockdep_assert_held(&eventfs_mutex);
414 
415 	/* srcu lock already held */
416 	/* fill parent-child relation */
417 	list_for_each_entry_srcu(ei_child, &ei->children, list,
418 				 srcu_read_lock_held(&eventfs_srcu)) {
419 		ei_child->d_parent = ei->dentry;
420 	}
421 
422 	ti = get_tracefs(ei->dentry->d_inode);
423 	ti->private = ei;
424 }
425 
426 /**
427  * create_dir_dentry - Create a directory dentry for the eventfs_inode
428  * @pei: The eventfs_inode parent of ei.
429  * @ei: The eventfs_inode to create the directory for
430  * @parent: The dentry of the parent of this directory
431  * @lookup: True if this is called by the lookup code
432  *
433  * This creates and attaches a directory dentry to the eventfs_inode @ei.
434  */
435 static struct dentry *
436 create_dir_dentry(struct eventfs_inode *pei, struct eventfs_inode *ei,
437 		  struct dentry *parent, bool lookup)
438 {
439 	struct dentry *dentry = NULL;
440 
441 	WARN_ON_ONCE(!inode_is_locked(parent->d_inode));
442 
443 	mutex_lock(&eventfs_mutex);
444 	if (pei->is_freed || ei->is_freed) {
445 		mutex_unlock(&eventfs_mutex);
446 		return NULL;
447 	}
448 	if (ei->dentry) {
449 		/* If the dentry already has a dentry, use it */
450 		dentry = ei->dentry;
451 		/* lookup does not need to up the ref count */
452 		if (!lookup)
453 			dget(dentry);
454 		mutex_unlock(&eventfs_mutex);
455 		return dentry;
456 	}
457 	mutex_unlock(&eventfs_mutex);
458 
459 	dentry = create_dir(ei, parent);
460 
461 	mutex_lock(&eventfs_mutex);
462 
463 	if (IS_ERR_OR_NULL(dentry) && !ei->is_freed) {
464 		/*
465 		 * When the mutex was released, something else could have
466 		 * created the dentry for this e_dentry. In which case
467 		 * use that one.
468 		 *
469 		 * If ei->is_freed is set, the e_dentry is currently on its
470 		 * way to being freed.
471 		 */
472 		dentry = ei->dentry;
473 		if (dentry && !lookup)
474 			dget(dentry);
475 		mutex_unlock(&eventfs_mutex);
476 		return dentry;
477 	}
478 
479 	if (!ei->dentry && !ei->is_freed) {
480 		ei->dentry = dentry;
481 		eventfs_post_create_dir(ei);
482 		dentry->d_fsdata = ei;
483 	} else {
484 		/*
485 		 * Should never happen unless we get here due to being freed.
486 		 * Otherwise it means two dentries exist with the same name.
487 		 */
488 		WARN_ON_ONCE(!ei->is_freed);
489 		dentry = NULL;
490 	}
491 	mutex_unlock(&eventfs_mutex);
492 
493 	if (lookup)
494 		dput(dentry);
495 
496 	return dentry;
497 }
498 
499 /**
500  * eventfs_root_lookup - lookup routine to create file/dir
501  * @dir: in which a lookup is being done
502  * @dentry: file/dir dentry
503  * @flags: Just passed to simple_lookup()
504  *
505  * Used to create dynamic file/dir with-in @dir, search with-in @ei
506  * list, if @dentry found go ahead and create the file/dir
507  */
508 
509 static struct dentry *eventfs_root_lookup(struct inode *dir,
510 					  struct dentry *dentry,
511 					  unsigned int flags)
512 {
513 	const struct file_operations *fops;
514 	const struct eventfs_entry *entry;
515 	struct eventfs_inode *ei_child;
516 	struct tracefs_inode *ti;
517 	struct eventfs_inode *ei;
518 	struct dentry *ei_dentry = NULL;
519 	struct dentry *ret = NULL;
520 	const char *name = dentry->d_name.name;
521 	bool created = false;
522 	umode_t mode;
523 	void *data;
524 	int idx;
525 	int i;
526 	int r;
527 
528 	ti = get_tracefs(dir);
529 	if (!(ti->flags & TRACEFS_EVENT_INODE))
530 		return NULL;
531 
532 	/* Grab srcu to prevent the ei from going away */
533 	idx = srcu_read_lock(&eventfs_srcu);
534 
535 	/*
536 	 * Grab the eventfs_mutex to consistent value from ti->private.
537 	 * This s
538 	 */
539 	mutex_lock(&eventfs_mutex);
540 	ei = READ_ONCE(ti->private);
541 	if (ei && !ei->is_freed)
542 		ei_dentry = READ_ONCE(ei->dentry);
543 	mutex_unlock(&eventfs_mutex);
544 
545 	if (!ei || !ei_dentry)
546 		goto out;
547 
548 	data = ei->data;
549 
550 	list_for_each_entry_srcu(ei_child, &ei->children, list,
551 				 srcu_read_lock_held(&eventfs_srcu)) {
552 		if (strcmp(ei_child->name, name) != 0)
553 			continue;
554 		ret = simple_lookup(dir, dentry, flags);
555 		if (IS_ERR(ret))
556 			goto out;
557 		create_dir_dentry(ei, ei_child, ei_dentry, true);
558 		created = true;
559 		break;
560 	}
561 
562 	if (created)
563 		goto out;
564 
565 	for (i = 0; i < ei->nr_entries; i++) {
566 		entry = &ei->entries[i];
567 		if (strcmp(name, entry->name) == 0) {
568 			void *cdata = data;
569 			mutex_lock(&eventfs_mutex);
570 			/* If ei->is_freed, then the event itself may be too */
571 			if (!ei->is_freed)
572 				r = entry->callback(name, &mode, &cdata, &fops);
573 			else
574 				r = -1;
575 			mutex_unlock(&eventfs_mutex);
576 			if (r <= 0)
577 				continue;
578 			ret = simple_lookup(dir, dentry, flags);
579 			if (IS_ERR(ret))
580 				goto out;
581 			create_file_dentry(ei, i, ei_dentry, name, mode, cdata,
582 					   fops, true);
583 			break;
584 		}
585 	}
586  out:
587 	srcu_read_unlock(&eventfs_srcu, idx);
588 	return ret;
589 }
590 
/*
 * Kept in file->private_data while an eventfs directory is open.
 */
struct dentry_list {
	/* the private_data that dcache_dir_open() set up */
	void			*cursor;
	/* NULL terminated array of the dentries referenced by the open */
	struct dentry		**dentries;
};
595 
596 /**
597  * eventfs_release - called to release eventfs file/dir
598  * @inode: inode to be released
599  * @file: file to be released (not used)
600  */
601 static int eventfs_release(struct inode *inode, struct file *file)
602 {
603 	struct tracefs_inode *ti;
604 	struct dentry_list *dlist = file->private_data;
605 	void *cursor;
606 	int i;
607 
608 	ti = get_tracefs(inode);
609 	if (!(ti->flags & TRACEFS_EVENT_INODE))
610 		return -EINVAL;
611 
612 	if (WARN_ON_ONCE(!dlist))
613 		return -EINVAL;
614 
615 	for (i = 0; dlist->dentries && dlist->dentries[i]; i++) {
616 		dput(dlist->dentries[i]);
617 	}
618 
619 	cursor = dlist->cursor;
620 	kfree(dlist->dentries);
621 	kfree(dlist);
622 	file->private_data = cursor;
623 	return dcache_dir_close(inode, file);
624 }
625 
626 static int add_dentries(struct dentry ***dentries, struct dentry *d, int cnt)
627 {
628 	struct dentry **tmp;
629 
630 	tmp = krealloc(*dentries, sizeof(d) * (cnt + 2), GFP_NOFS);
631 	if (!tmp)
632 		return -1;
633 	tmp[cnt] = d;
634 	tmp[cnt + 1] = NULL;
635 	*dentries = tmp;
636 	return 0;
637 }
638 
639 /**
640  * dcache_dir_open_wrapper - eventfs open wrapper
641  * @inode: not used
642  * @file: dir to be opened (to create it's children)
643  *
644  * Used to dynamic create file/dir with-in @file, all the
645  * file/dir will be created. If already created then references
646  * will be increased
647  */
648 static int dcache_dir_open_wrapper(struct inode *inode, struct file *file)
649 {
650 	const struct file_operations *fops;
651 	const struct eventfs_entry *entry;
652 	struct eventfs_inode *ei_child;
653 	struct tracefs_inode *ti;
654 	struct eventfs_inode *ei;
655 	struct dentry_list *dlist;
656 	struct dentry **dentries = NULL;
657 	struct dentry *parent = file_dentry(file);
658 	struct dentry *d;
659 	struct inode *f_inode = file_inode(file);
660 	const char *name = parent->d_name.name;
661 	umode_t mode;
662 	void *data;
663 	int cnt = 0;
664 	int idx;
665 	int ret;
666 	int i;
667 	int r;
668 
669 	ti = get_tracefs(f_inode);
670 	if (!(ti->flags & TRACEFS_EVENT_INODE))
671 		return -EINVAL;
672 
673 	if (WARN_ON_ONCE(file->private_data))
674 		return -EINVAL;
675 
676 	idx = srcu_read_lock(&eventfs_srcu);
677 
678 	mutex_lock(&eventfs_mutex);
679 	ei = READ_ONCE(ti->private);
680 	mutex_unlock(&eventfs_mutex);
681 
682 	if (!ei) {
683 		srcu_read_unlock(&eventfs_srcu, idx);
684 		return -EINVAL;
685 	}
686 
687 
688 	data = ei->data;
689 
690 	dlist = kmalloc(sizeof(*dlist), GFP_KERNEL);
691 	if (!dlist) {
692 		srcu_read_unlock(&eventfs_srcu, idx);
693 		return -ENOMEM;
694 	}
695 
696 	inode_lock(parent->d_inode);
697 	list_for_each_entry_srcu(ei_child, &ei->children, list,
698 				 srcu_read_lock_held(&eventfs_srcu)) {
699 		d = create_dir_dentry(ei, ei_child, parent, false);
700 		if (d) {
701 			ret = add_dentries(&dentries, d, cnt);
702 			if (ret < 0)
703 				break;
704 			cnt++;
705 		}
706 	}
707 
708 	for (i = 0; i < ei->nr_entries; i++) {
709 		void *cdata = data;
710 		entry = &ei->entries[i];
711 		name = entry->name;
712 		mutex_lock(&eventfs_mutex);
713 		/* If ei->is_freed, then the event itself may be too */
714 		if (!ei->is_freed)
715 			r = entry->callback(name, &mode, &cdata, &fops);
716 		else
717 			r = -1;
718 		mutex_unlock(&eventfs_mutex);
719 		if (r <= 0)
720 			continue;
721 		d = create_file_dentry(ei, i, parent, name, mode, cdata, fops, false);
722 		if (d) {
723 			ret = add_dentries(&dentries, d, cnt);
724 			if (ret < 0)
725 				break;
726 			cnt++;
727 		}
728 	}
729 	inode_unlock(parent->d_inode);
730 	srcu_read_unlock(&eventfs_srcu, idx);
731 	ret = dcache_dir_open(inode, file);
732 
733 	/*
734 	 * dcache_dir_open() sets file->private_data to a dentry cursor.
735 	 * Need to save that but also save all the dentries that were
736 	 * opened by this function.
737 	 */
738 	dlist->cursor = file->private_data;
739 	dlist->dentries = dentries;
740 	file->private_data = dlist;
741 	return ret;
742 }
743 
744 /*
745  * This just sets the file->private_data back to the cursor and back.
746  */
747 static int dcache_readdir_wrapper(struct file *file, struct dir_context *ctx)
748 {
749 	struct dentry_list *dlist = file->private_data;
750 	int ret;
751 
752 	file->private_data = dlist->cursor;
753 	ret = dcache_readdir(file, ctx);
754 	dlist->cursor = file->private_data;
755 	file->private_data = dlist;
756 	return ret;
757 }
758 
759 /**
760  * eventfs_create_dir - Create the eventfs_inode for this directory
761  * @name: The name of the directory to create.
762  * @parent: The eventfs_inode of the parent directory.
763  * @entries: A list of entries that represent the files under this directory
764  * @size: The number of @entries
765  * @data: The default data to pass to the files (an entry may override it).
766  *
767  * This function creates the descriptor to represent a directory in the
768  * eventfs. This descriptor is an eventfs_inode, and it is returned to be
769  * used to create other children underneath.
770  *
771  * The @entries is an array of eventfs_entry structures which has:
772  *	const char		 *name
773  *	eventfs_callback	callback;
774  *
775  * The name is the name of the file, and the callback is a pointer to a function
776  * that will be called when the file is reference (either by lookup or by
777  * reading a directory). The callback is of the prototype:
778  *
779  *    int callback(const char *name, umode_t *mode, void **data,
780  *		   const struct file_operations **fops);
781  *
782  * When a file needs to be created, this callback will be called with
783  *   name = the name of the file being created (so that the same callback
784  *          may be used for multiple files).
785  *   mode = a place to set the file's mode
786  *   data = A pointer to @data, and the callback may replace it, which will
787  *         cause the file created to pass the new data to the open() call.
788  *   fops = the fops to use for the created file.
789  *
790  * NB. @callback is called while holding internal locks of the eventfs
791  *     system. The callback must not call any code that might also call into
792  *     the tracefs or eventfs system or it will risk creating a deadlock.
793  */
794 struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode *parent,
795 					 const struct eventfs_entry *entries,
796 					 int size, void *data)
797 {
798 	struct eventfs_inode *ei;
799 
800 	if (!parent)
801 		return ERR_PTR(-EINVAL);
802 
803 	ei = kzalloc(sizeof(*ei), GFP_KERNEL);
804 	if (!ei)
805 		return ERR_PTR(-ENOMEM);
806 
807 	ei->name = kstrdup_const(name, GFP_KERNEL);
808 	if (!ei->name) {
809 		kfree(ei);
810 		return ERR_PTR(-ENOMEM);
811 	}
812 
813 	if (size) {
814 		ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
815 		if (!ei->d_children) {
816 			kfree_const(ei->name);
817 			kfree(ei);
818 			return ERR_PTR(-ENOMEM);
819 		}
820 	}
821 
822 	ei->entries = entries;
823 	ei->nr_entries = size;
824 	ei->data = data;
825 	INIT_LIST_HEAD(&ei->children);
826 	INIT_LIST_HEAD(&ei->list);
827 
828 	mutex_lock(&eventfs_mutex);
829 	if (!parent->is_freed) {
830 		list_add_tail(&ei->list, &parent->children);
831 		ei->d_parent = parent->dentry;
832 	}
833 	mutex_unlock(&eventfs_mutex);
834 
835 	/* Was the parent freed? */
836 	if (list_empty(&ei->list)) {
837 		free_ei(ei);
838 		ei = NULL;
839 	}
840 	return ei;
841 }
842 
843 /**
844  * eventfs_create_events_dir - create the top level events directory
845  * @name: The name of the top level directory to create.
846  * @parent: Parent dentry for this file in the tracefs directory.
847  * @entries: A list of entries that represent the files under this directory
848  * @size: The number of @entries
849  * @data: The default data to pass to the files (an entry may override it).
850  *
851  * This function creates the top of the trace event directory.
852  *
853  * See eventfs_create_dir() for use of @entries.
854  */
855 struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry *parent,
856 						const struct eventfs_entry *entries,
857 						int size, void *data)
858 {
859 	struct dentry *dentry = tracefs_start_creating(name, parent);
860 	struct eventfs_inode *ei;
861 	struct tracefs_inode *ti;
862 	struct inode *inode;
863 
864 	if (security_locked_down(LOCKDOWN_TRACEFS))
865 		return NULL;
866 
867 	if (IS_ERR(dentry))
868 		return ERR_CAST(dentry);
869 
870 	ei = kzalloc(sizeof(*ei), GFP_KERNEL);
871 	if (!ei)
872 		goto fail_ei;
873 
874 	inode = tracefs_get_inode(dentry->d_sb);
875 	if (unlikely(!inode))
876 		goto fail;
877 
878 	if (size) {
879 		ei->d_children = kzalloc(sizeof(*ei->d_children) * size, GFP_KERNEL);
880 		if (!ei->d_children)
881 			goto fail;
882 	}
883 
884 	ei->dentry = dentry;
885 	ei->entries = entries;
886 	ei->nr_entries = size;
887 	ei->data = data;
888 	ei->name = kstrdup_const(name, GFP_KERNEL);
889 	if (!ei->name)
890 		goto fail;
891 
892 	INIT_LIST_HEAD(&ei->children);
893 	INIT_LIST_HEAD(&ei->list);
894 
895 	ti = get_tracefs(inode);
896 	ti->flags |= TRACEFS_EVENT_INODE | TRACEFS_EVENT_TOP_INODE;
897 	ti->private = ei;
898 
899 	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
900 	inode->i_op = &eventfs_root_dir_inode_operations;
901 	inode->i_fop = &eventfs_file_operations;
902 
903 	dentry->d_fsdata = ei;
904 
905 	/* directory inodes start off with i_nlink == 2 (for "." entry) */
906 	inc_nlink(inode);
907 	d_instantiate(dentry, inode);
908 	inc_nlink(dentry->d_parent->d_inode);
909 	fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
910 	tracefs_end_creating(dentry);
911 
912 	return ei;
913 
914  fail:
915 	kfree(ei->d_children);
916 	kfree(ei);
917  fail_ei:
918 	tracefs_failed_creating(dentry);
919 	return ERR_PTR(-ENOMEM);
920 }
921 
922 static LLIST_HEAD(free_list);
923 
924 static void eventfs_workfn(struct work_struct *work)
925 {
926         struct eventfs_inode *ei, *tmp;
927         struct llist_node *llnode;
928 
929 	llnode = llist_del_all(&free_list);
930         llist_for_each_entry_safe(ei, tmp, llnode, llist) {
931 		/* This dput() matches the dget() from unhook_dentry() */
932 		for (int i = 0; i < ei->nr_entries; i++) {
933 			if (ei->d_children[i])
934 				dput(ei->d_children[i]);
935 		}
936 		/* This should only get here if it had a dentry */
937 		if (!WARN_ON_ONCE(!ei->dentry))
938 			dput(ei->dentry);
939         }
940 }
941 
942 static DECLARE_WORK(eventfs_work, eventfs_workfn);
943 
944 static void free_rcu_ei(struct rcu_head *head)
945 {
946 	struct eventfs_inode *ei = container_of(head, struct eventfs_inode, rcu);
947 
948 	if (ei->dentry) {
949 		/* Do not free the ei until all references of dentry are gone */
950 		if (llist_add(&ei->llist, &free_list))
951 			queue_work(system_unbound_wq, &eventfs_work);
952 		return;
953 	}
954 
955 	/* If the ei doesn't have a dentry, neither should its children */
956 	for (int i = 0; i < ei->nr_entries; i++) {
957 		WARN_ON_ONCE(ei->d_children[i]);
958 	}
959 
960 	free_ei(ei);
961 }
962 
/*
 * Take the two references on @dentry that the removal path consumes
 * later on. A NULL @dentry is ignored.
 */
static void unhook_dentry(struct dentry *dentry)
{
	if (dentry) {
		/*
		 * First reference: expected by simple_recursive_removal(),
		 * which will include a dput().
		 */
		dget(dentry);

		/*
		 * Second reference: for the dput() in eventfs_workfn().
		 * That is required as that dput() will free the ei after
		 * the SRCU grace period is over.
		 */
		dget(dentry);
	}
}
980 
981 /**
982  * eventfs_remove_rec - remove eventfs dir or file from list
983  * @ei: eventfs_inode to be removed.
984  * @level: prevent recursion from going more than 3 levels deep.
985  *
986  * This function recursively removes eventfs_inodes which
987  * contains info of files and/or directories.
988  */
989 static void eventfs_remove_rec(struct eventfs_inode *ei, int level)
990 {
991 	struct eventfs_inode *ei_child;
992 
993 	if (!ei)
994 		return;
995 	/*
996 	 * Check recursion depth. It should never be greater than 3:
997 	 * 0 - events/
998 	 * 1 - events/group/
999 	 * 2 - events/group/event/
1000 	 * 3 - events/group/event/file
1001 	 */
1002 	if (WARN_ON_ONCE(level > 3))
1003 		return;
1004 
1005 	/* search for nested folders or files */
1006 	list_for_each_entry_srcu(ei_child, &ei->children, list,
1007 				 lockdep_is_held(&eventfs_mutex)) {
1008 		/* Children only have dentry if parent does */
1009 		WARN_ON_ONCE(ei_child->dentry && !ei->dentry);
1010 		eventfs_remove_rec(ei_child, level + 1);
1011 	}
1012 
1013 
1014 	ei->is_freed = 1;
1015 
1016 	for (int i = 0; i < ei->nr_entries; i++) {
1017 		if (ei->d_children[i]) {
1018 			/* Children only have dentry if parent does */
1019 			WARN_ON_ONCE(!ei->dentry);
1020 			unhook_dentry(ei->d_children[i]);
1021 		}
1022 	}
1023 
1024 	unhook_dentry(ei->dentry);
1025 
1026 	list_del_rcu(&ei->list);
1027 	call_srcu(&eventfs_srcu, &ei->rcu, free_rcu_ei);
1028 }
1029 
1030 /**
1031  * eventfs_remove_dir - remove eventfs dir or file from list
1032  * @ei: eventfs_inode to be removed.
1033  *
1034  * This function acquire the eventfs_mutex lock and call eventfs_remove_rec()
1035  */
1036 void eventfs_remove_dir(struct eventfs_inode *ei)
1037 {
1038 	struct dentry *dentry;
1039 
1040 	if (!ei)
1041 		return;
1042 
1043 	mutex_lock(&eventfs_mutex);
1044 	dentry = ei->dentry;
1045 	eventfs_remove_rec(ei, 0);
1046 	mutex_unlock(&eventfs_mutex);
1047 
1048 	/*
1049 	 * If any of the ei children has a dentry, then the ei itself
1050 	 * must have a dentry.
1051 	 */
1052 	if (dentry)
1053 		simple_recursive_removal(dentry, NULL);
1054 }
1055 
1056 /**
1057  * eventfs_remove_events_dir - remove the top level eventfs directory
1058  * @ei: the event_inode returned by eventfs_create_events_dir().
1059  *
1060  * This function removes the events main directory
1061  */
1062 void eventfs_remove_events_dir(struct eventfs_inode *ei)
1063 {
1064 	struct dentry *dentry;
1065 
1066 	dentry = ei->dentry;
1067 	eventfs_remove_dir(ei);
1068 
1069 	/*
1070 	 * Matches the dget() done by tracefs_start_creating()
1071 	 * in eventfs_create_events_dir() when it the dentry was
1072 	 * created. In other words, it's a normal dentry that
1073 	 * sticks around while the other ei->dentry are created
1074 	 * and destroyed dynamically.
1075 	 */
1076 	dput(dentry);
1077 }
1078