xref: /linux/fs/notify/inotify/inotify_user.c (revision 3dbc6fb6a3c8a7dc164ae330ab024a3fe65ae53e)
1272eb014SEric Paris /*
2272eb014SEric Paris  * fs/inotify_user.c - inotify support for userspace
3272eb014SEric Paris  *
4272eb014SEric Paris  * Authors:
5272eb014SEric Paris  *	John McCutchan	<ttb@tentacle.dhs.org>
6272eb014SEric Paris  *	Robert Love	<rml@novell.com>
7272eb014SEric Paris  *
8272eb014SEric Paris  * Copyright (C) 2005 John McCutchan
9272eb014SEric Paris  * Copyright 2006 Hewlett-Packard Development Company, L.P.
10272eb014SEric Paris  *
1163c882a0SEric Paris  * Copyright (C) 2009 Eric Paris <Red Hat Inc>
 * inotify was largely rewritten to make use of the fsnotify infrastructure
1363c882a0SEric Paris  *
14272eb014SEric Paris  * This program is free software; you can redistribute it and/or modify it
15272eb014SEric Paris  * under the terms of the GNU General Public License as published by the
16272eb014SEric Paris  * Free Software Foundation; either version 2, or (at your option) any
17272eb014SEric Paris  * later version.
18272eb014SEric Paris  *
19272eb014SEric Paris  * This program is distributed in the hope that it will be useful, but
20272eb014SEric Paris  * WITHOUT ANY WARRANTY; without even the implied warranty of
21272eb014SEric Paris  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22272eb014SEric Paris  * General Public License for more details.
23272eb014SEric Paris  */
24272eb014SEric Paris 
25272eb014SEric Paris #include <linux/file.h>
2663c882a0SEric Paris #include <linux/fs.h> /* struct inode */
2763c882a0SEric Paris #include <linux/fsnotify_backend.h>
2863c882a0SEric Paris #include <linux/idr.h>
2963c882a0SEric Paris #include <linux/init.h> /* module_init */
30272eb014SEric Paris #include <linux/inotify.h>
3163c882a0SEric Paris #include <linux/kernel.h> /* roundup() */
3263c882a0SEric Paris #include <linux/namei.h> /* LOOKUP_FOLLOW */
3363c882a0SEric Paris #include <linux/sched.h> /* struct user */
3463c882a0SEric Paris #include <linux/slab.h> /* struct kmem_cache */
35272eb014SEric Paris #include <linux/syscalls.h>
3663c882a0SEric Paris #include <linux/types.h>
37c44dcc56SAl Viro #include <linux/anon_inodes.h>
3863c882a0SEric Paris #include <linux/uaccess.h>
3963c882a0SEric Paris #include <linux/poll.h>
4063c882a0SEric Paris #include <linux/wait.h>
4163c882a0SEric Paris 
4263c882a0SEric Paris #include "inotify.h"
43272eb014SEric Paris 
44272eb014SEric Paris #include <asm/ioctls.h>
45272eb014SEric Paris 
46272eb014SEric Paris /* these are configurable via /proc/sys/fs/inotify/ */
47272eb014SEric Paris static int inotify_max_user_instances __read_mostly;
48272eb014SEric Paris static int inotify_max_queued_events __read_mostly;
4963c882a0SEric Paris int inotify_max_user_watches __read_mostly;
5063c882a0SEric Paris 
5163c882a0SEric Paris static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
5263c882a0SEric Paris struct kmem_cache *event_priv_cachep __read_mostly;
53272eb014SEric Paris 
54272eb014SEric Paris /*
5563c882a0SEric Paris  * When inotify registers a new group it increments this and uses that
5663c882a0SEric Paris  * value as an offset to set the fsnotify group "name" and priority.
57272eb014SEric Paris  */
5863c882a0SEric Paris static atomic_t inotify_grp_num;
59272eb014SEric Paris 
#ifdef CONFIG_SYSCTL

#include <linux/sysctl.h>

/* shared .extra1 value for every entry handled by proc_dointvec_minmax */
static int zero;

/*
 * sysctl entries for the three inotify limits.  Each entry is an int,
 * mode 0644, handled by proc_dointvec_minmax with &zero as .extra1.
 * The empty entry terminates the table.
 */
ctl_table inotify_table[] = {
	{
		.procname	= "max_user_instances",
		.data		= &inotify_max_user_instances,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
	},
	{
		.procname	= "max_user_watches",
		.data		= &inotify_max_user_watches,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
	},
	{
		.procname	= "max_queued_events",
		.data		= &inotify_max_queued_events,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
	},
	{ }
};
#endif /* CONFIG_SYSCTL */
94272eb014SEric Paris 
9563c882a0SEric Paris static inline __u32 inotify_arg_to_mask(u32 arg)
96272eb014SEric Paris {
9763c882a0SEric Paris 	__u32 mask;
9863c882a0SEric Paris 
9963c882a0SEric Paris 	/* everything should accept their own ignored and cares about children */
10063c882a0SEric Paris 	mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD);
10163c882a0SEric Paris 
10263c882a0SEric Paris 	/* mask off the flags used to open the fd */
10363c882a0SEric Paris 	mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT));
10463c882a0SEric Paris 
10563c882a0SEric Paris 	return mask;
106272eb014SEric Paris }
107272eb014SEric Paris 
10863c882a0SEric Paris static inline u32 inotify_mask_to_arg(__u32 mask)
109272eb014SEric Paris {
11063c882a0SEric Paris 	return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED |
11163c882a0SEric Paris 		       IN_Q_OVERFLOW);
112272eb014SEric Paris }
113272eb014SEric Paris 
11463c882a0SEric Paris /* intofiy userspace file descriptor functions */
115272eb014SEric Paris static unsigned int inotify_poll(struct file *file, poll_table *wait)
116272eb014SEric Paris {
11763c882a0SEric Paris 	struct fsnotify_group *group = file->private_data;
118272eb014SEric Paris 	int ret = 0;
119272eb014SEric Paris 
12063c882a0SEric Paris 	poll_wait(file, &group->notification_waitq, wait);
12163c882a0SEric Paris 	mutex_lock(&group->notification_mutex);
12263c882a0SEric Paris 	if (!fsnotify_notify_queue_is_empty(group))
123272eb014SEric Paris 		ret = POLLIN | POLLRDNORM;
12463c882a0SEric Paris 	mutex_unlock(&group->notification_mutex);
125272eb014SEric Paris 
126272eb014SEric Paris 	return ret;
127272eb014SEric Paris }
128272eb014SEric Paris 
1293632dee2SVegard Nossum /*
1303632dee2SVegard Nossum  * Get an inotify_kernel_event if one exists and is small
1313632dee2SVegard Nossum  * enough to fit in "count". Return an error pointer if
1323632dee2SVegard Nossum  * not large enough.
1333632dee2SVegard Nossum  *
13463c882a0SEric Paris  * Called with the group->notification_mutex held.
1353632dee2SVegard Nossum  */
13663c882a0SEric Paris static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
1373632dee2SVegard Nossum 					    size_t count)
1383632dee2SVegard Nossum {
1393632dee2SVegard Nossum 	size_t event_size = sizeof(struct inotify_event);
14063c882a0SEric Paris 	struct fsnotify_event *event;
1413632dee2SVegard Nossum 
14263c882a0SEric Paris 	if (fsnotify_notify_queue_is_empty(group))
1433632dee2SVegard Nossum 		return NULL;
1443632dee2SVegard Nossum 
14563c882a0SEric Paris 	event = fsnotify_peek_notify_event(group);
14663c882a0SEric Paris 
14783cb10f0SEric Paris 	if (event->name_len)
14883cb10f0SEric Paris 		event_size += roundup(event->name_len + 1, event_size);
1493632dee2SVegard Nossum 
1503632dee2SVegard Nossum 	if (event_size > count)
1513632dee2SVegard Nossum 		return ERR_PTR(-EINVAL);
1523632dee2SVegard Nossum 
15363c882a0SEric Paris 	/* held the notification_mutex the whole time, so this is the
15463c882a0SEric Paris 	 * same event we peeked above */
15563c882a0SEric Paris 	fsnotify_remove_notify_event(group);
15663c882a0SEric Paris 
15763c882a0SEric Paris 	return event;
1583632dee2SVegard Nossum }
1593632dee2SVegard Nossum 
1603632dee2SVegard Nossum /*
1613632dee2SVegard Nossum  * Copy an event to user space, returning how much we copied.
1623632dee2SVegard Nossum  *
1633632dee2SVegard Nossum  * We already checked that the event size is smaller than the
1643632dee2SVegard Nossum  * buffer we had in "get_one_event()" above.
1653632dee2SVegard Nossum  */
16663c882a0SEric Paris static ssize_t copy_event_to_user(struct fsnotify_group *group,
16763c882a0SEric Paris 				  struct fsnotify_event *event,
1683632dee2SVegard Nossum 				  char __user *buf)
1693632dee2SVegard Nossum {
17063c882a0SEric Paris 	struct inotify_event inotify_event;
17163c882a0SEric Paris 	struct fsnotify_event_private_data *fsn_priv;
17263c882a0SEric Paris 	struct inotify_event_private_data *priv;
1733632dee2SVegard Nossum 	size_t event_size = sizeof(struct inotify_event);
174b962e731SBrian Rogers 	size_t name_len = 0;
1753632dee2SVegard Nossum 
17663c882a0SEric Paris 	/* we get the inotify watch descriptor from the event private data */
17763c882a0SEric Paris 	spin_lock(&event->lock);
17863c882a0SEric Paris 	fsn_priv = fsnotify_remove_priv_from_event(group, event);
17963c882a0SEric Paris 	spin_unlock(&event->lock);
18063c882a0SEric Paris 
18163c882a0SEric Paris 	if (!fsn_priv)
18263c882a0SEric Paris 		inotify_event.wd = -1;
18363c882a0SEric Paris 	else {
18463c882a0SEric Paris 		priv = container_of(fsn_priv, struct inotify_event_private_data,
18563c882a0SEric Paris 				    fsnotify_event_priv_data);
18663c882a0SEric Paris 		inotify_event.wd = priv->wd;
18763c882a0SEric Paris 		inotify_free_event_priv(fsn_priv);
18863c882a0SEric Paris 	}
18963c882a0SEric Paris 
190b962e731SBrian Rogers 	/*
191b962e731SBrian Rogers 	 * round up event->name_len so it is a multiple of event_size
1920db501bdSEric W. Biederman 	 * plus an extra byte for the terminating '\0'.
1930db501bdSEric W. Biederman 	 */
194b962e731SBrian Rogers 	if (event->name_len)
1950db501bdSEric W. Biederman 		name_len = roundup(event->name_len + 1, event_size);
19663c882a0SEric Paris 	inotify_event.len = name_len;
19763c882a0SEric Paris 
19863c882a0SEric Paris 	inotify_event.mask = inotify_mask_to_arg(event->mask);
19963c882a0SEric Paris 	inotify_event.cookie = event->sync_cookie;
20063c882a0SEric Paris 
20163c882a0SEric Paris 	/* send the main event */
20263c882a0SEric Paris 	if (copy_to_user(buf, &inotify_event, event_size))
2033632dee2SVegard Nossum 		return -EFAULT;
2043632dee2SVegard Nossum 
2053632dee2SVegard Nossum 	buf += event_size;
2063632dee2SVegard Nossum 
20763c882a0SEric Paris 	/*
20863c882a0SEric Paris 	 * fsnotify only stores the pathname, so here we have to send the pathname
20963c882a0SEric Paris 	 * and then pad that pathname out to a multiple of sizeof(inotify_event)
21063c882a0SEric Paris 	 * with zeros.  I get my zeros from the nul_inotify_event.
21163c882a0SEric Paris 	 */
21263c882a0SEric Paris 	if (name_len) {
21363c882a0SEric Paris 		unsigned int len_to_zero = name_len - event->name_len;
21463c882a0SEric Paris 		/* copy the path name */
21563c882a0SEric Paris 		if (copy_to_user(buf, event->file_name, event->name_len))
2163632dee2SVegard Nossum 			return -EFAULT;
21763c882a0SEric Paris 		buf += event->name_len;
2183632dee2SVegard Nossum 
2190db501bdSEric W. Biederman 		/* fill userspace with 0's */
2200db501bdSEric W. Biederman 		if (clear_user(buf, len_to_zero))
22163c882a0SEric Paris 			return -EFAULT;
22263c882a0SEric Paris 		buf += len_to_zero;
22363c882a0SEric Paris 		event_size += name_len;
2243632dee2SVegard Nossum 	}
22563c882a0SEric Paris 
2263632dee2SVegard Nossum 	return event_size;
2273632dee2SVegard Nossum }
2283632dee2SVegard Nossum 
229272eb014SEric Paris static ssize_t inotify_read(struct file *file, char __user *buf,
230272eb014SEric Paris 			    size_t count, loff_t *pos)
231272eb014SEric Paris {
23263c882a0SEric Paris 	struct fsnotify_group *group;
23363c882a0SEric Paris 	struct fsnotify_event *kevent;
234272eb014SEric Paris 	char __user *start;
235272eb014SEric Paris 	int ret;
236272eb014SEric Paris 	DEFINE_WAIT(wait);
237272eb014SEric Paris 
238272eb014SEric Paris 	start = buf;
23963c882a0SEric Paris 	group = file->private_data;
240272eb014SEric Paris 
241272eb014SEric Paris 	while (1) {
24263c882a0SEric Paris 		prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
243272eb014SEric Paris 
24463c882a0SEric Paris 		mutex_lock(&group->notification_mutex);
24563c882a0SEric Paris 		kevent = get_one_event(group, count);
24663c882a0SEric Paris 		mutex_unlock(&group->notification_mutex);
247272eb014SEric Paris 
2483632dee2SVegard Nossum 		if (kevent) {
2493632dee2SVegard Nossum 			ret = PTR_ERR(kevent);
2503632dee2SVegard Nossum 			if (IS_ERR(kevent))
251272eb014SEric Paris 				break;
25263c882a0SEric Paris 			ret = copy_event_to_user(group, kevent, buf);
25363c882a0SEric Paris 			fsnotify_put_event(kevent);
2543632dee2SVegard Nossum 			if (ret < 0)
2553632dee2SVegard Nossum 				break;
2563632dee2SVegard Nossum 			buf += ret;
2573632dee2SVegard Nossum 			count -= ret;
2583632dee2SVegard Nossum 			continue;
259272eb014SEric Paris 		}
260272eb014SEric Paris 
2613632dee2SVegard Nossum 		ret = -EAGAIN;
2623632dee2SVegard Nossum 		if (file->f_flags & O_NONBLOCK)
263272eb014SEric Paris 			break;
2643632dee2SVegard Nossum 		ret = -EINTR;
2653632dee2SVegard Nossum 		if (signal_pending(current))
2663632dee2SVegard Nossum 			break;
2673632dee2SVegard Nossum 
2683632dee2SVegard Nossum 		if (start != buf)
2693632dee2SVegard Nossum 			break;
270272eb014SEric Paris 
271272eb014SEric Paris 		schedule();
272272eb014SEric Paris 	}
273272eb014SEric Paris 
27463c882a0SEric Paris 	finish_wait(&group->notification_waitq, &wait);
2753632dee2SVegard Nossum 	if (start != buf && ret != -EFAULT)
276272eb014SEric Paris 		ret = buf - start;
277272eb014SEric Paris 	return ret;
278272eb014SEric Paris }
279272eb014SEric Paris 
280272eb014SEric Paris static int inotify_fasync(int fd, struct file *file, int on)
281272eb014SEric Paris {
28263c882a0SEric Paris 	struct fsnotify_group *group = file->private_data;
283272eb014SEric Paris 
28463c882a0SEric Paris 	return fasync_helper(fd, file, on, &group->inotify_data.fa) >= 0 ? 0 : -EIO;
285272eb014SEric Paris }
286272eb014SEric Paris 
287272eb014SEric Paris static int inotify_release(struct inode *ignored, struct file *file)
288272eb014SEric Paris {
28963c882a0SEric Paris 	struct fsnotify_group *group = file->private_data;
290bdae997fSKeith Packard 	struct user_struct *user = group->inotify_data.user;
291272eb014SEric Paris 
29263c882a0SEric Paris 	fsnotify_clear_marks_by_group(group);
293272eb014SEric Paris 
29463c882a0SEric Paris 	/* free this group, matching get was inotify_init->fsnotify_obtain_group */
29563c882a0SEric Paris 	fsnotify_put_group(group);
296272eb014SEric Paris 
297bdae997fSKeith Packard 	atomic_dec(&user->inotify_devs);
298bdae997fSKeith Packard 
299272eb014SEric Paris 	return 0;
300272eb014SEric Paris }
301272eb014SEric Paris 
302272eb014SEric Paris static long inotify_ioctl(struct file *file, unsigned int cmd,
303272eb014SEric Paris 			  unsigned long arg)
304272eb014SEric Paris {
30563c882a0SEric Paris 	struct fsnotify_group *group;
30663c882a0SEric Paris 	struct fsnotify_event_holder *holder;
30763c882a0SEric Paris 	struct fsnotify_event *event;
308272eb014SEric Paris 	void __user *p;
309272eb014SEric Paris 	int ret = -ENOTTY;
31063c882a0SEric Paris 	size_t send_len = 0;
311272eb014SEric Paris 
31263c882a0SEric Paris 	group = file->private_data;
313272eb014SEric Paris 	p = (void __user *) arg;
314272eb014SEric Paris 
315272eb014SEric Paris 	switch (cmd) {
316272eb014SEric Paris 	case FIONREAD:
31763c882a0SEric Paris 		mutex_lock(&group->notification_mutex);
31863c882a0SEric Paris 		list_for_each_entry(holder, &group->notification_list, event_list) {
31963c882a0SEric Paris 			event = holder->event;
32063c882a0SEric Paris 			send_len += sizeof(struct inotify_event);
32183cb10f0SEric Paris 			if (event->name_len)
32283cb10f0SEric Paris 				send_len += roundup(event->name_len + 1,
32363c882a0SEric Paris 						sizeof(struct inotify_event));
32463c882a0SEric Paris 		}
32563c882a0SEric Paris 		mutex_unlock(&group->notification_mutex);
32663c882a0SEric Paris 		ret = put_user(send_len, (int __user *) p);
327272eb014SEric Paris 		break;
328272eb014SEric Paris 	}
329272eb014SEric Paris 
330272eb014SEric Paris 	return ret;
331272eb014SEric Paris }
332272eb014SEric Paris 
333272eb014SEric Paris static const struct file_operations inotify_fops = {
334272eb014SEric Paris 	.poll		= inotify_poll,
335272eb014SEric Paris 	.read		= inotify_read,
336272eb014SEric Paris 	.fasync		= inotify_fasync,
337272eb014SEric Paris 	.release	= inotify_release,
338272eb014SEric Paris 	.unlocked_ioctl	= inotify_ioctl,
339272eb014SEric Paris 	.compat_ioctl	= inotify_ioctl,
340272eb014SEric Paris };
341272eb014SEric Paris 
342272eb014SEric Paris 
34363c882a0SEric Paris /*
34463c882a0SEric Paris  * find_inode - resolve a user-given path to a specific inode
34563c882a0SEric Paris  */
34663c882a0SEric Paris static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned flags)
34763c882a0SEric Paris {
34863c882a0SEric Paris 	int error;
34963c882a0SEric Paris 
35063c882a0SEric Paris 	error = user_path_at(AT_FDCWD, dirname, flags, path);
35163c882a0SEric Paris 	if (error)
35263c882a0SEric Paris 		return error;
35363c882a0SEric Paris 	/* you can only watch an inode if you have read permissions on it */
35463c882a0SEric Paris 	error = inode_permission(path->dentry->d_inode, MAY_READ);
35563c882a0SEric Paris 	if (error)
35663c882a0SEric Paris 		path_put(path);
35763c882a0SEric Paris 	return error;
35863c882a0SEric Paris }
35963c882a0SEric Paris 
360dead537dSEric Paris /*
361dead537dSEric Paris  * Remove the mark from the idr (if present) and drop the reference
362dead537dSEric Paris  * on the mark because it was in the idr.
363dead537dSEric Paris  */
3647e790dd5SEric Paris static void inotify_remove_from_idr(struct fsnotify_group *group,
3657e790dd5SEric Paris 				    struct inotify_inode_mark_entry *ientry)
3667e790dd5SEric Paris {
3677e790dd5SEric Paris 	struct idr *idr;
368dead537dSEric Paris 	struct fsnotify_mark_entry *entry;
369dead537dSEric Paris 	struct inotify_inode_mark_entry *found_ientry;
370dead537dSEric Paris 	int wd;
3717e790dd5SEric Paris 
3727e790dd5SEric Paris 	spin_lock(&group->inotify_data.idr_lock);
3737e790dd5SEric Paris 	idr = &group->inotify_data.idr;
374dead537dSEric Paris 	wd = ientry->wd;
375dead537dSEric Paris 
376dead537dSEric Paris 	if (wd == -1)
377dead537dSEric Paris 		goto out;
378dead537dSEric Paris 
379dead537dSEric Paris 	entry = idr_find(&group->inotify_data.idr, wd);
380dead537dSEric Paris 	if (unlikely(!entry))
381dead537dSEric Paris 		goto out;
382dead537dSEric Paris 
383dead537dSEric Paris 	found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
384dead537dSEric Paris 	if (unlikely(found_ientry != ientry)) {
385dead537dSEric Paris 		/* We found an entry in the idr with the right wd, but it's
386dead537dSEric Paris 		 * not the entry we were told to remove.  eparis seriously
387dead537dSEric Paris 		 * fucked up somewhere. */
388dead537dSEric Paris 		WARN_ON(1);
3897e790dd5SEric Paris 		ientry->wd = -1;
390dead537dSEric Paris 		goto out;
3917e790dd5SEric Paris 	}
392dead537dSEric Paris 
393dead537dSEric Paris 	/* One ref for being in the idr, one ref held by the caller */
394dead537dSEric Paris 	BUG_ON(atomic_read(&entry->refcnt) < 2);
395dead537dSEric Paris 
396dead537dSEric Paris 	idr_remove(idr, wd);
397dead537dSEric Paris 	ientry->wd = -1;
398dead537dSEric Paris 
399dead537dSEric Paris 	/* removed from the idr, drop that ref */
400dead537dSEric Paris 	fsnotify_put_mark(entry);
401dead537dSEric Paris out:
402dead537dSEric Paris 	spin_unlock(&group->inotify_data.idr_lock);
403dead537dSEric Paris }
404dead537dSEric Paris 
40563c882a0SEric Paris /*
406dead537dSEric Paris  * Send IN_IGNORED for this wd, remove this wd from the idr.
40763c882a0SEric Paris  */
408528da3e9SEric Paris void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
409528da3e9SEric Paris 				    struct fsnotify_group *group)
41063c882a0SEric Paris {
41163c882a0SEric Paris 	struct inotify_inode_mark_entry *ientry;
412f44aebccSEric Paris 	struct fsnotify_event *ignored_event;
41363c882a0SEric Paris 	struct inotify_event_private_data *event_priv;
41463c882a0SEric Paris 	struct fsnotify_event_private_data *fsn_event_priv;
415eef3a116SEric Paris 	int ret;
41663c882a0SEric Paris 
417f44aebccSEric Paris 	ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
418f44aebccSEric Paris 					      FSNOTIFY_EVENT_NONE, NULL, 0,
419f44aebccSEric Paris 					      GFP_NOFS);
420f44aebccSEric Paris 	if (!ignored_event)
421f44aebccSEric Paris 		return;
422f44aebccSEric Paris 
42363c882a0SEric Paris 	ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
42463c882a0SEric Paris 
425f44aebccSEric Paris 	event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
42663c882a0SEric Paris 	if (unlikely(!event_priv))
42763c882a0SEric Paris 		goto skip_send_ignore;
42863c882a0SEric Paris 
42963c882a0SEric Paris 	fsn_event_priv = &event_priv->fsnotify_event_priv_data;
43063c882a0SEric Paris 
43163c882a0SEric Paris 	fsn_event_priv->group = group;
43263c882a0SEric Paris 	event_priv->wd = ientry->wd;
43363c882a0SEric Paris 
434eef3a116SEric Paris 	ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
435eef3a116SEric Paris 	if (ret)
43663c882a0SEric Paris 		inotify_free_event_priv(fsn_event_priv);
43763c882a0SEric Paris 
43863c882a0SEric Paris skip_send_ignore:
43963c882a0SEric Paris 
440f44aebccSEric Paris 	/* matches the reference taken when the event was created */
441f44aebccSEric Paris 	fsnotify_put_event(ignored_event);
442f44aebccSEric Paris 
44363c882a0SEric Paris 	/* remove this entry from the idr */
4447e790dd5SEric Paris 	inotify_remove_from_idr(group, ientry);
44563c882a0SEric Paris 
4465549f7cdSEric Paris 	atomic_dec(&group->inotify_data.user->inotify_watches);
44763c882a0SEric Paris }
44863c882a0SEric Paris 
44963c882a0SEric Paris /* ding dong the mark is dead */
45063c882a0SEric Paris static void inotify_free_mark(struct fsnotify_mark_entry *entry)
45163c882a0SEric Paris {
45263c882a0SEric Paris 	struct inotify_inode_mark_entry *ientry = (struct inotify_inode_mark_entry *)entry;
45363c882a0SEric Paris 
45463c882a0SEric Paris 	kmem_cache_free(inotify_inode_mark_cachep, ientry);
45563c882a0SEric Paris }
45663c882a0SEric Paris 
45752cef755SEric Paris static int inotify_update_existing_watch(struct fsnotify_group *group,
45852cef755SEric Paris 					 struct inode *inode,
45952cef755SEric Paris 					 u32 arg)
46063c882a0SEric Paris {
46152cef755SEric Paris 	struct fsnotify_mark_entry *entry;
46263c882a0SEric Paris 	struct inotify_inode_mark_entry *ientry;
46363c882a0SEric Paris 	__u32 old_mask, new_mask;
46452cef755SEric Paris 	__u32 mask;
46552cef755SEric Paris 	int add = (arg & IN_MASK_ADD);
46652cef755SEric Paris 	int ret;
46763c882a0SEric Paris 
46863c882a0SEric Paris 	/* don't allow invalid bits: we don't want flags set */
46963c882a0SEric Paris 	mask = inotify_arg_to_mask(arg);
47063c882a0SEric Paris 	if (unlikely(!mask))
47163c882a0SEric Paris 		return -EINVAL;
47263c882a0SEric Paris 
47363c882a0SEric Paris 	spin_lock(&inode->i_lock);
47463c882a0SEric Paris 	entry = fsnotify_find_mark_entry(group, inode);
47563c882a0SEric Paris 	spin_unlock(&inode->i_lock);
47652cef755SEric Paris 	if (!entry)
47752cef755SEric Paris 		return -ENOENT;
47852cef755SEric Paris 
47963c882a0SEric Paris 	ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
48075fe2b26SEric Paris 
48163c882a0SEric Paris 	spin_lock(&entry->lock);
48263c882a0SEric Paris 
48363c882a0SEric Paris 	old_mask = entry->mask;
48463c882a0SEric Paris 	if (add) {
48563c882a0SEric Paris 		entry->mask |= mask;
48663c882a0SEric Paris 		new_mask = entry->mask;
48763c882a0SEric Paris 	} else {
48863c882a0SEric Paris 		entry->mask = mask;
48963c882a0SEric Paris 		new_mask = entry->mask;
49063c882a0SEric Paris 	}
49163c882a0SEric Paris 
49263c882a0SEric Paris 	spin_unlock(&entry->lock);
49363c882a0SEric Paris 
49463c882a0SEric Paris 	if (old_mask != new_mask) {
49563c882a0SEric Paris 		/* more bits in old than in new? */
49663c882a0SEric Paris 		int dropped = (old_mask & ~new_mask);
49763c882a0SEric Paris 		/* more bits in this entry than the inode's mask? */
49863c882a0SEric Paris 		int do_inode = (new_mask & ~inode->i_fsnotify_mask);
49963c882a0SEric Paris 		/* more bits in this entry than the group? */
50063c882a0SEric Paris 		int do_group = (new_mask & ~group->mask);
50163c882a0SEric Paris 
50263c882a0SEric Paris 		/* update the inode with this new entry */
50363c882a0SEric Paris 		if (dropped || do_inode)
50463c882a0SEric Paris 			fsnotify_recalc_inode_mask(inode);
50563c882a0SEric Paris 
50663c882a0SEric Paris 		/* update the group mask with the new mask */
50763c882a0SEric Paris 		if (dropped || do_group)
50863c882a0SEric Paris 			fsnotify_recalc_group_mask(group);
50963c882a0SEric Paris 	}
51063c882a0SEric Paris 
51152cef755SEric Paris 	/* return the wd */
51252cef755SEric Paris 	ret = ientry->wd;
51352cef755SEric Paris 
51452cef755SEric Paris 	/* match the get from fsnotify_find_mark_entry() */
51575fe2b26SEric Paris 	fsnotify_put_mark(entry);
51675fe2b26SEric Paris 
51752cef755SEric Paris 	return ret;
51863c882a0SEric Paris }
5197e790dd5SEric Paris 
52052cef755SEric Paris static int inotify_new_watch(struct fsnotify_group *group,
52152cef755SEric Paris 			     struct inode *inode,
52252cef755SEric Paris 			     u32 arg)
52352cef755SEric Paris {
52452cef755SEric Paris 	struct inotify_inode_mark_entry *tmp_ientry;
52552cef755SEric Paris 	__u32 mask;
52652cef755SEric Paris 	int ret;
52752cef755SEric Paris 
52852cef755SEric Paris 	/* don't allow invalid bits: we don't want flags set */
52952cef755SEric Paris 	mask = inotify_arg_to_mask(arg);
53052cef755SEric Paris 	if (unlikely(!mask))
53152cef755SEric Paris 		return -EINVAL;
53252cef755SEric Paris 
53352cef755SEric Paris 	tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
53452cef755SEric Paris 	if (unlikely(!tmp_ientry))
53552cef755SEric Paris 		return -ENOMEM;
53652cef755SEric Paris 
53752cef755SEric Paris 	fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark);
53852cef755SEric Paris 	tmp_ientry->fsn_entry.mask = mask;
53952cef755SEric Paris 	tmp_ientry->wd = -1;
54052cef755SEric Paris 
54152cef755SEric Paris 	ret = -ENOSPC;
54252cef755SEric Paris 	if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
54352cef755SEric Paris 		goto out_err;
54452cef755SEric Paris retry:
54552cef755SEric Paris 	ret = -ENOMEM;
54652cef755SEric Paris 	if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
54752cef755SEric Paris 		goto out_err;
54852cef755SEric Paris 
54952cef755SEric Paris 	spin_lock(&group->inotify_data.idr_lock);
55052cef755SEric Paris 	ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
5519e572cc9SEric Paris 				group->inotify_data.last_wd+1,
55252cef755SEric Paris 				&tmp_ientry->wd);
55352cef755SEric Paris 	spin_unlock(&group->inotify_data.idr_lock);
55452cef755SEric Paris 	if (ret) {
55552cef755SEric Paris 		/* idr was out of memory allocate and try again */
55652cef755SEric Paris 		if (ret == -EAGAIN)
55752cef755SEric Paris 			goto retry;
55852cef755SEric Paris 		goto out_err;
55952cef755SEric Paris 	}
56052cef755SEric Paris 
561dead537dSEric Paris 	/* we put the mark on the idr, take a reference */
562dead537dSEric Paris 	fsnotify_get_mark(&tmp_ientry->fsn_entry);
563dead537dSEric Paris 
56452cef755SEric Paris 	/* we are on the idr, now get on the inode */
56552cef755SEric Paris 	ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
56652cef755SEric Paris 	if (ret) {
56752cef755SEric Paris 		/* we failed to get on the inode, get off the idr */
56852cef755SEric Paris 		inotify_remove_from_idr(group, tmp_ientry);
56952cef755SEric Paris 		goto out_err;
57052cef755SEric Paris 	}
57152cef755SEric Paris 
57252cef755SEric Paris 	/* update the idr hint, who cares about races, it's just a hint */
57352cef755SEric Paris 	group->inotify_data.last_wd = tmp_ientry->wd;
57452cef755SEric Paris 
57552cef755SEric Paris 	/* increment the number of watches the user has */
57652cef755SEric Paris 	atomic_inc(&group->inotify_data.user->inotify_watches);
57752cef755SEric Paris 
57852cef755SEric Paris 	/* return the watch descriptor for this new entry */
57952cef755SEric Paris 	ret = tmp_ientry->wd;
58052cef755SEric Paris 
581750a8870SEric Paris 	/* if this mark added a new event update the group mask */
582750a8870SEric Paris 	if (mask & ~group->mask)
583750a8870SEric Paris 		fsnotify_recalc_group_mask(group);
584750a8870SEric Paris 
58552cef755SEric Paris out_err:
586*3dbc6fb6SEric Paris 	/* match the ref from fsnotify_init_markentry() */
587*3dbc6fb6SEric Paris 	fsnotify_put_mark(&tmp_ientry->fsn_entry);
58852cef755SEric Paris 
58952cef755SEric Paris 	return ret;
59052cef755SEric Paris }
59152cef755SEric Paris 
59252cef755SEric Paris static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
59352cef755SEric Paris {
59452cef755SEric Paris 	int ret = 0;
59552cef755SEric Paris 
59652cef755SEric Paris retry:
59752cef755SEric Paris 	/* try to update and existing watch with the new arg */
59852cef755SEric Paris 	ret = inotify_update_existing_watch(group, inode, arg);
59952cef755SEric Paris 	/* no mark present, try to add a new one */
60052cef755SEric Paris 	if (ret == -ENOENT)
60152cef755SEric Paris 		ret = inotify_new_watch(group, inode, arg);
60252cef755SEric Paris 	/*
60352cef755SEric Paris 	 * inotify_new_watch could race with another thread which did an
60452cef755SEric Paris 	 * inotify_new_watch between the update_existing and the add watch
60552cef755SEric Paris 	 * here, go back and try to update an existing mark again.
60652cef755SEric Paris 	 */
60752cef755SEric Paris 	if (ret == -EEXIST)
60852cef755SEric Paris 		goto retry;
60952cef755SEric Paris 
61063c882a0SEric Paris 	return ret;
61163c882a0SEric Paris }
61263c882a0SEric Paris 
61363c882a0SEric Paris static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events)
61463c882a0SEric Paris {
61563c882a0SEric Paris 	struct fsnotify_group *group;
61663c882a0SEric Paris 	unsigned int grp_num;
61763c882a0SEric Paris 
61863c882a0SEric Paris 	/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
61963c882a0SEric Paris 	grp_num = (INOTIFY_GROUP_NUM - atomic_inc_return(&inotify_grp_num));
62063c882a0SEric Paris 	group = fsnotify_obtain_group(grp_num, 0, &inotify_fsnotify_ops);
62163c882a0SEric Paris 	if (IS_ERR(group))
62263c882a0SEric Paris 		return group;
62363c882a0SEric Paris 
62463c882a0SEric Paris 	group->max_events = max_events;
62563c882a0SEric Paris 
62663c882a0SEric Paris 	spin_lock_init(&group->inotify_data.idr_lock);
62763c882a0SEric Paris 	idr_init(&group->inotify_data.idr);
6289e572cc9SEric Paris 	group->inotify_data.last_wd = 0;
62963c882a0SEric Paris 	group->inotify_data.user = user;
63063c882a0SEric Paris 	group->inotify_data.fa = NULL;
63163c882a0SEric Paris 
63263c882a0SEric Paris 	return group;
63363c882a0SEric Paris }
63463c882a0SEric Paris 
63563c882a0SEric Paris 
63663c882a0SEric Paris /* inotify syscalls */
637938bb9f5SHeiko Carstens SYSCALL_DEFINE1(inotify_init1, int, flags)
638272eb014SEric Paris {
63963c882a0SEric Paris 	struct fsnotify_group *group;
640272eb014SEric Paris 	struct user_struct *user;
641c44dcc56SAl Viro 	int ret;
642272eb014SEric Paris 
643272eb014SEric Paris 	/* Check the IN_* constants for consistency.  */
644272eb014SEric Paris 	BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
645272eb014SEric Paris 	BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
646272eb014SEric Paris 
647272eb014SEric Paris 	if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
648272eb014SEric Paris 		return -EINVAL;
649272eb014SEric Paris 
650272eb014SEric Paris 	user = get_current_user();
651272eb014SEric Paris 	if (unlikely(atomic_read(&user->inotify_devs) >=
652272eb014SEric Paris 			inotify_max_user_instances)) {
653272eb014SEric Paris 		ret = -EMFILE;
654272eb014SEric Paris 		goto out_free_uid;
655272eb014SEric Paris 	}
656272eb014SEric Paris 
65763c882a0SEric Paris 	/* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */
65863c882a0SEric Paris 	group = inotify_new_group(user, inotify_max_queued_events);
65963c882a0SEric Paris 	if (IS_ERR(group)) {
66063c882a0SEric Paris 		ret = PTR_ERR(group);
661272eb014SEric Paris 		goto out_free_uid;
662272eb014SEric Paris 	}
663272eb014SEric Paris 
664825f9692SAl Viro 	atomic_inc(&user->inotify_devs);
665825f9692SAl Viro 
666c44dcc56SAl Viro 	ret = anon_inode_getfd("inotify", &inotify_fops, group,
667c44dcc56SAl Viro 				  O_RDONLY | flags);
668c44dcc56SAl Viro 	if (ret >= 0)
669c44dcc56SAl Viro 		return ret;
670825f9692SAl Viro 
671825f9692SAl Viro 	atomic_dec(&user->inotify_devs);
672272eb014SEric Paris out_free_uid:
673272eb014SEric Paris 	free_uid(user);
674272eb014SEric Paris 	return ret;
675272eb014SEric Paris }
676272eb014SEric Paris 
/* inotify_init - same as inotify_init1() called with a flags value of 0. */
SYSCALL_DEFINE0(inotify_init)
{
	const int no_flags = 0;

	return sys_inotify_init1(no_flags);
}
681272eb014SEric Paris 
6822e4d0924SHeiko Carstens SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
6832e4d0924SHeiko Carstens 		u32, mask)
684272eb014SEric Paris {
68563c882a0SEric Paris 	struct fsnotify_group *group;
686272eb014SEric Paris 	struct inode *inode;
687272eb014SEric Paris 	struct path path;
688272eb014SEric Paris 	struct file *filp;
689272eb014SEric Paris 	int ret, fput_needed;
690272eb014SEric Paris 	unsigned flags = 0;
691272eb014SEric Paris 
692272eb014SEric Paris 	filp = fget_light(fd, &fput_needed);
693272eb014SEric Paris 	if (unlikely(!filp))
694272eb014SEric Paris 		return -EBADF;
695272eb014SEric Paris 
696272eb014SEric Paris 	/* verify that this is indeed an inotify instance */
697272eb014SEric Paris 	if (unlikely(filp->f_op != &inotify_fops)) {
698272eb014SEric Paris 		ret = -EINVAL;
699272eb014SEric Paris 		goto fput_and_out;
700272eb014SEric Paris 	}
701272eb014SEric Paris 
702272eb014SEric Paris 	if (!(mask & IN_DONT_FOLLOW))
703272eb014SEric Paris 		flags |= LOOKUP_FOLLOW;
704272eb014SEric Paris 	if (mask & IN_ONLYDIR)
705272eb014SEric Paris 		flags |= LOOKUP_DIRECTORY;
706272eb014SEric Paris 
70763c882a0SEric Paris 	ret = inotify_find_inode(pathname, &path, flags);
70863c882a0SEric Paris 	if (ret)
709272eb014SEric Paris 		goto fput_and_out;
710272eb014SEric Paris 
71163c882a0SEric Paris 	/* inode held in place by reference to path; group by fget on fd */
712272eb014SEric Paris 	inode = path.dentry->d_inode;
71363c882a0SEric Paris 	group = filp->private_data;
714272eb014SEric Paris 
71563c882a0SEric Paris 	/* create/update an inode mark */
71663c882a0SEric Paris 	ret = inotify_update_watch(group, inode, mask);
717272eb014SEric Paris 	path_put(&path);
718272eb014SEric Paris fput_and_out:
719272eb014SEric Paris 	fput_light(filp, fput_needed);
720272eb014SEric Paris 	return ret;
721272eb014SEric Paris }
722272eb014SEric Paris 
7232e4d0924SHeiko Carstens SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
724272eb014SEric Paris {
72563c882a0SEric Paris 	struct fsnotify_group *group;
72663c882a0SEric Paris 	struct fsnotify_mark_entry *entry;
727272eb014SEric Paris 	struct file *filp;
72863c882a0SEric Paris 	int ret = 0, fput_needed;
729272eb014SEric Paris 
730272eb014SEric Paris 	filp = fget_light(fd, &fput_needed);
731272eb014SEric Paris 	if (unlikely(!filp))
732272eb014SEric Paris 		return -EBADF;
733272eb014SEric Paris 
734272eb014SEric Paris 	/* verify that this is indeed an inotify instance */
735272eb014SEric Paris 	if (unlikely(filp->f_op != &inotify_fops)) {
736272eb014SEric Paris 		ret = -EINVAL;
737272eb014SEric Paris 		goto out;
738272eb014SEric Paris 	}
739272eb014SEric Paris 
74063c882a0SEric Paris 	group = filp->private_data;
741272eb014SEric Paris 
74263c882a0SEric Paris 	spin_lock(&group->inotify_data.idr_lock);
74363c882a0SEric Paris 	entry = idr_find(&group->inotify_data.idr, wd);
74463c882a0SEric Paris 	if (unlikely(!entry)) {
74563c882a0SEric Paris 		spin_unlock(&group->inotify_data.idr_lock);
74663c882a0SEric Paris 		ret = -EINVAL;
74763c882a0SEric Paris 		goto out;
74863c882a0SEric Paris 	}
74963c882a0SEric Paris 	fsnotify_get_mark(entry);
75063c882a0SEric Paris 	spin_unlock(&group->inotify_data.idr_lock);
75163c882a0SEric Paris 
752528da3e9SEric Paris 	fsnotify_destroy_mark_by_entry(entry);
75363c882a0SEric Paris 	fsnotify_put_mark(entry);
754272eb014SEric Paris 
755272eb014SEric Paris out:
756272eb014SEric Paris 	fput_light(filp, fput_needed);
757272eb014SEric Paris 	return ret;
758272eb014SEric Paris }
759272eb014SEric Paris 
760272eb014SEric Paris /*
761272eb014SEric Paris  * inotify_user_setup - Our initialization function.  Note that we cannnot return
762272eb014SEric Paris  * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
763272eb014SEric Paris  * must result in panic().
764272eb014SEric Paris  */
765272eb014SEric Paris static int __init inotify_user_setup(void)
766272eb014SEric Paris {
76763c882a0SEric Paris 	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC);
76863c882a0SEric Paris 	event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
76963c882a0SEric Paris 
770272eb014SEric Paris 	inotify_max_queued_events = 16384;
771272eb014SEric Paris 	inotify_max_user_instances = 128;
772272eb014SEric Paris 	inotify_max_user_watches = 8192;
773272eb014SEric Paris 
774272eb014SEric Paris 	return 0;
775272eb014SEric Paris }
776272eb014SEric Paris module_init(inotify_user_setup);
777