xref: /linux/fs/fcntl.c (revision 1fc3d0ee242de68267769afcaf5520439de42f64)
1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   *  linux/fs/fcntl.c
4   *
5   *  Copyright (C) 1991, 1992  Linus Torvalds
6   */
7  
8  #include <linux/syscalls.h>
9  #include <linux/init.h>
10  #include <linux/mm.h>
11  #include <linux/sched/task.h>
12  #include <linux/fs.h>
13  #include <linux/file.h>
14  #include <linux/fdtable.h>
15  #include <linux/capability.h>
16  #include <linux/dnotify.h>
17  #include <linux/slab.h>
18  #include <linux/module.h>
19  #include <linux/pipe_fs_i.h>
20  #include <linux/security.h>
21  #include <linux/ptrace.h>
22  #include <linux/signal.h>
23  #include <linux/rcupdate.h>
24  #include <linux/pid_namespace.h>
25  #include <linux/user_namespace.h>
26  #include <linux/memfd.h>
27  #include <linux/compat.h>
28  
29  #include <linux/poll.h>
30  #include <asm/siginfo.h>
31  #include <linux/uaccess.h>
32  
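    /* The only f_flags bits that F_SETFL may change after open time. */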
33  #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
34  
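    /*
     * Validate and apply an F_SETFL request: permission checks for
     * O_APPEND and O_NOATIME, a direct_IO capability check for O_DIRECT,
     * the optional ->check_flags() hook and FASYNC list maintenance,
     * before the masked f_flags update under f_lock.
     */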
35  static int setfl(int fd, struct file * filp, unsigned long arg)
36  {
37  	struct inode * inode = file_inode(filp);
38  	int error = 0;
39  
40  	/*
41  	 * O_APPEND cannot be cleared if the file is marked as append-only
42  	 * and the file is open for write.
43  	 */
44  	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
45  		return -EPERM;
46  
47  	/* O_NOATIME can only be set by the owner or superuser */
48  	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
49  		if (!inode_owner_or_capable(inode))
50  			return -EPERM;
51  
52  	/* required for strict SunOS emulation */
53  	if (O_NONBLOCK != O_NDELAY)
54  		if (arg & O_NDELAY)
55  			arg |= O_NONBLOCK;
56  
57  	/* Pipe packetized mode is controlled by the O_DIRECT flag */
58  	if (!S_ISFIFO(inode->i_mode) && (arg & O_DIRECT)) {
59  		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
60  			!filp->f_mapping->a_ops->direct_IO)
61  				return -EINVAL;
62  	}
63  
64  	if (filp->f_op->check_flags)
65  		error = filp->f_op->check_flags(arg);
66  	if (error)
67  		return error;
68  
69  	/*
70  	 * ->fasync() is responsible for setting the FASYNC bit.
71  	 */
72  	if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
73  		error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
74  		if (error < 0)
75  			goto out;
76  		if (error > 0)
77  			error = 0;
78  	}
79  	spin_lock(&filp->f_lock);
80  	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
81  	spin_unlock(&filp->f_lock);
82  
83   out:
84  	return error;
85  }
86  
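    /*
     * Update the owner (pid, type) of @filp under f_owner.lock.  Unless
     * @force, an already-set owner is left alone.  The setter's uid and
     * euid are recorded so sigio_perm() can check them at delivery time.
     */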
87  static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
88                       int force)
89  {
90  	write_lock_irq(&filp->f_owner.lock);
91  	if (force || !filp->f_owner.pid) {
92  		put_pid(filp->f_owner.pid);
93  		filp->f_owner.pid = get_pid(pid);
94  		filp->f_owner.pid_type = type;
95  
96  		if (pid) {
97  			const struct cred *cred = current_cred();
98  			filp->f_owner.uid = cred->uid;
99  			filp->f_owner.euid = cred->euid;
100  		}
101  	}
102  	write_unlock_irq(&filp->f_owner.lock);
103  }
104  
105  void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
106  		int force)
107  {
108  	security_file_set_fowner(filp);
109  	f_modown(filp, pid, type, force);
110  }
111  EXPORT_SYMBOL(__f_setown);
112  
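     /*
      * Classic F_SETOWN encoding: a positive argument selects a process
      * (thread group), a negative one a process group.  INT_MIN is
      * rejected because its negation would overflow.
      */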
113  int f_setown(struct file *filp, unsigned long arg, int force)
114  {
115  	enum pid_type type;
116  	struct pid *pid = NULL;
117  	int who = arg, ret = 0;
118  
119  	type = PIDTYPE_TGID;
120  	if (who < 0) {
121  		/* avoid overflow below */
122  		if (who == INT_MIN)
123  			return -EINVAL;
124  
125  		type = PIDTYPE_PGID;
126  		who = -who;
127  	}
128  
129  	rcu_read_lock();
130  	if (who) {
131  		pid = find_vpid(who);
132  		if (!pid)
133  			ret = -ESRCH;
134  	}
135  
136  	if (!ret)
137  		__f_setown(filp, pid, type, force);
138  	rcu_read_unlock();
139  
140  	return ret;
141  }
142  EXPORT_SYMBOL(f_setown);
143  
144  void f_delown(struct file *filp)
145  {
146  	f_modown(filp, NULL, PIDTYPE_TGID, 1);
147  }
148  
149  pid_t f_getown(struct file *filp)
150  {
151  	pid_t pid;
152  	read_lock(&filp->f_owner.lock);
153  	pid = pid_vnr(filp->f_owner.pid);
154  	if (filp->f_owner.pid_type == PIDTYPE_PGID)
155  		pid = -pid;
156  	read_unlock(&filp->f_owner.lock);
157  	return pid;
158  }
159  
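     /*
      * F_SETOWN_EX takes an explicit owner type.  Illustrative userspace
      * usage (a sketch, not part of this file; "tid" is a hypothetical
      * thread id, e.g. from gettid()):
      *
      *	struct f_owner_ex ex = { .type = F_OWNER_TID, .pid = tid };
      *	fcntl(fd, F_SETOWN_EX, &ex);
      */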
160  static int f_setown_ex(struct file *filp, unsigned long arg)
161  {
162  	struct f_owner_ex __user *owner_p = (void __user *)arg;
163  	struct f_owner_ex owner;
164  	struct pid *pid;
165  	int type;
166  	int ret;
167  
168  	ret = copy_from_user(&owner, owner_p, sizeof(owner));
169  	if (ret)
170  		return -EFAULT;
171  
172  	switch (owner.type) {
173  	case F_OWNER_TID:
174  		type = PIDTYPE_PID;
175  		break;
176  
177  	case F_OWNER_PID:
178  		type = PIDTYPE_TGID;
179  		break;
180  
181  	case F_OWNER_PGRP:
182  		type = PIDTYPE_PGID;
183  		break;
184  
185  	default:
186  		return -EINVAL;
187  	}
188  
189  	rcu_read_lock();
190  	pid = find_vpid(owner.pid);
191  	if (owner.pid && !pid)
192  		ret = -ESRCH;
193  	else
194  		__f_setown(filp, pid, type, 1);
195  	rcu_read_unlock();
196  
197  	return ret;
198  }
199  
200  static int f_getown_ex(struct file *filp, unsigned long arg)
201  {
202  	struct f_owner_ex __user *owner_p = (void __user *)arg;
203  	struct f_owner_ex owner;
204  	int ret = 0;
205  
206  	read_lock(&filp->f_owner.lock);
207  	owner.pid = pid_vnr(filp->f_owner.pid);
208  	switch (filp->f_owner.pid_type) {
209  	case PIDTYPE_PID:
210  		owner.type = F_OWNER_TID;
211  		break;
212  
213  	case PIDTYPE_TGID:
214  		owner.type = F_OWNER_PID;
215  		break;
216  
217  	case PIDTYPE_PGID:
218  		owner.type = F_OWNER_PGRP;
219  		break;
220  
221  	default:
222  		WARN_ON(1);
223  		ret = -EINVAL;
224  		break;
225  	}
226  	read_unlock(&filp->f_owner.lock);
227  
228  	if (!ret) {
229  		ret = copy_to_user(owner_p, &owner, sizeof(owner));
230  		if (ret)
231  			ret = -EFAULT;
232  	}
233  	return ret;
234  }
235  
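     /*
      * F_GETOWNER_UIDS reports the uid/euid saved by f_modown().  It is
      * only built for checkpoint/restore kernels, where userspace (e.g.
      * CRIU) needs to re-create the owner's credentials on restore.
      */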
236  #ifdef CONFIG_CHECKPOINT_RESTORE
237  static int f_getowner_uids(struct file *filp, unsigned long arg)
238  {
239  	struct user_namespace *user_ns = current_user_ns();
240  	uid_t __user *dst = (void __user *)arg;
241  	uid_t src[2];
242  	int err;
243  
244  	read_lock(&filp->f_owner.lock);
245  	src[0] = from_kuid(user_ns, filp->f_owner.uid);
246  	src[1] = from_kuid(user_ns, filp->f_owner.euid);
247  	read_unlock(&filp->f_owner.lock);
248  
249  	err  = put_user(src[0], &dst[0]);
250  	err |= put_user(src[1], &dst[1]);
251  
252  	return err;
253  }
254  #else
255  static int f_getowner_uids(struct file *filp, unsigned long arg)
256  {
257  	return -EINVAL;
258  }
259  #endif
260  
261  static bool rw_hint_valid(enum rw_hint hint)
262  {
263  	switch (hint) {
264  	case RWH_WRITE_LIFE_NOT_SET:
265  	case RWH_WRITE_LIFE_NONE:
266  	case RWH_WRITE_LIFE_SHORT:
267  	case RWH_WRITE_LIFE_MEDIUM:
268  	case RWH_WRITE_LIFE_LONG:
269  	case RWH_WRITE_LIFE_EXTREME:
270  		return true;
271  	default:
272  		return false;
273  	}
274  }
275  
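     /*
      * Write-lifetime hints: F_{GET,SET}_FILE_RW_HINT act on this struct
      * file, F_{GET,SET}_RW_HINT on the inode, and the per-file hint, if
      * set, takes precedence.  Filesystems and block drivers may use the
      * hint for data placement (e.g. NVMe write streams).
      */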
276  static long fcntl_rw_hint(struct file *file, unsigned int cmd,
277  			  unsigned long arg)
278  {
279  	struct inode *inode = file_inode(file);
280  	u64 __user *argp = (u64 __user *)arg;
281  	enum rw_hint hint;
282  	u64 h;
283  
284  	switch (cmd) {
285  	case F_GET_FILE_RW_HINT:
286  		h = file_write_hint(file);
287  		if (copy_to_user(argp, &h, sizeof(*argp)))
288  			return -EFAULT;
289  		return 0;
290  	case F_SET_FILE_RW_HINT:
291  		if (copy_from_user(&h, argp, sizeof(h)))
292  			return -EFAULT;
293  		hint = (enum rw_hint) h;
294  		if (!rw_hint_valid(hint))
295  			return -EINVAL;
296  
297  		spin_lock(&file->f_lock);
298  		file->f_write_hint = hint;
299  		spin_unlock(&file->f_lock);
300  		return 0;
301  	case F_GET_RW_HINT:
302  		h = inode->i_write_hint;
303  		if (copy_to_user(argp, &h, sizeof(*argp)))
304  			return -EFAULT;
305  		return 0;
306  	case F_SET_RW_HINT:
307  		if (copy_from_user(&h, argp, sizeof(h)))
308  			return -EFAULT;
309  		hint = (enum rw_hint) h;
310  		if (!rw_hint_valid(hint))
311  			return -EINVAL;
312  
313  		inode_lock(inode);
314  		inode->i_write_hint = hint;
315  		inode_unlock(inode);
316  		return 0;
317  	default:
318  		return -EINVAL;
319  	}
320  }
321  
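     /*
      * Common dispatcher for fcntl(2) and the non-locking half of
      * fcntl64(2); the caller has already resolved @fd to @filp and run
      * the security hook.
      */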
322  static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
323  		struct file *filp)
324  {
325  	void __user *argp = (void __user *)arg;
326  	struct flock flock;
327  	long err = -EINVAL;
328  
329  	switch (cmd) {
330  	case F_DUPFD:
331  		err = f_dupfd(arg, filp, 0);
332  		break;
333  	case F_DUPFD_CLOEXEC:
334  		err = f_dupfd(arg, filp, O_CLOEXEC);
335  		break;
336  	case F_GETFD:
337  		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
338  		break;
339  	case F_SETFD:
340  		err = 0;
341  		set_close_on_exec(fd, arg & FD_CLOEXEC);
342  		break;
343  	case F_GETFL:
344  		err = filp->f_flags;
345  		break;
346  	case F_SETFL:
347  		err = setfl(fd, filp, arg);
348  		break;
349  #if BITS_PER_LONG != 32
350  	/* 32-bit arches must use fcntl64() */
351  	case F_OFD_GETLK:
352  #endif
353  	case F_GETLK:
354  		if (copy_from_user(&flock, argp, sizeof(flock)))
355  			return -EFAULT;
356  		err = fcntl_getlk(filp, cmd, &flock);
357  		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
358  			return -EFAULT;
359  		break;
360  #if BITS_PER_LONG != 32
361  	/* 32-bit arches must use fcntl64() */
362  	case F_OFD_SETLK:
363  	case F_OFD_SETLKW:
364  #endif
365  		/* Fallthrough */
366  	case F_SETLK:
367  	case F_SETLKW:
368  		if (copy_from_user(&flock, argp, sizeof(flock)))
369  			return -EFAULT;
370  		err = fcntl_setlk(fd, filp, cmd, &flock);
371  		break;
372  	case F_GETOWN:
373  		/*
374  		 * XXX If f_owner is a process group, the
375  		 * negative return value will get converted
376  		 * into an error.  Oops.  If we keep the
377  		 * current syscall conventions, the only way
378  		 * to fix this will be in libc.
379  		 */
380  		err = f_getown(filp);
381  		force_successful_syscall_return();
382  		break;
383  	case F_SETOWN:
384  		err = f_setown(filp, arg, 1);
385  		break;
386  	case F_GETOWN_EX:
387  		err = f_getown_ex(filp, arg);
388  		break;
389  	case F_SETOWN_EX:
390  		err = f_setown_ex(filp, arg);
391  		break;
392  	case F_GETOWNER_UIDS:
393  		err = f_getowner_uids(filp, arg);
394  		break;
395  	case F_GETSIG:
396  		err = filp->f_owner.signum;
397  		break;
398  	case F_SETSIG:
399  		/* arg == 0 restores default behaviour. */
400  		if (!valid_signal(arg)) {
401  			break;
402  		}
403  		err = 0;
404  		filp->f_owner.signum = arg;
405  		break;
406  	case F_GETLEASE:
407  		err = fcntl_getlease(filp);
408  		break;
409  	case F_SETLEASE:
410  		err = fcntl_setlease(fd, filp, arg);
411  		break;
412  	case F_NOTIFY:
413  		err = fcntl_dirnotify(fd, filp, arg);
414  		break;
415  	case F_SETPIPE_SZ:
416  	case F_GETPIPE_SZ:
417  		err = pipe_fcntl(filp, cmd, arg);
418  		break;
419  	case F_ADD_SEALS:
420  	case F_GET_SEALS:
421  		err = memfd_fcntl(filp, cmd, arg);
422  		break;
423  	case F_GET_RW_HINT:
424  	case F_SET_RW_HINT:
425  	case F_GET_FILE_RW_HINT:
426  	case F_SET_FILE_RW_HINT:
427  		err = fcntl_rw_hint(filp, cmd, arg);
428  		break;
429  	default:
430  		break;
431  	}
432  	return err;
433  }
434  
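     /*
      * O_PATH descriptors (FMODE_PATH) only admit the commands below,
      * i.e. those that operate purely on the descriptor itself; anything
      * else on such a file fails with -EBADF.
      */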
435  static int check_fcntl_cmd(unsigned cmd)
436  {
437  	switch (cmd) {
438  	case F_DUPFD:
439  	case F_DUPFD_CLOEXEC:
440  	case F_GETFD:
441  	case F_SETFD:
442  	case F_GETFL:
443  		return 1;
444  	}
445  	return 0;
446  }
447  
448  SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
449  {
450  	struct fd f = fdget_raw(fd);
451  	long err = -EBADF;
452  
453  	if (!f.file)
454  		goto out;
455  
456  	if (unlikely(f.file->f_mode & FMODE_PATH)) {
457  		if (!check_fcntl_cmd(cmd))
458  			goto out1;
459  	}
460  
461  	err = security_file_fcntl(f.file, cmd, arg);
462  	if (!err)
463  		err = do_fcntl(fd, cmd, arg, f.file);
464  
465  out1:
466  	fdput(f);
467  out:
468  	return err;
469  }
470  
471  #if BITS_PER_LONG == 32
472  SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
473  		unsigned long, arg)
474  {
475  	void __user *argp = (void __user *)arg;
476  	struct fd f = fdget_raw(fd);
477  	struct flock64 flock;
478  	long err = -EBADF;
479  
480  	if (!f.file)
481  		goto out;
482  
483  	if (unlikely(f.file->f_mode & FMODE_PATH)) {
484  		if (!check_fcntl_cmd(cmd))
485  			goto out1;
486  	}
487  
488  	err = security_file_fcntl(f.file, cmd, arg);
489  	if (err)
490  		goto out1;
491  
492  	switch (cmd) {
493  	case F_GETLK64:
494  	case F_OFD_GETLK:
495  		err = -EFAULT;
496  		if (copy_from_user(&flock, argp, sizeof(flock)))
497  			break;
498  		err = fcntl_getlk64(f.file, cmd, &flock);
499  		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
500  			err = -EFAULT;
501  		break;
502  	case F_SETLK64:
503  	case F_SETLKW64:
504  	case F_OFD_SETLK:
505  	case F_OFD_SETLKW:
506  		err = -EFAULT;
507  		if (copy_from_user(&flock, argp, sizeof(flock)))
508  			break;
509  		err = fcntl_setlk64(fd, f.file, cmd, &flock);
510  		break;
511  	default:
512  		err = do_fcntl(fd, cmd, arg, f.file);
513  		break;
514  	}
515  out1:
516  	fdput(f);
517  out:
518  	return err;
519  }
520  #endif
521  
522  #ifdef CONFIG_COMPAT
523  /* careful - don't use anywhere else */
524  #define copy_flock_fields(dst, src)		\
525  	(dst)->l_type = (src)->l_type;		\
526  	(dst)->l_whence = (src)->l_whence;	\
527  	(dst)->l_start = (src)->l_start;	\
528  	(dst)->l_len = (src)->l_len;		\
529  	(dst)->l_pid = (src)->l_pid;
530  
531  static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl)
532  {
533  	struct compat_flock fl;
534  
535  	if (copy_from_user(&fl, ufl, sizeof(struct compat_flock)))
536  		return -EFAULT;
537  	copy_flock_fields(kfl, &fl);
538  	return 0;
539  }
540  
541  static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl)
542  {
543  	struct compat_flock64 fl;
544  
545  	if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64)))
546  		return -EFAULT;
547  	copy_flock_fields(kfl, &fl);
548  	return 0;
549  }
550  
551  static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl)
552  {
553  	struct compat_flock fl;
554  
555  	memset(&fl, 0, sizeof(struct compat_flock));
556  	copy_flock_fields(&fl, kfl);
557  	if (copy_to_user(ufl, &fl, sizeof(struct compat_flock)))
558  		return -EFAULT;
559  	return 0;
560  }
561  
562  static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl)
563  {
564  	struct compat_flock64 fl;
565  
566  	BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start));
567  	BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len));
568  
569  	memset(&fl, 0, sizeof(struct compat_flock64));
570  	copy_flock_fields(&fl, kfl);
571  	if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64)))
572  		return -EFAULT;
573  	return 0;
574  }
575  #undef copy_flock_fields
576  
577  static unsigned int
578  convert_fcntl_cmd(unsigned int cmd)
579  {
580  	switch (cmd) {
581  	case F_GETLK64:
582  		return F_GETLK;
583  	case F_SETLK64:
584  		return F_SETLK;
585  	case F_SETLKW64:
586  		return F_SETLKW;
587  	}
588  
589  	return cmd;
590  }
591  
592  /*
593   * GETLK was successful and we need to return the data, but it needs to fit in
594   * the compat structure.
595   * l_start shouldn't be too big, unless the original start + end is greater
596   * than COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we
597   * return -EOVERFLOW.  l_len could be too big, in which case we just truncate
598   * it, and only allow the app to see the part of the conflicting lock that
599   * might make sense to it anyway.
600   */
601  static int fixup_compat_flock(struct flock *flock)
602  {
603  	if (flock->l_start > COMPAT_OFF_T_MAX)
604  		return -EOVERFLOW;
605  	if (flock->l_len > COMPAT_OFF_T_MAX)
606  		flock->l_len = COMPAT_OFF_T_MAX;
607  	return 0;
608  }
609  
610  static long do_compat_fcntl64(unsigned int fd, unsigned int cmd,
611  			     compat_ulong_t arg)
612  {
613  	struct fd f = fdget_raw(fd);
614  	struct flock flock;
615  	long err = -EBADF;
616  
617  	if (!f.file)
618  		return err;
619  
620  	if (unlikely(f.file->f_mode & FMODE_PATH)) {
621  		if (!check_fcntl_cmd(cmd))
622  			goto out_put;
623  	}
624  
625  	err = security_file_fcntl(f.file, cmd, arg);
626  	if (err)
627  		goto out_put;
628  
629  	switch (cmd) {
630  	case F_GETLK:
631  		err = get_compat_flock(&flock, compat_ptr(arg));
632  		if (err)
633  			break;
634  		err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
635  		if (err)
636  			break;
637  		err = fixup_compat_flock(&flock);
638  		if (!err)
639  			err = put_compat_flock(&flock, compat_ptr(arg));
640  		break;
641  	case F_GETLK64:
642  	case F_OFD_GETLK:
643  		err = get_compat_flock64(&flock, compat_ptr(arg));
644  		if (err)
645  			break;
646  		err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
647  		if (!err)
648  			err = put_compat_flock64(&flock, compat_ptr(arg));
649  		break;
650  	case F_SETLK:
651  	case F_SETLKW:
652  		err = get_compat_flock(&flock, compat_ptr(arg));
653  		if (err)
654  			break;
655  		err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
656  		break;
657  	case F_SETLK64:
658  	case F_SETLKW64:
659  	case F_OFD_SETLK:
660  	case F_OFD_SETLKW:
661  		err = get_compat_flock64(&flock, compat_ptr(arg));
662  		if (err)
663  			break;
664  		err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
665  		break;
666  	default:
667  		err = do_fcntl(fd, cmd, arg, f.file);
668  		break;
669  	}
670  out_put:
671  	fdput(f);
672  	return err;
673  }
674  
675  COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
676  		       compat_ulong_t, arg)
677  {
678  	return do_compat_fcntl64(fd, cmd, arg);
679  }
680  
681  COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
682  		       compat_ulong_t, arg)
683  {
684  	switch (cmd) {
685  	case F_GETLK64:
686  	case F_SETLK64:
687  	case F_SETLKW64:
688  	case F_OFD_GETLK:
689  	case F_OFD_SETLK:
690  	case F_OFD_SETLKW:
691  		return -EINVAL;
692  	}
693  	return do_compat_fcntl64(fd, cmd, arg);
694  }
695  #endif
696  
697  /* Table to convert sigio signal codes into poll band bitmaps */
698  
699  static const __poll_t band_table[NSIGPOLL] = {
700  	EPOLLIN | EPOLLRDNORM,			/* POLL_IN */
701  	EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND,	/* POLL_OUT */
702  	EPOLLIN | EPOLLRDNORM | EPOLLMSG,		/* POLL_MSG */
703  	EPOLLERR,				/* POLL_ERR */
704  	EPOLLPRI | EPOLLRDBAND,			/* POLL_PRI */
705  	EPOLLHUP | EPOLLERR			/* POLL_HUP */
706  };
707  
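     /*
      * May the fown_struct's setter signal task @p?  Modelled on the
      * kill(2) permission rules (root, or the saved uid/euid matching the
      * target's uid/suid), with a final LSM veto.
      */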
708  static inline int sigio_perm(struct task_struct *p,
709                               struct fown_struct *fown, int sig)
710  {
711  	const struct cred *cred;
712  	int ret;
713  
714  	rcu_read_lock();
715  	cred = __task_cred(p);
716  	ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
717  		uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
718  		uid_eq(fown->uid,  cred->suid) || uid_eq(fown->uid,  cred->uid)) &&
719  	       !security_file_send_sigiotask(p, fown, sig));
720  	rcu_read_unlock();
721  	return ret;
722  }
723  
724  static void send_sigio_to_task(struct task_struct *p,
725  			       struct fown_struct *fown,
726  			       int fd, int reason, enum pid_type type)
727  {
728  	/*
729  	 * F_SETSIG can change ->signum lockless in parallel, make
730  	 * sure we read it once and use the same value throughout.
731  	 */
732  	int signum = READ_ONCE(fown->signum);
733  
734  	if (!sigio_perm(p, fown, signum))
735  		return;
736  
737  	switch (signum) {
738  		kernel_siginfo_t si;
739  		default:
740  			/* Queue a rt signal with the appropriate fd as its
741  			   value.  We use SI_SIGIO as the source, not
742  			   SI_KERNEL, since kernel signals always get
743  			   delivered even if we can't queue.  Failure to
744  			   queue in this case _should_ be reported; we fall
745  			   back to SIGIO in that case. --sct */
746  			clear_siginfo(&si);
747  			si.si_signo = signum;
748  			si.si_errno = 0;
749  			si.si_code  = reason;
750  			/*
751  			 * POSIX defines POLL_IN and friends to be signal
752  			 * specific si_codes for SIGPOLL.  Linux extended
753  			 * these si_codes to other signals in a way that is
754  			 * ambiguous if other signals also have signal
755  			 * specific si_codes.  In that case use SI_SIGIO instead
756  			 * to remove the ambiguity.
757  			 */
758  			if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
759  				si.si_code = SI_SIGIO;
760  
761  			/* Make sure we are called with one of the POLL_*
762  			   reasons, otherwise we could leak kernel stack into
763  			   userspace.  */
764  			BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL));
765  			if (reason - POLL_IN >= NSIGPOLL)
766  				si.si_band  = ~0L;
767  			else
768  				si.si_band = mangle_poll(band_table[reason - POLL_IN]);
769  			si.si_fd    = fd;
770  			if (!do_send_sig_info(signum, &si, p, type))
771  				break;
772  		/* fall-through - fall back on the old plain SIGIO signal */
773  		case 0:
774  			do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
775  	}
776  }
777  
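     /*
      * Deliver SIGIO (or the signal chosen with F_SETSIG) to the owner
      * installed by F_SETOWN.  Illustrative userspace setup for
      * signal-driven I/O (a sketch, not part of this file):
      *
      *	fcntl(fd, F_SETOWN, getpid());
      *	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);
      */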
778  void send_sigio(struct fown_struct *fown, int fd, int band)
779  {
780  	struct task_struct *p;
781  	enum pid_type type;
782  	struct pid *pid;
783  
784  	read_lock(&fown->lock);
785  
786  	type = fown->pid_type;
787  	pid = fown->pid;
788  	if (!pid)
789  		goto out_unlock_fown;
790  
791  	if (type <= PIDTYPE_TGID) {
792  		rcu_read_lock();
793  		p = pid_task(pid, PIDTYPE_PID);
794  		if (p)
795  			send_sigio_to_task(p, fown, fd, band, type);
796  		rcu_read_unlock();
797  	} else {
798  		read_lock(&tasklist_lock);
799  		do_each_pid_task(pid, type, p) {
800  			send_sigio_to_task(p, fown, fd, band, type);
801  		} while_each_pid_task(pid, type, p);
802  		read_unlock(&tasklist_lock);
803  	}
804   out_unlock_fown:
805  	read_unlock(&fown->lock);
806  }
807  
808  static void send_sigurg_to_task(struct task_struct *p,
809  				struct fown_struct *fown, enum pid_type type)
810  {
811  	if (sigio_perm(p, fown, SIGURG))
812  		do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
813  }
814  
815  int send_sigurg(struct fown_struct *fown)
816  {
817  	struct task_struct *p;
818  	enum pid_type type;
819  	struct pid *pid;
820  	int ret = 0;
821  
822  	read_lock(&fown->lock);
823  
824  	type = fown->pid_type;
825  	pid = fown->pid;
826  	if (!pid)
827  		goto out_unlock_fown;
828  
829  	ret = 1;
830  
831  	if (type <= PIDTYPE_TGID) {
832  		rcu_read_lock();
833  		p = pid_task(pid, PIDTYPE_PID);
834  		if (p)
835  			send_sigurg_to_task(p, fown, type);
836  		rcu_read_unlock();
837  	} else {
838  		read_lock(&tasklist_lock);
839  		do_each_pid_task(pid, type, p) {
840  			send_sigurg_to_task(p, fown, type);
841  		} while_each_pid_task(pid, type, p);
842  		read_unlock(&tasklist_lock);
843  	}
844   out_unlock_fown:
845  	read_unlock(&fown->lock);
846  	return ret;
847  }
848  
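     /*
      * fasync bookkeeping: one fasync_struct per subscribed (file, fd)
      * pair, chained off the owning object and guarded by fasync_lock.
      * Entries are freed through RCU so kill_fasync() can walk the list
      * without taking locks.
      */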
849  static DEFINE_SPINLOCK(fasync_lock);
850  static struct kmem_cache *fasync_cache __read_mostly;
851  
852  static void fasync_free_rcu(struct rcu_head *head)
853  {
854  	kmem_cache_free(fasync_cache,
855  			container_of(head, struct fasync_struct, fa_rcu));
856  }
857  
858  /*
859   * Remove a fasync entry. If successfully removed, return
860   * positive and clear the FASYNC flag. If no entry exists,
861   * do nothing and return 0.
862   *
863   * NOTE! It is very important that the FASYNC flag always
864   * match the state "is the filp on a fasync list".
865   */
867  int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
868  {
869  	struct fasync_struct *fa, **fp;
870  	int result = 0;
871  
872  	spin_lock(&filp->f_lock);
873  	spin_lock(&fasync_lock);
874  	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
875  		if (fa->fa_file != filp)
876  			continue;
877  
878  		write_lock_irq(&fa->fa_lock);
879  		fa->fa_file = NULL;
880  		write_unlock_irq(&fa->fa_lock);
881  
882  		*fp = fa->fa_next;
883  		call_rcu(&fa->fa_rcu, fasync_free_rcu);
884  		filp->f_flags &= ~FASYNC;
885  		result = 1;
886  		break;
887  	}
888  	spin_unlock(&fasync_lock);
889  	spin_unlock(&filp->f_lock);
890  	return result;
891  }
892  
893  struct fasync_struct *fasync_alloc(void)
894  {
895  	return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
896  }
897  
898  /*
899   * NOTE! This can be used only for unused fasync entries:
900   * entries that actually got inserted on the fasync list
901   * need to be released by rcu - see fasync_remove_entry.
902   */
903  void fasync_free(struct fasync_struct *new)
904  {
905  	kmem_cache_free(fasync_cache, new);
906  }
907  
908  /*
909   * Insert a new entry into the fasync list.  Return the pointer to the
910   * old one if we didn't use the new one.
911   *
912   * NOTE! It is very important that the FASYNC flag always
913   * match the state "is the filp on a fasync list".
914   */
915  struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
916  {
917  	struct fasync_struct *fa, **fp;
918  
919  	spin_lock(&filp->f_lock);
920  	spin_lock(&fasync_lock);
921  	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
922  		if (fa->fa_file != filp)
923  			continue;
924  
925  		write_lock_irq(&fa->fa_lock);
926  		fa->fa_fd = fd;
927  		write_unlock_irq(&fa->fa_lock);
928  		goto out;
929  	}
930  
931  	rwlock_init(&new->fa_lock);
932  	new->magic = FASYNC_MAGIC;
933  	new->fa_file = filp;
934  	new->fa_fd = fd;
935  	new->fa_next = *fapp;
936  	rcu_assign_pointer(*fapp, new);
937  	filp->f_flags |= FASYNC;
938  
939  out:
940  	spin_unlock(&fasync_lock);
941  	spin_unlock(&filp->f_lock);
942  	return fa;
943  }
944  
945  /*
946   * Add a fasync entry. Return negative on error, positive if
947   * added, and zero if it did nothing but change an existing one.
948   */
949  static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
950  {
951  	struct fasync_struct *new;
952  
953  	new = fasync_alloc();
954  	if (!new)
955  		return -ENOMEM;
956  
957  	/*
958  	 * fasync_insert_entry() returns the old (updated) entry if
959  	 * it existed.
960  	 *
961  	 * So free the (unused) new entry and return 0 to let the
962  	 * caller know that we didn't add any new fasync entries.
963  	 */
964  	if (fasync_insert_entry(fd, filp, fapp, new)) {
965  		fasync_free(new);
966  		return 0;
967  	}
968  
969  	return 1;
970  }
971  
972  /*
973   * fasync_helper() is used by almost all character device drivers
974   * to set up the fasync queue, and for regular files by the file
975   * lease code. It returns negative on error, 0 if it made no changes
976   * and positive if it added/deleted the entry.
977   */
978  int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
979  {
980  	if (!on)
981  		return fasync_remove_entry(filp, fapp);
982  	return fasync_add_entry(fd, filp, fapp);
983  }
984  
985  EXPORT_SYMBOL(fasync_helper);
986  
987  /*
988   * rcu_read_lock() is held
989   */
990  static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
991  {
992  	while (fa) {
993  		struct fown_struct *fown;
994  
995  		if (fa->magic != FASYNC_MAGIC) {
996  			printk(KERN_ERR "kill_fasync: bad magic number in "
997  			       "fasync_struct!\n");
998  			return;
999  		}
1000  		read_lock(&fa->fa_lock);
1001  		if (fa->fa_file) {
1002  			fown = &fa->fa_file->f_owner;
1003  			/* Don't send SIGURG to processes which have not set a
1004  			   queued signum: SIGURG has its own default signalling
1005  			   mechanism. */
1006  			if (!(sig == SIGURG && fown->signum == 0))
1007  				send_sigio(fown, fa->fa_fd, band);
1008  		}
1009  		read_unlock(&fa->fa_lock);
1010  		fa = rcu_dereference(fa->fa_next);
1011  	}
1012  }
1013  
1014  void kill_fasync(struct fasync_struct **fp, int sig, int band)
1015  {
1016  	/* First a quick test without locking: usually
1017  	 * the list is empty.
1018  	 */
1019  	if (*fp) {
1020  		rcu_read_lock();
1021  		kill_fasync_rcu(rcu_dereference(*fp), sig, band);
1022  		rcu_read_unlock();
1023  	}
1024  }
1025  EXPORT_SYMBOL(kill_fasync);
1026  
1027  static int __init fcntl_init(void)
1028  {
1029  	/*
1030  	 * Please add new bits here to ensure allocation uniqueness.
1031  	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
1032  	 * is defined as O_NONBLOCK on some platforms and not on others.
1033  	 */
1034  	BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
1035  		HWEIGHT32(
1036  			(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
1037  			__FMODE_EXEC | __FMODE_NONOTIFY));
1038  
1039  	fasync_cache = kmem_cache_create("fasync_cache",
1040  		sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
1041  	return 0;
1042  }
1043  
1044  module_init(fcntl_init)
1045