xref: /linux/fs/fcntl.c (revision 81464192839de0b5bc84c5739381101e04d94f62)
// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/sched/task.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/pipe_fs_i.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/memfd.h>
#include <linux/compat.h>

#include <linux/poll.h>
#include <asm/siginfo.h>
#include <linux/uaccess.h>

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)

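/*
 * Apply F_SETFL.  Only the flags in SETFL_MASK may be changed; O_APPEND
 * may not be cleared on append-only inodes, O_NOATIME is restricted to
 * the owner (or a capable caller), and toggling FASYNC goes through the
 * file's ->fasync() method.
 */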
static int setfl(int fd, struct file *filp, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	int error = 0;

	/*
	 * O_APPEND cannot be cleared if the file is marked as append-only
	 * and the file is open for write.
	 */
	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
		if (!inode_owner_or_capable(inode))
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	/* Pipe packetized mode is controlled by O_DIRECT flag */
	if (!S_ISFIFO(inode->i_mode) && (arg & O_DIRECT)) {
		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
		    !filp->f_mapping->a_ops->direct_IO)
			return -EINVAL;
	}

	if (filp->f_op->check_flags)
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

	/*
	 * ->fasync() is responsible for setting the FASYNC bit.
	 */
	if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
		error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
		if (error < 0)
			goto out;
		if (error > 0)
			error = 0;
	}
	spin_lock(&filp->f_lock);
	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
	spin_unlock(&filp->f_lock);

out:
	return error;
}

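/*
 * Install @pid as the owner of @filp.  With @force the current owner is
 * replaced unconditionally; otherwise only an unset owner is filled in.
 * The caller's uid/euid are recorded for the permission check done later
 * in sigio_perm().
 */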
static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
		     int force)
{
	write_lock_irq(&filp->f_owner.lock);
	if (force || !filp->f_owner.pid) {
		put_pid(filp->f_owner.pid);
		filp->f_owner.pid = get_pid(pid);
		filp->f_owner.pid_type = type;

		if (pid) {
			const struct cred *cred = current_cred();
			filp->f_owner.uid = cred->uid;
			filp->f_owner.euid = cred->euid;
		}
	}
	write_unlock_irq(&filp->f_owner.lock);
}

void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
		int force)
{
	security_file_set_fowner(filp);
	f_modown(filp, pid, type, force);
}
EXPORT_SYMBOL(__f_setown);

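/*
 * F_SETOWN: a positive @arg names a thread group, a negative one a
 * process group (e.g. fcntl(fd, F_SETOWN, -pgrp)), and zero clears the
 * owner.
 */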
int f_setown(struct file *filp, unsigned long arg, int force)
{
	enum pid_type type;
	struct pid *pid = NULL;
	int who = arg, ret = 0;

	type = PIDTYPE_TGID;
	if (who < 0) {
		/* avoid overflow below */
		if (who == INT_MIN)
			return -EINVAL;

		type = PIDTYPE_PGID;
		who = -who;
	}

	rcu_read_lock();
	if (who) {
		pid = find_vpid(who);
		if (!pid)
			ret = -ESRCH;
	}

	if (!ret)
		__f_setown(filp, pid, type, force);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(f_setown);

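/* Unconditionally clear the owner of @filp. */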
void f_delown(struct file *filp)
{
	f_modown(filp, NULL, PIDTYPE_TGID, 1);
}

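/*
 * F_GETOWN: return the owner's pid as seen from the caller's pid
 * namespace, negated if the owner is a process group.
 */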
pid_t f_getown(struct file *filp)
{
	pid_t pid;

	read_lock(&filp->f_owner.lock);
	pid = pid_vnr(filp->f_owner.pid);
	if (filp->f_owner.pid_type == PIDTYPE_PGID)
		pid = -pid;
	read_unlock(&filp->f_owner.lock);
	return pid;
}

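/*
 * F_SETOWN_EX: like F_SETOWN, but takes a struct f_owner_ex so that a
 * single thread (F_OWNER_TID) can be targeted as well as a process or
 * process group.  Example (userspace):
 *
 *	struct f_owner_ex oe = { .type = F_OWNER_TID, .pid = tid };
 *	fcntl(fd, F_SETOWN_EX, &oe);
 */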
static int f_setown_ex(struct file *filp, unsigned long arg)
{
	struct f_owner_ex __user *owner_p = (void __user *)arg;
	struct f_owner_ex owner;
	struct pid *pid;
	int type;
	int ret;

	ret = copy_from_user(&owner, owner_p, sizeof(owner));
	if (ret)
		return -EFAULT;

	switch (owner.type) {
	case F_OWNER_TID:
		type = PIDTYPE_PID;
		break;

	case F_OWNER_PID:
		type = PIDTYPE_TGID;
		break;

	case F_OWNER_PGRP:
		type = PIDTYPE_PGID;
		break;

	default:
		return -EINVAL;
	}

	rcu_read_lock();
	pid = find_vpid(owner.pid);
	if (owner.pid && !pid)
		ret = -ESRCH;
	else
		__f_setown(filp, pid, type, 1);
	rcu_read_unlock();

	return ret;
}

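/* F_GETOWN_EX: report the owner as a struct f_owner_ex. */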
static int f_getown_ex(struct file *filp, unsigned long arg)
{
	struct f_owner_ex __user *owner_p = (void __user *)arg;
	struct f_owner_ex owner;
	int ret = 0;

	read_lock(&filp->f_owner.lock);
	owner.pid = pid_vnr(filp->f_owner.pid);
	switch (filp->f_owner.pid_type) {
	case PIDTYPE_PID:
		owner.type = F_OWNER_TID;
		break;

	case PIDTYPE_TGID:
		owner.type = F_OWNER_PID;
		break;

	case PIDTYPE_PGID:
		owner.type = F_OWNER_PGRP;
		break;

	default:
		WARN_ON(1);
		ret = -EINVAL;
		break;
	}
	read_unlock(&filp->f_owner.lock);

	if (!ret) {
		ret = copy_to_user(owner_p, &owner, sizeof(owner));
		if (ret)
			ret = -EFAULT;
	}
	return ret;
}

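/*
 * F_GETOWNER_UIDS: report the uid/euid recorded when the owner was set.
 * Only needed by checkpoint/restore, hence the config dependency.
 */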
#ifdef CONFIG_CHECKPOINT_RESTORE
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	struct user_namespace *user_ns = current_user_ns();
	uid_t __user *dst = (void __user *)arg;
	uid_t src[2];
	int err;

	read_lock(&filp->f_owner.lock);
	src[0] = from_kuid(user_ns, filp->f_owner.uid);
	src[1] = from_kuid(user_ns, filp->f_owner.euid);
	read_unlock(&filp->f_owner.lock);

	err  = put_user(src[0], &dst[0]);
	err |= put_user(src[1], &dst[1]);

	return err;
}
#else
static int f_getowner_uids(struct file *filp, unsigned long arg)
{
	return -EINVAL;
}
#endif

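/* Validate a write-lifetime hint passed in from userspace. */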
static bool rw_hint_valid(enum rw_hint hint)
{
	switch (hint) {
	case RWH_WRITE_LIFE_NOT_SET:
	case RWH_WRITE_LIFE_NONE:
	case RWH_WRITE_LIFE_SHORT:
	case RWH_WRITE_LIFE_MEDIUM:
	case RWH_WRITE_LIFE_LONG:
	case RWH_WRITE_LIFE_EXTREME:
		return true;
	default:
		return false;
	}
}

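/*
 * Get/set write-lifetime hints.  The F_{GET,SET}_FILE_RW_HINT commands
 * operate on the struct file, F_{GET,SET}_RW_HINT on the inode.
 */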
static long fcntl_rw_hint(struct file *file, unsigned int cmd,
			  unsigned long arg)
{
	struct inode *inode = file_inode(file);
	u64 __user *argp = (u64 __user *)arg;
	enum rw_hint hint;
	u64 h;

	switch (cmd) {
	case F_GET_FILE_RW_HINT:
		h = file_write_hint(file);
		if (copy_to_user(argp, &h, sizeof(*argp)))
			return -EFAULT;
		return 0;
	case F_SET_FILE_RW_HINT:
		if (copy_from_user(&h, argp, sizeof(h)))
			return -EFAULT;
		hint = (enum rw_hint) h;
		if (!rw_hint_valid(hint))
			return -EINVAL;

		spin_lock(&file->f_lock);
		file->f_write_hint = hint;
		spin_unlock(&file->f_lock);
		return 0;
	case F_GET_RW_HINT:
		h = inode->i_write_hint;
		if (copy_to_user(argp, &h, sizeof(*argp)))
			return -EFAULT;
		return 0;
	case F_SET_RW_HINT:
		if (copy_from_user(&h, argp, sizeof(h)))
			return -EFAULT;
		hint = (enum rw_hint) h;
		if (!rw_hint_valid(hint))
			return -EINVAL;

		inode_lock(inode);
		inode->i_write_hint = hint;
		inode_unlock(inode);
		return 0;
	default:
		return -EINVAL;
	}
}

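/* Dispatch one fcntl command on an already-resolved struct file. */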
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
		struct file *filp)
{
	void __user *argp = (void __user *)arg;
	struct flock flock;
	long err = -EINVAL;

	switch (cmd) {
	case F_DUPFD:
		err = f_dupfd(arg, filp, 0);
		break;
	case F_DUPFD_CLOEXEC:
		err = f_dupfd(arg, filp, O_CLOEXEC);
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, arg & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, arg);
		break;
#if BITS_PER_LONG != 32
	/* 32-bit arches must use fcntl64() */
	case F_OFD_GETLK:
#endif
	case F_GETLK:
		if (copy_from_user(&flock, argp, sizeof(flock)))
			return -EFAULT;
		err = fcntl_getlk(filp, cmd, &flock);
		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
			return -EFAULT;
		break;
#if BITS_PER_LONG != 32
	/* 32-bit arches must use fcntl64() */
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
#endif
		/* Fallthrough */
	case F_SETLK:
	case F_SETLKW:
		if (copy_from_user(&flock, argp, sizeof(flock)))
			return -EFAULT;
		err = fcntl_setlk(fd, filp, cmd, &flock);
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
		err = f_getown(filp);
		force_successful_syscall_return();
		break;
	case F_SETOWN:
		err = f_setown(filp, arg, 1);
		break;
	case F_GETOWN_EX:
		err = f_getown_ex(filp, arg);
		break;
	case F_SETOWN_EX:
		err = f_setown_ex(filp, arg);
		break;
	case F_GETOWNER_UIDS:
		err = f_getowner_uids(filp, arg);
		break;
	case F_GETSIG:
		err = filp->f_owner.signum;
		break;
	case F_SETSIG:
		/* arg == 0 restores default behaviour. */
		if (!valid_signal(arg))
			break;
		err = 0;
		filp->f_owner.signum = arg;
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, arg);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, arg);
		break;
	case F_SETPIPE_SZ:
	case F_GETPIPE_SZ:
		err = pipe_fcntl(filp, cmd, arg);
		break;
	case F_ADD_SEALS:
	case F_GET_SEALS:
		err = memfd_fcntl(filp, cmd, arg);
		break;
	case F_GET_RW_HINT:
	case F_SET_RW_HINT:
	case F_GET_FILE_RW_HINT:
	case F_SET_FILE_RW_HINT:
		err = fcntl_rw_hint(filp, cmd, arg);
		break;
	default:
		break;
	}
	return err;
}

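/*
 * The only commands allowed on an O_PATH descriptor: they operate on
 * the file descriptor itself, not on the underlying file.
 */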
static int check_fcntl_cmd(unsigned cmd)
{
	switch (cmd) {
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
	case F_GETFD:
	case F_SETFD:
	case F_GETFL:
		return 1;
	}
	return 0;
}

SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
	struct fd f = fdget_raw(fd);
	long err = -EBADF;

	if (!f.file)
		goto out;

	if (unlikely(f.file->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out1;
	}

	err = security_file_fcntl(f.file, cmd, arg);
	if (!err)
		err = do_fcntl(fd, cmd, arg, f.file);

out1:
	fdput(f);
out:
	return err;
}

#if BITS_PER_LONG == 32
SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		unsigned long, arg)
{
	void __user *argp = (void __user *)arg;
	struct fd f = fdget_raw(fd);
	struct flock64 flock;
	long err = -EBADF;

	if (!f.file)
		goto out;

	if (unlikely(f.file->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out1;
	}

	err = security_file_fcntl(f.file, cmd, arg);
	if (err)
		goto out1;

	switch (cmd) {
	case F_GETLK64:
	case F_OFD_GETLK:
		err = -EFAULT;
		if (copy_from_user(&flock, argp, sizeof(flock)))
			break;
		err = fcntl_getlk64(f.file, cmd, &flock);
		if (!err && copy_to_user(argp, &flock, sizeof(flock)))
			err = -EFAULT;
		break;
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		err = -EFAULT;
		if (copy_from_user(&flock, argp, sizeof(flock)))
			break;
		err = fcntl_setlk64(fd, f.file, cmd, &flock);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, f.file);
		break;
	}
out1:
	fdput(f);
out:
	return err;
}
#endif

#ifdef CONFIG_COMPAT
/* careful - don't use anywhere else */
#define copy_flock_fields(dst, src)		\
	(dst)->l_type = (src)->l_type;		\
	(dst)->l_whence = (src)->l_whence;	\
	(dst)->l_start = (src)->l_start;	\
	(dst)->l_len = (src)->l_len;		\
	(dst)->l_pid = (src)->l_pid;

static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl)
{
	struct compat_flock fl;

	if (copy_from_user(&fl, ufl, sizeof(struct compat_flock)))
		return -EFAULT;
	copy_flock_fields(kfl, &fl);
	return 0;
}

static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl)
{
	struct compat_flock64 fl;

	if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64)))
		return -EFAULT;
	copy_flock_fields(kfl, &fl);
	return 0;
}

static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl)
{
	struct compat_flock fl;

	memset(&fl, 0, sizeof(struct compat_flock));
	copy_flock_fields(&fl, kfl);
	if (copy_to_user(ufl, &fl, sizeof(struct compat_flock)))
		return -EFAULT;
	return 0;
}

static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl)
{
	struct compat_flock64 fl;

	BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start));
	BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len));

	memset(&fl, 0, sizeof(struct compat_flock64));
	copy_flock_fields(&fl, kfl);
	if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64)))
		return -EFAULT;
	return 0;
}
#undef copy_flock_fields

static unsigned int
convert_fcntl_cmd(unsigned int cmd)
{
	switch (cmd) {
	case F_GETLK64:
		return F_GETLK;
	case F_SETLK64:
		return F_SETLK;
	case F_SETLKW64:
		return F_SETLKW;
	}

	return cmd;
}

/*
 * GETLK was successful and we need to return the data, but it needs to fit in
 * the compat structure.
 * l_start shouldn't be too big, unless the original start + end is greater
 * than COMPAT_OFF_T_MAX, in which case the app was asking for trouble and we
 * return -EOVERFLOW.  l_len could be too big, in which case we just truncate
 * it and only let the app see the part of the conflicting lock that might
 * make sense to it anyway.
 */
static int fixup_compat_flock(struct flock *flock)
{
	if (flock->l_start > COMPAT_OFF_T_MAX)
		return -EOVERFLOW;
	if (flock->l_len > COMPAT_OFF_T_MAX)
		flock->l_len = COMPAT_OFF_T_MAX;
	return 0;
}

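/*
 * Common implementation of the 32-bit fcntl()/fcntl64() entry points:
 * translate the compat flock layouts and share do_fcntl() for
 * everything else.
 */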
static long do_compat_fcntl64(unsigned int fd, unsigned int cmd,
			      compat_ulong_t arg)
{
	struct fd f = fdget_raw(fd);
	struct flock flock;
	long err = -EBADF;

	if (!f.file)
		return err;

	if (unlikely(f.file->f_mode & FMODE_PATH)) {
		if (!check_fcntl_cmd(cmd))
			goto out_put;
	}

	err = security_file_fcntl(f.file, cmd, arg);
	if (err)
		goto out_put;

	switch (cmd) {
	case F_GETLK:
		err = get_compat_flock(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
		if (err)
			break;
		err = fixup_compat_flock(&flock);
		if (!err)
			err = put_compat_flock(&flock, compat_ptr(arg));
		break;
	case F_GETLK64:
	case F_OFD_GETLK:
		err = get_compat_flock64(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
		if (!err)
			err = put_compat_flock64(&flock, compat_ptr(arg));
		break;
	case F_SETLK:
	case F_SETLKW:
		err = get_compat_flock(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
		break;
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		err = get_compat_flock64(&flock, compat_ptr(arg));
		if (err)
			break;
		err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, f.file);
		break;
	}
out_put:
	fdput(f);
	return err;
}

COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
		       compat_ulong_t, arg)
{
	return do_compat_fcntl64(fd, cmd, arg);
}

COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
		       compat_ulong_t, arg)
{
	switch (cmd) {
	case F_GETLK64:
	case F_SETLK64:
	case F_SETLKW64:
	case F_OFD_GETLK:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
		return -EINVAL;
	}
	return do_compat_fcntl64(fd, cmd, arg);
}
#endif

/* Table to convert sigio signal codes into poll band bitmaps */

static const __poll_t band_table[NSIGPOLL] = {
	EPOLLIN | EPOLLRDNORM,			/* POLL_IN */
	EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND,	/* POLL_OUT */
	EPOLLIN | EPOLLRDNORM | EPOLLMSG,	/* POLL_MSG */
	EPOLLERR,				/* POLL_ERR */
	EPOLLPRI | EPOLLRDBAND,			/* POLL_PRI */
	EPOLLHUP | EPOLLERR			/* POLL_HUP */
};

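/*
 * Check whether the credentials captured when the owner was set allow
 * signalling task @p, and give the LSM a chance to veto it.
 */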
static inline int sigio_perm(struct task_struct *p,
			     struct fown_struct *fown, int sig)
{
	const struct cred *cred;
	int ret;

	rcu_read_lock();
	cred = __task_cred(p);
	ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
		uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
		uid_eq(fown->uid,  cred->suid) || uid_eq(fown->uid,  cred->uid)) &&
	       !security_file_send_sigiotask(p, fown, sig));
	rcu_read_unlock();
	return ret;
}

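/* Deliver SIGIO (or the signal chosen with F_SETSIG) to a single task. */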
static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd, int reason, enum pid_type type)
{
	/*
	 * F_SETSIG can change ->signum locklessly in parallel, make
	 * sure we read it once and use the same value throughout.
	 */
	int signum = READ_ONCE(fown->signum);

	if (!sigio_perm(p, fown, signum))
		return;

	switch (signum) {
		default: {
			kernel_siginfo_t si;

			/* Queue a rt signal with the appropriate fd as its
			   value.  We use SI_SIGIO as the source, not
			   SI_KERNEL, since kernel signals always get
			   delivered even if we can't queue.  Failure to
			   queue in this case _should_ be reported; we fall
			   back to SIGIO in that case. --sct */
			clear_siginfo(&si);
			si.si_signo = signum;
			si.si_errno = 0;
			si.si_code  = reason;
			/*
			 * POSIX defines POLL_IN and friends to be signal
			 * specific si_codes for SIGPOLL.  Linux extended
			 * these si_codes to other signals in a way that is
			 * ambiguous if other signals also have signal
			 * specific si_codes.  In that case use SI_SIGIO instead
			 * to remove the ambiguity.
			 */
			if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
				si.si_code = SI_SIGIO;

			/* Make sure we are called with one of the POLL_*
			   reasons, otherwise we could leak kernel stack into
			   userspace.  */
			BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL));
			if (reason - POLL_IN >= NSIGPOLL)
				si.si_band  = ~0L;
			else
				si.si_band = mangle_poll(band_table[reason - POLL_IN]);
			si.si_fd    = fd;
			if (!do_send_sig_info(signum, &si, p, type))
				break;
		}
		/* fall-through - fall back on the old plain SIGIO signal */
		case 0:
			do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
	}
}

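/*
 * Send SIGIO for @band events on @fd to the registered owner: one task
 * for PIDTYPE_PID/PIDTYPE_TGID, otherwise every task in the process
 * group or session.
 */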
void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;

	read_lock(&fown->lock);

	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	if (type <= PIDTYPE_TGID) {
		rcu_read_lock();
		p = pid_task(pid, PIDTYPE_PID);
		if (p)
			send_sigio_to_task(p, fown, fd, band, type);
		rcu_read_unlock();
	} else {
		read_lock(&tasklist_lock);
		do_each_pid_task(pid, type, p) {
			send_sigio_to_task(p, fown, fd, band, type);
		} while_each_pid_task(pid, type, p);
		read_unlock(&tasklist_lock);
	}
out_unlock_fown:
	read_unlock(&fown->lock);
}

static void send_sigurg_to_task(struct task_struct *p,
				struct fown_struct *fown, enum pid_type type)
{
	if (sigio_perm(p, fown, SIGURG))
		do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
}

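/*
 * Send SIGURG (out-of-band data on a socket) to the registered owner.
 * Returns non-zero if an owner was set.
 */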
int send_sigurg(struct fown_struct *fown)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;
	int ret = 0;

	read_lock(&fown->lock);

	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	if (type <= PIDTYPE_TGID) {
		rcu_read_lock();
		p = pid_task(pid, PIDTYPE_PID);
		if (p)
			send_sigurg_to_task(p, fown, type);
		rcu_read_unlock();
	} else {
		read_lock(&tasklist_lock);
		do_each_pid_task(pid, type, p) {
			send_sigurg_to_task(p, fown, type);
		} while_each_pid_task(pid, type, p);
		read_unlock(&tasklist_lock);
	}
out_unlock_fown:
	read_unlock(&fown->lock);
	return ret;
}

static DEFINE_SPINLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __read_mostly;

static void fasync_free_rcu(struct rcu_head *head)
{
	kmem_cache_free(fasync_cache,
			container_of(head, struct fasync_struct, fa_rcu));
}

/*
 * Remove a fasync entry. If successfully removed, return
 * positive and clear the FASYNC flag. If no entry exists,
 * do nothing and return 0.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	int result = 0;

	spin_lock(&filp->f_lock);
	spin_lock(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;

		write_lock_irq(&fa->fa_lock);
		fa->fa_file = NULL;
		write_unlock_irq(&fa->fa_lock);

		*fp = fa->fa_next;
		call_rcu(&fa->fa_rcu, fasync_free_rcu);
		filp->f_flags &= ~FASYNC;
		result = 1;
		break;
	}
	spin_unlock(&fasync_lock);
	spin_unlock(&filp->f_lock);
	return result;
}

struct fasync_struct *fasync_alloc(void)
{
	return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
}

/*
 * NOTE! This can be used only for unused fasync entries:
 * entries that actually got inserted on the fasync list
 * need to be released by rcu - see fasync_remove_entry.
 */
void fasync_free(struct fasync_struct *new)
{
	kmem_cache_free(fasync_cache, new);
}

/*
 * Insert a new entry into the fasync list.  Return the pointer to the
 * old one if we didn't use the new one.
 *
 * NOTE! It is very important that the FASYNC flag always
 * match the state "is the filp on a fasync list".
 */
struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
{
	struct fasync_struct *fa, **fp;

	spin_lock(&filp->f_lock);
	spin_lock(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file != filp)
			continue;

		write_lock_irq(&fa->fa_lock);
		fa->fa_fd = fd;
		write_unlock_irq(&fa->fa_lock);
		goto out;
	}

	rwlock_init(&new->fa_lock);
	new->magic = FASYNC_MAGIC;
	new->fa_file = filp;
	new->fa_fd = fd;
	new->fa_next = *fapp;
	rcu_assign_pointer(*fapp, new);
	filp->f_flags |= FASYNC;

out:
	spin_unlock(&fasync_lock);
	spin_unlock(&filp->f_lock);
	return fa;
}

/*
 * Add a fasync entry. Return negative on error, positive if
 * added, and zero if it did nothing but change an existing one.
 */
static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
{
	struct fasync_struct *new;

	new = fasync_alloc();
	if (!new)
		return -ENOMEM;

	/*
	 * fasync_insert_entry() returns the old (updated) entry if
	 * it existed.
	 *
	 * So free the (unused) new entry and return 0 to let the
	 * caller know that we didn't add any new fasync entries.
	 */
	if (fasync_insert_entry(fd, filp, fapp, new)) {
		fasync_free(new);
		return 0;
	}

	return 1;
}

/*
 * fasync_helper() is used by almost all character device drivers
 * to set up the fasync queue, and for regular files by the file
 * lease code. It returns negative on error, 0 if it made no changes
 * and positive if it added/deleted the entry.
 */
int fasync_helper(int fd, struct file *filp, int on, struct fasync_struct **fapp)
{
	if (!on)
		return fasync_remove_entry(filp, fapp);
	return fasync_add_entry(fd, filp, fapp);
}
EXPORT_SYMBOL(fasync_helper);

/*
 * rcu_read_lock() is held
 */
static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct *fown;

		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		read_lock(&fa->fa_lock);
		if (fa->fa_file) {
			fown = &fa->fa_file->f_owner;
			/* Don't send SIGURG to processes which have not set a
			   queued signum: SIGURG has its own default signalling
			   mechanism. */
			if (!(sig == SIGURG && fown->signum == 0))
				send_sigio(fown, fa->fa_fd, band);
		}
		read_unlock(&fa->fa_lock);
		fa = rcu_dereference(fa->fa_next);
	}
}

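/*
 * Notify everything on the fasync list.  Callers may be in interrupt
 * context, hence the RCU list walk and the irq-safe fa_lock.
 */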
void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		rcu_read_lock();
		kill_fasync_rcu(rcu_dereference(*fp), sig, band);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL(kill_fasync);

static int __init fcntl_init(void)
{
	/*
	 * Please add new bits here to ensure allocation uniqueness.
	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
	 * is defined as O_NONBLOCK on some platforms and not on others.
	 */
	BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
		HWEIGHT32(
			(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
			__FMODE_EXEC | __FMODE_NONOTIFY));

	fasync_cache = kmem_cache_create("fasync_cache",
		sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
	return 0;
}

module_init(fcntl_init)
1047