xref: /linux/fs/fcntl.c (revision 93d546399c2b7d66a54d5fbd5eee17de19246bf6)
/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
#include <linux/smp_lock.h>

#include <asm/poll.h>
#include <asm/siginfo.h>
#include <asm/uaccess.h>

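/*
 * Set or clear the close-on-exec bit for fd in the current process's
 * descriptor table.  The table spinlock serialises concurrent updates
 * of the bitmap.
 */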
void set_close_on_exec(unsigned int fd, int flag)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	if (flag)
		FD_SET(fd, fdt->close_on_exec);
	else
		FD_CLR(fd, fdt->close_on_exec);
	spin_unlock(&files->file_lock);
}

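/*
 * Read the close-on-exec bit for fd.  RCU protection suffices here,
 * since the bitmap is only read, not modified.
 */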
static int get_close_on_exec(unsigned int fd)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	int res;
	rcu_read_lock();
	fdt = files_fdtable(files);
	res = FD_ISSET(fd, fdt->close_on_exec);
	rcu_read_unlock();
	return res;
}

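/*
 * dup3(oldfd, newfd, flags): duplicate oldfd onto newfd, closing
 * whatever was at newfd first.  The only flag accepted is O_CLOEXEC,
 * which sets close-on-exec on the new descriptor atomically and so
 * avoids the dup2()+fcntl(F_SETFD) race in threaded programs.  Unlike
 * dup2(), oldfd == newfd is rejected with -EINVAL.
 */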
asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
{
	int err = -EBADF;
	struct file *file, *tofree;
	struct files_struct *files = current->files;
	struct fdtable *fdt;

	if ((flags & ~O_CLOEXEC) != 0)
		return -EINVAL;

	if (unlikely(oldfd == newfd))
		return -EINVAL;

	spin_lock(&files->file_lock);
	err = expand_files(files, newfd);
	file = fcheck(oldfd);
	if (unlikely(!file))
		goto Ebadf;
	if (unlikely(err < 0)) {
		if (err == -EMFILE)
			goto Ebadf;
		goto out_unlock;
	}
	/*
	 * We need to detect attempts to do dup2() over an allocated but
	 * still not finished descriptor.  NB: OpenBSD avoids that at the
	 * price of extra work in their equivalent of fget() - they insert
	 * struct file immediately after grabbing the descriptor, mark it
	 * larval if more work (e.g. actual opening) is needed and make
	 * sure that fget() treats larval files as absent.  Potentially
	 * interesting, but while extra work in fget() is trivial, the
	 * locking implications and amount of surgery on open()-related
	 * paths in VFS are not.  FreeBSD fails with -EBADF in the same
	 * situation, NetBSD's "solution" deadlocks in rather amusing ways,
	 * AFAICS.  All of that is out of scope of POSIX or SUS, since
	 * neither considers shared descriptor tables and this condition
	 * does not arise without those.
	 */
	err = -EBUSY;
	fdt = files_fdtable(files);
	tofree = fdt->fd[newfd];
	if (!tofree && FD_ISSET(newfd, fdt->open_fds))
		goto out_unlock;
	get_file(file);
	rcu_assign_pointer(fdt->fd[newfd], file);
	FD_SET(newfd, fdt->open_fds);
	if (flags & O_CLOEXEC)
		FD_SET(newfd, fdt->close_on_exec);
	else
		FD_CLR(newfd, fdt->close_on_exec);
	spin_unlock(&files->file_lock);

	if (tofree)
		filp_close(tofree, files);

	return newfd;

Ebadf:
	err = -EBADF;
out_unlock:
	spin_unlock(&files->file_lock);
	return err;
}

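/*
 * dup2(oldfd, newfd): as dup3() with no flags, except that
 * oldfd == newfd is legal and simply returns newfd after checking
 * that oldfd is in fact open.
 */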
asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
{
	if (unlikely(newfd == oldfd)) { /* corner case */
		struct files_struct *files = current->files;
		/*
		 * Use a signed local for the result; storing -EBADF in the
		 * unsigned parameter would be returned as a large positive
		 * value.
		 */
		int retval = oldfd;

		rcu_read_lock();
		if (!fcheck_files(files, oldfd))
			retval = -EBADF;
		rcu_read_unlock();
		return retval;
	}
	return sys_dup3(oldfd, newfd, 0);
}

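/*
 * dup(fildes): duplicate fildes onto the lowest-numbered free
 * descriptor.  The extra reference taken by fget() is handed over to
 * the table by fd_install() on success and dropped on failure.
 */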
asmlinkage long sys_dup(unsigned int fildes)
{
	int ret = -EBADF;
	struct file *file = fget(fildes);

	if (file) {
		ret = get_unused_fd();
		if (ret >= 0)
			fd_install(ret, file);
		else
			fput(file);
	}
	return ret;
}

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)

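/*
 * Apply an F_SETFL request: validate the bits the caller may change
 * (SETFL_MASK above), give the file system a veto via ->check_flags(),
 * toggle FASYNC notification if needed, and finally merge the new bits
 * into f_flags.
 */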
static int setfl(int fd, struct file *filp, unsigned long arg)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	int error = 0;

	/*
	 * O_APPEND cannot be changed in either direction if the inode is
	 * marked append-only; the check below rejects both setting and
	 * clearing it on such files.
	 */
	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
		if (!is_owner_or_cap(inode))
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	if (arg & O_DIRECT) {
		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
		    !filp->f_mapping->a_ops->direct_IO)
			return -EINVAL;
	}

	if (filp->f_op && filp->f_op->check_flags)
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

	/*
	 * We still need a lock here for now to keep multiple FASYNC calls
	 * from racing with each other.
	 */
	lock_kernel();
	if ((arg ^ filp->f_flags) & FASYNC) {
		if (filp->f_op && filp->f_op->fasync) {
			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
			if (error < 0)
				goto out;
		}
	}

	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
 out:
	unlock_kernel();
	return error;
}

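/*
 * Install a new owner in filp->f_owner.  Unless forced, an existing
 * owner is left alone; otherwise the reference on the old struct pid
 * is dropped and a new one taken, all under the owner lock.
 */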
static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
                     uid_t uid, uid_t euid, int force)
{
	write_lock_irq(&filp->f_owner.lock);
	if (force || !filp->f_owner.pid) {
		put_pid(filp->f_owner.pid);
		filp->f_owner.pid = get_pid(pid);
		filp->f_owner.pid_type = type;
		filp->f_owner.uid = uid;
		filp->f_owner.euid = euid;
	}
	write_unlock_irq(&filp->f_owner.lock);
}

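/*
 * Set the owner once the security module has approved the caller as
 * fowner.  The caller's uid/euid are recorded for the permission check
 * done at signal delivery time (see sigio_perm() below).
 */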
int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
		int force)
{
	int err;

	err = security_file_set_fowner(filp);
	if (err)
		return err;

	f_modown(filp, pid, type, current->uid, current->euid, force);
	return 0;
}
EXPORT_SYMBOL(__f_setown);

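/*
 * F_SETOWN: a positive arg names a process, a negative arg names a
 * process group (by its absolute value), matching the historical BSD
 * encoding.  The pid lookup is done in the caller's pid namespace
 * under RCU.
 */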
int f_setown(struct file *filp, unsigned long arg, int force)
{
	enum pid_type type;
	struct pid *pid;
	int who = arg;
	int result;
	type = PIDTYPE_PID;
	if (who < 0) {
		type = PIDTYPE_PGID;
		who = -who;
	}
	rcu_read_lock();
	pid = find_vpid(who);
	result = __f_setown(filp, pid, type, force);
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL(f_setown);

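/* Forcibly clear the owner so no further SIGIO/SIGURG is sent. */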
void f_delown(struct file *filp)
{
	f_modown(filp, NULL, PIDTYPE_PID, 0, 0, 1);
}

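/*
 * F_GETOWN: return the owner pid as seen from the caller's pid
 * namespace, negated when the owner is a process group (the inverse of
 * the f_setown() encoding).
 */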
pid_t f_getown(struct file *filp)
{
	pid_t pid;
	read_lock(&filp->f_owner.lock);
	pid = pid_vnr(filp->f_owner.pid);
	if (filp->f_owner.pid_type == PIDTYPE_PGID)
		pid = -pid;
	read_unlock(&filp->f_owner.lock);
	return pid;
}

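/*
 * Dispatch the fcntl commands shared by sys_fcntl() and sys_fcntl64().
 * The caller holds a reference on filp and has already passed the
 * security check.
 */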
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
		struct file *filp)
{
	long err = -EINVAL;

	switch (cmd) {
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
		if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
			break;
		err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
		if (err >= 0) {
			get_file(filp);
			fd_install(err, filp);
		}
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, arg & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, arg);
		break;
	case F_GETLK:
		err = fcntl_getlk(filp, (struct flock __user *) arg);
		break;
	case F_SETLK:
	case F_SETLKW:
		err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
		err = f_getown(filp);
		force_successful_syscall_return();
		break;
	case F_SETOWN:
		err = f_setown(filp, arg, 1);
		break;
	case F_GETSIG:
		err = filp->f_owner.signum;
		break;
	case F_SETSIG:
		/* arg == 0 restores default behaviour. */
		if (!valid_signal(arg))
			break;
		err = 0;
		filp->f_owner.signum = arg;
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, arg);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, arg);
		break;
	default:
		break;
	}
	return err;
}

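/*
 * The fcntl(2) entry point: look up the file, let the security module
 * veto the command, then dispatch.
 */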
asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file *filp;
	long err = -EBADF;

	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}

	err = do_fcntl(fd, cmd, arg, filp);

	fput(filp);
out:
	return err;
}

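/*
 * On 32-bit kernels, fcntl64(2) additionally handles the 64-bit file
 * lock commands; everything else falls through to do_fcntl().
 */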
#if BITS_PER_LONG == 32
asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file *filp;
	long err;

	err = -EBADF;
	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}
	err = -EBADF;

	switch (cmd) {
	case F_GETLK64:
		err = fcntl_getlk64(filp, (struct flock64 __user *) arg);
		break;
	case F_SETLK64:
	case F_SETLKW64:
		err = fcntl_setlk64(fd, filp, cmd,
				(struct flock64 __user *) arg);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, filp);
		break;
	}
	fput(filp);
out:
	return err;
}
#endif

/* Table to convert sigio signal codes into poll band bitmaps */

static const long band_table[NSIGPOLL] = {
	POLLIN | POLLRDNORM,			/* POLL_IN */
	POLLOUT | POLLWRNORM | POLLWRBAND,	/* POLL_OUT */
	POLLIN | POLLRDNORM | POLLMSG,		/* POLL_MSG */
	POLLERR,				/* POLL_ERR */
	POLLPRI | POLLRDBAND,			/* POLL_PRI */
	POLLHUP | POLLERR			/* POLL_HUP */
};

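/*
 * May the stored fowner credentials signal task p?  Mirrors the
 * kill(2) permission rules and then lets the security module veto the
 * delivery.
 */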
static inline int sigio_perm(struct task_struct *p,
                             struct fown_struct *fown, int sig)
{
	return (((fown->euid == 0) ||
		 (fown->euid == p->suid) || (fown->euid == p->uid) ||
		 (fown->uid == p->suid) || (fown->uid == p->uid)) &&
		!security_file_send_sigiotask(p, fown, sig));
}

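/*
 * Deliver one SIGIO-style notification to a single task, using the
 * real-time signal selected by F_SETSIG when one is set.
 */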
static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd,
			       int reason)
{
	if (!sigio_perm(p, fown, fown->signum))
		return;

	switch (fown->signum) {
		siginfo_t si;
		default:
			/* Queue a rt signal with the appropriate fd as its
			   value.  We use SI_SIGIO as the source, not
			   SI_KERNEL, since kernel signals always get
			   delivered even if we can't queue.  Failure to
			   queue in this case _should_ be reported; we fall
			   back to SIGIO in that case. --sct */
			si.si_signo = fown->signum;
			si.si_errno = 0;
			si.si_code  = reason;
			/* Make sure we are called with one of the POLL_*
			   reasons, otherwise we could leak kernel stack into
			   userspace.  */
			BUG_ON((reason & __SI_MASK) != __SI_POLL);
			if (reason - POLL_IN >= NSIGPOLL)
				si.si_band  = ~0L;
			else
				si.si_band = band_table[reason - POLL_IN];
			si.si_fd    = fd;
			if (!group_send_sig_info(fown->signum, &si, p))
				break;
		/* fall-through: fall back on the old plain SIGIO signal */
		case 0:
			group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
	}
}

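/*
 * Notify everything the stored owner pid refers to - a single task or
 * every task in a process group, depending on pid_type.
 */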
void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;

	read_lock(&fown->lock);
	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	read_lock(&tasklist_lock);
	do_each_pid_task(pid, type, p) {
		send_sigio_to_task(p, fown, fd, band);
	} while_each_pid_task(pid, type, p);
	read_unlock(&tasklist_lock);
 out_unlock_fown:
	read_unlock(&fown->lock);
}

static void send_sigurg_to_task(struct task_struct *p,
                                struct fown_struct *fown)
{
	if (sigio_perm(p, fown, SIGURG))
		group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
}

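/*
 * Like send_sigio(), but for out-of-band (urgent) socket data: always
 * plain SIGURG, never the F_SETSIG signal.  Returns nonzero if an
 * owner was registered, whether or not any signal was delivered.
 */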
int send_sigurg(struct fown_struct *fown)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;
	int ret = 0;

	read_lock(&fown->lock);
	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	read_lock(&tasklist_lock);
	do_each_pid_task(pid, type, p) {
		send_sigurg_to_task(p, fown);
	} while_each_pid_task(pid, type, p);
	read_unlock(&tasklist_lock);
 out_unlock_fown:
	read_unlock(&fown->lock);
	return ret;
}

static DEFINE_RWLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __read_mostly;

/*
 * fasync_helper() is used by some character device drivers (mainly mice)
 * to set up the fasync queue.  It returns a negative value on error, 0 if
 * it made no change and a positive value if it added or deleted the entry.
 */
int fasync_helper(int fd, struct file *filp, int on, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	struct fasync_struct *new = NULL;
	int result = 0;

	if (on) {
		new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
		if (!new)
			return -ENOMEM;
	}
	write_lock_irq(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file == filp) {
			if (on) {
				fa->fa_fd = fd;
				kmem_cache_free(fasync_cache, new);
			} else {
				*fp = fa->fa_next;
				kmem_cache_free(fasync_cache, fa);
				result = 1;
			}
			goto out;
		}
	}

	if (on) {
		new->magic = FASYNC_MAGIC;
		new->fa_file = filp;
		new->fa_fd = fd;
		new->fa_next = *fapp;
		*fapp = new;
		result = 1;
	}
out:
	write_unlock_irq(&fasync_lock);
	return result;
}

EXPORT_SYMBOL(fasync_helper);

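/*
 * Illustration only, not compiled: a driver typically wires
 * fasync_helper() into its fasync file operation, keeping the list
 * head in its own device structure.  "example_dev", "example_fasync"
 * and "async_queue" are hypothetical names, not defined in this file.
 */
#if 0
static int example_fasync(int fd, struct file *filp, int on)
{
	struct example_dev *dev = filp->private_data;

	/* Adds or removes this filp from the list, depending on "on". */
	return fasync_helper(fd, filp, on, &dev->async_queue);
}
#endif

/*
 * Walk one fasync list and send SIGIO (or the F_SETSIG signal) to each
 * registered owner.  Callers are expected to provide the locking;
 * kill_fasync() below takes fasync_lock for reading.
 */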
void __kill_fasync(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct *fown;
		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		fown = &fa->fa_file->f_owner;
		/* Don't send SIGURG to processes which have not set a
		   queued signum: SIGURG has its own default signalling
		   mechanism. */
		if (!(sig == SIGURG && fown->signum == 0))
			send_sigio(fown, fa->fa_fd, band);
		fa = fa->fa_next;
	}
}

EXPORT_SYMBOL(__kill_fasync);

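/*
 * The notification entry point for drivers: takes the list lock and
 * walks the list via __kill_fasync().
 */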
void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		read_lock(&fasync_lock);
		/* reread *fp after obtaining the lock */
		__kill_fasync(*fp, sig, band);
		read_unlock(&fasync_lock);
	}
}
EXPORT_SYMBOL(kill_fasync);
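
/*
 * Illustration only, not compiled: a driver that filled its fasync
 * list via the hypothetical example_fasync() above would notify
 * subscribers when new data arrives with something like
 *
 *	kill_fasync(&dev->async_queue, SIGIO, POLL_IN);
 *
 * and must remove the filp from the list again in its release method,
 * e.g. example_fasync(-1, filp, 0).
 */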

static int __init fasync_init(void)
{
	fasync_cache = kmem_cache_create("fasync_cache",
		sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
	return 0;
}

module_init(fasync_init)