/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>

#include <asm/poll.h>
#include <asm/siginfo.h>
#include <asm/uaccess.h>

void set_close_on_exec(unsigned int fd, int flag)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	if (flag)
		FD_SET(fd, fdt->close_on_exec);
	else
		FD_CLR(fd, fdt->close_on_exec);
	spin_unlock(&files->file_lock);
}

static int get_close_on_exec(unsigned int fd)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;
	int res;
	rcu_read_lock();
	fdt = files_fdtable(files);
	res = FD_ISSET(fd, fdt->close_on_exec);
	rcu_read_unlock();
	return res;
}
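
/*
 * Userspace view of the two helpers above (a minimal sketch, not part
 * of this file): F_GETFD and F_SETFD reach get_close_on_exec() and
 * set_close_on_exec() via do_fcntl() further down.
 *
 *	#include <fcntl.h>
 *
 *	int set_cloexec(int fd)
 *	{
 *		int flags = fcntl(fd, F_GETFD);	// -> get_close_on_exec()
 *		if (flags < 0)
 *			return -1;
 *		// -> set_close_on_exec(fd, 1)
 *		return fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
 *	}
 */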

/*
 * locate_fd finds a free file descriptor in the open_fds fdset,
 * expanding the fd arrays if necessary.  It takes files->file_lock
 * itself and drops it before returning.
 */

static int locate_fd(unsigned int orig_start, int cloexec)
{
	struct files_struct *files = current->files;
	unsigned int newfd;
	unsigned int start;
	int error;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);

	error = -EINVAL;
	if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

repeat:
	fdt = files_fdtable(files);
	/*
	 * Someone might have closed fd's in the range
	 * orig_start..files->next_fd
	 */
	start = orig_start;
	if (start < files->next_fd)
		start = files->next_fd;

	newfd = start;
	if (start < fdt->max_fds)
		newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
					   fdt->max_fds, start);

	error = -EMFILE;
	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

	error = expand_files(files, newfd);
	if (error < 0)
		goto out;

	/*
	 * If we needed to expand the fd array we
	 * might have blocked - try again.
	 */
	if (error)
		goto repeat;

	if (start <= files->next_fd)
		files->next_fd = newfd + 1;

	FD_SET(newfd, fdt->open_fds);
	if (cloexec)
		FD_SET(newfd, fdt->close_on_exec);
	else
		FD_CLR(newfd, fdt->close_on_exec);
	error = newfd;

out:
	spin_unlock(&files->file_lock);
	return error;
}
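
/*
 * The bitmap search above reduces to the following (a simplified sketch
 * of find_next_zero_bit() semantics, ignoring its word-at-a-time
 * implementation): return the first clear bit at or above @from, or a
 * value >= @max when every descriptor in range is taken.
 *
 *	unsigned int next_free(const unsigned long *bits,
 *			       unsigned int max, unsigned int from)
 *	{
 *		unsigned int i;
 *
 *		for (i = from; i < max; i++)
 *			if (!test_bit(i, bits))
 *				break;
 *		return i;	// == max when no free slot exists
 *	}
 *
 * This is what gives fcntl()/open() the POSIX "lowest available
 * descriptor" behaviour.
 */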

static int dupfd(struct file *file, unsigned int start, int cloexec)
{
	int fd = locate_fd(start, cloexec);
	if (fd >= 0)
		fd_install(fd, file);
	else
		fput(file);

	return fd;
}
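
/*
 * dupfd() backs F_DUPFD and F_DUPFD_CLOEXEC: the caller asks for the
 * lowest free descriptor >= arg.  A userspace sketch (not part of this
 * file):
 *
 *	// duplicate fd onto the lowest free descriptor >= 10,
 *	// e.g. to keep it clear of stdin/stdout/stderr
 *	int high = fcntl(fd, F_DUPFD, 10);
 *	// same, but with the close-on-exec flag already set
 *	int high_cx = fcntl(fd, F_DUPFD_CLOEXEC, 10);
 */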

asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
{
	int err = -EBADF;
	struct file *file, *tofree;
	struct files_struct *files = current->files;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);
	if (!(file = fcheck(oldfd)))
		goto out_unlock;
	err = newfd;
	if (newfd == oldfd)
		goto out_unlock;
	err = -EBADF;
	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out_unlock;
	get_file(file);			/* We are now finished with oldfd */

	err = expand_files(files, newfd);
	if (err < 0)
		goto out_fput;

	/* To avoid races with open() and dup(), we will mark the fd as
	 * in-use in the open-file bitmap throughout the entire dup2()
	 * process.  This is quite safe: do_close() uses the fd array
	 * entry, not the bitmap, to decide what work needs to be
	 * done.  --sct */
	/* Doesn't work. open() might be there first. --AV */

	/* Yes. It's a race. In user space. Nothing sane to do */
	err = -EBUSY;
	fdt = files_fdtable(files);
	tofree = fdt->fd[newfd];
	if (!tofree && FD_ISSET(newfd, fdt->open_fds))
		goto out_fput;

	rcu_assign_pointer(fdt->fd[newfd], file);
	FD_SET(newfd, fdt->open_fds);
	FD_CLR(newfd, fdt->close_on_exec);
	spin_unlock(&files->file_lock);

	if (tofree)
		filp_close(tofree, files);
	err = newfd;
out:
	return err;
out_unlock:
	spin_unlock(&files->file_lock);
	goto out;

out_fput:
	spin_unlock(&files->file_lock);
	fput(file);
	goto out;
}
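
/*
 * Classic sys_dup2() use from userspace (a sketch, not part of this
 * file): atomically point an existing descriptor at another file,
 * closing whatever newfd referred to before.
 *
 *	#include <unistd.h>
 *	#include <fcntl.h>
 *
 *	int redirect_stdout(const char *path)
 *	{
 *		int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *		if (fd < 0)
 *			return -1;
 *		if (dup2(fd, STDOUT_FILENO) < 0) {	// tofree == old stdout
 *			close(fd);
 *			return -1;
 *		}
 *		return close(fd);
 *	}
 */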

asmlinkage long sys_dup(unsigned int fildes)
{
	int ret = -EBADF;
	struct file *file = fget(fildes);

	if (file)
		ret = dupfd(file, 0, 0);
	return ret;
}

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)

static int setfl(int fd, struct file *filp, unsigned long arg)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	int error = 0;

	/*
	 * O_APPEND cannot be cleared if the file is marked as append-only
	 * and the file is open for write.
	 */
	if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
		if (!is_owner_or_cap(inode))
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	if (arg & O_DIRECT) {
		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
			!filp->f_mapping->a_ops->direct_IO)
				return -EINVAL;
	}

	if (filp->f_op && filp->f_op->check_flags)
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

	lock_kernel();
	if ((arg ^ filp->f_flags) & FASYNC) {
		if (filp->f_op && filp->f_op->fasync) {
			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
			if (error < 0)
				goto out;
		}
	}

	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
 out:
	unlock_kernel();
	return error;
}
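
/*
 * setfl() is reached via F_SETFL.  Only the bits in SETFL_MASK are
 * honoured, so userspace normally does a read-modify-write (a sketch,
 * not part of this file):
 *
 *	#include <fcntl.h>
 *
 *	int set_nonblock(int fd)
 *	{
 *		int flags = fcntl(fd, F_GETFL);
 *		if (flags < 0)
 *			return -1;
 *		return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 *	}
 */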

static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
		     uid_t uid, uid_t euid, int force)
{
	write_lock_irq(&filp->f_owner.lock);
	if (force || !filp->f_owner.pid) {
		put_pid(filp->f_owner.pid);
		filp->f_owner.pid = get_pid(pid);
		filp->f_owner.pid_type = type;
		filp->f_owner.uid = uid;
		filp->f_owner.euid = euid;
	}
	write_unlock_irq(&filp->f_owner.lock);
}

int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
		int force)
{
	int err;

	err = security_file_set_fowner(filp);
	if (err)
		return err;

	f_modown(filp, pid, type, current->uid, current->euid, force);
	return 0;
}
EXPORT_SYMBOL(__f_setown);

int f_setown(struct file *filp, unsigned long arg, int force)
{
	enum pid_type type;
	struct pid *pid;
	int who = arg;
	int result;
	type = PIDTYPE_PID;
	if (who < 0) {
		type = PIDTYPE_PGID;
		who = -who;
	}
	rcu_read_lock();
	pid = find_vpid(who);
	result = __f_setown(filp, pid, type, force);
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL(f_setown);

void f_delown(struct file *filp)
{
	f_modown(filp, NULL, PIDTYPE_PID, 0, 0, 1);
}

pid_t f_getown(struct file *filp)
{
	pid_t pid;
	read_lock(&filp->f_owner.lock);
	pid = pid_vnr(filp->f_owner.pid);
	if (filp->f_owner.pid_type == PIDTYPE_PGID)
		pid = -pid;
	read_unlock(&filp->f_owner.lock);
	return pid;
}
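
/*
 * The sign convention handled above, seen from userspace (a sketch,
 * not part of this file): F_SETOWN with a positive argument targets a
 * process, a negative one targets a process group, and F_GETOWN
 * reports a group owner as a negative value.
 *
 *	fcntl(sock, F_SETOWN, getpid());	// SIGIO/SIGURG to this process
 *	fcntl(sock, F_SETOWN, -getpgrp());	// ...or to the whole group
 *	pid_t owner = fcntl(sock, F_GETOWN);	// negative => a process group
 */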

static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
		struct file *filp)
{
	long err = -EINVAL;

	switch (cmd) {
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
		get_file(filp);
		err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC);
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, arg & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, arg);
		break;
	case F_GETLK:
		err = fcntl_getlk(filp, (struct flock __user *) arg);
		break;
	case F_SETLK:
	case F_SETLKW:
		err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg);
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
		err = f_getown(filp);
		force_successful_syscall_return();
		break;
	case F_SETOWN:
		err = f_setown(filp, arg, 1);
		break;
	case F_GETSIG:
		err = filp->f_owner.signum;
		break;
	case F_SETSIG:
		/* arg == 0 restores default behaviour. */
		if (!valid_signal(arg))
			break;
		err = 0;
		filp->f_owner.signum = arg;
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, arg);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, arg);
		break;
	default:
		break;
	}
	return err;
}
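
/*
 * Putting the F_SETOWN/F_SETSIG/F_SETFL pieces of do_fcntl() together,
 * the usual signal-driven I/O setup looks like this from userspace
 * (a sketch, not part of this file):
 *
 *	#include <fcntl.h>
 *	#include <signal.h>
 *
 *	fcntl(fd, F_SETOWN, getpid());		// deliver to us
 *	fcntl(fd, F_SETSIG, SIGRTMIN + 1);	// queued rt signal, si_fd/si_band set
 *	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | O_ASYNC);
 *
 * With F_SETSIG left at 0, the plain SIGIO path in
 * send_sigio_to_task() below is used instead.
 */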

asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file *filp;
	long err = -EBADF;

	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}

	err = do_fcntl(fd, cmd, arg, filp);

	fput(filp);
out:
	return err;
}

#if BITS_PER_LONG == 32
asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file *filp;
	long err;

	err = -EBADF;
	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}
	err = -EBADF;

	switch (cmd) {
	case F_GETLK64:
		err = fcntl_getlk64(filp, (struct flock64 __user *) arg);
		break;
	case F_SETLK64:
	case F_SETLKW64:
		err = fcntl_setlk64(fd, filp, cmd,
				(struct flock64 __user *) arg);
		break;
	default:
		err = do_fcntl(fd, cmd, arg, filp);
		break;
	}
	fput(filp);
out:
	return err;
}
#endif

/* Table to convert sigio signal codes into poll band bitmaps */

static const long band_table[NSIGPOLL] = {
	POLLIN | POLLRDNORM,			/* POLL_IN */
	POLLOUT | POLLWRNORM | POLLWRBAND,	/* POLL_OUT */
	POLLIN | POLLRDNORM | POLLMSG,		/* POLL_MSG */
	POLLERR,				/* POLL_ERR */
	POLLPRI | POLLRDBAND,			/* POLL_PRI */
	POLLHUP | POLLERR			/* POLL_HUP */
};

static inline int sigio_perm(struct task_struct *p,
			     struct fown_struct *fown, int sig)
{
	return (((fown->euid == 0) ||
		 (fown->euid == p->suid) || (fown->euid == p->uid) ||
		 (fown->uid == p->suid) || (fown->uid == p->uid)) &&
		!security_file_send_sigiotask(p, fown, sig));
}

static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd,
			       int reason)
{
	if (!sigio_perm(p, fown, fown->signum))
		return;

	switch (fown->signum) {
		siginfo_t si;
		default:
			/* Queue a rt signal with the appropriate fd as its
			   value.  We use SI_SIGIO as the source, not
			   SI_KERNEL, since kernel signals always get
			   delivered even if we can't queue.  Failure to
			   queue in this case _should_ be reported; we fall
			   back to SIGIO in that case. --sct */
			si.si_signo = fown->signum;
			si.si_errno = 0;
			si.si_code  = reason;
			/* Make sure we are called with one of the POLL_*
			   reasons, otherwise we could leak kernel stack into
			   userspace.  */
			BUG_ON((reason & __SI_MASK) != __SI_POLL);
			if (reason - POLL_IN >= NSIGPOLL)
				si.si_band  = ~0L;
			else
				si.si_band = band_table[reason - POLL_IN];
			si.si_fd    = fd;
			if (!group_send_sig_info(fown->signum, &si, p))
				break;
		/* fall-through: fall back on the old plain SIGIO signal */
		case 0:
			group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
	}
}

void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;

	read_lock(&fown->lock);
	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	read_lock(&tasklist_lock);
	do_each_pid_task(pid, type, p) {
		send_sigio_to_task(p, fown, fd, band);
	} while_each_pid_task(pid, type, p);
	read_unlock(&tasklist_lock);
 out_unlock_fown:
	read_unlock(&fown->lock);
}

static void send_sigurg_to_task(struct task_struct *p,
				struct fown_struct *fown)
{
	if (sigio_perm(p, fown, SIGURG))
		group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
}

int send_sigurg(struct fown_struct *fown)
{
	struct task_struct *p;
	enum pid_type type;
	struct pid *pid;
	int ret = 0;

	read_lock(&fown->lock);
	type = fown->pid_type;
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	read_lock(&tasklist_lock);
	do_each_pid_task(pid, type, p) {
		send_sigurg_to_task(p, fown);
	} while_each_pid_task(pid, type, p);
	read_unlock(&tasklist_lock);
 out_unlock_fown:
	read_unlock(&fown->lock);
	return ret;
}

static DEFINE_RWLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __read_mostly;

/*
 * fasync_helper() is used by some character device drivers (mainly mice)
 * to set up the fasync queue.  It returns negative on error, zero if it
 * made no change, and positive if it added or deleted the entry.
 */
int fasync_helper(int fd, struct file *filp, int on, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	struct fasync_struct *new = NULL;
	int result = 0;

	if (on) {
		new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
		if (!new)
			return -ENOMEM;
	}
	write_lock_irq(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file == filp) {
			if (on) {
				fa->fa_fd = fd;
				kmem_cache_free(fasync_cache, new);
			} else {
				*fp = fa->fa_next;
				kmem_cache_free(fasync_cache, fa);
				result = 1;
			}
			goto out;
		}
	}

	if (on) {
		new->magic = FASYNC_MAGIC;
		new->fa_file = filp;
		new->fa_fd = fd;
		new->fa_next = *fapp;
		*fapp = new;
		result = 1;
	}
out:
	write_unlock_irq(&fasync_lock);
	return result;
}

EXPORT_SYMBOL(fasync_helper);
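
/*
 * Typical driver-side use of fasync_helper(): wire it into the fasync
 * file operation and keep the list head in per-device data (a minimal
 * sketch; mydev_fasync_queue and mydev_fops are made-up example names):
 *
 *	static struct fasync_struct *mydev_fasync_queue;
 *
 *	static int mydev_fasync(int fd, struct file *filp, int on)
 *	{
 *		return fasync_helper(fd, filp, on, &mydev_fasync_queue);
 *	}
 *
 *	static const struct file_operations mydev_fops = {
 *		...
 *		.fasync = mydev_fasync,
 *	};
 */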

void __kill_fasync(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct *fown;
		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		fown = &fa->fa_file->f_owner;
		/* Don't send SIGURG to processes which have not set a
		   queued signum: SIGURG has its own default signalling
		   mechanism. */
		if (!(sig == SIGURG && fown->signum == 0))
			send_sigio(fown, fa->fa_fd, band);
		fa = fa->fa_next;
	}
}

EXPORT_SYMBOL(__kill_fasync);

void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		read_lock(&fasync_lock);
		/* reread *fp after obtaining the lock */
		__kill_fasync(*fp, sig, band);
		read_unlock(&fasync_lock);
	}
}
EXPORT_SYMBOL(kill_fasync);
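
/*
 * The matching producer side (a sketch, continuing the hypothetical
 * driver above): whenever new data arrives, notify every registered
 * listener.  The band argument is the POLL_* reason that picks the
 * band_table row used for si_band when a queued signal is sent.
 *
 *	// in the driver's interrupt handler / receive path:
 *	kill_fasync(&mydev_fasync_queue, SIGIO, POLL_IN);
 *
 *	// and when the device becomes writable again:
 *	kill_fasync(&mydev_fasync_queue, SIGIO, POLL_OUT);
 */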

static int __init fasync_init(void)
{
	fasync_cache = kmem_cache_create("fasync_cache",
		sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
	return 0;
}

module_init(fasync_init)