/*
 *  linux/fs/fcntl.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/syscalls.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/dnotify.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/signal.h>

#include <asm/poll.h>
#include <asm/siginfo.h>
#include <asm/uaccess.h>

void fastcall set_close_on_exec(unsigned int fd, int flag)
{
	struct files_struct *files = current->files;
	spin_lock(&files->file_lock);
	if (flag)
		FD_SET(fd, files->close_on_exec);
	else
		FD_CLR(fd, files->close_on_exec);
	spin_unlock(&files->file_lock);
}

static inline int get_close_on_exec(unsigned int fd)
{
	struct files_struct *files = current->files;
	int res;
	spin_lock(&files->file_lock);
	res = FD_ISSET(fd, files->close_on_exec);
	spin_unlock(&files->file_lock);
	return res;
}
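
/*
 * For reference, the close-on-exec bit managed above is what userspace
 * toggles via fcntl(F_GETFD)/fcntl(F_SETFD).  An illustrative sketch
 * (not part of this file):
 *
 *	int flags = fcntl(fd, F_GETFD);
 *	if (flags >= 0)
 *		fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
 */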

/*
 * locate_fd finds a free file descriptor in the open_fds fdset,
 * expanding the fd arrays if necessary.  Must be called with the
 * file_lock held for write.
 */

static int locate_fd(struct files_struct *files,
			    struct file *file, unsigned int orig_start)
{
	unsigned int newfd;
	unsigned int start;
	int error;

	error = -EINVAL;
	if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

repeat:
	/*
	 * Someone might have closed fd's in the range
	 * orig_start..files->next_fd
	 */
	start = orig_start;
	if (start < files->next_fd)
		start = files->next_fd;

	newfd = start;
	if (start < files->max_fdset) {
		newfd = find_next_zero_bit(files->open_fds->fds_bits,
			files->max_fdset, start);
	}

	error = -EMFILE;
	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

	error = expand_files(files, newfd);
	if (error < 0)
		goto out;
	/*
	 * If we needed to expand the fd array we
	 * might have blocked - try again.
	 */
	if (error)
		goto repeat;

	if (start <= files->next_fd)
		files->next_fd = newfd + 1;

	error = newfd;

out:
	return error;
}

static int dupfd(struct file *file, unsigned int start)
{
	struct files_struct * files = current->files;
	int fd;

	spin_lock(&files->file_lock);
	fd = locate_fd(files, file, start);
	if (fd >= 0) {
		FD_SET(fd, files->open_fds);
		FD_CLR(fd, files->close_on_exec);
		spin_unlock(&files->file_lock);
		fd_install(fd, file);
	} else {
		spin_unlock(&files->file_lock);
		fput(file);
	}

	return fd;
}
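
/*
 * dupfd() backs both sys_dup() and the F_DUPFD fcntl below.  Callers
 * pass in a file with an extra reference held (fget()/get_file());
 * fd_install() consumes it on success and fput() drops it on failure.
 * Illustrative userspace use of F_DUPFD (a sketch, not part of this
 * file):
 *
 *	int newfd = fcntl(oldfd, F_DUPFD, 10);	(lowest free fd >= 10)
 */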

asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
{
	int err = -EBADF;
	struct file * file, *tofree;
	struct files_struct * files = current->files;

	spin_lock(&files->file_lock);
	if (!(file = fcheck(oldfd)))
		goto out_unlock;
	err = newfd;
	if (newfd == oldfd)
		goto out_unlock;
	err = -EBADF;
	if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out_unlock;
	get_file(file);			/* We are now finished with oldfd */

	err = expand_files(files, newfd);
	if (err < 0)
		goto out_fput;

	/* To avoid races with open() and dup(), we will mark the fd as
	 * in-use in the open-file bitmap throughout the entire dup2()
	 * process.  This is quite safe: do_close() uses the fd array
	 * entry, not the bitmap, to decide what work needs to be
	 * done.  --sct */
	/* Doesn't work. open() might be there first. --AV */

	/* Yes. It's a race. In user space. Nothing sane to do */
	err = -EBUSY;
	tofree = files->fd[newfd];
	if (!tofree && FD_ISSET(newfd, files->open_fds))
		goto out_fput;

	files->fd[newfd] = file;
	FD_SET(newfd, files->open_fds);
	FD_CLR(newfd, files->close_on_exec);
	spin_unlock(&files->file_lock);

	if (tofree)
		filp_close(tofree, files);
	err = newfd;
out:
	return err;
out_unlock:
	spin_unlock(&files->file_lock);
	goto out;

out_fput:
	spin_unlock(&files->file_lock);
	fput(file);
	goto out;
}
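
/*
 * Illustrative userspace use of dup2() (a sketch, not part of this
 * file): the classic redirection idiom relies on dup2() atomically
 * replacing whatever was installed at newfd.  Assuming a hypothetical
 * log file:
 *
 *	int logfd = open("log", O_WRONLY | O_CREAT | O_APPEND, 0644);
 *	if (logfd >= 0 && dup2(logfd, 2) >= 0)
 *		write(2, "now on stderr\n", 14);
 */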

asmlinkage long sys_dup(unsigned int fildes)
{
	int ret = -EBADF;
	struct file * file = fget(fildes);

	if (file)
		ret = dupfd(file, 0);
	return ret;
}

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)

static int setfl(int fd, struct file * filp, unsigned long arg)
{
	struct inode * inode = filp->f_dentry->d_inode;
	int error = 0;

	/* O_APPEND cannot be cleared if the file is marked as append-only */
	if (!(arg & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* O_NOATIME can only be set by the owner or superuser */
	if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
		if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER))
			return -EPERM;

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	if (arg & O_DIRECT) {
		if (!filp->f_mapping || !filp->f_mapping->a_ops ||
			!filp->f_mapping->a_ops->direct_IO)
				return -EINVAL;
	}

	if (filp->f_op && filp->f_op->check_flags)
		error = filp->f_op->check_flags(arg);
	if (error)
		return error;

	lock_kernel();
	if ((arg ^ filp->f_flags) & FASYNC) {
		if (filp->f_op && filp->f_op->fasync) {
			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
			if (error < 0)
				goto out;
		}
	}

	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
 out:
	unlock_kernel();
	return error;
}
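
/*
 * Illustrative userspace counterpart of setfl() (a sketch, not part of
 * this file): F_SETFL only honours the bits in SETFL_MASK, so the usual
 * pattern is read-modify-write:
 *
 *	int flags = fcntl(fd, F_GETFL);
 *	if (flags >= 0)
 *		fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 */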

static void f_modown(struct file *filp, unsigned long pid,
                     uid_t uid, uid_t euid, int force)
{
	write_lock_irq(&filp->f_owner.lock);
	if (force || !filp->f_owner.pid) {
		filp->f_owner.pid = pid;
		filp->f_owner.uid = uid;
		filp->f_owner.euid = euid;
	}
	write_unlock_irq(&filp->f_owner.lock);
}

int f_setown(struct file *filp, unsigned long arg, int force)
{
	int err;

	err = security_file_set_fowner(filp);
	if (err)
		return err;

	f_modown(filp, arg, current->uid, current->euid, force);
	return 0;
}

EXPORT_SYMBOL(f_setown);

void f_delown(struct file *filp)
{
	f_modown(filp, 0, 0, 0, 1);
}
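
/*
 * Illustrative userspace setup for SIGIO delivery, which f_setown()
 * implements kernel-side (a sketch, not part of this file; FASYNC is
 * spelled O_ASYNC in userspace headers):
 *
 *	fcntl(fd, F_SETOWN, getpid());
 *	int flags = fcntl(fd, F_GETFL);
 *	fcntl(fd, F_SETFL, flags | O_ASYNC);
 */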

static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
		struct file *filp)
{
	long err = -EINVAL;

	switch (cmd) {
	case F_DUPFD:
		get_file(filp);
		err = dupfd(filp, arg);
		break;
	case F_GETFD:
		err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
		break;
	case F_SETFD:
		err = 0;
		set_close_on_exec(fd, arg & FD_CLOEXEC);
		break;
	case F_GETFL:
		err = filp->f_flags;
		break;
	case F_SETFL:
		err = setfl(fd, filp, arg);
		break;
	case F_GETLK:
		err = fcntl_getlk(filp, (struct flock __user *) arg);
		break;
	case F_SETLK:
	case F_SETLKW:
		err = fcntl_setlk(filp, cmd, (struct flock __user *) arg);
		break;
	case F_GETOWN:
		/*
		 * XXX If f_owner is a process group, the
		 * negative return value will get converted
		 * into an error.  Oops.  If we keep the
		 * current syscall conventions, the only way
		 * to fix this will be in libc.
		 */
		err = filp->f_owner.pid;
		force_successful_syscall_return();
		break;
	case F_SETOWN:
		err = f_setown(filp, arg, 1);
		break;
	case F_GETSIG:
		err = filp->f_owner.signum;
		break;
	case F_SETSIG:
		/* arg == 0 restores default behaviour. */
		if (!valid_signal(arg)) {
			break;
		}
		err = 0;
		filp->f_owner.signum = arg;
		break;
	case F_GETLEASE:
		err = fcntl_getlease(filp);
		break;
	case F_SETLEASE:
		err = fcntl_setlease(fd, filp, arg);
		break;
	case F_NOTIFY:
		err = fcntl_dirnotify(fd, filp, arg);
		break;
	default:
		break;
	}
	return err;
}

asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file *filp;
	long err = -EBADF;

	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}

	err = do_fcntl(fd, cmd, arg, filp);

	fput(filp);
out:
	return err;
}

#if BITS_PER_LONG == 32
asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file * filp;
	long err;

	err = -EBADF;
	filp = fget(fd);
	if (!filp)
		goto out;

	err = security_file_fcntl(filp, cmd, arg);
	if (err) {
		fput(filp);
		return err;
	}
	err = -EBADF;

	switch (cmd) {
		case F_GETLK64:
			err = fcntl_getlk64(filp, (struct flock64 __user *) arg);
			break;
		case F_SETLK64:
		case F_SETLKW64:
			err = fcntl_setlk64(filp, cmd, (struct flock64 __user *) arg);
			break;
		default:
			err = do_fcntl(fd, cmd, arg, filp);
			break;
	}
	fput(filp);
out:
	return err;
}
#endif

/* Table to convert sigio signal codes into poll band bitmaps */

static long band_table[NSIGPOLL] = {
	POLLIN | POLLRDNORM,			/* POLL_IN */
	POLLOUT | POLLWRNORM | POLLWRBAND,	/* POLL_OUT */
	POLLIN | POLLRDNORM | POLLMSG,		/* POLL_MSG */
	POLLERR,				/* POLL_ERR */
	POLLPRI | POLLRDBAND,			/* POLL_PRI */
	POLLHUP | POLLERR			/* POLL_HUP */
};
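
/*
 * Example, reading straight from the table above: a POLL_IN wakeup
 * indexes entry 0, so send_sigio_to_task() below queues a siginfo
 * with si_band = POLLIN | POLLRDNORM.
 */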

static inline int sigio_perm(struct task_struct *p,
                             struct fown_struct *fown, int sig)
{
	return (((fown->euid == 0) ||
		 (fown->euid == p->suid) || (fown->euid == p->uid) ||
		 (fown->uid == p->suid) || (fown->uid == p->uid)) &&
		!security_file_send_sigiotask(p, fown, sig));
}

static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       int fd,
			       int reason)
{
	if (!sigio_perm(p, fown, fown->signum))
		return;

	switch (fown->signum) {
		siginfo_t si;
		default:
			/* Queue a rt signal with the appropriate fd as its
			   value.  We use SI_SIGIO as the source, not
			   SI_KERNEL, since kernel signals always get
			   delivered even if we can't queue.  Failure to
			   queue in this case _should_ be reported; we fall
			   back to SIGIO in that case. --sct */
			si.si_signo = fown->signum;
			si.si_errno = 0;
			si.si_code  = reason;
			/* Make sure we are called with one of the POLL_*
			   reasons, otherwise we could leak kernel stack into
			   userspace.  */
			if ((reason & __SI_MASK) != __SI_POLL)
				BUG();
			if (reason - POLL_IN >= NSIGPOLL)
				si.si_band  = ~0L;
			else
				si.si_band = band_table[reason - POLL_IN];
			si.si_fd    = fd;
			if (!send_group_sig_info(fown->signum, &si, p))
				break;
		/* fall-through: fall back on the old plain SIGIO signal */
		case 0:
			send_group_sig_info(SIGIO, SEND_SIG_PRIV, p);
	}
}

void send_sigio(struct fown_struct *fown, int fd, int band)
{
	struct task_struct *p;
	int pid;

	read_lock(&fown->lock);
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	read_lock(&tasklist_lock);
	if (pid > 0) {
		p = find_task_by_pid(pid);
		if (p) {
			send_sigio_to_task(p, fown, fd, band);
		}
	} else {
		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
			send_sigio_to_task(p, fown, fd, band);
		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
	}
	read_unlock(&tasklist_lock);
 out_unlock_fown:
	read_unlock(&fown->lock);
}

static void send_sigurg_to_task(struct task_struct *p,
                                struct fown_struct *fown)
{
	if (sigio_perm(p, fown, SIGURG))
		send_group_sig_info(SIGURG, SEND_SIG_PRIV, p);
}

int send_sigurg(struct fown_struct *fown)
{
	struct task_struct *p;
	int pid, ret = 0;

	read_lock(&fown->lock);
	pid = fown->pid;
	if (!pid)
		goto out_unlock_fown;

	ret = 1;

	read_lock(&tasklist_lock);
	if (pid > 0) {
		p = find_task_by_pid(pid);
		if (p) {
			send_sigurg_to_task(p, fown);
		}
	} else {
		do_each_task_pid(-pid, PIDTYPE_PGID, p) {
			send_sigurg_to_task(p, fown);
		} while_each_task_pid(-pid, PIDTYPE_PGID, p);
	}
	read_unlock(&tasklist_lock);
 out_unlock_fown:
	read_unlock(&fown->lock);
	return ret;
}

static DEFINE_RWLOCK(fasync_lock);
static kmem_cache_t *fasync_cache;

/*
 * fasync_helper() is used by some character device drivers (mainly mice)
 * to set up the fasync queue.  It returns a negative value on error,
 * zero if it made no changes, and a positive value if it added or
 * deleted the entry.
 */
int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	struct fasync_struct *new = NULL;
	int result = 0;

	if (on) {
		new = kmem_cache_alloc(fasync_cache, SLAB_KERNEL);
		if (!new)
			return -ENOMEM;
	}
	write_lock_irq(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file == filp) {
			if (on) {
				fa->fa_fd = fd;
				kmem_cache_free(fasync_cache, new);
			} else {
				*fp = fa->fa_next;
				kmem_cache_free(fasync_cache, fa);
				result = 1;
			}
			goto out;
		}
	}

	if (on) {
		new->magic = FASYNC_MAGIC;
		new->fa_file = filp;
		new->fa_fd = fd;
		new->fa_next = *fapp;
		*fapp = new;
		result = 1;
	}
out:
	write_unlock_irq(&fasync_lock);
	return result;
}

EXPORT_SYMBOL(fasync_helper);
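
/*
 * Illustrative driver-side use of fasync_helper() (a sketch with
 * hypothetical names, not part of this file): a character driver
 * typically keeps a private queue head and wires its file_operations
 * ->fasync method straight through:
 *
 *	static struct fasync_struct *my_async_queue;
 *
 *	static int my_fasync(int fd, struct file *filp, int on)
 *	{
 *		return fasync_helper(fd, filp, on, &my_async_queue);
 *	}
 */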

void __kill_fasync(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct * fown;
		if (fa->magic != FASYNC_MAGIC) {
			printk(KERN_ERR "kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		fown = &fa->fa_file->f_owner;
		/* Don't send SIGURG to processes which have not set a
		   queued signum: SIGURG has its own default signalling
		   mechanism. */
		if (!(sig == SIGURG && fown->signum == 0))
			send_sigio(fown, fa->fa_fd, band);
		fa = fa->fa_next;
	}
}

EXPORT_SYMBOL(__kill_fasync);

void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/* First a quick test without locking: usually
	 * the list is empty.
	 */
	if (*fp) {
		read_lock(&fasync_lock);
		/* reread *fp after obtaining the lock */
		__kill_fasync(*fp, sig, band);
		read_unlock(&fasync_lock);
	}
}
EXPORT_SYMBOL(kill_fasync);
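
/*
 * Illustrative counterpart to the my_fasync() sketch above (again with
 * hypothetical names, not part of this file): when new data arrives,
 * a driver notifies the queued owners with
 *
 *	kill_fasync(&my_async_queue, SIGIO, POLL_IN);
 */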

static int __init fasync_init(void)
{
	fasync_cache = kmem_cache_create("fasync_cache",
		sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL, NULL);
	return 0;
}

module_init(fasync_init)