xref: /freebsd/sys/kern/kern_descrip.c (revision eacee0ff7ec955b32e09515246bd97b6edcd2b0f)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mutex.h>
49 #include <sys/sysproto.h>
50 #include <sys/conf.h>
51 #include <sys/filedesc.h>
52 #include <sys/kernel.h>
53 #include <sys/sysctl.h>
54 #include <sys/vnode.h>
55 #include <sys/proc.h>
56 #include <sys/file.h>
57 #include <sys/stat.h>
58 #include <sys/filio.h>
59 #include <sys/fcntl.h>
60 #include <sys/unistd.h>
61 #include <sys/resourcevar.h>
62 #include <sys/event.h>
63 #include <sys/sx.h>
64 #include <sys/socketvar.h>
65 
66 #include <machine/limits.h>
67 
68 #include <vm/vm.h>
69 #include <vm/vm_extern.h>
70 
/*
 * Malloc types used by this file: descriptor tables, open file
 * structures and sigio records.
 */
71 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
72 MALLOC_DEFINE(M_FILE, "file", "Open file structure");
73 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
74 
/* Open handler wired into fildesc_cdevsw below. */
75 static	 d_open_t  fdopen;
/* NOTE(review): NUMFDESC is not used in the visible part of the file. */
76 #define NUMFDESC 64
77 
/*
 * Character device switch for the "FD" device (major CDEV_MAJOR).
 * Only the open entry point is implemented; every other slot is
 * filled with the corresponding no*() stub.
 */
78 #define CDEV_MAJOR 22
79 static struct cdevsw fildesc_cdevsw = {
80 	/* open */	fdopen,
81 	/* close */	noclose,
82 	/* read */	noread,
83 	/* write */	nowrite,
84 	/* ioctl */	noioctl,
85 	/* poll */	nopoll,
86 	/* mmap */	nommap,
87 	/* strategy */	nostrategy,
88 	/* name */	"FD",
89 	/* maj */	CDEV_MAJOR,
90 	/* dump */	nodump,
91 	/* psize */	nopsize,
92 	/* flags */	0,
93 };
94 
/*
 * Forward declarations of file-local helpers.  do_dup() is the common
 * back end of dup(), dup2() and fcntl(F_DUPFD).
 * NOTE(review): the badfo_*() routines are presumably the members of
 * the badfileops table that falloc() installs in new files -- confirm
 * against the definition later in the file.
 */
95 static int do_dup __P((struct filedesc *fdp, int old, int new, register_t *retval, struct thread *td));
96 static int badfo_readwrite __P((struct file *fp, struct uio *uio,
97     struct ucred *cred, int flags, struct thread *td));
98 static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data,
99     struct thread *td));
100 static int badfo_poll __P((struct file *fp, int events,
101     struct ucred *cred, struct thread *td));
102 static int badfo_kqfilter __P((struct file *fp, struct knote *kn));
103 static int badfo_stat __P((struct file *fp, struct stat *sb, struct thread *td));
104 static int badfo_close __P((struct file *fp, struct thread *td));
105 
106 /*
107  * Descriptor management.
108  *
 * filehead and nfiles are modified with filelist_lock held exclusively
 * (see falloc() and ffree()).
109  */
109 struct filelist filehead;	/* head of list of open files */
110 int nfiles;			/* actual number of open files */
111 extern int cmask;		/* initial fd_cmask given to tables built by fdinit() */
112 struct sx filelist_lock;	/* sx to protect filelist */
113 
114 /*
115  * System calls on descriptors.
116  */
117 #ifndef _SYS_SYSPROTO_H_
118 struct getdtablesize_args {
119 	int	dummy;
120 };
121 #endif
122 /*
123  * MPSAFE
124  */
125 /* ARGSUSED */
126 int
127 getdtablesize(td, uap)
128 	struct thread *td;
129 	struct getdtablesize_args *uap;
130 {
131 	struct proc *p = td->td_proc;
132 
133 	mtx_lock(&Giant);
134 	td->td_retval[0] =
135 	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
136 	mtx_unlock(&Giant);
137 	return (0);
138 }
139 
140 /*
141  * Duplicate a file descriptor to a particular value.
142  *
143  * note: keep in mind that a potential race condition exists when closing
144  * descriptors from a shared descriptor table (via rfork).
145  */
146 #ifndef _SYS_SYSPROTO_H_
147 struct dup2_args {
148 	u_int	from;
149 	u_int	to;
150 };
151 #endif
152 /*
153  * MPSAFE
154  */
155 /* ARGSUSED */
156 int
157 dup2(td, uap)
158 	struct thread *td;
159 	struct dup2_args *uap;
160 {
161 	struct proc *p = td->td_proc;
162 	register struct filedesc *fdp = td->td_proc->p_fd;
163 	register u_int old = uap->from, new = uap->to;
164 	int i, error;
165 
166 	FILEDESC_LOCK(fdp);
167 retry:
168 	if (old >= fdp->fd_nfiles ||
169 	    fdp->fd_ofiles[old] == NULL ||
170 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
171 	    new >= maxfilesperproc) {
172 		FILEDESC_UNLOCK(fdp);
173 		return (EBADF);
174 	}
175 	if (old == new) {
176 		td->td_retval[0] = new;
177 		FILEDESC_UNLOCK(fdp);
178 		return (0);
179 	}
180 	if (new >= fdp->fd_nfiles) {
181 		if ((error = fdalloc(td, new, &i))) {
182 			FILEDESC_UNLOCK(fdp);
183 			return (error);
184 		}
185 		/*
186 		 * fdalloc() may block, retest everything.
187 		 */
188 		goto retry;
189 	}
190 	error = do_dup(fdp, (int)old, (int)new, td->td_retval, td);
191 	return(error);
192 }
193 
194 /*
195  * Duplicate a file descriptor.
196  */
197 #ifndef _SYS_SYSPROTO_H_
198 struct dup_args {
199 	u_int	fd;
200 };
201 #endif
202 /*
203  * MPSAFE
204  */
205 /* ARGSUSED */
206 int
207 dup(td, uap)
208 	struct thread *td;
209 	struct dup_args *uap;
210 {
211 	register struct filedesc *fdp;
212 	u_int old;
213 	int new, error;
214 
215 	old = uap->fd;
216 	fdp = td->td_proc->p_fd;
217 	FILEDESC_LOCK(fdp);
218 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
219 		FILEDESC_UNLOCK(fdp);
220 		return (EBADF);
221 	}
222 	if ((error = fdalloc(td, 0, &new))) {
223 		FILEDESC_UNLOCK(fdp);
224 		return (error);
225 	}
226 	error = do_dup(fdp, (int)old, new, td->td_retval, td);
227 	return (error);
228 }
229 
230 /*
231  * The file control system call.
232  */
233 #ifndef _SYS_SYSPROTO_H_
234 struct fcntl_args {
235 	int	fd;
236 	int	cmd;
237 	long	arg;
238 };
239 #endif
240 /*
241  * MPSAFE
242  */
243 /* ARGSUSED */
244 int
245 fcntl(td, uap)
246 	struct thread *td;
247 	register struct fcntl_args *uap;
248 {
249 	register struct proc *p = td->td_proc;
250 	register struct filedesc *fdp;
251 	register struct file *fp;
252 	register char *pop;
253 	struct vnode *vp;
254 	int i, tmp, error = 0, flg = F_POSIX;
255 	struct flock fl;
256 	u_int newmin;
257 	struct proc *leaderp;
258 
259 	mtx_lock(&Giant);
260 
261 	fdp = p->p_fd;
262 	FILEDESC_LOCK(fdp);
263 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
264 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
265 		FILEDESC_UNLOCK(fdp);
266 		error = EBADF;
267 		goto done2;
268 	}
269 	pop = &fdp->fd_ofileflags[uap->fd];
270 
271 	switch (uap->cmd) {
272 	case F_DUPFD:
273 		newmin = uap->arg;
274 		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
275 		    newmin >= maxfilesperproc) {
276 			FILEDESC_UNLOCK(fdp);
277 			error = EINVAL;
278 			break;
279 		}
280 		if ((error = fdalloc(td, newmin, &i))) {
281 			FILEDESC_UNLOCK(fdp);
282 			break;
283 		}
284 		error = do_dup(fdp, uap->fd, i, td->td_retval, td);
285 		break;
286 
287 	case F_GETFD:
288 		td->td_retval[0] = *pop & 1;
289 		FILEDESC_UNLOCK(fdp);
290 		break;
291 
292 	case F_SETFD:
293 		*pop = (*pop &~ 1) | (uap->arg & 1);
294 		FILEDESC_UNLOCK(fdp);
295 		break;
296 
297 	case F_GETFL:
298 		FILE_LOCK(fp);
299 		FILEDESC_UNLOCK(fdp);
300 		td->td_retval[0] = OFLAGS(fp->f_flag);
301 		FILE_UNLOCK(fp);
302 		break;
303 
304 	case F_SETFL:
305 		fhold(fp);
306 		FILEDESC_UNLOCK(fdp);
307 		fp->f_flag &= ~FCNTLFLAGS;
308 		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
309 		tmp = fp->f_flag & FNONBLOCK;
310 		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
311 		if (error) {
312 			fdrop(fp, td);
313 			break;
314 		}
315 		tmp = fp->f_flag & FASYNC;
316 		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
317 		if (!error) {
318 			fdrop(fp, td);
319 			break;
320 		}
321 		fp->f_flag &= ~FNONBLOCK;
322 		tmp = 0;
323 		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
324 		fdrop(fp, td);
325 		break;
326 
327 	case F_GETOWN:
328 		fhold(fp);
329 		FILEDESC_UNLOCK(fdp);
330 		error = fo_ioctl(fp, FIOGETOWN, (caddr_t)td->td_retval, td);
331 		fdrop(fp, td);
332 		break;
333 
334 	case F_SETOWN:
335 		fhold(fp);
336 		FILEDESC_UNLOCK(fdp);
337 		error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, td);
338 		fdrop(fp, td);
339 		break;
340 
341 	case F_SETLKW:
342 		flg |= F_WAIT;
343 		/* Fall into F_SETLK */
344 
345 	case F_SETLK:
346 		if (fp->f_type != DTYPE_VNODE) {
347 			FILEDESC_UNLOCK(fdp);
348 			error = EBADF;
349 			break;
350 		}
351 		vp = (struct vnode *)fp->f_data;
352 		/*
353 		 * copyin/lockop may block
354 		 */
355 		fhold(fp);
356 		FILEDESC_UNLOCK(fdp);
357 		vp = (struct vnode *)fp->f_data;
358 
359 		/* Copy in the lock structure */
360 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
361 		    sizeof(fl));
362 		if (error) {
363 			fdrop(fp, td);
364 			break;
365 		}
366 		if (fl.l_whence == SEEK_CUR) {
367 			if (fp->f_offset < 0 ||
368 			    (fl.l_start > 0 &&
369 			     fp->f_offset > OFF_MAX - fl.l_start)) {
370 				fdrop(fp, td);
371 				error = EOVERFLOW;
372 				break;
373 			}
374 			fl.l_start += fp->f_offset;
375 		}
376 
377 		switch (fl.l_type) {
378 		case F_RDLCK:
379 			if ((fp->f_flag & FREAD) == 0) {
380 				error = EBADF;
381 				break;
382 			}
383 			PROC_LOCK(p);
384 			p->p_flag |= P_ADVLOCK;
385 			leaderp = p->p_leader;
386 			PROC_UNLOCK(p);
387 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
388 			    &fl, flg);
389 			break;
390 		case F_WRLCK:
391 			if ((fp->f_flag & FWRITE) == 0) {
392 				error = EBADF;
393 				break;
394 			}
395 			PROC_LOCK(p);
396 			p->p_flag |= P_ADVLOCK;
397 			leaderp = p->p_leader;
398 			PROC_UNLOCK(p);
399 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
400 			    &fl, flg);
401 			break;
402 		case F_UNLCK:
403 			PROC_LOCK(p);
404 			leaderp = p->p_leader;
405 			PROC_UNLOCK(p);
406 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_UNLCK,
407 				&fl, F_POSIX);
408 			break;
409 		default:
410 			error = EINVAL;
411 			break;
412 		}
413 		fdrop(fp, td);
414 		break;
415 
416 	case F_GETLK:
417 		if (fp->f_type != DTYPE_VNODE) {
418 			FILEDESC_UNLOCK(fdp);
419 			error = EBADF;
420 			break;
421 		}
422 		vp = (struct vnode *)fp->f_data;
423 		/*
424 		 * copyin/lockop may block
425 		 */
426 		fhold(fp);
427 		FILEDESC_UNLOCK(fdp);
428 		vp = (struct vnode *)fp->f_data;
429 
430 		/* Copy in the lock structure */
431 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
432 		    sizeof(fl));
433 		if (error) {
434 			fdrop(fp, td);
435 			break;
436 		}
437 		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
438 		    fl.l_type != F_UNLCK) {
439 			fdrop(fp, td);
440 			error = EINVAL;
441 			break;
442 		}
443 		if (fl.l_whence == SEEK_CUR) {
444 			if ((fl.l_start > 0 &&
445 			     fp->f_offset > OFF_MAX - fl.l_start) ||
446 			    (fl.l_start < 0 &&
447 			     fp->f_offset < OFF_MIN - fl.l_start)) {
448 				fdrop(fp, td);
449 				error = EOVERFLOW;
450 				break;
451 			}
452 			fl.l_start += fp->f_offset;
453 		}
454 		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
455 			    &fl, F_POSIX);
456 		fdrop(fp, td);
457 		if (error == 0) {
458 			error = copyout((caddr_t)&fl,
459 				    (caddr_t)(intptr_t)uap->arg, sizeof(fl));
460 		}
461 		break;
462 	default:
463 		FILEDESC_UNLOCK(fdp);
464 		error = EINVAL;
465 		break;
466 	}
467 done2:
468 	mtx_unlock(&Giant);
469 	return (error);
470 }
471 
472 /*
473  * Common code for dup, dup2, and fcntl(F_DUPFD).
474  * filedesc must be locked, but will be unlocked as a side effect.
475  */
476 static int
477 do_dup(fdp, old, new, retval, td)
478 	register struct filedesc *fdp;
479 	register int old, new;
480 	register_t *retval;
481 	struct thread *td;
482 {
483 	struct file *fp;
484 	struct file *delfp;
485 
486 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
487 
488 	/*
489 	 * Save info on the descriptor being overwritten.  We have
490 	 * to do the unmap now, but we cannot close it without
491 	 * introducing an ownership race for the slot.
492 	 */
493 	delfp = fdp->fd_ofiles[new];
494 #if 0
495 	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
496 		(void) munmapfd(td, new);
497 #endif
498 
499 	/*
500 	 * Duplicate the source descriptor, update lastfile
501 	 */
502 	fp = fdp->fd_ofiles[old];
503 	fdp->fd_ofiles[new] = fp;
504 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
505 	fhold(fp);
506 	if (new > fdp->fd_lastfile)
507 		fdp->fd_lastfile = new;
508 	*retval = new;
509 
510 	FILEDESC_UNLOCK(fdp);
511 
512 	/*
513 	 * If we dup'd over a valid file, we now own the reference to it
514 	 * and must dispose of it using closef() semantics (as if a
515 	 * close() were performed on it).
516 	 */
517 	if (delfp) {
518 		mtx_lock(&Giant);
519 		(void) closef(delfp, td);
520 		mtx_unlock(&Giant);
521 	}
522 	return (0);
523 }
524 
525 /*
526  * If sigio is on the list associated with a process or process group,
527  * disable signalling from the device, remove sigio from the list and
528  * free sigio.
529  */
530 void
531 funsetown(sigio)
532 	struct sigio *sigio;
533 {
534 	int s;
535 
536 	if (sigio == NULL)
537 		return;
538 	s = splhigh();
539 	*(sigio->sio_myref) = NULL;
540 	splx(s);
541 	if (sigio->sio_pgid < 0) {
542 		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
543 			     sigio, sio_pgsigio);
544 	} else /* if ((*sigiop)->sio_pgid > 0) */ {
545 		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
546 			     sigio, sio_pgsigio);
547 	}
548 	crfree(sigio->sio_ucred);
549 	FREE(sigio, M_SIGIO);
550 }
551 
552 /* Free a list of sigio structures. */
553 void
554 funsetownlst(sigiolst)
555 	struct sigiolst *sigiolst;
556 {
557 	struct sigio *sigio;
558 
559 	while ((sigio = SLIST_FIRST(sigiolst)) != NULL)
560 		funsetown(sigio);
561 }
562 
563 /*
564  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
565  *
566  * After permission checking, add a sigio structure to the sigio list for
567  * the process or process group.
568  */
569 int
570 fsetown(pgid, sigiop)
571 	pid_t pgid;
572 	struct sigio **sigiop;
573 {
574 	struct proc *proc;
575 	struct pgrp *pgrp;
576 	struct sigio *sigio;
577 	int s;
578 
579 	if (pgid == 0) {
580 		funsetown(*sigiop);
581 		return (0);
582 	}
583 	if (pgid > 0) {
584 		proc = pfind(pgid);
585 		if (proc == NULL)
586 			return (ESRCH);
587 
588 		/*
589 		 * Policy - Don't allow a process to FSETOWN a process
590 		 * in another session.
591 		 *
592 		 * Remove this test to allow maximum flexibility or
593 		 * restrict FSETOWN to the current process or process
594 		 * group for maximum safety.
595 		 */
596 		if (proc->p_session != curthread->td_proc->p_session) {
597 			PROC_UNLOCK(proc);
598 			return (EPERM);
599 		}
600 		PROC_UNLOCK(proc);
601 
602 		pgrp = NULL;
603 	} else /* if (pgid < 0) */ {
604 		pgrp = pgfind(-pgid);
605 		if (pgrp == NULL)
606 			return (ESRCH);
607 
608 		/*
609 		 * Policy - Don't allow a process to FSETOWN a process
610 		 * in another session.
611 		 *
612 		 * Remove this test to allow maximum flexibility or
613 		 * restrict FSETOWN to the current process or process
614 		 * group for maximum safety.
615 		 */
616 		if (pgrp->pg_session != curthread->td_proc->p_session)
617 			return (EPERM);
618 
619 		proc = NULL;
620 	}
621 	funsetown(*sigiop);
622 	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
623 	if (pgid > 0) {
624 		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
625 		sigio->sio_proc = proc;
626 	} else {
627 		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
628 		sigio->sio_pgrp = pgrp;
629 	}
630 	sigio->sio_pgid = pgid;
631 	sigio->sio_ucred = crhold(curthread->td_proc->p_ucred);
632 	sigio->sio_myref = sigiop;
633 	s = splhigh();
634 	*sigiop = sigio;
635 	splx(s);
636 	return (0);
637 }
638 
639 /*
640  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
641  */
642 pid_t
643 fgetown(sigio)
644 	struct sigio *sigio;
645 {
646 	return (sigio != NULL ? sigio->sio_pgid : 0);
647 }
648 
649 /*
650  * Close a file descriptor.
651  */
652 #ifndef _SYS_SYSPROTO_H_
653 struct close_args {
654         int     fd;
655 };
656 #endif
657 /*
658  * MPSAFE
659  */
660 /* ARGSUSED */
661 int
662 close(td, uap)
663 	struct thread *td;
664 	struct close_args *uap;
665 {
666 	register struct filedesc *fdp;
667 	register struct file *fp;
668 	register int fd = uap->fd;
669 	int error = 0;
670 
671 	mtx_lock(&Giant);
672 	fdp = td->td_proc->p_fd;
673 	FILEDESC_LOCK(fdp);
674 	if ((unsigned)fd >= fdp->fd_nfiles ||
675 	    (fp = fdp->fd_ofiles[fd]) == NULL) {
676 		FILEDESC_UNLOCK(fdp);
677 		error = EBADF;
678 		goto done2;
679 	}
680 #if 0
681 	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
682 		(void) munmapfd(td, fd);
683 #endif
684 	fdp->fd_ofiles[fd] = NULL;
685 	fdp->fd_ofileflags[fd] = 0;
686 
687 	/*
688 	 * we now hold the fp reference that used to be owned by the descriptor
689 	 * array.
690 	 */
691 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
692 		fdp->fd_lastfile--;
693 	if (fd < fdp->fd_freefile)
694 		fdp->fd_freefile = fd;
695 	if (fd < fdp->fd_knlistsize) {
696 		FILEDESC_UNLOCK(fdp);
697 		knote_fdclose(td, fd);
698 	} else
699 		FILEDESC_UNLOCK(fdp);
700 
701 	error = closef(fp, td);
702 done2:
703 	mtx_unlock(&Giant);
704 	return(error);
705 }
706 
707 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
708 /*
709  * Return status information about a file descriptor.
710  */
711 #ifndef _SYS_SYSPROTO_H_
712 struct ofstat_args {
713 	int	fd;
714 	struct	ostat *sb;
715 };
716 #endif
717 /*
718  * MPSAFE
719  */
720 /* ARGSUSED */
721 int
722 ofstat(td, uap)
723 	struct thread *td;
724 	register struct ofstat_args *uap;
725 {
726 	struct file *fp;
727 	struct stat ub;
728 	struct ostat oub;
729 	int error;
730 
731 	mtx_lock(&Giant);
732 	if ((error = fget(td, uap->fd, &fp)) != 0)
733 		goto done2;
734 	error = fo_stat(fp, &ub, td);
735 	if (error == 0) {
736 		cvtstat(&ub, &oub);
737 		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
738 	}
739 	fdrop(fp, td);
740 done2:
741 	mtx_unlock(&Giant);
742 	return (error);
743 }
744 #endif /* COMPAT_43 || COMPAT_SUNOS */
745 
746 /*
747  * Return status information about a file descriptor.
748  */
749 #ifndef _SYS_SYSPROTO_H_
750 struct fstat_args {
751 	int	fd;
752 	struct	stat *sb;
753 };
754 #endif
755 /*
756  * MPSAFE
757  */
758 /* ARGSUSED */
759 int
760 fstat(td, uap)
761 	struct thread *td;
762 	struct fstat_args *uap;
763 {
764 	struct file *fp;
765 	struct stat ub;
766 	int error;
767 
768 	mtx_lock(&Giant);
769 	if ((error = fget(td, uap->fd, &fp)) != 0)
770 		goto done2;
771 	error = fo_stat(fp, &ub, td);
772 	if (error == 0)
773 		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
774 	fdrop(fp, td);
775 done2:
776 	mtx_unlock(&Giant);
777 	return (error);
778 }
779 
780 /*
781  * Return status information about a file descriptor.
782  */
783 #ifndef _SYS_SYSPROTO_H_
784 struct nfstat_args {
785 	int	fd;
786 	struct	nstat *sb;
787 };
788 #endif
789 /*
790  * MPSAFE
791  */
792 /* ARGSUSED */
793 int
794 nfstat(td, uap)
795 	struct thread *td;
796 	register struct nfstat_args *uap;
797 {
798 	struct file *fp;
799 	struct stat ub;
800 	struct nstat nub;
801 	int error;
802 
803 	mtx_lock(&Giant);
804 	if ((error = fget(td, uap->fd, &fp)) != 0)
805 		goto done2;
806 	error = fo_stat(fp, &ub, td);
807 	if (error == 0) {
808 		cvtnstat(&ub, &nub);
809 		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
810 	}
811 	fdrop(fp, td);
812 done2:
813 	mtx_unlock(&Giant);
814 	return (error);
815 }
816 
817 /*
818  * Return pathconf information about a file descriptor.
819  */
820 #ifndef _SYS_SYSPROTO_H_
821 struct fpathconf_args {
822 	int	fd;
823 	int	name;
824 };
825 #endif
826 /*
827  * MPSAFE
828  */
829 /* ARGSUSED */
830 int
831 fpathconf(td, uap)
832 	struct thread *td;
833 	register struct fpathconf_args *uap;
834 {
835 	struct file *fp;
836 	struct vnode *vp;
837 	int error;
838 
839 	if ((error = fget(td, uap->fd, &fp)) != 0)
840 		return (error);
841 
842 	switch (fp->f_type) {
843 	case DTYPE_PIPE:
844 	case DTYPE_SOCKET:
845 		if (uap->name != _PC_PIPE_BUF) {
846 			error = EINVAL;
847 		} else {
848 			td->td_retval[0] = PIPE_BUF;
849 			error = 0;
850 		}
851 		break;
852 	case DTYPE_FIFO:
853 	case DTYPE_VNODE:
854 		vp = (struct vnode *)fp->f_data;
855 		mtx_lock(&Giant);
856 		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
857 		mtx_unlock(&Giant);
858 		break;
859 	default:
860 		error = EOPNOTSUPP;
861 		break;
862 	}
863 	fdrop(fp, td);
864 	return(error);
865 }
866 
867 /*
868  * Allocate a file descriptor for the process.
869  */
870 static int fdexpand;
871 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
872 
873 int
874 fdalloc(td, want, result)
875 	struct thread *td;
876 	int want;
877 	int *result;
878 {
879 	struct proc *p = td->td_proc;
880 	register struct filedesc *fdp = td->td_proc->p_fd;
881 	register int i;
882 	int lim, last, nfiles;
883 	struct file **newofile, **oldofile;
884 	char *newofileflags;
885 
886 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
887 
888 	/*
889 	 * Search for a free descriptor starting at the higher
890 	 * of want or fd_freefile.  If that fails, consider
891 	 * expanding the ofile array.
892 	 */
893 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
894 	for (;;) {
895 		last = min(fdp->fd_nfiles, lim);
896 		if ((i = want) < fdp->fd_freefile)
897 			i = fdp->fd_freefile;
898 		for (; i < last; i++) {
899 			if (fdp->fd_ofiles[i] == NULL) {
900 				fdp->fd_ofileflags[i] = 0;
901 				if (i > fdp->fd_lastfile)
902 					fdp->fd_lastfile = i;
903 				if (want <= fdp->fd_freefile)
904 					fdp->fd_freefile = i;
905 				*result = i;
906 				return (0);
907 			}
908 		}
909 
910 		/*
911 		 * No space in current array.  Expand?
912 		 */
913 		if (fdp->fd_nfiles >= lim)
914 			return (EMFILE);
915 		if (fdp->fd_nfiles < NDEXTENT)
916 			nfiles = NDEXTENT;
917 		else
918 			nfiles = 2 * fdp->fd_nfiles;
919 		FILEDESC_UNLOCK(fdp);
920 		mtx_lock(&Giant);
921 		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
922 		    M_FILEDESC, M_WAITOK);
923 		mtx_unlock(&Giant);
924 		FILEDESC_LOCK(fdp);
925 
926 		/*
927 		 * deal with file-table extend race that might have occured
928 		 * when malloc was blocked.
929 		 */
930 		if (fdp->fd_nfiles >= nfiles) {
931 			FILEDESC_UNLOCK(fdp);
932 			mtx_lock(&Giant);
933 			FREE(newofile, M_FILEDESC);
934 			mtx_unlock(&Giant);
935 			FILEDESC_LOCK(fdp);
936 			continue;
937 		}
938 		newofileflags = (char *) &newofile[nfiles];
939 		/*
940 		 * Copy the existing ofile and ofileflags arrays
941 		 * and zero the new portion of each array.
942 		 */
943 		bcopy(fdp->fd_ofiles, newofile,
944 			(i = sizeof(struct file *) * fdp->fd_nfiles));
945 		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
946 		bcopy(fdp->fd_ofileflags, newofileflags,
947 			(i = sizeof(char) * fdp->fd_nfiles));
948 		bzero(newofileflags + i, nfiles * sizeof(char) - i);
949 		if (fdp->fd_nfiles > NDFILE)
950 			oldofile = fdp->fd_ofiles;
951 		else
952 			oldofile = NULL;
953 		fdp->fd_ofiles = newofile;
954 		fdp->fd_ofileflags = newofileflags;
955 		fdp->fd_nfiles = nfiles;
956 		fdexpand++;
957 		if (oldofile != NULL) {
958 			FILEDESC_UNLOCK(fdp);
959 			mtx_lock(&Giant);
960 			FREE(oldofile, M_FILEDESC);
961 			mtx_unlock(&Giant);
962 			FILEDESC_LOCK(fdp);
963 		}
964 	}
965 	return (0);
966 }
967 
968 /*
969  * Check to see whether n user file descriptors
970  * are available to the process p.
971  */
972 int
973 fdavail(td, n)
974 	struct thread *td;
975 	register int n;
976 {
977 	struct proc *p = td->td_proc;
978 	register struct filedesc *fdp = td->td_proc->p_fd;
979 	register struct file **fpp;
980 	register int i, lim, last;
981 
982 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
983 
984 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
985 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
986 		return (1);
987 
988 	last = min(fdp->fd_nfiles, lim);
989 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
990 	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
991 		if (*fpp == NULL && --n <= 0)
992 			return (1);
993 	}
994 	return (0);
995 }
996 
997 /*
998  * Create a new open file structure and allocate
999  * a file decriptor for the process that refers to it.
1000  */
1001 int
1002 falloc(td, resultfp, resultfd)
1003 	register struct thread *td;
1004 	struct file **resultfp;
1005 	int *resultfd;
1006 {
1007 	struct proc *p = td->td_proc;
1008 	register struct file *fp, *fq;
1009 	int error, i;
1010 
1011 	sx_xlock(&filelist_lock);
1012 	if (nfiles >= maxfiles) {
1013 		sx_xunlock(&filelist_lock);
1014 		tablefull("file");
1015 		return (ENFILE);
1016 	}
1017 	nfiles++;
1018 	sx_xunlock(&filelist_lock);
1019 	/*
1020 	 * Allocate a new file descriptor.
1021 	 * If the process has file descriptor zero open, add to the list
1022 	 * of open files at that point, otherwise put it at the front of
1023 	 * the list of open files.
1024 	 */
1025 	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK | M_ZERO);
1026 
1027 	/*
1028 	 * wait until after malloc (which may have blocked) returns before
1029 	 * allocating the slot, else a race might have shrunk it if we had
1030 	 * allocated it before the malloc.
1031 	 */
1032 	FILEDESC_LOCK(p->p_fd);
1033 	if ((error = fdalloc(td, 0, &i))) {
1034 		FILEDESC_UNLOCK(p->p_fd);
1035 		sx_xlock(&filelist_lock);
1036 		nfiles--;
1037 		sx_xunlock(&filelist_lock);
1038 		FREE(fp, M_FILE);
1039 		return (error);
1040 	}
1041 	fp->f_mtxp = mtx_pool_alloc();
1042 	fp->f_gcflag = 0;
1043 	fp->f_count = 1;
1044 	fp->f_cred = crhold(p->p_ucred);
1045 	fp->f_ops = &badfileops;
1046 	fp->f_seqcount = 1;
1047 	FILEDESC_UNLOCK(p->p_fd);
1048 	sx_xlock(&filelist_lock);
1049 	FILEDESC_LOCK(p->p_fd);
1050 	if ((fq = p->p_fd->fd_ofiles[0])) {
1051 		LIST_INSERT_AFTER(fq, fp, f_list);
1052 	} else {
1053 		LIST_INSERT_HEAD(&filehead, fp, f_list);
1054 	}
1055 	p->p_fd->fd_ofiles[i] = fp;
1056 	FILEDESC_UNLOCK(p->p_fd);
1057 	sx_xunlock(&filelist_lock);
1058 	if (resultfp)
1059 		*resultfp = fp;
1060 	if (resultfd)
1061 		*resultfd = i;
1062 	return (0);
1063 }
1064 
1065 /*
1066  * Free a file descriptor.
1067  */
1068 void
1069 ffree(fp)
1070 	register struct file *fp;
1071 {
1072 
1073 	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
1074 	sx_xlock(&filelist_lock);
1075 	LIST_REMOVE(fp, f_list);
1076 	nfiles--;
1077 	sx_xunlock(&filelist_lock);
1078 	crfree(fp->f_cred);
1079 	FREE(fp, M_FILE);
1080 }
1081 
1082 /*
1083  * Build a new filedesc structure.
1084  */
1085 struct filedesc *
1086 fdinit(td)
1087 	struct thread *td;
1088 {
1089 	register struct filedesc0 *newfdp;
1090 	register struct filedesc *fdp = td->td_proc->p_fd;
1091 
1092 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1093 	    M_FILEDESC, M_WAITOK | M_ZERO);
1094 	mtx_init(&newfdp->fd_fd.fd_mtx, "filedesc structure", MTX_DEF);
1095 	FILEDESC_LOCK(&newfdp->fd_fd);
1096 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1097 	if (newfdp->fd_fd.fd_cdir)
1098 		VREF(newfdp->fd_fd.fd_cdir);
1099 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1100 	if (newfdp->fd_fd.fd_rdir)
1101 		VREF(newfdp->fd_fd.fd_rdir);
1102 	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1103 	if (newfdp->fd_fd.fd_jdir)
1104 		VREF(newfdp->fd_fd.fd_jdir);
1105 
1106 	/* Create the file descriptor table. */
1107 	newfdp->fd_fd.fd_refcnt = 1;
1108 	newfdp->fd_fd.fd_cmask = cmask;
1109 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1110 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1111 	newfdp->fd_fd.fd_nfiles = NDFILE;
1112 	newfdp->fd_fd.fd_knlistsize = -1;
1113 	FILEDESC_UNLOCK(&newfdp->fd_fd);
1114 
1115 	return (&newfdp->fd_fd);
1116 }
1117 
1118 /*
1119  * Share a filedesc structure.
1120  */
1121 struct filedesc *
1122 fdshare(p)
1123 	struct proc *p;
1124 {
1125 	FILEDESC_LOCK(p->p_fd);
1126 	p->p_fd->fd_refcnt++;
1127 	FILEDESC_UNLOCK(p->p_fd);
1128 	return (p->p_fd);
1129 }
1130 
1131 /*
1132  * Copy a filedesc structure.
1133  */
1134 struct filedesc *
1135 fdcopy(td)
1136 	struct thread *td;
1137 {
1138 	register struct filedesc *newfdp, *fdp = td->td_proc->p_fd;
1139 	register struct file **fpp;
1140 	register int i, j;
1141 
1142 	/* Certain daemons might not have file descriptors. */
1143 	if (fdp == NULL)
1144 		return (NULL);
1145 
1146 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1147 
1148 	FILEDESC_UNLOCK(fdp);
1149 	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1150 	    M_FILEDESC, M_WAITOK);
1151 	FILEDESC_LOCK(fdp);
1152 	bcopy(fdp, newfdp, sizeof(struct filedesc));
1153 	FILEDESC_UNLOCK(fdp);
1154 	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
1155 	mtx_init(&newfdp->fd_mtx, "filedesc structure", MTX_DEF);
1156 	if (newfdp->fd_cdir)
1157 		VREF(newfdp->fd_cdir);
1158 	if (newfdp->fd_rdir)
1159 		VREF(newfdp->fd_rdir);
1160 	if (newfdp->fd_jdir)
1161 		VREF(newfdp->fd_jdir);
1162 	newfdp->fd_refcnt = 1;
1163 
1164 	/*
1165 	 * If the number of open files fits in the internal arrays
1166 	 * of the open file structure, use them, otherwise allocate
1167 	 * additional memory for the number of descriptors currently
1168 	 * in use.
1169 	 */
1170 	FILEDESC_LOCK(fdp);
1171 	newfdp->fd_lastfile = fdp->fd_lastfile;
1172 	newfdp->fd_nfiles = fdp->fd_nfiles;
1173 	if (newfdp->fd_lastfile < NDFILE) {
1174 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1175 		newfdp->fd_ofileflags =
1176 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
1177 		i = NDFILE;
1178 	} else {
1179 		/*
1180 		 * Compute the smallest multiple of NDEXTENT needed
1181 		 * for the file descriptors currently in use,
1182 		 * allowing the table to shrink.
1183 		 */
1184 retry:
1185 		i = newfdp->fd_nfiles;
1186 		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1187 			i /= 2;
1188 		FILEDESC_UNLOCK(fdp);
1189 		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1190 		    M_FILEDESC, M_WAITOK);
1191 		FILEDESC_LOCK(fdp);
1192 		newfdp->fd_lastfile = fdp->fd_lastfile;
1193 		newfdp->fd_nfiles = fdp->fd_nfiles;
1194 		j = newfdp->fd_nfiles;
1195 		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
1196 			j /= 2;
1197 		if (i != j) {
1198 			/*
1199 			 * The size of the original table has changed.
1200 			 * Go over once again.
1201 			 */
1202 			FILEDESC_UNLOCK(fdp);
1203 			FREE(newfdp->fd_ofiles, M_FILEDESC);
1204 			FILEDESC_LOCK(fdp);
1205 			newfdp->fd_lastfile = fdp->fd_lastfile;
1206 			newfdp->fd_nfiles = fdp->fd_nfiles;
1207 			goto retry;
1208 		}
1209 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1210 	}
1211 	newfdp->fd_nfiles = i;
1212 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1213 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1214 
1215 	/*
1216 	 * kq descriptors cannot be copied.
1217 	 */
1218 	if (newfdp->fd_knlistsize != -1) {
1219 		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1220 		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1221 			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1222 				*fpp = NULL;
1223 				if (i < newfdp->fd_freefile)
1224 					newfdp->fd_freefile = i;
1225 			}
1226 			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1227 				newfdp->fd_lastfile--;
1228 		}
1229 		newfdp->fd_knlist = NULL;
1230 		newfdp->fd_knlistsize = -1;
1231 		newfdp->fd_knhash = NULL;
1232 		newfdp->fd_knhashmask = 0;
1233 	}
1234 
1235 	fpp = newfdp->fd_ofiles;
1236 	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1237 		if (*fpp != NULL) {
1238 			fhold(*fpp);
1239 		}
1240 	}
1241 	return (newfdp);
1242 }
1243 
1244 /*
1245  * Release a filedesc structure.
1246  */
1247 void
1248 fdfree(td)
1249 	struct thread *td;
1250 {
1251 	register struct filedesc *fdp = td->td_proc->p_fd;
1252 	struct file **fpp;
1253 	register int i;
1254 
1255 	/* Certain daemons might not have file descriptors. */
1256 	if (fdp == NULL)
1257 		return;
1258 
1259 	FILEDESC_LOCK(fdp);
1260 	if (--fdp->fd_refcnt > 0) {
1261 		FILEDESC_UNLOCK(fdp);
1262 		return;
1263 	}
1264 	/*
1265 	 * we are the last reference to the structure, we can
1266 	 * safely assume it will not change out from under us.
1267 	 */
1268 	FILEDESC_UNLOCK(fdp);
1269 	fpp = fdp->fd_ofiles;
1270 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1271 		if (*fpp)
1272 			(void) closef(*fpp, td);
1273 	}
1274 	if (fdp->fd_nfiles > NDFILE)
1275 		FREE(fdp->fd_ofiles, M_FILEDESC);
1276 	if (fdp->fd_cdir)
1277 		vrele(fdp->fd_cdir);
1278 	if (fdp->fd_rdir)
1279 		vrele(fdp->fd_rdir);
1280 	if (fdp->fd_jdir)
1281 		vrele(fdp->fd_jdir);
1282 	if (fdp->fd_knlist)
1283 		FREE(fdp->fd_knlist, M_KQUEUE);
1284 	if (fdp->fd_knhash)
1285 		FREE(fdp->fd_knhash, M_KQUEUE);
1286 	mtx_destroy(&fdp->fd_mtx);
1287 	FREE(fdp, M_FILEDESC);
1288 }
1289 
1290 /*
1291  * For setugid programs, we don't want to people to use that setugidness
1292  * to generate error messages which write to a file which otherwise would
1293  * otherwise be off-limits to the process.
1294  *
1295  * This is a gross hack to plug the hole.  A better solution would involve
1296  * a special vop or other form of generalized access control mechanism.  We
1297  * go ahead and just reject all procfs file systems accesses as dangerous.
1298  *
1299  * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1300  * sufficient.  We also don't for check setugidness since we know we are.
1301  */
1302 static int
1303 is_unsafe(struct file *fp)
1304 {
1305 	if (fp->f_type == DTYPE_VNODE &&
1306 	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1307 		return (1);
1308 	return (0);
1309 }
1310 
1311 /*
1312  * Make this setguid thing safe, if at all possible.
1313  */
1314 void
1315 setugidsafety(td)
1316 	struct thread *td;
1317 {
1318 	struct filedesc *fdp = td->td_proc->p_fd;
1319 	register int i;
1320 
1321 	/* Certain daemons might not have file descriptors. */
1322 	if (fdp == NULL)
1323 		return;
1324 
1325 	/*
1326 	 * note: fdp->fd_ofiles may be reallocated out from under us while
1327 	 * we are blocked in a close.  Be careful!
1328 	 */
1329 	FILEDESC_LOCK(fdp);
1330 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1331 		if (i > 2)
1332 			break;
1333 		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1334 			struct file *fp;
1335 
1336 #if 0
1337 			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1338 				(void) munmapfd(td, i);
1339 #endif
1340 			if (i < fdp->fd_knlistsize) {
1341 				FILEDESC_UNLOCK(fdp);
1342 				knote_fdclose(td, i);
1343 				FILEDESC_LOCK(fdp);
1344 			}
1345 			/*
1346 			 * NULL-out descriptor prior to close to avoid
1347 			 * a race while close blocks.
1348 			 */
1349 			fp = fdp->fd_ofiles[i];
1350 			fdp->fd_ofiles[i] = NULL;
1351 			fdp->fd_ofileflags[i] = 0;
1352 			if (i < fdp->fd_freefile)
1353 				fdp->fd_freefile = i;
1354 			FILEDESC_UNLOCK(fdp);
1355 			(void) closef(fp, td);
1356 			FILEDESC_LOCK(fdp);
1357 		}
1358 	}
1359 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1360 		fdp->fd_lastfile--;
1361 	FILEDESC_UNLOCK(fdp);
1362 }
1363 
1364 /*
1365  * Close any files on exec?
1366  */
1367 void
1368 fdcloseexec(td)
1369 	struct thread *td;
1370 {
1371 	struct filedesc *fdp = td->td_proc->p_fd;
1372 	register int i;
1373 
1374 	/* Certain daemons might not have file descriptors. */
1375 	if (fdp == NULL)
1376 		return;
1377 
1378 	FILEDESC_LOCK(fdp);
1379 
1380 	/*
1381 	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1382 	 * may block and rip them out from under us.
1383 	 */
1384 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1385 		if (fdp->fd_ofiles[i] != NULL &&
1386 		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1387 			struct file *fp;
1388 
1389 #if 0
1390 			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1391 				(void) munmapfd(td, i);
1392 #endif
1393 			if (i < fdp->fd_knlistsize) {
1394 				FILEDESC_UNLOCK(fdp);
1395 				knote_fdclose(td, i);
1396 				FILEDESC_LOCK(fdp);
1397 			}
1398 			/*
1399 			 * NULL-out descriptor prior to close to avoid
1400 			 * a race while close blocks.
1401 			 */
1402 			fp = fdp->fd_ofiles[i];
1403 			fdp->fd_ofiles[i] = NULL;
1404 			fdp->fd_ofileflags[i] = 0;
1405 			if (i < fdp->fd_freefile)
1406 				fdp->fd_freefile = i;
1407 			FILEDESC_UNLOCK(fdp);
1408 			(void) closef(fp, td);
1409 			FILEDESC_LOCK(fdp);
1410 		}
1411 	}
1412 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1413 		fdp->fd_lastfile--;
1414 	FILEDESC_UNLOCK(fdp);
1415 }
1416 
1417 /*
1418  * Internal form of close.
1419  * Decrement reference count on file structure.
1420  * Note: td may be NULL when closing a file
1421  * that was being passed in a message.
1422  */
1423 int
1424 closef(fp, td)
1425 	register struct file *fp;
1426 	register struct thread *td;
1427 {
1428 	struct vnode *vp;
1429 	struct flock lf;
1430 
1431 	if (fp == NULL)
1432 		return (0);
1433 	/*
1434 	 * POSIX record locking dictates that any close releases ALL
1435 	 * locks owned by this process.  This is handled by setting
1436 	 * a flag in the unlock to free ONLY locks obeying POSIX
1437 	 * semantics, and not to free BSD-style file locks.
1438 	 * If the descriptor was in a message, POSIX-style locks
1439 	 * aren't passed with the descriptor.
1440 	 */
1441 	if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1442 	    fp->f_type == DTYPE_VNODE) {
1443 		lf.l_whence = SEEK_SET;
1444 		lf.l_start = 0;
1445 		lf.l_len = 0;
1446 		lf.l_type = F_UNLCK;
1447 		vp = (struct vnode *)fp->f_data;
1448 		(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1449 		    F_UNLCK, &lf, F_POSIX);
1450 	}
1451 	return (fdrop(fp, td));
1452 }
1453 
/*
 * Drop a reference on the struct file passed in.  This is the unlocked
 * entry point: it takes the file lock and hands off to fdrop_locked(),
 * which releases the lock again and performs the actual close when the
 * reference count hits zero.
 */
int
fdrop(fp, td)
	struct file *fp;
	struct thread *td;
{
	int error;

	FILE_LOCK(fp);
	error = fdrop_locked(fp, td);
	return (error);
}
1467 
1468 /*
1469  * Extract the file pointer associated with the specified descriptor for
1470  * the current user process.
1471  *
1472  * If the descriptor doesn't exist, EBADF is returned.
1473  *
1474  * If the descriptor exists but doesn't match 'flags' then
1475  * return EBADF for read attempts and EINVAL for write attempts.
1476  *
1477  * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
1478  * It should be droped with fdrop().
1479  * If it is not set, then the refcount will not be bumped however the
1480  * thread's filedesc struct will be returned locked (for fgetsock).
1481  *
1482  * If an error occured the non-zero error is returned and *fpp is set to NULL.
1483  * Otherwise *fpp is set and zero is returned.
1484  */
1485 static __inline
1486 int
1487 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1488 {
1489 	struct filedesc *fdp;
1490 	struct file *fp;
1491 
1492 	*fpp = NULL;
1493 	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1494 		return(EBADF);
1495 	FILEDESC_LOCK(fdp);
1496 	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1497 		FILEDESC_UNLOCK(fdp);
1498 		return(EBADF);
1499 	}
1500 
1501 	/*
1502 	 * Note: FREAD failures returns EBADF to maintain backwards
1503 	 * compatibility with what routines returned before.
1504 	 *
1505 	 * Only one flag, or 0, may be specified.
1506 	 */
1507 	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1508 		FILEDESC_UNLOCK(fdp);
1509 		return(EBADF);
1510 	}
1511 	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1512 		FILEDESC_UNLOCK(fdp);
1513 		return(EINVAL);
1514 	}
1515 	if (hold) {
1516 		fhold(fp);
1517 		FILEDESC_UNLOCK(fdp);
1518 	}
1519 	*fpp = fp;
1520 	return(0);
1521 }
1522 
/*
 * Look up descriptor fd without any access-mode check and return the
 * file in *fpp with an extra reference held (see _fget()).
 */
int
fget(struct thread *td, int fd, struct file **fpp)
{
	int error;

	error = _fget(td, fd, fpp, 0, 1);
	return (error);
}
1528 
1529 int
1530 fget_read(struct thread *td, int fd, struct file **fpp)
1531 {
1532     return(_fget(td, fd, fpp, FREAD, 1));
1533 }
1534 
1535 int
1536 fget_write(struct thread *td, int fd, struct file **fpp)
1537 {
1538     return(_fget(td, fd, fpp, FWRITE, 1));
1539 }
1540 
1541 /*
1542  * Like fget() but loads the underlying vnode, or returns an error if
1543  * the descriptor does not represent a vnode.  Note that pipes use vnodes
1544  * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1545  * error).  The returned vnode will be vref()d.
1546  */
1547 
1548 static __inline
1549 int
1550 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1551 {
1552 	struct file *fp;
1553 	int error;
1554 
1555 	*vpp = NULL;
1556 	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1557 		return (error);
1558 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1559 		error = EINVAL;
1560 	} else {
1561 		*vpp = (struct vnode *)fp->f_data;
1562 		vref(*vpp);
1563 	}
1564 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1565 	return (error);
1566 }
1567 
/*
 * Return in *vpp a referenced vnode for descriptor fd, requesting no
 * particular access mode (see _fgetvp()).
 */
int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{
	int error;

	error = _fgetvp(td, fd, vpp, 0);
	return (error);
}
1573 
1574 int
1575 fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1576 {
1577 	return(_fgetvp(td, fd, vpp, FREAD));
1578 }
1579 
1580 int
1581 fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1582 {
1583 	return(_fgetvp(td, fd, vpp, FWRITE));
1584 }
1585 
1586 /*
1587  * Like fget() but loads the underlying socket, or returns an error if
1588  * the descriptor does not represent a socket.
1589  *
1590  * We bump the ref count on the returned socket.  XXX Also obtain the SX lock in
1591  * the future.
1592  */
1593 int
1594 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1595 {
1596 	struct file *fp;
1597 	int error;
1598 
1599 	*spp = NULL;
1600 	if (fflagp)
1601 		*fflagp = 0;
1602 	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1603 		return (error);
1604 	if (fp->f_type != DTYPE_SOCKET) {
1605 		error = ENOTSOCK;
1606 	} else {
1607 		*spp = (struct socket *)fp->f_data;
1608 		if (fflagp)
1609 			*fflagp = fp->f_flag;
1610 		soref(*spp);
1611 	}
1612 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1613 	return(error);
1614 }
1615 
/*
 * Counterpart of fgetsock(): drop the reference count on the socket
 * and XXX release the SX lock in the future.  The last reference closes
 * the socket.
 */
void
fputsock(struct socket *so)
{

	sorele(so);
}
1625 
1626 /*
1627  * Drop reference on struct file passed in, may call closef if the
1628  * reference hits zero.
1629  * Expects struct file locked, and will unlock it.
1630  */
1631 int
1632 fdrop_locked(fp, td)
1633 	struct file *fp;
1634 	struct thread *td;
1635 {
1636 	struct flock lf;
1637 	struct vnode *vp;
1638 	int error;
1639 
1640 	FILE_LOCK_ASSERT(fp, MA_OWNED);
1641 
1642 	if (--fp->f_count > 0) {
1643 		FILE_UNLOCK(fp);
1644 		return (0);
1645 	}
1646 	if (fp->f_count < 0)
1647 		panic("fdrop: count < 0");
1648 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1649 		lf.l_whence = SEEK_SET;
1650 		lf.l_start = 0;
1651 		lf.l_len = 0;
1652 		lf.l_type = F_UNLCK;
1653 		vp = (struct vnode *)fp->f_data;
1654 		FILE_UNLOCK(fp);
1655 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1656 	} else
1657 		FILE_UNLOCK(fp);
1658 	if (fp->f_ops != &badfileops)
1659 		error = fo_close(fp, td);
1660 	else
1661 		error = 0;
1662 	ffree(fp);
1663 	return (error);
1664 }
1665 
1666 /*
1667  * Apply an advisory lock on a file descriptor.
1668  *
1669  * Just attempt to get a record lock of the requested type on
1670  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1671  */
1672 #ifndef _SYS_SYSPROTO_H_
1673 struct flock_args {
1674 	int	fd;
1675 	int	how;
1676 };
1677 #endif
1678 /*
1679  * MPSAFE
1680  */
1681 /* ARGSUSED */
1682 int
1683 flock(td, uap)
1684 	struct thread *td;
1685 	register struct flock_args *uap;
1686 {
1687 	struct file *fp;
1688 	struct vnode *vp;
1689 	struct flock lf;
1690 	int error;
1691 
1692 	if ((error = fget(td, uap->fd, &fp)) != 0)
1693 		return (error);
1694 	if (fp->f_type != DTYPE_VNODE) {
1695 		fdrop(fp, td);
1696 		return (EOPNOTSUPP);
1697 	}
1698 
1699 	mtx_lock(&Giant);
1700 	vp = (struct vnode *)fp->f_data;
1701 	lf.l_whence = SEEK_SET;
1702 	lf.l_start = 0;
1703 	lf.l_len = 0;
1704 	if (uap->how & LOCK_UN) {
1705 		lf.l_type = F_UNLCK;
1706 		FILE_LOCK(fp);
1707 		fp->f_flag &= ~FHASLOCK;
1708 		FILE_UNLOCK(fp);
1709 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1710 		goto done2;
1711 	}
1712 	if (uap->how & LOCK_EX)
1713 		lf.l_type = F_WRLCK;
1714 	else if (uap->how & LOCK_SH)
1715 		lf.l_type = F_RDLCK;
1716 	else {
1717 		error = EBADF;
1718 		goto done2;
1719 	}
1720 	FILE_LOCK(fp);
1721 	fp->f_flag |= FHASLOCK;
1722 	FILE_UNLOCK(fp);
1723 	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1724 	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
1725 done2:
1726 	fdrop(fp, td);
1727 	mtx_unlock(&Giant);
1728 	return (error);
1729 }
1730 
1731 /*
1732  * File Descriptor pseudo-device driver (/dev/fd/).
1733  *
1734  * Opening minor device N dup()s the file (if any) connected to file
1735  * descriptor N belonging to the calling process.  Note that this driver
1736  * consists of only the ``open()'' routine, because all subsequent
1737  * references to this file will be direct to the other driver.
1738  */
1739 /* ARGSUSED */
1740 static int
1741 fdopen(dev, mode, type, td)
1742 	dev_t dev;
1743 	int mode, type;
1744 	struct thread *td;
1745 {
1746 
1747 	/*
1748 	 * XXX Kludge: set curthread->td_dupfd to contain the value of the
1749 	 * the file descriptor being sought for duplication. The error
1750 	 * return ensures that the vnode for this device will be released
1751 	 * by vn_open. Open will detect this special error and take the
1752 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1753 	 * will simply report the error.
1754 	 */
1755 	td->td_dupfd = dev2unit(dev);
1756 	return (ENODEV);
1757 }
1758 
1759 /*
1760  * Duplicate the specified descriptor to a free descriptor.
1761  */
1762 int
1763 dupfdopen(td, fdp, indx, dfd, mode, error)
1764 	struct thread *td;
1765 	struct filedesc *fdp;
1766 	int indx, dfd;
1767 	int mode;
1768 	int error;
1769 {
1770 	register struct file *wfp;
1771 	struct file *fp;
1772 
1773 	/*
1774 	 * If the to-be-dup'd fd number is greater than the allowed number
1775 	 * of file descriptors, or the fd to be dup'd has already been
1776 	 * closed, then reject.
1777 	 */
1778 	FILEDESC_LOCK(fdp);
1779 	if ((u_int)dfd >= fdp->fd_nfiles ||
1780 	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1781 		FILEDESC_UNLOCK(fdp);
1782 		return (EBADF);
1783 	}
1784 
1785 	/*
1786 	 * There are two cases of interest here.
1787 	 *
1788 	 * For ENODEV simply dup (dfd) to file descriptor
1789 	 * (indx) and return.
1790 	 *
1791 	 * For ENXIO steal away the file structure from (dfd) and
1792 	 * store it in (indx).  (dfd) is effectively closed by
1793 	 * this operation.
1794 	 *
1795 	 * Any other error code is just returned.
1796 	 */
1797 	switch (error) {
1798 	case ENODEV:
1799 		/*
1800 		 * Check that the mode the file is being opened for is a
1801 		 * subset of the mode of the existing descriptor.
1802 		 */
1803 		FILE_LOCK(wfp);
1804 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1805 			FILE_UNLOCK(wfp);
1806 			FILEDESC_UNLOCK(fdp);
1807 			return (EACCES);
1808 		}
1809 		fp = fdp->fd_ofiles[indx];
1810 #if 0
1811 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1812 			(void) munmapfd(td, indx);
1813 #endif
1814 		fdp->fd_ofiles[indx] = wfp;
1815 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1816 		fhold_locked(wfp);
1817 		FILE_UNLOCK(wfp);
1818 		if (indx > fdp->fd_lastfile)
1819 			fdp->fd_lastfile = indx;
1820 		if (fp != NULL)
1821 			FILE_LOCK(fp);
1822 		FILEDESC_UNLOCK(fdp);
1823 		/*
1824 		 * we now own the reference to fp that the ofiles[] array
1825 		 * used to own.  Release it.
1826 		 */
1827 		if (fp != NULL)
1828 			fdrop_locked(fp, td);
1829 		return (0);
1830 
1831 	case ENXIO:
1832 		/*
1833 		 * Steal away the file pointer from dfd, and stuff it into indx.
1834 		 */
1835 		fp = fdp->fd_ofiles[indx];
1836 #if 0
1837 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1838 			(void) munmapfd(td, indx);
1839 #endif
1840 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1841 		fdp->fd_ofiles[dfd] = NULL;
1842 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1843 		fdp->fd_ofileflags[dfd] = 0;
1844 
1845 		/*
1846 		 * Complete the clean up of the filedesc structure by
1847 		 * recomputing the various hints.
1848 		 */
1849 		if (indx > fdp->fd_lastfile) {
1850 			fdp->fd_lastfile = indx;
1851 		} else {
1852 			while (fdp->fd_lastfile > 0 &&
1853 			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
1854 				fdp->fd_lastfile--;
1855 			}
1856 			if (dfd < fdp->fd_freefile)
1857 				fdp->fd_freefile = dfd;
1858 		}
1859 		if (fp != NULL)
1860 			FILE_LOCK(fp);
1861 		FILEDESC_UNLOCK(fdp);
1862 
1863 		/*
1864 		 * we now own the reference to fp that the ofiles[] array
1865 		 * used to own.  Release it.
1866 		 */
1867 		if (fp != NULL)
1868 			fdrop_locked(fp, td);
1869 		return (0);
1870 
1871 	default:
1872 		FILEDESC_UNLOCK(fdp);
1873 		return (error);
1874 	}
1875 	/* NOTREACHED */
1876 }
1877 
1878 /*
1879  * Get file structures.
1880  */
1881 static int
1882 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
1883 {
1884 	int error;
1885 	struct file *fp;
1886 
1887 	sx_slock(&filelist_lock);
1888 	if (!req->oldptr) {
1889 		/*
1890 		 * overestimate by 10 files
1891 		 */
1892 		error = SYSCTL_OUT(req, 0, sizeof(filehead) +
1893 				   (nfiles + 10) * sizeof(struct file));
1894 		sx_sunlock(&filelist_lock);
1895 		return (error);
1896 	}
1897 
1898 	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1899 	if (error) {
1900 		sx_sunlock(&filelist_lock);
1901 		return (error);
1902 	}
1903 
1904 	/*
1905 	 * followed by an array of file structures
1906 	 */
1907 	LIST_FOREACH(fp, &filehead, f_list) {
1908 		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1909 		if (error) {
1910 			sx_sunlock(&filelist_lock);
1911 			return (error);
1912 		}
1913 	}
1914 	sx_sunlock(&filelist_lock);
1915 	return (0);
1916 }
1917 
/*
 * Sysctl entries exported by this file: the opaque file table produced
 * by sysctl_kern_file (CTLFLAG_RD), the maxfilesperproc and maxfiles
 * integers (CTLFLAG_RW), and the nfiles count of open files (CTLFLAG_RD).
 */
1918 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1919     0, 0, sysctl_kern_file, "S,file", "Entire file table");
1920 
1921 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
1922     &maxfilesperproc, 0, "Maximum files allowed open per process");
1923 
1924 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
1925     &maxfiles, 0, "Maximum number of files");
1926 
1927 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
1928     &nfiles, 0, "System-wide number of open files");
1929 
1930 static void
1931 fildesc_drvinit(void *unused)
1932 {
1933 	dev_t dev;
1934 
1935 	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
1936 	make_dev_alias(dev, "stdin");
1937 	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
1938 	make_dev_alias(dev, "stdout");
1939 	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
1940 	make_dev_alias(dev, "stderr");
1941 	if (!devfs_present) {
1942 		int fd;
1943 
1944 		for (fd = 3; fd < NUMFDESC; fd++)
1945 			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
1946 			    "fd/%d", fd);
1947 	}
1948 }
1949 
/*
 * File operations vector built entirely from the badfo_* stubs defined
 * below.  In this file _fget() answers EBADF for a file whose f_ops
 * points here, and fdrop_locked() skips fo_close() for such a file.
 */
1950 struct fileops badfileops = {
1951 	badfo_readwrite,
1952 	badfo_readwrite,
1953 	badfo_ioctl,
1954 	badfo_poll,
1955 	badfo_kqfilter,
1956 	badfo_stat,
1957 	badfo_close
1958 };
1959 
/*
 * badfileops stub used for both of the first two slots of the vector:
 * ignores every argument and fails with EBADF.
 */
static int
badfo_readwrite(fp, uio, cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
	int flags;
	struct thread *td;
{

	return (EBADF);
}
1971 
1972 static int
1973 badfo_ioctl(fp, com, data, td)
1974 	struct file *fp;
1975 	u_long com;
1976 	caddr_t data;
1977 	struct thread *td;
1978 {
1979 
1980 	return (EBADF);
1981 }
1982 
/*
 * badfileops poll stub: ignores every argument and returns 0.
 */
static int
badfo_poll(fp, events, cred, td)
	struct file *fp;
	int events;
	struct ucred *cred;
	struct thread *td;
{

	return (0);
}
1993 
/*
 * badfileops kqfilter stub: ignores both arguments and returns 0.
 */
static int
badfo_kqfilter(fp, kn)
	struct file *fp;
	struct knote *kn;
{

	return (0);
}
2002 
/*
 * badfileops stat stub: leaves *sb untouched and fails with EBADF.
 */
static int
badfo_stat(fp, sb, td)
	struct file *fp;
	struct stat *sb;
	struct thread *td;
{

	return (EBADF);
}
2012 
/*
 * badfileops close stub: ignores both arguments and fails with EBADF.
 */
static int
badfo_close(fp, td)
	struct file *fp;
	struct thread *td;
{

	return (EBADF);
}
2021 
/*
 * SYSINIT registration of fildesc_drvinit (subsystem SI_SUB_DRIVERS,
 * order SI_ORDER_MIDDLE+CDEV_MAJOR).
 */
2022 SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
2023 					fildesc_drvinit,NULL)
2024 
/*
 * Prototype and SYSINIT registration of filelistinit (subsystem
 * SI_SUB_LOCK, order SI_ORDER_FIRST).
 */
2025 static void filelistinit __P((void *));
2026 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2027 
2028 /* ARGSUSED*/
2029 static void
2030 filelistinit(dummy)
2031 	void *dummy;
2032 {
2033 	sx_init(&filelist_lock, "filelist lock");
2034 }
2035