xref: /freebsd/sys/kern/kern_descrip.c (revision 3a31b7eb32ad60e1e05b2b2e184ff47e4afbb874)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mutex.h>
49 #include <sys/sysproto.h>
50 #include <sys/conf.h>
51 #include <sys/filedesc.h>
52 #include <sys/kernel.h>
53 #include <sys/sysctl.h>
54 #include <sys/vnode.h>
55 #include <sys/proc.h>
56 #include <sys/file.h>
57 #include <sys/stat.h>
58 #include <sys/filio.h>
59 #include <sys/fcntl.h>
60 #include <sys/unistd.h>
61 #include <sys/resourcevar.h>
62 #include <sys/event.h>
63 #include <sys/sx.h>
64 #include <sys/socketvar.h>
65 
66 #include <machine/limits.h>
67 
68 #include <vm/vm.h>
69 #include <vm/vm_extern.h>
70 
/*
 * Malloc types used for the allocations made in this file: descriptor
 * tables (M_FILEDESC), open file structures (M_FILE) and sigio records
 * (M_SIGIO).  M_FILE is the only one that is not file-local.
 */
71 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
72 MALLOC_DEFINE(M_FILE, "file", "Open file structure");
73 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
74 
/* Open entry point installed in fildesc_cdevsw below. */
75 static	 d_open_t  fdopen;
/* NOTE(review): NUMFDESC is not referenced in the part of the file reviewed here -- confirm its user. */
76 #define NUMFDESC 64
77 
/* Major number placed in the "maj" slot of fildesc_cdevsw. */
78 #define CDEV_MAJOR 22
/*
 * Character device switch for the "FD" device.  Only the open slot is
 * backed by a function from this file (fdopen); every other slot is
 * filled with the corresponding no*() stub.
 */
79 static struct cdevsw fildesc_cdevsw = {
80 	/* open */	fdopen,
81 	/* close */	noclose,
82 	/* read */	noread,
83 	/* write */	nowrite,
84 	/* ioctl */	noioctl,
85 	/* poll */	nopoll,
86 	/* mmap */	nommap,
87 	/* strategy */	nostrategy,
88 	/* name */	"FD",
89 	/* maj */	CDEV_MAJOR,
90 	/* dump */	nodump,
91 	/* psize */	nopsize,
92 	/* flags */	0,
93 };
94 
/*
 * Forward declarations of file-local helpers.  do_dup() is the common
 * back end of dup(), dup2() and fcntl(F_DUPFD).  The badfo_*() functions
 * are defined outside the portion reviewed here; presumably they are the
 * methods of the badfileops table that falloc() installs in every new
 * file -- TODO confirm.
 */
95 static int do_dup __P((struct filedesc *fdp, int old, int new, register_t *retval, struct thread *td));
96 static int badfo_readwrite __P((struct file *fp, struct uio *uio,
97     struct ucred *cred, int flags, struct thread *td));
98 static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data,
99     struct thread *td));
100 static int badfo_poll __P((struct file *fp, int events,
101     struct ucred *cred, struct thread *td));
102 static int badfo_kqfilter __P((struct file *fp, struct knote *kn));
103 static int badfo_stat __P((struct file *fp, struct stat *sb, struct thread *td));
104 static int badfo_close __P((struct file *fp, struct thread *td));
105 
106 /*
107  * Descriptor management.
108  */
/* falloc() links every new file onto filehead and ffree() unlinks it. */
109 struct filelist filehead;	/* head of list of open files */
/* Incremented by falloc(), decremented by ffree(); checked against maxfiles. */
110 int nfiles;			/* actual number of open files */
/* Defined elsewhere; fdinit() copies it into the fd_cmask of each new table. */
111 extern int cmask;
112 
113 /*
114  * System calls on descriptors.
115  */
116 #ifndef _SYS_SYSPROTO_H_
117 struct getdtablesize_args {
118 	int	dummy;
119 };
120 #endif
121 /*
122  * MPSAFE
123  */
124 /* ARGSUSED */
125 int
126 getdtablesize(td, uap)
127 	struct thread *td;
128 	struct getdtablesize_args *uap;
129 {
130 	struct proc *p = td->td_proc;
131 
132 	mtx_lock(&Giant);
133 	td->td_retval[0] =
134 	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
135 	mtx_unlock(&Giant);
136 	return (0);
137 }
138 
139 /*
140  * Duplicate a file descriptor to a particular value.
141  *
142  * note: keep in mind that a potential race condition exists when closing
143  * descriptors from a shared descriptor table (via rfork).
144  */
145 #ifndef _SYS_SYSPROTO_H_
146 struct dup2_args {
147 	u_int	from;
148 	u_int	to;
149 };
150 #endif
151 /*
152  * MPSAFE
153  */
154 /* ARGSUSED */
155 int
156 dup2(td, uap)
157 	struct thread *td;
158 	struct dup2_args *uap;
159 {
160 	struct proc *p = td->td_proc;
161 	register struct filedesc *fdp = td->td_proc->p_fd;
162 	register u_int old = uap->from, new = uap->to;
163 	int i, error;
164 
165 	mtx_lock(&Giant);
166 retry:
167 	if (old >= fdp->fd_nfiles ||
168 	    fdp->fd_ofiles[old] == NULL ||
169 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
170 	    new >= maxfilesperproc) {
171 		error = EBADF;
172 		goto done2;
173 	}
174 	if (old == new) {
175 		td->td_retval[0] = new;
176 		error = 0;
177 		goto done2;
178 	}
179 	if (new >= fdp->fd_nfiles) {
180 		if ((error = fdalloc(td, new, &i)))
181 			goto done2;
182 		if (new != i)
183 			panic("dup2: fdalloc");
184 		/*
185 		 * fdalloc() may block, retest everything.
186 		 */
187 		goto retry;
188 	}
189 	error = do_dup(fdp, (int)old, (int)new, td->td_retval, td);
190 done2:
191 	mtx_unlock(&Giant);
192 	return(error);
193 }
194 
195 /*
196  * Duplicate a file descriptor.
197  */
198 #ifndef _SYS_SYSPROTO_H_
199 struct dup_args {
200 	u_int	fd;
201 };
202 #endif
203 /*
204  * MPSAFE
205  */
206 /* ARGSUSED */
207 int
208 dup(td, uap)
209 	struct thread *td;
210 	struct dup_args *uap;
211 {
212 	register struct filedesc *fdp;
213 	u_int old;
214 	int new, error;
215 
216 	mtx_lock(&Giant);
217 	old = uap->fd;
218 	fdp = td->td_proc->p_fd;
219 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
220 		error = EBADF;
221 		goto done2;
222 	}
223 	if ((error = fdalloc(td, 0, &new)))
224 		goto done2;
225 	error = do_dup(fdp, (int)old, new, td->td_retval, td);
226 done2:
227 	mtx_unlock(&Giant);
228 	return (error);
229 }
230 
231 /*
232  * The file control system call.
233  */
234 #ifndef _SYS_SYSPROTO_H_
235 struct fcntl_args {
236 	int	fd;
237 	int	cmd;
238 	long	arg;
239 };
240 #endif
241 /*
242  * MPSAFE
243  */
244 /* ARGSUSED */
245 int
246 fcntl(td, uap)
247 	struct thread *td;
248 	register struct fcntl_args *uap;
249 {
250 	register struct proc *p = td->td_proc;
251 	register struct filedesc *fdp;
252 	register struct file *fp;
253 	register char *pop;
254 	struct vnode *vp;
255 	int i, tmp, error = 0, flg = F_POSIX;
256 	struct flock fl;
257 	u_int newmin;
258 
259 	mtx_lock(&Giant);
260 
261 	fdp = p->p_fd;
262 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
263 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
264 		error = EBADF;
265 		goto done2;
266 	}
267 	pop = &fdp->fd_ofileflags[uap->fd];
268 
269 	switch (uap->cmd) {
270 	case F_DUPFD:
271 		newmin = uap->arg;
272 		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
273 		    newmin >= maxfilesperproc) {
274 			error = EINVAL;
275 			break;
276 		}
277 		if ((error = fdalloc(td, newmin, &i)))
278 			break;
279 		error = do_dup(fdp, uap->fd, i, td->td_retval, td);
280 		break;
281 
282 	case F_GETFD:
283 		td->td_retval[0] = *pop & 1;
284 		break;
285 
286 	case F_SETFD:
287 		*pop = (*pop &~ 1) | (uap->arg & 1);
288 		break;
289 
290 	case F_GETFL:
291 		td->td_retval[0] = OFLAGS(fp->f_flag);
292 		break;
293 
294 	case F_SETFL:
295 		fhold(fp);
296 		fp->f_flag &= ~FCNTLFLAGS;
297 		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
298 		tmp = fp->f_flag & FNONBLOCK;
299 		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
300 		if (error) {
301 			fdrop(fp, td);
302 			break;
303 		}
304 		tmp = fp->f_flag & FASYNC;
305 		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
306 		if (!error) {
307 			fdrop(fp, td);
308 			break;
309 		}
310 		fp->f_flag &= ~FNONBLOCK;
311 		tmp = 0;
312 		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
313 		fdrop(fp, td);
314 		break;
315 
316 	case F_GETOWN:
317 		fhold(fp);
318 		error = fo_ioctl(fp, FIOGETOWN, (caddr_t)td->td_retval, td);
319 		fdrop(fp, td);
320 		break;
321 
322 	case F_SETOWN:
323 		fhold(fp);
324 		error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, td);
325 		fdrop(fp, td);
326 		break;
327 
328 	case F_SETLKW:
329 		flg |= F_WAIT;
330 		/* Fall into F_SETLK */
331 
332 	case F_SETLK:
333 		if (fp->f_type != DTYPE_VNODE) {
334 			error = EBADF;
335 			break;
336 		}
337 		vp = (struct vnode *)fp->f_data;
338 
339 		/*
340 		 * copyin/lockop may block
341 		 */
342 		fhold(fp);
343 		/* Copy in the lock structure */
344 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
345 		    sizeof(fl));
346 		if (error) {
347 			fdrop(fp, td);
348 			break;
349 		}
350 		if (fl.l_whence == SEEK_CUR) {
351 			if (fp->f_offset < 0 ||
352 			    (fl.l_start > 0 &&
353 			     fp->f_offset > OFF_MAX - fl.l_start)) {
354 				fdrop(fp, td);
355 				error = EOVERFLOW;
356 				break;
357 			}
358 			fl.l_start += fp->f_offset;
359 		}
360 
361 		switch (fl.l_type) {
362 		case F_RDLCK:
363 			if ((fp->f_flag & FREAD) == 0) {
364 				error = EBADF;
365 				break;
366 			}
367 			p->p_flag |= P_ADVLOCK;
368 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
369 			    &fl, flg);
370 			break;
371 		case F_WRLCK:
372 			if ((fp->f_flag & FWRITE) == 0) {
373 				error = EBADF;
374 				break;
375 			}
376 			p->p_flag |= P_ADVLOCK;
377 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
378 			    &fl, flg);
379 			break;
380 		case F_UNLCK:
381 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
382 				&fl, F_POSIX);
383 			break;
384 		default:
385 			error = EINVAL;
386 			break;
387 		}
388 		fdrop(fp, td);
389 		break;
390 
391 	case F_GETLK:
392 		if (fp->f_type != DTYPE_VNODE) {
393 			error = EBADF;
394 			break;
395 		}
396 		vp = (struct vnode *)fp->f_data;
397 		/*
398 		 * copyin/lockop may block
399 		 */
400 		fhold(fp);
401 		/* Copy in the lock structure */
402 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
403 		    sizeof(fl));
404 		if (error) {
405 			fdrop(fp, td);
406 			break;
407 		}
408 		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
409 		    fl.l_type != F_UNLCK) {
410 			fdrop(fp, td);
411 			error = EINVAL;
412 			break;
413 		}
414 		if (fl.l_whence == SEEK_CUR) {
415 			if ((fl.l_start > 0 &&
416 			     fp->f_offset > OFF_MAX - fl.l_start) ||
417 			    (fl.l_start < 0 &&
418 			     fp->f_offset < OFF_MIN - fl.l_start)) {
419 				fdrop(fp, td);
420 				error = EOVERFLOW;
421 				break;
422 			}
423 			fl.l_start += fp->f_offset;
424 		}
425 		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
426 			    &fl, F_POSIX);
427 		fdrop(fp, td);
428 		if (error == 0) {
429 			error = copyout((caddr_t)&fl,
430 				    (caddr_t)(intptr_t)uap->arg, sizeof(fl));
431 		}
432 		break;
433 	default:
434 		error = EINVAL;
435 		break;
436 	}
437 done2:
438 	mtx_unlock(&Giant);
439 	return (error);
440 }
441 
442 /*
443  * Common code for dup, dup2, and fcntl(F_DUPFD).
444  */
445 static int
446 do_dup(fdp, old, new, retval, td)
447 	register struct filedesc *fdp;
448 	register int old, new;
449 	register_t *retval;
450 	struct thread *td;
451 {
452 	struct file *fp;
453 	struct file *delfp;
454 
455 	/*
456 	 * Save info on the descriptor being overwritten.  We have
457 	 * to do the unmap now, but we cannot close it without
458 	 * introducing an ownership race for the slot.
459 	 */
460 	delfp = fdp->fd_ofiles[new];
461 #if 0
462 	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
463 		(void) munmapfd(td, new);
464 #endif
465 
466 	/*
467 	 * Duplicate the source descriptor, update lastfile
468 	 */
469 	fp = fdp->fd_ofiles[old];
470 	fdp->fd_ofiles[new] = fp;
471 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
472 	fhold(fp);
473 	if (new > fdp->fd_lastfile)
474 		fdp->fd_lastfile = new;
475 	*retval = new;
476 
477 	/*
478 	 * If we dup'd over a valid file, we now own the reference to it
479 	 * and must dispose of it using closef() semantics (as if a
480 	 * close() were performed on it).
481 	 */
482 	if (delfp)
483 		(void) closef(delfp, td);
484 	return (0);
485 }
486 
487 /*
488  * If sigio is on the list associated with a process or process group,
489  * disable signalling from the device, remove sigio from the list and
490  * free sigio.
491  */
492 void
493 funsetown(sigio)
494 	struct sigio *sigio;
495 {
496 	int s;
497 
498 	if (sigio == NULL)
499 		return;
500 	s = splhigh();
501 	*(sigio->sio_myref) = NULL;
502 	splx(s);
503 	if (sigio->sio_pgid < 0) {
504 		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
505 			     sigio, sio_pgsigio);
506 	} else /* if ((*sigiop)->sio_pgid > 0) */ {
507 		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
508 			     sigio, sio_pgsigio);
509 	}
510 	crfree(sigio->sio_ucred);
511 	FREE(sigio, M_SIGIO);
512 }
513 
514 /* Free a list of sigio structures. */
515 void
516 funsetownlst(sigiolst)
517 	struct sigiolst *sigiolst;
518 {
519 	struct sigio *sigio;
520 
521 	while ((sigio = SLIST_FIRST(sigiolst)) != NULL)
522 		funsetown(sigio);
523 }
524 
525 /*
526  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
527  *
528  * After permission checking, add a sigio structure to the sigio list for
529  * the process or process group.
530  */
531 int
532 fsetown(pgid, sigiop)
533 	pid_t pgid;
534 	struct sigio **sigiop;
535 {
536 	struct proc *proc;
537 	struct pgrp *pgrp;
538 	struct sigio *sigio;
539 	int s;
540 
541 	if (pgid == 0) {
542 		funsetown(*sigiop);
543 		return (0);
544 	}
545 	if (pgid > 0) {
546 		proc = pfind(pgid);
547 		if (proc == NULL)
548 			return (ESRCH);
549 
550 		/*
551 		 * Policy - Don't allow a process to FSETOWN a process
552 		 * in another session.
553 		 *
554 		 * Remove this test to allow maximum flexibility or
555 		 * restrict FSETOWN to the current process or process
556 		 * group for maximum safety.
557 		 */
558 		if (proc->p_session != curthread->td_proc->p_session) {
559 			PROC_UNLOCK(proc);
560 			return (EPERM);
561 		}
562 		PROC_UNLOCK(proc);
563 
564 		pgrp = NULL;
565 	} else /* if (pgid < 0) */ {
566 		pgrp = pgfind(-pgid);
567 		if (pgrp == NULL)
568 			return (ESRCH);
569 
570 		/*
571 		 * Policy - Don't allow a process to FSETOWN a process
572 		 * in another session.
573 		 *
574 		 * Remove this test to allow maximum flexibility or
575 		 * restrict FSETOWN to the current process or process
576 		 * group for maximum safety.
577 		 */
578 		if (pgrp->pg_session != curthread->td_proc->p_session)
579 			return (EPERM);
580 
581 		proc = NULL;
582 	}
583 	funsetown(*sigiop);
584 	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
585 	if (pgid > 0) {
586 		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
587 		sigio->sio_proc = proc;
588 	} else {
589 		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
590 		sigio->sio_pgrp = pgrp;
591 	}
592 	sigio->sio_pgid = pgid;
593 	sigio->sio_ucred = crhold(curthread->td_proc->p_ucred);
594 	sigio->sio_myref = sigiop;
595 	s = splhigh();
596 	*sigiop = sigio;
597 	splx(s);
598 	return (0);
599 }
600 
601 /*
602  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
603  */
604 pid_t
605 fgetown(sigio)
606 	struct sigio *sigio;
607 {
608 	return (sigio != NULL ? sigio->sio_pgid : 0);
609 }
610 
611 /*
612  * Close a file descriptor.
613  */
614 #ifndef _SYS_SYSPROTO_H_
615 struct close_args {
616         int     fd;
617 };
618 #endif
619 /*
620  * MPSAFE
621  */
622 /* ARGSUSED */
623 int
624 close(td, uap)
625 	struct thread *td;
626 	struct close_args *uap;
627 {
628 	register struct filedesc *fdp;
629 	register struct file *fp;
630 	register int fd = uap->fd;
631 	int error = 0;
632 
633 	mtx_lock(&Giant);
634 	fdp = td->td_proc->p_fd;
635 	if ((unsigned)fd >= fdp->fd_nfiles ||
636 	    (fp = fdp->fd_ofiles[fd]) == NULL) {
637 		error = EBADF;
638 		goto done2;
639 	}
640 #if 0
641 	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
642 		(void) munmapfd(td, fd);
643 #endif
644 	fdp->fd_ofiles[fd] = NULL;
645 	fdp->fd_ofileflags[fd] = 0;
646 
647 	/*
648 	 * we now hold the fp reference that used to be owned by the descriptor
649 	 * array.
650 	 */
651 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
652 		fdp->fd_lastfile--;
653 	if (fd < fdp->fd_freefile)
654 		fdp->fd_freefile = fd;
655 	if (fd < fdp->fd_knlistsize)
656 		knote_fdclose(td, fd);
657 	error = closef(fp, td);
658 done2:
659 	mtx_unlock(&Giant);
660 	return(error);
661 }
662 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor, in the old
 * "struct ostat" layout.
 *
 * The status is obtained with fo_stat(), converted with cvtstat() and
 * copied out to uap->sb.  Returns 0 or the first error encountered.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
ofstat(struct thread *td, struct ofstat_args *uap)
{
	struct file *fp;
	struct stat sb;
	struct ostat osb;
	int error;

	mtx_lock(&Giant);
	error = fget(td, uap->fd, &fp);
	if (error == 0) {
		error = fo_stat(fp, &sb, td);
		if (error == 0) {
			cvtstat(&sb, &osb);
			error = copyout((caddr_t)&osb, (caddr_t)uap->sb,
			    sizeof (osb));
		}
		/* Release the reference obtained from fget(). */
		fdrop(fp, td);
	}
	mtx_unlock(&Giant);
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
701 
702 /*
703  * Return status information about a file descriptor.
704  */
705 #ifndef _SYS_SYSPROTO_H_
706 struct fstat_args {
707 	int	fd;
708 	struct	stat *sb;
709 };
710 #endif
711 /*
712  * MPSAFE
713  */
714 /* ARGSUSED */
715 int
716 fstat(td, uap)
717 	struct thread *td;
718 	struct fstat_args *uap;
719 {
720 	struct file *fp;
721 	struct stat ub;
722 	int error;
723 
724 	mtx_lock(&Giant);
725 	if ((error = fget(td, uap->fd, &fp)) != 0)
726 		goto done2;
727 	error = fo_stat(fp, &ub, td);
728 	if (error == 0)
729 		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
730 	fdrop(fp, td);
731 done2:
732 	mtx_unlock(&Giant);
733 	return (error);
734 }
735 
736 /*
737  * Return status information about a file descriptor.
738  */
739 #ifndef _SYS_SYSPROTO_H_
740 struct nfstat_args {
741 	int	fd;
742 	struct	nstat *sb;
743 };
744 #endif
745 /*
746  * MPSAFE
747  */
748 /* ARGSUSED */
749 int
750 nfstat(td, uap)
751 	struct thread *td;
752 	register struct nfstat_args *uap;
753 {
754 	struct file *fp;
755 	struct stat ub;
756 	struct nstat nub;
757 	int error;
758 
759 	mtx_lock(&Giant);
760 	if ((error = fget(td, uap->fd, &fp)) != 0)
761 		goto done2;
762 	error = fo_stat(fp, &ub, td);
763 	if (error == 0) {
764 		cvtnstat(&ub, &nub);
765 		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
766 	}
767 	fdrop(fp, td);
768 done2:
769 	mtx_unlock(&Giant);
770 	return (error);
771 }
772 
773 /*
774  * Return pathconf information about a file descriptor.
775  */
776 #ifndef _SYS_SYSPROTO_H_
777 struct fpathconf_args {
778 	int	fd;
779 	int	name;
780 };
781 #endif
782 /*
783  * MPSAFE
784  */
785 /* ARGSUSED */
786 int
787 fpathconf(td, uap)
788 	struct thread *td;
789 	register struct fpathconf_args *uap;
790 {
791 	struct file *fp;
792 	struct vnode *vp;
793 	int error;
794 
795 	mtx_lock(&Giant);
796 	if ((error = fget(td, uap->fd, &fp)) != 0)
797 		goto done2;
798 
799 	switch (fp->f_type) {
800 	case DTYPE_PIPE:
801 	case DTYPE_SOCKET:
802 		if (uap->name != _PC_PIPE_BUF) {
803 			error = EINVAL;
804 			goto done2;
805 		}
806 		td->td_retval[0] = PIPE_BUF;
807 		error = 0;
808 		break;
809 	case DTYPE_FIFO:
810 	case DTYPE_VNODE:
811 		vp = (struct vnode *)fp->f_data;
812 		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
813 		break;
814 	default:
815 		error = EOPNOTSUPP;
816 		break;
817 	}
818 	fdrop(fp, td);
819 done2:
820 	mtx_unlock(&Giant);
821 	return(error);
822 }
823 
824 /*
825  * Allocate a file descriptor for the process.
826  */
827 static int fdexpand;
828 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
829 
830 int
831 fdalloc(td, want, result)
832 	struct thread *td;
833 	int want;
834 	int *result;
835 {
836 	struct proc *p = td->td_proc;
837 	register struct filedesc *fdp = td->td_proc->p_fd;
838 	register int i;
839 	int lim, last, nfiles;
840 	struct file **newofile;
841 	char *newofileflags;
842 
843 	/*
844 	 * Search for a free descriptor starting at the higher
845 	 * of want or fd_freefile.  If that fails, consider
846 	 * expanding the ofile array.
847 	 */
848 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
849 	for (;;) {
850 		last = min(fdp->fd_nfiles, lim);
851 		if ((i = want) < fdp->fd_freefile)
852 			i = fdp->fd_freefile;
853 		for (; i < last; i++) {
854 			if (fdp->fd_ofiles[i] == NULL) {
855 				fdp->fd_ofileflags[i] = 0;
856 				if (i > fdp->fd_lastfile)
857 					fdp->fd_lastfile = i;
858 				if (want <= fdp->fd_freefile)
859 					fdp->fd_freefile = i;
860 				*result = i;
861 				return (0);
862 			}
863 		}
864 
865 		/*
866 		 * No space in current array.  Expand?
867 		 */
868 		if (fdp->fd_nfiles >= lim)
869 			return (EMFILE);
870 		if (fdp->fd_nfiles < NDEXTENT)
871 			nfiles = NDEXTENT;
872 		else
873 			nfiles = 2 * fdp->fd_nfiles;
874 		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
875 		    M_FILEDESC, M_WAITOK);
876 
877 		/*
878 		 * deal with file-table extend race that might have occured
879 		 * when malloc was blocked.
880 		 */
881 		if (fdp->fd_nfiles >= nfiles) {
882 			FREE(newofile, M_FILEDESC);
883 			continue;
884 		}
885 		newofileflags = (char *) &newofile[nfiles];
886 		/*
887 		 * Copy the existing ofile and ofileflags arrays
888 		 * and zero the new portion of each array.
889 		 */
890 		bcopy(fdp->fd_ofiles, newofile,
891 			(i = sizeof(struct file *) * fdp->fd_nfiles));
892 		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
893 		bcopy(fdp->fd_ofileflags, newofileflags,
894 			(i = sizeof(char) * fdp->fd_nfiles));
895 		bzero(newofileflags + i, nfiles * sizeof(char) - i);
896 		if (fdp->fd_nfiles > NDFILE)
897 			FREE(fdp->fd_ofiles, M_FILEDESC);
898 		fdp->fd_ofiles = newofile;
899 		fdp->fd_ofileflags = newofileflags;
900 		fdp->fd_nfiles = nfiles;
901 		fdexpand++;
902 	}
903 	return (0);
904 }
905 
906 /*
907  * Check to see whether n user file descriptors
908  * are available to the process p.
909  */
910 int
911 fdavail(td, n)
912 	struct thread *td;
913 	register int n;
914 {
915 	struct proc *p = td->td_proc;
916 	register struct filedesc *fdp = td->td_proc->p_fd;
917 	register struct file **fpp;
918 	register int i, lim, last;
919 
920 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
921 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
922 		return (1);
923 
924 	last = min(fdp->fd_nfiles, lim);
925 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
926 	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
927 		if (*fpp == NULL && --n <= 0)
928 			return (1);
929 	}
930 	return (0);
931 }
932 
933 /*
934  * Create a new open file structure and allocate
935  * a file decriptor for the process that refers to it.
936  */
937 int
938 falloc(td, resultfp, resultfd)
939 	register struct thread *td;
940 	struct file **resultfp;
941 	int *resultfd;
942 {
943 	struct proc *p = td->td_proc;
944 	register struct file *fp, *fq;
945 	int error, i;
946 
947 	if (nfiles >= maxfiles) {
948 		tablefull("file");
949 		return (ENFILE);
950 	}
951 	/*
952 	 * Allocate a new file descriptor.
953 	 * If the process has file descriptor zero open, add to the list
954 	 * of open files at that point, otherwise put it at the front of
955 	 * the list of open files.
956 	 */
957 	nfiles++;
958 	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK | M_ZERO);
959 
960 	/*
961 	 * wait until after malloc (which may have blocked) returns before
962 	 * allocating the slot, else a race might have shrunk it if we had
963 	 * allocated it before the malloc.
964 	 */
965 	if ((error = fdalloc(td, 0, &i))) {
966 		nfiles--;
967 		FREE(fp, M_FILE);
968 		return (error);
969 	}
970 	fp->f_count = 1;
971 	fp->f_cred = crhold(p->p_ucred);
972 	fp->f_ops = &badfileops;
973 	fp->f_seqcount = 1;
974 	if ((fq = p->p_fd->fd_ofiles[0])) {
975 		LIST_INSERT_AFTER(fq, fp, f_list);
976 	} else {
977 		LIST_INSERT_HEAD(&filehead, fp, f_list);
978 	}
979 	p->p_fd->fd_ofiles[i] = fp;
980 	if (resultfp)
981 		*resultfp = fp;
982 	if (resultfd)
983 		*resultfd = i;
984 	return (0);
985 }
986 
987 /*
988  * Free a file descriptor.
989  */
990 void
991 ffree(fp)
992 	register struct file *fp;
993 {
994 	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
995 	LIST_REMOVE(fp, f_list);
996 	crfree(fp->f_cred);
997 	nfiles--;
998 	FREE(fp, M_FILE);
999 }
1000 
1001 /*
1002  * Build a new filedesc structure.
1003  */
1004 struct filedesc *
1005 fdinit(td)
1006 	struct thread *td;
1007 {
1008 	register struct filedesc0 *newfdp;
1009 	register struct filedesc *fdp = td->td_proc->p_fd;
1010 
1011 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1012 	    M_FILEDESC, M_WAITOK | M_ZERO);
1013 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1014 	if (newfdp->fd_fd.fd_cdir)
1015 		VREF(newfdp->fd_fd.fd_cdir);
1016 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1017 	if (newfdp->fd_fd.fd_rdir)
1018 		VREF(newfdp->fd_fd.fd_rdir);
1019 	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1020 	if (newfdp->fd_fd.fd_jdir)
1021 		VREF(newfdp->fd_fd.fd_jdir);
1022 
1023 	/* Create the file descriptor table. */
1024 	newfdp->fd_fd.fd_refcnt = 1;
1025 	newfdp->fd_fd.fd_cmask = cmask;
1026 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1027 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1028 	newfdp->fd_fd.fd_nfiles = NDFILE;
1029 	newfdp->fd_fd.fd_knlistsize = -1;
1030 
1031 	return (&newfdp->fd_fd);
1032 }
1033 
1034 /*
1035  * Share a filedesc structure.
1036  */
1037 struct filedesc *
1038 fdshare(p)
1039 	struct proc *p;
1040 {
1041 	p->p_fd->fd_refcnt++;
1042 	return (p->p_fd);
1043 }
1044 
1045 /*
1046  * Copy a filedesc structure.
1047  */
1048 struct filedesc *
1049 fdcopy(td)
1050 	struct thread *td;
1051 {
1052 	register struct filedesc *newfdp, *fdp = td->td_proc->p_fd;
1053 	register struct file **fpp;
1054 	register int i;
1055 
1056 	/* Certain daemons might not have file descriptors. */
1057 	if (fdp == NULL)
1058 		return (NULL);
1059 
1060 	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1061 	    M_FILEDESC, M_WAITOK);
1062 	bcopy(fdp, newfdp, sizeof(struct filedesc));
1063 	if (newfdp->fd_cdir)
1064 		VREF(newfdp->fd_cdir);
1065 	if (newfdp->fd_rdir)
1066 		VREF(newfdp->fd_rdir);
1067 	if (newfdp->fd_jdir)
1068 		VREF(newfdp->fd_jdir);
1069 	newfdp->fd_refcnt = 1;
1070 
1071 	/*
1072 	 * If the number of open files fits in the internal arrays
1073 	 * of the open file structure, use them, otherwise allocate
1074 	 * additional memory for the number of descriptors currently
1075 	 * in use.
1076 	 */
1077 	if (newfdp->fd_lastfile < NDFILE) {
1078 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1079 		newfdp->fd_ofileflags =
1080 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
1081 		i = NDFILE;
1082 	} else {
1083 		/*
1084 		 * Compute the smallest multiple of NDEXTENT needed
1085 		 * for the file descriptors currently in use,
1086 		 * allowing the table to shrink.
1087 		 */
1088 		i = newfdp->fd_nfiles;
1089 		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1090 			i /= 2;
1091 		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1092 		    M_FILEDESC, M_WAITOK);
1093 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1094 	}
1095 	newfdp->fd_nfiles = i;
1096 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1097 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1098 
1099 	/*
1100 	 * kq descriptors cannot be copied.
1101 	 */
1102 	if (newfdp->fd_knlistsize != -1) {
1103 		fpp = newfdp->fd_ofiles;
1104 		for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1105 			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE)
1106 				*fpp = NULL;
1107 		}
1108 		newfdp->fd_knlist = NULL;
1109 		newfdp->fd_knlistsize = -1;
1110 		newfdp->fd_knhash = NULL;
1111 		newfdp->fd_knhashmask = 0;
1112 	}
1113 
1114 	fpp = newfdp->fd_ofiles;
1115 	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1116 		if (*fpp != NULL)
1117 			fhold(*fpp);
1118 	}
1119 	return (newfdp);
1120 }
1121 
1122 /*
1123  * Release a filedesc structure.
1124  */
1125 void
1126 fdfree(td)
1127 	struct thread *td;
1128 {
1129 	register struct filedesc *fdp = td->td_proc->p_fd;
1130 	struct file **fpp;
1131 	register int i;
1132 
1133 	/* Certain daemons might not have file descriptors. */
1134 	if (fdp == NULL)
1135 		return;
1136 
1137 	if (--fdp->fd_refcnt > 0)
1138 		return;
1139 	/*
1140 	 * we are the last reference to the structure, we can
1141 	 * safely assume it will not change out from under us.
1142 	 */
1143 	fpp = fdp->fd_ofiles;
1144 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1145 		if (*fpp)
1146 			(void) closef(*fpp, td);
1147 	}
1148 	if (fdp->fd_nfiles > NDFILE)
1149 		FREE(fdp->fd_ofiles, M_FILEDESC);
1150 	if (fdp->fd_cdir)
1151 		vrele(fdp->fd_cdir);
1152 	if (fdp->fd_rdir)
1153 		vrele(fdp->fd_rdir);
1154 	if (fdp->fd_jdir)
1155 		vrele(fdp->fd_jdir);
1156 	if (fdp->fd_knlist)
1157 		FREE(fdp->fd_knlist, M_KQUEUE);
1158 	if (fdp->fd_knhash)
1159 		FREE(fdp->fd_knhash, M_KQUEUE);
1160 	FREE(fdp, M_FILEDESC);
1161 }
1162 
1163 /*
1164  * For setugid programs, we don't want to people to use that setugidness
1165  * to generate error messages which write to a file which otherwise would
1166  * otherwise be off-limits to the process.
1167  *
1168  * This is a gross hack to plug the hole.  A better solution would involve
1169  * a special vop or other form of generalized access control mechanism.  We
1170  * go ahead and just reject all procfs file systems accesses as dangerous.
1171  *
1172  * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1173  * sufficient.  We also don't for check setugidness since we know we are.
1174  */
1175 static int
1176 is_unsafe(struct file *fp)
1177 {
1178 	if (fp->f_type == DTYPE_VNODE &&
1179 	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1180 		return (1);
1181 	return (0);
1182 }
1183 
1184 /*
1185  * Make this setguid thing safe, if at all possible.
1186  */
1187 void
1188 setugidsafety(td)
1189 	struct thread *td;
1190 {
1191 	struct filedesc *fdp = td->td_proc->p_fd;
1192 	register int i;
1193 
1194 	/* Certain daemons might not have file descriptors. */
1195 	if (fdp == NULL)
1196 		return;
1197 
1198 	/*
1199 	 * note: fdp->fd_ofiles may be reallocated out from under us while
1200 	 * we are blocked in a close.  Be careful!
1201 	 */
1202 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1203 		if (i > 2)
1204 			break;
1205 		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1206 			struct file *fp;
1207 
1208 #if 0
1209 			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1210 				(void) munmapfd(td, i);
1211 #endif
1212 			if (i < fdp->fd_knlistsize)
1213 				knote_fdclose(td, i);
1214 			/*
1215 			 * NULL-out descriptor prior to close to avoid
1216 			 * a race while close blocks.
1217 			 */
1218 			fp = fdp->fd_ofiles[i];
1219 			fdp->fd_ofiles[i] = NULL;
1220 			fdp->fd_ofileflags[i] = 0;
1221 			if (i < fdp->fd_freefile)
1222 				fdp->fd_freefile = i;
1223 			(void) closef(fp, td);
1224 		}
1225 	}
1226 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1227 		fdp->fd_lastfile--;
1228 }
1229 
1230 /*
1231  * Close any files on exec?
1232  */
1233 void
1234 fdcloseexec(td)
1235 	struct thread *td;
1236 {
1237 	struct filedesc *fdp = td->td_proc->p_fd;
1238 	register int i;
1239 
1240 	/* Certain daemons might not have file descriptors. */
1241 	if (fdp == NULL)
1242 		return;
1243 
1244 	/*
1245 	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1246 	 * may block and rip them out from under us.
1247 	 */
1248 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1249 		if (fdp->fd_ofiles[i] != NULL &&
1250 		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1251 			struct file *fp;
1252 
1253 #if 0
1254 			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1255 				(void) munmapfd(td, i);
1256 #endif
1257 			if (i < fdp->fd_knlistsize)
1258 				knote_fdclose(td, i);
1259 			/*
1260 			 * NULL-out descriptor prior to close to avoid
1261 			 * a race while close blocks.
1262 			 */
1263 			fp = fdp->fd_ofiles[i];
1264 			fdp->fd_ofiles[i] = NULL;
1265 			fdp->fd_ofileflags[i] = 0;
1266 			if (i < fdp->fd_freefile)
1267 				fdp->fd_freefile = i;
1268 			(void) closef(fp, td);
1269 		}
1270 	}
1271 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1272 		fdp->fd_lastfile--;
1273 }
1274 
1275 /*
1276  * Internal form of close.
1277  * Decrement reference count on file structure.
1278  * Note: td may be NULL when closing a file
1279  * that was being passed in a message.
1280  */
1281 int
1282 closef(fp, td)
1283 	register struct file *fp;
1284 	register struct thread *td;
1285 {
1286 	struct vnode *vp;
1287 	struct flock lf;
1288 
1289 	if (fp == NULL)
1290 		return (0);
1291 	/*
1292 	 * POSIX record locking dictates that any close releases ALL
1293 	 * locks owned by this process.  This is handled by setting
1294 	 * a flag in the unlock to free ONLY locks obeying POSIX
1295 	 * semantics, and not to free BSD-style file locks.
1296 	 * If the descriptor was in a message, POSIX-style locks
1297 	 * aren't passed with the descriptor.
1298 	 */
1299 	if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1300 	    fp->f_type == DTYPE_VNODE) {
1301 		lf.l_whence = SEEK_SET;
1302 		lf.l_start = 0;
1303 		lf.l_len = 0;
1304 		lf.l_type = F_UNLCK;
1305 		vp = (struct vnode *)fp->f_data;
1306 		(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1307 		    F_UNLCK, &lf, F_POSIX);
1308 	}
1309 	return (fdrop(fp, td));
1310 }
1311 
1312 /*
1313  * Extract the file pointer associated with the specified descriptor for
1314  * the current user process.  If no error occured 0 is returned, *fpp
1315  * will be set to the file pointer, and the file pointer's ref count
1316  * will be bumped.  Use fdrop() to drop it.  If an error occured the
1317  * non-zero error is returned and *fpp is set to NULL.
1318  *
1319  * This routine requires Giant for the moment.  Once enough of the
1320  * system is converted over to this and other encapsulated APIs we
1321  * will be able to mutex it and call it without Giant.
1322  */
1323 static __inline
1324 int
1325 _fget(struct thread *td, int fd, struct file **fpp, int flags)
1326 {
1327 	struct filedesc *fdp;
1328 	struct file *fp;
1329 
1330 	GIANT_REQUIRED;
1331 	fdp = td->td_proc->p_fd;
1332 	*fpp = NULL;
1333 	if ((u_int)fd >= fdp->fd_nfiles)
1334 		return(EBADF);
1335 	if ((fp = fdp->fd_ofiles[fd]) == NULL)
1336 		return(EBADF);
1337 
1338 	/*
1339 	 * Note: FREAD failures returns EBADF to maintain backwards
1340 	 * compatibility with what routines returned before.
1341 	 *
1342 	 * Only one flag, or 0, may be specified.
1343 	 */
1344 	if (flags == FREAD && (fp->f_flag & FREAD) == 0)
1345 		return(EBADF);
1346 	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0)
1347 		return(EINVAL);
1348 	++fp->f_count;
1349 	*fpp = fp;
1350 	return(0);
1351 }
1352 
/*
 * Look up descriptor fd for td's process with no access-mode requirement.
 * See _fget() for the reference-count and error conventions.
 */
int
fget(struct thread *td, int fd, struct file **fpp)
{
	int error;

	error = _fget(td, fd, fpp, 0);
	return (error);
}
1358 
1359 int
1360 fget_read(struct thread *td, int fd, struct file **fpp)
1361 {
1362     return(_fget(td, fd, fpp, FREAD));
1363 }
1364 
1365 int
1366 fget_write(struct thread *td, int fd, struct file **fpp)
1367 {
1368     return(_fget(td, fd, fpp, FWRITE));
1369 }
1370 
1371 /*
1372  * Like fget() but loads the underlying vnode, or returns an error if
1373  * the descriptor does not represent a vnode.  Note that pipes use vnodes
1374  * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1375  * error).  The returned vnode will be vref()d.
1376  */
1377 
1378 static __inline
1379 int
1380 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1381 {
1382 	struct filedesc *fdp;
1383 	struct file *fp;
1384 
1385 	GIANT_REQUIRED;
1386 	fdp = td->td_proc->p_fd;
1387 	*vpp = NULL;
1388 	if ((u_int)fd >= fdp->fd_nfiles)
1389 		return(EBADF);
1390 	if ((fp = fdp->fd_ofiles[fd]) == NULL)
1391 		return(EBADF);
1392 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO)
1393 		return(EINVAL);
1394 	if (fp->f_data == NULL)
1395 		return(EINVAL);
1396 
1397 	/*
1398 	 * Note: FREAD failures returns EBADF to maintain backwards
1399 	 * compatibility with what routines returned before.
1400 	 *
1401 	 * Only one flag, or 0, may be specified.
1402 	 */
1403 	if (flags == FREAD && (fp->f_flag & FREAD) == 0)
1404 		return(EBADF);
1405 	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0)
1406 		return(EINVAL);
1407 	*vpp = (struct vnode *)fp->f_data;
1408 	vref(*vpp);
1409 	return(0);
1410 }
1411 
/*
 * Fetch the vnode behind descriptor fd with no access-mode requirement.
 * See _fgetvp() for the reference and error conventions.
 */
int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{
	int error;

	error = _fgetvp(td, fd, vpp, 0);
	return (error);
}
1417 
1418 int
1419 fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1420 {
1421 	return(_fgetvp(td, fd, vpp, FREAD));
1422 }
1423 
1424 int
1425 fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1426 {
1427 	return(_fgetvp(td, fd, vpp, FWRITE));
1428 }
1429 
1430 /*
1431  * Like fget() but loads the underlying socket, or returns an error if
1432  * the descriptor does not represent a socket.
1433  *
1434  * We bump the ref count on the returned socket.  XXX Also obtain the SX lock in
1435  * the future.
1436  */
1437 int
1438 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1439 {
1440 	struct filedesc *fdp;
1441 	struct file *fp;
1442 	struct socket *so;
1443 
1444 	GIANT_REQUIRED;
1445 	fdp = td->td_proc->p_fd;
1446 	*spp = NULL;
1447 	if (fflagp)
1448 		*fflagp = 0;
1449 	if ((u_int)fd >= fdp->fd_nfiles)
1450 		return(EBADF);
1451 	if ((fp = fdp->fd_ofiles[fd]) == NULL)
1452 		return(EBADF);
1453 	if (fp->f_type != DTYPE_SOCKET)
1454 		return(ENOTSOCK);
1455 	if (fp->f_data == NULL)
1456 		return(EINVAL);
1457 	so = (struct socket *)fp->f_data;
1458 	if (fflagp)
1459 		*fflagp = fp->f_flag;
1460 	soref(so);
1461 	*spp = so;
1462 	return(0);
1463 }
1464 
/*
 * Drop the reference count on the socket with sorele(); the last
 * reference closes the socket.  XXX Release the SX lock here in the
 * future.
 */
void
fputsock(struct socket *so)
{

	sorele(so);
}
1474 
1475 int
1476 fdrop(fp, td)
1477 	struct file *fp;
1478 	struct thread *td;
1479 {
1480 	struct flock lf;
1481 	struct vnode *vp;
1482 	int error;
1483 
1484 	if (--fp->f_count > 0)
1485 		return (0);
1486 	if (fp->f_count < 0)
1487 		panic("fdrop: count < 0");
1488 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1489 		lf.l_whence = SEEK_SET;
1490 		lf.l_start = 0;
1491 		lf.l_len = 0;
1492 		lf.l_type = F_UNLCK;
1493 		vp = (struct vnode *)fp->f_data;
1494 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1495 	}
1496 	if (fp->f_ops != &badfileops)
1497 		error = fo_close(fp, td);
1498 	else
1499 		error = 0;
1500 	ffree(fp);
1501 	return (error);
1502 }
1503 
1504 /*
1505  * Apply an advisory lock on a file descriptor.
1506  *
1507  * Just attempt to get a record lock of the requested type on
1508  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1509  */
1510 #ifndef _SYS_SYSPROTO_H_
1511 struct flock_args {
1512 	int	fd;
1513 	int	how;
1514 };
1515 #endif
1516 /*
1517  * MPSAFE
1518  */
1519 /* ARGSUSED */
1520 int
1521 flock(td, uap)
1522 	struct thread *td;
1523 	register struct flock_args *uap;
1524 {
1525 	register struct filedesc *fdp = td->td_proc->p_fd;
1526 	register struct file *fp;
1527 	struct vnode *vp;
1528 	struct flock lf;
1529 	int error;
1530 
1531 	mtx_lock(&Giant);
1532 
1533 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
1534 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
1535 		error = EBADF;
1536 		goto done2;
1537 	}
1538 	if (fp->f_type != DTYPE_VNODE) {
1539 		error = EOPNOTSUPP;
1540 		goto done2;
1541 	}
1542 	vp = (struct vnode *)fp->f_data;
1543 	lf.l_whence = SEEK_SET;
1544 	lf.l_start = 0;
1545 	lf.l_len = 0;
1546 	if (uap->how & LOCK_UN) {
1547 		lf.l_type = F_UNLCK;
1548 		fp->f_flag &= ~FHASLOCK;
1549 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1550 		goto done2;
1551 	}
1552 	if (uap->how & LOCK_EX)
1553 		lf.l_type = F_WRLCK;
1554 	else if (uap->how & LOCK_SH)
1555 		lf.l_type = F_RDLCK;
1556 	else {
1557 		error = EBADF;
1558 		goto done2;
1559 	}
1560 	fp->f_flag |= FHASLOCK;
1561 	if (uap->how & LOCK_NB)
1562 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK);
1563 	else
1564 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT);
1565 done2:
1566 	mtx_unlock(&Giant);
1567 	return (error);
1568 }
1569 
1570 /*
1571  * File Descriptor pseudo-device driver (/dev/fd/).
1572  *
1573  * Opening minor device N dup()s the file (if any) connected to file
1574  * descriptor N belonging to the calling process.  Note that this driver
1575  * consists of only the ``open()'' routine, because all subsequent
1576  * references to this file will be direct to the other driver.
1577  */
1578 /* ARGSUSED */
1579 static int
1580 fdopen(dev, mode, type, td)
1581 	dev_t dev;
1582 	int mode, type;
1583 	struct thread *td;
1584 {
1585 
1586 	/*
1587 	 * XXX Kludge: set curthread->td_dupfd to contain the value of the
1588 	 * the file descriptor being sought for duplication. The error
1589 	 * return ensures that the vnode for this device will be released
1590 	 * by vn_open. Open will detect this special error and take the
1591 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1592 	 * will simply report the error.
1593 	 */
1594 	td->td_dupfd = dev2unit(dev);
1595 	return (ENODEV);
1596 }
1597 
1598 /*
1599  * Duplicate the specified descriptor to a free descriptor.
1600  */
1601 int
1602 dupfdopen(td, fdp, indx, dfd, mode, error)
1603 	struct thread *td;
1604 	struct filedesc *fdp;
1605 	int indx, dfd;
1606 	int mode;
1607 	int error;
1608 {
1609 	register struct file *wfp;
1610 	struct file *fp;
1611 
1612 	/*
1613 	 * If the to-be-dup'd fd number is greater than the allowed number
1614 	 * of file descriptors, or the fd to be dup'd has already been
1615 	 * closed, then reject.
1616 	 */
1617 	if ((u_int)dfd >= fdp->fd_nfiles ||
1618 	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1619 		return (EBADF);
1620 	}
1621 
1622 	/*
1623 	 * There are two cases of interest here.
1624 	 *
1625 	 * For ENODEV simply dup (dfd) to file descriptor
1626 	 * (indx) and return.
1627 	 *
1628 	 * For ENXIO steal away the file structure from (dfd) and
1629 	 * store it in (indx).  (dfd) is effectively closed by
1630 	 * this operation.
1631 	 *
1632 	 * Any other error code is just returned.
1633 	 */
1634 	switch (error) {
1635 	case ENODEV:
1636 		/*
1637 		 * Check that the mode the file is being opened for is a
1638 		 * subset of the mode of the existing descriptor.
1639 		 */
1640 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1641 			return (EACCES);
1642 		fp = fdp->fd_ofiles[indx];
1643 #if 0
1644 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1645 			(void) munmapfd(td, indx);
1646 #endif
1647 		fdp->fd_ofiles[indx] = wfp;
1648 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1649 		fhold(wfp);
1650 		if (indx > fdp->fd_lastfile)
1651 			fdp->fd_lastfile = indx;
1652 		/*
1653 		 * we now own the reference to fp that the ofiles[] array
1654 		 * used to own.  Release it.
1655 		 */
1656 		if (fp)
1657 			fdrop(fp, td);
1658 		return (0);
1659 
1660 	case ENXIO:
1661 		/*
1662 		 * Steal away the file pointer from dfd, and stuff it into indx.
1663 		 */
1664 		fp = fdp->fd_ofiles[indx];
1665 #if 0
1666 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1667 			(void) munmapfd(td, indx);
1668 #endif
1669 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1670 		fdp->fd_ofiles[dfd] = NULL;
1671 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1672 		fdp->fd_ofileflags[dfd] = 0;
1673 
1674 		/*
1675 		 * we now own the reference to fp that the ofiles[] array
1676 		 * used to own.  Release it.
1677 		 */
1678 		if (fp)
1679 			fdrop(fp, td);
1680 		/*
1681 		 * Complete the clean up of the filedesc structure by
1682 		 * recomputing the various hints.
1683 		 */
1684 		if (indx > fdp->fd_lastfile) {
1685 			fdp->fd_lastfile = indx;
1686 		} else {
1687 			while (fdp->fd_lastfile > 0 &&
1688 			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
1689 				fdp->fd_lastfile--;
1690 			}
1691 			if (dfd < fdp->fd_freefile)
1692 				fdp->fd_freefile = dfd;
1693 		}
1694 		return (0);
1695 
1696 	default:
1697 		return (error);
1698 	}
1699 	/* NOTREACHED */
1700 }
1701 
1702 /*
1703  * Get file structures.
1704  */
1705 static int
1706 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
1707 {
1708 	int error;
1709 	struct file *fp;
1710 
1711 	if (!req->oldptr) {
1712 		/*
1713 		 * overestimate by 10 files
1714 		 */
1715 		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1716 				(nfiles + 10) * sizeof(struct file)));
1717 	}
1718 
1719 	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1720 	if (error)
1721 		return (error);
1722 
1723 	/*
1724 	 * followed by an array of file structures
1725 	 */
1726 	LIST_FOREACH(fp, &filehead, f_list) {
1727 		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1728 		if (error)
1729 			return (error);
1730 	}
1731 	return (0);
1732 }
1733 
1734 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1735     0, 0, sysctl_kern_file, "S,file", "Entire file table");
1736 
1737 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
1738     &maxfilesperproc, 0, "Maximum files allowed open per process");
1739 
1740 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
1741     &maxfiles, 0, "Maximum number of files");
1742 
1743 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
1744     &nfiles, 0, "System-wide number of open files");
1745 
1746 static void
1747 fildesc_drvinit(void *unused)
1748 {
1749 	dev_t dev;
1750 
1751 	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
1752 	make_dev_alias(dev, "stdin");
1753 	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
1754 	make_dev_alias(dev, "stdout");
1755 	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
1756 	make_dev_alias(dev, "stderr");
1757 	if (!devfs_present) {
1758 		int fd;
1759 
1760 		for (fd = 3; fd < NUMFDESC; fd++)
1761 			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
1762 			    "fd/%d", fd);
1763 	}
1764 }
1765 
1766 struct fileops badfileops = {
1767 	badfo_readwrite,
1768 	badfo_readwrite,
1769 	badfo_ioctl,
1770 	badfo_poll,
1771 	badfo_kqfilter,
1772 	badfo_stat,
1773 	badfo_close
1774 };
1775 
1776 static int
1777 badfo_readwrite(fp, uio, cred, flags, td)
1778 	struct file *fp;
1779 	struct uio *uio;
1780 	struct ucred *cred;
1781 	struct thread *td;
1782 	int flags;
1783 {
1784 
1785 	return (EBADF);
1786 }
1787 
1788 static int
1789 badfo_ioctl(fp, com, data, td)
1790 	struct file *fp;
1791 	u_long com;
1792 	caddr_t data;
1793 	struct thread *td;
1794 {
1795 
1796 	return (EBADF);
1797 }
1798 
/*
 * Poll stub for badfileops: ignores its arguments and always returns 0.
 */
static int
badfo_poll(struct file *fp, int events, struct ucred *cred,
    struct thread *td)
{
	return (0);
}
1809 
/*
 * Kqfilter stub for badfileops: ignores its arguments and always returns
 * 0.
 */
static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{
	return (0);
}
1818 
1819 static int
1820 badfo_stat(fp, sb, td)
1821 	struct file *fp;
1822 	struct stat *sb;
1823 	struct thread *td;
1824 {
1825 
1826 	return (EBADF);
1827 }
1828 
1829 static int
1830 badfo_close(fp, td)
1831 	struct file *fp;
1832 	struct thread *td;
1833 {
1834 
1835 	return (EBADF);
1836 }
1837 
1838 SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1839 					fildesc_drvinit,NULL)
1840