xref: /freebsd/sys/kern/kern_descrip.c (revision 9207b4cff7b8d483f4dd3c62266c2b58819eb7f9)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mutex.h>
49 #include <sys/sysproto.h>
50 #include <sys/conf.h>
51 #include <sys/filedesc.h>
52 #include <sys/kernel.h>
53 #include <sys/sysctl.h>
54 #include <sys/vnode.h>
55 #include <sys/proc.h>
56 #include <sys/file.h>
57 #include <sys/stat.h>
58 #include <sys/filio.h>
59 #include <sys/fcntl.h>
60 #include <sys/unistd.h>
61 #include <sys/resourcevar.h>
62 #include <sys/event.h>
63 #include <sys/sx.h>
64 #include <sys/socketvar.h>
65 
66 #include <machine/limits.h>
67 
68 #include <vm/vm.h>
69 #include <vm/vm_extern.h>
70 
71 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
72 MALLOC_DEFINE(M_FILE, "file", "Open file structure");
73 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
74 
75 static	 d_open_t  fdopen;
76 #define NUMFDESC 64
77 
78 #define CDEV_MAJOR 22
79 static struct cdevsw fildesc_cdevsw = {
80 	/* open */	fdopen,
81 	/* close */	noclose,
82 	/* read */	noread,
83 	/* write */	nowrite,
84 	/* ioctl */	noioctl,
85 	/* poll */	nopoll,
86 	/* mmap */	nommap,
87 	/* strategy */	nostrategy,
88 	/* name */	"FD",
89 	/* maj */	CDEV_MAJOR,
90 	/* dump */	nodump,
91 	/* psize */	nopsize,
92 	/* flags */	0,
93 };
94 
95 static int do_dup __P((struct filedesc *fdp, int old, int new, register_t *retval, struct thread *td));
96 static int badfo_readwrite __P((struct file *fp, struct uio *uio,
97     struct ucred *cred, int flags, struct thread *td));
98 static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data,
99     struct thread *td));
100 static int badfo_poll __P((struct file *fp, int events,
101     struct ucred *cred, struct thread *td));
102 static int badfo_kqfilter __P((struct file *fp, struct knote *kn));
103 static int badfo_stat __P((struct file *fp, struct stat *sb, struct thread *td));
104 static int badfo_close __P((struct file *fp, struct thread *td));
105 
106 /*
107  * Descriptor management.
108  */
109 struct filelist filehead;	/* head of list of open files */
110 int nfiles;			/* actual number of open files */
111 extern int cmask;
112 
113 /*
114  * System calls on descriptors.
115  */
116 #ifndef _SYS_SYSPROTO_H_
117 struct getdtablesize_args {
118 	int	dummy;
119 };
120 #endif
121 /*
122  * MPSAFE
123  */
124 /* ARGSUSED */
125 int
126 getdtablesize(td, uap)
127 	struct thread *td;
128 	struct getdtablesize_args *uap;
129 {
130 	struct proc *p = td->td_proc;
131 
132 	mtx_lock(&Giant);
133 	td->td_retval[0] =
134 	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
135 	mtx_unlock(&Giant);
136 	return (0);
137 }
138 
139 /*
140  * Duplicate a file descriptor to a particular value.
141  *
142  * note: keep in mind that a potential race condition exists when closing
143  * descriptors from a shared descriptor table (via rfork).
144  */
145 #ifndef _SYS_SYSPROTO_H_
146 struct dup2_args {
147 	u_int	from;
148 	u_int	to;
149 };
150 #endif
151 /*
152  * MPSAFE
153  */
154 /* ARGSUSED */
155 int
156 dup2(td, uap)
157 	struct thread *td;
158 	struct dup2_args *uap;
159 {
160 	struct proc *p = td->td_proc;
161 	register struct filedesc *fdp = td->td_proc->p_fd;
162 	register u_int old = uap->from, new = uap->to;
163 	int i, error;
164 
165 	mtx_lock(&Giant);
166 retry:
167 	if (old >= fdp->fd_nfiles ||
168 	    fdp->fd_ofiles[old] == NULL ||
169 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
170 	    new >= maxfilesperproc) {
171 		error = EBADF;
172 		goto done2;
173 	}
174 	if (old == new) {
175 		td->td_retval[0] = new;
176 		error = 0;
177 		goto done2;
178 	}
179 	if (new >= fdp->fd_nfiles) {
180 		if ((error = fdalloc(td, new, &i)))
181 			goto done2;
182 		if (new != i)
183 			panic("dup2: fdalloc");
184 		/*
185 		 * fdalloc() may block, retest everything.
186 		 */
187 		goto retry;
188 	}
189 	error = do_dup(fdp, (int)old, (int)new, td->td_retval, td);
190 done2:
191 	mtx_unlock(&Giant);
192 	return(error);
193 }
194 
195 /*
196  * Duplicate a file descriptor.
197  */
198 #ifndef _SYS_SYSPROTO_H_
199 struct dup_args {
200 	u_int	fd;
201 };
202 #endif
203 /*
204  * MPSAFE
205  */
206 /* ARGSUSED */
207 int
208 dup(td, uap)
209 	struct thread *td;
210 	struct dup_args *uap;
211 {
212 	register struct filedesc *fdp;
213 	u_int old;
214 	int new, error;
215 
216 	mtx_lock(&Giant);
217 	old = uap->fd;
218 	fdp = td->td_proc->p_fd;
219 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
220 		error = EBADF;
221 		goto done2;
222 	}
223 	if ((error = fdalloc(td, 0, &new)))
224 		goto done2;
225 	error = do_dup(fdp, (int)old, new, td->td_retval, td);
226 done2:
227 	mtx_unlock(&Giant);
228 	return (error);
229 }
230 
231 /*
232  * The file control system call.
233  */
234 #ifndef _SYS_SYSPROTO_H_
235 struct fcntl_args {
236 	int	fd;
237 	int	cmd;
238 	long	arg;
239 };
240 #endif
241 /*
242  * MPSAFE
243  */
244 /* ARGSUSED */
245 int
246 fcntl(td, uap)
247 	struct thread *td;
248 	register struct fcntl_args *uap;
249 {
250 	register struct proc *p = td->td_proc;
251 	register struct filedesc *fdp;
252 	register struct file *fp;
253 	register char *pop;
254 	struct vnode *vp;
255 	int i, tmp, error = 0, flg = F_POSIX;
256 	struct flock fl;
257 	u_int newmin;
258 
259 	mtx_lock(&Giant);
260 
261 	fdp = p->p_fd;
262 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
263 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
264 		error = EBADF;
265 		goto done2;
266 	}
267 	pop = &fdp->fd_ofileflags[uap->fd];
268 
269 	switch (uap->cmd) {
270 	case F_DUPFD:
271 		newmin = uap->arg;
272 		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
273 		    newmin >= maxfilesperproc) {
274 			error = EINVAL;
275 			break;
276 		}
277 		if ((error = fdalloc(td, newmin, &i)))
278 			break;
279 		error = do_dup(fdp, uap->fd, i, td->td_retval, td);
280 		break;
281 
282 	case F_GETFD:
283 		td->td_retval[0] = *pop & 1;
284 		break;
285 
286 	case F_SETFD:
287 		*pop = (*pop &~ 1) | (uap->arg & 1);
288 		break;
289 
290 	case F_GETFL:
291 		td->td_retval[0] = OFLAGS(fp->f_flag);
292 		break;
293 
294 	case F_SETFL:
295 		fhold(fp);
296 		fp->f_flag &= ~FCNTLFLAGS;
297 		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
298 		tmp = fp->f_flag & FNONBLOCK;
299 		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
300 		if (error) {
301 			fdrop(fp, td);
302 			break;
303 		}
304 		tmp = fp->f_flag & FASYNC;
305 		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
306 		if (!error) {
307 			fdrop(fp, td);
308 			break;
309 		}
		/*
		 * Setting FASYNC failed: undo the FIONBIO change made above
		 * by forcing the file back into blocking mode before
		 * returning the error.
		 */
310 		fp->f_flag &= ~FNONBLOCK;
311 		tmp = 0;
312 		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
313 		fdrop(fp, td);
314 		break;
315 
316 	case F_GETOWN:
317 		fhold(fp);
318 		error = fo_ioctl(fp, FIOGETOWN, (caddr_t)td->td_retval, td);
319 		fdrop(fp, td);
320 		break;
321 
322 	case F_SETOWN:
323 		fhold(fp);
324 		error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, td);
325 		fdrop(fp, td);
326 		break;
327 
328 	case F_SETLKW:
329 		flg |= F_WAIT;
330 		/* Fall into F_SETLK */
331 
332 	case F_SETLK:
333 		if (fp->f_type != DTYPE_VNODE) {
334 			error = EBADF;
335 			break;
336 		}
337 		vp = (struct vnode *)fp->f_data;
338 
339 		/*
340 		 * copyin/lockop may block
341 		 */
342 		fhold(fp);
343 		/* Copy in the lock structure */
344 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
345 		    sizeof(fl));
346 		if (error) {
347 			fdrop(fp, td);
348 			break;
349 		}
350 		if (fl.l_whence == SEEK_CUR) {
351 			if (fp->f_offset < 0 ||
352 			    (fl.l_start > 0 &&
353 			     fp->f_offset > OFF_MAX - fl.l_start)) {
354 				fdrop(fp, td);
355 				error = EOVERFLOW;
356 				break;
357 			}
358 			fl.l_start += fp->f_offset;
359 		}
360 
361 		switch (fl.l_type) {
362 		case F_RDLCK:
363 			if ((fp->f_flag & FREAD) == 0) {
364 				error = EBADF;
365 				break;
366 			}
367 			p->p_flag |= P_ADVLOCK;
368 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
369 			    &fl, flg);
370 			break;
371 		case F_WRLCK:
372 			if ((fp->f_flag & FWRITE) == 0) {
373 				error = EBADF;
374 				break;
375 			}
376 			p->p_flag |= P_ADVLOCK;
377 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
378 			    &fl, flg);
379 			break;
380 		case F_UNLCK:
381 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
382 				&fl, F_POSIX);
383 			break;
384 		default:
385 			error = EINVAL;
386 			break;
387 		}
388 		fdrop(fp, td);
389 		break;
390 
391 	case F_GETLK:
392 		if (fp->f_type != DTYPE_VNODE) {
393 			error = EBADF;
394 			break;
395 		}
396 		vp = (struct vnode *)fp->f_data;
397 		/*
398 		 * copyin/lockop may block
399 		 */
400 		fhold(fp);
401 		/* Copy in the lock structure */
402 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
403 		    sizeof(fl));
404 		if (error) {
405 			fdrop(fp, td);
406 			break;
407 		}
408 		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
409 		    fl.l_type != F_UNLCK) {
410 			fdrop(fp, td);
411 			error = EINVAL;
412 			break;
413 		}
414 		if (fl.l_whence == SEEK_CUR) {
415 			if ((fl.l_start > 0 &&
416 			     fp->f_offset > OFF_MAX - fl.l_start) ||
417 			    (fl.l_start < 0 &&
418 			     fp->f_offset < OFF_MIN - fl.l_start)) {
419 				fdrop(fp, td);
420 				error = EOVERFLOW;
421 				break;
422 			}
423 			fl.l_start += fp->f_offset;
424 		}
425 		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
426 			    &fl, F_POSIX);
427 		fdrop(fp, td);
428 		if (error == 0) {
429 			error = copyout((caddr_t)&fl,
430 				    (caddr_t)(intptr_t)uap->arg, sizeof(fl));
431 		}
432 		break;
433 	default:
434 		error = EINVAL;
435 		break;
436 	}
437 done2:
438 	mtx_unlock(&Giant);
439 	return (error);
440 }
441 
442 /*
443  * Common code for dup, dup2, and fcntl(F_DUPFD).
444  */
445 static int
446 do_dup(fdp, old, new, retval, td)
447 	register struct filedesc *fdp;
448 	register int old, new;
449 	register_t *retval;
450 	struct thread *td;
451 {
452 	struct file *fp;
453 	struct file *delfp;
454 
455 	/*
456 	 * Save info on the descriptor being overwritten.  We have
457 	 * to do the unmap now, but we cannot close it without
458 	 * introducing an ownership race for the slot.
459 	 */
460 	delfp = fdp->fd_ofiles[new];
461 #if 0
462 	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
463 		(void) munmapfd(td, new);
464 #endif
465 
466 	/*
467 	 * Duplicate the source descriptor, update lastfile
468 	 */
469 	fp = fdp->fd_ofiles[old];
470 	fdp->fd_ofiles[new] = fp;
471 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
472 	fhold(fp);
473 	if (new > fdp->fd_lastfile)
474 		fdp->fd_lastfile = new;
475 	*retval = new;
476 
477 	/*
478 	 * If we dup'd over a valid file, we now own the reference to it
479 	 * and must dispose of it using closef() semantics (as if a
480 	 * close() were performed on it).
481 	 */
482 	if (delfp)
483 		(void) closef(delfp, td);
484 	return (0);
485 }
486 
487 /*
488  * If sigio is on the list associated with a process or process group,
489  * disable signalling from the device, remove sigio from the list and
490  * free sigio.
491  */
492 void
493 funsetown(sigio)
494 	struct sigio *sigio;
495 {
496 	int s;
497 
498 	if (sigio == NULL)
499 		return;
500 	s = splhigh();
501 	*(sigio->sio_myref) = NULL;
502 	splx(s);
503 	if (sigio->sio_pgid < 0) {
504 		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
505 			     sigio, sio_pgsigio);
506 	} else /* if (sigio->sio_pgid > 0) */ {
507 		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
508 			     sigio, sio_pgsigio);
509 	}
510 	crfree(sigio->sio_ucred);
511 	FREE(sigio, M_SIGIO);
512 }
513 
514 /* Free a list of sigio structures. */
515 void
516 funsetownlst(sigiolst)
517 	struct sigiolst *sigiolst;
518 {
519 	struct sigio *sigio;
520 
521 	while ((sigio = SLIST_FIRST(sigiolst)) != NULL)
522 		funsetown(sigio);
523 }
524 
525 /*
526  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
527  *
528  * After permission checking, add a sigio structure to the sigio list for
529  * the process or process group.
530  */
531 int
532 fsetown(pgid, sigiop)
533 	pid_t pgid;
534 	struct sigio **sigiop;
535 {
536 	struct proc *proc;
537 	struct pgrp *pgrp;
538 	struct sigio *sigio;
539 	int s;
540 
541 	if (pgid == 0) {
542 		funsetown(*sigiop);
543 		return (0);
544 	}
545 	if (pgid > 0) {
546 		proc = pfind(pgid);
547 		if (proc == NULL)
548 			return (ESRCH);
549 
550 		/*
551 		 * Policy - Don't allow a process to FSETOWN a process
552 		 * in another session.
553 		 *
554 		 * Remove this test to allow maximum flexibility or
555 		 * restrict FSETOWN to the current process or process
556 		 * group for maximum safety.
557 		 */
558 		if (proc->p_session != curthread->td_proc->p_session) {
559 			PROC_UNLOCK(proc);
560 			return (EPERM);
561 		}
562 		PROC_UNLOCK(proc);
563 
564 		pgrp = NULL;
565 	} else /* if (pgid < 0) */ {
566 		pgrp = pgfind(-pgid);
567 		if (pgrp == NULL)
568 			return (ESRCH);
569 
570 		/*
571 		 * Policy - Don't allow a process to FSETOWN a process
572 		 * in another session.
573 		 *
574 		 * Remove this test to allow maximum flexibility or
575 		 * restrict FSETOWN to the current process or process
576 		 * group for maximum safety.
577 		 */
578 		if (pgrp->pg_session != curthread->td_proc->p_session)
579 			return (EPERM);
580 
581 		proc = NULL;
582 	}
583 	funsetown(*sigiop);
584 	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
585 	if (pgid > 0) {
586 		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
587 		sigio->sio_proc = proc;
588 	} else {
589 		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
590 		sigio->sio_pgrp = pgrp;
591 	}
592 	sigio->sio_pgid = pgid;
593 	sigio->sio_ucred = crhold(curthread->td_proc->p_ucred);
594 	sigio->sio_myref = sigiop;
595 	s = splhigh();
596 	*sigiop = sigio;
597 	splx(s);
598 	return (0);
599 }
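
/*
 * Illustrative note (not part of the original code): a positive pgid,
 * e.g. fcntl(fd, F_SETOWN, getpid()), directs SIGIO/SIGURG delivery to a
 * single process, while a negative value such as -pgrp selects a process
 * group; fsetown() records the value verbatim in sio_pgid and fgetown()
 * simply returns it.
 */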
600 
601 /*
602  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
603  */
604 pid_t
605 fgetown(sigio)
606 	struct sigio *sigio;
607 {
608 	return (sigio != NULL ? sigio->sio_pgid : 0);
609 }
610 
611 /*
612  * Close a file descriptor.
613  */
614 #ifndef _SYS_SYSPROTO_H_
615 struct close_args {
616         int     fd;
617 };
618 #endif
619 /*
620  * MPSAFE
621  */
622 /* ARGSUSED */
623 int
624 close(td, uap)
625 	struct thread *td;
626 	struct close_args *uap;
627 {
628 	register struct filedesc *fdp;
629 	register struct file *fp;
630 	register int fd = uap->fd;
631 	int error = 0;
632 
633 	mtx_lock(&Giant);
634 	fdp = td->td_proc->p_fd;
635 	if ((unsigned)fd >= fdp->fd_nfiles ||
636 	    (fp = fdp->fd_ofiles[fd]) == NULL) {
637 		error = EBADF;
638 		goto done2;
639 	}
640 #if 0
641 	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
642 		(void) munmapfd(td, fd);
643 #endif
644 	fdp->fd_ofiles[fd] = NULL;
645 	fdp->fd_ofileflags[fd] = 0;
646 
647 	/*
648 	 * we now hold the fp reference that used to be owned by the descriptor
649 	 * array.
650 	 */
651 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
652 		fdp->fd_lastfile--;
653 	if (fd < fdp->fd_freefile)
654 		fdp->fd_freefile = fd;
655 	if (fd < fdp->fd_knlistsize)
656 		knote_fdclose(td, fd);
657 	error = closef(fp, td);
658 done2:
659 	mtx_unlock(&Giant);
660 	return(error);
661 }
662 
663 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
664 /*
665  * Return status information about a file descriptor.
666  */
667 #ifndef _SYS_SYSPROTO_H_
668 struct ofstat_args {
669 	int	fd;
670 	struct	ostat *sb;
671 };
672 #endif
673 /*
674  * MPSAFE
675  */
676 /* ARGSUSED */
677 int
678 ofstat(td, uap)
679 	struct thread *td;
680 	register struct ofstat_args *uap;
681 {
682 	struct file *fp;
683 	struct stat ub;
684 	struct ostat oub;
685 	int error;
686 
687 	mtx_lock(&Giant);
688 	if ((error = fget(td, uap->fd, &fp)) != 0)
689 		goto done2;
690 	error = fo_stat(fp, &ub, td);
691 	if (error == 0) {
692 		cvtstat(&ub, &oub);
693 		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
694 	}
695 	fdrop(fp, td);
696 done2:
697 	mtx_unlock(&Giant);
698 	return (error);
699 }
700 #endif /* COMPAT_43 || COMPAT_SUNOS */
701 
702 /*
703  * Return status information about a file descriptor.
704  */
705 #ifndef _SYS_SYSPROTO_H_
706 struct fstat_args {
707 	int	fd;
708 	struct	stat *sb;
709 };
710 #endif
711 /*
712  * MPSAFE
713  */
714 /* ARGSUSED */
715 int
716 fstat(td, uap)
717 	struct thread *td;
718 	struct fstat_args *uap;
719 {
720 	struct file *fp;
721 	struct stat ub;
722 	int error;
723 
724 	mtx_lock(&Giant);
725 	if ((error = fget(td, uap->fd, &fp)) != 0)
726 		goto done2;
727 	error = fo_stat(fp, &ub, td);
728 	if (error == 0)
729 		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
730 	fdrop(fp, td);
731 done2:
732 	mtx_unlock(&Giant);
733 	return (error);
734 }
735 
736 /*
737  * Return status information about a file descriptor.
738  */
739 #ifndef _SYS_SYSPROTO_H_
740 struct nfstat_args {
741 	int	fd;
742 	struct	nstat *sb;
743 };
744 #endif
745 /*
746  * MPSAFE
747  */
748 /* ARGSUSED */
749 int
750 nfstat(td, uap)
751 	struct thread *td;
752 	register struct nfstat_args *uap;
753 {
754 	struct file *fp;
755 	struct stat ub;
756 	struct nstat nub;
757 	int error;
758 
759 	mtx_lock(&Giant);
760 	if ((error = fget(td, uap->fd, &fp)) != 0)
761 		goto done2;
762 	error = fo_stat(fp, &ub, td);
763 	if (error == 0) {
764 		cvtnstat(&ub, &nub);
765 		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
766 	}
767 	fdrop(fp, td);
768 done2:
769 	mtx_unlock(&Giant);
770 	return (error);
771 }
772 
773 /*
774  * Return pathconf information about a file descriptor.
775  */
776 #ifndef _SYS_SYSPROTO_H_
777 struct fpathconf_args {
778 	int	fd;
779 	int	name;
780 };
781 #endif
782 /*
783  * MPSAFE
784  */
785 /* ARGSUSED */
786 int
787 fpathconf(td, uap)
788 	struct thread *td;
789 	register struct fpathconf_args *uap;
790 {
791 	struct file *fp;
792 	struct vnode *vp;
793 	int error;
794 
795 	mtx_lock(&Giant);
796 	if ((error = fget(td, uap->fd, &fp)) != 0)
797 		goto done2;
798 
799 	switch (fp->f_type) {
800 	case DTYPE_PIPE:
801 	case DTYPE_SOCKET:
802 		if (uap->name != _PC_PIPE_BUF) {
803 			error = EINVAL;
804 			break;
805 		}
806 		td->td_retval[0] = PIPE_BUF;
807 		error = 0;
808 		break;
809 	case DTYPE_FIFO:
810 	case DTYPE_VNODE:
811 		vp = (struct vnode *)fp->f_data;
812 		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
813 		break;
814 	default:
815 		error = EOPNOTSUPP;
816 		break;
817 	}
818 	fdrop(fp, td);
819 done2:
820 	mtx_unlock(&Giant);
821 	return(error);
822 }
823 
824 /*
825  * Allocate a file descriptor for the process.
826  */
827 static int fdexpand;
828 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
829 
830 int
831 fdalloc(td, want, result)
832 	struct thread *td;
833 	int want;
834 	int *result;
835 {
836 	struct proc *p = td->td_proc;
837 	register struct filedesc *fdp = td->td_proc->p_fd;
838 	register int i;
839 	int lim, last, nfiles;
840 	struct file **newofile;
841 	char *newofileflags;
842 
843 	/*
844 	 * Search for a free descriptor starting at the higher
845 	 * of want or fd_freefile.  If that fails, consider
846 	 * expanding the ofile array.
847 	 */
848 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
849 	for (;;) {
850 		last = min(fdp->fd_nfiles, lim);
851 		if ((i = want) < fdp->fd_freefile)
852 			i = fdp->fd_freefile;
853 		for (; i < last; i++) {
854 			if (fdp->fd_ofiles[i] == NULL) {
855 				fdp->fd_ofileflags[i] = 0;
856 				if (i > fdp->fd_lastfile)
857 					fdp->fd_lastfile = i;
858 				if (want <= fdp->fd_freefile)
859 					fdp->fd_freefile = i;
860 				*result = i;
861 				return (0);
862 			}
863 		}
864 
865 		/*
866 		 * No space in current array.  Expand?
867 		 */
868 		if (fdp->fd_nfiles >= lim)
869 			return (EMFILE);
870 		if (fdp->fd_nfiles < NDEXTENT)
871 			nfiles = NDEXTENT;
872 		else
873 			nfiles = 2 * fdp->fd_nfiles;
874 		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
875 		    M_FILEDESC, M_WAITOK);
876 
877 		/*
878 		 * deal with a file-table extension race that might have occurred
879 		 * when malloc was blocked.
880 		 */
881 		if (fdp->fd_nfiles >= nfiles) {
882 			FREE(newofile, M_FILEDESC);
883 			continue;
884 		}
885 		newofileflags = (char *) &newofile[nfiles];
886 		/*
887 		 * Copy the existing ofile and ofileflags arrays
888 		 * and zero the new portion of each array.
889 		 */
890 		bcopy(fdp->fd_ofiles, newofile,
891 			(i = sizeof(struct file *) * fdp->fd_nfiles));
892 		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
893 		bcopy(fdp->fd_ofileflags, newofileflags,
894 			(i = sizeof(char) * fdp->fd_nfiles));
895 		bzero(newofileflags + i, nfiles * sizeof(char) - i);
896 		if (fdp->fd_nfiles > NDFILE)
897 			FREE(fdp->fd_ofiles, M_FILEDESC);
898 		fdp->fd_ofiles = newofile;
899 		fdp->fd_ofileflags = newofileflags;
900 		fdp->fd_nfiles = nfiles;
901 		fdexpand++;
902 	}
903 	return (0);
904 }
905 
906 /*
907  * Check to see whether n user file descriptors
908  * are available to the process p.
909  */
910 int
911 fdavail(td, n)
912 	struct thread *td;
913 	register int n;
914 {
915 	struct proc *p = td->td_proc;
916 	register struct filedesc *fdp = td->td_proc->p_fd;
917 	register struct file **fpp;
918 	register int i, lim, last;
919 
920 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
921 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
922 		return (1);
923 
924 	last = min(fdp->fd_nfiles, lim);
925 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
926 	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
927 		if (*fpp == NULL && --n <= 0)
928 			return (1);
929 	}
930 	return (0);
931 }
932 
933 /*
934  * Create a new open file structure and allocate
935  * a file descriptor for the process that refers to it.
936  */
937 int
938 falloc(td, resultfp, resultfd)
939 	register struct thread *td;
940 	struct file **resultfp;
941 	int *resultfd;
942 {
943 	struct proc *p = td->td_proc;
944 	register struct file *fp, *fq;
945 	int error, i;
946 
947 	if (nfiles >= maxfiles) {
948 		tablefull("file");
949 		return (ENFILE);
950 	}
951 	/*
952 	 * Allocate a new file descriptor.
953 	 * If the process has file descriptor zero open, add to the list
954 	 * of open files at that point, otherwise put it at the front of
955 	 * the list of open files.
956 	 */
957 	nfiles++;
958 	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK | M_ZERO);
959 
960 	/*
961 	 * wait until after malloc (which may have blocked) returns before
962 	 * allocating the slot, else a race might have shrunk it if we had
963 	 * allocated it before the malloc.
964 	 */
965 	if ((error = fdalloc(td, 0, &i))) {
966 		nfiles--;
967 		FREE(fp, M_FILE);
968 		return (error);
969 	}
970 	fp->f_count = 1;
971 	fp->f_cred = crhold(p->p_ucred);
972 	fp->f_ops = &badfileops;
973 	fp->f_seqcount = 1;
974 	if ((fq = p->p_fd->fd_ofiles[0])) {
975 		LIST_INSERT_AFTER(fq, fp, f_list);
976 	} else {
977 		LIST_INSERT_HEAD(&filehead, fp, f_list);
978 	}
979 	p->p_fd->fd_ofiles[i] = fp;
980 	if (resultfp)
981 		*resultfp = fp;
982 	if (resultfd)
983 		*resultfd = i;
984 	return (0);
985 }
986 
987 /*
988  * Free a file descriptor.
989  */
990 void
991 ffree(fp)
992 	register struct file *fp;
993 {
994 	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
995 	LIST_REMOVE(fp, f_list);
996 	crfree(fp->f_cred);
997 	nfiles--;
998 	FREE(fp, M_FILE);
999 }
1000 
1001 /*
1002  * Build a new filedesc structure.
1003  */
1004 struct filedesc *
1005 fdinit(td)
1006 	struct thread *td;
1007 {
1008 	register struct filedesc0 *newfdp;
1009 	register struct filedesc *fdp = td->td_proc->p_fd;
1010 
1011 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1012 	    M_FILEDESC, M_WAITOK | M_ZERO);
1013 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1014 	if (newfdp->fd_fd.fd_cdir)
1015 		VREF(newfdp->fd_fd.fd_cdir);
1016 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1017 	if (newfdp->fd_fd.fd_rdir)
1018 		VREF(newfdp->fd_fd.fd_rdir);
1019 	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1020 	if (newfdp->fd_fd.fd_jdir)
1021 		VREF(newfdp->fd_fd.fd_jdir);
1022 
1023 	/* Create the file descriptor table. */
1024 	newfdp->fd_fd.fd_refcnt = 1;
1025 	newfdp->fd_fd.fd_cmask = cmask;
1026 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1027 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1028 	newfdp->fd_fd.fd_nfiles = NDFILE;
1029 	newfdp->fd_fd.fd_knlistsize = -1;
1030 
1031 	return (&newfdp->fd_fd);
1032 }
1033 
1034 /*
1035  * Share a filedesc structure.
1036  */
1037 struct filedesc *
1038 fdshare(p)
1039 	struct proc *p;
1040 {
1041 	p->p_fd->fd_refcnt++;
1042 	return (p->p_fd);
1043 }
1044 
1045 /*
1046  * Copy a filedesc structure.
1047  */
1048 struct filedesc *
1049 fdcopy(td)
1050 	struct thread *td;
1051 {
1052 	register struct filedesc *newfdp, *fdp = td->td_proc->p_fd;
1053 	register struct file **fpp;
1054 	register int i;
1055 
1056 	/* Certain daemons might not have file descriptors. */
1057 	if (fdp == NULL)
1058 		return (NULL);
1059 
1060 	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1061 	    M_FILEDESC, M_WAITOK);
1062 	bcopy(fdp, newfdp, sizeof(struct filedesc));
1063 	if (newfdp->fd_cdir)
1064 		VREF(newfdp->fd_cdir);
1065 	if (newfdp->fd_rdir)
1066 		VREF(newfdp->fd_rdir);
1067 	if (newfdp->fd_jdir)
1068 		VREF(newfdp->fd_jdir);
1069 	newfdp->fd_refcnt = 1;
1070 
1071 	/*
1072 	 * If the number of open files fits in the internal arrays
1073 	 * of the open file structure, use them, otherwise allocate
1074 	 * additional memory for the number of descriptors currently
1075 	 * in use.
1076 	 */
1077 	if (newfdp->fd_lastfile < NDFILE) {
1078 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1079 		newfdp->fd_ofileflags =
1080 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
1081 		i = NDFILE;
1082 	} else {
1083 		/*
1084 		 * Compute the smallest multiple of NDEXTENT needed
1085 		 * for the file descriptors currently in use,
1086 		 * allowing the table to shrink.
1087 		 */
1088 		i = newfdp->fd_nfiles;
1089 		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1090 			i /= 2;
1091 		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1092 		    M_FILEDESC, M_WAITOK);
1093 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1094 	}
1095 	newfdp->fd_nfiles = i;
1096 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1097 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1098 
1099 	/*
1100 	 * kq descriptors cannot be copied.
1101 	 */
1102 	if (newfdp->fd_knlistsize != -1) {
1103 		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1104 		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1105 			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1106 				*fpp = NULL;
1107 				if (i < newfdp->fd_freefile)
1108 					newfdp->fd_freefile = i;
1109 			}
1110 			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1111 				newfdp->fd_lastfile--;
1112 		}
1113 		newfdp->fd_knlist = NULL;
1114 		newfdp->fd_knlistsize = -1;
1115 		newfdp->fd_knhash = NULL;
1116 		newfdp->fd_knhashmask = 0;
1117 	}
1118 
1119 	fpp = newfdp->fd_ofiles;
1120 	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1121 		if (*fpp != NULL)
1122 			fhold(*fpp);
1123 	}
1124 	return (newfdp);
1125 }
1126 
1127 /*
1128  * Release a filedesc structure.
1129  */
1130 void
1131 fdfree(td)
1132 	struct thread *td;
1133 {
1134 	register struct filedesc *fdp = td->td_proc->p_fd;
1135 	struct file **fpp;
1136 	register int i;
1137 
1138 	/* Certain daemons might not have file descriptors. */
1139 	if (fdp == NULL)
1140 		return;
1141 
1142 	if (--fdp->fd_refcnt > 0)
1143 		return;
1144 	/*
1145 	 * We are the last reference to the structure, so we can
1146 	 * safely assume it will not change out from under us.
1147 	 */
1148 	fpp = fdp->fd_ofiles;
1149 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1150 		if (*fpp)
1151 			(void) closef(*fpp, td);
1152 	}
1153 	if (fdp->fd_nfiles > NDFILE)
1154 		FREE(fdp->fd_ofiles, M_FILEDESC);
1155 	if (fdp->fd_cdir)
1156 		vrele(fdp->fd_cdir);
1157 	if (fdp->fd_rdir)
1158 		vrele(fdp->fd_rdir);
1159 	if (fdp->fd_jdir)
1160 		vrele(fdp->fd_jdir);
1161 	if (fdp->fd_knlist)
1162 		FREE(fdp->fd_knlist, M_KQUEUE);
1163 	if (fdp->fd_knhash)
1164 		FREE(fdp->fd_knhash, M_KQUEUE);
1165 	FREE(fdp, M_FILEDESC);
1166 }
1167 
1168 /*
1169  * For setugid programs, we don't want people to use that setugidness
1170  * to generate error messages which write to a file that would otherwise
1171  * be off-limits to the process.
1172  *
1173  * This is a gross hack to plug the hole.  A better solution would involve
1174  * a special vop or other form of generalized access control mechanism.  We
1175  * go ahead and just reject all procfs file system accesses as dangerous.
1176  *
1177  * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1178  * sufficient.  We also don't check for setugidness since we know we are.
1179  */
1180 static int
1181 is_unsafe(struct file *fp)
1182 {
1183 	if (fp->f_type == DTYPE_VNODE &&
1184 	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1185 		return (1);
1186 	return (0);
1187 }
1188 
1189 /*
1190  * Make this setguid thing safe, if at all possible.
1191  */
1192 void
1193 setugidsafety(td)
1194 	struct thread *td;
1195 {
1196 	struct filedesc *fdp = td->td_proc->p_fd;
1197 	register int i;
1198 
1199 	/* Certain daemons might not have file descriptors. */
1200 	if (fdp == NULL)
1201 		return;
1202 
1203 	/*
1204 	 * note: fdp->fd_ofiles may be reallocated out from under us while
1205 	 * we are blocked in a close.  Be careful!
1206 	 */
1207 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1208 		if (i > 2)
1209 			break;
1210 		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1211 			struct file *fp;
1212 
1213 #if 0
1214 			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1215 				(void) munmapfd(td, i);
1216 #endif
1217 			if (i < fdp->fd_knlistsize)
1218 				knote_fdclose(td, i);
1219 			/*
1220 			 * NULL-out descriptor prior to close to avoid
1221 			 * a race while close blocks.
1222 			 */
1223 			fp = fdp->fd_ofiles[i];
1224 			fdp->fd_ofiles[i] = NULL;
1225 			fdp->fd_ofileflags[i] = 0;
1226 			if (i < fdp->fd_freefile)
1227 				fdp->fd_freefile = i;
1228 			(void) closef(fp, td);
1229 		}
1230 	}
1231 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1232 		fdp->fd_lastfile--;
1233 }
1234 
1235 /*
1236  * Close any files on exec?
1237  */
1238 void
1239 fdcloseexec(td)
1240 	struct thread *td;
1241 {
1242 	struct filedesc *fdp = td->td_proc->p_fd;
1243 	register int i;
1244 
1245 	/* Certain daemons might not have file descriptors. */
1246 	if (fdp == NULL)
1247 		return;
1248 
1249 	/*
1250 	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1251 	 * may block and rip them out from under us.
1252 	 */
1253 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1254 		if (fdp->fd_ofiles[i] != NULL &&
1255 		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1256 			struct file *fp;
1257 
1258 #if 0
1259 			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1260 				(void) munmapfd(td, i);
1261 #endif
1262 			if (i < fdp->fd_knlistsize)
1263 				knote_fdclose(td, i);
1264 			/*
1265 			 * NULL-out descriptor prior to close to avoid
1266 			 * a race while close blocks.
1267 			 */
1268 			fp = fdp->fd_ofiles[i];
1269 			fdp->fd_ofiles[i] = NULL;
1270 			fdp->fd_ofileflags[i] = 0;
1271 			if (i < fdp->fd_freefile)
1272 				fdp->fd_freefile = i;
1273 			(void) closef(fp, td);
1274 		}
1275 	}
1276 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1277 		fdp->fd_lastfile--;
1278 }
1279 
1280 /*
1281  * Internal form of close.
1282  * Decrement reference count on file structure.
1283  * Note: td may be NULL when closing a file
1284  * that was being passed in a message.
1285  */
1286 int
1287 closef(fp, td)
1288 	register struct file *fp;
1289 	register struct thread *td;
1290 {
1291 	struct vnode *vp;
1292 	struct flock lf;
1293 
1294 	if (fp == NULL)
1295 		return (0);
1296 	/*
1297 	 * POSIX record locking dictates that any close releases ALL
1298 	 * locks owned by this process.  This is handled by setting
1299 	 * a flag in the unlock to free ONLY locks obeying POSIX
1300 	 * semantics, and not to free BSD-style file locks.
1301 	 * If the descriptor was in a message, POSIX-style locks
1302 	 * aren't passed with the descriptor.
1303 	 */
1304 	if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1305 	    fp->f_type == DTYPE_VNODE) {
1306 		lf.l_whence = SEEK_SET;
1307 		lf.l_start = 0;
1308 		lf.l_len = 0;
1309 		lf.l_type = F_UNLCK;
1310 		vp = (struct vnode *)fp->f_data;
1311 		(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1312 		    F_UNLCK, &lf, F_POSIX);
1313 	}
1314 	return (fdrop(fp, td));
1315 }
1316 
1317 /*
1318  * Extract the file pointer associated with the specified descriptor for
1319  * the current user process.  If no error occurred, 0 is returned, *fpp
1320  * will be set to the file pointer, and the file pointer's ref count
1321  * will be bumped.  Use fdrop() to drop it.  If an error occurred, the
1322  * non-zero error is returned and *fpp is set to NULL.
1323  *
1324  * This routine requires Giant for the moment.  Once enough of the
1325  * system is converted over to this and other encapsulated APIs we
1326  * will be able to mutex it and call it without Giant.
1327  */
1328 static __inline
1329 int
1330 _fget(struct thread *td, int fd, struct file **fpp, int flags)
1331 {
1332 	struct filedesc *fdp;
1333 	struct file *fp;
1334 
1335 	GIANT_REQUIRED;
1336 	fdp = td->td_proc->p_fd;
1337 	*fpp = NULL;
1338 	if ((u_int)fd >= fdp->fd_nfiles)
1339 		return(EBADF);
1340 	if ((fp = fdp->fd_ofiles[fd]) == NULL)
1341 		return(EBADF);
1342 
1343 	/*
1344 	 * Note: FREAD failures return EBADF to maintain backwards
1345 	 * compatibility with what routines returned before.
1346 	 *
1347 	 * Only one flag, or 0, may be specified.
1348 	 */
1349 	if (flags == FREAD && (fp->f_flag & FREAD) == 0)
1350 		return(EBADF);
1351 	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0)
1352 		return(EINVAL);
1353 	++fp->f_count;
1354 	*fpp = fp;
1355 	return(0);
1356 }
1357 
1358 int
1359 fget(struct thread *td, int fd, struct file **fpp)
1360 {
1361     return(_fget(td, fd, fpp, 0));
1362 }
1363 
1364 int
1365 fget_read(struct thread *td, int fd, struct file **fpp)
1366 {
1367     return(_fget(td, fd, fpp, FREAD));
1368 }
1369 
1370 int
1371 fget_write(struct thread *td, int fd, struct file **fpp)
1372 {
1373     return(_fget(td, fd, fpp, FWRITE));
1374 }
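
/*
 * Illustrative usage sketch (mirroring fstat() above; not part of the
 * original code):
 *
 *	struct file *fp;
 *	int error;
 *
 *	if ((error = fget(td, fd, &fp)) != 0)
 *		return (error);
 *	... operate on fp ...
 *	fdrop(fp, td);
 */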
1375 
1376 /*
1377  * Like fget() but loads the underlying vnode, or returns an error if
1378  * the descriptor does not represent a vnode.  Note that pipes use vnodes
1379  * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1380  * error).  The returned vnode will be vref()d.
1381  */
1382 
1383 static __inline
1384 int
1385 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1386 {
1387 	struct filedesc *fdp;
1388 	struct file *fp;
1389 
1390 	GIANT_REQUIRED;
1391 	fdp = td->td_proc->p_fd;
1392 	*vpp = NULL;
1393 	if ((u_int)fd >= fdp->fd_nfiles)
1394 		return(EBADF);
1395 	if ((fp = fdp->fd_ofiles[fd]) == NULL)
1396 		return(EBADF);
1397 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO)
1398 		return(EINVAL);
1399 	if (fp->f_data == NULL)
1400 		return(EINVAL);
1401 
1402 	/*
1403 	 * Note: FREAD failures return EBADF to maintain backwards
1404 	 * compatibility with what routines returned before.
1405 	 *
1406 	 * Only one flag, or 0, may be specified.
1407 	 */
1408 	if (flags == FREAD && (fp->f_flag & FREAD) == 0)
1409 		return(EBADF);
1410 	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0)
1411 		return(EINVAL);
1412 	*vpp = (struct vnode *)fp->f_data;
1413 	vref(*vpp);
1414 	return(0);
1415 }
1416 
1417 int
1418 fgetvp(struct thread *td, int fd, struct vnode **vpp)
1419 {
1420 	return(_fgetvp(td, fd, vpp, 0));
1421 }
1422 
1423 int
1424 fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1425 {
1426 	return(_fgetvp(td, fd, vpp, FREAD));
1427 }
1428 
1429 int
1430 fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1431 {
1432 	return(_fgetvp(td, fd, vpp, FWRITE));
1433 }
1434 
1435 /*
1436  * Like fget() but loads the underlying socket, or returns an error if
1437  * the descriptor does not represent a socket.
1438  *
1439  * We bump the ref count on the returned socket.  XXX Also obtain the SX lock in
1440  * the future.
1441  */
1442 int
1443 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1444 {
1445 	struct filedesc *fdp;
1446 	struct file *fp;
1447 	struct socket *so;
1448 
1449 	GIANT_REQUIRED;
1450 	fdp = td->td_proc->p_fd;
1451 	*spp = NULL;
1452 	if (fflagp)
1453 		*fflagp = 0;
1454 	if ((u_int)fd >= fdp->fd_nfiles)
1455 		return(EBADF);
1456 	if ((fp = fdp->fd_ofiles[fd]) == NULL)
1457 		return(EBADF);
1458 	if (fp->f_type != DTYPE_SOCKET)
1459 		return(ENOTSOCK);
1460 	if (fp->f_data == NULL)
1461 		return(EINVAL);
1462 	so = (struct socket *)fp->f_data;
1463 	if (fflagp)
1464 		*fflagp = fp->f_flag;
1465 	soref(so);
1466 	*spp = so;
1467 	return(0);
1468 }
1469 
1470 /*
1471  * Drop the reference count on the socket and XXX release the SX lock in
1472  * the future.  The last reference closes the socket.
1473  */
1474 void
1475 fputsock(struct socket *so)
1476 {
1477 	sorele(so);
1478 }
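
/*
 * Illustrative pairing (not part of the original code): fgetsock() and
 * fputsock() bracket use of the underlying socket.
 *
 *	struct socket *so;
 *	u_int fflag;
 *	int error;
 *
 *	if ((error = fgetsock(td, fd, &so, &fflag)) != 0)
 *		return (error);
 *	... operate on so ...
 *	fputsock(so);
 */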
1479 
1480 int
1481 fdrop(fp, td)
1482 	struct file *fp;
1483 	struct thread *td;
1484 {
1485 	struct flock lf;
1486 	struct vnode *vp;
1487 	int error;
1488 
1489 	if (--fp->f_count > 0)
1490 		return (0);
1491 	if (fp->f_count < 0)
1492 		panic("fdrop: count < 0");
1493 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1494 		lf.l_whence = SEEK_SET;
1495 		lf.l_start = 0;
1496 		lf.l_len = 0;
1497 		lf.l_type = F_UNLCK;
1498 		vp = (struct vnode *)fp->f_data;
1499 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1500 	}
1501 	if (fp->f_ops != &badfileops)
1502 		error = fo_close(fp, td);
1503 	else
1504 		error = 0;
1505 	ffree(fp);
1506 	return (error);
1507 }
1508 
1509 /*
1510  * Apply an advisory lock on a file descriptor.
1511  *
1512  * Just attempt to get a record lock of the requested type on
1513  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1514  */
1515 #ifndef _SYS_SYSPROTO_H_
1516 struct flock_args {
1517 	int	fd;
1518 	int	how;
1519 };
1520 #endif
1521 /*
1522  * MPSAFE
1523  */
1524 /* ARGSUSED */
1525 int
1526 flock(td, uap)
1527 	struct thread *td;
1528 	register struct flock_args *uap;
1529 {
1530 	register struct filedesc *fdp = td->td_proc->p_fd;
1531 	register struct file *fp;
1532 	struct vnode *vp;
1533 	struct flock lf;
1534 	int error;
1535 
1536 	mtx_lock(&Giant);
1537 
1538 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
1539 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
1540 		error = EBADF;
1541 		goto done2;
1542 	}
1543 	if (fp->f_type != DTYPE_VNODE) {
1544 		error = EOPNOTSUPP;
1545 		goto done2;
1546 	}
1547 	vp = (struct vnode *)fp->f_data;
1548 	lf.l_whence = SEEK_SET;
1549 	lf.l_start = 0;
1550 	lf.l_len = 0;
1551 	if (uap->how & LOCK_UN) {
1552 		lf.l_type = F_UNLCK;
1553 		fp->f_flag &= ~FHASLOCK;
1554 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1555 		goto done2;
1556 	}
1557 	if (uap->how & LOCK_EX)
1558 		lf.l_type = F_WRLCK;
1559 	else if (uap->how & LOCK_SH)
1560 		lf.l_type = F_RDLCK;
1561 	else {
1562 		error = EBADF;
1563 		goto done2;
1564 	}
1565 	fp->f_flag |= FHASLOCK;
1566 	if (uap->how & LOCK_NB)
1567 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK);
1568 	else
1569 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT);
1570 done2:
1571 	mtx_unlock(&Giant);
1572 	return (error);
1573 }
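
/*
 * Illustrative mapping (derived from the code above; not part of the
 * original): LOCK_SH requests a whole-file F_RDLCK, LOCK_EX an F_WRLCK,
 * and LOCK_UN an F_UNLCK, all issued with F_FLOCK semantics; LOCK_NB
 * merely omits F_WAIT so VOP_ADVLOCK() does not sleep.
 */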
1574 
1575 /*
1576  * File Descriptor pseudo-device driver (/dev/fd/).
1577  *
1578  * Opening minor device N dup()s the file (if any) connected to file
1579  * descriptor N belonging to the calling process.  Note that this driver
1580  * consists of only the ``open()'' routine, because all subsequent
1581  * references to this file will be direct to the other driver.
1582  */
1583 /* ARGSUSED */
1584 static int
1585 fdopen(dev, mode, type, td)
1586 	dev_t dev;
1587 	int mode, type;
1588 	struct thread *td;
1589 {
1590 
1591 	/*
1592 	 * XXX Kludge: set curthread->td_dupfd to contain the value of the
1593 	 * file descriptor being sought for duplication. The error
1594 	 * return ensures that the vnode for this device will be released
1595 	 * by vn_open. Open will detect this special error and take the
1596 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1597 	 * will simply report the error.
1598 	 */
1599 	td->td_dupfd = dev2unit(dev);
1600 	return (ENODEV);
1601 }
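
/*
 * Illustrative flow (derived from the comment above; not part of the
 * original code): open("/dev/fd/3") reaches fdopen(), which records 3 in
 * td_dupfd and fails with ENODEV; the open(2) path catches that error and
 * calls dupfdopen() below, which dup()s descriptor 3 into the slot open(2)
 * had reserved.
 */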
1602 
1603 /*
1604  * Duplicate the specified descriptor to a free descriptor.
1605  */
1606 int
1607 dupfdopen(td, fdp, indx, dfd, mode, error)
1608 	struct thread *td;
1609 	struct filedesc *fdp;
1610 	int indx, dfd;
1611 	int mode;
1612 	int error;
1613 {
1614 	register struct file *wfp;
1615 	struct file *fp;
1616 
1617 	/*
1618 	 * If the to-be-dup'd fd number is greater than the allowed number
1619 	 * of file descriptors, or the fd to be dup'd has already been
1620 	 * closed, then reject.
1621 	 */
1622 	if ((u_int)dfd >= fdp->fd_nfiles ||
1623 	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1624 		return (EBADF);
1625 	}
1626 
1627 	/*
1628 	 * There are two cases of interest here.
1629 	 *
1630 	 * For ENODEV simply dup (dfd) to file descriptor
1631 	 * (indx) and return.
1632 	 *
1633 	 * For ENXIO steal away the file structure from (dfd) and
1634 	 * store it in (indx).  (dfd) is effectively closed by
1635 	 * this operation.
1636 	 *
1637 	 * Any other error code is just returned.
1638 	 */
1639 	switch (error) {
1640 	case ENODEV:
1641 		/*
1642 		 * Check that the mode the file is being opened for is a
1643 		 * subset of the mode of the existing descriptor.
1644 		 */
1645 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1646 			return (EACCES);
1647 		fp = fdp->fd_ofiles[indx];
1648 #if 0
1649 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1650 			(void) munmapfd(td, indx);
1651 #endif
1652 		fdp->fd_ofiles[indx] = wfp;
1653 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1654 		fhold(wfp);
1655 		if (indx > fdp->fd_lastfile)
1656 			fdp->fd_lastfile = indx;
1657 		/*
1658 		 * we now own the reference to fp that the ofiles[] array
1659 		 * used to own.  Release it.
1660 		 */
1661 		if (fp)
1662 			fdrop(fp, td);
1663 		return (0);
1664 
1665 	case ENXIO:
1666 		/*
1667 		 * Steal away the file pointer from dfd, and stuff it into indx.
1668 		 */
1669 		fp = fdp->fd_ofiles[indx];
1670 #if 0
1671 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1672 			(void) munmapfd(td, indx);
1673 #endif
1674 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1675 		fdp->fd_ofiles[dfd] = NULL;
1676 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1677 		fdp->fd_ofileflags[dfd] = 0;
1678 
1679 		/*
1680 		 * we now own the reference to fp that the ofiles[] array
1681 		 * used to own.  Release it.
1682 		 */
1683 		if (fp)
1684 			fdrop(fp, td);
1685 		/*
1686 		 * Complete the clean up of the filedesc structure by
1687 		 * recomputing the various hints.
1688 		 */
1689 		if (indx > fdp->fd_lastfile) {
1690 			fdp->fd_lastfile = indx;
1691 		} else {
1692 			while (fdp->fd_lastfile > 0 &&
1693 			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
1694 				fdp->fd_lastfile--;
1695 			}
1696 			if (dfd < fdp->fd_freefile)
1697 				fdp->fd_freefile = dfd;
1698 		}
1699 		return (0);
1700 
1701 	default:
1702 		return (error);
1703 	}
1704 	/* NOTREACHED */
1705 }
1706 
1707 /*
1708  * Get file structures.
1709  */
1710 static int
1711 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
1712 {
1713 	int error;
1714 	struct file *fp;
1715 
1716 	if (!req->oldptr) {
1717 		/*
1718 		 * overestimate by 10 files
1719 		 */
1720 		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1721 				(nfiles + 10) * sizeof(struct file)));
1722 	}
1723 
1724 	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1725 	if (error)
1726 		return (error);
1727 
1728 	/*
1729 	 * followed by an array of file structures
1730 	 */
1731 	LIST_FOREACH(fp, &filehead, f_list) {
1732 		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1733 		if (error)
1734 			return (error);
1735 	}
1736 	return (0);
1737 }
1738 
1739 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1740     0, 0, sysctl_kern_file, "S,file", "Entire file table");
1741 
1742 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
1743     &maxfilesperproc, 0, "Maximum files allowed open per process");
1744 
1745 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
1746     &maxfiles, 0, "Maximum number of files");
1747 
1748 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
1749     &nfiles, 0, "System-wide number of open files");
1750 
1751 static void
1752 fildesc_drvinit(void *unused)
1753 {
1754 	dev_t dev;
1755 
1756 	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
1757 	make_dev_alias(dev, "stdin");
1758 	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
1759 	make_dev_alias(dev, "stdout");
1760 	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
1761 	make_dev_alias(dev, "stderr");
1762 	if (!devfs_present) {
1763 		int fd;
1764 
1765 		for (fd = 3; fd < NUMFDESC; fd++)
1766 			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
1767 			    "fd/%d", fd);
1768 	}
1769 }
1770 
1771 struct fileops badfileops = {
1772 	badfo_readwrite,
1773 	badfo_readwrite,
1774 	badfo_ioctl,
1775 	badfo_poll,
1776 	badfo_kqfilter,
1777 	badfo_stat,
1778 	badfo_close
1779 };
1780 
1781 static int
1782 badfo_readwrite(fp, uio, cred, flags, td)
1783 	struct file *fp;
1784 	struct uio *uio;
1785 	struct ucred *cred;
1786 	struct thread *td;
1787 	int flags;
1788 {
1789 
1790 	return (EBADF);
1791 }
1792 
1793 static int
1794 badfo_ioctl(fp, com, data, td)
1795 	struct file *fp;
1796 	u_long com;
1797 	caddr_t data;
1798 	struct thread *td;
1799 {
1800 
1801 	return (EBADF);
1802 }
1803 
1804 static int
1805 badfo_poll(fp, events, cred, td)
1806 	struct file *fp;
1807 	int events;
1808 	struct ucred *cred;
1809 	struct thread *td;
1810 {
1811 
1812 	return (0);
1813 }
1814 
1815 static int
1816 badfo_kqfilter(fp, kn)
1817 	struct file *fp;
1818 	struct knote *kn;
1819 {
1820 
1821 	return (0);
1822 }
1823 
1824 static int
1825 badfo_stat(fp, sb, td)
1826 	struct file *fp;
1827 	struct stat *sb;
1828 	struct thread *td;
1829 {
1830 
1831 	return (EBADF);
1832 }
1833 
1834 static int
1835 badfo_close(fp, td)
1836 	struct file *fp;
1837 	struct thread *td;
1838 {
1839 
1840 	return (EBADF);
1841 }
1842 
1843 SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1844 					fildesc_drvinit,NULL)
1845