xref: /freebsd/sys/kern/kern_descrip.c (revision c17d43407fe04133a94055b0dbc7ea8965654a9f)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mutex.h>
49 #include <sys/sysproto.h>
50 #include <sys/conf.h>
51 #include <sys/filedesc.h>
52 #include <sys/kernel.h>
53 #include <sys/sysctl.h>
54 #include <sys/vnode.h>
55 #include <sys/proc.h>
56 #include <sys/file.h>
57 #include <sys/stat.h>
58 #include <sys/filio.h>
59 #include <sys/fcntl.h>
60 #include <sys/unistd.h>
61 #include <sys/resourcevar.h>
62 #include <sys/event.h>
63 #include <sys/sx.h>
64 #include <sys/socketvar.h>
65 
66 #include <machine/limits.h>
67 
68 #include <vm/vm.h>
69 #include <vm/vm_extern.h>
70 #include <vm/uma.h>
71 
72 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
73 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
74 
75 uma_zone_t file_zone;
76 
77 static	 d_open_t  fdopen;
78 #define NUMFDESC 64
79 
80 #define CDEV_MAJOR 22
81 static struct cdevsw fildesc_cdevsw = {
82 	/* open */	fdopen,
83 	/* close */	noclose,
84 	/* read */	noread,
85 	/* write */	nowrite,
86 	/* ioctl */	noioctl,
87 	/* poll */	nopoll,
88 	/* mmap */	nommap,
89 	/* strategy */	nostrategy,
90 	/* name */	"FD",
91 	/* maj */	CDEV_MAJOR,
92 	/* dump */	nodump,
93 	/* psize */	nopsize,
94 	/* flags */	0,
95 };
96 
97 static int do_dup(struct filedesc *fdp, int old, int new, register_t *retval, struct thread *td);
98 static int badfo_readwrite(struct file *fp, struct uio *uio,
99     struct ucred *cred, int flags, struct thread *td);
100 static int badfo_ioctl(struct file *fp, u_long com, caddr_t data,
101     struct thread *td);
102 static int badfo_poll(struct file *fp, int events,
103     struct ucred *cred, struct thread *td);
104 static int badfo_kqfilter(struct file *fp, struct knote *kn);
105 static int badfo_stat(struct file *fp, struct stat *sb, struct thread *td);
106 static int badfo_close(struct file *fp, struct thread *td);
107 
108 /*
109  * Descriptor management.
110  */
111 struct filelist filehead;	/* head of list of open files */
112 int nfiles;			/* actual number of open files */
113 extern int cmask;
114 struct sx filelist_lock;	/* sx to protect filelist */
115 
116 /*
117  * System calls on descriptors.
118  */
119 #ifndef _SYS_SYSPROTO_H_
120 struct getdtablesize_args {
121 	int	dummy;
122 };
123 #endif
124 /*
125  * MPSAFE
126  */
127 /* ARGSUSED */
128 int
129 getdtablesize(td, uap)
130 	struct thread *td;
131 	struct getdtablesize_args *uap;
132 {
133 	struct proc *p = td->td_proc;
134 
135 	mtx_lock(&Giant);
136 	td->td_retval[0] =
137 	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
138 	mtx_unlock(&Giant);
139 	return (0);
140 }
141 
142 /*
143  * Duplicate a file descriptor to a particular value.
144  *
145  * Note: a potential race condition exists when closing descriptors
146  * from a shared descriptor table (via rfork).
147  */
148 #ifndef _SYS_SYSPROTO_H_
149 struct dup2_args {
150 	u_int	from;
151 	u_int	to;
152 };
153 #endif
154 /*
155  * MPSAFE
156  */
157 /* ARGSUSED */
158 int
159 dup2(td, uap)
160 	struct thread *td;
161 	struct dup2_args *uap;
162 {
163 	struct proc *p = td->td_proc;
164 	register struct filedesc *fdp = td->td_proc->p_fd;
165 	register u_int old = uap->from, new = uap->to;
166 	int i, error;
167 
168 	FILEDESC_LOCK(fdp);
169 retry:
170 	if (old >= fdp->fd_nfiles ||
171 	    fdp->fd_ofiles[old] == NULL ||
172 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
173 	    new >= maxfilesperproc) {
174 		FILEDESC_UNLOCK(fdp);
175 		return (EBADF);
176 	}
177 	if (old == new) {
178 		td->td_retval[0] = new;
179 		FILEDESC_UNLOCK(fdp);
180 		return (0);
181 	}
182 	if (new >= fdp->fd_nfiles) {
183 		if ((error = fdalloc(td, new, &i))) {
184 			FILEDESC_UNLOCK(fdp);
185 			return (error);
186 		}
187 		/*
188 		 * fdalloc() may block, retest everything.
189 		 */
190 		goto retry;
191 	}
192 	error = do_dup(fdp, (int)old, (int)new, td->td_retval, td);
193 	return(error);
194 }
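
/*
 * Illustrative userland sketch (not part of this file): dup2(2) is commonly
 * used to redirect a well-known descriptor, e.g. to point stdout at a log
 * file.  The path and the "logfd" variable below are hypothetical.
 *
 *	int logfd = open("/tmp/app.log", O_WRONLY | O_CREAT | O_APPEND, 0644);
 *
 *	if (logfd >= 0 && dup2(logfd, STDOUT_FILENO) != -1)
 *		close(logfd);
 *
 * After this, descriptor 1 refers to the log file.
 */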
195 
196 /*
197  * Duplicate a file descriptor.
198  */
199 #ifndef _SYS_SYSPROTO_H_
200 struct dup_args {
201 	u_int	fd;
202 };
203 #endif
204 /*
205  * MPSAFE
206  */
207 /* ARGSUSED */
208 int
209 dup(td, uap)
210 	struct thread *td;
211 	struct dup_args *uap;
212 {
213 	register struct filedesc *fdp;
214 	u_int old;
215 	int new, error;
216 
217 	old = uap->fd;
218 	fdp = td->td_proc->p_fd;
219 	FILEDESC_LOCK(fdp);
220 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
221 		FILEDESC_UNLOCK(fdp);
222 		return (EBADF);
223 	}
224 	if ((error = fdalloc(td, 0, &new))) {
225 		FILEDESC_UNLOCK(fdp);
226 		return (error);
227 	}
228 	error = do_dup(fdp, (int)old, new, td->td_retval, td);
229 	return (error);
230 }
231 
232 /*
233  * The file control system call.
234  */
235 #ifndef _SYS_SYSPROTO_H_
236 struct fcntl_args {
237 	int	fd;
238 	int	cmd;
239 	long	arg;
240 };
241 #endif
242 /*
243  * MPSAFE
244  */
245 /* ARGSUSED */
246 int
247 fcntl(td, uap)
248 	struct thread *td;
249 	register struct fcntl_args *uap;
250 {
251 	register struct proc *p = td->td_proc;
252 	register struct filedesc *fdp;
253 	register struct file *fp;
254 	register char *pop;
255 	struct vnode *vp;
256 	int i, tmp, error = 0, flg = F_POSIX;
257 	struct flock fl;
258 	u_int newmin;
259 	struct proc *leaderp;
260 
261 	mtx_lock(&Giant);
262 
263 	fdp = p->p_fd;
264 	FILEDESC_LOCK(fdp);
265 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
266 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
267 		FILEDESC_UNLOCK(fdp);
268 		error = EBADF;
269 		goto done2;
270 	}
271 	pop = &fdp->fd_ofileflags[uap->fd];
272 
273 	switch (uap->cmd) {
274 	case F_DUPFD:
275 		newmin = uap->arg;
276 		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
277 		    newmin >= maxfilesperproc) {
278 			FILEDESC_UNLOCK(fdp);
279 			error = EINVAL;
280 			break;
281 		}
282 		if ((error = fdalloc(td, newmin, &i))) {
283 			FILEDESC_UNLOCK(fdp);
284 			break;
285 		}
286 		error = do_dup(fdp, uap->fd, i, td->td_retval, td);
287 		break;
288 
289 	case F_GETFD:
290 		td->td_retval[0] = *pop & 1;
291 		FILEDESC_UNLOCK(fdp);
292 		break;
293 
294 	case F_SETFD:
295 		*pop = (*pop &~ 1) | (uap->arg & 1);
296 		FILEDESC_UNLOCK(fdp);
297 		break;
298 
299 	case F_GETFL:
300 		FILE_LOCK(fp);
301 		FILEDESC_UNLOCK(fdp);
302 		td->td_retval[0] = OFLAGS(fp->f_flag);
303 		FILE_UNLOCK(fp);
304 		break;
305 
306 	case F_SETFL:
307 		fhold(fp);
308 		FILEDESC_UNLOCK(fdp);
309 		fp->f_flag &= ~FCNTLFLAGS;
310 		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
311 		tmp = fp->f_flag & FNONBLOCK;
312 		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
313 		if (error) {
314 			fdrop(fp, td);
315 			break;
316 		}
317 		tmp = fp->f_flag & FASYNC;
318 		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
319 		if (!error) {
320 			fdrop(fp, td);
321 			break;
322 		}
323 		fp->f_flag &= ~FNONBLOCK;
324 		tmp = 0;
325 		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
326 		fdrop(fp, td);
327 		break;
328 
329 	case F_GETOWN:
330 		fhold(fp);
331 		FILEDESC_UNLOCK(fdp);
332 		error = fo_ioctl(fp, FIOGETOWN, (caddr_t)td->td_retval, td);
333 		fdrop(fp, td);
334 		break;
335 
336 	case F_SETOWN:
337 		fhold(fp);
338 		FILEDESC_UNLOCK(fdp);
339 		error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, td);
340 		fdrop(fp, td);
341 		break;
342 
343 	case F_SETLKW:
344 		flg |= F_WAIT;
345 		/* Fall into F_SETLK */
346 
347 	case F_SETLK:
348 		if (fp->f_type != DTYPE_VNODE) {
349 			FILEDESC_UNLOCK(fdp);
350 			error = EBADF;
351 			break;
352 		}
353 		vp = (struct vnode *)fp->f_data;
354 		/*
355 		 * copyin/lockop may block
356 		 */
357 		fhold(fp);
358 		FILEDESC_UNLOCK(fdp);
359 		vp = (struct vnode *)fp->f_data;
360 
361 		/* Copy in the lock structure */
362 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
363 		    sizeof(fl));
364 		if (error) {
365 			fdrop(fp, td);
366 			break;
367 		}
368 		if (fl.l_whence == SEEK_CUR) {
369 			if (fp->f_offset < 0 ||
370 			    (fl.l_start > 0 &&
371 			     fp->f_offset > OFF_MAX - fl.l_start)) {
372 				fdrop(fp, td);
373 				error = EOVERFLOW;
374 				break;
375 			}
376 			fl.l_start += fp->f_offset;
377 		}
378 
379 		switch (fl.l_type) {
380 		case F_RDLCK:
381 			if ((fp->f_flag & FREAD) == 0) {
382 				error = EBADF;
383 				break;
384 			}
385 			PROC_LOCK(p);
386 			p->p_flag |= P_ADVLOCK;
387 			leaderp = p->p_leader;
388 			PROC_UNLOCK(p);
389 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
390 			    &fl, flg);
391 			break;
392 		case F_WRLCK:
393 			if ((fp->f_flag & FWRITE) == 0) {
394 				error = EBADF;
395 				break;
396 			}
397 			PROC_LOCK(p);
398 			p->p_flag |= P_ADVLOCK;
399 			leaderp = p->p_leader;
400 			PROC_UNLOCK(p);
401 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
402 			    &fl, flg);
403 			break;
404 		case F_UNLCK:
405 			PROC_LOCK(p);
406 			leaderp = p->p_leader;
407 			PROC_UNLOCK(p);
408 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_UNLCK,
409 				&fl, F_POSIX);
410 			break;
411 		default:
412 			error = EINVAL;
413 			break;
414 		}
415 		fdrop(fp, td);
416 		break;
417 
418 	case F_GETLK:
419 		if (fp->f_type != DTYPE_VNODE) {
420 			FILEDESC_UNLOCK(fdp);
421 			error = EBADF;
422 			break;
423 		}
424 		vp = (struct vnode *)fp->f_data;
425 		/*
426 		 * copyin/lockop may block
427 		 */
428 		fhold(fp);
429 		FILEDESC_UNLOCK(fdp);
430 		vp = (struct vnode *)fp->f_data;
431 
432 		/* Copy in the lock structure */
433 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
434 		    sizeof(fl));
435 		if (error) {
436 			fdrop(fp, td);
437 			break;
438 		}
439 		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
440 		    fl.l_type != F_UNLCK) {
441 			fdrop(fp, td);
442 			error = EINVAL;
443 			break;
444 		}
445 		if (fl.l_whence == SEEK_CUR) {
446 			if ((fl.l_start > 0 &&
447 			     fp->f_offset > OFF_MAX - fl.l_start) ||
448 			    (fl.l_start < 0 &&
449 			     fp->f_offset < OFF_MIN - fl.l_start)) {
450 				fdrop(fp, td);
451 				error = EOVERFLOW;
452 				break;
453 			}
454 			fl.l_start += fp->f_offset;
455 		}
456 		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
457 			    &fl, F_POSIX);
458 		fdrop(fp, td);
459 		if (error == 0) {
460 			error = copyout((caddr_t)&fl,
461 				    (caddr_t)(intptr_t)uap->arg, sizeof(fl));
462 		}
463 		break;
464 	default:
465 		FILEDESC_UNLOCK(fdp);
466 		error = EINVAL;
467 		break;
468 	}
469 done2:
470 	mtx_unlock(&Giant);
471 	return (error);
472 }
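
/*
 * Illustrative userland sketch (not part of this file): the usual
 * F_GETFL/F_SETFL dance for marking a descriptor non-blocking, which the
 * F_SETFL case above services via the FIONBIO/FIOASYNC ioctls:
 *
 *	int flags = fcntl(fd, F_GETFL, 0);
 *
 *	if (flags != -1)
 *		(void)fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 */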
473 
474 /*
475  * Common code for dup, dup2, and fcntl(F_DUPFD).
476  * filedesc must be locked, but will be unlocked as a side effect.
477  */
478 static int
479 do_dup(fdp, old, new, retval, td)
480 	register struct filedesc *fdp;
481 	register int old, new;
482 	register_t *retval;
483 	struct thread *td;
484 {
485 	struct file *fp;
486 	struct file *delfp;
487 
488 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
489 
490 	/*
491 	 * Save info on the descriptor being overwritten.  We have
492 	 * to do the unmap now, but we cannot close it without
493 	 * introducing an ownership race for the slot.
494 	 */
495 	delfp = fdp->fd_ofiles[new];
496 #if 0
497 	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
498 		(void) munmapfd(td, new);
499 #endif
500 
501 	/*
502 	 * Duplicate the source descriptor, update lastfile
503 	 */
504 	fp = fdp->fd_ofiles[old];
505 	fdp->fd_ofiles[new] = fp;
506 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
507 	fhold(fp);
508 	if (new > fdp->fd_lastfile)
509 		fdp->fd_lastfile = new;
510 	*retval = new;
511 
512 	FILEDESC_UNLOCK(fdp);
513 
514 	/*
515 	 * If we dup'd over a valid file, we now own the reference to it
516 	 * and must dispose of it using closef() semantics (as if a
517 	 * close() were performed on it).
518 	 */
519 	if (delfp) {
520 		mtx_lock(&Giant);
521 		(void) closef(delfp, td);
522 		mtx_unlock(&Giant);
523 	}
524 	return (0);
525 }
526 
527 /*
528  * If sigio is on the list associated with a process or process group,
529  * disable signalling from the device, remove sigio from the list and
530  * free sigio.
531  */
532 void
533 funsetown(sigio)
534 	struct sigio *sigio;
535 {
536 	int s;
537 
538 	if (sigio == NULL)
539 		return;
540 
541 	s = splhigh();
542 	*(sigio->sio_myref) = NULL;
543 	splx(s);
544 	if ((sigio)->sio_pgid < 0) {
545 		struct pgrp *pg = (sigio)->sio_pgrp;
546 		PGRP_LOCK(pg);
547 		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
548 			     sigio, sio_pgsigio);
549 		PGRP_UNLOCK(pg);
550 	} else {
551 		struct proc *p = (sigio)->sio_proc;
552 		PROC_LOCK(p);
553 		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
554 			     sigio, sio_pgsigio);
555 		PROC_UNLOCK(p);
556 	}
557 	crfree(sigio->sio_ucred);
558 	FREE(sigio, M_SIGIO);
559 }
560 
561 /* Free a list of sigio structures. */
562 void
563 funsetownlst(sigiolst)
564 	struct sigiolst *sigiolst;
565 {
566 	int s;
567 	struct sigio *sigio;
568 	struct proc *p;
569 	struct pgrp *pg;
570 
571 	sigio = SLIST_FIRST(sigiolst);
572 	if (sigio == NULL)
573 		return;
574 
575 	p = NULL;
576 	pg = NULL;
577 
578 	/*
579 	 * Every entry of the list should belong
580 	 * to a single proc or pgrp.
581 	 */
582 	if (sigio->sio_pgid < 0) {
583 		pg = sigio->sio_pgrp;
584 		PGRP_LOCK_ASSERT(pg, MA_OWNED);
585 	} else /* if (sigio->sio_pgid > 0) */ {
586 		p = sigio->sio_proc;
587 		PROC_LOCK_ASSERT(p, MA_OWNED);
588 	}
589 
590 	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
591 		s = splhigh();
592 		*(sigio->sio_myref) = NULL;
593 		splx(s);
594 		if (pg != NULL) {
595 			KASSERT(sigio->sio_pgid < 0, ("Proc sigio in pgrp sigio list"));
596 			KASSERT(sigio->sio_pgrp == pg, ("Bogus pgrp in sigio list"));
597 			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, sio_pgsigio);
598 			PGRP_UNLOCK(pg);
599 			crfree(sigio->sio_ucred);
600 			FREE(sigio, M_SIGIO);
601 			PGRP_LOCK(pg);
602 		} else /* if (p != NULL) */ {
603 			KASSERT(sigio->sio_pgid > 0, ("Pgrp sigio in proc sigio list"));
604 			KASSERT(sigio->sio_proc == p, ("Bogus proc in sigio list"));
605 			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, sio_pgsigio);
606 			PROC_UNLOCK(p);
607 			crfree(sigio->sio_ucred);
608 			FREE(sigio, M_SIGIO);
609 			PROC_LOCK(p);
610 		}
611 	}
612 }
613 
614 /*
615  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
616  *
617  * After permission checking, add a sigio structure to the sigio list for
618  * the process or process group.
619  */
620 int
621 fsetown(pgid, sigiop)
622 	pid_t pgid;
623 	struct sigio **sigiop;
624 {
625 	struct proc *proc;
626 	struct pgrp *pgrp;
627 	struct sigio *sigio;
628 	int s, ret;
629 
630 	if (pgid == 0) {
631 		funsetown(*sigiop);
632 		return (0);
633 	}
634 
635 	ret = 0;
636 
637 	/* Allocate and fill in the new sigio out of locks. */
638 	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
639 	sigio->sio_pgid = pgid;
640 	sigio->sio_ucred = crhold(curthread->td_ucred);
641 	sigio->sio_myref = sigiop;
642 
643 	PGRPSESS_SLOCK();
644 	if (pgid > 0) {
645 		proc = pfind(pgid);
646 		if (proc == NULL) {
647 			ret = ESRCH;
648 			goto fail;
649 		}
650 
651 		/*
652 		 * Policy - Don't allow a process to FSETOWN a process
653 		 * in another session.
654 		 *
655 		 * Remove this test to allow maximum flexibility or
656 		 * restrict FSETOWN to the current process or process
657 		 * group for maximum safety.
658 		 */
659 		PROC_UNLOCK(proc);
660 		if (proc->p_session != curthread->td_proc->p_session) {
661 			ret = EPERM;
662 			goto fail;
663 		}
664 
665 		pgrp = NULL;
666 	} else /* if (pgid < 0) */ {
667 		pgrp = pgfind(-pgid);
668 		if (pgrp == NULL) {
669 			ret = ESRCH;
670 			goto fail;
671 		}
672 		PGRP_UNLOCK(pgrp);
673 
674 		/*
675 		 * Policy - Don't allow a process to FSETOWN a process
676 		 * in another session.
677 		 *
678 		 * Remove this test to allow maximum flexibility or
679 		 * restrict FSETOWN to the current process or process
680 		 * group for maximum safety.
681 		 */
682 		if (pgrp->pg_session != curthread->td_proc->p_session) {
683 			ret = EPERM;
684 			goto fail;
685 		}
686 
687 		proc = NULL;
688 	}
689 	funsetown(*sigiop);
690 	if (pgid > 0) {
691 		PROC_LOCK(proc);
692 		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
693 		sigio->sio_proc = proc;
694 		PROC_UNLOCK(proc);
695 	} else {
696 		PGRP_LOCK(pgrp);
697 		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
698 		sigio->sio_pgrp = pgrp;
699 		PGRP_UNLOCK(pgrp);
700 	}
701 	PGRPSESS_SUNLOCK();
702 	s = splhigh();
703 	*sigiop = sigio;
704 	splx(s);
705 	return (0);
706 
707 fail:
708 	PGRPSESS_SUNLOCK();
709 	crfree(sigio->sio_ucred);
710 	FREE(sigio, M_SIGIO);
711 	return (ret);
712 }
713 
714 /*
715  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
716  */
717 pid_t
718 fgetown(sigio)
719 	struct sigio *sigio;
720 {
721 	return (sigio != NULL ? sigio->sio_pgid : 0);
722 }
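
/*
 * Sketch of how a driver ioctl handler would typically use fsetown() and
 * fgetown() (loosely modeled on existing consumers; the "sc" softc and its
 * sc_sigio member are hypothetical):
 *
 *	case FIOSETOWN:
 *		return (fsetown(*(int *)data, &sc->sc_sigio));
 *
 *	case FIOGETOWN:
 *		*(int *)data = fgetown(sc->sc_sigio);
 *		return (0);
 */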
723 
724 /*
725  * Close a file descriptor.
726  */
727 #ifndef _SYS_SYSPROTO_H_
728 struct close_args {
729         int     fd;
730 };
731 #endif
732 /*
733  * MPSAFE
734  */
735 /* ARGSUSED */
736 int
737 close(td, uap)
738 	struct thread *td;
739 	struct close_args *uap;
740 {
741 	register struct filedesc *fdp;
742 	register struct file *fp;
743 	register int fd = uap->fd;
744 	int error = 0;
745 
746 	mtx_lock(&Giant);
747 	fdp = td->td_proc->p_fd;
748 	FILEDESC_LOCK(fdp);
749 	if ((unsigned)fd >= fdp->fd_nfiles ||
750 	    (fp = fdp->fd_ofiles[fd]) == NULL) {
751 		FILEDESC_UNLOCK(fdp);
752 		error = EBADF;
753 		goto done2;
754 	}
755 #if 0
756 	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
757 		(void) munmapfd(td, fd);
758 #endif
759 	fdp->fd_ofiles[fd] = NULL;
760 	fdp->fd_ofileflags[fd] = 0;
761 
762 	/*
763 	 * we now hold the fp reference that used to be owned by the descriptor
764 	 * array.
765 	 */
766 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
767 		fdp->fd_lastfile--;
768 	if (fd < fdp->fd_freefile)
769 		fdp->fd_freefile = fd;
770 	if (fd < fdp->fd_knlistsize) {
771 		FILEDESC_UNLOCK(fdp);
772 		knote_fdclose(td, fd);
773 	} else
774 		FILEDESC_UNLOCK(fdp);
775 
776 	error = closef(fp, td);
777 done2:
778 	mtx_unlock(&Giant);
779 	return(error);
780 }
781 
782 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
783 /*
784  * Return status information about a file descriptor.
785  */
786 #ifndef _SYS_SYSPROTO_H_
787 struct ofstat_args {
788 	int	fd;
789 	struct	ostat *sb;
790 };
791 #endif
792 /*
793  * MPSAFE
794  */
795 /* ARGSUSED */
796 int
797 ofstat(td, uap)
798 	struct thread *td;
799 	register struct ofstat_args *uap;
800 {
801 	struct file *fp;
802 	struct stat ub;
803 	struct ostat oub;
804 	int error;
805 
806 	mtx_lock(&Giant);
807 	if ((error = fget(td, uap->fd, &fp)) != 0)
808 		goto done2;
809 	error = fo_stat(fp, &ub, td);
810 	if (error == 0) {
811 		cvtstat(&ub, &oub);
812 		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
813 	}
814 	fdrop(fp, td);
815 done2:
816 	mtx_unlock(&Giant);
817 	return (error);
818 }
819 #endif /* COMPAT_43 || COMPAT_SUNOS */
820 
821 /*
822  * Return status information about a file descriptor.
823  */
824 #ifndef _SYS_SYSPROTO_H_
825 struct fstat_args {
826 	int	fd;
827 	struct	stat *sb;
828 };
829 #endif
830 /*
831  * MPSAFE
832  */
833 /* ARGSUSED */
834 int
835 fstat(td, uap)
836 	struct thread *td;
837 	struct fstat_args *uap;
838 {
839 	struct file *fp;
840 	struct stat ub;
841 	int error;
842 
843 	mtx_lock(&Giant);
844 	if ((error = fget(td, uap->fd, &fp)) != 0)
845 		goto done2;
846 	error = fo_stat(fp, &ub, td);
847 	if (error == 0)
848 		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
849 	fdrop(fp, td);
850 done2:
851 	mtx_unlock(&Giant);
852 	return (error);
853 }
854 
855 /*
856  * Return status information about a file descriptor.
857  */
858 #ifndef _SYS_SYSPROTO_H_
859 struct nfstat_args {
860 	int	fd;
861 	struct	nstat *sb;
862 };
863 #endif
864 /*
865  * MPSAFE
866  */
867 /* ARGSUSED */
868 int
869 nfstat(td, uap)
870 	struct thread *td;
871 	register struct nfstat_args *uap;
872 {
873 	struct file *fp;
874 	struct stat ub;
875 	struct nstat nub;
876 	int error;
877 
878 	mtx_lock(&Giant);
879 	if ((error = fget(td, uap->fd, &fp)) != 0)
880 		goto done2;
881 	error = fo_stat(fp, &ub, td);
882 	if (error == 0) {
883 		cvtnstat(&ub, &nub);
884 		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
885 	}
886 	fdrop(fp, td);
887 done2:
888 	mtx_unlock(&Giant);
889 	return (error);
890 }
891 
892 /*
893  * Return pathconf information about a file descriptor.
894  */
895 #ifndef _SYS_SYSPROTO_H_
896 struct fpathconf_args {
897 	int	fd;
898 	int	name;
899 };
900 #endif
901 /*
902  * MPSAFE
903  */
904 /* ARGSUSED */
905 int
906 fpathconf(td, uap)
907 	struct thread *td;
908 	register struct fpathconf_args *uap;
909 {
910 	struct file *fp;
911 	struct vnode *vp;
912 	int error;
913 
914 	if ((error = fget(td, uap->fd, &fp)) != 0)
915 		return (error);
916 
917 	switch (fp->f_type) {
918 	case DTYPE_PIPE:
919 	case DTYPE_SOCKET:
920 		if (uap->name != _PC_PIPE_BUF) {
921 			error = EINVAL;
922 		} else {
923 			td->td_retval[0] = PIPE_BUF;
924 			error = 0;
925 		}
926 		break;
927 	case DTYPE_FIFO:
928 	case DTYPE_VNODE:
929 		vp = (struct vnode *)fp->f_data;
930 		mtx_lock(&Giant);
931 		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
932 		mtx_unlock(&Giant);
933 		break;
934 	default:
935 		error = EOPNOTSUPP;
936 		break;
937 	}
938 	fdrop(fp, td);
939 	return(error);
940 }
941 
942 /*
943  * Allocate a file descriptor for the process.
944  */
945 static int fdexpand;
946 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
947 
948 int
949 fdalloc(td, want, result)
950 	struct thread *td;
951 	int want;
952 	int *result;
953 {
954 	struct proc *p = td->td_proc;
955 	register struct filedesc *fdp = td->td_proc->p_fd;
956 	register int i;
957 	int lim, last, nfiles;
958 	struct file **newofile, **oldofile;
959 	char *newofileflags;
960 
961 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
962 
963 	/*
964 	 * Search for a free descriptor starting at the higher
965 	 * of want or fd_freefile.  If that fails, consider
966 	 * expanding the ofile array.
967 	 */
968 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
969 	for (;;) {
970 		last = min(fdp->fd_nfiles, lim);
971 		if ((i = want) < fdp->fd_freefile)
972 			i = fdp->fd_freefile;
973 		for (; i < last; i++) {
974 			if (fdp->fd_ofiles[i] == NULL) {
975 				fdp->fd_ofileflags[i] = 0;
976 				if (i > fdp->fd_lastfile)
977 					fdp->fd_lastfile = i;
978 				if (want <= fdp->fd_freefile)
979 					fdp->fd_freefile = i;
980 				*result = i;
981 				return (0);
982 			}
983 		}
984 
985 		/*
986 		 * No space in current array.  Expand?
987 		 */
988 		if (fdp->fd_nfiles >= lim)
989 			return (EMFILE);
990 		if (fdp->fd_nfiles < NDEXTENT)
991 			nfiles = NDEXTENT;
992 		else
993 			nfiles = 2 * fdp->fd_nfiles;
994 		FILEDESC_UNLOCK(fdp);
995 		mtx_lock(&Giant);
996 		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
997 		    M_FILEDESC, M_WAITOK);
998 		mtx_unlock(&Giant);
999 		FILEDESC_LOCK(fdp);
1000 
1001 		/*
1002 		 * Deal with a file-table extension race that might have
1003 		 * occurred while malloc was blocked.
1004 		 */
1005 		if (fdp->fd_nfiles >= nfiles) {
1006 			FILEDESC_UNLOCK(fdp);
1007 			mtx_lock(&Giant);
1008 			FREE(newofile, M_FILEDESC);
1009 			mtx_unlock(&Giant);
1010 			FILEDESC_LOCK(fdp);
1011 			continue;
1012 		}
1013 		newofileflags = (char *) &newofile[nfiles];
1014 		/*
1015 		 * Copy the existing ofile and ofileflags arrays
1016 		 * and zero the new portion of each array.
1017 		 */
1018 		bcopy(fdp->fd_ofiles, newofile,
1019 			(i = sizeof(struct file *) * fdp->fd_nfiles));
1020 		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
1021 		bcopy(fdp->fd_ofileflags, newofileflags,
1022 			(i = sizeof(char) * fdp->fd_nfiles));
1023 		bzero(newofileflags + i, nfiles * sizeof(char) - i);
1024 		if (fdp->fd_nfiles > NDFILE)
1025 			oldofile = fdp->fd_ofiles;
1026 		else
1027 			oldofile = NULL;
1028 		fdp->fd_ofiles = newofile;
1029 		fdp->fd_ofileflags = newofileflags;
1030 		fdp->fd_nfiles = nfiles;
1031 		fdexpand++;
1032 		if (oldofile != NULL) {
1033 			FILEDESC_UNLOCK(fdp);
1034 			mtx_lock(&Giant);
1035 			FREE(oldofile, M_FILEDESC);
1036 			mtx_unlock(&Giant);
1037 			FILEDESC_LOCK(fdp);
1038 		}
1039 	}
1040 	return (0);
1041 }
1042 
1043 /*
1044  * Check to see whether n user file descriptors
1045  * are available to the process p.
1046  */
1047 int
1048 fdavail(td, n)
1049 	struct thread *td;
1050 	register int n;
1051 {
1052 	struct proc *p = td->td_proc;
1053 	register struct filedesc *fdp = td->td_proc->p_fd;
1054 	register struct file **fpp;
1055 	register int i, lim, last;
1056 
1057 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1058 
1059 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1060 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
1061 		return (1);
1062 
1063 	last = min(fdp->fd_nfiles, lim);
1064 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
1065 	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
1066 		if (*fpp == NULL && --n <= 0)
1067 			return (1);
1068 	}
1069 	return (0);
1070 }
1071 
1072 /*
1073  * Create a new open file structure and allocate
1074  * a file descriptor for the process that refers to it.
1075  */
1076 int
1077 falloc(td, resultfp, resultfd)
1078 	register struct thread *td;
1079 	struct file **resultfp;
1080 	int *resultfd;
1081 {
1082 	struct proc *p = td->td_proc;
1083 	register struct file *fp, *fq;
1084 	int error, i;
1085 
1086 	sx_xlock(&filelist_lock);
1087 	if (nfiles >= maxfiles) {
1088 		sx_xunlock(&filelist_lock);
1089 		tablefull("file");
1090 		return (ENFILE);
1091 	}
1092 	nfiles++;
1093 	sx_xunlock(&filelist_lock);
1094 	/*
1095 	 * Allocate a new file descriptor.
1096 	 * If the process has file descriptor zero open, add to the list
1097 	 * of open files at that point, otherwise put it at the front of
1098 	 * the list of open files.
1099 	 */
1100 	fp = uma_zalloc(file_zone, M_WAITOK);
1101 	bzero(fp, sizeof(*fp));
1102 
1103 	/*
1104 	 * Wait until after malloc (which may have blocked) returns before
1105 	 * allocating the slot; otherwise a race might have shrunk the table
1106 	 * if we had allocated the slot before the malloc.
1107 	 */
1108 	FILEDESC_LOCK(p->p_fd);
1109 	if ((error = fdalloc(td, 0, &i))) {
1110 		FILEDESC_UNLOCK(p->p_fd);
1111 		sx_xlock(&filelist_lock);
1112 		nfiles--;
1113 		sx_xunlock(&filelist_lock);
1114 		uma_zfree(file_zone, fp);
1115 		return (error);
1116 	}
1117 	fp->f_mtxp = mtx_pool_alloc();
1118 	fp->f_gcflag = 0;
1119 	fp->f_count = 1;
1120 	fp->f_cred = crhold(td->td_ucred);
1121 	fp->f_ops = &badfileops;
1122 	fp->f_seqcount = 1;
1123 	FILEDESC_UNLOCK(p->p_fd);
1124 	sx_xlock(&filelist_lock);
1125 	FILEDESC_LOCK(p->p_fd);
1126 	if ((fq = p->p_fd->fd_ofiles[0])) {
1127 		LIST_INSERT_AFTER(fq, fp, f_list);
1128 	} else {
1129 		LIST_INSERT_HEAD(&filehead, fp, f_list);
1130 	}
1131 	p->p_fd->fd_ofiles[i] = fp;
1132 	FILEDESC_UNLOCK(p->p_fd);
1133 	sx_xunlock(&filelist_lock);
1134 	if (resultfp)
1135 		*resultfp = fp;
1136 	if (resultfd)
1137 		*resultfd = i;
1138 	return (0);
1139 }
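
/*
 * Sketch of the typical falloc() consumer (loosely modeled on pipe-style
 * code of this era; "pipeops" and "rpipe" stand in for a real fileops
 * table and per-object data):
 *
 *	struct file *fp;
 *	int fd, error;
 *
 *	if ((error = falloc(td, &fp, &fd)) != 0)
 *		return (error);
 *	fp->f_flag = FREAD | FWRITE;
 *	fp->f_type = DTYPE_PIPE;
 *	fp->f_data = (caddr_t)rpipe;
 *	fp->f_ops = &pipeops;
 *	td->td_retval[0] = fd;
 */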
1140 
1141 /*
1142  * Free a file descriptor.
1143  */
1144 void
1145 ffree(fp)
1146 	register struct file *fp;
1147 {
1148 
1149 	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
1150 	sx_xlock(&filelist_lock);
1151 	LIST_REMOVE(fp, f_list);
1152 	nfiles--;
1153 	sx_xunlock(&filelist_lock);
1154 	crfree(fp->f_cred);
1155 	uma_zfree(file_zone, fp);
1156 }
1157 
1158 /*
1159  * Build a new filedesc structure.
1160  */
1161 struct filedesc *
1162 fdinit(td)
1163 	struct thread *td;
1164 {
1165 	register struct filedesc0 *newfdp;
1166 	register struct filedesc *fdp = td->td_proc->p_fd;
1167 
1168 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1169 	    M_FILEDESC, M_WAITOK | M_ZERO);
1170 	mtx_init(&newfdp->fd_fd.fd_mtx, "filedesc structure", MTX_DEF);
1171 	FILEDESC_LOCK(&newfdp->fd_fd);
1172 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1173 	if (newfdp->fd_fd.fd_cdir)
1174 		VREF(newfdp->fd_fd.fd_cdir);
1175 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1176 	if (newfdp->fd_fd.fd_rdir)
1177 		VREF(newfdp->fd_fd.fd_rdir);
1178 	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1179 	if (newfdp->fd_fd.fd_jdir)
1180 		VREF(newfdp->fd_fd.fd_jdir);
1181 
1182 	/* Create the file descriptor table. */
1183 	newfdp->fd_fd.fd_refcnt = 1;
1184 	newfdp->fd_fd.fd_cmask = cmask;
1185 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1186 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1187 	newfdp->fd_fd.fd_nfiles = NDFILE;
1188 	newfdp->fd_fd.fd_knlistsize = -1;
1189 	FILEDESC_UNLOCK(&newfdp->fd_fd);
1190 
1191 	return (&newfdp->fd_fd);
1192 }
1193 
1194 /*
1195  * Share a filedesc structure.
1196  */
1197 struct filedesc *
1198 fdshare(p)
1199 	struct proc *p;
1200 {
1201 	FILEDESC_LOCK(p->p_fd);
1202 	p->p_fd->fd_refcnt++;
1203 	FILEDESC_UNLOCK(p->p_fd);
1204 	return (p->p_fd);
1205 }
1206 
1207 /*
1208  * Copy a filedesc structure.
1209  */
1210 struct filedesc *
1211 fdcopy(td)
1212 	struct thread *td;
1213 {
1214 	register struct filedesc *newfdp, *fdp = td->td_proc->p_fd;
1215 	register struct file **fpp;
1216 	register int i, j;
1217 
1218 	/* Certain daemons might not have file descriptors. */
1219 	if (fdp == NULL)
1220 		return (NULL);
1221 
1222 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1223 
1224 	FILEDESC_UNLOCK(fdp);
1225 	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1226 	    M_FILEDESC, M_WAITOK);
1227 	FILEDESC_LOCK(fdp);
1228 	bcopy(fdp, newfdp, sizeof(struct filedesc));
1229 	FILEDESC_UNLOCK(fdp);
1230 	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
1231 	mtx_init(&newfdp->fd_mtx, "filedesc structure", MTX_DEF);
1232 	if (newfdp->fd_cdir)
1233 		VREF(newfdp->fd_cdir);
1234 	if (newfdp->fd_rdir)
1235 		VREF(newfdp->fd_rdir);
1236 	if (newfdp->fd_jdir)
1237 		VREF(newfdp->fd_jdir);
1238 	newfdp->fd_refcnt = 1;
1239 
1240 	/*
1241 	 * If the number of open files fits in the internal arrays
1242 	 * of the open file structure, use them, otherwise allocate
1243 	 * additional memory for the number of descriptors currently
1244 	 * in use.
1245 	 */
1246 	FILEDESC_LOCK(fdp);
1247 	newfdp->fd_lastfile = fdp->fd_lastfile;
1248 	newfdp->fd_nfiles = fdp->fd_nfiles;
1249 	if (newfdp->fd_lastfile < NDFILE) {
1250 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1251 		newfdp->fd_ofileflags =
1252 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
1253 		i = NDFILE;
1254 	} else {
1255 		/*
1256 		 * Compute the smallest multiple of NDEXTENT needed
1257 		 * for the file descriptors currently in use,
1258 		 * allowing the table to shrink.
1259 		 */
1260 retry:
1261 		i = newfdp->fd_nfiles;
1262 		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1263 			i /= 2;
1264 		FILEDESC_UNLOCK(fdp);
1265 		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1266 		    M_FILEDESC, M_WAITOK);
1267 		FILEDESC_LOCK(fdp);
1268 		newfdp->fd_lastfile = fdp->fd_lastfile;
1269 		newfdp->fd_nfiles = fdp->fd_nfiles;
1270 		j = newfdp->fd_nfiles;
1271 		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
1272 			j /= 2;
1273 		if (i != j) {
1274 			/*
1275 			 * The size of the original table has changed.
1276 			 * Go over once again.
1277 			 */
1278 			FILEDESC_UNLOCK(fdp);
1279 			FREE(newfdp->fd_ofiles, M_FILEDESC);
1280 			FILEDESC_LOCK(fdp);
1281 			newfdp->fd_lastfile = fdp->fd_lastfile;
1282 			newfdp->fd_nfiles = fdp->fd_nfiles;
1283 			goto retry;
1284 		}
1285 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1286 	}
1287 	newfdp->fd_nfiles = i;
1288 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1289 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1290 
1291 	/*
1292 	 * kq descriptors cannot be copied.
1293 	 */
1294 	if (newfdp->fd_knlistsize != -1) {
1295 		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1296 		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1297 			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1298 				*fpp = NULL;
1299 				if (i < newfdp->fd_freefile)
1300 					newfdp->fd_freefile = i;
1301 			}
1302 			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1303 				newfdp->fd_lastfile--;
1304 		}
1305 		newfdp->fd_knlist = NULL;
1306 		newfdp->fd_knlistsize = -1;
1307 		newfdp->fd_knhash = NULL;
1308 		newfdp->fd_knhashmask = 0;
1309 	}
1310 
1311 	fpp = newfdp->fd_ofiles;
1312 	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1313 		if (*fpp != NULL) {
1314 			fhold(*fpp);
1315 		}
1316 	}
1317 	return (newfdp);
1318 }
1319 
1320 /*
1321  * Release a filedesc structure.
1322  */
1323 void
1324 fdfree(td)
1325 	struct thread *td;
1326 {
1327 	register struct filedesc *fdp;
1328 	struct file **fpp;
1329 	register int i;
1330 
1331 	fdp = td->td_proc->p_fd;
1332 	/* Certain daemons might not have file descriptors. */
1333 	if (fdp == NULL)
1334 		return;
1335 
1336 	FILEDESC_LOCK(fdp);
1337 	if (--fdp->fd_refcnt > 0) {
1338 		FILEDESC_UNLOCK(fdp);
1339 		return;
1340 	}
1341 	/*
1342 	 * We hold the last reference to the structure, so we can
1343 	 * safely assume it will not change out from under us.
1344 	 */
1345 	FILEDESC_UNLOCK(fdp);
1346 	fpp = fdp->fd_ofiles;
1347 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1348 		if (*fpp)
1349 			(void) closef(*fpp, td);
1350 	}
1351 
1352 	PROC_LOCK(td->td_proc);
1353 	td->td_proc->p_fd = NULL;
1354 	PROC_UNLOCK(td->td_proc);
1355 
1356 	if (fdp->fd_nfiles > NDFILE)
1357 		FREE(fdp->fd_ofiles, M_FILEDESC);
1358 	if (fdp->fd_cdir)
1359 		vrele(fdp->fd_cdir);
1360 	if (fdp->fd_rdir)
1361 		vrele(fdp->fd_rdir);
1362 	if (fdp->fd_jdir)
1363 		vrele(fdp->fd_jdir);
1364 	if (fdp->fd_knlist)
1365 		FREE(fdp->fd_knlist, M_KQUEUE);
1366 	if (fdp->fd_knhash)
1367 		FREE(fdp->fd_knhash, M_KQUEUE);
1368 	mtx_destroy(&fdp->fd_mtx);
1369 	FREE(fdp, M_FILEDESC);
1370 }
1371 
1372 /*
1373  * For setugid programs, we don't want people to use that setugidness
1374  * to generate error messages which write to a file which would
1375  * otherwise be off-limits to the process.
1376  *
1377  * This is a gross hack to plug the hole.  A better solution would involve
1378  * a special vop or other form of generalized access control mechanism.  We
1379  * go ahead and just reject all procfs file system accesses as dangerous.
1380  *
1381  * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1382  * sufficient.  We also don't check for setugidness since we know we are.
1383  */
1384 static int
1385 is_unsafe(struct file *fp)
1386 {
1387 	if (fp->f_type == DTYPE_VNODE &&
1388 	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1389 		return (1);
1390 	return (0);
1391 }
1392 
1393 /*
1394  * Make this setguid thing safe, if at all possible.
1395  */
1396 void
1397 setugidsafety(td)
1398 	struct thread *td;
1399 {
1400 	struct filedesc *fdp = td->td_proc->p_fd;
1401 	register int i;
1402 
1403 	/* Certain daemons might not have file descriptors. */
1404 	if (fdp == NULL)
1405 		return;
1406 
1407 	/*
1408 	 * note: fdp->fd_ofiles may be reallocated out from under us while
1409 	 * we are blocked in a close.  Be careful!
1410 	 */
1411 	FILEDESC_LOCK(fdp);
1412 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1413 		if (i > 2)
1414 			break;
1415 		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1416 			struct file *fp;
1417 
1418 #if 0
1419 			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1420 				(void) munmapfd(td, i);
1421 #endif
1422 			if (i < fdp->fd_knlistsize) {
1423 				FILEDESC_UNLOCK(fdp);
1424 				knote_fdclose(td, i);
1425 				FILEDESC_LOCK(fdp);
1426 			}
1427 			/*
1428 			 * NULL-out descriptor prior to close to avoid
1429 			 * a race while close blocks.
1430 			 */
1431 			fp = fdp->fd_ofiles[i];
1432 			fdp->fd_ofiles[i] = NULL;
1433 			fdp->fd_ofileflags[i] = 0;
1434 			if (i < fdp->fd_freefile)
1435 				fdp->fd_freefile = i;
1436 			FILEDESC_UNLOCK(fdp);
1437 			(void) closef(fp, td);
1438 			FILEDESC_LOCK(fdp);
1439 		}
1440 	}
1441 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1442 		fdp->fd_lastfile--;
1443 	FILEDESC_UNLOCK(fdp);
1444 }
1445 
1446 /*
1447  * Close any files on exec?
1448  */
1449 void
1450 fdcloseexec(td)
1451 	struct thread *td;
1452 {
1453 	struct filedesc *fdp = td->td_proc->p_fd;
1454 	register int i;
1455 
1456 	/* Certain daemons might not have file descriptors. */
1457 	if (fdp == NULL)
1458 		return;
1459 
1460 	FILEDESC_LOCK(fdp);
1461 
1462 	/*
1463 	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1464 	 * may block and rip them out from under us.
1465 	 */
1466 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1467 		if (fdp->fd_ofiles[i] != NULL &&
1468 		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1469 			struct file *fp;
1470 
1471 #if 0
1472 			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1473 				(void) munmapfd(td, i);
1474 #endif
1475 			if (i < fdp->fd_knlistsize) {
1476 				FILEDESC_UNLOCK(fdp);
1477 				knote_fdclose(td, i);
1478 				FILEDESC_LOCK(fdp);
1479 			}
1480 			/*
1481 			 * NULL-out descriptor prior to close to avoid
1482 			 * a race while close blocks.
1483 			 */
1484 			fp = fdp->fd_ofiles[i];
1485 			fdp->fd_ofiles[i] = NULL;
1486 			fdp->fd_ofileflags[i] = 0;
1487 			if (i < fdp->fd_freefile)
1488 				fdp->fd_freefile = i;
1489 			FILEDESC_UNLOCK(fdp);
1490 			(void) closef(fp, td);
1491 			FILEDESC_LOCK(fdp);
1492 		}
1493 	}
1494 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1495 		fdp->fd_lastfile--;
1496 	FILEDESC_UNLOCK(fdp);
1497 }
1498 
1499 /*
1500  * Internal form of close.
1501  * Decrement reference count on file structure.
1502  * Note: td may be NULL when closing a file
1503  * that was being passed in a message.
1504  */
1505 int
1506 closef(fp, td)
1507 	register struct file *fp;
1508 	register struct thread *td;
1509 {
1510 	struct vnode *vp;
1511 	struct flock lf;
1512 
1513 	if (fp == NULL)
1514 		return (0);
1515 	/*
1516 	 * POSIX record locking dictates that any close releases ALL
1517 	 * locks owned by this process.  This is handled by setting
1518 	 * a flag in the unlock to free ONLY locks obeying POSIX
1519 	 * semantics, and not to free BSD-style file locks.
1520 	 * If the descriptor was in a message, POSIX-style locks
1521 	 * aren't passed with the descriptor.
1522 	 */
1523 	if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1524 	    fp->f_type == DTYPE_VNODE) {
1525 		lf.l_whence = SEEK_SET;
1526 		lf.l_start = 0;
1527 		lf.l_len = 0;
1528 		lf.l_type = F_UNLCK;
1529 		vp = (struct vnode *)fp->f_data;
1530 		(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1531 		    F_UNLCK, &lf, F_POSIX);
1532 	}
1533 	return (fdrop(fp, td));
1534 }
1535 
1536 /*
1537  * Drop reference on struct file passed in, may call closef if the
1538  * reference hits zero.
1539  */
1540 int
1541 fdrop(fp, td)
1542 	struct file *fp;
1543 	struct thread *td;
1544 {
1545 
1546 	FILE_LOCK(fp);
1547 	return (fdrop_locked(fp, td));
1548 }
1549 
1550 /*
1551  * Extract the file pointer associated with the specified descriptor for
1552  * the current user process.
1553  *
1554  * If the descriptor doesn't exist, EBADF is returned.
1555  *
1556  * If the descriptor exists but doesn't match 'flags' then
1557  * return EBADF for read attempts and EINVAL for write attempts.
1558  *
1559  * If 'hold' is set (non-zero), the file's refcount will be bumped on return.
1560  * It should be dropped with fdrop().
1561  * If it is not set, then the refcount will not be bumped; however, the
1562  * thread's filedesc struct will be returned locked (for fgetsock).
1563  *
1564  * If an error occurred, the non-zero error is returned and *fpp is set to NULL.
1565  * Otherwise *fpp is set and zero is returned.
1566  */
1567 static __inline
1568 int
1569 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1570 {
1571 	struct filedesc *fdp;
1572 	struct file *fp;
1573 
1574 	*fpp = NULL;
1575 	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1576 		return(EBADF);
1577 	FILEDESC_LOCK(fdp);
1578 	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1579 		FILEDESC_UNLOCK(fdp);
1580 		return(EBADF);
1581 	}
1582 
1583 	/*
1584 	 * Note: FREAD failures return EBADF to maintain backwards
1585 	 * compatibility with what these routines returned before.
1586 	 *
1587 	 * Only one flag, or 0, may be specified.
1588 	 */
1589 	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1590 		FILEDESC_UNLOCK(fdp);
1591 		return(EBADF);
1592 	}
1593 	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1594 		FILEDESC_UNLOCK(fdp);
1595 		return(EINVAL);
1596 	}
1597 	if (hold) {
1598 		fhold(fp);
1599 		FILEDESC_UNLOCK(fdp);
1600 	}
1601 	*fpp = fp;
1602 	return(0);
1603 }
1604 
1605 int
1606 fget(struct thread *td, int fd, struct file **fpp)
1607 {
1608     return(_fget(td, fd, fpp, 0, 1));
1609 }
1610 
1611 int
1612 fget_read(struct thread *td, int fd, struct file **fpp)
1613 {
1614     return(_fget(td, fd, fpp, FREAD, 1));
1615 }
1616 
1617 int
1618 fget_write(struct thread *td, int fd, struct file **fpp)
1619 {
1620     return(_fget(td, fd, fpp, FWRITE, 1));
1621 }
1622 
1623 /*
1624  * Like fget() but loads the underlying vnode, or returns an error if
1625  * the descriptor does not represent a vnode.  Note that pipes use vnodes
1626  * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1627  * error).  The returned vnode will be vref()d.
1628  */
1629 
1630 static __inline
1631 int
1632 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1633 {
1634 	struct file *fp;
1635 	int error;
1636 
1637 	*vpp = NULL;
1638 	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1639 		return (error);
1640 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1641 		error = EINVAL;
1642 	} else {
1643 		*vpp = (struct vnode *)fp->f_data;
1644 		vref(*vpp);
1645 	}
1646 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1647 	return (error);
1648 }
1649 
1650 int
1651 fgetvp(struct thread *td, int fd, struct vnode **vpp)
1652 {
1653 	return(_fgetvp(td, fd, vpp, 0));
1654 }
1655 
1656 int
1657 fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1658 {
1659 	return(_fgetvp(td, fd, vpp, FREAD));
1660 }
1661 
1662 int
1663 fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1664 {
1665 	return(_fgetvp(td, fd, vpp, FWRITE));
1666 }
1667 
1668 /*
1669  * Like fget() but loads the underlying socket, or returns an error if
1670  * the descriptor does not represent a socket.
1671  *
1672  * We bump the ref count on the returned socket.  XXX Also obtain the SX lock in
1673  * the future.
1674  */
1675 int
1676 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1677 {
1678 	struct file *fp;
1679 	int error;
1680 
1681 	*spp = NULL;
1682 	if (fflagp)
1683 		*fflagp = 0;
1684 	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1685 		return (error);
1686 	if (fp->f_type != DTYPE_SOCKET) {
1687 		error = ENOTSOCK;
1688 	} else {
1689 		*spp = (struct socket *)fp->f_data;
1690 		if (fflagp)
1691 			*fflagp = fp->f_flag;
1692 		soref(*spp);
1693 	}
1694 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1695 	return(error);
1696 }
1697 
1698 /*
1699  * Drop the reference count on the socket and XXX release the SX lock in
1700  * the future.  The last reference closes the socket.
1701  */
1702 void
1703 fputsock(struct socket *so)
1704 {
1705 	sorele(so);
1706 }
1707 
1708 /*
1709  * Drop reference on struct file passed in, may call closef if the
1710  * reference hits zero.
1711  * Expects struct file locked, and will unlock it.
1712  */
1713 int
1714 fdrop_locked(fp, td)
1715 	struct file *fp;
1716 	struct thread *td;
1717 {
1718 	struct flock lf;
1719 	struct vnode *vp;
1720 	int error;
1721 
1722 	FILE_LOCK_ASSERT(fp, MA_OWNED);
1723 
1724 	if (--fp->f_count > 0) {
1725 		FILE_UNLOCK(fp);
1726 		return (0);
1727 	}
1728 	mtx_lock(&Giant);
1729 	if (fp->f_count < 0)
1730 		panic("fdrop: count < 0");
1731 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1732 		lf.l_whence = SEEK_SET;
1733 		lf.l_start = 0;
1734 		lf.l_len = 0;
1735 		lf.l_type = F_UNLCK;
1736 		vp = (struct vnode *)fp->f_data;
1737 		FILE_UNLOCK(fp);
1738 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1739 	} else
1740 		FILE_UNLOCK(fp);
1741 	if (fp->f_ops != &badfileops)
1742 		error = fo_close(fp, td);
1743 	else
1744 		error = 0;
1745 	ffree(fp);
1746 	mtx_unlock(&Giant);
1747 	return (error);
1748 }
1749 
1750 /*
1751  * Apply an advisory lock on a file descriptor.
1752  *
1753  * Just attempt to get a record lock of the requested type on
1754  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1755  */
1756 #ifndef _SYS_SYSPROTO_H_
1757 struct flock_args {
1758 	int	fd;
1759 	int	how;
1760 };
1761 #endif
1762 /*
1763  * MPSAFE
1764  */
1765 /* ARGSUSED */
1766 int
1767 flock(td, uap)
1768 	struct thread *td;
1769 	register struct flock_args *uap;
1770 {
1771 	struct file *fp;
1772 	struct vnode *vp;
1773 	struct flock lf;
1774 	int error;
1775 
1776 	if ((error = fget(td, uap->fd, &fp)) != 0)
1777 		return (error);
1778 	if (fp->f_type != DTYPE_VNODE) {
1779 		fdrop(fp, td);
1780 		return (EOPNOTSUPP);
1781 	}
1782 
1783 	mtx_lock(&Giant);
1784 	vp = (struct vnode *)fp->f_data;
1785 	lf.l_whence = SEEK_SET;
1786 	lf.l_start = 0;
1787 	lf.l_len = 0;
1788 	if (uap->how & LOCK_UN) {
1789 		lf.l_type = F_UNLCK;
1790 		FILE_LOCK(fp);
1791 		fp->f_flag &= ~FHASLOCK;
1792 		FILE_UNLOCK(fp);
1793 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1794 		goto done2;
1795 	}
1796 	if (uap->how & LOCK_EX)
1797 		lf.l_type = F_WRLCK;
1798 	else if (uap->how & LOCK_SH)
1799 		lf.l_type = F_RDLCK;
1800 	else {
1801 		error = EBADF;
1802 		goto done2;
1803 	}
1804 	FILE_LOCK(fp);
1805 	fp->f_flag |= FHASLOCK;
1806 	FILE_UNLOCK(fp);
1807 	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1808 	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
1809 done2:
1810 	fdrop(fp, td);
1811 	mtx_unlock(&Giant);
1812 	return (error);
1813 }
1814 
1815 /*
1816  * File Descriptor pseudo-device driver (/dev/fd/).
1817  *
1818  * Opening minor device N dup()s the file (if any) connected to file
1819  * descriptor N belonging to the calling process.  Note that this driver
1820  * consists of only the ``open()'' routine, because all subsequent
1821  * references to this file will be direct to the other driver.
1822  */
1823 /* ARGSUSED */
1824 static int
1825 fdopen(dev, mode, type, td)
1826 	dev_t dev;
1827 	int mode, type;
1828 	struct thread *td;
1829 {
1830 
1831 	/*
1832 	 * XXX Kludge: set curthread->td_dupfd to contain the value of
1833 	 * the file descriptor being sought for duplication. The error
1834 	 * return ensures that the vnode for this device will be released
1835 	 * by vn_open. Open will detect this special error and take the
1836 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1837 	 * will simply report the error.
1838 	 */
1839 	td->td_dupfd = dev2unit(dev);
1840 	return (ENODEV);
1841 }
1842 
1843 /*
1844  * Duplicate the specified descriptor to a free descriptor.
1845  */
1846 int
1847 dupfdopen(td, fdp, indx, dfd, mode, error)
1848 	struct thread *td;
1849 	struct filedesc *fdp;
1850 	int indx, dfd;
1851 	int mode;
1852 	int error;
1853 {
1854 	register struct file *wfp;
1855 	struct file *fp;
1856 
1857 	/*
1858 	 * If the to-be-dup'd fd number is greater than the allowed number
1859 	 * of file descriptors, or the fd to be dup'd has already been
1860 	 * closed, then reject.
1861 	 */
1862 	FILEDESC_LOCK(fdp);
1863 	if ((u_int)dfd >= fdp->fd_nfiles ||
1864 	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1865 		FILEDESC_UNLOCK(fdp);
1866 		return (EBADF);
1867 	}
1868 
1869 	/*
1870 	 * There are two cases of interest here.
1871 	 *
1872 	 * For ENODEV simply dup (dfd) to file descriptor
1873 	 * (indx) and return.
1874 	 *
1875 	 * For ENXIO steal away the file structure from (dfd) and
1876 	 * store it in (indx).  (dfd) is effectively closed by
1877 	 * this operation.
1878 	 *
1879 	 * Any other error code is just returned.
1880 	 */
1881 	switch (error) {
1882 	case ENODEV:
1883 		/*
1884 		 * Check that the mode the file is being opened for is a
1885 		 * subset of the mode of the existing descriptor.
1886 		 */
1887 		FILE_LOCK(wfp);
1888 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1889 			FILE_UNLOCK(wfp);
1890 			FILEDESC_UNLOCK(fdp);
1891 			return (EACCES);
1892 		}
1893 		fp = fdp->fd_ofiles[indx];
1894 #if 0
1895 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1896 			(void) munmapfd(td, indx);
1897 #endif
1898 		fdp->fd_ofiles[indx] = wfp;
1899 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1900 		fhold_locked(wfp);
1901 		FILE_UNLOCK(wfp);
1902 		if (indx > fdp->fd_lastfile)
1903 			fdp->fd_lastfile = indx;
1904 		if (fp != NULL)
1905 			FILE_LOCK(fp);
1906 		FILEDESC_UNLOCK(fdp);
1907 		/*
1908 		 * we now own the reference to fp that the ofiles[] array
1909 		 * used to own.  Release it.
1910 		 */
1911 		if (fp != NULL)
1912 			fdrop_locked(fp, td);
1913 		return (0);
1914 
1915 	case ENXIO:
1916 		/*
1917 		 * Steal away the file pointer from dfd, and stuff it into indx.
1918 		 */
1919 		fp = fdp->fd_ofiles[indx];
1920 #if 0
1921 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1922 			(void) munmapfd(td, indx);
1923 #endif
1924 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1925 		fdp->fd_ofiles[dfd] = NULL;
1926 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1927 		fdp->fd_ofileflags[dfd] = 0;
1928 
1929 		/*
1930 		 * Complete the clean up of the filedesc structure by
1931 		 * recomputing the various hints.
1932 		 */
1933 		if (indx > fdp->fd_lastfile) {
1934 			fdp->fd_lastfile = indx;
1935 		} else {
1936 			while (fdp->fd_lastfile > 0 &&
1937 			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
1938 				fdp->fd_lastfile--;
1939 			}
1940 			if (dfd < fdp->fd_freefile)
1941 				fdp->fd_freefile = dfd;
1942 		}
1943 		if (fp != NULL)
1944 			FILE_LOCK(fp);
1945 		FILEDESC_UNLOCK(fdp);
1946 
1947 		/*
1948 		 * we now own the reference to fp that the ofiles[] array
1949 		 * used to own.  Release it.
1950 		 */
1951 		if (fp != NULL)
1952 			fdrop_locked(fp, td);
1953 		return (0);
1954 
1955 	default:
1956 		FILEDESC_UNLOCK(fdp);
1957 		return (error);
1958 	}
1959 	/* NOTREACHED */
1960 }
1961 
1962 /*
1963  * Get file structures.
1964  */
1965 static int
1966 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
1967 {
1968 	int error;
1969 	struct file *fp;
1970 
1971 	sx_slock(&filelist_lock);
1972 	if (!req->oldptr) {
1973 		/*
1974 		 * overestimate by 10 files
1975 		 */
1976 		error = SYSCTL_OUT(req, 0, sizeof(filehead) +
1977 				   (nfiles + 10) * sizeof(struct file));
1978 		sx_sunlock(&filelist_lock);
1979 		return (error);
1980 	}
1981 
1982 	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1983 	if (error) {
1984 		sx_sunlock(&filelist_lock);
1985 		return (error);
1986 	}
1987 
1988 	/*
1989 	 * followed by an array of file structures
1990 	 */
1991 	LIST_FOREACH(fp, &filehead, f_list) {
1992 		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1993 		if (error) {
1994 			sx_sunlock(&filelist_lock);
1995 			return (error);
1996 		}
1997 	}
1998 	sx_sunlock(&filelist_lock);
1999 	return (0);
2000 }
2001 
2002 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
2003     0, 0, sysctl_kern_file, "S,file", "Entire file table");
2004 
2005 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
2006     &maxfilesperproc, 0, "Maximum files allowed open per process");
2007 
2008 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
2009     &maxfiles, 0, "Maximum number of files");
2010 
2011 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
2012     &nfiles, 0, "System-wide number of open files");
2013 
2014 static void
2015 fildesc_drvinit(void *unused)
2016 {
2017 	dev_t dev;
2018 
2019 	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
2020 	make_dev_alias(dev, "stdin");
2021 	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
2022 	make_dev_alias(dev, "stdout");
2023 	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
2024 	make_dev_alias(dev, "stderr");
2025 	if (!devfs_present) {
2026 		int fd;
2027 
2028 		for (fd = 3; fd < NUMFDESC; fd++)
2029 			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
2030 			    "fd/%d", fd);
2031 	}
2032 }
2033 
2034 struct fileops badfileops = {
2035 	badfo_readwrite,
2036 	badfo_readwrite,
2037 	badfo_ioctl,
2038 	badfo_poll,
2039 	badfo_kqfilter,
2040 	badfo_stat,
2041 	badfo_close
2042 };
2043 
2044 static int
2045 badfo_readwrite(fp, uio, cred, flags, td)
2046 	struct file *fp;
2047 	struct uio *uio;
2048 	struct ucred *cred;
2049 	struct thread *td;
2050 	int flags;
2051 {
2052 
2053 	return (EBADF);
2054 }
2055 
2056 static int
2057 badfo_ioctl(fp, com, data, td)
2058 	struct file *fp;
2059 	u_long com;
2060 	caddr_t data;
2061 	struct thread *td;
2062 {
2063 
2064 	return (EBADF);
2065 }
2066 
2067 static int
2068 badfo_poll(fp, events, cred, td)
2069 	struct file *fp;
2070 	int events;
2071 	struct ucred *cred;
2072 	struct thread *td;
2073 {
2074 
2075 	return (0);
2076 }
2077 
2078 static int
2079 badfo_kqfilter(fp, kn)
2080 	struct file *fp;
2081 	struct knote *kn;
2082 {
2083 
2084 	return (0);
2085 }
2086 
2087 static int
2088 badfo_stat(fp, sb, td)
2089 	struct file *fp;
2090 	struct stat *sb;
2091 	struct thread *td;
2092 {
2093 
2094 	return (EBADF);
2095 }
2096 
2097 static int
2098 badfo_close(fp, td)
2099 	struct file *fp;
2100 	struct thread *td;
2101 {
2102 
2103 	return (EBADF);
2104 }
2105 
2106 SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
2107 					fildesc_drvinit,NULL)
2108 
2109 static void filelistinit(void *);
2110 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2111 
2112 /* ARGSUSED*/
2113 static void
2114 filelistinit(dummy)
2115 	void *dummy;
2116 {
2117 	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
2118 	    NULL, NULL, UMA_ALIGN_PTR, 0);
2119 
2120 	sx_init(&filelist_lock, "filelist lock");
2121 }
2122