xref: /freebsd/sys/kern/kern_descrip.c (revision d2893b161bbea64e099654c0dd78073cfb5667b0)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mutex.h>
49 #include <sys/sysproto.h>
50 #include <sys/conf.h>
51 #include <sys/filedesc.h>
52 #include <sys/kernel.h>
53 #include <sys/sysctl.h>
54 #include <sys/vnode.h>
55 #include <sys/proc.h>
56 #include <sys/namei.h>
57 #include <sys/file.h>
58 #include <sys/stat.h>
59 #include <sys/filio.h>
60 #include <sys/fcntl.h>
61 #include <sys/unistd.h>
62 #include <sys/resourcevar.h>
63 #include <sys/event.h>
64 #include <sys/sx.h>
65 #include <sys/socketvar.h>
66 #include <sys/signalvar.h>
67 
68 #include <machine/limits.h>
69 
70 #include <vm/vm.h>
71 #include <vm/vm_extern.h>
72 #include <vm/uma.h>
73 
74 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
75 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
76 
77 uma_zone_t file_zone;
78 
/*
 * Open entry point for the descriptor device; only declared here
 * (presumably defined later in this file -- not visible in this chunk).
 */
static	 d_open_t  fdopen;
/* NOTE(review): NUMFDESC is not used in the visible part of the file. */
#define NUMFDESC 64

/* Major number placed in the "maj" slot of the switch table below. */
#define CDEV_MAJOR 22
/*
 * Character device switch for the "FD" device.  fdopen is the only real
 * entry; every other slot is filled with a no* placeholder.
 */
static struct cdevsw fildesc_cdevsw = {
	/* open */	fdopen,
	/* close */	noclose,
	/* read */	noread,
	/* write */	nowrite,
	/* ioctl */	noioctl,
	/* poll */	nopoll,
	/* mmap */	nommap,
	/* strategy */	nostrategy,
	/* name */	"FD",
	/* maj */	CDEV_MAJOR,
	/* dump */	nodump,
	/* psize */	nopsize,
	/* flags */	0,
};
98 
/*
 * Forward declarations of file-local helpers.
 *
 * do_dup() is the shared back end of dup(), dup2() and fcntl(F_DUPFD).
 * The badfo_* routines have one entry per file operation (read/write,
 * ioctl, poll, kqfilter, stat, close); presumably they populate the
 * badfileops table installed by falloc() -- confirm against the part of
 * the file not shown here.
 */
static int do_dup(struct filedesc *fdp, int old, int new, register_t *retval, struct thread *td);
static int badfo_readwrite(struct file *fp, struct uio *uio,
    struct ucred *cred, int flags, struct thread *td);
static int badfo_ioctl(struct file *fp, u_long com, void *data,
    struct thread *td);
static int badfo_poll(struct file *fp, int events,
    struct ucred *cred, struct thread *td);
static int badfo_kqfilter(struct file *fp, struct knote *kn);
static int badfo_stat(struct file *fp, struct stat *sb, struct thread *td);
static int badfo_close(struct file *fp, struct thread *td);
109 
110 /*
111  * Descriptor management.
112  */
/*
 * filehead and nfiles are updated together by falloc() and ffree() while
 * filelist_lock is held exclusively.
 */
struct filelist filehead;	/* head of list of open files */
int nfiles;			/* actual number of open files */
extern int cmask;		/* defined elsewhere; initial fd_cmask in fdinit() */
struct sx filelist_lock;	/* sx to protect filelist */
struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
118 
119 /*
120  * System calls on descriptors.
121  */
122 #ifndef _SYS_SYSPROTO_H_
123 struct getdtablesize_args {
124 	int	dummy;
125 };
126 #endif
127 /*
128  * MPSAFE
129  */
130 /* ARGSUSED */
131 int
132 getdtablesize(td, uap)
133 	struct thread *td;
134 	struct getdtablesize_args *uap;
135 {
136 	struct proc *p = td->td_proc;
137 
138 	mtx_lock(&Giant);
139 	td->td_retval[0] =
140 	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
141 	mtx_unlock(&Giant);
142 	return (0);
143 }
144 
145 /*
146  * Duplicate a file descriptor to a particular value.
147  *
148  * note: keep in mind that a potential race condition exists when closing
149  * descriptors from a shared descriptor table (via rfork).
150  */
151 #ifndef _SYS_SYSPROTO_H_
152 struct dup2_args {
153 	u_int	from;
154 	u_int	to;
155 };
156 #endif
157 /*
158  * MPSAFE
159  */
160 /* ARGSUSED */
161 int
162 dup2(td, uap)
163 	struct thread *td;
164 	struct dup2_args *uap;
165 {
166 	struct proc *p = td->td_proc;
167 	register struct filedesc *fdp = td->td_proc->p_fd;
168 	register u_int old = uap->from, new = uap->to;
169 	int i, error;
170 
171 	FILEDESC_LOCK(fdp);
172 retry:
173 	if (old >= fdp->fd_nfiles ||
174 	    fdp->fd_ofiles[old] == NULL ||
175 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
176 	    new >= maxfilesperproc) {
177 		FILEDESC_UNLOCK(fdp);
178 		return (EBADF);
179 	}
180 	if (old == new) {
181 		td->td_retval[0] = new;
182 		FILEDESC_UNLOCK(fdp);
183 		return (0);
184 	}
185 	if (new >= fdp->fd_nfiles) {
186 		if ((error = fdalloc(td, new, &i))) {
187 			FILEDESC_UNLOCK(fdp);
188 			return (error);
189 		}
190 		/*
191 		 * fdalloc() may block, retest everything.
192 		 */
193 		goto retry;
194 	}
195 	error = do_dup(fdp, (int)old, (int)new, td->td_retval, td);
196 	return(error);
197 }
198 
199 /*
200  * Duplicate a file descriptor.
201  */
202 #ifndef _SYS_SYSPROTO_H_
203 struct dup_args {
204 	u_int	fd;
205 };
206 #endif
207 /*
208  * MPSAFE
209  */
210 /* ARGSUSED */
211 int
212 dup(td, uap)
213 	struct thread *td;
214 	struct dup_args *uap;
215 {
216 	register struct filedesc *fdp;
217 	u_int old;
218 	int new, error;
219 
220 	old = uap->fd;
221 	fdp = td->td_proc->p_fd;
222 	FILEDESC_LOCK(fdp);
223 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
224 		FILEDESC_UNLOCK(fdp);
225 		return (EBADF);
226 	}
227 	if ((error = fdalloc(td, 0, &new))) {
228 		FILEDESC_UNLOCK(fdp);
229 		return (error);
230 	}
231 	error = do_dup(fdp, (int)old, new, td->td_retval, td);
232 	return (error);
233 }
234 
235 /*
236  * The file control system call.
237  */
238 #ifndef _SYS_SYSPROTO_H_
239 struct fcntl_args {
240 	int	fd;
241 	int	cmd;
242 	long	arg;
243 };
244 #endif
245 /*
246  * MPSAFE
247  */
248 /* ARGSUSED */
249 int
250 fcntl(td, uap)
251 	struct thread *td;
252 	register struct fcntl_args *uap;
253 {
254 	register struct proc *p = td->td_proc;
255 	register struct filedesc *fdp;
256 	register struct file *fp;
257 	register char *pop;
258 	struct vnode *vp;
259 	int i, tmp, error = 0, flg = F_POSIX;
260 	struct flock fl;
261 	u_int newmin;
262 	struct proc *leaderp;
263 
264 	mtx_lock(&Giant);
265 
266 	fdp = p->p_fd;
267 	FILEDESC_LOCK(fdp);
268 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
269 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
270 		FILEDESC_UNLOCK(fdp);
271 		error = EBADF;
272 		goto done2;
273 	}
274 	pop = &fdp->fd_ofileflags[uap->fd];
275 
276 	switch (uap->cmd) {
277 	case F_DUPFD:
278 		newmin = uap->arg;
279 		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
280 		    newmin >= maxfilesperproc) {
281 			FILEDESC_UNLOCK(fdp);
282 			error = EINVAL;
283 			break;
284 		}
285 		if ((error = fdalloc(td, newmin, &i))) {
286 			FILEDESC_UNLOCK(fdp);
287 			break;
288 		}
289 		error = do_dup(fdp, uap->fd, i, td->td_retval, td);
290 		break;
291 
292 	case F_GETFD:
293 		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
294 		FILEDESC_UNLOCK(fdp);
295 		break;
296 
297 	case F_SETFD:
298 		*pop = (*pop &~ UF_EXCLOSE) |
299 		    (uap->arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
300 		FILEDESC_UNLOCK(fdp);
301 		break;
302 
303 	case F_GETFL:
304 		FILE_LOCK(fp);
305 		FILEDESC_UNLOCK(fdp);
306 		td->td_retval[0] = OFLAGS(fp->f_flag);
307 		FILE_UNLOCK(fp);
308 		break;
309 
310 	case F_SETFL:
311 		fhold(fp);
312 		FILEDESC_UNLOCK(fdp);
313 		fp->f_flag &= ~FCNTLFLAGS;
314 		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
315 		tmp = fp->f_flag & FNONBLOCK;
316 		error = fo_ioctl(fp, FIONBIO, &tmp, td);
317 		if (error) {
318 			fdrop(fp, td);
319 			break;
320 		}
321 		tmp = fp->f_flag & FASYNC;
322 		error = fo_ioctl(fp, FIOASYNC, &tmp, td);
323 		if (!error) {
324 			fdrop(fp, td);
325 			break;
326 		}
327 		fp->f_flag &= ~FNONBLOCK;
328 		tmp = 0;
329 		(void)fo_ioctl(fp, FIONBIO, &tmp, td);
330 		fdrop(fp, td);
331 		break;
332 
333 	case F_GETOWN:
334 		fhold(fp);
335 		FILEDESC_UNLOCK(fdp);
336 		error = fo_ioctl(fp, FIOGETOWN, (void *)td->td_retval, td);
337 		fdrop(fp, td);
338 		break;
339 
340 	case F_SETOWN:
341 		fhold(fp);
342 		FILEDESC_UNLOCK(fdp);
343 		error = fo_ioctl(fp, FIOSETOWN, &uap->arg, td);
344 		fdrop(fp, td);
345 		break;
346 
347 	case F_SETLKW:
348 		flg |= F_WAIT;
349 		/* Fall into F_SETLK */
350 
351 	case F_SETLK:
352 		if (fp->f_type != DTYPE_VNODE) {
353 			FILEDESC_UNLOCK(fdp);
354 			error = EBADF;
355 			break;
356 		}
357 		vp = (struct vnode *)fp->f_data;
358 		/*
359 		 * copyin/lockop may block
360 		 */
361 		fhold(fp);
362 		FILEDESC_UNLOCK(fdp);
363 		vp = (struct vnode *)fp->f_data;
364 
365 		/* Copy in the lock structure */
366 		error = copyin((caddr_t)(intptr_t)uap->arg, &fl, sizeof(fl));
367 		if (error) {
368 			fdrop(fp, td);
369 			break;
370 		}
371 		if (fl.l_whence == SEEK_CUR) {
372 			if (fp->f_offset < 0 ||
373 			    (fl.l_start > 0 &&
374 			     fp->f_offset > OFF_MAX - fl.l_start)) {
375 				fdrop(fp, td);
376 				error = EOVERFLOW;
377 				break;
378 			}
379 			fl.l_start += fp->f_offset;
380 		}
381 
382 		switch (fl.l_type) {
383 		case F_RDLCK:
384 			if ((fp->f_flag & FREAD) == 0) {
385 				error = EBADF;
386 				break;
387 			}
388 			PROC_LOCK(p);
389 			p->p_flag |= P_ADVLOCK;
390 			leaderp = p->p_leader;
391 			PROC_UNLOCK(p);
392 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
393 			    &fl, flg);
394 			break;
395 		case F_WRLCK:
396 			if ((fp->f_flag & FWRITE) == 0) {
397 				error = EBADF;
398 				break;
399 			}
400 			PROC_LOCK(p);
401 			p->p_flag |= P_ADVLOCK;
402 			leaderp = p->p_leader;
403 			PROC_UNLOCK(p);
404 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
405 			    &fl, flg);
406 			break;
407 		case F_UNLCK:
408 			PROC_LOCK(p);
409 			leaderp = p->p_leader;
410 			PROC_UNLOCK(p);
411 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_UNLCK,
412 				&fl, F_POSIX);
413 			break;
414 		default:
415 			error = EINVAL;
416 			break;
417 		}
418 		fdrop(fp, td);
419 		break;
420 
421 	case F_GETLK:
422 		if (fp->f_type != DTYPE_VNODE) {
423 			FILEDESC_UNLOCK(fdp);
424 			error = EBADF;
425 			break;
426 		}
427 		vp = (struct vnode *)fp->f_data;
428 		/*
429 		 * copyin/lockop may block
430 		 */
431 		fhold(fp);
432 		FILEDESC_UNLOCK(fdp);
433 		vp = (struct vnode *)fp->f_data;
434 
435 		/* Copy in the lock structure */
436 		error = copyin((caddr_t)(intptr_t)uap->arg, &fl, sizeof(fl));
437 		if (error) {
438 			fdrop(fp, td);
439 			break;
440 		}
441 		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
442 		    fl.l_type != F_UNLCK) {
443 			fdrop(fp, td);
444 			error = EINVAL;
445 			break;
446 		}
447 		if (fl.l_whence == SEEK_CUR) {
448 			if ((fl.l_start > 0 &&
449 			     fp->f_offset > OFF_MAX - fl.l_start) ||
450 			    (fl.l_start < 0 &&
451 			     fp->f_offset < OFF_MIN - fl.l_start)) {
452 				fdrop(fp, td);
453 				error = EOVERFLOW;
454 				break;
455 			}
456 			fl.l_start += fp->f_offset;
457 		}
458 		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
459 			    &fl, F_POSIX);
460 		fdrop(fp, td);
461 		if (error == 0) {
462 			error = copyout(&fl, (caddr_t)(intptr_t)uap->arg,
463 			    sizeof(fl));
464 		}
465 		break;
466 	default:
467 		FILEDESC_UNLOCK(fdp);
468 		error = EINVAL;
469 		break;
470 	}
471 done2:
472 	mtx_unlock(&Giant);
473 	return (error);
474 }
475 
476 /*
477  * Common code for dup, dup2, and fcntl(F_DUPFD).
478  * filedesc must be locked, but will be unlocked as a side effect.
479  */
480 static int
481 do_dup(fdp, old, new, retval, td)
482 	register struct filedesc *fdp;
483 	register int old, new;
484 	register_t *retval;
485 	struct thread *td;
486 {
487 	struct file *fp;
488 	struct file *delfp;
489 
490 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
491 
492 	/*
493 	 * Save info on the descriptor being overwritten.  We have
494 	 * to do the unmap now, but we cannot close it without
495 	 * introducing an ownership race for the slot.
496 	 */
497 	delfp = fdp->fd_ofiles[new];
498 #if 0
499 	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
500 		(void) munmapfd(td, new);
501 #endif
502 
503 	/*
504 	 * Duplicate the source descriptor, update lastfile
505 	 */
506 	fp = fdp->fd_ofiles[old];
507 	fdp->fd_ofiles[new] = fp;
508 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
509 	fhold(fp);
510 	if (new > fdp->fd_lastfile)
511 		fdp->fd_lastfile = new;
512 	*retval = new;
513 
514 	FILEDESC_UNLOCK(fdp);
515 
516 	/*
517 	 * If we dup'd over a valid file, we now own the reference to it
518 	 * and must dispose of it using closef() semantics (as if a
519 	 * close() were performed on it).
520 	 */
521 	if (delfp) {
522 		mtx_lock(&Giant);
523 		(void) closef(delfp, td);
524 		mtx_unlock(&Giant);
525 	}
526 	return (0);
527 }
528 
529 /*
530  * If sigio is on the list associated with a process or process group,
531  * disable signalling from the device, remove sigio from the list and
532  * free sigio.
533  */
534 void
535 funsetown(sigiop)
536 	struct sigio **sigiop;
537 {
538 	struct sigio *sigio;
539 
540 	SIGIO_LOCK();
541 	sigio = *sigiop;
542 	if (sigio == NULL) {
543 		SIGIO_UNLOCK();
544 		return;
545 	}
546 	*(sigio->sio_myref) = NULL;
547 	if ((sigio)->sio_pgid < 0) {
548 		struct pgrp *pg = (sigio)->sio_pgrp;
549 		PGRP_LOCK(pg);
550 		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
551 			     sigio, sio_pgsigio);
552 		PGRP_UNLOCK(pg);
553 	} else {
554 		struct proc *p = (sigio)->sio_proc;
555 		PROC_LOCK(p);
556 		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
557 			     sigio, sio_pgsigio);
558 		PROC_UNLOCK(p);
559 	}
560 	SIGIO_UNLOCK();
561 	crfree(sigio->sio_ucred);
562 	FREE(sigio, M_SIGIO);
563 }
564 
565 /*
566  * Free a list of sigio structures.
567  * We only need to lock the SIGIO_LOCK because we have made ourselves
568  * inaccessable to callers of fsetown and therefore do not need to lock
569  * the proc or pgrp struct for the list manipulation.
570  */
571 void
572 funsetownlst(sigiolst)
573 	struct sigiolst *sigiolst;
574 {
575 	struct sigio *sigio;
576 	struct proc *p;
577 	struct pgrp *pg;
578 
579 	sigio = SLIST_FIRST(sigiolst);
580 	if (sigio == NULL)
581 		return;
582 
583 	p = NULL;
584 	pg = NULL;
585 
586 	/*
587 	 * Every entry of the list should belong
588 	 * to a single proc or pgrp.
589 	 */
590 	if (sigio->sio_pgid < 0) {
591 		pg = sigio->sio_pgrp;
592 		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
593 	} else /* if (sigio->sio_pgid > 0) */ {
594 		p = sigio->sio_proc;
595 		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
596 	}
597 
598 	SIGIO_LOCK();
599 	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
600 		*(sigio->sio_myref) = NULL;
601 		if (pg != NULL) {
602 			KASSERT(sigio->sio_pgid < 0,
603 			    ("Proc sigio in pgrp sigio list"));
604 			KASSERT(sigio->sio_pgrp == pg,
605 			    ("Bogus pgrp in sigio list"));
606 			PGRP_LOCK(pg);
607 			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
608 			    sio_pgsigio);
609 			PGRP_UNLOCK(pg);
610 		} else /* if (p != NULL) */ {
611 			KASSERT(sigio->sio_pgid > 0,
612 			    ("Pgrp sigio in proc sigio list"));
613 			KASSERT(sigio->sio_proc == p,
614 			    ("Bogus proc in sigio list"));
615 			PROC_LOCK(p);
616 			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
617 			    sio_pgsigio);
618 			PROC_UNLOCK(p);
619 		}
620 		SIGIO_UNLOCK();
621 		crfree(sigio->sio_ucred);
622 		FREE(sigio, M_SIGIO);
623 		SIGIO_LOCK();
624 	}
625 	SIGIO_UNLOCK();
626 }
627 
628 /*
629  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
630  *
631  * After permission checking, add a sigio structure to the sigio list for
632  * the process or process group.
633  */
634 int
635 fsetown(pgid, sigiop)
636 	pid_t pgid;
637 	struct sigio **sigiop;
638 {
639 	struct proc *proc;
640 	struct pgrp *pgrp;
641 	struct sigio *sigio;
642 	int ret;
643 
644 	if (pgid == 0) {
645 		funsetown(sigiop);
646 		return (0);
647 	}
648 
649 	ret = 0;
650 
651 	/* Allocate and fill in the new sigio out of locks. */
652 	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
653 	sigio->sio_pgid = pgid;
654 	sigio->sio_ucred = crhold(curthread->td_ucred);
655 	sigio->sio_myref = sigiop;
656 
657 	sx_slock(&proctree_lock);
658 	if (pgid > 0) {
659 		proc = pfind(pgid);
660 		if (proc == NULL) {
661 			ret = ESRCH;
662 			goto fail;
663 		}
664 
665 		/*
666 		 * Policy - Don't allow a process to FSETOWN a process
667 		 * in another session.
668 		 *
669 		 * Remove this test to allow maximum flexibility or
670 		 * restrict FSETOWN to the current process or process
671 		 * group for maximum safety.
672 		 */
673 		PROC_UNLOCK(proc);
674 		if (proc->p_session != curthread->td_proc->p_session) {
675 			ret = EPERM;
676 			goto fail;
677 		}
678 
679 		pgrp = NULL;
680 	} else /* if (pgid < 0) */ {
681 		pgrp = pgfind(-pgid);
682 		if (pgrp == NULL) {
683 			ret = ESRCH;
684 			goto fail;
685 		}
686 		PGRP_UNLOCK(pgrp);
687 
688 		/*
689 		 * Policy - Don't allow a process to FSETOWN a process
690 		 * in another session.
691 		 *
692 		 * Remove this test to allow maximum flexibility or
693 		 * restrict FSETOWN to the current process or process
694 		 * group for maximum safety.
695 		 */
696 		if (pgrp->pg_session != curthread->td_proc->p_session) {
697 			ret = EPERM;
698 			goto fail;
699 		}
700 
701 		proc = NULL;
702 	}
703 	funsetown(sigiop);
704 	if (pgid > 0) {
705 		PROC_LOCK(proc);
706 		/*
707 		 * since funsetownlst() is called without the proctree
708 		 * locked we need to check for P_WEXIT.
709 		 * XXX: is ESRCH correct?
710 		 */
711 		if ((proc->p_flag & P_WEXIT) != 0) {
712 			PROC_UNLOCK(proc);
713 			ret = ESRCH;
714 			goto fail;
715 		}
716 		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
717 		sigio->sio_proc = proc;
718 		PROC_UNLOCK(proc);
719 	} else {
720 		PGRP_LOCK(pgrp);
721 		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
722 		sigio->sio_pgrp = pgrp;
723 		PGRP_UNLOCK(pgrp);
724 	}
725 	sx_sunlock(&proctree_lock);
726 	SIGIO_LOCK();
727 	*sigiop = sigio;
728 	SIGIO_UNLOCK();
729 	return (0);
730 
731 fail:
732 	sx_sunlock(&proctree_lock);
733 	crfree(sigio->sio_ucred);
734 	FREE(sigio, M_SIGIO);
735 	return (ret);
736 }
737 
738 /*
739  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
740  */
741 pid_t
742 fgetown(sigio)
743 	struct sigio *sigio;
744 {
745 	return (sigio != NULL ? sigio->sio_pgid : 0);
746 }
747 
748 /*
749  * Close a file descriptor.
750  */
751 #ifndef _SYS_SYSPROTO_H_
752 struct close_args {
753         int     fd;
754 };
755 #endif
756 /*
757  * MPSAFE
758  */
759 /* ARGSUSED */
760 int
761 close(td, uap)
762 	struct thread *td;
763 	struct close_args *uap;
764 {
765 	register struct filedesc *fdp;
766 	register struct file *fp;
767 	register int fd = uap->fd;
768 	int error = 0;
769 
770 	mtx_lock(&Giant);
771 	fdp = td->td_proc->p_fd;
772 	FILEDESC_LOCK(fdp);
773 	if ((unsigned)fd >= fdp->fd_nfiles ||
774 	    (fp = fdp->fd_ofiles[fd]) == NULL) {
775 		FILEDESC_UNLOCK(fdp);
776 		error = EBADF;
777 		goto done2;
778 	}
779 #if 0
780 	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
781 		(void) munmapfd(td, fd);
782 #endif
783 	fdp->fd_ofiles[fd] = NULL;
784 	fdp->fd_ofileflags[fd] = 0;
785 
786 	/*
787 	 * we now hold the fp reference that used to be owned by the descriptor
788 	 * array.
789 	 */
790 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
791 		fdp->fd_lastfile--;
792 	if (fd < fdp->fd_freefile)
793 		fdp->fd_freefile = fd;
794 	if (fd < fdp->fd_knlistsize) {
795 		FILEDESC_UNLOCK(fdp);
796 		knote_fdclose(td, fd);
797 	} else
798 		FILEDESC_UNLOCK(fdp);
799 
800 	error = closef(fp, td);
801 done2:
802 	mtx_unlock(&Giant);
803 	return(error);
804 }
805 
806 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
807 /*
808  * Return status information about a file descriptor.
809  */
810 #ifndef _SYS_SYSPROTO_H_
811 struct ofstat_args {
812 	int	fd;
813 	struct	ostat *sb;
814 };
815 #endif
816 /*
817  * MPSAFE
818  */
819 /* ARGSUSED */
820 int
821 ofstat(td, uap)
822 	struct thread *td;
823 	register struct ofstat_args *uap;
824 {
825 	struct file *fp;
826 	struct stat ub;
827 	struct ostat oub;
828 	int error;
829 
830 	mtx_lock(&Giant);
831 	if ((error = fget(td, uap->fd, &fp)) != 0)
832 		goto done2;
833 	error = fo_stat(fp, &ub, td);
834 	if (error == 0) {
835 		cvtstat(&ub, &oub);
836 		error = copyout(&oub, uap->sb, sizeof (oub));
837 	}
838 	fdrop(fp, td);
839 done2:
840 	mtx_unlock(&Giant);
841 	return (error);
842 }
843 #endif /* COMPAT_43 || COMPAT_SUNOS */
844 
845 /*
846  * Return status information about a file descriptor.
847  */
848 #ifndef _SYS_SYSPROTO_H_
849 struct fstat_args {
850 	int	fd;
851 	struct	stat *sb;
852 };
853 #endif
854 /*
855  * MPSAFE
856  */
857 /* ARGSUSED */
858 int
859 fstat(td, uap)
860 	struct thread *td;
861 	struct fstat_args *uap;
862 {
863 	struct file *fp;
864 	struct stat ub;
865 	int error;
866 
867 	mtx_lock(&Giant);
868 	if ((error = fget(td, uap->fd, &fp)) != 0)
869 		goto done2;
870 	error = fo_stat(fp, &ub, td);
871 	if (error == 0)
872 		error = copyout(&ub, uap->sb, sizeof (ub));
873 	fdrop(fp, td);
874 done2:
875 	mtx_unlock(&Giant);
876 	return (error);
877 }
878 
879 /*
880  * Return status information about a file descriptor.
881  */
882 #ifndef _SYS_SYSPROTO_H_
883 struct nfstat_args {
884 	int	fd;
885 	struct	nstat *sb;
886 };
887 #endif
888 /*
889  * MPSAFE
890  */
891 /* ARGSUSED */
892 int
893 nfstat(td, uap)
894 	struct thread *td;
895 	register struct nfstat_args *uap;
896 {
897 	struct file *fp;
898 	struct stat ub;
899 	struct nstat nub;
900 	int error;
901 
902 	mtx_lock(&Giant);
903 	if ((error = fget(td, uap->fd, &fp)) != 0)
904 		goto done2;
905 	error = fo_stat(fp, &ub, td);
906 	if (error == 0) {
907 		cvtnstat(&ub, &nub);
908 		error = copyout(&nub, uap->sb, sizeof (nub));
909 	}
910 	fdrop(fp, td);
911 done2:
912 	mtx_unlock(&Giant);
913 	return (error);
914 }
915 
916 /*
917  * Return pathconf information about a file descriptor.
918  */
919 #ifndef _SYS_SYSPROTO_H_
920 struct fpathconf_args {
921 	int	fd;
922 	int	name;
923 };
924 #endif
925 /*
926  * MPSAFE
927  */
928 /* ARGSUSED */
929 int
930 fpathconf(td, uap)
931 	struct thread *td;
932 	register struct fpathconf_args *uap;
933 {
934 	struct file *fp;
935 	struct vnode *vp;
936 	int error;
937 
938 	if ((error = fget(td, uap->fd, &fp)) != 0)
939 		return (error);
940 
941 	switch (fp->f_type) {
942 	case DTYPE_PIPE:
943 	case DTYPE_SOCKET:
944 		if (uap->name != _PC_PIPE_BUF) {
945 			error = EINVAL;
946 		} else {
947 			td->td_retval[0] = PIPE_BUF;
948 			error = 0;
949 		}
950 		break;
951 	case DTYPE_FIFO:
952 	case DTYPE_VNODE:
953 		vp = (struct vnode *)fp->f_data;
954 		mtx_lock(&Giant);
955 		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
956 		mtx_unlock(&Giant);
957 		break;
958 	default:
959 		error = EOPNOTSUPP;
960 		break;
961 	}
962 	fdrop(fp, td);
963 	return(error);
964 }
965 
966 /*
967  * Allocate a file descriptor for the process.
968  */
/*
 * Counter bumped by fdalloc() each time it installs a larger descriptor
 * array; exported read-only under the debug sysctl tree as "fdexpand".
 */
static int fdexpand;
SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
971 
972 int
973 fdalloc(td, want, result)
974 	struct thread *td;
975 	int want;
976 	int *result;
977 {
978 	struct proc *p = td->td_proc;
979 	register struct filedesc *fdp = td->td_proc->p_fd;
980 	register int i;
981 	int lim, last, nfiles;
982 	struct file **newofile, **oldofile;
983 	char *newofileflags;
984 
985 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
986 
987 	/*
988 	 * Search for a free descriptor starting at the higher
989 	 * of want or fd_freefile.  If that fails, consider
990 	 * expanding the ofile array.
991 	 */
992 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
993 	for (;;) {
994 		last = min(fdp->fd_nfiles, lim);
995 		if ((i = want) < fdp->fd_freefile)
996 			i = fdp->fd_freefile;
997 		for (; i < last; i++) {
998 			if (fdp->fd_ofiles[i] == NULL) {
999 				fdp->fd_ofileflags[i] = 0;
1000 				if (i > fdp->fd_lastfile)
1001 					fdp->fd_lastfile = i;
1002 				if (want <= fdp->fd_freefile)
1003 					fdp->fd_freefile = i;
1004 				*result = i;
1005 				return (0);
1006 			}
1007 		}
1008 
1009 		/*
1010 		 * No space in current array.  Expand?
1011 		 */
1012 		if (fdp->fd_nfiles >= lim)
1013 			return (EMFILE);
1014 		if (fdp->fd_nfiles < NDEXTENT)
1015 			nfiles = NDEXTENT;
1016 		else
1017 			nfiles = 2 * fdp->fd_nfiles;
1018 		FILEDESC_UNLOCK(fdp);
1019 		mtx_lock(&Giant);
1020 		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
1021 		    M_FILEDESC, M_WAITOK);
1022 		mtx_unlock(&Giant);
1023 		FILEDESC_LOCK(fdp);
1024 
1025 		/*
1026 		 * deal with file-table extend race that might have occured
1027 		 * when malloc was blocked.
1028 		 */
1029 		if (fdp->fd_nfiles >= nfiles) {
1030 			FILEDESC_UNLOCK(fdp);
1031 			mtx_lock(&Giant);
1032 			FREE(newofile, M_FILEDESC);
1033 			mtx_unlock(&Giant);
1034 			FILEDESC_LOCK(fdp);
1035 			continue;
1036 		}
1037 		newofileflags = (char *) &newofile[nfiles];
1038 		/*
1039 		 * Copy the existing ofile and ofileflags arrays
1040 		 * and zero the new portion of each array.
1041 		 */
1042 		bcopy(fdp->fd_ofiles, newofile,
1043 			(i = sizeof(struct file *) * fdp->fd_nfiles));
1044 		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
1045 		bcopy(fdp->fd_ofileflags, newofileflags,
1046 			(i = sizeof(char) * fdp->fd_nfiles));
1047 		bzero(newofileflags + i, nfiles * sizeof(char) - i);
1048 		if (fdp->fd_nfiles > NDFILE)
1049 			oldofile = fdp->fd_ofiles;
1050 		else
1051 			oldofile = NULL;
1052 		fdp->fd_ofiles = newofile;
1053 		fdp->fd_ofileflags = newofileflags;
1054 		fdp->fd_nfiles = nfiles;
1055 		fdexpand++;
1056 		if (oldofile != NULL) {
1057 			FILEDESC_UNLOCK(fdp);
1058 			mtx_lock(&Giant);
1059 			FREE(oldofile, M_FILEDESC);
1060 			mtx_unlock(&Giant);
1061 			FILEDESC_LOCK(fdp);
1062 		}
1063 	}
1064 	return (0);
1065 }
1066 
1067 /*
1068  * Check to see whether n user file descriptors
1069  * are available to the process p.
1070  */
1071 int
1072 fdavail(td, n)
1073 	struct thread *td;
1074 	register int n;
1075 {
1076 	struct proc *p = td->td_proc;
1077 	register struct filedesc *fdp = td->td_proc->p_fd;
1078 	register struct file **fpp;
1079 	register int i, lim, last;
1080 
1081 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1082 
1083 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1084 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
1085 		return (1);
1086 
1087 	last = min(fdp->fd_nfiles, lim);
1088 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
1089 	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
1090 		if (*fpp == NULL && --n <= 0)
1091 			return (1);
1092 	}
1093 	return (0);
1094 }
1095 
1096 /*
1097  * Create a new open file structure and allocate
1098  * a file decriptor for the process that refers to it.
1099  */
1100 int
1101 falloc(td, resultfp, resultfd)
1102 	register struct thread *td;
1103 	struct file **resultfp;
1104 	int *resultfd;
1105 {
1106 	struct proc *p = td->td_proc;
1107 	register struct file *fp, *fq;
1108 	int error, i;
1109 
1110 	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
1111 	sx_xlock(&filelist_lock);
1112 	if (nfiles >= maxfiles) {
1113 		sx_xunlock(&filelist_lock);
1114 		uma_zfree(file_zone, fp);
1115 		tablefull("file");
1116 		return (ENFILE);
1117 	}
1118 	nfiles++;
1119 
1120 	/*
1121 	 * If the process has file descriptor zero open, add the new file
1122 	 * descriptor to the list of open files at that point, otherwise
1123 	 * put it at the front of the list of open files.
1124 	 */
1125 	FILEDESC_LOCK(p->p_fd);
1126 	if ((error = fdalloc(td, 0, &i))) {
1127 		FILEDESC_UNLOCK(p->p_fd);
1128 		nfiles--;
1129 		sx_xunlock(&filelist_lock);
1130 		uma_zfree(file_zone, fp);
1131 		return (error);
1132 	}
1133 	fp->f_mtxp = mtx_pool_alloc();
1134 	fp->f_gcflag = 0;
1135 	fp->f_count = 1;
1136 	fp->f_cred = crhold(td->td_ucred);
1137 	fp->f_ops = &badfileops;
1138 	fp->f_seqcount = 1;
1139 	if ((fq = p->p_fd->fd_ofiles[0])) {
1140 		LIST_INSERT_AFTER(fq, fp, f_list);
1141 	} else {
1142 		LIST_INSERT_HEAD(&filehead, fp, f_list);
1143 	}
1144 	p->p_fd->fd_ofiles[i] = fp;
1145 	FILEDESC_UNLOCK(p->p_fd);
1146 	sx_xunlock(&filelist_lock);
1147 	if (resultfp)
1148 		*resultfp = fp;
1149 	if (resultfd)
1150 		*resultfd = i;
1151 	return (0);
1152 }
1153 
1154 /*
1155  * Free a file descriptor.
1156  */
1157 void
1158 ffree(fp)
1159 	register struct file *fp;
1160 {
1161 
1162 	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
1163 	sx_xlock(&filelist_lock);
1164 	LIST_REMOVE(fp, f_list);
1165 	nfiles--;
1166 	sx_xunlock(&filelist_lock);
1167 	crfree(fp->f_cred);
1168 	uma_zfree(file_zone, fp);
1169 }
1170 
1171 /*
1172  * Build a new filedesc structure.
1173  */
1174 struct filedesc *
1175 fdinit(td)
1176 	struct thread *td;
1177 {
1178 	register struct filedesc0 *newfdp;
1179 	register struct filedesc *fdp = td->td_proc->p_fd;
1180 
1181 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1182 	    M_FILEDESC, M_WAITOK | M_ZERO);
1183 	mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1184 	FILEDESC_LOCK(&newfdp->fd_fd);
1185 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1186 	if (newfdp->fd_fd.fd_cdir)
1187 		VREF(newfdp->fd_fd.fd_cdir);
1188 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1189 	if (newfdp->fd_fd.fd_rdir)
1190 		VREF(newfdp->fd_fd.fd_rdir);
1191 	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1192 	if (newfdp->fd_fd.fd_jdir)
1193 		VREF(newfdp->fd_fd.fd_jdir);
1194 
1195 	/* Create the file descriptor table. */
1196 	newfdp->fd_fd.fd_refcnt = 1;
1197 	newfdp->fd_fd.fd_cmask = cmask;
1198 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1199 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1200 	newfdp->fd_fd.fd_nfiles = NDFILE;
1201 	newfdp->fd_fd.fd_knlistsize = -1;
1202 	FILEDESC_UNLOCK(&newfdp->fd_fd);
1203 
1204 	return (&newfdp->fd_fd);
1205 }
1206 
1207 /*
1208  * Share a filedesc structure.
1209  */
1210 struct filedesc *
1211 fdshare(p)
1212 	struct proc *p;
1213 {
1214 	FILEDESC_LOCK(p->p_fd);
1215 	p->p_fd->fd_refcnt++;
1216 	FILEDESC_UNLOCK(p->p_fd);
1217 	return (p->p_fd);
1218 }
1219 
1220 /*
1221  * Copy a filedesc structure.
1222  */
1223 struct filedesc *
1224 fdcopy(td)
1225 	struct thread *td;
1226 {
1227 	register struct filedesc *newfdp, *fdp = td->td_proc->p_fd;
1228 	register struct file **fpp;
1229 	register int i, j;
1230 
1231 	/* Certain daemons might not have file descriptors. */
1232 	if (fdp == NULL)
1233 		return (NULL);
1234 
1235 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1236 
1237 	FILEDESC_UNLOCK(fdp);
1238 	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1239 	    M_FILEDESC, M_WAITOK);
1240 	FILEDESC_LOCK(fdp);
1241 	bcopy(fdp, newfdp, sizeof(struct filedesc));
1242 	FILEDESC_UNLOCK(fdp);
1243 	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
1244 	mtx_init(&newfdp->fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1245 	if (newfdp->fd_cdir)
1246 		VREF(newfdp->fd_cdir);
1247 	if (newfdp->fd_rdir)
1248 		VREF(newfdp->fd_rdir);
1249 	if (newfdp->fd_jdir)
1250 		VREF(newfdp->fd_jdir);
1251 	newfdp->fd_refcnt = 1;
1252 
1253 	/*
1254 	 * If the number of open files fits in the internal arrays
1255 	 * of the open file structure, use them, otherwise allocate
1256 	 * additional memory for the number of descriptors currently
1257 	 * in use.
1258 	 */
1259 	FILEDESC_LOCK(fdp);
1260 	newfdp->fd_lastfile = fdp->fd_lastfile;
1261 	newfdp->fd_nfiles = fdp->fd_nfiles;
1262 	if (newfdp->fd_lastfile < NDFILE) {
1263 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1264 		newfdp->fd_ofileflags =
1265 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
1266 		i = NDFILE;
1267 	} else {
1268 		/*
1269 		 * Compute the smallest multiple of NDEXTENT needed
1270 		 * for the file descriptors currently in use,
1271 		 * allowing the table to shrink.
1272 		 */
1273 retry:
1274 		i = newfdp->fd_nfiles;
1275 		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1276 			i /= 2;
1277 		FILEDESC_UNLOCK(fdp);
1278 		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1279 		    M_FILEDESC, M_WAITOK);
1280 		FILEDESC_LOCK(fdp);
1281 		newfdp->fd_lastfile = fdp->fd_lastfile;
1282 		newfdp->fd_nfiles = fdp->fd_nfiles;
1283 		j = newfdp->fd_nfiles;
1284 		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
1285 			j /= 2;
1286 		if (i != j) {
1287 			/*
1288 			 * The size of the original table has changed.
1289 			 * Go over once again.
1290 			 */
1291 			FILEDESC_UNLOCK(fdp);
1292 			FREE(newfdp->fd_ofiles, M_FILEDESC);
1293 			FILEDESC_LOCK(fdp);
1294 			newfdp->fd_lastfile = fdp->fd_lastfile;
1295 			newfdp->fd_nfiles = fdp->fd_nfiles;
1296 			goto retry;
1297 		}
1298 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1299 	}
1300 	newfdp->fd_nfiles = i;
1301 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1302 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1303 
1304 	/*
1305 	 * kq descriptors cannot be copied.
1306 	 */
1307 	if (newfdp->fd_knlistsize != -1) {
1308 		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1309 		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1310 			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1311 				*fpp = NULL;
1312 				if (i < newfdp->fd_freefile)
1313 					newfdp->fd_freefile = i;
1314 			}
1315 			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1316 				newfdp->fd_lastfile--;
1317 		}
1318 		newfdp->fd_knlist = NULL;
1319 		newfdp->fd_knlistsize = -1;
1320 		newfdp->fd_knhash = NULL;
1321 		newfdp->fd_knhashmask = 0;
1322 	}
1323 
1324 	fpp = newfdp->fd_ofiles;
1325 	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1326 		if (*fpp != NULL) {
1327 			fhold(*fpp);
1328 		}
1329 	}
1330 	return (newfdp);
1331 }
1332 
1333 /*
1334  * Release a filedesc structure.
1335  */
1336 void
1337 fdfree(td)
1338 	struct thread *td;
1339 {
1340 	register struct filedesc *fdp;
1341 	struct file **fpp;
1342 	register int i;
1343 
1344 	fdp = td->td_proc->p_fd;
1345 	/* Certain daemons might not have file descriptors. */
1346 	if (fdp == NULL)
1347 		return;
1348 
1349 	FILEDESC_LOCK(fdp);
1350 	if (--fdp->fd_refcnt > 0) {
1351 		FILEDESC_UNLOCK(fdp);
1352 		return;
1353 	}
1354 	/*
1355 	 * we are the last reference to the structure, we can
1356 	 * safely assume it will not change out from under us.
1357 	 */
1358 	FILEDESC_UNLOCK(fdp);
1359 	fpp = fdp->fd_ofiles;
1360 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1361 		if (*fpp)
1362 			(void) closef(*fpp, td);
1363 	}
1364 
1365 	PROC_LOCK(td->td_proc);
1366 	td->td_proc->p_fd = NULL;
1367 	PROC_UNLOCK(td->td_proc);
1368 
1369 	if (fdp->fd_nfiles > NDFILE)
1370 		FREE(fdp->fd_ofiles, M_FILEDESC);
1371 	if (fdp->fd_cdir)
1372 		vrele(fdp->fd_cdir);
1373 	if (fdp->fd_rdir)
1374 		vrele(fdp->fd_rdir);
1375 	if (fdp->fd_jdir)
1376 		vrele(fdp->fd_jdir);
1377 	if (fdp->fd_knlist)
1378 		FREE(fdp->fd_knlist, M_KQUEUE);
1379 	if (fdp->fd_knhash)
1380 		FREE(fdp->fd_knhash, M_KQUEUE);
1381 	mtx_destroy(&fdp->fd_mtx);
1382 	FREE(fdp, M_FILEDESC);
1383 }
1384 
1385 /*
1386  * For setugid programs, we don't want to people to use that setugidness
1387  * to generate error messages which write to a file which otherwise would
1388  * otherwise be off-limits to the process.
1389  *
1390  * This is a gross hack to plug the hole.  A better solution would involve
1391  * a special vop or other form of generalized access control mechanism.  We
1392  * go ahead and just reject all procfs filesystems accesses as dangerous.
1393  *
1394  * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1395  * sufficient.  We also don't for check setugidness since we know we are.
1396  */
1397 static int
1398 is_unsafe(struct file *fp)
1399 {
1400 	if (fp->f_type == DTYPE_VNODE &&
1401 	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1402 		return (1);
1403 	return (0);
1404 }
1405 
1406 /*
1407  * Make this setguid thing safe, if at all possible.
1408  */
1409 void
1410 setugidsafety(td)
1411 	struct thread *td;
1412 {
1413 	struct filedesc *fdp = td->td_proc->p_fd;
1414 	register int i;
1415 
1416 	/* Certain daemons might not have file descriptors. */
1417 	if (fdp == NULL)
1418 		return;
1419 
1420 	/*
1421 	 * note: fdp->fd_ofiles may be reallocated out from under us while
1422 	 * we are blocked in a close.  Be careful!
1423 	 */
1424 	FILEDESC_LOCK(fdp);
1425 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1426 		if (i > 2)
1427 			break;
1428 		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1429 			struct file *fp;
1430 
1431 #if 0
1432 			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1433 				(void) munmapfd(td, i);
1434 #endif
1435 			if (i < fdp->fd_knlistsize) {
1436 				FILEDESC_UNLOCK(fdp);
1437 				knote_fdclose(td, i);
1438 				FILEDESC_LOCK(fdp);
1439 			}
1440 			/*
1441 			 * NULL-out descriptor prior to close to avoid
1442 			 * a race while close blocks.
1443 			 */
1444 			fp = fdp->fd_ofiles[i];
1445 			fdp->fd_ofiles[i] = NULL;
1446 			fdp->fd_ofileflags[i] = 0;
1447 			if (i < fdp->fd_freefile)
1448 				fdp->fd_freefile = i;
1449 			FILEDESC_UNLOCK(fdp);
1450 			(void) closef(fp, td);
1451 			FILEDESC_LOCK(fdp);
1452 		}
1453 	}
1454 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1455 		fdp->fd_lastfile--;
1456 	FILEDESC_UNLOCK(fdp);
1457 }
1458 
1459 /*
1460  * Close any files on exec?
1461  */
1462 void
1463 fdcloseexec(td)
1464 	struct thread *td;
1465 {
1466 	struct filedesc *fdp = td->td_proc->p_fd;
1467 	register int i;
1468 
1469 	/* Certain daemons might not have file descriptors. */
1470 	if (fdp == NULL)
1471 		return;
1472 
1473 	FILEDESC_LOCK(fdp);
1474 
1475 	/*
1476 	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1477 	 * may block and rip them out from under us.
1478 	 */
1479 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1480 		if (fdp->fd_ofiles[i] != NULL &&
1481 		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1482 			struct file *fp;
1483 
1484 #if 0
1485 			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1486 				(void) munmapfd(td, i);
1487 #endif
1488 			if (i < fdp->fd_knlistsize) {
1489 				FILEDESC_UNLOCK(fdp);
1490 				knote_fdclose(td, i);
1491 				FILEDESC_LOCK(fdp);
1492 			}
1493 			/*
1494 			 * NULL-out descriptor prior to close to avoid
1495 			 * a race while close blocks.
1496 			 */
1497 			fp = fdp->fd_ofiles[i];
1498 			fdp->fd_ofiles[i] = NULL;
1499 			fdp->fd_ofileflags[i] = 0;
1500 			if (i < fdp->fd_freefile)
1501 				fdp->fd_freefile = i;
1502 			FILEDESC_UNLOCK(fdp);
1503 			(void) closef(fp, td);
1504 			FILEDESC_LOCK(fdp);
1505 		}
1506 	}
1507 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1508 		fdp->fd_lastfile--;
1509 	FILEDESC_UNLOCK(fdp);
1510 }
1511 
1512 /*
1513  * It is unsafe for set[ug]id processes to be started with file
1514  * descriptors 0..2 closed, as these descriptors are given implicit
1515  * significance in the Standard C library.  fdcheckstd() will create a
1516  * descriptor referencing /dev/null for each of stdin, stdout, and
1517  * stderr that is not already open.
1518  */
1519 int
1520 fdcheckstd(td)
1521 	struct thread *td;
1522 {
1523 	struct nameidata nd;
1524 	struct filedesc *fdp;
1525 	struct file *fp;
1526 	register_t retval;
1527 	int fd, i, error, flags, devnull;
1528 
1529 	fdp = td->td_proc->p_fd;
1530 	if (fdp == NULL)
1531 		return (0);
1532 	devnull = -1;
1533 	error = 0;
1534 	for (i = 0; i < 3; i++) {
1535 		if (fdp->fd_ofiles[i] != NULL)
1536 			continue;
1537 		if (devnull < 0) {
1538 			error = falloc(td, &fp, &fd);
1539 			if (error != 0)
1540 				break;
1541 			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
1542 			    td);
1543 			flags = FREAD | FWRITE;
1544 			error = vn_open(&nd, &flags, 0);
1545 			if (error != 0) {
1546 				FILEDESC_LOCK(fdp);
1547 				fdp->fd_ofiles[i] = NULL;
1548 				FILEDESC_UNLOCK(fdp);
1549 				fdrop(fp, td);
1550 				break;
1551 			}
1552 			NDFREE(&nd, NDF_ONLY_PNBUF);
1553 			fp->f_data = nd.ni_vp;
1554 			fp->f_flag = flags;
1555 			fp->f_ops = &vnops;
1556 			fp->f_type = DTYPE_VNODE;
1557 			VOP_UNLOCK(nd.ni_vp, 0, td);
1558 			devnull = fd;
1559 		} else {
1560 			FILEDESC_LOCK(fdp);
1561 			error = fdalloc(td, 0, &fd);
1562 			if (error != 0) {
1563 				FILEDESC_UNLOCK(fdp);
1564 				break;
1565 			}
1566 			error = do_dup(fdp, devnull, fd, &retval, td);
1567 			if (error != 0)
1568 				break;
1569 		}
1570 	}
1571 	return (error);
1572 }
1573 
1574 /*
1575  * Internal form of close.
1576  * Decrement reference count on file structure.
1577  * Note: td may be NULL when closing a file
1578  * that was being passed in a message.
1579  */
1580 int
1581 closef(fp, td)
1582 	register struct file *fp;
1583 	register struct thread *td;
1584 {
1585 	struct vnode *vp;
1586 	struct flock lf;
1587 
1588 	if (fp == NULL)
1589 		return (0);
1590 	/*
1591 	 * POSIX record locking dictates that any close releases ALL
1592 	 * locks owned by this process.  This is handled by setting
1593 	 * a flag in the unlock to free ONLY locks obeying POSIX
1594 	 * semantics, and not to free BSD-style file locks.
1595 	 * If the descriptor was in a message, POSIX-style locks
1596 	 * aren't passed with the descriptor.
1597 	 */
1598 	if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1599 	    fp->f_type == DTYPE_VNODE) {
1600 		lf.l_whence = SEEK_SET;
1601 		lf.l_start = 0;
1602 		lf.l_len = 0;
1603 		lf.l_type = F_UNLCK;
1604 		vp = (struct vnode *)fp->f_data;
1605 		(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1606 		    F_UNLCK, &lf, F_POSIX);
1607 	}
1608 	return (fdrop(fp, td));
1609 }
1610 
/*
 * Drop one reference on the file structure passed in.
 *
 * This is the unlocked entry point: it takes the file lock and lets
 * fdrop_locked() do the work, returning whatever that returns.
 */
int
fdrop(struct file *fp, struct thread *td)
{
	int error;

	FILE_LOCK(fp);
	error = fdrop_locked(fp, td);
	return (error);
}
1624 
1625 /*
1626  * Extract the file pointer associated with the specified descriptor for
1627  * the current user process.
1628  *
1629  * If the descriptor doesn't exist, EBADF is returned.
1630  *
1631  * If the descriptor exists but doesn't match 'flags' then
1632  * return EBADF for read attempts and EINVAL for write attempts.
1633  *
1634  * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
1635  * It should be droped with fdrop().
1636  * If it is not set, then the refcount will not be bumped however the
1637  * thread's filedesc struct will be returned locked (for fgetsock).
1638  *
1639  * If an error occured the non-zero error is returned and *fpp is set to NULL.
1640  * Otherwise *fpp is set and zero is returned.
1641  */
1642 static __inline
1643 int
1644 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1645 {
1646 	struct filedesc *fdp;
1647 	struct file *fp;
1648 
1649 	*fpp = NULL;
1650 	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1651 		return(EBADF);
1652 	FILEDESC_LOCK(fdp);
1653 	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1654 		FILEDESC_UNLOCK(fdp);
1655 		return(EBADF);
1656 	}
1657 
1658 	/*
1659 	 * Note: FREAD failures returns EBADF to maintain backwards
1660 	 * compatibility with what routines returned before.
1661 	 *
1662 	 * Only one flag, or 0, may be specified.
1663 	 */
1664 	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1665 		FILEDESC_UNLOCK(fdp);
1666 		return(EBADF);
1667 	}
1668 	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1669 		FILEDESC_UNLOCK(fdp);
1670 		return(EINVAL);
1671 	}
1672 	if (hold) {
1673 		fhold(fp);
1674 		FILEDESC_UNLOCK(fdp);
1675 	}
1676 	*fpp = fp;
1677 	return(0);
1678 }
1679 
/*
 * Look up descriptor fd for td and return the file, held, in *fpp.
 * No access-mode flag is requested from _fget().
 */
int
fget(struct thread *td, int fd, struct file **fpp)
{
	int error;

	error = _fget(td, fd, fpp, 0, 1);
	return (error);
}
1685 
1686 int
1687 fget_read(struct thread *td, int fd, struct file **fpp)
1688 {
1689     return(_fget(td, fd, fpp, FREAD, 1));
1690 }
1691 
1692 int
1693 fget_write(struct thread *td, int fd, struct file **fpp)
1694 {
1695     return(_fget(td, fd, fpp, FWRITE, 1));
1696 }
1697 
1698 /*
1699  * Like fget() but loads the underlying vnode, or returns an error if
1700  * the descriptor does not represent a vnode.  Note that pipes use vnodes
1701  * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1702  * error).  The returned vnode will be vref()d.
1703  */
1704 
1705 static __inline
1706 int
1707 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1708 {
1709 	struct file *fp;
1710 	int error;
1711 
1712 	*vpp = NULL;
1713 	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1714 		return (error);
1715 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1716 		error = EINVAL;
1717 	} else {
1718 		*vpp = (struct vnode *)fp->f_data;
1719 		vref(*vpp);
1720 	}
1721 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1722 	return (error);
1723 }
1724 
/*
 * Return the referenced vnode behind descriptor fd in *vpp, passing 0
 * (no access mode) as the flags argument of _fgetvp().
 */
int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{
	int error;

	error = _fgetvp(td, fd, vpp, 0);
	return (error);
}
1730 
1731 int
1732 fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1733 {
1734 	return(_fgetvp(td, fd, vpp, FREAD));
1735 }
1736 
1737 int
1738 fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1739 {
1740 	return(_fgetvp(td, fd, vpp, FWRITE));
1741 }
1742 
1743 /*
1744  * Like fget() but loads the underlying socket, or returns an error if
1745  * the descriptor does not represent a socket.
1746  *
1747  * We bump the ref count on the returned socket.  XXX Also obtain the SX lock in
1748  * the future.
1749  */
1750 int
1751 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1752 {
1753 	struct file *fp;
1754 	int error;
1755 
1756 	*spp = NULL;
1757 	if (fflagp)
1758 		*fflagp = 0;
1759 	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1760 		return (error);
1761 	if (fp->f_type != DTYPE_SOCKET) {
1762 		error = ENOTSOCK;
1763 	} else {
1764 		*spp = (struct socket *)fp->f_data;
1765 		if (fflagp)
1766 			*fflagp = fp->f_flag;
1767 		soref(*spp);
1768 	}
1769 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1770 	return(error);
1771 }
1772 
/*
 * Counterpart of fgetsock(): drop the reference count on the socket.
 * The last reference closes the socket.
 *
 * XXX Release the SX lock here in the future.
 */
void
fputsock(struct socket *so)
{

	sorele(so);
}
1782 
1783 /*
1784  * Drop reference on struct file passed in, may call closef if the
1785  * reference hits zero.
1786  * Expects struct file locked, and will unlock it.
1787  */
1788 int
1789 fdrop_locked(fp, td)
1790 	struct file *fp;
1791 	struct thread *td;
1792 {
1793 	struct flock lf;
1794 	struct vnode *vp;
1795 	int error;
1796 
1797 	FILE_LOCK_ASSERT(fp, MA_OWNED);
1798 
1799 	if (--fp->f_count > 0) {
1800 		FILE_UNLOCK(fp);
1801 		return (0);
1802 	}
1803 	mtx_lock(&Giant);
1804 	if (fp->f_count < 0)
1805 		panic("fdrop: count < 0");
1806 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1807 		lf.l_whence = SEEK_SET;
1808 		lf.l_start = 0;
1809 		lf.l_len = 0;
1810 		lf.l_type = F_UNLCK;
1811 		vp = (struct vnode *)fp->f_data;
1812 		FILE_UNLOCK(fp);
1813 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1814 	} else
1815 		FILE_UNLOCK(fp);
1816 	if (fp->f_ops != &badfileops)
1817 		error = fo_close(fp, td);
1818 	else
1819 		error = 0;
1820 	ffree(fp);
1821 	mtx_unlock(&Giant);
1822 	return (error);
1823 }
1824 
1825 /*
1826  * Apply an advisory lock on a file descriptor.
1827  *
1828  * Just attempt to get a record lock of the requested type on
1829  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1830  */
1831 #ifndef _SYS_SYSPROTO_H_
1832 struct flock_args {
1833 	int	fd;
1834 	int	how;
1835 };
1836 #endif
1837 /*
1838  * MPSAFE
1839  */
1840 /* ARGSUSED */
1841 int
1842 flock(td, uap)
1843 	struct thread *td;
1844 	register struct flock_args *uap;
1845 {
1846 	struct file *fp;
1847 	struct vnode *vp;
1848 	struct flock lf;
1849 	int error;
1850 
1851 	if ((error = fget(td, uap->fd, &fp)) != 0)
1852 		return (error);
1853 	if (fp->f_type != DTYPE_VNODE) {
1854 		fdrop(fp, td);
1855 		return (EOPNOTSUPP);
1856 	}
1857 
1858 	mtx_lock(&Giant);
1859 	vp = (struct vnode *)fp->f_data;
1860 	lf.l_whence = SEEK_SET;
1861 	lf.l_start = 0;
1862 	lf.l_len = 0;
1863 	if (uap->how & LOCK_UN) {
1864 		lf.l_type = F_UNLCK;
1865 		FILE_LOCK(fp);
1866 		fp->f_flag &= ~FHASLOCK;
1867 		FILE_UNLOCK(fp);
1868 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1869 		goto done2;
1870 	}
1871 	if (uap->how & LOCK_EX)
1872 		lf.l_type = F_WRLCK;
1873 	else if (uap->how & LOCK_SH)
1874 		lf.l_type = F_RDLCK;
1875 	else {
1876 		error = EBADF;
1877 		goto done2;
1878 	}
1879 	FILE_LOCK(fp);
1880 	fp->f_flag |= FHASLOCK;
1881 	FILE_UNLOCK(fp);
1882 	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1883 	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
1884 done2:
1885 	fdrop(fp, td);
1886 	mtx_unlock(&Giant);
1887 	return (error);
1888 }
1889 
1890 /*
1891  * File Descriptor pseudo-device driver (/dev/fd/).
1892  *
1893  * Opening minor device N dup()s the file (if any) connected to file
1894  * descriptor N belonging to the calling process.  Note that this driver
1895  * consists of only the ``open()'' routine, because all subsequent
1896  * references to this file will be direct to the other driver.
1897  */
1898 /* ARGSUSED */
1899 static int
1900 fdopen(dev, mode, type, td)
1901 	dev_t dev;
1902 	int mode, type;
1903 	struct thread *td;
1904 {
1905 
1906 	/*
1907 	 * XXX Kludge: set curthread->td_dupfd to contain the value of the
1908 	 * the file descriptor being sought for duplication. The error
1909 	 * return ensures that the vnode for this device will be released
1910 	 * by vn_open. Open will detect this special error and take the
1911 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1912 	 * will simply report the error.
1913 	 */
1914 	td->td_dupfd = dev2unit(dev);
1915 	return (ENODEV);
1916 }
1917 
1918 /*
1919  * Duplicate the specified descriptor to a free descriptor.
1920  */
1921 int
1922 dupfdopen(td, fdp, indx, dfd, mode, error)
1923 	struct thread *td;
1924 	struct filedesc *fdp;
1925 	int indx, dfd;
1926 	int mode;
1927 	int error;
1928 {
1929 	register struct file *wfp;
1930 	struct file *fp;
1931 
1932 	/*
1933 	 * If the to-be-dup'd fd number is greater than the allowed number
1934 	 * of file descriptors, or the fd to be dup'd has already been
1935 	 * closed, then reject.
1936 	 */
1937 	FILEDESC_LOCK(fdp);
1938 	if ((u_int)dfd >= fdp->fd_nfiles ||
1939 	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1940 		FILEDESC_UNLOCK(fdp);
1941 		return (EBADF);
1942 	}
1943 
1944 	/*
1945 	 * There are two cases of interest here.
1946 	 *
1947 	 * For ENODEV simply dup (dfd) to file descriptor
1948 	 * (indx) and return.
1949 	 *
1950 	 * For ENXIO steal away the file structure from (dfd) and
1951 	 * store it in (indx).  (dfd) is effectively closed by
1952 	 * this operation.
1953 	 *
1954 	 * Any other error code is just returned.
1955 	 */
1956 	switch (error) {
1957 	case ENODEV:
1958 		/*
1959 		 * Check that the mode the file is being opened for is a
1960 		 * subset of the mode of the existing descriptor.
1961 		 */
1962 		FILE_LOCK(wfp);
1963 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1964 			FILE_UNLOCK(wfp);
1965 			FILEDESC_UNLOCK(fdp);
1966 			return (EACCES);
1967 		}
1968 		fp = fdp->fd_ofiles[indx];
1969 #if 0
1970 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1971 			(void) munmapfd(td, indx);
1972 #endif
1973 		fdp->fd_ofiles[indx] = wfp;
1974 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1975 		fhold_locked(wfp);
1976 		FILE_UNLOCK(wfp);
1977 		if (indx > fdp->fd_lastfile)
1978 			fdp->fd_lastfile = indx;
1979 		if (fp != NULL)
1980 			FILE_LOCK(fp);
1981 		FILEDESC_UNLOCK(fdp);
1982 		/*
1983 		 * we now own the reference to fp that the ofiles[] array
1984 		 * used to own.  Release it.
1985 		 */
1986 		if (fp != NULL)
1987 			fdrop_locked(fp, td);
1988 		return (0);
1989 
1990 	case ENXIO:
1991 		/*
1992 		 * Steal away the file pointer from dfd, and stuff it into indx.
1993 		 */
1994 		fp = fdp->fd_ofiles[indx];
1995 #if 0
1996 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1997 			(void) munmapfd(td, indx);
1998 #endif
1999 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
2000 		fdp->fd_ofiles[dfd] = NULL;
2001 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
2002 		fdp->fd_ofileflags[dfd] = 0;
2003 
2004 		/*
2005 		 * Complete the clean up of the filedesc structure by
2006 		 * recomputing the various hints.
2007 		 */
2008 		if (indx > fdp->fd_lastfile) {
2009 			fdp->fd_lastfile = indx;
2010 		} else {
2011 			while (fdp->fd_lastfile > 0 &&
2012 			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
2013 				fdp->fd_lastfile--;
2014 			}
2015 			if (dfd < fdp->fd_freefile)
2016 				fdp->fd_freefile = dfd;
2017 		}
2018 		if (fp != NULL)
2019 			FILE_LOCK(fp);
2020 		FILEDESC_UNLOCK(fdp);
2021 
2022 		/*
2023 		 * we now own the reference to fp that the ofiles[] array
2024 		 * used to own.  Release it.
2025 		 */
2026 		if (fp != NULL)
2027 			fdrop_locked(fp, td);
2028 		return (0);
2029 
2030 	default:
2031 		FILEDESC_UNLOCK(fdp);
2032 		return (error);
2033 	}
2034 	/* NOTREACHED */
2035 }
2036 
2037 /*
2038  * Get file structures.
2039  */
2040 static int
2041 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
2042 {
2043 	int error;
2044 	struct file *fp;
2045 
2046 	sysctl_wire_old_buffer(req, 0);
2047 	sx_slock(&filelist_lock);
2048 	if (!req->oldptr) {
2049 		/*
2050 		 * overestimate by 10 files
2051 		 */
2052 		error = SYSCTL_OUT(req, 0, sizeof(filehead) +
2053 				   (nfiles + 10) * sizeof(struct file));
2054 		sx_sunlock(&filelist_lock);
2055 		return (error);
2056 	}
2057 
2058 	error = SYSCTL_OUT(req, &filehead, sizeof(filehead));
2059 	if (error) {
2060 		sx_sunlock(&filelist_lock);
2061 		return (error);
2062 	}
2063 
2064 	/*
2065 	 * followed by an array of file structures
2066 	 */
2067 	LIST_FOREACH(fp, &filehead, f_list) {
2068 		error = SYSCTL_OUT(req, fp, sizeof (struct file));
2069 		if (error) {
2070 			sx_sunlock(&filelist_lock);
2071 			return (error);
2072 		}
2073 	}
2074 	sx_sunlock(&filelist_lock);
2075 	return (0);
2076 }
2077 
2078 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
2079     0, 0, sysctl_kern_file, "S,file", "Entire file table");
2080 
2081 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
2082     &maxfilesperproc, 0, "Maximum files allowed open per process");
2083 
2084 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
2085     &maxfiles, 0, "Maximum number of files");
2086 
2087 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
2088     &nfiles, 0, "System-wide number of open files");
2089 
2090 static void
2091 fildesc_drvinit(void *unused)
2092 {
2093 	dev_t dev;
2094 
2095 	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
2096 	make_dev_alias(dev, "stdin");
2097 	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
2098 	make_dev_alias(dev, "stdout");
2099 	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
2100 	make_dev_alias(dev, "stderr");
2101 	if (!devfs_present) {
2102 		int fd;
2103 
2104 		for (fd = 3; fd < NUMFDESC; fd++)
2105 			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
2106 			    "fd/%d", fd);
2107 	}
2108 }
2109 
2110 struct fileops badfileops = {
2111 	badfo_readwrite,
2112 	badfo_readwrite,
2113 	badfo_ioctl,
2114 	badfo_poll,
2115 	badfo_kqfilter,
2116 	badfo_stat,
2117 	badfo_close
2118 };
2119 
2120 static int
2121 badfo_readwrite(fp, uio, cred, flags, td)
2122 	struct file *fp;
2123 	struct uio *uio;
2124 	struct ucred *cred;
2125 	struct thread *td;
2126 	int flags;
2127 {
2128 
2129 	return (EBADF);
2130 }
2131 
2132 static int
2133 badfo_ioctl(fp, com, data, td)
2134 	struct file *fp;
2135 	u_long com;
2136 	void *data;
2137 	struct thread *td;
2138 {
2139 
2140 	return (EBADF);
2141 }
2142 
/* badfileops poll stub: ignores its arguments and returns 0. */
static int
badfo_poll(struct file *fp, int events, struct ucred *cred,
    struct thread *td)
{

	return (0);
}
2153 
/* badfileops kqfilter stub: ignores its arguments and returns 0. */
static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{

	return (0);
}
2162 
2163 static int
2164 badfo_stat(fp, sb, td)
2165 	struct file *fp;
2166 	struct stat *sb;
2167 	struct thread *td;
2168 {
2169 
2170 	return (EBADF);
2171 }
2172 
2173 static int
2174 badfo_close(fp, td)
2175 	struct file *fp;
2176 	struct thread *td;
2177 {
2178 
2179 	return (EBADF);
2180 }
2181 
2182 SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
2183 					fildesc_drvinit,NULL)
2184 
2185 static void filelistinit(void *);
2186 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2187 
2188 /* ARGSUSED*/
2189 static void
2190 filelistinit(dummy)
2191 	void *dummy;
2192 {
2193 	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
2194 	    NULL, NULL, UMA_ALIGN_PTR, 0);
2195 
2196 	sx_init(&filelist_lock, "filelist lock");
2197 	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
2198 }
2199