xref: /freebsd/sys/kern/kern_descrip.c (revision ee2ea5ceafed78a5bd9810beb9e3ca927180c226)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mutex.h>
49 #include <sys/sysproto.h>
50 #include <sys/conf.h>
51 #include <sys/filedesc.h>
52 #include <sys/kernel.h>
53 #include <sys/sysctl.h>
54 #include <sys/vnode.h>
55 #include <sys/proc.h>
56 #include <sys/namei.h>
57 #include <sys/file.h>
58 #include <sys/stat.h>
59 #include <sys/filio.h>
60 #include <sys/fcntl.h>
61 #include <sys/unistd.h>
62 #include <sys/resourcevar.h>
63 #include <sys/event.h>
64 #include <sys/sx.h>
65 #include <sys/socketvar.h>
66 #include <sys/signalvar.h>
67 
68 #include <machine/limits.h>
69 
70 #include <vm/vm.h>
71 #include <vm/vm_extern.h>
72 #include <vm/uma.h>
73 
74 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
75 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
76 
77 uma_zone_t file_zone;
78 
79 static	 d_open_t  fdopen;
80 #define NUMFDESC 64
81 
82 #define CDEV_MAJOR 22
83 static struct cdevsw fildesc_cdevsw = {
84 	/* open */	fdopen,
85 	/* close */	noclose,
86 	/* read */	noread,
87 	/* write */	nowrite,
88 	/* ioctl */	noioctl,
89 	/* poll */	nopoll,
90 	/* mmap */	nommap,
91 	/* strategy */	nostrategy,
92 	/* name */	"FD",
93 	/* maj */	CDEV_MAJOR,
94 	/* dump */	nodump,
95 	/* psize */	nopsize,
96 	/* flags */	0,
97 };
98 
99 static int do_dup(struct filedesc *fdp, int old, int new, register_t *retval, struct thread *td);
100 static int badfo_readwrite(struct file *fp, struct uio *uio,
101     struct ucred *cred, int flags, struct thread *td);
102 static int badfo_ioctl(struct file *fp, u_long com, caddr_t data,
103     struct thread *td);
104 static int badfo_poll(struct file *fp, int events,
105     struct ucred *cred, struct thread *td);
106 static int badfo_kqfilter(struct file *fp, struct knote *kn);
107 static int badfo_stat(struct file *fp, struct stat *sb, struct thread *td);
108 static int badfo_close(struct file *fp, struct thread *td);
109 
110 /*
111  * Descriptor management.
112  */
113 struct filelist filehead;	/* head of list of open files */
114 int nfiles;			/* actual number of open files */
115 extern int cmask;
116 struct sx filelist_lock;	/* sx to protect filelist */
117 struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
118 
119 /*
120  * System calls on descriptors.
121  */
122 #ifndef _SYS_SYSPROTO_H_
123 struct getdtablesize_args {
124 	int	dummy;
125 };
126 #endif
127 /*
128  * MPSAFE
129  */
130 /* ARGSUSED */
131 int
132 getdtablesize(td, uap)
133 	struct thread *td;
134 	struct getdtablesize_args *uap;
135 {
136 	struct proc *p = td->td_proc;
137 
138 	mtx_lock(&Giant);
139 	td->td_retval[0] =
140 	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
141 	mtx_unlock(&Giant);
142 	return (0);
143 }
144 
145 /*
146  * Duplicate a file descriptor to a particular value.
147  *
148  * note: keep in mind that a potential race condition exists when closing
149  * descriptors from a shared descriptor table (via rfork).
150  */
151 #ifndef _SYS_SYSPROTO_H_
152 struct dup2_args {
153 	u_int	from;
154 	u_int	to;
155 };
156 #endif
157 /*
158  * MPSAFE
159  */
160 /* ARGSUSED */
161 int
162 dup2(td, uap)
163 	struct thread *td;
164 	struct dup2_args *uap;
165 {
166 	struct proc *p = td->td_proc;
167 	register struct filedesc *fdp = td->td_proc->p_fd;
168 	register u_int old = uap->from, new = uap->to;
169 	int i, error;
170 
171 	FILEDESC_LOCK(fdp);
172 retry:
173 	if (old >= fdp->fd_nfiles ||
174 	    fdp->fd_ofiles[old] == NULL ||
175 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
176 	    new >= maxfilesperproc) {
177 		FILEDESC_UNLOCK(fdp);
178 		return (EBADF);
179 	}
180 	if (old == new) {
181 		td->td_retval[0] = new;
182 		FILEDESC_UNLOCK(fdp);
183 		return (0);
184 	}
185 	if (new >= fdp->fd_nfiles) {
186 		if ((error = fdalloc(td, new, &i))) {
187 			FILEDESC_UNLOCK(fdp);
188 			return (error);
189 		}
190 		/*
191 		 * fdalloc() may block, retest everything.
192 		 */
193 		goto retry;
194 	}
195 	error = do_dup(fdp, (int)old, (int)new, td->td_retval, td);
196 	return(error);
197 }
198 
199 /*
200  * Duplicate a file descriptor.
201  */
202 #ifndef _SYS_SYSPROTO_H_
203 struct dup_args {
204 	u_int	fd;
205 };
206 #endif
207 /*
208  * MPSAFE
209  */
210 /* ARGSUSED */
211 int
212 dup(td, uap)
213 	struct thread *td;
214 	struct dup_args *uap;
215 {
216 	register struct filedesc *fdp;
217 	u_int old;
218 	int new, error;
219 
220 	old = uap->fd;
221 	fdp = td->td_proc->p_fd;
222 	FILEDESC_LOCK(fdp);
223 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
224 		FILEDESC_UNLOCK(fdp);
225 		return (EBADF);
226 	}
227 	if ((error = fdalloc(td, 0, &new))) {
228 		FILEDESC_UNLOCK(fdp);
229 		return (error);
230 	}
231 	error = do_dup(fdp, (int)old, new, td->td_retval, td);
232 	return (error);
233 }
234 
235 /*
236  * The file control system call.
237  */
238 #ifndef _SYS_SYSPROTO_H_
239 struct fcntl_args {
240 	int	fd;
241 	int	cmd;
242 	long	arg;
243 };
244 #endif
245 /*
246  * MPSAFE
247  */
248 /* ARGSUSED */
249 int
250 fcntl(td, uap)
251 	struct thread *td;
252 	register struct fcntl_args *uap;
253 {
254 	register struct proc *p = td->td_proc;
255 	register struct filedesc *fdp;
256 	register struct file *fp;
257 	register char *pop;
258 	struct vnode *vp;
259 	int i, tmp, error = 0, flg = F_POSIX;
260 	struct flock fl;
261 	u_int newmin;
262 	struct proc *leaderp;
263 
264 	mtx_lock(&Giant);
265 
266 	fdp = p->p_fd;
267 	FILEDESC_LOCK(fdp);
268 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
269 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
270 		FILEDESC_UNLOCK(fdp);
271 		error = EBADF;
272 		goto done2;
273 	}
274 	pop = &fdp->fd_ofileflags[uap->fd];
275 
276 	switch (uap->cmd) {
277 	case F_DUPFD:
278 		newmin = uap->arg;
279 		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
280 		    newmin >= maxfilesperproc) {
281 			FILEDESC_UNLOCK(fdp);
282 			error = EINVAL;
283 			break;
284 		}
285 		if ((error = fdalloc(td, newmin, &i))) {
286 			FILEDESC_UNLOCK(fdp);
287 			break;
288 		}
289 		error = do_dup(fdp, uap->fd, i, td->td_retval, td);
290 		break;
291 
292 	case F_GETFD:
293 		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
294 		FILEDESC_UNLOCK(fdp);
295 		break;
296 
297 	case F_SETFD:
298 		*pop = (*pop &~ UF_EXCLOSE) |
299 		    (uap->arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
300 		FILEDESC_UNLOCK(fdp);
301 		break;
302 
303 	case F_GETFL:
304 		FILE_LOCK(fp);
305 		FILEDESC_UNLOCK(fdp);
306 		td->td_retval[0] = OFLAGS(fp->f_flag);
307 		FILE_UNLOCK(fp);
308 		break;
309 
310 	case F_SETFL:
311 		fhold(fp);
312 		FILEDESC_UNLOCK(fdp);
313 		fp->f_flag &= ~FCNTLFLAGS;
314 		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
315 		tmp = fp->f_flag & FNONBLOCK;
316 		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
317 		if (error) {
318 			fdrop(fp, td);
319 			break;
320 		}
321 		tmp = fp->f_flag & FASYNC;
322 		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
323 		if (!error) {
324 			fdrop(fp, td);
325 			break;
326 		}
327 		fp->f_flag &= ~FNONBLOCK;
328 		tmp = 0;
329 		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
330 		fdrop(fp, td);
331 		break;
332 
333 	case F_GETOWN:
334 		fhold(fp);
335 		FILEDESC_UNLOCK(fdp);
336 		error = fo_ioctl(fp, FIOGETOWN, (caddr_t)td->td_retval, td);
337 		fdrop(fp, td);
338 		break;
339 
340 	case F_SETOWN:
341 		fhold(fp);
342 		FILEDESC_UNLOCK(fdp);
343 		error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, td);
344 		fdrop(fp, td);
345 		break;
346 
347 	case F_SETLKW:
348 		flg |= F_WAIT;
349 		/* Fall into F_SETLK */
350 
351 	case F_SETLK:
352 		if (fp->f_type != DTYPE_VNODE) {
353 			FILEDESC_UNLOCK(fdp);
354 			error = EBADF;
355 			break;
356 		}
357 		vp = (struct vnode *)fp->f_data;
358 		/*
359 		 * copyin/lockop may block
360 		 */
361 		fhold(fp);
362 		FILEDESC_UNLOCK(fdp);
363 		vp = (struct vnode *)fp->f_data;
364 
365 		/* Copy in the lock structure */
366 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
367 		    sizeof(fl));
368 		if (error) {
369 			fdrop(fp, td);
370 			break;
371 		}
372 		if (fl.l_whence == SEEK_CUR) {
373 			if (fp->f_offset < 0 ||
374 			    (fl.l_start > 0 &&
375 			     fp->f_offset > OFF_MAX - fl.l_start)) {
376 				fdrop(fp, td);
377 				error = EOVERFLOW;
378 				break;
379 			}
380 			fl.l_start += fp->f_offset;
381 		}
382 
383 		switch (fl.l_type) {
384 		case F_RDLCK:
385 			if ((fp->f_flag & FREAD) == 0) {
386 				error = EBADF;
387 				break;
388 			}
389 			PROC_LOCK(p);
390 			p->p_flag |= P_ADVLOCK;
391 			leaderp = p->p_leader;
392 			PROC_UNLOCK(p);
393 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
394 			    &fl, flg);
395 			break;
396 		case F_WRLCK:
397 			if ((fp->f_flag & FWRITE) == 0) {
398 				error = EBADF;
399 				break;
400 			}
401 			PROC_LOCK(p);
402 			p->p_flag |= P_ADVLOCK;
403 			leaderp = p->p_leader;
404 			PROC_UNLOCK(p);
405 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
406 			    &fl, flg);
407 			break;
408 		case F_UNLCK:
409 			PROC_LOCK(p);
410 			leaderp = p->p_leader;
411 			PROC_UNLOCK(p);
412 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_UNLCK,
413 				&fl, F_POSIX);
414 			break;
415 		default:
416 			error = EINVAL;
417 			break;
418 		}
419 		fdrop(fp, td);
420 		break;
421 
422 	case F_GETLK:
423 		if (fp->f_type != DTYPE_VNODE) {
424 			FILEDESC_UNLOCK(fdp);
425 			error = EBADF;
426 			break;
427 		}
428 		vp = (struct vnode *)fp->f_data;
429 		/*
430 		 * copyin/lockop may block
431 		 */
432 		fhold(fp);
433 		FILEDESC_UNLOCK(fdp);
434 		vp = (struct vnode *)fp->f_data;
435 
436 		/* Copy in the lock structure */
437 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
438 		    sizeof(fl));
439 		if (error) {
440 			fdrop(fp, td);
441 			break;
442 		}
443 		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
444 		    fl.l_type != F_UNLCK) {
445 			fdrop(fp, td);
446 			error = EINVAL;
447 			break;
448 		}
449 		if (fl.l_whence == SEEK_CUR) {
450 			if ((fl.l_start > 0 &&
451 			     fp->f_offset > OFF_MAX - fl.l_start) ||
452 			    (fl.l_start < 0 &&
453 			     fp->f_offset < OFF_MIN - fl.l_start)) {
454 				fdrop(fp, td);
455 				error = EOVERFLOW;
456 				break;
457 			}
458 			fl.l_start += fp->f_offset;
459 		}
460 		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
461 			    &fl, F_POSIX);
462 		fdrop(fp, td);
463 		if (error == 0) {
464 			error = copyout((caddr_t)&fl,
465 				    (caddr_t)(intptr_t)uap->arg, sizeof(fl));
466 		}
467 		break;
468 	default:
469 		FILEDESC_UNLOCK(fdp);
470 		error = EINVAL;
471 		break;
472 	}
473 done2:
474 	mtx_unlock(&Giant);
475 	return (error);
476 }
477 
478 /*
479  * Common code for dup, dup2, and fcntl(F_DUPFD).
480  * filedesc must be locked, but will be unlocked as a side effect.
481  */
482 static int
483 do_dup(fdp, old, new, retval, td)
484 	register struct filedesc *fdp;
485 	register int old, new;
486 	register_t *retval;
487 	struct thread *td;
488 {
489 	struct file *fp;
490 	struct file *delfp;
491 
492 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
493 
494 	/*
495 	 * Save info on the descriptor being overwritten.  We have
496 	 * to do the unmap now, but we cannot close it without
497 	 * introducing an ownership race for the slot.
498 	 */
499 	delfp = fdp->fd_ofiles[new];
500 #if 0
501 	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
502 		(void) munmapfd(td, new);
503 #endif
504 
505 	/*
506 	 * Duplicate the source descriptor, update lastfile
507 	 */
508 	fp = fdp->fd_ofiles[old];
509 	fdp->fd_ofiles[new] = fp;
510 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
511 	fhold(fp);
512 	if (new > fdp->fd_lastfile)
513 		fdp->fd_lastfile = new;
514 	*retval = new;
515 
516 	FILEDESC_UNLOCK(fdp);
517 
518 	/*
519 	 * If we dup'd over a valid file, we now own the reference to it
520 	 * and must dispose of it using closef() semantics (as if a
521 	 * close() were performed on it).
522 	 */
523 	if (delfp) {
524 		mtx_lock(&Giant);
525 		(void) closef(delfp, td);
526 		mtx_unlock(&Giant);
527 	}
528 	return (0);
529 }
530 
531 /*
532  * If sigio is on the list associated with a process or process group,
533  * disable signalling from the device, remove sigio from the list and
534  * free sigio.
535  */
536 void
537 funsetown(sigio)
538 	struct sigio *sigio;
539 {
540 
541 	SIGIO_LOCK();
542 	if (sigio == NULL) {
543 		SIGIO_UNLOCK();
544 		return;
545 	}
546 	*(sigio->sio_myref) = NULL;
547 	SIGIO_UNLOCK();
548 	if ((sigio)->sio_pgid < 0) {
549 		struct pgrp *pg = (sigio)->sio_pgrp;
550 		PGRP_LOCK(pg);
551 		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
552 			     sigio, sio_pgsigio);
553 		PGRP_UNLOCK(pg);
554 	} else {
555 		struct proc *p = (sigio)->sio_proc;
556 		PROC_LOCK(p);
557 		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
558 			     sigio, sio_pgsigio);
559 		PROC_UNLOCK(p);
560 	}
561 	crfree(sigio->sio_ucred);
562 	FREE(sigio, M_SIGIO);
563 }
564 
565 /* Free a list of sigio structures. */
566 void
567 funsetownlst(sigiolst)
568 	struct sigiolst *sigiolst;
569 {
570 	struct sigio *sigio;
571 	struct proc *p;
572 	struct pgrp *pg;
573 
574 	SIGIO_ASSERT(MA_OWNED);
575 
576 	sigio = SLIST_FIRST(sigiolst);
577 	if (sigio == NULL)
578 		return;
579 
580 	p = NULL;
581 	pg = NULL;
582 
583 	/*
584 	 * Every entry of the list should belong
585 	 * to a single proc or pgrp.
586 	 */
587 	if (sigio->sio_pgid < 0) {
588 		pg = sigio->sio_pgrp;
589 		PGRP_LOCK_ASSERT(pg, MA_OWNED);
590 	} else /* if (sigio->sio_pgid > 0) */ {
591 		p = sigio->sio_proc;
592 		PROC_LOCK_ASSERT(p, MA_OWNED);
593 	}
594 
595 	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
596 		*(sigio->sio_myref) = NULL;
597 		if (pg != NULL) {
598 			KASSERT(sigio->sio_pgid < 0, ("Proc sigio in pgrp sigio list"));
599 			KASSERT(sigio->sio_pgrp == pg, ("Bogus pgrp in sigio list"));
600 			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, sio_pgsigio);
601 			PGRP_UNLOCK(pg);
602 			SIGIO_UNLOCK();
603 			crfree(sigio->sio_ucred);
604 			FREE(sigio, M_SIGIO);
605 			SIGIO_LOCK();
606 			PGRP_LOCK(pg);
607 		} else /* if (p != NULL) */ {
608 			KASSERT(sigio->sio_pgid > 0, ("Pgrp sigio in proc sigio list"));
609 			KASSERT(sigio->sio_proc == p, ("Bogus proc in sigio list"));
610 			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, sio_pgsigio);
611 			PROC_UNLOCK(p);
612 			SIGIO_UNLOCK();
613 			crfree(sigio->sio_ucred);
614 			FREE(sigio, M_SIGIO);
615 			SIGIO_LOCK();
616 			PROC_LOCK(p);
617 		}
618 	}
619 }
620 
621 /*
622  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
623  *
624  * After permission checking, add a sigio structure to the sigio list for
625  * the process or process group.
626  */
627 int
628 fsetown(pgid, sigiop)
629 	pid_t pgid;
630 	struct sigio **sigiop;
631 {
632 	struct proc *proc;
633 	struct pgrp *pgrp;
634 	struct sigio *sigio;
635 	int ret;
636 
637 	if (pgid == 0) {
638 		funsetown(*sigiop);
639 		return (0);
640 	}
641 
642 	ret = 0;
643 
644 	/* Allocate and fill in the new sigio out of locks. */
645 	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
646 	sigio->sio_pgid = pgid;
647 	sigio->sio_ucred = crhold(curthread->td_ucred);
648 	sigio->sio_myref = sigiop;
649 
650 	sx_slock(&proctree_lock);
651 	if (pgid > 0) {
652 		proc = pfind(pgid);
653 		if (proc == NULL) {
654 			ret = ESRCH;
655 			goto fail;
656 		}
657 
658 		/*
659 		 * Policy - Don't allow a process to FSETOWN a process
660 		 * in another session.
661 		 *
662 		 * Remove this test to allow maximum flexibility or
663 		 * restrict FSETOWN to the current process or process
664 		 * group for maximum safety.
665 		 */
666 		PROC_UNLOCK(proc);
667 		if (proc->p_session != curthread->td_proc->p_session) {
668 			ret = EPERM;
669 			goto fail;
670 		}
671 
672 		pgrp = NULL;
673 	} else /* if (pgid < 0) */ {
674 		pgrp = pgfind(-pgid);
675 		if (pgrp == NULL) {
676 			ret = ESRCH;
677 			goto fail;
678 		}
679 		PGRP_UNLOCK(pgrp);
680 
681 		/*
682 		 * Policy - Don't allow a process to FSETOWN a process
683 		 * in another session.
684 		 *
685 		 * Remove this test to allow maximum flexibility or
686 		 * restrict FSETOWN to the current process or process
687 		 * group for maximum safety.
688 		 */
689 		if (pgrp->pg_session != curthread->td_proc->p_session) {
690 			ret = EPERM;
691 			goto fail;
692 		}
693 
694 		proc = NULL;
695 	}
696 	funsetown(*sigiop);
697 	if (pgid > 0) {
698 		PROC_LOCK(proc);
699 		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
700 		sigio->sio_proc = proc;
701 		PROC_UNLOCK(proc);
702 	} else {
703 		PGRP_LOCK(pgrp);
704 		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
705 		sigio->sio_pgrp = pgrp;
706 		PGRP_UNLOCK(pgrp);
707 	}
708 	sx_sunlock(&proctree_lock);
709 	SIGIO_LOCK();
710 	*sigiop = sigio;
711 	SIGIO_UNLOCK();
712 	return (0);
713 
714 fail:
715 	sx_sunlock(&proctree_lock);
716 	crfree(sigio->sio_ucred);
717 	FREE(sigio, M_SIGIO);
718 	return (ret);
719 }
720 
721 /*
722  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
723  */
724 pid_t
725 fgetown(sigio)
726 	struct sigio *sigio;
727 {
728 	return (sigio != NULL ? sigio->sio_pgid : 0);
729 }
730 
731 /*
732  * Close a file descriptor.
733  */
734 #ifndef _SYS_SYSPROTO_H_
735 struct close_args {
736         int     fd;
737 };
738 #endif
739 /*
740  * MPSAFE
741  */
742 /* ARGSUSED */
743 int
744 close(td, uap)
745 	struct thread *td;
746 	struct close_args *uap;
747 {
748 	register struct filedesc *fdp;
749 	register struct file *fp;
750 	register int fd = uap->fd;
751 	int error = 0;
752 
753 	mtx_lock(&Giant);
754 	fdp = td->td_proc->p_fd;
755 	FILEDESC_LOCK(fdp);
756 	if ((unsigned)fd >= fdp->fd_nfiles ||
757 	    (fp = fdp->fd_ofiles[fd]) == NULL) {
758 		FILEDESC_UNLOCK(fdp);
759 		error = EBADF;
760 		goto done2;
761 	}
762 #if 0
763 	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
764 		(void) munmapfd(td, fd);
765 #endif
766 	fdp->fd_ofiles[fd] = NULL;
767 	fdp->fd_ofileflags[fd] = 0;
768 
769 	/*
770 	 * we now hold the fp reference that used to be owned by the descriptor
771 	 * array.
772 	 */
773 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
774 		fdp->fd_lastfile--;
775 	if (fd < fdp->fd_freefile)
776 		fdp->fd_freefile = fd;
777 	if (fd < fdp->fd_knlistsize) {
778 		FILEDESC_UNLOCK(fdp);
779 		knote_fdclose(td, fd);
780 	} else
781 		FILEDESC_UNLOCK(fdp);
782 
783 	error = closef(fp, td);
784 done2:
785 	mtx_unlock(&Giant);
786 	return(error);
787 }
788 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor (old stat layout).
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/*
 * ofstat system call.
 *
 * Obtains the file's status with fo_stat(), converts it with cvtstat()
 * and copies the resulting struct ostat out to uap->sb.  Returns the
 * first error from fget(), fo_stat() or copyout(), or 0.  Runs under
 * Giant.
 *
 * MPSAFE
 */
/* ARGSUSED */
int
ofstat(struct thread *td, struct ofstat_args *uap)
{
	struct file *fp;
	struct stat sb;
	struct ostat osb;
	int error;

	mtx_lock(&Giant);
	error = fget(td, uap->fd, &fp);
	if (error == 0) {
		error = fo_stat(fp, &sb, td);
		if (error == 0) {
			cvtstat(&sb, &osb);
			error = copyout((caddr_t)&osb, (caddr_t)uap->sb,
			    sizeof (osb));
		}
		fdrop(fp, td);
	}
	mtx_unlock(&Giant);
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
827 
828 /*
829  * Return status information about a file descriptor.
830  */
831 #ifndef _SYS_SYSPROTO_H_
832 struct fstat_args {
833 	int	fd;
834 	struct	stat *sb;
835 };
836 #endif
837 /*
838  * MPSAFE
839  */
840 /* ARGSUSED */
841 int
842 fstat(td, uap)
843 	struct thread *td;
844 	struct fstat_args *uap;
845 {
846 	struct file *fp;
847 	struct stat ub;
848 	int error;
849 
850 	mtx_lock(&Giant);
851 	if ((error = fget(td, uap->fd, &fp)) != 0)
852 		goto done2;
853 	error = fo_stat(fp, &ub, td);
854 	if (error == 0)
855 		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
856 	fdrop(fp, td);
857 done2:
858 	mtx_unlock(&Giant);
859 	return (error);
860 }
861 
862 /*
863  * Return status information about a file descriptor.
864  */
865 #ifndef _SYS_SYSPROTO_H_
866 struct nfstat_args {
867 	int	fd;
868 	struct	nstat *sb;
869 };
870 #endif
871 /*
872  * MPSAFE
873  */
874 /* ARGSUSED */
875 int
876 nfstat(td, uap)
877 	struct thread *td;
878 	register struct nfstat_args *uap;
879 {
880 	struct file *fp;
881 	struct stat ub;
882 	struct nstat nub;
883 	int error;
884 
885 	mtx_lock(&Giant);
886 	if ((error = fget(td, uap->fd, &fp)) != 0)
887 		goto done2;
888 	error = fo_stat(fp, &ub, td);
889 	if (error == 0) {
890 		cvtnstat(&ub, &nub);
891 		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
892 	}
893 	fdrop(fp, td);
894 done2:
895 	mtx_unlock(&Giant);
896 	return (error);
897 }
898 
899 /*
900  * Return pathconf information about a file descriptor.
901  */
902 #ifndef _SYS_SYSPROTO_H_
903 struct fpathconf_args {
904 	int	fd;
905 	int	name;
906 };
907 #endif
908 /*
909  * MPSAFE
910  */
911 /* ARGSUSED */
912 int
913 fpathconf(td, uap)
914 	struct thread *td;
915 	register struct fpathconf_args *uap;
916 {
917 	struct file *fp;
918 	struct vnode *vp;
919 	int error;
920 
921 	if ((error = fget(td, uap->fd, &fp)) != 0)
922 		return (error);
923 
924 	switch (fp->f_type) {
925 	case DTYPE_PIPE:
926 	case DTYPE_SOCKET:
927 		if (uap->name != _PC_PIPE_BUF) {
928 			error = EINVAL;
929 		} else {
930 			td->td_retval[0] = PIPE_BUF;
931 			error = 0;
932 		}
933 		break;
934 	case DTYPE_FIFO:
935 	case DTYPE_VNODE:
936 		vp = (struct vnode *)fp->f_data;
937 		mtx_lock(&Giant);
938 		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
939 		mtx_unlock(&Giant);
940 		break;
941 	default:
942 		error = EOPNOTSUPP;
943 		break;
944 	}
945 	fdrop(fp, td);
946 	return(error);
947 }
948 
949 /*
950  * Allocate a file descriptor for the process.
951  */
952 static int fdexpand;
953 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
954 
955 int
956 fdalloc(td, want, result)
957 	struct thread *td;
958 	int want;
959 	int *result;
960 {
961 	struct proc *p = td->td_proc;
962 	register struct filedesc *fdp = td->td_proc->p_fd;
963 	register int i;
964 	int lim, last, nfiles;
965 	struct file **newofile, **oldofile;
966 	char *newofileflags;
967 
968 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
969 
970 	/*
971 	 * Search for a free descriptor starting at the higher
972 	 * of want or fd_freefile.  If that fails, consider
973 	 * expanding the ofile array.
974 	 */
975 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
976 	for (;;) {
977 		last = min(fdp->fd_nfiles, lim);
978 		if ((i = want) < fdp->fd_freefile)
979 			i = fdp->fd_freefile;
980 		for (; i < last; i++) {
981 			if (fdp->fd_ofiles[i] == NULL) {
982 				fdp->fd_ofileflags[i] = 0;
983 				if (i > fdp->fd_lastfile)
984 					fdp->fd_lastfile = i;
985 				if (want <= fdp->fd_freefile)
986 					fdp->fd_freefile = i;
987 				*result = i;
988 				return (0);
989 			}
990 		}
991 
992 		/*
993 		 * No space in current array.  Expand?
994 		 */
995 		if (fdp->fd_nfiles >= lim)
996 			return (EMFILE);
997 		if (fdp->fd_nfiles < NDEXTENT)
998 			nfiles = NDEXTENT;
999 		else
1000 			nfiles = 2 * fdp->fd_nfiles;
1001 		FILEDESC_UNLOCK(fdp);
1002 		mtx_lock(&Giant);
1003 		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
1004 		    M_FILEDESC, M_WAITOK);
1005 		mtx_unlock(&Giant);
1006 		FILEDESC_LOCK(fdp);
1007 
1008 		/*
1009 		 * deal with file-table extend race that might have occured
1010 		 * when malloc was blocked.
1011 		 */
1012 		if (fdp->fd_nfiles >= nfiles) {
1013 			FILEDESC_UNLOCK(fdp);
1014 			mtx_lock(&Giant);
1015 			FREE(newofile, M_FILEDESC);
1016 			mtx_unlock(&Giant);
1017 			FILEDESC_LOCK(fdp);
1018 			continue;
1019 		}
1020 		newofileflags = (char *) &newofile[nfiles];
1021 		/*
1022 		 * Copy the existing ofile and ofileflags arrays
1023 		 * and zero the new portion of each array.
1024 		 */
1025 		bcopy(fdp->fd_ofiles, newofile,
1026 			(i = sizeof(struct file *) * fdp->fd_nfiles));
1027 		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
1028 		bcopy(fdp->fd_ofileflags, newofileflags,
1029 			(i = sizeof(char) * fdp->fd_nfiles));
1030 		bzero(newofileflags + i, nfiles * sizeof(char) - i);
1031 		if (fdp->fd_nfiles > NDFILE)
1032 			oldofile = fdp->fd_ofiles;
1033 		else
1034 			oldofile = NULL;
1035 		fdp->fd_ofiles = newofile;
1036 		fdp->fd_ofileflags = newofileflags;
1037 		fdp->fd_nfiles = nfiles;
1038 		fdexpand++;
1039 		if (oldofile != NULL) {
1040 			FILEDESC_UNLOCK(fdp);
1041 			mtx_lock(&Giant);
1042 			FREE(oldofile, M_FILEDESC);
1043 			mtx_unlock(&Giant);
1044 			FILEDESC_LOCK(fdp);
1045 		}
1046 	}
1047 	return (0);
1048 }
1049 
1050 /*
1051  * Check to see whether n user file descriptors
1052  * are available to the process p.
1053  */
1054 int
1055 fdavail(td, n)
1056 	struct thread *td;
1057 	register int n;
1058 {
1059 	struct proc *p = td->td_proc;
1060 	register struct filedesc *fdp = td->td_proc->p_fd;
1061 	register struct file **fpp;
1062 	register int i, lim, last;
1063 
1064 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1065 
1066 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1067 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
1068 		return (1);
1069 
1070 	last = min(fdp->fd_nfiles, lim);
1071 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
1072 	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
1073 		if (*fpp == NULL && --n <= 0)
1074 			return (1);
1075 	}
1076 	return (0);
1077 }
1078 
1079 /*
1080  * Create a new open file structure and allocate
1081  * a file decriptor for the process that refers to it.
1082  */
1083 int
1084 falloc(td, resultfp, resultfd)
1085 	register struct thread *td;
1086 	struct file **resultfp;
1087 	int *resultfd;
1088 {
1089 	struct proc *p = td->td_proc;
1090 	register struct file *fp, *fq;
1091 	int error, i;
1092 
1093 	sx_xlock(&filelist_lock);
1094 	if (nfiles >= maxfiles) {
1095 		sx_xunlock(&filelist_lock);
1096 		tablefull("file");
1097 		return (ENFILE);
1098 	}
1099 	nfiles++;
1100 	sx_xunlock(&filelist_lock);
1101 	/*
1102 	 * Allocate a new file descriptor.
1103 	 * If the process has file descriptor zero open, add to the list
1104 	 * of open files at that point, otherwise put it at the front of
1105 	 * the list of open files.
1106 	 */
1107 	fp = uma_zalloc(file_zone, M_WAITOK);
1108 	bzero(fp, sizeof(*fp));
1109 
1110 	/*
1111 	 * wait until after malloc (which may have blocked) returns before
1112 	 * allocating the slot, else a race might have shrunk it if we had
1113 	 * allocated it before the malloc.
1114 	 */
1115 	FILEDESC_LOCK(p->p_fd);
1116 	if ((error = fdalloc(td, 0, &i))) {
1117 		FILEDESC_UNLOCK(p->p_fd);
1118 		sx_xlock(&filelist_lock);
1119 		nfiles--;
1120 		sx_xunlock(&filelist_lock);
1121 		uma_zfree(file_zone, fp);
1122 		return (error);
1123 	}
1124 	fp->f_mtxp = mtx_pool_alloc();
1125 	fp->f_gcflag = 0;
1126 	fp->f_count = 1;
1127 	fp->f_cred = crhold(td->td_ucred);
1128 	fp->f_ops = &badfileops;
1129 	fp->f_seqcount = 1;
1130 	FILEDESC_UNLOCK(p->p_fd);
1131 	sx_xlock(&filelist_lock);
1132 	FILEDESC_LOCK(p->p_fd);
1133 	if ((fq = p->p_fd->fd_ofiles[0])) {
1134 		LIST_INSERT_AFTER(fq, fp, f_list);
1135 	} else {
1136 		LIST_INSERT_HEAD(&filehead, fp, f_list);
1137 	}
1138 	p->p_fd->fd_ofiles[i] = fp;
1139 	FILEDESC_UNLOCK(p->p_fd);
1140 	sx_xunlock(&filelist_lock);
1141 	if (resultfp)
1142 		*resultfp = fp;
1143 	if (resultfd)
1144 		*resultfd = i;
1145 	return (0);
1146 }
1147 
1148 /*
1149  * Free a file descriptor.
1150  */
1151 void
1152 ffree(fp)
1153 	register struct file *fp;
1154 {
1155 
1156 	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
1157 	sx_xlock(&filelist_lock);
1158 	LIST_REMOVE(fp, f_list);
1159 	nfiles--;
1160 	sx_xunlock(&filelist_lock);
1161 	crfree(fp->f_cred);
1162 	uma_zfree(file_zone, fp);
1163 }
1164 
1165 /*
1166  * Build a new filedesc structure.
1167  */
1168 struct filedesc *
1169 fdinit(td)
1170 	struct thread *td;
1171 {
1172 	register struct filedesc0 *newfdp;
1173 	register struct filedesc *fdp = td->td_proc->p_fd;
1174 
1175 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1176 	    M_FILEDESC, M_WAITOK | M_ZERO);
1177 	mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1178 	FILEDESC_LOCK(&newfdp->fd_fd);
1179 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1180 	if (newfdp->fd_fd.fd_cdir)
1181 		VREF(newfdp->fd_fd.fd_cdir);
1182 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1183 	if (newfdp->fd_fd.fd_rdir)
1184 		VREF(newfdp->fd_fd.fd_rdir);
1185 	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1186 	if (newfdp->fd_fd.fd_jdir)
1187 		VREF(newfdp->fd_fd.fd_jdir);
1188 
1189 	/* Create the file descriptor table. */
1190 	newfdp->fd_fd.fd_refcnt = 1;
1191 	newfdp->fd_fd.fd_cmask = cmask;
1192 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1193 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1194 	newfdp->fd_fd.fd_nfiles = NDFILE;
1195 	newfdp->fd_fd.fd_knlistsize = -1;
1196 	FILEDESC_UNLOCK(&newfdp->fd_fd);
1197 
1198 	return (&newfdp->fd_fd);
1199 }
1200 
1201 /*
1202  * Share a filedesc structure.
1203  */
1204 struct filedesc *
1205 fdshare(p)
1206 	struct proc *p;
1207 {
1208 	FILEDESC_LOCK(p->p_fd);
1209 	p->p_fd->fd_refcnt++;
1210 	FILEDESC_UNLOCK(p->p_fd);
1211 	return (p->p_fd);
1212 }
1213 
1214 /*
1215  * Copy a filedesc structure.
1216  */
1217 struct filedesc *
1218 fdcopy(td)
1219 	struct thread *td;
1220 {
1221 	register struct filedesc *newfdp, *fdp = td->td_proc->p_fd;
1222 	register struct file **fpp;
1223 	register int i, j;
1224 
1225 	/* Certain daemons might not have file descriptors. */
1226 	if (fdp == NULL)
1227 		return (NULL);
1228 
1229 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1230 
1231 	FILEDESC_UNLOCK(fdp);
1232 	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1233 	    M_FILEDESC, M_WAITOK);
1234 	FILEDESC_LOCK(fdp);
1235 	bcopy(fdp, newfdp, sizeof(struct filedesc));
1236 	FILEDESC_UNLOCK(fdp);
1237 	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
1238 	mtx_init(&newfdp->fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1239 	if (newfdp->fd_cdir)
1240 		VREF(newfdp->fd_cdir);
1241 	if (newfdp->fd_rdir)
1242 		VREF(newfdp->fd_rdir);
1243 	if (newfdp->fd_jdir)
1244 		VREF(newfdp->fd_jdir);
1245 	newfdp->fd_refcnt = 1;
1246 
1247 	/*
1248 	 * If the number of open files fits in the internal arrays
1249 	 * of the open file structure, use them, otherwise allocate
1250 	 * additional memory for the number of descriptors currently
1251 	 * in use.
1252 	 */
1253 	FILEDESC_LOCK(fdp);
1254 	newfdp->fd_lastfile = fdp->fd_lastfile;
1255 	newfdp->fd_nfiles = fdp->fd_nfiles;
1256 	if (newfdp->fd_lastfile < NDFILE) {
1257 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1258 		newfdp->fd_ofileflags =
1259 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
1260 		i = NDFILE;
1261 	} else {
1262 		/*
1263 		 * Compute the smallest multiple of NDEXTENT needed
1264 		 * for the file descriptors currently in use,
1265 		 * allowing the table to shrink.
1266 		 */
1267 retry:
1268 		i = newfdp->fd_nfiles;
1269 		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1270 			i /= 2;
1271 		FILEDESC_UNLOCK(fdp);
1272 		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1273 		    M_FILEDESC, M_WAITOK);
1274 		FILEDESC_LOCK(fdp);
1275 		newfdp->fd_lastfile = fdp->fd_lastfile;
1276 		newfdp->fd_nfiles = fdp->fd_nfiles;
1277 		j = newfdp->fd_nfiles;
1278 		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
1279 			j /= 2;
1280 		if (i != j) {
1281 			/*
1282 			 * The size of the original table has changed.
1283 			 * Go over once again.
1284 			 */
1285 			FILEDESC_UNLOCK(fdp);
1286 			FREE(newfdp->fd_ofiles, M_FILEDESC);
1287 			FILEDESC_LOCK(fdp);
1288 			newfdp->fd_lastfile = fdp->fd_lastfile;
1289 			newfdp->fd_nfiles = fdp->fd_nfiles;
1290 			goto retry;
1291 		}
1292 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1293 	}
1294 	newfdp->fd_nfiles = i;
1295 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1296 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1297 
1298 	/*
1299 	 * kq descriptors cannot be copied.
1300 	 */
1301 	if (newfdp->fd_knlistsize != -1) {
1302 		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1303 		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1304 			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1305 				*fpp = NULL;
1306 				if (i < newfdp->fd_freefile)
1307 					newfdp->fd_freefile = i;
1308 			}
1309 			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1310 				newfdp->fd_lastfile--;
1311 		}
1312 		newfdp->fd_knlist = NULL;
1313 		newfdp->fd_knlistsize = -1;
1314 		newfdp->fd_knhash = NULL;
1315 		newfdp->fd_knhashmask = 0;
1316 	}
1317 
1318 	fpp = newfdp->fd_ofiles;
1319 	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1320 		if (*fpp != NULL) {
1321 			fhold(*fpp);
1322 		}
1323 	}
1324 	return (newfdp);
1325 }
1326 
1327 /*
1328  * Release a filedesc structure.
1329  */
1330 void
1331 fdfree(td)
1332 	struct thread *td;
1333 {
1334 	register struct filedesc *fdp;
1335 	struct file **fpp;
1336 	register int i;
1337 
1338 	fdp = td->td_proc->p_fd;
1339 	/* Certain daemons might not have file descriptors. */
1340 	if (fdp == NULL)
1341 		return;
1342 
1343 	FILEDESC_LOCK(fdp);
1344 	if (--fdp->fd_refcnt > 0) {
1345 		FILEDESC_UNLOCK(fdp);
1346 		return;
1347 	}
1348 	/*
1349 	 * we are the last reference to the structure, we can
1350 	 * safely assume it will not change out from under us.
1351 	 */
1352 	FILEDESC_UNLOCK(fdp);
1353 	fpp = fdp->fd_ofiles;
1354 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1355 		if (*fpp)
1356 			(void) closef(*fpp, td);
1357 	}
1358 
1359 	PROC_LOCK(td->td_proc);
1360 	td->td_proc->p_fd = NULL;
1361 	PROC_UNLOCK(td->td_proc);
1362 
1363 	if (fdp->fd_nfiles > NDFILE)
1364 		FREE(fdp->fd_ofiles, M_FILEDESC);
1365 	if (fdp->fd_cdir)
1366 		vrele(fdp->fd_cdir);
1367 	if (fdp->fd_rdir)
1368 		vrele(fdp->fd_rdir);
1369 	if (fdp->fd_jdir)
1370 		vrele(fdp->fd_jdir);
1371 	if (fdp->fd_knlist)
1372 		FREE(fdp->fd_knlist, M_KQUEUE);
1373 	if (fdp->fd_knhash)
1374 		FREE(fdp->fd_knhash, M_KQUEUE);
1375 	mtx_destroy(&fdp->fd_mtx);
1376 	FREE(fdp, M_FILEDESC);
1377 }
1378 
1379 /*
1380  * For setugid programs, we don't want to people to use that setugidness
1381  * to generate error messages which write to a file which otherwise would
1382  * otherwise be off-limits to the process.
1383  *
1384  * This is a gross hack to plug the hole.  A better solution would involve
1385  * a special vop or other form of generalized access control mechanism.  We
1386  * go ahead and just reject all procfs file systems accesses as dangerous.
1387  *
1388  * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1389  * sufficient.  We also don't for check setugidness since we know we are.
1390  */
1391 static int
1392 is_unsafe(struct file *fp)
1393 {
1394 	if (fp->f_type == DTYPE_VNODE &&
1395 	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1396 		return (1);
1397 	return (0);
1398 }
1399 
1400 /*
1401  * Make this setguid thing safe, if at all possible.
1402  */
1403 void
1404 setugidsafety(td)
1405 	struct thread *td;
1406 {
1407 	struct filedesc *fdp = td->td_proc->p_fd;
1408 	register int i;
1409 
1410 	/* Certain daemons might not have file descriptors. */
1411 	if (fdp == NULL)
1412 		return;
1413 
1414 	/*
1415 	 * note: fdp->fd_ofiles may be reallocated out from under us while
1416 	 * we are blocked in a close.  Be careful!
1417 	 */
1418 	FILEDESC_LOCK(fdp);
1419 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1420 		if (i > 2)
1421 			break;
1422 		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1423 			struct file *fp;
1424 
1425 #if 0
1426 			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1427 				(void) munmapfd(td, i);
1428 #endif
1429 			if (i < fdp->fd_knlistsize) {
1430 				FILEDESC_UNLOCK(fdp);
1431 				knote_fdclose(td, i);
1432 				FILEDESC_LOCK(fdp);
1433 			}
1434 			/*
1435 			 * NULL-out descriptor prior to close to avoid
1436 			 * a race while close blocks.
1437 			 */
1438 			fp = fdp->fd_ofiles[i];
1439 			fdp->fd_ofiles[i] = NULL;
1440 			fdp->fd_ofileflags[i] = 0;
1441 			if (i < fdp->fd_freefile)
1442 				fdp->fd_freefile = i;
1443 			FILEDESC_UNLOCK(fdp);
1444 			(void) closef(fp, td);
1445 			FILEDESC_LOCK(fdp);
1446 		}
1447 	}
1448 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1449 		fdp->fd_lastfile--;
1450 	FILEDESC_UNLOCK(fdp);
1451 }
1452 
1453 /*
1454  * Close any files on exec?
1455  */
1456 void
1457 fdcloseexec(td)
1458 	struct thread *td;
1459 {
1460 	struct filedesc *fdp = td->td_proc->p_fd;
1461 	register int i;
1462 
1463 	/* Certain daemons might not have file descriptors. */
1464 	if (fdp == NULL)
1465 		return;
1466 
1467 	FILEDESC_LOCK(fdp);
1468 
1469 	/*
1470 	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1471 	 * may block and rip them out from under us.
1472 	 */
1473 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1474 		if (fdp->fd_ofiles[i] != NULL &&
1475 		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1476 			struct file *fp;
1477 
1478 #if 0
1479 			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1480 				(void) munmapfd(td, i);
1481 #endif
1482 			if (i < fdp->fd_knlistsize) {
1483 				FILEDESC_UNLOCK(fdp);
1484 				knote_fdclose(td, i);
1485 				FILEDESC_LOCK(fdp);
1486 			}
1487 			/*
1488 			 * NULL-out descriptor prior to close to avoid
1489 			 * a race while close blocks.
1490 			 */
1491 			fp = fdp->fd_ofiles[i];
1492 			fdp->fd_ofiles[i] = NULL;
1493 			fdp->fd_ofileflags[i] = 0;
1494 			if (i < fdp->fd_freefile)
1495 				fdp->fd_freefile = i;
1496 			FILEDESC_UNLOCK(fdp);
1497 			(void) closef(fp, td);
1498 			FILEDESC_LOCK(fdp);
1499 		}
1500 	}
1501 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1502 		fdp->fd_lastfile--;
1503 	FILEDESC_UNLOCK(fdp);
1504 }
1505 
1506 /*
1507  * It is unsafe for set[ug]id processes to be started with file
1508  * descriptors 0..2 closed, as these descriptors are given implicit
1509  * significance in the Standard C library.  fdcheckstd() will create a
1510  * descriptor referencing /dev/null for each of stdin, stdout, and
1511  * stderr that is not already open.
1512  */
1513 int
1514 fdcheckstd(td)
1515 	struct thread *td;
1516 {
1517 	struct nameidata nd;
1518 	struct filedesc *fdp;
1519 	struct file *fp;
1520 	register_t retval;
1521 	int fd, i, error, flags, devnull;
1522 
1523 	fdp = td->td_proc->p_fd;
1524 	if (fdp == NULL)
1525 		return (0);
1526 	devnull = -1;
1527 	error = 0;
1528 	for (i = 0; i < 3; i++) {
1529 		if (fdp->fd_ofiles[i] != NULL)
1530 			continue;
1531 		if (devnull < 0) {
1532 			error = falloc(td, &fp, &fd);
1533 			if (error != 0)
1534 				break;
1535 			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
1536 			    td);
1537 			flags = FREAD | FWRITE;
1538 			error = vn_open(&nd, &flags, 0);
1539 			if (error != 0) {
1540 				FILEDESC_LOCK(fdp);
1541 				fdp->fd_ofiles[i] = NULL;
1542 				FILEDESC_UNLOCK(fdp);
1543 				fdrop(fp, td);
1544 				break;
1545 			}
1546 			NDFREE(&nd, NDF_ONLY_PNBUF);
1547 			fp->f_data = (caddr_t)nd.ni_vp;
1548 			fp->f_flag = flags;
1549 			fp->f_ops = &vnops;
1550 			fp->f_type = DTYPE_VNODE;
1551 			VOP_UNLOCK(nd.ni_vp, 0, td);
1552 			devnull = fd;
1553 		} else {
1554 			FILEDESC_LOCK(fdp);
1555 			error = fdalloc(td, 0, &fd);
1556 			if (error != 0) {
1557 				FILEDESC_UNLOCK(fdp);
1558 				break;
1559 			}
1560 			error = do_dup(fdp, devnull, fd, &retval, td);
1561 			if (error != 0)
1562 				break;
1563 		}
1564 	}
1565 	return (error);
1566 }
1567 
1568 /*
1569  * Internal form of close.
1570  * Decrement reference count on file structure.
1571  * Note: td may be NULL when closing a file
1572  * that was being passed in a message.
1573  */
1574 int
1575 closef(fp, td)
1576 	register struct file *fp;
1577 	register struct thread *td;
1578 {
1579 	struct vnode *vp;
1580 	struct flock lf;
1581 
1582 	if (fp == NULL)
1583 		return (0);
1584 	/*
1585 	 * POSIX record locking dictates that any close releases ALL
1586 	 * locks owned by this process.  This is handled by setting
1587 	 * a flag in the unlock to free ONLY locks obeying POSIX
1588 	 * semantics, and not to free BSD-style file locks.
1589 	 * If the descriptor was in a message, POSIX-style locks
1590 	 * aren't passed with the descriptor.
1591 	 */
1592 	if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1593 	    fp->f_type == DTYPE_VNODE) {
1594 		lf.l_whence = SEEK_SET;
1595 		lf.l_start = 0;
1596 		lf.l_len = 0;
1597 		lf.l_type = F_UNLCK;
1598 		vp = (struct vnode *)fp->f_data;
1599 		(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1600 		    F_UNLCK, &lf, F_POSIX);
1601 	}
1602 	return (fdrop(fp, td));
1603 }
1604 
/*
 * Drop one reference on the struct file passed in.  This takes the
 * file lock and leaves the rest to fdrop_locked(), whose result is
 * returned.
 */
int
fdrop(struct file *fp, struct thread *td)
{
	int error;

	FILE_LOCK(fp);
	error = fdrop_locked(fp, td);
	return (error);
}
1618 
1619 /*
1620  * Extract the file pointer associated with the specified descriptor for
1621  * the current user process.
1622  *
1623  * If the descriptor doesn't exist, EBADF is returned.
1624  *
1625  * If the descriptor exists but doesn't match 'flags' then
1626  * return EBADF for read attempts and EINVAL for write attempts.
1627  *
1628  * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
1629  * It should be droped with fdrop().
1630  * If it is not set, then the refcount will not be bumped however the
1631  * thread's filedesc struct will be returned locked (for fgetsock).
1632  *
1633  * If an error occured the non-zero error is returned and *fpp is set to NULL.
1634  * Otherwise *fpp is set and zero is returned.
1635  */
1636 static __inline
1637 int
1638 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1639 {
1640 	struct filedesc *fdp;
1641 	struct file *fp;
1642 
1643 	*fpp = NULL;
1644 	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1645 		return(EBADF);
1646 	FILEDESC_LOCK(fdp);
1647 	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1648 		FILEDESC_UNLOCK(fdp);
1649 		return(EBADF);
1650 	}
1651 
1652 	/*
1653 	 * Note: FREAD failures returns EBADF to maintain backwards
1654 	 * compatibility with what routines returned before.
1655 	 *
1656 	 * Only one flag, or 0, may be specified.
1657 	 */
1658 	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1659 		FILEDESC_UNLOCK(fdp);
1660 		return(EBADF);
1661 	}
1662 	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1663 		FILEDESC_UNLOCK(fdp);
1664 		return(EINVAL);
1665 	}
1666 	if (hold) {
1667 		fhold(fp);
1668 		FILEDESC_UNLOCK(fdp);
1669 	}
1670 	*fpp = fp;
1671 	return(0);
1672 }
1673 
/*
 * Look up descriptor fd of td's process and return the file in *fpp
 * with a reference held; no access mode is required.
 */
int
fget(struct thread *td, int fd, struct file **fpp)
{

	return (_fget(td, fd, fpp, 0, 1));
}
1679 
1680 int
1681 fget_read(struct thread *td, int fd, struct file **fpp)
1682 {
1683     return(_fget(td, fd, fpp, FREAD, 1));
1684 }
1685 
1686 int
1687 fget_write(struct thread *td, int fd, struct file **fpp)
1688 {
1689     return(_fget(td, fd, fpp, FWRITE, 1));
1690 }
1691 
1692 /*
1693  * Like fget() but loads the underlying vnode, or returns an error if
1694  * the descriptor does not represent a vnode.  Note that pipes use vnodes
1695  * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1696  * error).  The returned vnode will be vref()d.
1697  */
1698 
1699 static __inline
1700 int
1701 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1702 {
1703 	struct file *fp;
1704 	int error;
1705 
1706 	*vpp = NULL;
1707 	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1708 		return (error);
1709 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1710 		error = EINVAL;
1711 	} else {
1712 		*vpp = (struct vnode *)fp->f_data;
1713 		vref(*vpp);
1714 	}
1715 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1716 	return (error);
1717 }
1718 
/*
 * Return, in *vpp, a referenced vnode for descriptor fd; no access
 * mode is requested.
 */
int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, 0));
}
1724 
1725 int
1726 fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1727 {
1728 	return(_fgetvp(td, fd, vpp, FREAD));
1729 }
1730 
1731 int
1732 fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1733 {
1734 	return(_fgetvp(td, fd, vpp, FWRITE));
1735 }
1736 
1737 /*
1738  * Like fget() but loads the underlying socket, or returns an error if
1739  * the descriptor does not represent a socket.
1740  *
1741  * We bump the ref count on the returned socket.  XXX Also obtain the SX lock in
1742  * the future.
1743  */
1744 int
1745 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1746 {
1747 	struct file *fp;
1748 	int error;
1749 
1750 	*spp = NULL;
1751 	if (fflagp)
1752 		*fflagp = 0;
1753 	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1754 		return (error);
1755 	if (fp->f_type != DTYPE_SOCKET) {
1756 		error = ENOTSOCK;
1757 	} else {
1758 		*spp = (struct socket *)fp->f_data;
1759 		if (fflagp)
1760 			*fflagp = fp->f_flag;
1761 		soref(*spp);
1762 	}
1763 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1764 	return(error);
1765 }
1766 
/*
 * Drop the reference count on the socket and XXX release the SX lock in
 * the future.  The last reference closes the socket.
 */
void
fputsock(struct socket *so)
{

	sorele(so);
}
1776 
1777 /*
1778  * Drop reference on struct file passed in, may call closef if the
1779  * reference hits zero.
1780  * Expects struct file locked, and will unlock it.
1781  */
1782 int
1783 fdrop_locked(fp, td)
1784 	struct file *fp;
1785 	struct thread *td;
1786 {
1787 	struct flock lf;
1788 	struct vnode *vp;
1789 	int error;
1790 
1791 	FILE_LOCK_ASSERT(fp, MA_OWNED);
1792 
1793 	if (--fp->f_count > 0) {
1794 		FILE_UNLOCK(fp);
1795 		return (0);
1796 	}
1797 	mtx_lock(&Giant);
1798 	if (fp->f_count < 0)
1799 		panic("fdrop: count < 0");
1800 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1801 		lf.l_whence = SEEK_SET;
1802 		lf.l_start = 0;
1803 		lf.l_len = 0;
1804 		lf.l_type = F_UNLCK;
1805 		vp = (struct vnode *)fp->f_data;
1806 		FILE_UNLOCK(fp);
1807 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1808 	} else
1809 		FILE_UNLOCK(fp);
1810 	if (fp->f_ops != &badfileops)
1811 		error = fo_close(fp, td);
1812 	else
1813 		error = 0;
1814 	ffree(fp);
1815 	mtx_unlock(&Giant);
1816 	return (error);
1817 }
1818 
1819 /*
1820  * Apply an advisory lock on a file descriptor.
1821  *
1822  * Just attempt to get a record lock of the requested type on
1823  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1824  */
1825 #ifndef _SYS_SYSPROTO_H_
1826 struct flock_args {
1827 	int	fd;
1828 	int	how;
1829 };
1830 #endif
1831 /*
1832  * MPSAFE
1833  */
1834 /* ARGSUSED */
1835 int
1836 flock(td, uap)
1837 	struct thread *td;
1838 	register struct flock_args *uap;
1839 {
1840 	struct file *fp;
1841 	struct vnode *vp;
1842 	struct flock lf;
1843 	int error;
1844 
1845 	if ((error = fget(td, uap->fd, &fp)) != 0)
1846 		return (error);
1847 	if (fp->f_type != DTYPE_VNODE) {
1848 		fdrop(fp, td);
1849 		return (EOPNOTSUPP);
1850 	}
1851 
1852 	mtx_lock(&Giant);
1853 	vp = (struct vnode *)fp->f_data;
1854 	lf.l_whence = SEEK_SET;
1855 	lf.l_start = 0;
1856 	lf.l_len = 0;
1857 	if (uap->how & LOCK_UN) {
1858 		lf.l_type = F_UNLCK;
1859 		FILE_LOCK(fp);
1860 		fp->f_flag &= ~FHASLOCK;
1861 		FILE_UNLOCK(fp);
1862 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1863 		goto done2;
1864 	}
1865 	if (uap->how & LOCK_EX)
1866 		lf.l_type = F_WRLCK;
1867 	else if (uap->how & LOCK_SH)
1868 		lf.l_type = F_RDLCK;
1869 	else {
1870 		error = EBADF;
1871 		goto done2;
1872 	}
1873 	FILE_LOCK(fp);
1874 	fp->f_flag |= FHASLOCK;
1875 	FILE_UNLOCK(fp);
1876 	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1877 	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
1878 done2:
1879 	fdrop(fp, td);
1880 	mtx_unlock(&Giant);
1881 	return (error);
1882 }
1883 
1884 /*
1885  * File Descriptor pseudo-device driver (/dev/fd/).
1886  *
1887  * Opening minor device N dup()s the file (if any) connected to file
1888  * descriptor N belonging to the calling process.  Note that this driver
1889  * consists of only the ``open()'' routine, because all subsequent
1890  * references to this file will be direct to the other driver.
1891  */
1892 /* ARGSUSED */
1893 static int
1894 fdopen(dev, mode, type, td)
1895 	dev_t dev;
1896 	int mode, type;
1897 	struct thread *td;
1898 {
1899 
1900 	/*
1901 	 * XXX Kludge: set curthread->td_dupfd to contain the value of the
1902 	 * the file descriptor being sought for duplication. The error
1903 	 * return ensures that the vnode for this device will be released
1904 	 * by vn_open. Open will detect this special error and take the
1905 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1906 	 * will simply report the error.
1907 	 */
1908 	td->td_dupfd = dev2unit(dev);
1909 	return (ENODEV);
1910 }
1911 
1912 /*
1913  * Duplicate the specified descriptor to a free descriptor.
1914  */
1915 int
1916 dupfdopen(td, fdp, indx, dfd, mode, error)
1917 	struct thread *td;
1918 	struct filedesc *fdp;
1919 	int indx, dfd;
1920 	int mode;
1921 	int error;
1922 {
1923 	register struct file *wfp;
1924 	struct file *fp;
1925 
1926 	/*
1927 	 * If the to-be-dup'd fd number is greater than the allowed number
1928 	 * of file descriptors, or the fd to be dup'd has already been
1929 	 * closed, then reject.
1930 	 */
1931 	FILEDESC_LOCK(fdp);
1932 	if ((u_int)dfd >= fdp->fd_nfiles ||
1933 	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1934 		FILEDESC_UNLOCK(fdp);
1935 		return (EBADF);
1936 	}
1937 
1938 	/*
1939 	 * There are two cases of interest here.
1940 	 *
1941 	 * For ENODEV simply dup (dfd) to file descriptor
1942 	 * (indx) and return.
1943 	 *
1944 	 * For ENXIO steal away the file structure from (dfd) and
1945 	 * store it in (indx).  (dfd) is effectively closed by
1946 	 * this operation.
1947 	 *
1948 	 * Any other error code is just returned.
1949 	 */
1950 	switch (error) {
1951 	case ENODEV:
1952 		/*
1953 		 * Check that the mode the file is being opened for is a
1954 		 * subset of the mode of the existing descriptor.
1955 		 */
1956 		FILE_LOCK(wfp);
1957 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1958 			FILE_UNLOCK(wfp);
1959 			FILEDESC_UNLOCK(fdp);
1960 			return (EACCES);
1961 		}
1962 		fp = fdp->fd_ofiles[indx];
1963 #if 0
1964 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1965 			(void) munmapfd(td, indx);
1966 #endif
1967 		fdp->fd_ofiles[indx] = wfp;
1968 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1969 		fhold_locked(wfp);
1970 		FILE_UNLOCK(wfp);
1971 		if (indx > fdp->fd_lastfile)
1972 			fdp->fd_lastfile = indx;
1973 		if (fp != NULL)
1974 			FILE_LOCK(fp);
1975 		FILEDESC_UNLOCK(fdp);
1976 		/*
1977 		 * we now own the reference to fp that the ofiles[] array
1978 		 * used to own.  Release it.
1979 		 */
1980 		if (fp != NULL)
1981 			fdrop_locked(fp, td);
1982 		return (0);
1983 
1984 	case ENXIO:
1985 		/*
1986 		 * Steal away the file pointer from dfd, and stuff it into indx.
1987 		 */
1988 		fp = fdp->fd_ofiles[indx];
1989 #if 0
1990 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1991 			(void) munmapfd(td, indx);
1992 #endif
1993 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1994 		fdp->fd_ofiles[dfd] = NULL;
1995 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1996 		fdp->fd_ofileflags[dfd] = 0;
1997 
1998 		/*
1999 		 * Complete the clean up of the filedesc structure by
2000 		 * recomputing the various hints.
2001 		 */
2002 		if (indx > fdp->fd_lastfile) {
2003 			fdp->fd_lastfile = indx;
2004 		} else {
2005 			while (fdp->fd_lastfile > 0 &&
2006 			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
2007 				fdp->fd_lastfile--;
2008 			}
2009 			if (dfd < fdp->fd_freefile)
2010 				fdp->fd_freefile = dfd;
2011 		}
2012 		if (fp != NULL)
2013 			FILE_LOCK(fp);
2014 		FILEDESC_UNLOCK(fdp);
2015 
2016 		/*
2017 		 * we now own the reference to fp that the ofiles[] array
2018 		 * used to own.  Release it.
2019 		 */
2020 		if (fp != NULL)
2021 			fdrop_locked(fp, td);
2022 		return (0);
2023 
2024 	default:
2025 		FILEDESC_UNLOCK(fdp);
2026 		return (error);
2027 	}
2028 	/* NOTREACHED */
2029 }
2030 
2031 /*
2032  * Get file structures.
2033  */
2034 static int
2035 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
2036 {
2037 	int error;
2038 	struct file *fp;
2039 
2040 	sx_slock(&filelist_lock);
2041 	if (!req->oldptr) {
2042 		/*
2043 		 * overestimate by 10 files
2044 		 */
2045 		error = SYSCTL_OUT(req, 0, sizeof(filehead) +
2046 				   (nfiles + 10) * sizeof(struct file));
2047 		sx_sunlock(&filelist_lock);
2048 		return (error);
2049 	}
2050 
2051 	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
2052 	if (error) {
2053 		sx_sunlock(&filelist_lock);
2054 		return (error);
2055 	}
2056 
2057 	/*
2058 	 * followed by an array of file structures
2059 	 */
2060 	LIST_FOREACH(fp, &filehead, f_list) {
2061 		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
2062 		if (error) {
2063 			sx_sunlock(&filelist_lock);
2064 			return (error);
2065 		}
2066 	}
2067 	sx_sunlock(&filelist_lock);
2068 	return (0);
2069 }
2070 
2071 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
2072     0, 0, sysctl_kern_file, "S,file", "Entire file table");
2073 
2074 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
2075     &maxfilesperproc, 0, "Maximum files allowed open per process");
2076 
2077 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
2078     &maxfiles, 0, "Maximum number of files");
2079 
2080 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
2081     &nfiles, 0, "System-wide number of open files");
2082 
2083 static void
2084 fildesc_drvinit(void *unused)
2085 {
2086 	dev_t dev;
2087 
2088 	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
2089 	make_dev_alias(dev, "stdin");
2090 	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
2091 	make_dev_alias(dev, "stdout");
2092 	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
2093 	make_dev_alias(dev, "stderr");
2094 	if (!devfs_present) {
2095 		int fd;
2096 
2097 		for (fd = 3; fd < NUMFDESC; fd++)
2098 			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
2099 			    "fd/%d", fd);
2100 	}
2101 }
2102 
2103 struct fileops badfileops = {
2104 	badfo_readwrite,
2105 	badfo_readwrite,
2106 	badfo_ioctl,
2107 	badfo_poll,
2108 	badfo_kqfilter,
2109 	badfo_stat,
2110 	badfo_close
2111 };
2112 
2113 static int
2114 badfo_readwrite(fp, uio, cred, flags, td)
2115 	struct file *fp;
2116 	struct uio *uio;
2117 	struct ucred *cred;
2118 	struct thread *td;
2119 	int flags;
2120 {
2121 
2122 	return (EBADF);
2123 }
2124 
2125 static int
2126 badfo_ioctl(fp, com, data, td)
2127 	struct file *fp;
2128 	u_long com;
2129 	caddr_t data;
2130 	struct thread *td;
2131 {
2132 
2133 	return (EBADF);
2134 }
2135 
/*
 * poll stub of badfileops: always returns 0.
 */
static int
badfo_poll(struct file *fp, int events, struct ucred *cred,
    struct thread *td)
{

	return (0);
}
2146 
/*
 * kqfilter stub of badfileops: always returns 0.
 */
static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{

	return (0);
}
2155 
2156 static int
2157 badfo_stat(fp, sb, td)
2158 	struct file *fp;
2159 	struct stat *sb;
2160 	struct thread *td;
2161 {
2162 
2163 	return (EBADF);
2164 }
2165 
2166 static int
2167 badfo_close(fp, td)
2168 	struct file *fp;
2169 	struct thread *td;
2170 {
2171 
2172 	return (EBADF);
2173 }
2174 
2175 SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
2176 					fildesc_drvinit,NULL)
2177 
2178 static void filelistinit(void *);
2179 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2180 
2181 /* ARGSUSED*/
2182 static void
2183 filelistinit(dummy)
2184 	void *dummy;
2185 {
2186 	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
2187 	    NULL, NULL, UMA_ALIGN_PTR, 0);
2188 
2189 	sx_init(&filelist_lock, "filelist lock");
2190 	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
2191 }
2192