xref: /freebsd/sys/kern/kern_descrip.c (revision 3ff369fed2a08f32dda232c10470b949bef9489f)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mutex.h>
49 #include <sys/sysproto.h>
50 #include <sys/conf.h>
51 #include <sys/filedesc.h>
52 #include <sys/kernel.h>
53 #include <sys/sysctl.h>
54 #include <sys/vnode.h>
55 #include <sys/proc.h>
56 #include <sys/namei.h>
57 #include <sys/file.h>
58 #include <sys/stat.h>
59 #include <sys/filio.h>
60 #include <sys/fcntl.h>
61 #include <sys/unistd.h>
62 #include <sys/resourcevar.h>
63 #include <sys/event.h>
64 #include <sys/sx.h>
65 #include <sys/socketvar.h>
66 #include <sys/signalvar.h>
67 
68 #include <machine/limits.h>
69 
70 #include <vm/vm.h>
71 #include <vm/vm_extern.h>
72 #include <vm/uma.h>
73 
/* malloc type used for filedesc structures and expanded descriptor arrays. */
static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
/* malloc type used for the sigio structures allocated by fsetown(). */
static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");

/* UMA zone that struct file objects come from (see falloc()/ffree()). */
uma_zone_t file_zone;
78 
/* Open handler for the descriptor device; defined outside this view. */
static	 d_open_t  fdopen;
/* NOTE(review): not referenced in the visible part of this file — confirm use. */
#define NUMFDESC 64

/* Major device number stored in the switch table below. */
#define CDEV_MAJOR 22
/*
 * Character device switch for the "FD" device.  Only the open entry
 * (fdopen) is specific to this file; every other entry is one of the
 * generic no* handlers.
 */
static struct cdevsw fildesc_cdevsw = {
	/* open */	fdopen,
	/* close */	noclose,
	/* read */	noread,
	/* write */	nowrite,
	/* ioctl */	noioctl,
	/* poll */	nopoll,
	/* mmap */	nommap,
	/* strategy */	nostrategy,
	/* name */	"FD",
	/* maj */	CDEV_MAJOR,
	/* dump */	nodump,
	/* psize */	nopsize,
	/* flags */	0,
};
98 
/*
 * Forward declarations of file-local helpers.  do_dup() is the common
 * back end of dup(), dup2() and fcntl(F_DUPFD).  The badfo_* routines
 * are defined outside the visible part of this file; falloc() points
 * new files at badfileops, which presumably collects them (confirm).
 */
static int do_dup(struct filedesc *fdp, int old, int new, register_t *retval, struct thread *td);
static int badfo_readwrite(struct file *fp, struct uio *uio,
    struct ucred *cred, int flags, struct thread *td);
static int badfo_ioctl(struct file *fp, u_long com, caddr_t data,
    struct thread *td);
static int badfo_poll(struct file *fp, int events,
    struct ucred *cred, struct thread *td);
static int badfo_kqfilter(struct file *fp, struct knote *kn);
static int badfo_stat(struct file *fp, struct stat *sb, struct thread *td);
static int badfo_close(struct file *fp, struct thread *td);
109 
/*
 * Descriptor management.
 *
 * filehead and nfiles are updated by falloc() and ffree() while holding
 * filelist_lock exclusively.  cmask is defined elsewhere and is copied
 * into each new descriptor table by fdinit().
 */
struct filelist filehead;	/* head of list of open files */
int nfiles;			/* actual number of open files */
extern int cmask;
struct sx filelist_lock;	/* sx to protect filelist */
struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
118 
119 /*
120  * System calls on descriptors.
121  */
122 #ifndef _SYS_SYSPROTO_H_
123 struct getdtablesize_args {
124 	int	dummy;
125 };
126 #endif
127 /*
128  * MPSAFE
129  */
130 /* ARGSUSED */
131 int
132 getdtablesize(td, uap)
133 	struct thread *td;
134 	struct getdtablesize_args *uap;
135 {
136 	struct proc *p = td->td_proc;
137 
138 	mtx_lock(&Giant);
139 	td->td_retval[0] =
140 	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
141 	mtx_unlock(&Giant);
142 	return (0);
143 }
144 
145 /*
146  * Duplicate a file descriptor to a particular value.
147  *
148  * note: keep in mind that a potential race condition exists when closing
149  * descriptors from a shared descriptor table (via rfork).
150  */
151 #ifndef _SYS_SYSPROTO_H_
152 struct dup2_args {
153 	u_int	from;
154 	u_int	to;
155 };
156 #endif
157 /*
158  * MPSAFE
159  */
160 /* ARGSUSED */
161 int
162 dup2(td, uap)
163 	struct thread *td;
164 	struct dup2_args *uap;
165 {
166 	struct proc *p = td->td_proc;
167 	register struct filedesc *fdp = td->td_proc->p_fd;
168 	register u_int old = uap->from, new = uap->to;
169 	int i, error;
170 
171 	FILEDESC_LOCK(fdp);
172 retry:
173 	if (old >= fdp->fd_nfiles ||
174 	    fdp->fd_ofiles[old] == NULL ||
175 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
176 	    new >= maxfilesperproc) {
177 		FILEDESC_UNLOCK(fdp);
178 		return (EBADF);
179 	}
180 	if (old == new) {
181 		td->td_retval[0] = new;
182 		FILEDESC_UNLOCK(fdp);
183 		return (0);
184 	}
185 	if (new >= fdp->fd_nfiles) {
186 		if ((error = fdalloc(td, new, &i))) {
187 			FILEDESC_UNLOCK(fdp);
188 			return (error);
189 		}
190 		/*
191 		 * fdalloc() may block, retest everything.
192 		 */
193 		goto retry;
194 	}
195 	error = do_dup(fdp, (int)old, (int)new, td->td_retval, td);
196 	return(error);
197 }
198 
199 /*
200  * Duplicate a file descriptor.
201  */
202 #ifndef _SYS_SYSPROTO_H_
203 struct dup_args {
204 	u_int	fd;
205 };
206 #endif
207 /*
208  * MPSAFE
209  */
210 /* ARGSUSED */
211 int
212 dup(td, uap)
213 	struct thread *td;
214 	struct dup_args *uap;
215 {
216 	register struct filedesc *fdp;
217 	u_int old;
218 	int new, error;
219 
220 	old = uap->fd;
221 	fdp = td->td_proc->p_fd;
222 	FILEDESC_LOCK(fdp);
223 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
224 		FILEDESC_UNLOCK(fdp);
225 		return (EBADF);
226 	}
227 	if ((error = fdalloc(td, 0, &new))) {
228 		FILEDESC_UNLOCK(fdp);
229 		return (error);
230 	}
231 	error = do_dup(fdp, (int)old, new, td->td_retval, td);
232 	return (error);
233 }
234 
/*
 * The file control system call.
 *
 * Looks up uap->fd in the calling process's descriptor table and carries
 * out the operation selected by uap->cmd, with uap->arg as its argument
 * (an integer, or a user-space pointer to a struct flock for the lock
 * commands).  Results, where there are any, go into td->td_retval.
 *
 * Locking: Giant is held for the whole call.  The filedesc lock is taken
 * for the lookup and every switch case is responsible for releasing it
 * on each of its paths; F_DUPFD hands that job to do_dup().  Cases that
 * call out after dropping the filedesc lock first take a reference with
 * fhold() and release it with fdrop().
 *
 * Returns 0 on success, EBADF for a bad descriptor (or a non-vnode file
 * or wrong open mode for the lock commands), EINVAL for an unknown
 * command or bad argument, EOVERFLOW when a SEEK_CUR lock offset would
 * overflow, or an error from the underlying operation.
 */
#ifndef _SYS_SYSPROTO_H_
struct fcntl_args {
	int	fd;
	int	cmd;
	long	arg;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
fcntl(td, uap)
	struct thread *td;
	register struct fcntl_args *uap;
{
	register struct proc *p = td->td_proc;
	register struct filedesc *fdp;
	register struct file *fp;
	register char *pop;
	struct vnode *vp;
	int i, tmp, error = 0, flg = F_POSIX;
	struct flock fl;
	u_int newmin;
	struct proc *leaderp;

	mtx_lock(&Giant);

	fdp = p->p_fd;
	FILEDESC_LOCK(fdp);
	/* The unsigned cast makes a negative fd fail the range check. */
	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		error = EBADF;
		goto done2;
	}
	/* Per-descriptor flag byte (UF_EXCLOSE lives here). */
	pop = &fdp->fd_ofileflags[uap->fd];

	switch (uap->cmd) {
	case F_DUPFD:
		/* Duplicate onto the lowest free descriptor >= arg. */
		newmin = uap->arg;
		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
		    newmin >= maxfilesperproc) {
			FILEDESC_UNLOCK(fdp);
			error = EINVAL;
			break;
		}
		if ((error = fdalloc(td, newmin, &i))) {
			FILEDESC_UNLOCK(fdp);
			break;
		}
		/* do_dup() drops the filedesc lock. */
		error = do_dup(fdp, uap->fd, i, td->td_retval, td);
		break;

	case F_GETFD:
		/* Report the close-on-exec flag. */
		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
		FILEDESC_UNLOCK(fdp);
		break;

	case F_SETFD:
		/* Replace the close-on-exec flag, keeping the other bits. */
		*pop = (*pop &~ UF_EXCLOSE) |
		    (uap->arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
		FILEDESC_UNLOCK(fdp);
		break;

	case F_GETFL:
		/* Read the file status flags under the file's own lock. */
		FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		td->td_retval[0] = OFLAGS(fp->f_flag);
		FILE_UNLOCK(fp);
		break;

	case F_SETFL:
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		/* Replace only the fcntl-settable bits; access mode is kept. */
		fp->f_flag &= ~FCNTLFLAGS;
		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
		/* Tell the object about the new non-blocking setting... */
		tmp = fp->f_flag & FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
		if (error) {
			fdrop(fp, td);
			break;
		}
		/* ...and about the new async setting. */
		tmp = fp->f_flag & FASYNC;
		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
		if (!error) {
			fdrop(fp, td);
			break;
		}
		/*
		 * FIOASYNC failed: switch non-blocking mode back off, both
		 * in f_flag and in the object; the FIOASYNC error is the
		 * one returned.
		 */
		fp->f_flag &= ~FNONBLOCK;
		tmp = 0;
		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
		fdrop(fp, td);
		break;

	case F_GETOWN:
		/* The owner is written straight into td_retval by the ioctl. */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		error = fo_ioctl(fp, FIOGETOWN, (caddr_t)td->td_retval, td);
		fdrop(fp, td);
		break;

	case F_SETOWN:
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, td);
		fdrop(fp, td);
		break;

	case F_SETLKW:
		/* Same as F_SETLK, but the lock request may wait. */
		flg |= F_WAIT;
		/* Fall into F_SETLK */

	case F_SETLK:
		/* Advisory locks are only supported on vnode-backed files. */
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}
		vp = (struct vnode *)fp->f_data;
		/*
		 * copyin/lockop may block
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = (struct vnode *)fp->f_data;

		/* Copy in the lock structure */
		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
		    sizeof(fl));
		if (error) {
			fdrop(fp, td);
			break;
		}
		/*
		 * Make a SEEK_CUR start offset absolute by adding the file
		 * offset, refusing a negative file offset or a sum that
		 * would exceed OFF_MAX.
		 */
		if (fl.l_whence == SEEK_CUR) {
			if (fp->f_offset < 0 ||
			    (fl.l_start > 0 &&
			     fp->f_offset > OFF_MAX - fl.l_start)) {
				fdrop(fp, td);
				error = EOVERFLOW;
				break;
			}
			fl.l_start += fp->f_offset;
		}

		/*
		 * Locks are taken on behalf of p_leader.  Setting a lock
		 * also marks the process with P_ADVLOCK; a read lock needs
		 * the file open for reading, a write lock for writing.
		 */
		switch (fl.l_type) {
		case F_RDLCK:
			if ((fp->f_flag & FREAD) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p);
			p->p_flag |= P_ADVLOCK;
			leaderp = p->p_leader;
			PROC_UNLOCK(p);
			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
			    &fl, flg);
			break;
		case F_WRLCK:
			if ((fp->f_flag & FWRITE) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p);
			p->p_flag |= P_ADVLOCK;
			leaderp = p->p_leader;
			PROC_UNLOCK(p);
			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
			    &fl, flg);
			break;
		case F_UNLCK:
			PROC_LOCK(p);
			leaderp = p->p_leader;
			PROC_UNLOCK(p);
			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_UNLCK,
				&fl, F_POSIX);
			break;
		default:
			error = EINVAL;
			break;
		}
		/* Common exit for the inner switch: release our reference. */
		fdrop(fp, td);
		break;

	case F_GETLK:
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}
		vp = (struct vnode *)fp->f_data;
		/*
		 * copyin/lockop may block
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = (struct vnode *)fp->f_data;

		/* Copy in the lock structure */
		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
		    sizeof(fl));
		if (error) {
			fdrop(fp, td);
			break;
		}
		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
		    fl.l_type != F_UNLCK) {
			fdrop(fp, td);
			error = EINVAL;
			break;
		}
		/*
		 * Make a SEEK_CUR start offset absolute, rejecting sums
		 * that would leave the [OFF_MIN, OFF_MAX] range.
		 */
		if (fl.l_whence == SEEK_CUR) {
			if ((fl.l_start > 0 &&
			     fp->f_offset > OFF_MAX - fl.l_start) ||
			    (fl.l_start < 0 &&
			     fp->f_offset < OFF_MIN - fl.l_start)) {
				fdrop(fp, td);
				error = EOVERFLOW;
				break;
			}
			fl.l_start += fp->f_offset;
		}
		/*
		 * NOTE(review): p_leader is read here without PROC_LOCK,
		 * unlike the F_SETLK paths above — confirm this is intended.
		 */
		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
			    &fl, F_POSIX);
		fdrop(fp, td);
		/* Hand the (possibly updated) lock description back. */
		if (error == 0) {
			error = copyout((caddr_t)&fl,
				    (caddr_t)(intptr_t)uap->arg, sizeof(fl));
		}
		break;
	default:
		FILEDESC_UNLOCK(fdp);
		error = EINVAL;
		break;
	}
done2:
	mtx_unlock(&Giant);
	return (error);
}
477 
478 /*
479  * Common code for dup, dup2, and fcntl(F_DUPFD).
480  * filedesc must be locked, but will be unlocked as a side effect.
481  */
482 static int
483 do_dup(fdp, old, new, retval, td)
484 	register struct filedesc *fdp;
485 	register int old, new;
486 	register_t *retval;
487 	struct thread *td;
488 {
489 	struct file *fp;
490 	struct file *delfp;
491 
492 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
493 
494 	/*
495 	 * Save info on the descriptor being overwritten.  We have
496 	 * to do the unmap now, but we cannot close it without
497 	 * introducing an ownership race for the slot.
498 	 */
499 	delfp = fdp->fd_ofiles[new];
500 #if 0
501 	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
502 		(void) munmapfd(td, new);
503 #endif
504 
505 	/*
506 	 * Duplicate the source descriptor, update lastfile
507 	 */
508 	fp = fdp->fd_ofiles[old];
509 	fdp->fd_ofiles[new] = fp;
510 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
511 	fhold(fp);
512 	if (new > fdp->fd_lastfile)
513 		fdp->fd_lastfile = new;
514 	*retval = new;
515 
516 	FILEDESC_UNLOCK(fdp);
517 
518 	/*
519 	 * If we dup'd over a valid file, we now own the reference to it
520 	 * and must dispose of it using closef() semantics (as if a
521 	 * close() were performed on it).
522 	 */
523 	if (delfp) {
524 		mtx_lock(&Giant);
525 		(void) closef(delfp, td);
526 		mtx_unlock(&Giant);
527 	}
528 	return (0);
529 }
530 
531 /*
532  * If sigio is on the list associated with a process or process group,
533  * disable signalling from the device, remove sigio from the list and
534  * free sigio.
535  */
536 void
537 funsetown(sigiop)
538 	struct sigio **sigiop;
539 {
540 	struct sigio *sigio;
541 
542 	SIGIO_LOCK();
543 	sigio = *sigiop;
544 	if (sigio == NULL) {
545 		SIGIO_UNLOCK();
546 		return;
547 	}
548 	*(sigio->sio_myref) = NULL;
549 	if ((sigio)->sio_pgid < 0) {
550 		struct pgrp *pg = (sigio)->sio_pgrp;
551 		PGRP_LOCK(pg);
552 		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
553 			     sigio, sio_pgsigio);
554 		PGRP_UNLOCK(pg);
555 	} else {
556 		struct proc *p = (sigio)->sio_proc;
557 		PROC_LOCK(p);
558 		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
559 			     sigio, sio_pgsigio);
560 		PROC_UNLOCK(p);
561 	}
562 	SIGIO_UNLOCK();
563 	crfree(sigio->sio_ucred);
564 	FREE(sigio, M_SIGIO);
565 }
566 
567 /*
568  * Free a list of sigio structures.
569  * We only need to lock the SIGIO_LOCK because we have made ourselves
570  * inaccessable to callers of fsetown and therefore do not need to lock
571  * the proc or pgrp struct for the list manipulation.
572  */
573 void
574 funsetownlst(sigiolst)
575 	struct sigiolst *sigiolst;
576 {
577 	struct sigio *sigio;
578 	struct proc *p;
579 	struct pgrp *pg;
580 
581 	sigio = SLIST_FIRST(sigiolst);
582 	if (sigio == NULL)
583 		return;
584 
585 	p = NULL;
586 	pg = NULL;
587 
588 	/*
589 	 * Every entry of the list should belong
590 	 * to a single proc or pgrp.
591 	 */
592 	if (sigio->sio_pgid < 0) {
593 		pg = sigio->sio_pgrp;
594 		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
595 	} else /* if (sigio->sio_pgid > 0) */ {
596 		p = sigio->sio_proc;
597 		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
598 	}
599 
600 	SIGIO_LOCK();
601 	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
602 		*(sigio->sio_myref) = NULL;
603 		if (pg != NULL) {
604 			KASSERT(sigio->sio_pgid < 0,
605 			    ("Proc sigio in pgrp sigio list"));
606 			KASSERT(sigio->sio_pgrp == pg,
607 			    ("Bogus pgrp in sigio list"));
608 			PGRP_LOCK(pg);
609 			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
610 			    sio_pgsigio);
611 			PGRP_UNLOCK(pg);
612 		} else /* if (p != NULL) */ {
613 			KASSERT(sigio->sio_pgid > 0,
614 			    ("Pgrp sigio in proc sigio list"));
615 			KASSERT(sigio->sio_proc == p,
616 			    ("Bogus proc in sigio list"));
617 			PROC_LOCK(p);
618 			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
619 			    sio_pgsigio);
620 			PROC_UNLOCK(p);
621 		}
622 		SIGIO_UNLOCK();
623 		crfree(sigio->sio_ucred);
624 		FREE(sigio, M_SIGIO);
625 		SIGIO_LOCK();
626 	}
627 	SIGIO_UNLOCK();
628 }
629 
630 /*
631  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
632  *
633  * After permission checking, add a sigio structure to the sigio list for
634  * the process or process group.
635  */
636 int
637 fsetown(pgid, sigiop)
638 	pid_t pgid;
639 	struct sigio **sigiop;
640 {
641 	struct proc *proc;
642 	struct pgrp *pgrp;
643 	struct sigio *sigio;
644 	int ret;
645 
646 	if (pgid == 0) {
647 		funsetown(sigiop);
648 		return (0);
649 	}
650 
651 	ret = 0;
652 
653 	/* Allocate and fill in the new sigio out of locks. */
654 	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
655 	sigio->sio_pgid = pgid;
656 	sigio->sio_ucred = crhold(curthread->td_ucred);
657 	sigio->sio_myref = sigiop;
658 
659 	sx_slock(&proctree_lock);
660 	if (pgid > 0) {
661 		proc = pfind(pgid);
662 		if (proc == NULL) {
663 			ret = ESRCH;
664 			goto fail;
665 		}
666 
667 		/*
668 		 * Policy - Don't allow a process to FSETOWN a process
669 		 * in another session.
670 		 *
671 		 * Remove this test to allow maximum flexibility or
672 		 * restrict FSETOWN to the current process or process
673 		 * group for maximum safety.
674 		 */
675 		PROC_UNLOCK(proc);
676 		if (proc->p_session != curthread->td_proc->p_session) {
677 			ret = EPERM;
678 			goto fail;
679 		}
680 
681 		pgrp = NULL;
682 	} else /* if (pgid < 0) */ {
683 		pgrp = pgfind(-pgid);
684 		if (pgrp == NULL) {
685 			ret = ESRCH;
686 			goto fail;
687 		}
688 		PGRP_UNLOCK(pgrp);
689 
690 		/*
691 		 * Policy - Don't allow a process to FSETOWN a process
692 		 * in another session.
693 		 *
694 		 * Remove this test to allow maximum flexibility or
695 		 * restrict FSETOWN to the current process or process
696 		 * group for maximum safety.
697 		 */
698 		if (pgrp->pg_session != curthread->td_proc->p_session) {
699 			ret = EPERM;
700 			goto fail;
701 		}
702 
703 		proc = NULL;
704 	}
705 	funsetown(sigiop);
706 	if (pgid > 0) {
707 		PROC_LOCK(proc);
708 		/*
709 		 * since funsetownlst() is called without the proctree
710 		 * locked we need to check for P_WEXIT.
711 		 * XXX: is ESRCH correct?
712 		 */
713 		if ((proc->p_flag & P_WEXIT) != 0) {
714 			PROC_UNLOCK(proc);
715 			ret = ESRCH;
716 			goto fail;
717 		}
718 		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
719 		sigio->sio_proc = proc;
720 		PROC_UNLOCK(proc);
721 	} else {
722 		PGRP_LOCK(pgrp);
723 		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
724 		sigio->sio_pgrp = pgrp;
725 		PGRP_UNLOCK(pgrp);
726 	}
727 	sx_sunlock(&proctree_lock);
728 	SIGIO_LOCK();
729 	*sigiop = sigio;
730 	SIGIO_UNLOCK();
731 	return (0);
732 
733 fail:
734 	sx_sunlock(&proctree_lock);
735 	crfree(sigio->sio_ucred);
736 	FREE(sigio, M_SIGIO);
737 	return (ret);
738 }
739 
740 /*
741  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
742  */
743 pid_t
744 fgetown(sigio)
745 	struct sigio *sigio;
746 {
747 	return (sigio != NULL ? sigio->sio_pgid : 0);
748 }
749 
750 /*
751  * Close a file descriptor.
752  */
753 #ifndef _SYS_SYSPROTO_H_
754 struct close_args {
755         int     fd;
756 };
757 #endif
758 /*
759  * MPSAFE
760  */
761 /* ARGSUSED */
762 int
763 close(td, uap)
764 	struct thread *td;
765 	struct close_args *uap;
766 {
767 	register struct filedesc *fdp;
768 	register struct file *fp;
769 	register int fd = uap->fd;
770 	int error = 0;
771 
772 	mtx_lock(&Giant);
773 	fdp = td->td_proc->p_fd;
774 	FILEDESC_LOCK(fdp);
775 	if ((unsigned)fd >= fdp->fd_nfiles ||
776 	    (fp = fdp->fd_ofiles[fd]) == NULL) {
777 		FILEDESC_UNLOCK(fdp);
778 		error = EBADF;
779 		goto done2;
780 	}
781 #if 0
782 	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
783 		(void) munmapfd(td, fd);
784 #endif
785 	fdp->fd_ofiles[fd] = NULL;
786 	fdp->fd_ofileflags[fd] = 0;
787 
788 	/*
789 	 * we now hold the fp reference that used to be owned by the descriptor
790 	 * array.
791 	 */
792 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
793 		fdp->fd_lastfile--;
794 	if (fd < fdp->fd_freefile)
795 		fdp->fd_freefile = fd;
796 	if (fd < fdp->fd_knlistsize) {
797 		FILEDESC_UNLOCK(fdp);
798 		knote_fdclose(td, fd);
799 	} else
800 		FILEDESC_UNLOCK(fdp);
801 
802 	error = closef(fp, td);
803 done2:
804 	mtx_unlock(&Giant);
805 	return(error);
806 }
807 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor, converted with
 * cvtstat() to the old `struct ostat' layout before being copied out
 * to uap->sb.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
ofstat(td, uap)
	struct thread *td;
	register struct ofstat_args *uap;
{
	struct file *fp;
	struct stat ub;
	struct ostat oub;
	int error;

	mtx_lock(&Giant);
	error = fget(td, uap->fd, &fp);
	if (error == 0) {
		error = fo_stat(fp, &ub, td);
		if (error == 0) {
			cvtstat(&ub, &oub);
			error = copyout((caddr_t)&oub, (caddr_t)uap->sb,
			    sizeof (oub));
		}
		/* Release the reference obtained from fget(). */
		fdrop(fp, td);
	}
	mtx_unlock(&Giant);
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
846 
847 /*
848  * Return status information about a file descriptor.
849  */
850 #ifndef _SYS_SYSPROTO_H_
851 struct fstat_args {
852 	int	fd;
853 	struct	stat *sb;
854 };
855 #endif
856 /*
857  * MPSAFE
858  */
859 /* ARGSUSED */
860 int
861 fstat(td, uap)
862 	struct thread *td;
863 	struct fstat_args *uap;
864 {
865 	struct file *fp;
866 	struct stat ub;
867 	int error;
868 
869 	mtx_lock(&Giant);
870 	if ((error = fget(td, uap->fd, &fp)) != 0)
871 		goto done2;
872 	error = fo_stat(fp, &ub, td);
873 	if (error == 0)
874 		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
875 	fdrop(fp, td);
876 done2:
877 	mtx_unlock(&Giant);
878 	return (error);
879 }
880 
881 /*
882  * Return status information about a file descriptor.
883  */
884 #ifndef _SYS_SYSPROTO_H_
885 struct nfstat_args {
886 	int	fd;
887 	struct	nstat *sb;
888 };
889 #endif
890 /*
891  * MPSAFE
892  */
893 /* ARGSUSED */
894 int
895 nfstat(td, uap)
896 	struct thread *td;
897 	register struct nfstat_args *uap;
898 {
899 	struct file *fp;
900 	struct stat ub;
901 	struct nstat nub;
902 	int error;
903 
904 	mtx_lock(&Giant);
905 	if ((error = fget(td, uap->fd, &fp)) != 0)
906 		goto done2;
907 	error = fo_stat(fp, &ub, td);
908 	if (error == 0) {
909 		cvtnstat(&ub, &nub);
910 		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
911 	}
912 	fdrop(fp, td);
913 done2:
914 	mtx_unlock(&Giant);
915 	return (error);
916 }
917 
918 /*
919  * Return pathconf information about a file descriptor.
920  */
921 #ifndef _SYS_SYSPROTO_H_
922 struct fpathconf_args {
923 	int	fd;
924 	int	name;
925 };
926 #endif
927 /*
928  * MPSAFE
929  */
930 /* ARGSUSED */
931 int
932 fpathconf(td, uap)
933 	struct thread *td;
934 	register struct fpathconf_args *uap;
935 {
936 	struct file *fp;
937 	struct vnode *vp;
938 	int error;
939 
940 	if ((error = fget(td, uap->fd, &fp)) != 0)
941 		return (error);
942 
943 	switch (fp->f_type) {
944 	case DTYPE_PIPE:
945 	case DTYPE_SOCKET:
946 		if (uap->name != _PC_PIPE_BUF) {
947 			error = EINVAL;
948 		} else {
949 			td->td_retval[0] = PIPE_BUF;
950 			error = 0;
951 		}
952 		break;
953 	case DTYPE_FIFO:
954 	case DTYPE_VNODE:
955 		vp = (struct vnode *)fp->f_data;
956 		mtx_lock(&Giant);
957 		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
958 		mtx_unlock(&Giant);
959 		break;
960 	default:
961 		error = EOPNOTSUPP;
962 		break;
963 	}
964 	fdrop(fp, td);
965 	return(error);
966 }
967 
/*
 * Allocate a file descriptor for the process.
 *
 * Finds the lowest free slot at or above `want' in the descriptor table
 * of td's process, growing the table if necessary, and returns its index
 * in *result.  The slot's flag byte is cleared and the fd_lastfile and
 * fd_freefile hints are updated, but the slot's file pointer is left
 * NULL for the caller to fill in.
 *
 * The filedesc lock must be held on entry and is held on return, but it
 * is dropped and retaken around the MALLOC/FREE calls made while growing
 * the table, so the table may have changed by the time this returns.
 *
 * Returns 0 on success, or EMFILE once the table has reached the smaller
 * of RLIMIT_NOFILE and maxfilesperproc with no free slot available.
 */
/* Number of table expansions performed; exported read-only as debug.fdexpand. */
static int fdexpand;
SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");

int
fdalloc(td, want, result)
	struct thread *td;
	int want;
	int *result;
{
	struct proc *p = td->td_proc;
	register struct filedesc *fdp = td->td_proc->p_fd;
	register int i;
	int lim, last, nfiles;
	struct file **newofile, **oldofile;
	char *newofileflags;

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile.  If that fails, consider
	 * expanding the ofile array.
	 */
	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
	for (;;) {
		last = min(fdp->fd_nfiles, lim);
		if ((i = want) < fdp->fd_freefile)
			i = fdp->fd_freefile;
		for (; i < last; i++) {
			if (fdp->fd_ofiles[i] == NULL) {
				fdp->fd_ofileflags[i] = 0;
				if (i > fdp->fd_lastfile)
					fdp->fd_lastfile = i;
				/*
				 * Move the free-slot hint only if the search
				 * was not started above it.
				 */
				if (want <= fdp->fd_freefile)
					fdp->fd_freefile = i;
				*result = i;
				return (0);
			}
		}

		/*
		 * No space in current array.  Expand?
		 */
		if (fdp->fd_nfiles >= lim)
			return (EMFILE);
		/* Grow to NDEXTENT first, then double on each expansion. */
		if (fdp->fd_nfiles < NDEXTENT)
			nfiles = NDEXTENT;
		else
			nfiles = 2 * fdp->fd_nfiles;
		/* The allocation is made under Giant, without the filedesc lock. */
		FILEDESC_UNLOCK(fdp);
		mtx_lock(&Giant);
		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
		    M_FILEDESC, M_WAITOK);
		mtx_unlock(&Giant);
		FILEDESC_LOCK(fdp);

		/*
		 * deal with file-table extend race that might have occurred
		 * when malloc was blocked.
		 */
		if (fdp->fd_nfiles >= nfiles) {
			FILEDESC_UNLOCK(fdp);
			mtx_lock(&Giant);
			FREE(newofile, M_FILEDESC);
			mtx_unlock(&Giant);
			FILEDESC_LOCK(fdp);
			continue;
		}
		/* The flag bytes live directly after the nfiles file pointers. */
		newofileflags = (char *) &newofile[nfiles];
		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		bcopy(fdp->fd_ofiles, newofile,
			(i = sizeof(struct file *) * fdp->fd_nfiles));
		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
		bcopy(fdp->fd_ofileflags, newofileflags,
			(i = sizeof(char) * fdp->fd_nfiles));
		bzero(newofileflags + i, nfiles * sizeof(char) - i);
		/*
		 * A table of at most NDFILE entries is not freed: fdinit()
		 * points such tables at arrays embedded in struct filedesc0.
		 */
		if (fdp->fd_nfiles > NDFILE)
			oldofile = fdp->fd_ofiles;
		else
			oldofile = NULL;
		fdp->fd_ofiles = newofile;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = nfiles;
		fdexpand++;
		if (oldofile != NULL) {
			FILEDESC_UNLOCK(fdp);
			mtx_lock(&Giant);
			FREE(oldofile, M_FILEDESC);
			mtx_unlock(&Giant);
			FILEDESC_LOCK(fdp);
		}
		/* Loop back and repeat the search in the larger table. */
	}
	/* Not reached: the loop above only exits through a return. */
	return (0);
}
1068 
1069 /*
1070  * Check to see whether n user file descriptors
1071  * are available to the process p.
1072  */
1073 int
1074 fdavail(td, n)
1075 	struct thread *td;
1076 	register int n;
1077 {
1078 	struct proc *p = td->td_proc;
1079 	register struct filedesc *fdp = td->td_proc->p_fd;
1080 	register struct file **fpp;
1081 	register int i, lim, last;
1082 
1083 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1084 
1085 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1086 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
1087 		return (1);
1088 
1089 	last = min(fdp->fd_nfiles, lim);
1090 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
1091 	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
1092 		if (*fpp == NULL && --n <= 0)
1093 			return (1);
1094 	}
1095 	return (0);
1096 }
1097 
1098 /*
1099  * Create a new open file structure and allocate
1100  * a file decriptor for the process that refers to it.
1101  */
1102 int
1103 falloc(td, resultfp, resultfd)
1104 	register struct thread *td;
1105 	struct file **resultfp;
1106 	int *resultfd;
1107 {
1108 	struct proc *p = td->td_proc;
1109 	register struct file *fp, *fq;
1110 	int error, i;
1111 
1112 	sx_xlock(&filelist_lock);
1113 	if (nfiles >= maxfiles) {
1114 		sx_xunlock(&filelist_lock);
1115 		tablefull("file");
1116 		return (ENFILE);
1117 	}
1118 	nfiles++;
1119 	sx_xunlock(&filelist_lock);
1120 	/*
1121 	 * Allocate a new file descriptor.
1122 	 * If the process has file descriptor zero open, add to the list
1123 	 * of open files at that point, otherwise put it at the front of
1124 	 * the list of open files.
1125 	 */
1126 	fp = uma_zalloc(file_zone, M_WAITOK);
1127 	bzero(fp, sizeof(*fp));
1128 
1129 	/*
1130 	 * wait until after malloc (which may have blocked) returns before
1131 	 * allocating the slot, else a race might have shrunk it if we had
1132 	 * allocated it before the malloc.
1133 	 */
1134 	FILEDESC_LOCK(p->p_fd);
1135 	if ((error = fdalloc(td, 0, &i))) {
1136 		FILEDESC_UNLOCK(p->p_fd);
1137 		sx_xlock(&filelist_lock);
1138 		nfiles--;
1139 		sx_xunlock(&filelist_lock);
1140 		uma_zfree(file_zone, fp);
1141 		return (error);
1142 	}
1143 	fp->f_mtxp = mtx_pool_alloc();
1144 	fp->f_gcflag = 0;
1145 	fp->f_count = 1;
1146 	fp->f_cred = crhold(td->td_ucred);
1147 	fp->f_ops = &badfileops;
1148 	fp->f_seqcount = 1;
1149 	FILEDESC_UNLOCK(p->p_fd);
1150 	sx_xlock(&filelist_lock);
1151 	FILEDESC_LOCK(p->p_fd);
1152 	if ((fq = p->p_fd->fd_ofiles[0])) {
1153 		LIST_INSERT_AFTER(fq, fp, f_list);
1154 	} else {
1155 		LIST_INSERT_HEAD(&filehead, fp, f_list);
1156 	}
1157 	p->p_fd->fd_ofiles[i] = fp;
1158 	FILEDESC_UNLOCK(p->p_fd);
1159 	sx_xunlock(&filelist_lock);
1160 	if (resultfp)
1161 		*resultfp = fp;
1162 	if (resultfd)
1163 		*resultfd = i;
1164 	return (0);
1165 }
1166 
1167 /*
1168  * Free a file descriptor.
1169  */
1170 void
1171 ffree(fp)
1172 	register struct file *fp;
1173 {
1174 
1175 	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
1176 	sx_xlock(&filelist_lock);
1177 	LIST_REMOVE(fp, f_list);
1178 	nfiles--;
1179 	sx_xunlock(&filelist_lock);
1180 	crfree(fp->f_cred);
1181 	uma_zfree(file_zone, fp);
1182 }
1183 
1184 /*
1185  * Build a new filedesc structure.
1186  */
1187 struct filedesc *
1188 fdinit(td)
1189 	struct thread *td;
1190 {
1191 	register struct filedesc0 *newfdp;
1192 	register struct filedesc *fdp = td->td_proc->p_fd;
1193 
1194 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1195 	    M_FILEDESC, M_WAITOK | M_ZERO);
1196 	mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1197 	FILEDESC_LOCK(&newfdp->fd_fd);
1198 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1199 	if (newfdp->fd_fd.fd_cdir)
1200 		VREF(newfdp->fd_fd.fd_cdir);
1201 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1202 	if (newfdp->fd_fd.fd_rdir)
1203 		VREF(newfdp->fd_fd.fd_rdir);
1204 	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1205 	if (newfdp->fd_fd.fd_jdir)
1206 		VREF(newfdp->fd_fd.fd_jdir);
1207 
1208 	/* Create the file descriptor table. */
1209 	newfdp->fd_fd.fd_refcnt = 1;
1210 	newfdp->fd_fd.fd_cmask = cmask;
1211 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1212 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1213 	newfdp->fd_fd.fd_nfiles = NDFILE;
1214 	newfdp->fd_fd.fd_knlistsize = -1;
1215 	FILEDESC_UNLOCK(&newfdp->fd_fd);
1216 
1217 	return (&newfdp->fd_fd);
1218 }
1219 
1220 /*
1221  * Share a filedesc structure.
1222  */
1223 struct filedesc *
1224 fdshare(p)
1225 	struct proc *p;
1226 {
1227 	FILEDESC_LOCK(p->p_fd);
1228 	p->p_fd->fd_refcnt++;
1229 	FILEDESC_UNLOCK(p->p_fd);
1230 	return (p->p_fd);
1231 }
1232 
1233 /*
1234  * Copy a filedesc structure.
1235  */
1236 struct filedesc *
1237 fdcopy(td)
1238 	struct thread *td;
1239 {
1240 	register struct filedesc *newfdp, *fdp = td->td_proc->p_fd;
1241 	register struct file **fpp;
1242 	register int i, j;
1243 
1244 	/* Certain daemons might not have file descriptors. */
1245 	if (fdp == NULL)
1246 		return (NULL);
1247 
1248 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1249 
1250 	FILEDESC_UNLOCK(fdp);
1251 	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1252 	    M_FILEDESC, M_WAITOK);
1253 	FILEDESC_LOCK(fdp);
1254 	bcopy(fdp, newfdp, sizeof(struct filedesc));
1255 	FILEDESC_UNLOCK(fdp);
1256 	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
1257 	mtx_init(&newfdp->fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1258 	if (newfdp->fd_cdir)
1259 		VREF(newfdp->fd_cdir);
1260 	if (newfdp->fd_rdir)
1261 		VREF(newfdp->fd_rdir);
1262 	if (newfdp->fd_jdir)
1263 		VREF(newfdp->fd_jdir);
1264 	newfdp->fd_refcnt = 1;
1265 
1266 	/*
1267 	 * If the number of open files fits in the internal arrays
1268 	 * of the open file structure, use them, otherwise allocate
1269 	 * additional memory for the number of descriptors currently
1270 	 * in use.
1271 	 */
1272 	FILEDESC_LOCK(fdp);
1273 	newfdp->fd_lastfile = fdp->fd_lastfile;
1274 	newfdp->fd_nfiles = fdp->fd_nfiles;
1275 	if (newfdp->fd_lastfile < NDFILE) {
1276 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1277 		newfdp->fd_ofileflags =
1278 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
1279 		i = NDFILE;
1280 	} else {
1281 		/*
1282 		 * Compute the smallest multiple of NDEXTENT needed
1283 		 * for the file descriptors currently in use,
1284 		 * allowing the table to shrink.
1285 		 */
1286 retry:
1287 		i = newfdp->fd_nfiles;
1288 		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1289 			i /= 2;
1290 		FILEDESC_UNLOCK(fdp);
1291 		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1292 		    M_FILEDESC, M_WAITOK);
1293 		FILEDESC_LOCK(fdp);
1294 		newfdp->fd_lastfile = fdp->fd_lastfile;
1295 		newfdp->fd_nfiles = fdp->fd_nfiles;
1296 		j = newfdp->fd_nfiles;
1297 		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
1298 			j /= 2;
1299 		if (i != j) {
1300 			/*
1301 			 * The size of the original table has changed.
1302 			 * Go over once again.
1303 			 */
1304 			FILEDESC_UNLOCK(fdp);
1305 			FREE(newfdp->fd_ofiles, M_FILEDESC);
1306 			FILEDESC_LOCK(fdp);
1307 			newfdp->fd_lastfile = fdp->fd_lastfile;
1308 			newfdp->fd_nfiles = fdp->fd_nfiles;
1309 			goto retry;
1310 		}
1311 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1312 	}
1313 	newfdp->fd_nfiles = i;
1314 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1315 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1316 
1317 	/*
1318 	 * kq descriptors cannot be copied.
1319 	 */
1320 	if (newfdp->fd_knlistsize != -1) {
1321 		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1322 		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1323 			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1324 				*fpp = NULL;
1325 				if (i < newfdp->fd_freefile)
1326 					newfdp->fd_freefile = i;
1327 			}
1328 			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1329 				newfdp->fd_lastfile--;
1330 		}
1331 		newfdp->fd_knlist = NULL;
1332 		newfdp->fd_knlistsize = -1;
1333 		newfdp->fd_knhash = NULL;
1334 		newfdp->fd_knhashmask = 0;
1335 	}
1336 
1337 	fpp = newfdp->fd_ofiles;
1338 	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1339 		if (*fpp != NULL) {
1340 			fhold(*fpp);
1341 		}
1342 	}
1343 	return (newfdp);
1344 }
1345 
1346 /*
1347  * Release a filedesc structure.
1348  */
1349 void
1350 fdfree(td)
1351 	struct thread *td;
1352 {
1353 	register struct filedesc *fdp;
1354 	struct file **fpp;
1355 	register int i;
1356 
1357 	fdp = td->td_proc->p_fd;
1358 	/* Certain daemons might not have file descriptors. */
1359 	if (fdp == NULL)
1360 		return;
1361 
1362 	FILEDESC_LOCK(fdp);
1363 	if (--fdp->fd_refcnt > 0) {
1364 		FILEDESC_UNLOCK(fdp);
1365 		return;
1366 	}
1367 	/*
1368 	 * we are the last reference to the structure, we can
1369 	 * safely assume it will not change out from under us.
1370 	 */
1371 	FILEDESC_UNLOCK(fdp);
1372 	fpp = fdp->fd_ofiles;
1373 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1374 		if (*fpp)
1375 			(void) closef(*fpp, td);
1376 	}
1377 
1378 	PROC_LOCK(td->td_proc);
1379 	td->td_proc->p_fd = NULL;
1380 	PROC_UNLOCK(td->td_proc);
1381 
1382 	if (fdp->fd_nfiles > NDFILE)
1383 		FREE(fdp->fd_ofiles, M_FILEDESC);
1384 	if (fdp->fd_cdir)
1385 		vrele(fdp->fd_cdir);
1386 	if (fdp->fd_rdir)
1387 		vrele(fdp->fd_rdir);
1388 	if (fdp->fd_jdir)
1389 		vrele(fdp->fd_jdir);
1390 	if (fdp->fd_knlist)
1391 		FREE(fdp->fd_knlist, M_KQUEUE);
1392 	if (fdp->fd_knhash)
1393 		FREE(fdp->fd_knhash, M_KQUEUE);
1394 	mtx_destroy(&fdp->fd_mtx);
1395 	FREE(fdp, M_FILEDESC);
1396 }
1397 
1398 /*
1399  * For setugid programs, we don't want to people to use that setugidness
1400  * to generate error messages which write to a file which otherwise would
1401  * otherwise be off-limits to the process.
1402  *
1403  * This is a gross hack to plug the hole.  A better solution would involve
1404  * a special vop or other form of generalized access control mechanism.  We
1405  * go ahead and just reject all procfs filesystems accesses as dangerous.
1406  *
1407  * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1408  * sufficient.  We also don't for check setugidness since we know we are.
1409  */
1410 static int
1411 is_unsafe(struct file *fp)
1412 {
1413 	if (fp->f_type == DTYPE_VNODE &&
1414 	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1415 		return (1);
1416 	return (0);
1417 }
1418 
1419 /*
1420  * Make this setguid thing safe, if at all possible.
1421  */
1422 void
1423 setugidsafety(td)
1424 	struct thread *td;
1425 {
1426 	struct filedesc *fdp = td->td_proc->p_fd;
1427 	register int i;
1428 
1429 	/* Certain daemons might not have file descriptors. */
1430 	if (fdp == NULL)
1431 		return;
1432 
1433 	/*
1434 	 * note: fdp->fd_ofiles may be reallocated out from under us while
1435 	 * we are blocked in a close.  Be careful!
1436 	 */
1437 	FILEDESC_LOCK(fdp);
1438 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1439 		if (i > 2)
1440 			break;
1441 		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1442 			struct file *fp;
1443 
1444 #if 0
1445 			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1446 				(void) munmapfd(td, i);
1447 #endif
1448 			if (i < fdp->fd_knlistsize) {
1449 				FILEDESC_UNLOCK(fdp);
1450 				knote_fdclose(td, i);
1451 				FILEDESC_LOCK(fdp);
1452 			}
1453 			/*
1454 			 * NULL-out descriptor prior to close to avoid
1455 			 * a race while close blocks.
1456 			 */
1457 			fp = fdp->fd_ofiles[i];
1458 			fdp->fd_ofiles[i] = NULL;
1459 			fdp->fd_ofileflags[i] = 0;
1460 			if (i < fdp->fd_freefile)
1461 				fdp->fd_freefile = i;
1462 			FILEDESC_UNLOCK(fdp);
1463 			(void) closef(fp, td);
1464 			FILEDESC_LOCK(fdp);
1465 		}
1466 	}
1467 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1468 		fdp->fd_lastfile--;
1469 	FILEDESC_UNLOCK(fdp);
1470 }
1471 
1472 /*
1473  * Close any files on exec?
1474  */
1475 void
1476 fdcloseexec(td)
1477 	struct thread *td;
1478 {
1479 	struct filedesc *fdp = td->td_proc->p_fd;
1480 	register int i;
1481 
1482 	/* Certain daemons might not have file descriptors. */
1483 	if (fdp == NULL)
1484 		return;
1485 
1486 	FILEDESC_LOCK(fdp);
1487 
1488 	/*
1489 	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1490 	 * may block and rip them out from under us.
1491 	 */
1492 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1493 		if (fdp->fd_ofiles[i] != NULL &&
1494 		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1495 			struct file *fp;
1496 
1497 #if 0
1498 			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1499 				(void) munmapfd(td, i);
1500 #endif
1501 			if (i < fdp->fd_knlistsize) {
1502 				FILEDESC_UNLOCK(fdp);
1503 				knote_fdclose(td, i);
1504 				FILEDESC_LOCK(fdp);
1505 			}
1506 			/*
1507 			 * NULL-out descriptor prior to close to avoid
1508 			 * a race while close blocks.
1509 			 */
1510 			fp = fdp->fd_ofiles[i];
1511 			fdp->fd_ofiles[i] = NULL;
1512 			fdp->fd_ofileflags[i] = 0;
1513 			if (i < fdp->fd_freefile)
1514 				fdp->fd_freefile = i;
1515 			FILEDESC_UNLOCK(fdp);
1516 			(void) closef(fp, td);
1517 			FILEDESC_LOCK(fdp);
1518 		}
1519 	}
1520 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1521 		fdp->fd_lastfile--;
1522 	FILEDESC_UNLOCK(fdp);
1523 }
1524 
1525 /*
1526  * It is unsafe for set[ug]id processes to be started with file
1527  * descriptors 0..2 closed, as these descriptors are given implicit
1528  * significance in the Standard C library.  fdcheckstd() will create a
1529  * descriptor referencing /dev/null for each of stdin, stdout, and
1530  * stderr that is not already open.
1531  */
1532 int
1533 fdcheckstd(td)
1534 	struct thread *td;
1535 {
1536 	struct nameidata nd;
1537 	struct filedesc *fdp;
1538 	struct file *fp;
1539 	register_t retval;
1540 	int fd, i, error, flags, devnull;
1541 
1542 	fdp = td->td_proc->p_fd;
1543 	if (fdp == NULL)
1544 		return (0);
1545 	devnull = -1;
1546 	error = 0;
1547 	for (i = 0; i < 3; i++) {
1548 		if (fdp->fd_ofiles[i] != NULL)
1549 			continue;
1550 		if (devnull < 0) {
1551 			error = falloc(td, &fp, &fd);
1552 			if (error != 0)
1553 				break;
1554 			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
1555 			    td);
1556 			flags = FREAD | FWRITE;
1557 			error = vn_open(&nd, &flags, 0);
1558 			if (error != 0) {
1559 				FILEDESC_LOCK(fdp);
1560 				fdp->fd_ofiles[i] = NULL;
1561 				FILEDESC_UNLOCK(fdp);
1562 				fdrop(fp, td);
1563 				break;
1564 			}
1565 			NDFREE(&nd, NDF_ONLY_PNBUF);
1566 			fp->f_data = (caddr_t)nd.ni_vp;
1567 			fp->f_flag = flags;
1568 			fp->f_ops = &vnops;
1569 			fp->f_type = DTYPE_VNODE;
1570 			VOP_UNLOCK(nd.ni_vp, 0, td);
1571 			devnull = fd;
1572 		} else {
1573 			FILEDESC_LOCK(fdp);
1574 			error = fdalloc(td, 0, &fd);
1575 			if (error != 0) {
1576 				FILEDESC_UNLOCK(fdp);
1577 				break;
1578 			}
1579 			error = do_dup(fdp, devnull, fd, &retval, td);
1580 			if (error != 0)
1581 				break;
1582 		}
1583 	}
1584 	return (error);
1585 }
1586 
1587 /*
1588  * Internal form of close.
1589  * Decrement reference count on file structure.
1590  * Note: td may be NULL when closing a file
1591  * that was being passed in a message.
1592  */
1593 int
1594 closef(fp, td)
1595 	register struct file *fp;
1596 	register struct thread *td;
1597 {
1598 	struct vnode *vp;
1599 	struct flock lf;
1600 
1601 	if (fp == NULL)
1602 		return (0);
1603 	/*
1604 	 * POSIX record locking dictates that any close releases ALL
1605 	 * locks owned by this process.  This is handled by setting
1606 	 * a flag in the unlock to free ONLY locks obeying POSIX
1607 	 * semantics, and not to free BSD-style file locks.
1608 	 * If the descriptor was in a message, POSIX-style locks
1609 	 * aren't passed with the descriptor.
1610 	 */
1611 	if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1612 	    fp->f_type == DTYPE_VNODE) {
1613 		lf.l_whence = SEEK_SET;
1614 		lf.l_start = 0;
1615 		lf.l_len = 0;
1616 		lf.l_type = F_UNLCK;
1617 		vp = (struct vnode *)fp->f_data;
1618 		(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1619 		    F_UNLCK, &lf, F_POSIX);
1620 	}
1621 	return (fdrop(fp, td));
1622 }
1623 
/*
 * Drop a reference on the struct file passed in.  This is the unlocked
 * entry point: it takes the file lock and hands off to fdrop_locked(),
 * whose result is returned.
 */
int
fdrop(struct file *fp, struct thread *td)
{
	int error;

	FILE_LOCK(fp);
	error = fdrop_locked(fp, td);
	return (error);
}
1637 
1638 /*
1639  * Extract the file pointer associated with the specified descriptor for
1640  * the current user process.
1641  *
1642  * If the descriptor doesn't exist, EBADF is returned.
1643  *
1644  * If the descriptor exists but doesn't match 'flags' then
1645  * return EBADF for read attempts and EINVAL for write attempts.
1646  *
1647  * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
1648  * It should be droped with fdrop().
1649  * If it is not set, then the refcount will not be bumped however the
1650  * thread's filedesc struct will be returned locked (for fgetsock).
1651  *
1652  * If an error occured the non-zero error is returned and *fpp is set to NULL.
1653  * Otherwise *fpp is set and zero is returned.
1654  */
1655 static __inline
1656 int
1657 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1658 {
1659 	struct filedesc *fdp;
1660 	struct file *fp;
1661 
1662 	*fpp = NULL;
1663 	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1664 		return(EBADF);
1665 	FILEDESC_LOCK(fdp);
1666 	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1667 		FILEDESC_UNLOCK(fdp);
1668 		return(EBADF);
1669 	}
1670 
1671 	/*
1672 	 * Note: FREAD failures returns EBADF to maintain backwards
1673 	 * compatibility with what routines returned before.
1674 	 *
1675 	 * Only one flag, or 0, may be specified.
1676 	 */
1677 	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1678 		FILEDESC_UNLOCK(fdp);
1679 		return(EBADF);
1680 	}
1681 	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1682 		FILEDESC_UNLOCK(fdp);
1683 		return(EINVAL);
1684 	}
1685 	if (hold) {
1686 		fhold(fp);
1687 		FILEDESC_UNLOCK(fdp);
1688 	}
1689 	*fpp = fp;
1690 	return(0);
1691 }
1692 
/*
 * Look up descriptor fd for td and return the file in *fpp.  Calls
 * _fget() with no access-mode requirement (flags 0) and hold set, so on
 * success the file carries an extra reference.  Returns _fget()'s result.
 */
int
fget(struct thread *td, int fd, struct file **fpp)
{
    return(_fget(td, fd, fpp, 0, 1));
}
1698 
/*
 * Like fget(), but calls _fget() with FREAD, so the lookup fails unless
 * the file has FREAD set in f_flag.  Returns _fget()'s result.
 */
int
fget_read(struct thread *td, int fd, struct file **fpp)
{
    return(_fget(td, fd, fpp, FREAD, 1));
}
1704 
/*
 * Like fget(), but calls _fget() with FWRITE, so the lookup fails unless
 * the file has FWRITE set in f_flag.  Returns _fget()'s result.
 */
int
fget_write(struct thread *td, int fd, struct file **fpp)
{
    return(_fget(td, fd, fpp, FWRITE, 1));
}
1710 
1711 /*
1712  * Like fget() but loads the underlying vnode, or returns an error if
1713  * the descriptor does not represent a vnode.  Note that pipes use vnodes
1714  * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1715  * error).  The returned vnode will be vref()d.
1716  */
1717 
1718 static __inline
1719 int
1720 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1721 {
1722 	struct file *fp;
1723 	int error;
1724 
1725 	*vpp = NULL;
1726 	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1727 		return (error);
1728 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1729 		error = EINVAL;
1730 	} else {
1731 		*vpp = (struct vnode *)fp->f_data;
1732 		vref(*vpp);
1733 	}
1734 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1735 	return (error);
1736 }
1737 
/*
 * Return in *vpp the vnode behind descriptor fd.  Calls _fgetvp() with
 * flags 0 (no access mode requested) and returns its result.
 */
int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{
	return(_fgetvp(td, fd, vpp, 0));
}
1743 
/*
 * Variant of fgetvp() that passes FREAD as the flags argument of
 * _fgetvp().  Returns _fgetvp()'s result.
 */
int
fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
{
	return(_fgetvp(td, fd, vpp, FREAD));
}
1749 
/*
 * Variant of fgetvp() that passes FWRITE as the flags argument of
 * _fgetvp().  Returns _fgetvp()'s result.
 */
int
fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
{
	return(_fgetvp(td, fd, vpp, FWRITE));
}
1755 
1756 /*
1757  * Like fget() but loads the underlying socket, or returns an error if
1758  * the descriptor does not represent a socket.
1759  *
1760  * We bump the ref count on the returned socket.  XXX Also obtain the SX lock in
1761  * the future.
1762  */
1763 int
1764 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1765 {
1766 	struct file *fp;
1767 	int error;
1768 
1769 	*spp = NULL;
1770 	if (fflagp)
1771 		*fflagp = 0;
1772 	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1773 		return (error);
1774 	if (fp->f_type != DTYPE_SOCKET) {
1775 		error = ENOTSOCK;
1776 	} else {
1777 		*spp = (struct socket *)fp->f_data;
1778 		if (fflagp)
1779 			*fflagp = fp->f_flag;
1780 		soref(*spp);
1781 	}
1782 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1783 	return(error);
1784 }
1785 
/*
 * Drop the reference count on the socket and XXX release the SX lock in
 * the future.  The last reference closes the socket.
 *
 * This is the counterpart of fgetsock(): it releases, via sorele(), the
 * reference that fgetsock() took with soref().
 */
void
fputsock(struct socket *so)
{
	sorele(so);
}
1795 
/*
 * Drop a reference on the struct file passed in.  When the reference
 * count hits zero, any flock()-style lock recorded by FHASLOCK is
 * released, the file's close method is invoked and the structure is
 * freed with ffree().
 * Expects struct file locked, and will unlock it.
 *
 * Returns 0 if references remain, otherwise the result of fo_close()
 * (0 when the file still uses badfileops).
 */
int
fdrop_locked(fp, td)
	struct file *fp;
	struct thread *td;
{
	struct flock lf;
	struct vnode *vp;
	int error;

	FILE_LOCK_ASSERT(fp, MA_OWNED);

	/* Not the last reference: just unlock and leave. */
	if (--fp->f_count > 0) {
		FILE_UNLOCK(fp);
		return (0);
	}
	/* The teardown below runs under Giant. */
	mtx_lock(&Giant);
	if (fp->f_count < 0)
		panic("fdrop: count < 0");
	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
		/* Whole-file unlock request: start 0, length 0. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		vp = (struct vnode *)fp->f_data;
		/* Release the file lock before calling into the vnode layer. */
		FILE_UNLOCK(fp);
		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
	} else
		FILE_UNLOCK(fp);
	/* A file still using badfileops has no close method to call. */
	if (fp->f_ops != &badfileops)
		error = fo_close(fp, td);
	else
		error = 0;
	ffree(fp);
	mtx_unlock(&Giant);
	return (error);
}
1837 
1838 /*
1839  * Apply an advisory lock on a file descriptor.
1840  *
1841  * Just attempt to get a record lock of the requested type on
1842  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1843  */
/*
 * Argument structure of the flock() system call; only defined here when
 * <sys/sysproto.h> has not already provided it.
 */
#ifndef _SYS_SYSPROTO_H_
struct flock_args {
	int	fd;	/* descriptor to lock or unlock */
	int	how;	/* LOCK_SH, LOCK_EX or LOCK_UN, optionally LOCK_NB */
};
#endif
/*
 * MPSAFE
 *
 * flock() system call: apply or remove a whole-file advisory lock on the
 * vnode behind uap->fd.
 *
 * Returns the error from fget() for a bad descriptor, EOPNOTSUPP if the
 * descriptor is not vnode-backed, EBADF if uap->how contains none of
 * LOCK_UN, LOCK_EX or LOCK_SH, and otherwise the result of VOP_ADVLOCK().
 */
/* ARGSUSED */
int
flock(td, uap)
	struct thread *td;
	register struct flock_args *uap;
{
	struct file *fp;
	struct vnode *vp;
	struct flock lf;
	int error;

	/* fget() takes a reference on fp; every exit path drops it. */
	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);
	if (fp->f_type != DTYPE_VNODE) {
		fdrop(fp, td);
		return (EOPNOTSUPP);
	}

	mtx_lock(&Giant);
	vp = (struct vnode *)fp->f_data;
	/* Describe the whole file: start 0, length 0, from SEEK_SET. */
	lf.l_whence = SEEK_SET;
	lf.l_start = 0;
	lf.l_len = 0;
	if (uap->how & LOCK_UN) {
		lf.l_type = F_UNLCK;
		/* f_flag is modified under the file lock. */
		FILE_LOCK(fp);
		fp->f_flag &= ~FHASLOCK;
		FILE_UNLOCK(fp);
		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
		goto done2;
	}
	/* LOCK_EX takes precedence over LOCK_SH when both are given. */
	if (uap->how & LOCK_EX)
		lf.l_type = F_WRLCK;
	else if (uap->how & LOCK_SH)
		lf.l_type = F_RDLCK;
	else {
		error = EBADF;
		goto done2;
	}
	/*
	 * FHASLOCK is recorded before the lock request is issued;
	 * fdrop_locked() uses it to release the lock on last close.
	 */
	FILE_LOCK(fp);
	fp->f_flag |= FHASLOCK;
	FILE_UNLOCK(fp);
	/* Without LOCK_NB, F_WAIT is added to the lock request flags. */
	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
done2:
	fdrop(fp, td);
	mtx_unlock(&Giant);
	return (error);
}
1902 
1903 /*
1904  * File Descriptor pseudo-device driver (/dev/fd/).
1905  *
1906  * Opening minor device N dup()s the file (if any) connected to file
1907  * descriptor N belonging to the calling process.  Note that this driver
1908  * consists of only the ``open()'' routine, because all subsequent
1909  * references to this file will be direct to the other driver.
1910  */
1911 /* ARGSUSED */
1912 static int
1913 fdopen(dev, mode, type, td)
1914 	dev_t dev;
1915 	int mode, type;
1916 	struct thread *td;
1917 {
1918 
1919 	/*
1920 	 * XXX Kludge: set curthread->td_dupfd to contain the value of the
1921 	 * the file descriptor being sought for duplication. The error
1922 	 * return ensures that the vnode for this device will be released
1923 	 * by vn_open. Open will detect this special error and take the
1924 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1925 	 * will simply report the error.
1926 	 */
1927 	td->td_dupfd = dev2unit(dev);
1928 	return (ENODEV);
1929 }
1930 
1931 /*
1932  * Duplicate the specified descriptor to a free descriptor.
1933  */
1934 int
1935 dupfdopen(td, fdp, indx, dfd, mode, error)
1936 	struct thread *td;
1937 	struct filedesc *fdp;
1938 	int indx, dfd;
1939 	int mode;
1940 	int error;
1941 {
1942 	register struct file *wfp;
1943 	struct file *fp;
1944 
1945 	/*
1946 	 * If the to-be-dup'd fd number is greater than the allowed number
1947 	 * of file descriptors, or the fd to be dup'd has already been
1948 	 * closed, then reject.
1949 	 */
1950 	FILEDESC_LOCK(fdp);
1951 	if ((u_int)dfd >= fdp->fd_nfiles ||
1952 	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1953 		FILEDESC_UNLOCK(fdp);
1954 		return (EBADF);
1955 	}
1956 
1957 	/*
1958 	 * There are two cases of interest here.
1959 	 *
1960 	 * For ENODEV simply dup (dfd) to file descriptor
1961 	 * (indx) and return.
1962 	 *
1963 	 * For ENXIO steal away the file structure from (dfd) and
1964 	 * store it in (indx).  (dfd) is effectively closed by
1965 	 * this operation.
1966 	 *
1967 	 * Any other error code is just returned.
1968 	 */
1969 	switch (error) {
1970 	case ENODEV:
1971 		/*
1972 		 * Check that the mode the file is being opened for is a
1973 		 * subset of the mode of the existing descriptor.
1974 		 */
1975 		FILE_LOCK(wfp);
1976 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1977 			FILE_UNLOCK(wfp);
1978 			FILEDESC_UNLOCK(fdp);
1979 			return (EACCES);
1980 		}
1981 		fp = fdp->fd_ofiles[indx];
1982 #if 0
1983 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1984 			(void) munmapfd(td, indx);
1985 #endif
1986 		fdp->fd_ofiles[indx] = wfp;
1987 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1988 		fhold_locked(wfp);
1989 		FILE_UNLOCK(wfp);
1990 		if (indx > fdp->fd_lastfile)
1991 			fdp->fd_lastfile = indx;
1992 		if (fp != NULL)
1993 			FILE_LOCK(fp);
1994 		FILEDESC_UNLOCK(fdp);
1995 		/*
1996 		 * we now own the reference to fp that the ofiles[] array
1997 		 * used to own.  Release it.
1998 		 */
1999 		if (fp != NULL)
2000 			fdrop_locked(fp, td);
2001 		return (0);
2002 
2003 	case ENXIO:
2004 		/*
2005 		 * Steal away the file pointer from dfd, and stuff it into indx.
2006 		 */
2007 		fp = fdp->fd_ofiles[indx];
2008 #if 0
2009 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
2010 			(void) munmapfd(td, indx);
2011 #endif
2012 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
2013 		fdp->fd_ofiles[dfd] = NULL;
2014 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
2015 		fdp->fd_ofileflags[dfd] = 0;
2016 
2017 		/*
2018 		 * Complete the clean up of the filedesc structure by
2019 		 * recomputing the various hints.
2020 		 */
2021 		if (indx > fdp->fd_lastfile) {
2022 			fdp->fd_lastfile = indx;
2023 		} else {
2024 			while (fdp->fd_lastfile > 0 &&
2025 			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
2026 				fdp->fd_lastfile--;
2027 			}
2028 			if (dfd < fdp->fd_freefile)
2029 				fdp->fd_freefile = dfd;
2030 		}
2031 		if (fp != NULL)
2032 			FILE_LOCK(fp);
2033 		FILEDESC_UNLOCK(fdp);
2034 
2035 		/*
2036 		 * we now own the reference to fp that the ofiles[] array
2037 		 * used to own.  Release it.
2038 		 */
2039 		if (fp != NULL)
2040 			fdrop_locked(fp, td);
2041 		return (0);
2042 
2043 	default:
2044 		FILEDESC_UNLOCK(fdp);
2045 		return (error);
2046 	}
2047 	/* NOTREACHED */
2048 }
2049 
2050 /*
2051  * Get file structures.
2052  */
2053 static int
2054 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
2055 {
2056 	int error;
2057 	struct file *fp;
2058 
2059 	sx_slock(&filelist_lock);
2060 	if (!req->oldptr) {
2061 		/*
2062 		 * overestimate by 10 files
2063 		 */
2064 		error = SYSCTL_OUT(req, 0, sizeof(filehead) +
2065 				   (nfiles + 10) * sizeof(struct file));
2066 		sx_sunlock(&filelist_lock);
2067 		return (error);
2068 	}
2069 
2070 	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
2071 	if (error) {
2072 		sx_sunlock(&filelist_lock);
2073 		return (error);
2074 	}
2075 
2076 	/*
2077 	 * followed by an array of file structures
2078 	 */
2079 	LIST_FOREACH(fp, &filehead, f_list) {
2080 		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
2081 		if (error) {
2082 			sx_sunlock(&filelist_lock);
2083 			return (error);
2084 		}
2085 	}
2086 	sx_sunlock(&filelist_lock);
2087 	return (0);
2088 }
2089 
/* kern.file: read-only opaque dump produced by sysctl_kern_file(). */
SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, sysctl_kern_file, "S,file", "Entire file table");

/* kern.maxfilesperproc: read-write view of the maxfilesperproc variable. */
SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
    &maxfilesperproc, 0, "Maximum files allowed open per process");

/* kern.maxfiles: read-write view of the maxfiles variable. */
SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
    &maxfiles, 0, "Maximum number of files");

/* kern.openfiles: read-only view of nfiles, the open-file counter. */
SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
    &nfiles, 0, "System-wide number of open files");
2101 
2102 static void
2103 fildesc_drvinit(void *unused)
2104 {
2105 	dev_t dev;
2106 
2107 	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
2108 	make_dev_alias(dev, "stdin");
2109 	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
2110 	make_dev_alias(dev, "stdout");
2111 	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
2112 	make_dev_alias(dev, "stderr");
2113 	if (!devfs_present) {
2114 		int fd;
2115 
2116 		for (fd = 3; fd < NUMFDESC; fd++)
2117 			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
2118 			    "fd/%d", fd);
2119 	}
2120 }
2121 
/*
 * File operations vector made of the badfo_* stubs below.  falloc()
 * installs it in newly allocated files, _fget() treats files still
 * pointing at it as bad descriptors, and fdrop_locked() skips the close
 * method for them.
 *
 * NOTE(review): the slot names are inferred from the stub names; the
 * first two entries are presumably the read and write methods -- confirm
 * against the struct fileops definition.
 */
struct fileops badfileops = {
	badfo_readwrite,
	badfo_readwrite,
	badfo_ioctl,
	badfo_poll,
	badfo_kqfilter,
	badfo_stat,
	badfo_close
};
2131 
/*
 * badfileops read/write stub: ignores all of its arguments and always
 * fails with EBADF.
 */
static int
badfo_readwrite(fp, uio, cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
	struct thread *td;
	int flags;
{

	return (EBADF);
}
2143 
/*
 * badfileops ioctl stub: ignores all of its arguments and always fails
 * with EBADF.
 */
static int
badfo_ioctl(fp, com, data, td)
	struct file *fp;
	u_long com;
	caddr_t data;
	struct thread *td;
{

	return (EBADF);
}
2154 
/*
 * badfileops poll stub: ignores all of its arguments and always
 * returns 0.
 */
static int
badfo_poll(fp, events, cred, td)
	struct file *fp;
	int events;
	struct ucred *cred;
	struct thread *td;
{

	return (0);
}
2165 
/*
 * badfileops kqfilter stub: ignores its arguments and always returns 0.
 */
static int
badfo_kqfilter(fp, kn)
	struct file *fp;
	struct knote *kn;
{

	return (0);
}
2174 
/*
 * badfileops stat stub: ignores its arguments, leaves *sb untouched and
 * always fails with EBADF.
 */
static int
badfo_stat(fp, sb, td)
	struct file *fp;
	struct stat *sb;
	struct thread *td;
{

	return (EBADF);
}
2184 
/*
 * badfileops close stub: always fails with EBADF.  fdrop_locked() does
 * not call the close method of a file whose f_ops is &badfileops, so
 * this is not reached from there.
 */
static int
badfo_close(fp, td)
	struct file *fp;
	struct thread *td;
{

	return (EBADF);
}
2193 
/* Register fildesc_drvinit() to run at the SI_SUB_DRIVERS startup stage. */
SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
					fildesc_drvinit,NULL)

/*
 * Register filelistinit() to run first at the SI_SUB_LOCK startup stage.
 * The forward declaration is needed because the function is defined below.
 */
static void filelistinit(void *);
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2199 
2200 /* ARGSUSED*/
2201 static void
2202 filelistinit(dummy)
2203 	void *dummy;
2204 {
2205 	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
2206 	    NULL, NULL, UMA_ALIGN_PTR, 0);
2207 
2208 	sx_init(&filelist_lock, "filelist lock");
2209 	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
2210 }
2211