xref: /freebsd/sys/kern/kern_descrip.c (revision c4f6a2a9e1b1879b618c436ab4f56ff75c73a0f5)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mutex.h>
49 #include <sys/sysproto.h>
50 #include <sys/conf.h>
51 #include <sys/filedesc.h>
52 #include <sys/kernel.h>
53 #include <sys/sysctl.h>
54 #include <sys/vnode.h>
55 #include <sys/proc.h>
56 #include <sys/namei.h>
57 #include <sys/file.h>
58 #include <sys/stat.h>
59 #include <sys/filio.h>
60 #include <sys/fcntl.h>
61 #include <sys/unistd.h>
62 #include <sys/resourcevar.h>
63 #include <sys/event.h>
64 #include <sys/sx.h>
65 #include <sys/socketvar.h>
66 #include <sys/signalvar.h>
67 
68 #include <machine/limits.h>
69 
70 #include <vm/vm.h>
71 #include <vm/vm_extern.h>
72 #include <vm/uma.h>
73 
74 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
75 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
76 
77 uma_zone_t file_zone;
78 
79 static	 d_open_t  fdopen;
80 #define NUMFDESC 64
81 
82 #define CDEV_MAJOR 22
83 static struct cdevsw fildesc_cdevsw = {
84 	/* open */	fdopen,
85 	/* close */	noclose,
86 	/* read */	noread,
87 	/* write */	nowrite,
88 	/* ioctl */	noioctl,
89 	/* poll */	nopoll,
90 	/* mmap */	nommap,
91 	/* strategy */	nostrategy,
92 	/* name */	"FD",
93 	/* maj */	CDEV_MAJOR,
94 	/* dump */	nodump,
95 	/* psize */	nopsize,
96 	/* flags */	0,
97 };
98 
99 static int do_dup(struct filedesc *fdp, int old, int new, register_t *retval,
100     struct thread *td);
101 static int badfo_readwrite(struct file *fp, struct uio *uio,
102     struct ucred *active_cred, int flags, struct thread *td);
103 static int badfo_ioctl(struct file *fp, u_long com, void *data,
104     struct ucred *active_cred, struct thread *td);
105 static int badfo_poll(struct file *fp, int events,
106     struct ucred *active_cred, struct thread *td);
107 static int badfo_kqfilter(struct file *fp, struct knote *kn);
108 static int badfo_stat(struct file *fp, struct stat *sb,
109     struct ucred *active_cred, struct thread *td);
110 static int badfo_close(struct file *fp, struct thread *td);
111 
112 /*
113  * Descriptor management.
114  */
115 struct filelist filehead;	/* head of list of open files */
116 int nfiles;			/* actual number of open files */
117 extern int cmask;
118 struct sx filelist_lock;	/* sx to protect filelist */
119 struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
120 
121 /*
122  * System calls on descriptors.
123  */
124 #ifndef _SYS_SYSPROTO_H_
125 struct getdtablesize_args {
126 	int	dummy;
127 };
128 #endif
129 /*
130  * MPSAFE
131  */
132 /* ARGSUSED */
133 int
134 getdtablesize(td, uap)
135 	struct thread *td;
136 	struct getdtablesize_args *uap;
137 {
138 	struct proc *p = td->td_proc;
139 
140 	mtx_lock(&Giant);
141 	td->td_retval[0] =
142 	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
143 	mtx_unlock(&Giant);
144 	return (0);
145 }
146 
147 /*
148  * Duplicate a file descriptor to a particular value.
149  *
150  * note: keep in mind that a potential race condition exists when closing
151  * descriptors from a shared descriptor table (via rfork).
152  */
153 #ifndef _SYS_SYSPROTO_H_
154 struct dup2_args {
155 	u_int	from;
156 	u_int	to;
157 };
158 #endif
159 /*
160  * MPSAFE
161  */
162 /* ARGSUSED */
163 int
164 dup2(td, uap)
165 	struct thread *td;
166 	struct dup2_args *uap;
167 {
168 	struct proc *p = td->td_proc;
169 	register struct filedesc *fdp = td->td_proc->p_fd;
170 	register u_int old = uap->from, new = uap->to;
171 	int i, error;
172 
173 	FILEDESC_LOCK(fdp);
174 retry:
175 	if (old >= fdp->fd_nfiles ||
176 	    fdp->fd_ofiles[old] == NULL ||
177 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
178 	    new >= maxfilesperproc) {
179 		FILEDESC_UNLOCK(fdp);
180 		return (EBADF);
181 	}
182 	if (old == new) {
183 		td->td_retval[0] = new;
184 		FILEDESC_UNLOCK(fdp);
185 		return (0);
186 	}
187 	if (new >= fdp->fd_nfiles) {
188 		if ((error = fdalloc(td, new, &i))) {
189 			FILEDESC_UNLOCK(fdp);
190 			return (error);
191 		}
192 		/*
193 		 * fdalloc() may block, retest everything.
194 		 */
195 		goto retry;
196 	}
197 	error = do_dup(fdp, (int)old, (int)new, td->td_retval, td);
198 	return(error);
199 }
200 
201 /*
202  * Duplicate a file descriptor.
203  */
204 #ifndef _SYS_SYSPROTO_H_
205 struct dup_args {
206 	u_int	fd;
207 };
208 #endif
209 /*
210  * MPSAFE
211  */
212 /* ARGSUSED */
213 int
214 dup(td, uap)
215 	struct thread *td;
216 	struct dup_args *uap;
217 {
218 	register struct filedesc *fdp;
219 	u_int old;
220 	int new, error;
221 
222 	old = uap->fd;
223 	fdp = td->td_proc->p_fd;
224 	FILEDESC_LOCK(fdp);
225 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
226 		FILEDESC_UNLOCK(fdp);
227 		return (EBADF);
228 	}
229 	if ((error = fdalloc(td, 0, &new))) {
230 		FILEDESC_UNLOCK(fdp);
231 		return (error);
232 	}
233 	error = do_dup(fdp, (int)old, new, td->td_retval, td);
234 	return (error);
235 }
236 
237 /*
238  * The file control system call.
239  */
240 #ifndef _SYS_SYSPROTO_H_
241 struct fcntl_args {
242 	int	fd;
243 	int	cmd;
244 	long	arg;
245 };
246 #endif
247 /*
248  * MPSAFE
249  */
250 /* ARGSUSED */
251 int
252 fcntl(td, uap)
253 	struct thread *td;
254 	register struct fcntl_args *uap;
255 {
256 	register struct proc *p = td->td_proc;
257 	register struct filedesc *fdp;
258 	register struct file *fp;
259 	register char *pop;
260 	struct vnode *vp;
261 	int i, tmp, error = 0, flg = F_POSIX;
262 	struct flock fl;
263 	u_int newmin;
264 	struct proc *leaderp;
265 
266 	mtx_lock(&Giant);
267 
268 	fdp = p->p_fd;
269 	FILEDESC_LOCK(fdp);
270 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
271 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
272 		FILEDESC_UNLOCK(fdp);
273 		error = EBADF;
274 		goto done2;
275 	}
276 	pop = &fdp->fd_ofileflags[uap->fd];
277 
278 	switch (uap->cmd) {
279 	case F_DUPFD:
280 		newmin = uap->arg;
281 		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
282 		    newmin >= maxfilesperproc) {
283 			FILEDESC_UNLOCK(fdp);
284 			error = EINVAL;
285 			break;
286 		}
287 		if ((error = fdalloc(td, newmin, &i))) {
288 			FILEDESC_UNLOCK(fdp);
289 			break;
290 		}
291 		error = do_dup(fdp, uap->fd, i, td->td_retval, td);
292 		break;
293 
294 	case F_GETFD:
295 		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
296 		FILEDESC_UNLOCK(fdp);
297 		break;
298 
299 	case F_SETFD:
300 		*pop = (*pop &~ UF_EXCLOSE) |
301 		    (uap->arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
302 		FILEDESC_UNLOCK(fdp);
303 		break;
304 
305 	case F_GETFL:
306 		FILE_LOCK(fp);
307 		FILEDESC_UNLOCK(fdp);
308 		td->td_retval[0] = OFLAGS(fp->f_flag);
309 		FILE_UNLOCK(fp);
310 		break;
311 
312 	case F_SETFL:
313 		fhold(fp);
314 		FILEDESC_UNLOCK(fdp);
315 		fp->f_flag &= ~FCNTLFLAGS;
316 		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
317 		tmp = fp->f_flag & FNONBLOCK;
318 		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
319 		if (error) {
320 			fdrop(fp, td);
321 			break;
322 		}
323 		tmp = fp->f_flag & FASYNC;
324 		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
325 		if (!error) {
326 			fdrop(fp, td);
327 			break;
328 		}
329 		fp->f_flag &= ~FNONBLOCK;
330 		tmp = 0;
331 		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
332 		fdrop(fp, td);
333 		break;
334 
335 	case F_GETOWN:
336 		fhold(fp);
337 		FILEDESC_UNLOCK(fdp);
338 		error = fo_ioctl(fp, FIOGETOWN, (void *)td->td_retval,
339 		    td->td_ucred, td);
340 		fdrop(fp, td);
341 		break;
342 
343 	case F_SETOWN:
344 		fhold(fp);
345 		FILEDESC_UNLOCK(fdp);
346 		error = fo_ioctl(fp, FIOSETOWN, &uap->arg, td->td_ucred, td);
347 		fdrop(fp, td);
348 		break;
349 
350 	case F_SETLKW:
351 		flg |= F_WAIT;
352 		/* Fall into F_SETLK */
353 
354 	case F_SETLK:
355 		if (fp->f_type != DTYPE_VNODE) {
356 			FILEDESC_UNLOCK(fdp);
357 			error = EBADF;
358 			break;
359 		}
360 		vp = (struct vnode *)fp->f_data;
361 		/*
362 		 * copyin/lockop may block
363 		 */
364 		fhold(fp);
365 		FILEDESC_UNLOCK(fdp);
366 		vp = (struct vnode *)fp->f_data;
367 
368 		/* Copy in the lock structure */
369 		error = copyin((caddr_t)(intptr_t)uap->arg, &fl, sizeof(fl));
370 		if (error) {
371 			fdrop(fp, td);
372 			break;
373 		}
374 		if (fl.l_whence == SEEK_CUR) {
375 			if (fp->f_offset < 0 ||
376 			    (fl.l_start > 0 &&
377 			     fp->f_offset > OFF_MAX - fl.l_start)) {
378 				fdrop(fp, td);
379 				error = EOVERFLOW;
380 				break;
381 			}
382 			fl.l_start += fp->f_offset;
383 		}
384 
385 		switch (fl.l_type) {
386 		case F_RDLCK:
387 			if ((fp->f_flag & FREAD) == 0) {
388 				error = EBADF;
389 				break;
390 			}
391 			PROC_LOCK(p);
392 			p->p_flag |= P_ADVLOCK;
393 			leaderp = p->p_leader;
394 			PROC_UNLOCK(p);
395 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
396 			    &fl, flg);
397 			break;
398 		case F_WRLCK:
399 			if ((fp->f_flag & FWRITE) == 0) {
400 				error = EBADF;
401 				break;
402 			}
403 			PROC_LOCK(p);
404 			p->p_flag |= P_ADVLOCK;
405 			leaderp = p->p_leader;
406 			PROC_UNLOCK(p);
407 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
408 			    &fl, flg);
409 			break;
410 		case F_UNLCK:
411 			PROC_LOCK(p);
412 			leaderp = p->p_leader;
413 			PROC_UNLOCK(p);
414 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_UNLCK,
415 				&fl, F_POSIX);
416 			break;
417 		default:
418 			error = EINVAL;
419 			break;
420 		}
421 		fdrop(fp, td);
422 		break;
423 
424 	case F_GETLK:
425 		if (fp->f_type != DTYPE_VNODE) {
426 			FILEDESC_UNLOCK(fdp);
427 			error = EBADF;
428 			break;
429 		}
430 		vp = (struct vnode *)fp->f_data;
431 		/*
432 		 * copyin/lockop may block
433 		 */
434 		fhold(fp);
435 		FILEDESC_UNLOCK(fdp);
436 		vp = (struct vnode *)fp->f_data;
437 
438 		/* Copy in the lock structure */
439 		error = copyin((caddr_t)(intptr_t)uap->arg, &fl, sizeof(fl));
440 		if (error) {
441 			fdrop(fp, td);
442 			break;
443 		}
444 		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
445 		    fl.l_type != F_UNLCK) {
446 			fdrop(fp, td);
447 			error = EINVAL;
448 			break;
449 		}
450 		if (fl.l_whence == SEEK_CUR) {
451 			if ((fl.l_start > 0 &&
452 			     fp->f_offset > OFF_MAX - fl.l_start) ||
453 			    (fl.l_start < 0 &&
454 			     fp->f_offset < OFF_MIN - fl.l_start)) {
455 				fdrop(fp, td);
456 				error = EOVERFLOW;
457 				break;
458 			}
459 			fl.l_start += fp->f_offset;
460 		}
461 		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
462 			    &fl, F_POSIX);
463 		fdrop(fp, td);
464 		if (error == 0) {
465 			error = copyout(&fl, (caddr_t)(intptr_t)uap->arg,
466 			    sizeof(fl));
467 		}
468 		break;
469 	default:
470 		FILEDESC_UNLOCK(fdp);
471 		error = EINVAL;
472 		break;
473 	}
474 done2:
475 	mtx_unlock(&Giant);
476 	return (error);
477 }
478 
479 /*
480  * Common code for dup, dup2, and fcntl(F_DUPFD).
481  * filedesc must be locked, but will be unlocked as a side effect.
482  */
483 static int
484 do_dup(fdp, old, new, retval, td)
485 	register struct filedesc *fdp;
486 	register int old, new;
487 	register_t *retval;
488 	struct thread *td;
489 {
490 	struct file *fp;
491 	struct file *delfp;
492 
493 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
494 
495 	/*
496 	 * Save info on the descriptor being overwritten.  We have
497 	 * to do the unmap now, but we cannot close it without
498 	 * introducing an ownership race for the slot.
499 	 */
500 	delfp = fdp->fd_ofiles[new];
501 #if 0
502 	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
503 		(void) munmapfd(td, new);
504 #endif
505 
506 	/*
507 	 * Duplicate the source descriptor, update lastfile
508 	 */
509 	fp = fdp->fd_ofiles[old];
510 	fdp->fd_ofiles[new] = fp;
511 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
512 	fhold(fp);
513 	if (new > fdp->fd_lastfile)
514 		fdp->fd_lastfile = new;
515 	*retval = new;
516 
517 	FILEDESC_UNLOCK(fdp);
518 
519 	/*
520 	 * If we dup'd over a valid file, we now own the reference to it
521 	 * and must dispose of it using closef() semantics (as if a
522 	 * close() were performed on it).
523 	 */
524 	if (delfp) {
525 		mtx_lock(&Giant);
526 		(void) closef(delfp, td);
527 		mtx_unlock(&Giant);
528 	}
529 	return (0);
530 }
531 
532 /*
533  * If sigio is on the list associated with a process or process group,
534  * disable signalling from the device, remove sigio from the list and
535  * free sigio.
536  */
537 void
538 funsetown(sigiop)
539 	struct sigio **sigiop;
540 {
541 	struct sigio *sigio;
542 
543 	SIGIO_LOCK();
544 	sigio = *sigiop;
545 	if (sigio == NULL) {
546 		SIGIO_UNLOCK();
547 		return;
548 	}
549 	*(sigio->sio_myref) = NULL;
550 	if ((sigio)->sio_pgid < 0) {
551 		struct pgrp *pg = (sigio)->sio_pgrp;
552 		PGRP_LOCK(pg);
553 		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
554 			     sigio, sio_pgsigio);
555 		PGRP_UNLOCK(pg);
556 	} else {
557 		struct proc *p = (sigio)->sio_proc;
558 		PROC_LOCK(p);
559 		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
560 			     sigio, sio_pgsigio);
561 		PROC_UNLOCK(p);
562 	}
563 	SIGIO_UNLOCK();
564 	crfree(sigio->sio_ucred);
565 	FREE(sigio, M_SIGIO);
566 }
567 
568 /*
569  * Free a list of sigio structures.
570  * We only need to lock the SIGIO_LOCK because we have made ourselves
571  * inaccessable to callers of fsetown and therefore do not need to lock
572  * the proc or pgrp struct for the list manipulation.
573  */
574 void
575 funsetownlst(sigiolst)
576 	struct sigiolst *sigiolst;
577 {
578 	struct sigio *sigio;
579 	struct proc *p;
580 	struct pgrp *pg;
581 
582 	sigio = SLIST_FIRST(sigiolst);
583 	if (sigio == NULL)
584 		return;
585 
586 	p = NULL;
587 	pg = NULL;
588 
589 	/*
590 	 * Every entry of the list should belong
591 	 * to a single proc or pgrp.
592 	 */
593 	if (sigio->sio_pgid < 0) {
594 		pg = sigio->sio_pgrp;
595 		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
596 	} else /* if (sigio->sio_pgid > 0) */ {
597 		p = sigio->sio_proc;
598 		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
599 	}
600 
601 	SIGIO_LOCK();
602 	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
603 		*(sigio->sio_myref) = NULL;
604 		if (pg != NULL) {
605 			KASSERT(sigio->sio_pgid < 0,
606 			    ("Proc sigio in pgrp sigio list"));
607 			KASSERT(sigio->sio_pgrp == pg,
608 			    ("Bogus pgrp in sigio list"));
609 			PGRP_LOCK(pg);
610 			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
611 			    sio_pgsigio);
612 			PGRP_UNLOCK(pg);
613 		} else /* if (p != NULL) */ {
614 			KASSERT(sigio->sio_pgid > 0,
615 			    ("Pgrp sigio in proc sigio list"));
616 			KASSERT(sigio->sio_proc == p,
617 			    ("Bogus proc in sigio list"));
618 			PROC_LOCK(p);
619 			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
620 			    sio_pgsigio);
621 			PROC_UNLOCK(p);
622 		}
623 		SIGIO_UNLOCK();
624 		crfree(sigio->sio_ucred);
625 		FREE(sigio, M_SIGIO);
626 		SIGIO_LOCK();
627 	}
628 	SIGIO_UNLOCK();
629 }
630 
631 /*
632  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
633  *
634  * After permission checking, add a sigio structure to the sigio list for
635  * the process or process group.
636  */
637 int
638 fsetown(pgid, sigiop)
639 	pid_t pgid;
640 	struct sigio **sigiop;
641 {
642 	struct proc *proc;
643 	struct pgrp *pgrp;
644 	struct sigio *sigio;
645 	int ret;
646 
647 	if (pgid == 0) {
648 		funsetown(sigiop);
649 		return (0);
650 	}
651 
652 	ret = 0;
653 
654 	/* Allocate and fill in the new sigio out of locks. */
655 	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
656 	sigio->sio_pgid = pgid;
657 	sigio->sio_ucred = crhold(curthread->td_ucred);
658 	sigio->sio_myref = sigiop;
659 
660 	sx_slock(&proctree_lock);
661 	if (pgid > 0) {
662 		proc = pfind(pgid);
663 		if (proc == NULL) {
664 			ret = ESRCH;
665 			goto fail;
666 		}
667 
668 		/*
669 		 * Policy - Don't allow a process to FSETOWN a process
670 		 * in another session.
671 		 *
672 		 * Remove this test to allow maximum flexibility or
673 		 * restrict FSETOWN to the current process or process
674 		 * group for maximum safety.
675 		 */
676 		PROC_UNLOCK(proc);
677 		if (proc->p_session != curthread->td_proc->p_session) {
678 			ret = EPERM;
679 			goto fail;
680 		}
681 
682 		pgrp = NULL;
683 	} else /* if (pgid < 0) */ {
684 		pgrp = pgfind(-pgid);
685 		if (pgrp == NULL) {
686 			ret = ESRCH;
687 			goto fail;
688 		}
689 		PGRP_UNLOCK(pgrp);
690 
691 		/*
692 		 * Policy - Don't allow a process to FSETOWN a process
693 		 * in another session.
694 		 *
695 		 * Remove this test to allow maximum flexibility or
696 		 * restrict FSETOWN to the current process or process
697 		 * group for maximum safety.
698 		 */
699 		if (pgrp->pg_session != curthread->td_proc->p_session) {
700 			ret = EPERM;
701 			goto fail;
702 		}
703 
704 		proc = NULL;
705 	}
706 	funsetown(sigiop);
707 	if (pgid > 0) {
708 		PROC_LOCK(proc);
709 		/*
710 		 * since funsetownlst() is called without the proctree
711 		 * locked we need to check for P_WEXIT.
712 		 * XXX: is ESRCH correct?
713 		 */
714 		if ((proc->p_flag & P_WEXIT) != 0) {
715 			PROC_UNLOCK(proc);
716 			ret = ESRCH;
717 			goto fail;
718 		}
719 		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
720 		sigio->sio_proc = proc;
721 		PROC_UNLOCK(proc);
722 	} else {
723 		PGRP_LOCK(pgrp);
724 		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
725 		sigio->sio_pgrp = pgrp;
726 		PGRP_UNLOCK(pgrp);
727 	}
728 	sx_sunlock(&proctree_lock);
729 	SIGIO_LOCK();
730 	*sigiop = sigio;
731 	SIGIO_UNLOCK();
732 	return (0);
733 
734 fail:
735 	sx_sunlock(&proctree_lock);
736 	crfree(sigio->sio_ucred);
737 	FREE(sigio, M_SIGIO);
738 	return (ret);
739 }
740 
741 /*
742  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
743  */
744 pid_t
745 fgetown(sigio)
746 	struct sigio *sigio;
747 {
748 	return (sigio != NULL ? sigio->sio_pgid : 0);
749 }
750 
751 /*
752  * Close a file descriptor.
753  */
754 #ifndef _SYS_SYSPROTO_H_
755 struct close_args {
756         int     fd;
757 };
758 #endif
759 /*
760  * MPSAFE
761  */
762 /* ARGSUSED */
763 int
764 close(td, uap)
765 	struct thread *td;
766 	struct close_args *uap;
767 {
768 	register struct filedesc *fdp;
769 	register struct file *fp;
770 	register int fd = uap->fd;
771 	int error = 0;
772 
773 	mtx_lock(&Giant);
774 	fdp = td->td_proc->p_fd;
775 	FILEDESC_LOCK(fdp);
776 	if ((unsigned)fd >= fdp->fd_nfiles ||
777 	    (fp = fdp->fd_ofiles[fd]) == NULL) {
778 		FILEDESC_UNLOCK(fdp);
779 		error = EBADF;
780 		goto done2;
781 	}
782 #if 0
783 	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
784 		(void) munmapfd(td, fd);
785 #endif
786 	fdp->fd_ofiles[fd] = NULL;
787 	fdp->fd_ofileflags[fd] = 0;
788 
789 	/*
790 	 * we now hold the fp reference that used to be owned by the descriptor
791 	 * array.
792 	 */
793 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
794 		fdp->fd_lastfile--;
795 	if (fd < fdp->fd_freefile)
796 		fdp->fd_freefile = fd;
797 	if (fd < fdp->fd_knlistsize) {
798 		FILEDESC_UNLOCK(fdp);
799 		knote_fdclose(td, fd);
800 	} else
801 		FILEDESC_UNLOCK(fdp);
802 
803 	error = closef(fp, td);
804 done2:
805 	mtx_unlock(&Giant);
806 	return(error);
807 }
808 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor, in the old
 * (struct ostat) layout.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/*
 * ofstat(fd, sb): stat the file behind fd, convert the result with
 * cvtstat() and copy it out to sb.  Returns the first error reported
 * by fget(), fo_stat() or copyout(), or 0.
 *
 * MPSAFE
 */
/* ARGSUSED */
int
ofstat(td, uap)
	struct thread *td;
	register struct ofstat_args *uap;
{
	struct file *fp;
	struct stat st;
	struct ostat ost;
	int error;

	mtx_lock(&Giant);
	error = fget(td, uap->fd, &fp);
	if (error == 0) {
		error = fo_stat(fp, &st, td->td_ucred, td);
		if (error == 0) {
			cvtstat(&st, &ost);
			error = copyout(&ost, uap->sb, sizeof (ost));
		}
		/* Drop the reference obtained by fget(). */
		fdrop(fp, td);
	}
	mtx_unlock(&Giant);
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
847 
848 /*
849  * Return status information about a file descriptor.
850  */
851 #ifndef _SYS_SYSPROTO_H_
852 struct fstat_args {
853 	int	fd;
854 	struct	stat *sb;
855 };
856 #endif
857 /*
858  * MPSAFE
859  */
860 /* ARGSUSED */
861 int
862 fstat(td, uap)
863 	struct thread *td;
864 	struct fstat_args *uap;
865 {
866 	struct file *fp;
867 	struct stat ub;
868 	int error;
869 
870 	mtx_lock(&Giant);
871 	if ((error = fget(td, uap->fd, &fp)) != 0)
872 		goto done2;
873 	error = fo_stat(fp, &ub, td->td_ucred, td);
874 	if (error == 0)
875 		error = copyout(&ub, uap->sb, sizeof (ub));
876 	fdrop(fp, td);
877 done2:
878 	mtx_unlock(&Giant);
879 	return (error);
880 }
881 
882 /*
883  * Return status information about a file descriptor.
884  */
885 #ifndef _SYS_SYSPROTO_H_
886 struct nfstat_args {
887 	int	fd;
888 	struct	nstat *sb;
889 };
890 #endif
891 /*
892  * MPSAFE
893  */
894 /* ARGSUSED */
895 int
896 nfstat(td, uap)
897 	struct thread *td;
898 	register struct nfstat_args *uap;
899 {
900 	struct file *fp;
901 	struct stat ub;
902 	struct nstat nub;
903 	int error;
904 
905 	mtx_lock(&Giant);
906 	if ((error = fget(td, uap->fd, &fp)) != 0)
907 		goto done2;
908 	error = fo_stat(fp, &ub, td->td_ucred, td);
909 	if (error == 0) {
910 		cvtnstat(&ub, &nub);
911 		error = copyout(&nub, uap->sb, sizeof (nub));
912 	}
913 	fdrop(fp, td);
914 done2:
915 	mtx_unlock(&Giant);
916 	return (error);
917 }
918 
919 /*
920  * Return pathconf information about a file descriptor.
921  */
922 #ifndef _SYS_SYSPROTO_H_
923 struct fpathconf_args {
924 	int	fd;
925 	int	name;
926 };
927 #endif
928 /*
929  * MPSAFE
930  */
931 /* ARGSUSED */
932 int
933 fpathconf(td, uap)
934 	struct thread *td;
935 	register struct fpathconf_args *uap;
936 {
937 	struct file *fp;
938 	struct vnode *vp;
939 	int error;
940 
941 	if ((error = fget(td, uap->fd, &fp)) != 0)
942 		return (error);
943 
944 	switch (fp->f_type) {
945 	case DTYPE_PIPE:
946 	case DTYPE_SOCKET:
947 		if (uap->name != _PC_PIPE_BUF) {
948 			error = EINVAL;
949 		} else {
950 			td->td_retval[0] = PIPE_BUF;
951 			error = 0;
952 		}
953 		break;
954 	case DTYPE_FIFO:
955 	case DTYPE_VNODE:
956 		vp = (struct vnode *)fp->f_data;
957 		mtx_lock(&Giant);
958 		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
959 		mtx_unlock(&Giant);
960 		break;
961 	default:
962 		error = EOPNOTSUPP;
963 		break;
964 	}
965 	fdrop(fp, td);
966 	return(error);
967 }
968 
969 /*
970  * Allocate a file descriptor for the process.
971  */
972 static int fdexpand;
973 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
974 
975 int
976 fdalloc(td, want, result)
977 	struct thread *td;
978 	int want;
979 	int *result;
980 {
981 	struct proc *p = td->td_proc;
982 	register struct filedesc *fdp = td->td_proc->p_fd;
983 	register int i;
984 	int lim, last, nfiles;
985 	struct file **newofile, **oldofile;
986 	char *newofileflags;
987 
988 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
989 
990 	/*
991 	 * Search for a free descriptor starting at the higher
992 	 * of want or fd_freefile.  If that fails, consider
993 	 * expanding the ofile array.
994 	 */
995 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
996 	for (;;) {
997 		last = min(fdp->fd_nfiles, lim);
998 		if ((i = want) < fdp->fd_freefile)
999 			i = fdp->fd_freefile;
1000 		for (; i < last; i++) {
1001 			if (fdp->fd_ofiles[i] == NULL) {
1002 				fdp->fd_ofileflags[i] = 0;
1003 				if (i > fdp->fd_lastfile)
1004 					fdp->fd_lastfile = i;
1005 				if (want <= fdp->fd_freefile)
1006 					fdp->fd_freefile = i;
1007 				*result = i;
1008 				return (0);
1009 			}
1010 		}
1011 
1012 		/*
1013 		 * No space in current array.  Expand?
1014 		 */
1015 		if (fdp->fd_nfiles >= lim)
1016 			return (EMFILE);
1017 		if (fdp->fd_nfiles < NDEXTENT)
1018 			nfiles = NDEXTENT;
1019 		else
1020 			nfiles = 2 * fdp->fd_nfiles;
1021 		FILEDESC_UNLOCK(fdp);
1022 		mtx_lock(&Giant);
1023 		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
1024 		    M_FILEDESC, M_WAITOK);
1025 		mtx_unlock(&Giant);
1026 		FILEDESC_LOCK(fdp);
1027 
1028 		/*
1029 		 * deal with file-table extend race that might have occured
1030 		 * when malloc was blocked.
1031 		 */
1032 		if (fdp->fd_nfiles >= nfiles) {
1033 			FILEDESC_UNLOCK(fdp);
1034 			mtx_lock(&Giant);
1035 			FREE(newofile, M_FILEDESC);
1036 			mtx_unlock(&Giant);
1037 			FILEDESC_LOCK(fdp);
1038 			continue;
1039 		}
1040 		newofileflags = (char *) &newofile[nfiles];
1041 		/*
1042 		 * Copy the existing ofile and ofileflags arrays
1043 		 * and zero the new portion of each array.
1044 		 */
1045 		bcopy(fdp->fd_ofiles, newofile,
1046 			(i = sizeof(struct file *) * fdp->fd_nfiles));
1047 		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
1048 		bcopy(fdp->fd_ofileflags, newofileflags,
1049 			(i = sizeof(char) * fdp->fd_nfiles));
1050 		bzero(newofileflags + i, nfiles * sizeof(char) - i);
1051 		if (fdp->fd_nfiles > NDFILE)
1052 			oldofile = fdp->fd_ofiles;
1053 		else
1054 			oldofile = NULL;
1055 		fdp->fd_ofiles = newofile;
1056 		fdp->fd_ofileflags = newofileflags;
1057 		fdp->fd_nfiles = nfiles;
1058 		fdexpand++;
1059 		if (oldofile != NULL) {
1060 			FILEDESC_UNLOCK(fdp);
1061 			mtx_lock(&Giant);
1062 			FREE(oldofile, M_FILEDESC);
1063 			mtx_unlock(&Giant);
1064 			FILEDESC_LOCK(fdp);
1065 		}
1066 	}
1067 	return (0);
1068 }
1069 
1070 /*
1071  * Check to see whether n user file descriptors
1072  * are available to the process p.
1073  */
1074 int
1075 fdavail(td, n)
1076 	struct thread *td;
1077 	register int n;
1078 {
1079 	struct proc *p = td->td_proc;
1080 	register struct filedesc *fdp = td->td_proc->p_fd;
1081 	register struct file **fpp;
1082 	register int i, lim, last;
1083 
1084 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1085 
1086 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1087 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
1088 		return (1);
1089 
1090 	last = min(fdp->fd_nfiles, lim);
1091 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
1092 	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
1093 		if (*fpp == NULL && --n <= 0)
1094 			return (1);
1095 	}
1096 	return (0);
1097 }
1098 
1099 /*
1100  * Create a new open file structure and allocate
1101  * a file decriptor for the process that refers to it.
1102  */
1103 int
1104 falloc(td, resultfp, resultfd)
1105 	register struct thread *td;
1106 	struct file **resultfp;
1107 	int *resultfd;
1108 {
1109 	struct proc *p = td->td_proc;
1110 	register struct file *fp, *fq;
1111 	int error, i;
1112 
1113 	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
1114 	sx_xlock(&filelist_lock);
1115 	if (nfiles >= maxfiles) {
1116 		sx_xunlock(&filelist_lock);
1117 		uma_zfree(file_zone, fp);
1118 		tablefull("file");
1119 		return (ENFILE);
1120 	}
1121 	nfiles++;
1122 
1123 	/*
1124 	 * If the process has file descriptor zero open, add the new file
1125 	 * descriptor to the list of open files at that point, otherwise
1126 	 * put it at the front of the list of open files.
1127 	 */
1128 	FILEDESC_LOCK(p->p_fd);
1129 	if ((error = fdalloc(td, 0, &i))) {
1130 		FILEDESC_UNLOCK(p->p_fd);
1131 		nfiles--;
1132 		sx_xunlock(&filelist_lock);
1133 		uma_zfree(file_zone, fp);
1134 		return (error);
1135 	}
1136 	fp->f_mtxp = mtx_pool_alloc();
1137 	fp->f_gcflag = 0;
1138 	fp->f_count = 1;
1139 	fp->f_cred = crhold(td->td_ucred);
1140 	fp->f_ops = &badfileops;
1141 	fp->f_seqcount = 1;
1142 	if ((fq = p->p_fd->fd_ofiles[0])) {
1143 		LIST_INSERT_AFTER(fq, fp, f_list);
1144 	} else {
1145 		LIST_INSERT_HEAD(&filehead, fp, f_list);
1146 	}
1147 	p->p_fd->fd_ofiles[i] = fp;
1148 	FILEDESC_UNLOCK(p->p_fd);
1149 	sx_xunlock(&filelist_lock);
1150 	if (resultfp)
1151 		*resultfp = fp;
1152 	if (resultfd)
1153 		*resultfd = i;
1154 	return (0);
1155 }
1156 
1157 /*
1158  * Free a file descriptor.
1159  */
1160 void
1161 ffree(fp)
1162 	register struct file *fp;
1163 {
1164 
1165 	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
1166 	sx_xlock(&filelist_lock);
1167 	LIST_REMOVE(fp, f_list);
1168 	nfiles--;
1169 	sx_xunlock(&filelist_lock);
1170 	crfree(fp->f_cred);
1171 	uma_zfree(file_zone, fp);
1172 }
1173 
1174 /*
1175  * Build a new filedesc structure.
1176  */
1177 struct filedesc *
1178 fdinit(td)
1179 	struct thread *td;
1180 {
1181 	register struct filedesc0 *newfdp;
1182 	register struct filedesc *fdp = td->td_proc->p_fd;
1183 
1184 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1185 	    M_FILEDESC, M_WAITOK | M_ZERO);
1186 	mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1187 	FILEDESC_LOCK(&newfdp->fd_fd);
1188 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1189 	if (newfdp->fd_fd.fd_cdir)
1190 		VREF(newfdp->fd_fd.fd_cdir);
1191 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1192 	if (newfdp->fd_fd.fd_rdir)
1193 		VREF(newfdp->fd_fd.fd_rdir);
1194 	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1195 	if (newfdp->fd_fd.fd_jdir)
1196 		VREF(newfdp->fd_fd.fd_jdir);
1197 
1198 	/* Create the file descriptor table. */
1199 	newfdp->fd_fd.fd_refcnt = 1;
1200 	newfdp->fd_fd.fd_cmask = cmask;
1201 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1202 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1203 	newfdp->fd_fd.fd_nfiles = NDFILE;
1204 	newfdp->fd_fd.fd_knlistsize = -1;
1205 	FILEDESC_UNLOCK(&newfdp->fd_fd);
1206 
1207 	return (&newfdp->fd_fd);
1208 }
1209 
1210 /*
1211  * Share a filedesc structure.
1212  */
1213 struct filedesc *
1214 fdshare(p)
1215 	struct proc *p;
1216 {
1217 	FILEDESC_LOCK(p->p_fd);
1218 	p->p_fd->fd_refcnt++;
1219 	FILEDESC_UNLOCK(p->p_fd);
1220 	return (p->p_fd);
1221 }
1222 
/*
 * Copy a filedesc structure.
 *
 * Returns a newly allocated table with a reference count of one, or
 * NULL if the calling thread's process has no descriptor table.
 *
 * Locking: the caller must hold the lock on the source table (this is
 * asserted below).  The lock is dropped around each allocation and
 * re-taken afterwards; the function returns with the source table
 * locked again.
 */
struct filedesc *
fdcopy(td)
	struct thread *td;
{
	register struct filedesc *newfdp, *fdp = td->td_proc->p_fd;
	register struct file **fpp;
	register int i, j;

	/* Certain daemons might not have file descriptors. */
	if (fdp == NULL)
		return (NULL);

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	/*
	 * Drop the lock for the M_WAITOK allocation, then snapshot the
	 * fixed-size part of the source table under the lock.
	 */
	FILEDESC_UNLOCK(fdp);
	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
	    M_FILEDESC, M_WAITOK);
	FILEDESC_LOCK(fdp);
	bcopy(fdp, newfdp, sizeof(struct filedesc));
	FILEDESC_UNLOCK(fdp);
	/* The mutex bytes were copied too; give the copy its own mutex. */
	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
	mtx_init(&newfdp->fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
	/* The copy holds its own references on the directory vnodes. */
	if (newfdp->fd_cdir)
		VREF(newfdp->fd_cdir);
	if (newfdp->fd_rdir)
		VREF(newfdp->fd_rdir);
	if (newfdp->fd_jdir)
		VREF(newfdp->fd_jdir);
	newfdp->fd_refcnt = 1;

	/*
	 * If the number of open files fits in the internal arrays
	 * of the open file structure, use them, otherwise allocate
	 * additional memory for the number of descriptors currently
	 * in use.
	 */
	FILEDESC_LOCK(fdp);
	/* Re-read the sizes: they may have changed while unlocked. */
	newfdp->fd_lastfile = fdp->fd_lastfile;
	newfdp->fd_nfiles = fdp->fd_nfiles;
	if (newfdp->fd_lastfile < NDFILE) {
		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
		newfdp->fd_ofileflags =
		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
		i = NDFILE;
	} else {
		/*
		 * Compute the smallest multiple of NDEXTENT needed
		 * for the file descriptors currently in use,
		 * allowing the table to shrink.
		 */
retry:
		i = newfdp->fd_nfiles;
		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
			i /= 2;
		/* Allocate i slots with the source table unlocked. */
		FILEDESC_UNLOCK(fdp);
		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
		    M_FILEDESC, M_WAITOK);
		FILEDESC_LOCK(fdp);
		/*
		 * Recompute the wanted size (j) from the current state of
		 * the source table and compare it with what was allocated.
		 */
		newfdp->fd_lastfile = fdp->fd_lastfile;
		newfdp->fd_nfiles = fdp->fd_nfiles;
		j = newfdp->fd_nfiles;
		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
			j /= 2;
		if (i != j) {
			/*
			 * The size of the original table has changed.
			 * Go over once again.
			 */
			FILEDESC_UNLOCK(fdp);
			FREE(newfdp->fd_ofiles, M_FILEDESC);
			FILEDESC_LOCK(fdp);
			newfdp->fd_lastfile = fdp->fd_lastfile;
			newfdp->fd_nfiles = fdp->fd_nfiles;
			goto retry;
		}
		/* The per-descriptor flag bytes follow the i file pointers. */
		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
	}
	newfdp->fd_nfiles = i;
	/*
	 * NOTE(review): the element type here is struct file *, although the
	 * size is spelled sizeof(struct file **); both are pointer sizes.
	 */
	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));

	/*
	 * kq descriptors cannot be copied.
	 */
	if (newfdp->fd_knlistsize != -1) {
		/*
		 * Walk the copy from the highest descriptor down, clearing
		 * kqueue entries and pulling fd_lastfile down past any
		 * trailing empty slots.
		 */
		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
				*fpp = NULL;
				if (i < newfdp->fd_freefile)
					newfdp->fd_freefile = i;
			}
			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
				newfdp->fd_lastfile--;
		}
		/* The copy starts with no knote bookkeeping of its own. */
		newfdp->fd_knlist = NULL;
		newfdp->fd_knlistsize = -1;
		newfdp->fd_knhash = NULL;
		newfdp->fd_knhashmask = 0;
	}

	/* Take a reference on every file that ended up in the copy. */
	fpp = newfdp->fd_ofiles;
	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
		if (*fpp != NULL) {
			fhold(*fpp);
		}
	}
	return (newfdp);
}
1335 
1336 /*
1337  * Release a filedesc structure.
1338  */
1339 void
1340 fdfree(td)
1341 	struct thread *td;
1342 {
1343 	register struct filedesc *fdp;
1344 	struct file **fpp;
1345 	register int i;
1346 
1347 	fdp = td->td_proc->p_fd;
1348 	/* Certain daemons might not have file descriptors. */
1349 	if (fdp == NULL)
1350 		return;
1351 
1352 	FILEDESC_LOCK(fdp);
1353 	if (--fdp->fd_refcnt > 0) {
1354 		FILEDESC_UNLOCK(fdp);
1355 		return;
1356 	}
1357 	/*
1358 	 * we are the last reference to the structure, we can
1359 	 * safely assume it will not change out from under us.
1360 	 */
1361 	FILEDESC_UNLOCK(fdp);
1362 	fpp = fdp->fd_ofiles;
1363 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1364 		if (*fpp)
1365 			(void) closef(*fpp, td);
1366 	}
1367 
1368 	PROC_LOCK(td->td_proc);
1369 	td->td_proc->p_fd = NULL;
1370 	PROC_UNLOCK(td->td_proc);
1371 
1372 	if (fdp->fd_nfiles > NDFILE)
1373 		FREE(fdp->fd_ofiles, M_FILEDESC);
1374 	if (fdp->fd_cdir)
1375 		vrele(fdp->fd_cdir);
1376 	if (fdp->fd_rdir)
1377 		vrele(fdp->fd_rdir);
1378 	if (fdp->fd_jdir)
1379 		vrele(fdp->fd_jdir);
1380 	if (fdp->fd_knlist)
1381 		FREE(fdp->fd_knlist, M_KQUEUE);
1382 	if (fdp->fd_knhash)
1383 		FREE(fdp->fd_knhash, M_KQUEUE);
1384 	mtx_destroy(&fdp->fd_mtx);
1385 	FREE(fdp, M_FILEDESC);
1386 }
1387 
1388 /*
1389  * For setugid programs, we don't want to people to use that setugidness
1390  * to generate error messages which write to a file which otherwise would
1391  * otherwise be off-limits to the process.
1392  *
1393  * This is a gross hack to plug the hole.  A better solution would involve
1394  * a special vop or other form of generalized access control mechanism.  We
1395  * go ahead and just reject all procfs filesystems accesses as dangerous.
1396  *
1397  * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1398  * sufficient.  We also don't for check setugidness since we know we are.
1399  */
1400 static int
1401 is_unsafe(struct file *fp)
1402 {
1403 	if (fp->f_type == DTYPE_VNODE &&
1404 	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1405 		return (1);
1406 	return (0);
1407 }
1408 
1409 /*
1410  * Make this setguid thing safe, if at all possible.
1411  */
1412 void
1413 setugidsafety(td)
1414 	struct thread *td;
1415 {
1416 	struct filedesc *fdp = td->td_proc->p_fd;
1417 	register int i;
1418 
1419 	/* Certain daemons might not have file descriptors. */
1420 	if (fdp == NULL)
1421 		return;
1422 
1423 	/*
1424 	 * note: fdp->fd_ofiles may be reallocated out from under us while
1425 	 * we are blocked in a close.  Be careful!
1426 	 */
1427 	FILEDESC_LOCK(fdp);
1428 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1429 		if (i > 2)
1430 			break;
1431 		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1432 			struct file *fp;
1433 
1434 #if 0
1435 			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1436 				(void) munmapfd(td, i);
1437 #endif
1438 			if (i < fdp->fd_knlistsize) {
1439 				FILEDESC_UNLOCK(fdp);
1440 				knote_fdclose(td, i);
1441 				FILEDESC_LOCK(fdp);
1442 			}
1443 			/*
1444 			 * NULL-out descriptor prior to close to avoid
1445 			 * a race while close blocks.
1446 			 */
1447 			fp = fdp->fd_ofiles[i];
1448 			fdp->fd_ofiles[i] = NULL;
1449 			fdp->fd_ofileflags[i] = 0;
1450 			if (i < fdp->fd_freefile)
1451 				fdp->fd_freefile = i;
1452 			FILEDESC_UNLOCK(fdp);
1453 			(void) closef(fp, td);
1454 			FILEDESC_LOCK(fdp);
1455 		}
1456 	}
1457 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1458 		fdp->fd_lastfile--;
1459 	FILEDESC_UNLOCK(fdp);
1460 }
1461 
1462 /*
1463  * Close any files on exec?
1464  */
1465 void
1466 fdcloseexec(td)
1467 	struct thread *td;
1468 {
1469 	struct filedesc *fdp = td->td_proc->p_fd;
1470 	register int i;
1471 
1472 	/* Certain daemons might not have file descriptors. */
1473 	if (fdp == NULL)
1474 		return;
1475 
1476 	FILEDESC_LOCK(fdp);
1477 
1478 	/*
1479 	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1480 	 * may block and rip them out from under us.
1481 	 */
1482 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1483 		if (fdp->fd_ofiles[i] != NULL &&
1484 		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1485 			struct file *fp;
1486 
1487 #if 0
1488 			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1489 				(void) munmapfd(td, i);
1490 #endif
1491 			if (i < fdp->fd_knlistsize) {
1492 				FILEDESC_UNLOCK(fdp);
1493 				knote_fdclose(td, i);
1494 				FILEDESC_LOCK(fdp);
1495 			}
1496 			/*
1497 			 * NULL-out descriptor prior to close to avoid
1498 			 * a race while close blocks.
1499 			 */
1500 			fp = fdp->fd_ofiles[i];
1501 			fdp->fd_ofiles[i] = NULL;
1502 			fdp->fd_ofileflags[i] = 0;
1503 			if (i < fdp->fd_freefile)
1504 				fdp->fd_freefile = i;
1505 			FILEDESC_UNLOCK(fdp);
1506 			(void) closef(fp, td);
1507 			FILEDESC_LOCK(fdp);
1508 		}
1509 	}
1510 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1511 		fdp->fd_lastfile--;
1512 	FILEDESC_UNLOCK(fdp);
1513 }
1514 
1515 /*
1516  * It is unsafe for set[ug]id processes to be started with file
1517  * descriptors 0..2 closed, as these descriptors are given implicit
1518  * significance in the Standard C library.  fdcheckstd() will create a
1519  * descriptor referencing /dev/null for each of stdin, stdout, and
1520  * stderr that is not already open.
1521  */
1522 int
1523 fdcheckstd(td)
1524 	struct thread *td;
1525 {
1526 	struct nameidata nd;
1527 	struct filedesc *fdp;
1528 	struct file *fp;
1529 	register_t retval;
1530 	int fd, i, error, flags, devnull;
1531 
1532 	fdp = td->td_proc->p_fd;
1533 	if (fdp == NULL)
1534 		return (0);
1535 	devnull = -1;
1536 	error = 0;
1537 	for (i = 0; i < 3; i++) {
1538 		if (fdp->fd_ofiles[i] != NULL)
1539 			continue;
1540 		if (devnull < 0) {
1541 			error = falloc(td, &fp, &fd);
1542 			if (error != 0)
1543 				break;
1544 			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
1545 			    td);
1546 			flags = FREAD | FWRITE;
1547 			error = vn_open(&nd, &flags, 0);
1548 			if (error != 0) {
1549 				FILEDESC_LOCK(fdp);
1550 				fdp->fd_ofiles[i] = NULL;
1551 				FILEDESC_UNLOCK(fdp);
1552 				fdrop(fp, td);
1553 				break;
1554 			}
1555 			NDFREE(&nd, NDF_ONLY_PNBUF);
1556 			fp->f_data = nd.ni_vp;
1557 			fp->f_flag = flags;
1558 			fp->f_ops = &vnops;
1559 			fp->f_type = DTYPE_VNODE;
1560 			VOP_UNLOCK(nd.ni_vp, 0, td);
1561 			devnull = fd;
1562 		} else {
1563 			FILEDESC_LOCK(fdp);
1564 			error = fdalloc(td, 0, &fd);
1565 			if (error != 0) {
1566 				FILEDESC_UNLOCK(fdp);
1567 				break;
1568 			}
1569 			error = do_dup(fdp, devnull, fd, &retval, td);
1570 			if (error != 0)
1571 				break;
1572 		}
1573 	}
1574 	return (error);
1575 }
1576 
1577 /*
1578  * Internal form of close.
1579  * Decrement reference count on file structure.
1580  * Note: td may be NULL when closing a file
1581  * that was being passed in a message.
1582  */
1583 int
1584 closef(fp, td)
1585 	register struct file *fp;
1586 	register struct thread *td;
1587 {
1588 	struct vnode *vp;
1589 	struct flock lf;
1590 
1591 	if (fp == NULL)
1592 		return (0);
1593 	/*
1594 	 * POSIX record locking dictates that any close releases ALL
1595 	 * locks owned by this process.  This is handled by setting
1596 	 * a flag in the unlock to free ONLY locks obeying POSIX
1597 	 * semantics, and not to free BSD-style file locks.
1598 	 * If the descriptor was in a message, POSIX-style locks
1599 	 * aren't passed with the descriptor.
1600 	 */
1601 	if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1602 	    fp->f_type == DTYPE_VNODE) {
1603 		lf.l_whence = SEEK_SET;
1604 		lf.l_start = 0;
1605 		lf.l_len = 0;
1606 		lf.l_type = F_UNLCK;
1607 		vp = (struct vnode *)fp->f_data;
1608 		(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1609 		    F_UNLCK, &lf, F_POSIX);
1610 	}
1611 	return (fdrop(fp, td));
1612 }
1613 
/*
 * Drop one reference on the given struct file.  The file is locked
 * here and the work is handed to fdrop_locked(), whose result is
 * returned.
 */
int
fdrop(struct file *fp, struct thread *td)
{
	int error;

	FILE_LOCK(fp);
	error = fdrop_locked(fp, td);
	return (error);
}
1627 
1628 /*
1629  * Extract the file pointer associated with the specified descriptor for
1630  * the current user process.
1631  *
1632  * If the descriptor doesn't exist, EBADF is returned.
1633  *
1634  * If the descriptor exists but doesn't match 'flags' then
1635  * return EBADF for read attempts and EINVAL for write attempts.
1636  *
1637  * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
1638  * It should be droped with fdrop().
1639  * If it is not set, then the refcount will not be bumped however the
1640  * thread's filedesc struct will be returned locked (for fgetsock).
1641  *
1642  * If an error occured the non-zero error is returned and *fpp is set to NULL.
1643  * Otherwise *fpp is set and zero is returned.
1644  */
1645 static __inline
1646 int
1647 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1648 {
1649 	struct filedesc *fdp;
1650 	struct file *fp;
1651 
1652 	*fpp = NULL;
1653 	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1654 		return(EBADF);
1655 	FILEDESC_LOCK(fdp);
1656 	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1657 		FILEDESC_UNLOCK(fdp);
1658 		return(EBADF);
1659 	}
1660 
1661 	/*
1662 	 * Note: FREAD failures returns EBADF to maintain backwards
1663 	 * compatibility with what routines returned before.
1664 	 *
1665 	 * Only one flag, or 0, may be specified.
1666 	 */
1667 	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1668 		FILEDESC_UNLOCK(fdp);
1669 		return(EBADF);
1670 	}
1671 	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1672 		FILEDESC_UNLOCK(fdp);
1673 		return(EINVAL);
1674 	}
1675 	if (hold) {
1676 		fhold(fp);
1677 		FILEDESC_UNLOCK(fdp);
1678 	}
1679 	*fpp = fp;
1680 	return(0);
1681 }
1682 
/*
 * Look up descriptor fd for td with no access-mode requirement and
 * return the file in *fpp with a hold that the caller must fdrop().
 */
int
fget(struct thread *td, int fd, struct file **fpp)
{
	int error;

	error = _fget(td, fd, fpp, 0, 1);
	return (error);
}
1688 
1689 int
1690 fget_read(struct thread *td, int fd, struct file **fpp)
1691 {
1692     return(_fget(td, fd, fpp, FREAD, 1));
1693 }
1694 
1695 int
1696 fget_write(struct thread *td, int fd, struct file **fpp)
1697 {
1698     return(_fget(td, fd, fpp, FWRITE, 1));
1699 }
1700 
1701 /*
1702  * Like fget() but loads the underlying vnode, or returns an error if
1703  * the descriptor does not represent a vnode.  Note that pipes use vnodes
1704  * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1705  * error).  The returned vnode will be vref()d.
1706  */
1707 
1708 static __inline
1709 int
1710 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1711 {
1712 	struct file *fp;
1713 	int error;
1714 
1715 	*vpp = NULL;
1716 	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1717 		return (error);
1718 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1719 		error = EINVAL;
1720 	} else {
1721 		*vpp = (struct vnode *)fp->f_data;
1722 		vref(*vpp);
1723 	}
1724 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1725 	return (error);
1726 }
1727 
/*
 * Fetch the referenced vnode behind descriptor fd, with no access-mode
 * requirement passed to _fgetvp().
 */
int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{
	int error;

	error = _fgetvp(td, fd, vpp, 0);
	return (error);
}
1733 
1734 int
1735 fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1736 {
1737 	return(_fgetvp(td, fd, vpp, FREAD));
1738 }
1739 
1740 int
1741 fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1742 {
1743 	return(_fgetvp(td, fd, vpp, FWRITE));
1744 }
1745 
1746 /*
1747  * Like fget() but loads the underlying socket, or returns an error if
1748  * the descriptor does not represent a socket.
1749  *
1750  * We bump the ref count on the returned socket.  XXX Also obtain the SX lock in
1751  * the future.
1752  */
1753 int
1754 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1755 {
1756 	struct file *fp;
1757 	int error;
1758 
1759 	*spp = NULL;
1760 	if (fflagp)
1761 		*fflagp = 0;
1762 	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1763 		return (error);
1764 	if (fp->f_type != DTYPE_SOCKET) {
1765 		error = ENOTSOCK;
1766 	} else {
1767 		*spp = (struct socket *)fp->f_data;
1768 		if (fflagp)
1769 			*fflagp = fp->f_flag;
1770 		soref(*spp);
1771 	}
1772 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1773 	return(error);
1774 }
1775 
/*
 * Counterpart of fgetsock(): drop the reference count on the socket
 * and XXX release the SX lock in the future.  The last reference
 * closes the socket.
 */
void
fputsock(struct socket *so)
{

	sorele(so);
}
1785 
1786 /*
1787  * Drop reference on struct file passed in, may call closef if the
1788  * reference hits zero.
1789  * Expects struct file locked, and will unlock it.
1790  */
1791 int
1792 fdrop_locked(fp, td)
1793 	struct file *fp;
1794 	struct thread *td;
1795 {
1796 	struct flock lf;
1797 	struct vnode *vp;
1798 	int error;
1799 
1800 	FILE_LOCK_ASSERT(fp, MA_OWNED);
1801 
1802 	if (--fp->f_count > 0) {
1803 		FILE_UNLOCK(fp);
1804 		return (0);
1805 	}
1806 	mtx_lock(&Giant);
1807 	if (fp->f_count < 0)
1808 		panic("fdrop: count < 0");
1809 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1810 		lf.l_whence = SEEK_SET;
1811 		lf.l_start = 0;
1812 		lf.l_len = 0;
1813 		lf.l_type = F_UNLCK;
1814 		vp = (struct vnode *)fp->f_data;
1815 		FILE_UNLOCK(fp);
1816 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1817 	} else
1818 		FILE_UNLOCK(fp);
1819 	if (fp->f_ops != &badfileops)
1820 		error = fo_close(fp, td);
1821 	else
1822 		error = 0;
1823 	ffree(fp);
1824 	mtx_unlock(&Giant);
1825 	return (error);
1826 }
1827 
1828 /*
1829  * Apply an advisory lock on a file descriptor.
1830  *
1831  * Just attempt to get a record lock of the requested type on
1832  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1833  */
1834 #ifndef _SYS_SYSPROTO_H_
1835 struct flock_args {
1836 	int	fd;
1837 	int	how;
1838 };
1839 #endif
1840 /*
1841  * MPSAFE
1842  */
1843 /* ARGSUSED */
1844 int
1845 flock(td, uap)
1846 	struct thread *td;
1847 	register struct flock_args *uap;
1848 {
1849 	struct file *fp;
1850 	struct vnode *vp;
1851 	struct flock lf;
1852 	int error;
1853 
1854 	if ((error = fget(td, uap->fd, &fp)) != 0)
1855 		return (error);
1856 	if (fp->f_type != DTYPE_VNODE) {
1857 		fdrop(fp, td);
1858 		return (EOPNOTSUPP);
1859 	}
1860 
1861 	mtx_lock(&Giant);
1862 	vp = (struct vnode *)fp->f_data;
1863 	lf.l_whence = SEEK_SET;
1864 	lf.l_start = 0;
1865 	lf.l_len = 0;
1866 	if (uap->how & LOCK_UN) {
1867 		lf.l_type = F_UNLCK;
1868 		FILE_LOCK(fp);
1869 		fp->f_flag &= ~FHASLOCK;
1870 		FILE_UNLOCK(fp);
1871 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1872 		goto done2;
1873 	}
1874 	if (uap->how & LOCK_EX)
1875 		lf.l_type = F_WRLCK;
1876 	else if (uap->how & LOCK_SH)
1877 		lf.l_type = F_RDLCK;
1878 	else {
1879 		error = EBADF;
1880 		goto done2;
1881 	}
1882 	FILE_LOCK(fp);
1883 	fp->f_flag |= FHASLOCK;
1884 	FILE_UNLOCK(fp);
1885 	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1886 	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
1887 done2:
1888 	fdrop(fp, td);
1889 	mtx_unlock(&Giant);
1890 	return (error);
1891 }
1892 
1893 /*
1894  * File Descriptor pseudo-device driver (/dev/fd/).
1895  *
1896  * Opening minor device N dup()s the file (if any) connected to file
1897  * descriptor N belonging to the calling process.  Note that this driver
1898  * consists of only the ``open()'' routine, because all subsequent
1899  * references to this file will be direct to the other driver.
1900  */
1901 /* ARGSUSED */
1902 static int
1903 fdopen(dev, mode, type, td)
1904 	dev_t dev;
1905 	int mode, type;
1906 	struct thread *td;
1907 {
1908 
1909 	/*
1910 	 * XXX Kludge: set curthread->td_dupfd to contain the value of the
1911 	 * the file descriptor being sought for duplication. The error
1912 	 * return ensures that the vnode for this device will be released
1913 	 * by vn_open. Open will detect this special error and take the
1914 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1915 	 * will simply report the error.
1916 	 */
1917 	td->td_dupfd = dev2unit(dev);
1918 	return (ENODEV);
1919 }
1920 
/*
 * Duplicate the specified descriptor to a free descriptor.
 *
 * 'dfd' is the descriptor to duplicate and 'indx' the slot that
 * receives it; 'error' is the special error code that selects what to
 * do (see the switch below).  'mode' is the open mode requested for
 * the new descriptor and is only checked in the ENODEV case.
 *
 * Returns 0 on success, EBADF if dfd is out of range or not open,
 * EACCES if mode asks for more access than dfd has, and otherwise the
 * 'error' value that was passed in.
 *
 * NOTE(review): indx is not range-checked here; presumably the caller
 * has already allocated it -- confirm.
 */
int
dupfdopen(td, fdp, indx, dfd, mode, error)
	struct thread *td;
	struct filedesc *fdp;
	int indx, dfd;
	int mode;
	int error;
{
	register struct file *wfp;
	struct file *fp;

	/*
	 * If the to-be-dup'd fd number is greater than the allowed number
	 * of file descriptors, or the fd to be dup'd has already been
	 * closed, then reject.
	 */
	FILEDESC_LOCK(fdp);
	/* The unsigned cast makes a negative dfd fail the range check too. */
	if ((u_int)dfd >= fdp->fd_nfiles ||
	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}

	/*
	 * There are two cases of interest here.
	 *
	 * For ENODEV simply dup (dfd) to file descriptor
	 * (indx) and return.
	 *
	 * For ENXIO steal away the file structure from (dfd) and
	 * store it in (indx).  (dfd) is effectively closed by
	 * this operation.
	 *
	 * Any other error code is just returned.
	 */
	switch (error) {
	case ENODEV:
		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		FILE_LOCK(wfp);
		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
			FILE_UNLOCK(wfp);
			FILEDESC_UNLOCK(fdp);
			return (EACCES);
		}
		/* Remember what indx held so it can be released below. */
		fp = fdp->fd_ofiles[indx];
#if 0
		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
			(void) munmapfd(td, indx);
#endif
		fdp->fd_ofiles[indx] = wfp;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		/* The new table slot gets its own reference on wfp. */
		fhold_locked(wfp);
		FILE_UNLOCK(wfp);
		if (indx > fdp->fd_lastfile)
			fdp->fd_lastfile = indx;
		/*
		 * Lock the displaced file before the table is unlocked;
		 * fdrop_locked() expects it locked and unlocks it.
		 */
		if (fp != NULL)
			FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		/*
		 * we now own the reference to fp that the ofiles[] array
		 * used to own.  Release it.
		 */
		if (fp != NULL)
			fdrop_locked(fp, td);
		return (0);

	case ENXIO:
		/*
		 * Steal away the file pointer from dfd, and stuff it into indx.
		 */
		fp = fdp->fd_ofiles[indx];
#if 0
		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
			(void) munmapfd(td, indx);
#endif
		/* Move both the file pointer and its flags; dfd ends up empty. */
		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
		fdp->fd_ofiles[dfd] = NULL;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		fdp->fd_ofileflags[dfd] = 0;

		/*
		 * Complete the clean up of the filedesc structure by
		 * recomputing the various hints.
		 */
		if (indx > fdp->fd_lastfile) {
			fdp->fd_lastfile = indx;
		} else {
			while (fdp->fd_lastfile > 0 &&
			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
				fdp->fd_lastfile--;
			}
			if (dfd < fdp->fd_freefile)
				fdp->fd_freefile = dfd;
		}
		/*
		 * Lock the displaced file before the table is unlocked;
		 * fdrop_locked() expects it locked and unlocks it.
		 */
		if (fp != NULL)
			FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);

		/*
		 * we now own the reference to fp that the ofiles[] array
		 * used to own.  Release it.
		 */
		if (fp != NULL)
			fdrop_locked(fp, td);
		return (0);

	default:
		FILEDESC_UNLOCK(fdp);
		return (error);
	}
	/* NOTREACHED */
}
2039 
2040 /*
2041  * Get file structures.
2042  */
2043 static int
2044 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
2045 {
2046 	struct proc *p;
2047 	struct filedesc *fdp;
2048 	struct file *fp;
2049 	struct xfile xf;
2050 	int error, n;
2051 
2052 	sysctl_wire_old_buffer(req, 0);
2053 	if (!req->oldptr) {
2054 		n = 16; /* slight overestimate */
2055 		sx_slock(&filelist_lock);
2056 		LIST_FOREACH(fp, &filehead, f_list) {
2057 			/*
2058 			 * We should grab the lock, but this is an
2059 			 * estimate, so does it really matter?
2060 			 */
2061 			/* mtx_lock(fp->f_mtxp); */
2062 			n += fp->f_count;
2063 			/* mtx_unlock(f->f_mtxp); */
2064 		}
2065 		sx_sunlock(&filelist_lock);
2066 		return (SYSCTL_OUT(req, 0, n * sizeof xf));
2067 	}
2068 
2069 	error = 0;
2070 	bzero(&xf, sizeof xf);
2071 	xf.xf_size = sizeof xf;
2072 	sx_slock(&allproc_lock);
2073 	LIST_FOREACH(p, &allproc, p_list) {
2074 		PROC_LOCK(p);
2075 		xf.xf_pid = p->p_pid;
2076 		xf.xf_uid = p->p_ucred->cr_uid;
2077 		if ((fdp = p->p_fd) == NULL) {
2078 			PROC_UNLOCK(p);
2079 			continue;
2080 		}
2081 		FILEDESC_LOCK(fdp);
2082 		for (n = 0; n < fdp->fd_nfiles; ++n) {
2083 			if ((fp = fdp->fd_ofiles[n]) == NULL)
2084 				continue;
2085 			xf.xf_fd = n;
2086 			xf.xf_file = fp;
2087 #define XF_COPY(field) xf.xf_##field = fp->f_##field
2088 			XF_COPY(type);
2089 			XF_COPY(count);
2090 			XF_COPY(msgcount);
2091 			XF_COPY(offset);
2092 			XF_COPY(data);
2093 			XF_COPY(flag);
2094 #undef XF_COPY
2095 			error = SYSCTL_OUT(req, &xf, sizeof xf);
2096 			if (error)
2097 				break;
2098 		}
2099 		FILEDESC_UNLOCK(fdp);
2100 		PROC_UNLOCK(p);
2101 		if (error)
2102 			break;
2103 	}
2104 	sx_sunlock(&allproc_lock);
2105 	return (error);
2106 }
2107 
/* kern.file: read-only opaque dump of xfile records from sysctl_kern_file(). */
SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");

/* kern.maxfilesperproc: read-write view of the maxfilesperproc variable. */
SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
    &maxfilesperproc, 0, "Maximum files allowed open per process");

/* kern.maxfiles: read-write view of the maxfiles variable. */
SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
    &maxfiles, 0, "Maximum number of files");

/* kern.openfiles: read-only view of the nfiles counter. */
SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
    &nfiles, 0, "System-wide number of open files");
2119 
2120 static void
2121 fildesc_drvinit(void *unused)
2122 {
2123 	dev_t dev;
2124 
2125 	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
2126 	make_dev_alias(dev, "stdin");
2127 	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
2128 	make_dev_alias(dev, "stdout");
2129 	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
2130 	make_dev_alias(dev, "stderr");
2131 	if (!devfs_present) {
2132 		int fd;
2133 
2134 		for (fd = 3; fd < NUMFDESC; fd++)
2135 			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
2136 			    "fd/%d", fd);
2137 	}
2138 }
2139 
/*
 * File operations table made up entirely of the badfo_* stubs defined
 * below.  Code in this file compares f_ops against &badfileops to
 * recognise files that must be treated as bad descriptors and must not
 * have their close operation called.
 *
 * The initializer is positional; presumably the two leading
 * badfo_readwrite entries are the read and write slots -- confirm
 * against the declaration of struct fileops.
 */
struct fileops badfileops = {
	badfo_readwrite,
	badfo_readwrite,
	badfo_ioctl,
	badfo_poll,
	badfo_kqfilter,
	badfo_stat,
	badfo_close
};
2149 
2150 static int
2151 badfo_readwrite(fp, uio, active_cred, flags, td)
2152 	struct file *fp;
2153 	struct uio *uio;
2154 	struct ucred *active_cred;
2155 	struct thread *td;
2156 	int flags;
2157 {
2158 
2159 	return (EBADF);
2160 }
2161 
2162 static int
2163 badfo_ioctl(fp, com, data, active_cred, td)
2164 	struct file *fp;
2165 	u_long com;
2166 	void *data;
2167 	struct ucred *active_cred;
2168 	struct thread *td;
2169 {
2170 
2171 	return (EBADF);
2172 }
2173 
/*
 * badfileops poll stub: always returns 0.
 */
static int
badfo_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

	return (0);
}
2184 
/*
 * badfileops kqfilter stub: always returns 0.
 */
static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{

	return (0);
}
2193 
2194 static int
2195 badfo_stat(fp, sb, active_cred, td)
2196 	struct file *fp;
2197 	struct stat *sb;
2198 	struct ucred *active_cred;
2199 	struct thread *td;
2200 {
2201 
2202 	return (EBADF);
2203 }
2204 
2205 static int
2206 badfo_close(fp, td)
2207 	struct file *fp;
2208 	struct thread *td;
2209 {
2210 
2211 	return (EBADF);
2212 }
2213 
/* Register fildesc_drvinit() to run in the SI_SUB_DRIVERS startup stage. */
SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
					fildesc_drvinit,NULL)

/* Forward declaration so the SYSINIT below can name filelistinit(). */
static void filelistinit(void *);
/* Register filelistinit() to run first in the SI_SUB_LOCK startup stage. */
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2219 
2220 /* ARGSUSED*/
2221 static void
2222 filelistinit(dummy)
2223 	void *dummy;
2224 {
2225 	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
2226 	    NULL, NULL, UMA_ALIGN_PTR, 0);
2227 
2228 	sx_init(&filelist_lock, "filelist lock");
2229 	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
2230 }
2231