xref: /freebsd/sys/kern/kern_descrip.c (revision 11f0b352e05306cf6f1f85e9087022c0a92624a3)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mutex.h>
49 #include <sys/sysproto.h>
50 #include <sys/conf.h>
51 #include <sys/filedesc.h>
52 #include <sys/kernel.h>
53 #include <sys/sysctl.h>
54 #include <sys/vnode.h>
55 #include <sys/proc.h>
56 #include <sys/namei.h>
57 #include <sys/file.h>
58 #include <sys/stat.h>
59 #include <sys/filio.h>
60 #include <sys/fcntl.h>
61 #include <sys/unistd.h>
62 #include <sys/resourcevar.h>
63 #include <sys/event.h>
64 #include <sys/sx.h>
65 #include <sys/socketvar.h>
66 #include <sys/signalvar.h>
67 
68 #include <machine/limits.h>
69 
70 #include <vm/vm.h>
71 #include <vm/vm_extern.h>
72 #include <vm/uma.h>
73 
74 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
75 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
76 
77 uma_zone_t file_zone;
78 
79 static	 d_open_t  fdopen;
80 #define NUMFDESC 64
81 
82 #define CDEV_MAJOR 22
83 static struct cdevsw fildesc_cdevsw = {
84 	/* open */	fdopen,
85 	/* close */	noclose,
86 	/* read */	noread,
87 	/* write */	nowrite,
88 	/* ioctl */	noioctl,
89 	/* poll */	nopoll,
90 	/* mmap */	nommap,
91 	/* strategy */	nostrategy,
92 	/* name */	"FD",
93 	/* maj */	CDEV_MAJOR,
94 	/* dump */	nodump,
95 	/* psize */	nopsize,
96 	/* flags */	0,
97 };
98 
99 static int do_dup(struct filedesc *fdp, int old, int new, register_t *retval, struct thread *td);
100 static int badfo_readwrite(struct file *fp, struct uio *uio,
101     struct ucred *cred, int flags, struct thread *td);
102 static int badfo_ioctl(struct file *fp, u_long com, void *data,
103     struct thread *td);
104 static int badfo_poll(struct file *fp, int events,
105     struct ucred *cred, struct thread *td);
106 static int badfo_kqfilter(struct file *fp, struct knote *kn);
107 static int badfo_stat(struct file *fp, struct stat *sb, struct thread *td);
108 static int badfo_close(struct file *fp, struct thread *td);
109 
110 /*
111  * Descriptor management.
112  */
113 struct filelist filehead;	/* head of list of open files */
114 int nfiles;			/* actual number of open files */
115 extern int cmask;
116 struct sx filelist_lock;	/* sx to protect filelist */
117 struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
118 
119 /*
120  * System calls on descriptors.
121  */
122 #ifndef _SYS_SYSPROTO_H_
123 struct getdtablesize_args {
124 	int	dummy;
125 };
126 #endif
127 /*
128  * MPSAFE
129  */
130 /* ARGSUSED */
131 int
132 getdtablesize(td, uap)
133 	struct thread *td;
134 	struct getdtablesize_args *uap;
135 {
136 	struct proc *p = td->td_proc;
137 
138 	mtx_lock(&Giant);
139 	td->td_retval[0] =
140 	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
141 	mtx_unlock(&Giant);
142 	return (0);
143 }
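
/*
 * Illustrative userland sketch (not part of the kernel build): the value
 * computed above is the smaller of the RLIMIT_NOFILE soft limit and
 * kern.maxfilesperproc, so getdtablesize() never reports more than either.
 */
#if 0
#include <stdio.h>
#include <unistd.h>
#include <sys/resource.h>

int
main(void)
{
	struct rlimit rl;

	if (getrlimit(RLIMIT_NOFILE, &rl) == 0)
		printf("RLIMIT_NOFILE soft limit: %ld\n", (long)rl.rlim_cur);
	printf("getdtablesize():          %d\n", getdtablesize());
	return (0);
}
#endif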
144 
145 /*
146  * Duplicate a file descriptor to a particular value.
147  *
148  * note: keep in mind that a potential race condition exists when closing
149  * descriptors from a shared descriptor table (via rfork).
150  */
151 #ifndef _SYS_SYSPROTO_H_
152 struct dup2_args {
153 	u_int	from;
154 	u_int	to;
155 };
156 #endif
157 /*
158  * MPSAFE
159  */
160 /* ARGSUSED */
161 int
162 dup2(td, uap)
163 	struct thread *td;
164 	struct dup2_args *uap;
165 {
166 	struct proc *p = td->td_proc;
167 	register struct filedesc *fdp = td->td_proc->p_fd;
168 	register u_int old = uap->from, new = uap->to;
169 	int i, error;
170 
171 	FILEDESC_LOCK(fdp);
172 retry:
173 	if (old >= fdp->fd_nfiles ||
174 	    fdp->fd_ofiles[old] == NULL ||
175 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
176 	    new >= maxfilesperproc) {
177 		FILEDESC_UNLOCK(fdp);
178 		return (EBADF);
179 	}
180 	if (old == new) {
181 		td->td_retval[0] = new;
182 		FILEDESC_UNLOCK(fdp);
183 		return (0);
184 	}
185 	if (new >= fdp->fd_nfiles) {
186 		if ((error = fdalloc(td, new, &i))) {
187 			FILEDESC_UNLOCK(fdp);
188 			return (error);
189 		}
190 		/*
191 		 * fdalloc() may block, retest everything.
192 		 */
193 		goto retry;
194 	}
195 	error = do_dup(fdp, (int)old, (int)new, td->td_retval, td);
196 	return(error);
197 }
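
/*
 * Illustrative userland sketch (not part of the kernel build): the classic
 * use of the dup2() implemented above is redirecting a standard descriptor.
 * Note that dup2(fd, fd) on a valid descriptor simply returns fd, matching
 * the old == new case handled above.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd;

	fd = open("out.log", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd == -1)
		return (1);
	if (dup2(fd, STDOUT_FILENO) == -1)	/* stdout now refers to out.log */
		return (1);
	close(fd);				/* the dup'd reference remains */
	printf("redirected\n");
	return (0);
}
#endif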
198 
199 /*
200  * Duplicate a file descriptor.
201  */
202 #ifndef _SYS_SYSPROTO_H_
203 struct dup_args {
204 	u_int	fd;
205 };
206 #endif
207 /*
208  * MPSAFE
209  */
210 /* ARGSUSED */
211 int
212 dup(td, uap)
213 	struct thread *td;
214 	struct dup_args *uap;
215 {
216 	register struct filedesc *fdp;
217 	u_int old;
218 	int new, error;
219 
220 	old = uap->fd;
221 	fdp = td->td_proc->p_fd;
222 	FILEDESC_LOCK(fdp);
223 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
224 		FILEDESC_UNLOCK(fdp);
225 		return (EBADF);
226 	}
227 	if ((error = fdalloc(td, 0, &new))) {
228 		FILEDESC_UNLOCK(fdp);
229 		return (error);
230 	}
231 	error = do_dup(fdp, (int)old, new, td->td_retval, td);
232 	return (error);
233 }
234 
235 /*
236  * The file control system call.
237  */
238 #ifndef _SYS_SYSPROTO_H_
239 struct fcntl_args {
240 	int	fd;
241 	int	cmd;
242 	long	arg;
243 };
244 #endif
245 /*
246  * MPSAFE
247  */
248 /* ARGSUSED */
249 int
250 fcntl(td, uap)
251 	struct thread *td;
252 	register struct fcntl_args *uap;
253 {
254 	register struct proc *p = td->td_proc;
255 	register struct filedesc *fdp;
256 	register struct file *fp;
257 	register char *pop;
258 	struct vnode *vp;
259 	int i, tmp, error = 0, flg = F_POSIX;
260 	struct flock fl;
261 	u_int newmin;
262 	struct proc *leaderp;
263 
264 	mtx_lock(&Giant);
265 
266 	fdp = p->p_fd;
267 	FILEDESC_LOCK(fdp);
268 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
269 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
270 		FILEDESC_UNLOCK(fdp);
271 		error = EBADF;
272 		goto done2;
273 	}
274 	pop = &fdp->fd_ofileflags[uap->fd];
275 
276 	switch (uap->cmd) {
277 	case F_DUPFD:
278 		newmin = uap->arg;
279 		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
280 		    newmin >= maxfilesperproc) {
281 			FILEDESC_UNLOCK(fdp);
282 			error = EINVAL;
283 			break;
284 		}
285 		if ((error = fdalloc(td, newmin, &i))) {
286 			FILEDESC_UNLOCK(fdp);
287 			break;
288 		}
289 		error = do_dup(fdp, uap->fd, i, td->td_retval, td);
290 		break;
291 
292 	case F_GETFD:
293 		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
294 		FILEDESC_UNLOCK(fdp);
295 		break;
296 
297 	case F_SETFD:
298 		*pop = (*pop &~ UF_EXCLOSE) |
299 		    (uap->arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
300 		FILEDESC_UNLOCK(fdp);
301 		break;
302 
303 	case F_GETFL:
304 		FILE_LOCK(fp);
305 		FILEDESC_UNLOCK(fdp);
306 		td->td_retval[0] = OFLAGS(fp->f_flag);
307 		FILE_UNLOCK(fp);
308 		break;
309 
310 	case F_SETFL:
311 		fhold(fp);
312 		FILEDESC_UNLOCK(fdp);
313 		fp->f_flag &= ~FCNTLFLAGS;
314 		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
315 		tmp = fp->f_flag & FNONBLOCK;
316 		error = fo_ioctl(fp, FIONBIO, &tmp, td);
317 		if (error) {
318 			fdrop(fp, td);
319 			break;
320 		}
321 		tmp = fp->f_flag & FASYNC;
322 		error = fo_ioctl(fp, FIOASYNC, &tmp, td);
323 		if (!error) {
324 			fdrop(fp, td);
325 			break;
326 		}
327 		fp->f_flag &= ~FNONBLOCK;
328 		tmp = 0;
329 		(void)fo_ioctl(fp, FIONBIO, &tmp, td);
330 		fdrop(fp, td);
331 		break;
332 
333 	case F_GETOWN:
334 		fhold(fp);
335 		FILEDESC_UNLOCK(fdp);
336 		error = fo_ioctl(fp, FIOGETOWN, (void *)td->td_retval, td);
337 		fdrop(fp, td);
338 		break;
339 
340 	case F_SETOWN:
341 		fhold(fp);
342 		FILEDESC_UNLOCK(fdp);
343 		error = fo_ioctl(fp, FIOSETOWN, &uap->arg, td);
344 		fdrop(fp, td);
345 		break;
346 
347 	case F_SETLKW:
348 		flg |= F_WAIT;
349 		/* Fall into F_SETLK */
350 
351 	case F_SETLK:
352 		if (fp->f_type != DTYPE_VNODE) {
353 			FILEDESC_UNLOCK(fdp);
354 			error = EBADF;
355 			break;
356 		}
357 		vp = (struct vnode *)fp->f_data;
358 		/*
359 		 * copyin/lockop may block
360 		 */
361 		fhold(fp);
362 		FILEDESC_UNLOCK(fdp);
363 		vp = (struct vnode *)fp->f_data;
364 
365 		/* Copy in the lock structure */
366 		error = copyin((caddr_t)(intptr_t)uap->arg, &fl, sizeof(fl));
367 		if (error) {
368 			fdrop(fp, td);
369 			break;
370 		}
371 		if (fl.l_whence == SEEK_CUR) {
372 			if (fp->f_offset < 0 ||
373 			    (fl.l_start > 0 &&
374 			     fp->f_offset > OFF_MAX - fl.l_start)) {
375 				fdrop(fp, td);
376 				error = EOVERFLOW;
377 				break;
378 			}
379 			fl.l_start += fp->f_offset;
380 		}
381 
382 		switch (fl.l_type) {
383 		case F_RDLCK:
384 			if ((fp->f_flag & FREAD) == 0) {
385 				error = EBADF;
386 				break;
387 			}
388 			PROC_LOCK(p);
389 			p->p_flag |= P_ADVLOCK;
390 			leaderp = p->p_leader;
391 			PROC_UNLOCK(p);
392 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
393 			    &fl, flg);
394 			break;
395 		case F_WRLCK:
396 			if ((fp->f_flag & FWRITE) == 0) {
397 				error = EBADF;
398 				break;
399 			}
400 			PROC_LOCK(p);
401 			p->p_flag |= P_ADVLOCK;
402 			leaderp = p->p_leader;
403 			PROC_UNLOCK(p);
404 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
405 			    &fl, flg);
406 			break;
407 		case F_UNLCK:
408 			PROC_LOCK(p);
409 			leaderp = p->p_leader;
410 			PROC_UNLOCK(p);
411 			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_UNLCK,
412 				&fl, F_POSIX);
413 			break;
414 		default:
415 			error = EINVAL;
416 			break;
417 		}
418 		fdrop(fp, td);
419 		break;
420 
421 	case F_GETLK:
422 		if (fp->f_type != DTYPE_VNODE) {
423 			FILEDESC_UNLOCK(fdp);
424 			error = EBADF;
425 			break;
426 		}
427 		vp = (struct vnode *)fp->f_data;
428 		/*
429 		 * copyin/lockop may block
430 		 */
431 		fhold(fp);
432 		FILEDESC_UNLOCK(fdp);
433 		vp = (struct vnode *)fp->f_data;
434 
435 		/* Copy in the lock structure */
436 		error = copyin((caddr_t)(intptr_t)uap->arg, &fl, sizeof(fl));
437 		if (error) {
438 			fdrop(fp, td);
439 			break;
440 		}
441 		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
442 		    fl.l_type != F_UNLCK) {
443 			fdrop(fp, td);
444 			error = EINVAL;
445 			break;
446 		}
447 		if (fl.l_whence == SEEK_CUR) {
448 			if ((fl.l_start > 0 &&
449 			     fp->f_offset > OFF_MAX - fl.l_start) ||
450 			    (fl.l_start < 0 &&
451 			     fp->f_offset < OFF_MIN - fl.l_start)) {
452 				fdrop(fp, td);
453 				error = EOVERFLOW;
454 				break;
455 			}
456 			fl.l_start += fp->f_offset;
457 		}
458 		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
459 			    &fl, F_POSIX);
460 		fdrop(fp, td);
461 		if (error == 0) {
462 			error = copyout(&fl, (caddr_t)(intptr_t)uap->arg,
463 			    sizeof(fl));
464 		}
465 		break;
466 	default:
467 		FILEDESC_UNLOCK(fdp);
468 		error = EINVAL;
469 		break;
470 	}
471 done2:
472 	mtx_unlock(&Giant);
473 	return (error);
474 }
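
/*
 * Illustrative userland sketch (not part of the kernel build): a few of the
 * fcntl() commands dispatched by the switch above -- F_DUPFD, the
 * FD_CLOEXEC/UF_EXCLOSE mapping of F_GETFD/F_SETFD, and a whole-file POSIX
 * write lock through F_SETLK.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

static int
fcntl_examples(int fd)
{
	struct flock fl;
	int newfd, flags;

	newfd = fcntl(fd, F_DUPFD, 10);		/* lowest free fd >= 10 */

	flags = fcntl(fd, F_GETFD, 0);
	fcntl(fd, F_SETFD, flags | FD_CLOEXEC);	/* sets UF_EXCLOSE above */

	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 0;				/* zero length == whole file */
	if (fcntl(fd, F_SETLK, &fl) == -1)
		return (-1);
	return (newfd);
}
#endif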
475 
476 /*
477  * Common code for dup, dup2, and fcntl(F_DUPFD).
478  * filedesc must be locked, but will be unlocked as a side effect.
479  */
480 static int
481 do_dup(fdp, old, new, retval, td)
482 	register struct filedesc *fdp;
483 	register int old, new;
484 	register_t *retval;
485 	struct thread *td;
486 {
487 	struct file *fp;
488 	struct file *delfp;
489 
490 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
491 
492 	/*
493 	 * Save info on the descriptor being overwritten.  We have
494 	 * to do the unmap now, but we cannot close it without
495 	 * introducing an ownership race for the slot.
496 	 */
497 	delfp = fdp->fd_ofiles[new];
498 #if 0
499 	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
500 		(void) munmapfd(td, new);
501 #endif
502 
503 	/*
504 	 * Duplicate the source descriptor, update lastfile
505 	 */
506 	fp = fdp->fd_ofiles[old];
507 	fdp->fd_ofiles[new] = fp;
508 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
509 	fhold(fp);
510 	if (new > fdp->fd_lastfile)
511 		fdp->fd_lastfile = new;
512 	*retval = new;
513 
514 	FILEDESC_UNLOCK(fdp);
515 
516 	/*
517 	 * If we dup'd over a valid file, we now own the reference to it
518 	 * and must dispose of it using closef() semantics (as if a
519 	 * close() were performed on it).
520 	 */
521 	if (delfp) {
522 		mtx_lock(&Giant);
523 		(void) closef(delfp, td);
524 		mtx_unlock(&Giant);
525 	}
526 	return (0);
527 }
528 
529 /*
530  * If sigio is on the list associated with a process or process group,
531  * disable signalling from the device, remove sigio from the list and
532  * free sigio.
533  */
534 void
535 funsetown(sigiop)
536 	struct sigio **sigiop;
537 {
538 	struct sigio *sigio;
539 
540 	SIGIO_LOCK();
541 	sigio = *sigiop;
542 	if (sigio == NULL) {
543 		SIGIO_UNLOCK();
544 		return;
545 	}
546 	*(sigio->sio_myref) = NULL;
547 	if ((sigio)->sio_pgid < 0) {
548 		struct pgrp *pg = (sigio)->sio_pgrp;
549 		PGRP_LOCK(pg);
550 		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
551 			     sigio, sio_pgsigio);
552 		PGRP_UNLOCK(pg);
553 	} else {
554 		struct proc *p = (sigio)->sio_proc;
555 		PROC_LOCK(p);
556 		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
557 			     sigio, sio_pgsigio);
558 		PROC_UNLOCK(p);
559 	}
560 	SIGIO_UNLOCK();
561 	crfree(sigio->sio_ucred);
562 	FREE(sigio, M_SIGIO);
563 }
564 
565 /*
566  * Free a list of sigio structures.
567  * We only need to lock the SIGIO_LOCK because we have made ourselves
568  * inaccessable to callers of fsetown and therefore do not need to lock
569  * the proc or pgrp struct for the list manipulation.
570  */
571 void
572 funsetownlst(sigiolst)
573 	struct sigiolst *sigiolst;
574 {
575 	struct sigio *sigio;
576 	struct proc *p;
577 	struct pgrp *pg;
578 
579 	sigio = SLIST_FIRST(sigiolst);
580 	if (sigio == NULL)
581 		return;
582 
583 	p = NULL;
584 	pg = NULL;
585 
586 	/*
587 	 * Every entry of the list should belong
588 	 * to a single proc or pgrp.
589 	 */
590 	if (sigio->sio_pgid < 0) {
591 		pg = sigio->sio_pgrp;
592 		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
593 	} else /* if (sigio->sio_pgid > 0) */ {
594 		p = sigio->sio_proc;
595 		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
596 	}
597 
598 	SIGIO_LOCK();
599 	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
600 		*(sigio->sio_myref) = NULL;
601 		if (pg != NULL) {
602 			KASSERT(sigio->sio_pgid < 0,
603 			    ("Proc sigio in pgrp sigio list"));
604 			KASSERT(sigio->sio_pgrp == pg,
605 			    ("Bogus pgrp in sigio list"));
606 			PGRP_LOCK(pg);
607 			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
608 			    sio_pgsigio);
609 			PGRP_UNLOCK(pg);
610 		} else /* if (p != NULL) */ {
611 			KASSERT(sigio->sio_pgid > 0,
612 			    ("Pgrp sigio in proc sigio list"));
613 			KASSERT(sigio->sio_proc == p,
614 			    ("Bogus proc in sigio list"));
615 			PROC_LOCK(p);
616 			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
617 			    sio_pgsigio);
618 			PROC_UNLOCK(p);
619 		}
620 		SIGIO_UNLOCK();
621 		crfree(sigio->sio_ucred);
622 		FREE(sigio, M_SIGIO);
623 		SIGIO_LOCK();
624 	}
625 	SIGIO_UNLOCK();
626 }
627 
628 /*
629  * This is common code for the FIOSETOWN ioctl, called by fcntl(fd, F_SETOWN, arg).
630  *
631  * After permission checking, add a sigio structure to the sigio list for
632  * the process or process group.
633  */
634 int
635 fsetown(pgid, sigiop)
636 	pid_t pgid;
637 	struct sigio **sigiop;
638 {
639 	struct proc *proc;
640 	struct pgrp *pgrp;
641 	struct sigio *sigio;
642 	int ret;
643 
644 	if (pgid == 0) {
645 		funsetown(sigiop);
646 		return (0);
647 	}
648 
649 	ret = 0;
650 
651 	/* Allocate and fill in the new sigio out of locks. */
652 	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
653 	sigio->sio_pgid = pgid;
654 	sigio->sio_ucred = crhold(curthread->td_ucred);
655 	sigio->sio_myref = sigiop;
656 
657 	sx_slock(&proctree_lock);
658 	if (pgid > 0) {
659 		proc = pfind(pgid);
660 		if (proc == NULL) {
661 			ret = ESRCH;
662 			goto fail;
663 		}
664 
665 		/*
666 		 * Policy - Don't allow a process to FSETOWN a process
667 		 * in another session.
668 		 *
669 		 * Remove this test to allow maximum flexibility or
670 		 * restrict FSETOWN to the current process or process
671 		 * group for maximum safety.
672 		 */
673 		PROC_UNLOCK(proc);
674 		if (proc->p_session != curthread->td_proc->p_session) {
675 			ret = EPERM;
676 			goto fail;
677 		}
678 
679 		pgrp = NULL;
680 	} else /* if (pgid < 0) */ {
681 		pgrp = pgfind(-pgid);
682 		if (pgrp == NULL) {
683 			ret = ESRCH;
684 			goto fail;
685 		}
686 		PGRP_UNLOCK(pgrp);
687 
688 		/*
689 		 * Policy - Don't allow a process to FSETOWN a process
690 		 * in another session.
691 		 *
692 		 * Remove this test to allow maximum flexibility or
693 		 * restrict FSETOWN to the current process or process
694 		 * group for maximum safety.
695 		 */
696 		if (pgrp->pg_session != curthread->td_proc->p_session) {
697 			ret = EPERM;
698 			goto fail;
699 		}
700 
701 		proc = NULL;
702 	}
703 	funsetown(sigiop);
704 	if (pgid > 0) {
705 		PROC_LOCK(proc);
706 		/*
707 		 * Since funsetownlst() is called without the proctree
708 		 * locked, we need to check for P_WEXIT.
709 		 * XXX: is ESRCH correct?
710 		 */
711 		if ((proc->p_flag & P_WEXIT) != 0) {
712 			PROC_UNLOCK(proc);
713 			ret = ESRCH;
714 			goto fail;
715 		}
716 		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
717 		sigio->sio_proc = proc;
718 		PROC_UNLOCK(proc);
719 	} else {
720 		PGRP_LOCK(pgrp);
721 		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
722 		sigio->sio_pgrp = pgrp;
723 		PGRP_UNLOCK(pgrp);
724 	}
725 	sx_sunlock(&proctree_lock);
726 	SIGIO_LOCK();
727 	*sigiop = sigio;
728 	SIGIO_UNLOCK();
729 	return (0);
730 
731 fail:
732 	sx_sunlock(&proctree_lock);
733 	crfree(sigio->sio_ucred);
734 	FREE(sigio, M_SIGIO);
735 	return (ret);
736 }
737 
738 /*
739  * This is common code for the FIOGETOWN ioctl, called by fcntl(fd, F_GETOWN, arg).
740  */
741 pid_t
742 fgetown(sigio)
743 	struct sigio *sigio;
744 {
745 	return (sigio != NULL ? sigio->sio_pgid : 0);
746 }
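
/*
 * Illustrative userland sketch (not part of the kernel build): arranging
 * SIGIO delivery through the F_SETOWN/F_GETOWN machinery above.  A positive
 * argument names a process and a negative one a process group, matching the
 * sio_pgid convention in fsetown().
 */
#if 0
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static void
sigio_handler(int sig)
{

	(void)sig;				/* real code would note readiness */
}

static void
enable_sigio(int fd)
{

	signal(SIGIO, sigio_handler);
	fcntl(fd, F_SETOWN, getpid());		/* deliver SIGIO to this process */
	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_ASYNC);
}
#endif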
747 
748 /*
749  * Close a file descriptor.
750  */
751 #ifndef _SYS_SYSPROTO_H_
752 struct close_args {
753         int     fd;
754 };
755 #endif
756 /*
757  * MPSAFE
758  */
759 /* ARGSUSED */
760 int
761 close(td, uap)
762 	struct thread *td;
763 	struct close_args *uap;
764 {
765 	register struct filedesc *fdp;
766 	register struct file *fp;
767 	register int fd = uap->fd;
768 	int error = 0;
769 
770 	mtx_lock(&Giant);
771 	fdp = td->td_proc->p_fd;
772 	FILEDESC_LOCK(fdp);
773 	if ((unsigned)fd >= fdp->fd_nfiles ||
774 	    (fp = fdp->fd_ofiles[fd]) == NULL) {
775 		FILEDESC_UNLOCK(fdp);
776 		error = EBADF;
777 		goto done2;
778 	}
779 #if 0
780 	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
781 		(void) munmapfd(td, fd);
782 #endif
783 	fdp->fd_ofiles[fd] = NULL;
784 	fdp->fd_ofileflags[fd] = 0;
785 
786 	/*
787 	 * we now hold the fp reference that used to be owned by the descriptor
788 	 * array.
789 	 */
790 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
791 		fdp->fd_lastfile--;
792 	if (fd < fdp->fd_freefile)
793 		fdp->fd_freefile = fd;
794 	if (fd < fdp->fd_knlistsize) {
795 		FILEDESC_UNLOCK(fdp);
796 		knote_fdclose(td, fd);
797 	} else
798 		FILEDESC_UNLOCK(fdp);
799 
800 	error = closef(fp, td);
801 done2:
802 	mtx_unlock(&Giant);
803 	return(error);
804 }
805 
806 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
807 /*
808  * Return status information about a file descriptor.
809  */
810 #ifndef _SYS_SYSPROTO_H_
811 struct ofstat_args {
812 	int	fd;
813 	struct	ostat *sb;
814 };
815 #endif
816 /*
817  * MPSAFE
818  */
819 /* ARGSUSED */
820 int
821 ofstat(td, uap)
822 	struct thread *td;
823 	register struct ofstat_args *uap;
824 {
825 	struct file *fp;
826 	struct stat ub;
827 	struct ostat oub;
828 	int error;
829 
830 	mtx_lock(&Giant);
831 	if ((error = fget(td, uap->fd, &fp)) != 0)
832 		goto done2;
833 	error = fo_stat(fp, &ub, td);
834 	if (error == 0) {
835 		cvtstat(&ub, &oub);
836 		error = copyout(&oub, uap->sb, sizeof (oub));
837 	}
838 	fdrop(fp, td);
839 done2:
840 	mtx_unlock(&Giant);
841 	return (error);
842 }
843 #endif /* COMPAT_43 || COMPAT_SUNOS */
844 
845 /*
846  * Return status information about a file descriptor.
847  */
848 #ifndef _SYS_SYSPROTO_H_
849 struct fstat_args {
850 	int	fd;
851 	struct	stat *sb;
852 };
853 #endif
854 /*
855  * MPSAFE
856  */
857 /* ARGSUSED */
858 int
859 fstat(td, uap)
860 	struct thread *td;
861 	struct fstat_args *uap;
862 {
863 	struct file *fp;
864 	struct stat ub;
865 	int error;
866 
867 	mtx_lock(&Giant);
868 	if ((error = fget(td, uap->fd, &fp)) != 0)
869 		goto done2;
870 	error = fo_stat(fp, &ub, td);
871 	if (error == 0)
872 		error = copyout(&ub, uap->sb, sizeof (ub));
873 	fdrop(fp, td);
874 done2:
875 	mtx_unlock(&Giant);
876 	return (error);
877 }
878 
879 /*
880  * Return status information about a file descriptor.
881  */
882 #ifndef _SYS_SYSPROTO_H_
883 struct nfstat_args {
884 	int	fd;
885 	struct	nstat *sb;
886 };
887 #endif
888 /*
889  * MPSAFE
890  */
891 /* ARGSUSED */
892 int
893 nfstat(td, uap)
894 	struct thread *td;
895 	register struct nfstat_args *uap;
896 {
897 	struct file *fp;
898 	struct stat ub;
899 	struct nstat nub;
900 	int error;
901 
902 	mtx_lock(&Giant);
903 	if ((error = fget(td, uap->fd, &fp)) != 0)
904 		goto done2;
905 	error = fo_stat(fp, &ub, td);
906 	if (error == 0) {
907 		cvtnstat(&ub, &nub);
908 		error = copyout(&nub, uap->sb, sizeof (nub));
909 	}
910 	fdrop(fp, td);
911 done2:
912 	mtx_unlock(&Giant);
913 	return (error);
914 }
915 
916 /*
917  * Return pathconf information about a file descriptor.
918  */
919 #ifndef _SYS_SYSPROTO_H_
920 struct fpathconf_args {
921 	int	fd;
922 	int	name;
923 };
924 #endif
925 /*
926  * MPSAFE
927  */
928 /* ARGSUSED */
929 int
930 fpathconf(td, uap)
931 	struct thread *td;
932 	register struct fpathconf_args *uap;
933 {
934 	struct file *fp;
935 	struct vnode *vp;
936 	int error;
937 
938 	if ((error = fget(td, uap->fd, &fp)) != 0)
939 		return (error);
940 
941 	switch (fp->f_type) {
942 	case DTYPE_PIPE:
943 	case DTYPE_SOCKET:
944 		if (uap->name != _PC_PIPE_BUF) {
945 			error = EINVAL;
946 		} else {
947 			td->td_retval[0] = PIPE_BUF;
948 			error = 0;
949 		}
950 		break;
951 	case DTYPE_FIFO:
952 	case DTYPE_VNODE:
953 		vp = (struct vnode *)fp->f_data;
954 		mtx_lock(&Giant);
955 		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
956 		mtx_unlock(&Giant);
957 		break;
958 	default:
959 		error = EOPNOTSUPP;
960 		break;
961 	}
962 	fdrop(fp, td);
963 	return(error);
964 }
965 
966 /*
967  * Allocate a file descriptor for the process.
968  */
969 static int fdexpand;
970 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
971 
972 int
973 fdalloc(td, want, result)
974 	struct thread *td;
975 	int want;
976 	int *result;
977 {
978 	struct proc *p = td->td_proc;
979 	register struct filedesc *fdp = td->td_proc->p_fd;
980 	register int i;
981 	int lim, last, nfiles;
982 	struct file **newofile, **oldofile;
983 	char *newofileflags;
984 
985 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
986 
987 	/*
988 	 * Search for a free descriptor starting at the higher
989 	 * of want or fd_freefile.  If that fails, consider
990 	 * expanding the ofile array.
991 	 */
992 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
993 	for (;;) {
994 		last = min(fdp->fd_nfiles, lim);
995 		if ((i = want) < fdp->fd_freefile)
996 			i = fdp->fd_freefile;
997 		for (; i < last; i++) {
998 			if (fdp->fd_ofiles[i] == NULL) {
999 				fdp->fd_ofileflags[i] = 0;
1000 				if (i > fdp->fd_lastfile)
1001 					fdp->fd_lastfile = i;
1002 				if (want <= fdp->fd_freefile)
1003 					fdp->fd_freefile = i;
1004 				*result = i;
1005 				return (0);
1006 			}
1007 		}
1008 
1009 		/*
1010 		 * No space in current array.  Expand?
1011 		 */
1012 		if (fdp->fd_nfiles >= lim)
1013 			return (EMFILE);
1014 		if (fdp->fd_nfiles < NDEXTENT)
1015 			nfiles = NDEXTENT;
1016 		else
1017 			nfiles = 2 * fdp->fd_nfiles;
1018 		FILEDESC_UNLOCK(fdp);
1019 		mtx_lock(&Giant);
1020 		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
1021 		    M_FILEDESC, M_WAITOK);
1022 		mtx_unlock(&Giant);
1023 		FILEDESC_LOCK(fdp);
1024 
1025 		/*
1026 		 * Deal with the file-table extension race that might have
1027 		 * occurred while malloc was blocked.
1028 		 */
1029 		if (fdp->fd_nfiles >= nfiles) {
1030 			FILEDESC_UNLOCK(fdp);
1031 			mtx_lock(&Giant);
1032 			FREE(newofile, M_FILEDESC);
1033 			mtx_unlock(&Giant);
1034 			FILEDESC_LOCK(fdp);
1035 			continue;
1036 		}
1037 		newofileflags = (char *) &newofile[nfiles];
1038 		/*
1039 		 * Copy the existing ofile and ofileflags arrays
1040 		 * and zero the new portion of each array.
1041 		 */
1042 		bcopy(fdp->fd_ofiles, newofile,
1043 			(i = sizeof(struct file *) * fdp->fd_nfiles));
1044 		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
1045 		bcopy(fdp->fd_ofileflags, newofileflags,
1046 			(i = sizeof(char) * fdp->fd_nfiles));
1047 		bzero(newofileflags + i, nfiles * sizeof(char) - i);
1048 		if (fdp->fd_nfiles > NDFILE)
1049 			oldofile = fdp->fd_ofiles;
1050 		else
1051 			oldofile = NULL;
1052 		fdp->fd_ofiles = newofile;
1053 		fdp->fd_ofileflags = newofileflags;
1054 		fdp->fd_nfiles = nfiles;
1055 		fdexpand++;
1056 		if (oldofile != NULL) {
1057 			FILEDESC_UNLOCK(fdp);
1058 			mtx_lock(&Giant);
1059 			FREE(oldofile, M_FILEDESC);
1060 			mtx_unlock(&Giant);
1061 			FILEDESC_LOCK(fdp);
1062 		}
1063 	}
1064 	return (0);
1065 }
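
/*
 * A minimal sketch (not compiled) of the growth policy used above: the
 * table jumps to NDEXTENT entries while it is still smaller than that, and
 * doubles on every later expansion.  The helper name is hypothetical.
 */
#if 0
static int
fd_next_size(int nfiles)
{

	return (nfiles < NDEXTENT ? NDEXTENT : 2 * nfiles);
}
#endif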
1066 
1067 /*
1068  * Check to see whether n user file descriptors
1069  * are available to the process p.
1070  */
1071 int
1072 fdavail(td, n)
1073 	struct thread *td;
1074 	register int n;
1075 {
1076 	struct proc *p = td->td_proc;
1077 	register struct filedesc *fdp = td->td_proc->p_fd;
1078 	register struct file **fpp;
1079 	register int i, lim, last;
1080 
1081 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1082 
1083 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1084 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
1085 		return (1);
1086 
1087 	last = min(fdp->fd_nfiles, lim);
1088 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
1089 	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
1090 		if (*fpp == NULL && --n <= 0)
1091 			return (1);
1092 	}
1093 	return (0);
1094 }
1095 
1096 /*
1097  * Create a new open file structure and allocate
1098  * a file descriptor for the process that refers to it.
1099  */
1100 int
1101 falloc(td, resultfp, resultfd)
1102 	register struct thread *td;
1103 	struct file **resultfp;
1104 	int *resultfd;
1105 {
1106 	struct proc *p = td->td_proc;
1107 	register struct file *fp, *fq;
1108 	int error, i;
1109 
1110 	sx_xlock(&filelist_lock);
1111 	if (nfiles >= maxfiles) {
1112 		sx_xunlock(&filelist_lock);
1113 		tablefull("file");
1114 		return (ENFILE);
1115 	}
1116 	nfiles++;
1117 	sx_xunlock(&filelist_lock);
1118 	/*
1119 	 * Allocate a new file structure and descriptor.
1120 	 * If the process has file descriptor zero open, insert the new file
1121 	 * into the global open-file list just after that descriptor's file;
1122 	 * otherwise insert it at the front of the list.
1123 	 */
1124 	fp = uma_zalloc(file_zone, M_WAITOK);
1125 	bzero(fp, sizeof(*fp));
1126 
1127 	/*
1128 	 * wait until after malloc (which may have blocked) returns before
1129 	 * allocating the slot, else a race might have shrunk it if we had
1130 	 * allocated it before the malloc.
1131 	 */
1132 	FILEDESC_LOCK(p->p_fd);
1133 	if ((error = fdalloc(td, 0, &i))) {
1134 		FILEDESC_UNLOCK(p->p_fd);
1135 		sx_xlock(&filelist_lock);
1136 		nfiles--;
1137 		sx_xunlock(&filelist_lock);
1138 		uma_zfree(file_zone, fp);
1139 		return (error);
1140 	}
1141 	fp->f_mtxp = mtx_pool_alloc();
1142 	fp->f_gcflag = 0;
1143 	fp->f_count = 1;
1144 	fp->f_cred = crhold(td->td_ucred);
1145 	fp->f_ops = &badfileops;
1146 	fp->f_seqcount = 1;
1147 	FILEDESC_UNLOCK(p->p_fd);
1148 	sx_xlock(&filelist_lock);
1149 	FILEDESC_LOCK(p->p_fd);
1150 	if ((fq = p->p_fd->fd_ofiles[0])) {
1151 		LIST_INSERT_AFTER(fq, fp, f_list);
1152 	} else {
1153 		LIST_INSERT_HEAD(&filehead, fp, f_list);
1154 	}
1155 	p->p_fd->fd_ofiles[i] = fp;
1156 	FILEDESC_UNLOCK(p->p_fd);
1157 	sx_xunlock(&filelist_lock);
1158 	if (resultfp)
1159 		*resultfp = fp;
1160 	if (resultfd)
1161 		*resultfd = i;
1162 	return (0);
1163 }
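
/*
 * A minimal sketch (not compiled) of how callers use falloc(): allocate the
 * file and slot, then fill in f_data, f_ops, f_type and f_flag, much as
 * fdcheckstd() does for /dev/null later in this file.  The object and
 * fileops names below are hypothetical placeholders.
 */
#if 0
	struct file *fp;
	int fd, error;

	error = falloc(td, &fp, &fd);
	if (error == 0) {
		fp->f_data = (caddr_t)my_object;	/* hypothetical object */
		fp->f_ops = &my_fileops;		/* hypothetical fileops */
		fp->f_type = DTYPE_VNODE;
		fp->f_flag = FREAD | FWRITE;
	}
#endif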
1164 
1165 /*
1166  * Free a file descriptor.
1167  */
1168 void
1169 ffree(fp)
1170 	register struct file *fp;
1171 {
1172 
1173 	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
1174 	sx_xlock(&filelist_lock);
1175 	LIST_REMOVE(fp, f_list);
1176 	nfiles--;
1177 	sx_xunlock(&filelist_lock);
1178 	crfree(fp->f_cred);
1179 	uma_zfree(file_zone, fp);
1180 }
1181 
1182 /*
1183  * Build a new filedesc structure.
1184  */
1185 struct filedesc *
1186 fdinit(td)
1187 	struct thread *td;
1188 {
1189 	register struct filedesc0 *newfdp;
1190 	register struct filedesc *fdp = td->td_proc->p_fd;
1191 
1192 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1193 	    M_FILEDESC, M_WAITOK | M_ZERO);
1194 	mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1195 	FILEDESC_LOCK(&newfdp->fd_fd);
1196 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1197 	if (newfdp->fd_fd.fd_cdir)
1198 		VREF(newfdp->fd_fd.fd_cdir);
1199 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1200 	if (newfdp->fd_fd.fd_rdir)
1201 		VREF(newfdp->fd_fd.fd_rdir);
1202 	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1203 	if (newfdp->fd_fd.fd_jdir)
1204 		VREF(newfdp->fd_fd.fd_jdir);
1205 
1206 	/* Create the file descriptor table. */
1207 	newfdp->fd_fd.fd_refcnt = 1;
1208 	newfdp->fd_fd.fd_cmask = cmask;
1209 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1210 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1211 	newfdp->fd_fd.fd_nfiles = NDFILE;
1212 	newfdp->fd_fd.fd_knlistsize = -1;
1213 	FILEDESC_UNLOCK(&newfdp->fd_fd);
1214 
1215 	return (&newfdp->fd_fd);
1216 }
1217 
1218 /*
1219  * Share a filedesc structure.
1220  */
1221 struct filedesc *
1222 fdshare(p)
1223 	struct proc *p;
1224 {
1225 	FILEDESC_LOCK(p->p_fd);
1226 	p->p_fd->fd_refcnt++;
1227 	FILEDESC_UNLOCK(p->p_fd);
1228 	return (p->p_fd);
1229 }
1230 
1231 /*
1232  * Copy a filedesc structure.
1233  */
1234 struct filedesc *
1235 fdcopy(td)
1236 	struct thread *td;
1237 {
1238 	register struct filedesc *newfdp, *fdp = td->td_proc->p_fd;
1239 	register struct file **fpp;
1240 	register int i, j;
1241 
1242 	/* Certain daemons might not have file descriptors. */
1243 	if (fdp == NULL)
1244 		return (NULL);
1245 
1246 	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1247 
1248 	FILEDESC_UNLOCK(fdp);
1249 	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1250 	    M_FILEDESC, M_WAITOK);
1251 	FILEDESC_LOCK(fdp);
1252 	bcopy(fdp, newfdp, sizeof(struct filedesc));
1253 	FILEDESC_UNLOCK(fdp);
1254 	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
1255 	mtx_init(&newfdp->fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1256 	if (newfdp->fd_cdir)
1257 		VREF(newfdp->fd_cdir);
1258 	if (newfdp->fd_rdir)
1259 		VREF(newfdp->fd_rdir);
1260 	if (newfdp->fd_jdir)
1261 		VREF(newfdp->fd_jdir);
1262 	newfdp->fd_refcnt = 1;
1263 
1264 	/*
1265 	 * If the number of open files fits in the internal arrays
1266 	 * of the open file structure, use them, otherwise allocate
1267 	 * additional memory for the number of descriptors currently
1268 	 * in use.
1269 	 */
1270 	FILEDESC_LOCK(fdp);
1271 	newfdp->fd_lastfile = fdp->fd_lastfile;
1272 	newfdp->fd_nfiles = fdp->fd_nfiles;
1273 	if (newfdp->fd_lastfile < NDFILE) {
1274 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1275 		newfdp->fd_ofileflags =
1276 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
1277 		i = NDFILE;
1278 	} else {
1279 		/*
1280 		 * Compute the smallest multiple of NDEXTENT needed
1281 		 * for the file descriptors currently in use,
1282 		 * allowing the table to shrink.
1283 		 */
1284 retry:
1285 		i = newfdp->fd_nfiles;
1286 		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1287 			i /= 2;
1288 		FILEDESC_UNLOCK(fdp);
1289 		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1290 		    M_FILEDESC, M_WAITOK);
1291 		FILEDESC_LOCK(fdp);
1292 		newfdp->fd_lastfile = fdp->fd_lastfile;
1293 		newfdp->fd_nfiles = fdp->fd_nfiles;
1294 		j = newfdp->fd_nfiles;
1295 		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
1296 			j /= 2;
1297 		if (i != j) {
1298 			/*
1299 			 * The size of the original table has changed.
1300 			 * Go over once again.
1301 			 */
1302 			FILEDESC_UNLOCK(fdp);
1303 			FREE(newfdp->fd_ofiles, M_FILEDESC);
1304 			FILEDESC_LOCK(fdp);
1305 			newfdp->fd_lastfile = fdp->fd_lastfile;
1306 			newfdp->fd_nfiles = fdp->fd_nfiles;
1307 			goto retry;
1308 		}
1309 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1310 	}
1311 	newfdp->fd_nfiles = i;
1312 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1313 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1314 
1315 	/*
1316 	 * kq descriptors cannot be copied.
1317 	 */
1318 	if (newfdp->fd_knlistsize != -1) {
1319 		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1320 		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1321 			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1322 				*fpp = NULL;
1323 				if (i < newfdp->fd_freefile)
1324 					newfdp->fd_freefile = i;
1325 			}
1326 			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1327 				newfdp->fd_lastfile--;
1328 		}
1329 		newfdp->fd_knlist = NULL;
1330 		newfdp->fd_knlistsize = -1;
1331 		newfdp->fd_knhash = NULL;
1332 		newfdp->fd_knhashmask = 0;
1333 	}
1334 
1335 	fpp = newfdp->fd_ofiles;
1336 	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1337 		if (*fpp != NULL) {
1338 			fhold(*fpp);
1339 		}
1340 	}
1341 	return (newfdp);
1342 }
1343 
1344 /*
1345  * Release a filedesc structure.
1346  */
1347 void
1348 fdfree(td)
1349 	struct thread *td;
1350 {
1351 	register struct filedesc *fdp;
1352 	struct file **fpp;
1353 	register int i;
1354 
1355 	fdp = td->td_proc->p_fd;
1356 	/* Certain daemons might not have file descriptors. */
1357 	if (fdp == NULL)
1358 		return;
1359 
1360 	FILEDESC_LOCK(fdp);
1361 	if (--fdp->fd_refcnt > 0) {
1362 		FILEDESC_UNLOCK(fdp);
1363 		return;
1364 	}
1365 	/*
1366 	 * We hold the last reference to the structure, so we can
1367 	 * safely assume it will not change out from under us.
1368 	 */
1369 	FILEDESC_UNLOCK(fdp);
1370 	fpp = fdp->fd_ofiles;
1371 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1372 		if (*fpp)
1373 			(void) closef(*fpp, td);
1374 	}
1375 
1376 	PROC_LOCK(td->td_proc);
1377 	td->td_proc->p_fd = NULL;
1378 	PROC_UNLOCK(td->td_proc);
1379 
1380 	if (fdp->fd_nfiles > NDFILE)
1381 		FREE(fdp->fd_ofiles, M_FILEDESC);
1382 	if (fdp->fd_cdir)
1383 		vrele(fdp->fd_cdir);
1384 	if (fdp->fd_rdir)
1385 		vrele(fdp->fd_rdir);
1386 	if (fdp->fd_jdir)
1387 		vrele(fdp->fd_jdir);
1388 	if (fdp->fd_knlist)
1389 		FREE(fdp->fd_knlist, M_KQUEUE);
1390 	if (fdp->fd_knhash)
1391 		FREE(fdp->fd_knhash, M_KQUEUE);
1392 	mtx_destroy(&fdp->fd_mtx);
1393 	FREE(fdp, M_FILEDESC);
1394 }
1395 
1396 /*
1397  * For setugid programs, we don't want people to use that setugidness
1398  * to generate error messages which write to a file that would
1399  * otherwise be off-limits to the process.
1400  *
1401  * This is a gross hack to plug the hole.  A better solution would involve
1402  * a special vop or other form of generalized access control mechanism.  We
1403  * go ahead and just reject all procfs filesystem accesses as dangerous.
1404  *
1405  * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1406  * sufficient.  We also don't check for setugidness since we know we are.
1407  */
1408 static int
1409 is_unsafe(struct file *fp)
1410 {
1411 	if (fp->f_type == DTYPE_VNODE &&
1412 	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1413 		return (1);
1414 	return (0);
1415 }
1416 
1417 /*
1418  * Make this setugid thing safe, if at all possible.
1419  */
1420 void
1421 setugidsafety(td)
1422 	struct thread *td;
1423 {
1424 	struct filedesc *fdp = td->td_proc->p_fd;
1425 	register int i;
1426 
1427 	/* Certain daemons might not have file descriptors. */
1428 	if (fdp == NULL)
1429 		return;
1430 
1431 	/*
1432 	 * note: fdp->fd_ofiles may be reallocated out from under us while
1433 	 * we are blocked in a close.  Be careful!
1434 	 */
1435 	FILEDESC_LOCK(fdp);
1436 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1437 		if (i > 2)
1438 			break;
1439 		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1440 			struct file *fp;
1441 
1442 #if 0
1443 			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1444 				(void) munmapfd(td, i);
1445 #endif
1446 			if (i < fdp->fd_knlistsize) {
1447 				FILEDESC_UNLOCK(fdp);
1448 				knote_fdclose(td, i);
1449 				FILEDESC_LOCK(fdp);
1450 			}
1451 			/*
1452 			 * NULL-out descriptor prior to close to avoid
1453 			 * a race while close blocks.
1454 			 */
1455 			fp = fdp->fd_ofiles[i];
1456 			fdp->fd_ofiles[i] = NULL;
1457 			fdp->fd_ofileflags[i] = 0;
1458 			if (i < fdp->fd_freefile)
1459 				fdp->fd_freefile = i;
1460 			FILEDESC_UNLOCK(fdp);
1461 			(void) closef(fp, td);
1462 			FILEDESC_LOCK(fdp);
1463 		}
1464 	}
1465 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1466 		fdp->fd_lastfile--;
1467 	FILEDESC_UNLOCK(fdp);
1468 }
1469 
1470 /*
1471  * Close any files on exec?
1472  * Close any files marked close-on-exec.
1473 void
1474 fdcloseexec(td)
1475 	struct thread *td;
1476 {
1477 	struct filedesc *fdp = td->td_proc->p_fd;
1478 	register int i;
1479 
1480 	/* Certain daemons might not have file descriptors. */
1481 	if (fdp == NULL)
1482 		return;
1483 
1484 	FILEDESC_LOCK(fdp);
1485 
1486 	/*
1487 	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1488 	 * may block and rip them out from under us.
1489 	 */
1490 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1491 		if (fdp->fd_ofiles[i] != NULL &&
1492 		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1493 			struct file *fp;
1494 
1495 #if 0
1496 			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1497 				(void) munmapfd(td, i);
1498 #endif
1499 			if (i < fdp->fd_knlistsize) {
1500 				FILEDESC_UNLOCK(fdp);
1501 				knote_fdclose(td, i);
1502 				FILEDESC_LOCK(fdp);
1503 			}
1504 			/*
1505 			 * NULL-out descriptor prior to close to avoid
1506 			 * a race while close blocks.
1507 			 */
1508 			fp = fdp->fd_ofiles[i];
1509 			fdp->fd_ofiles[i] = NULL;
1510 			fdp->fd_ofileflags[i] = 0;
1511 			if (i < fdp->fd_freefile)
1512 				fdp->fd_freefile = i;
1513 			FILEDESC_UNLOCK(fdp);
1514 			(void) closef(fp, td);
1515 			FILEDESC_LOCK(fdp);
1516 		}
1517 	}
1518 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1519 		fdp->fd_lastfile--;
1520 	FILEDESC_UNLOCK(fdp);
1521 }
1522 
1523 /*
1524  * It is unsafe for set[ug]id processes to be started with file
1525  * descriptors 0..2 closed, as these descriptors are given implicit
1526  * significance in the Standard C library.  fdcheckstd() will create a
1527  * descriptor referencing /dev/null for each of stdin, stdout, and
1528  * stderr that is not already open.
1529  */
1530 int
1531 fdcheckstd(td)
1532 	struct thread *td;
1533 {
1534 	struct nameidata nd;
1535 	struct filedesc *fdp;
1536 	struct file *fp;
1537 	register_t retval;
1538 	int fd, i, error, flags, devnull;
1539 
1540 	fdp = td->td_proc->p_fd;
1541 	if (fdp == NULL)
1542 		return (0);
1543 	devnull = -1;
1544 	error = 0;
1545 	for (i = 0; i < 3; i++) {
1546 		if (fdp->fd_ofiles[i] != NULL)
1547 			continue;
1548 		if (devnull < 0) {
1549 			error = falloc(td, &fp, &fd);
1550 			if (error != 0)
1551 				break;
1552 			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
1553 			    td);
1554 			flags = FREAD | FWRITE;
1555 			error = vn_open(&nd, &flags, 0);
1556 			if (error != 0) {
1557 				FILEDESC_LOCK(fdp);
1558 				fdp->fd_ofiles[i] = NULL;
1559 				FILEDESC_UNLOCK(fdp);
1560 				fdrop(fp, td);
1561 				break;
1562 			}
1563 			NDFREE(&nd, NDF_ONLY_PNBUF);
1564 			fp->f_data = nd.ni_vp;
1565 			fp->f_flag = flags;
1566 			fp->f_ops = &vnops;
1567 			fp->f_type = DTYPE_VNODE;
1568 			VOP_UNLOCK(nd.ni_vp, 0, td);
1569 			devnull = fd;
1570 		} else {
1571 			FILEDESC_LOCK(fdp);
1572 			error = fdalloc(td, 0, &fd);
1573 			if (error != 0) {
1574 				FILEDESC_UNLOCK(fdp);
1575 				break;
1576 			}
1577 			error = do_dup(fdp, devnull, fd, &retval, td);
1578 			if (error != 0)
1579 				break;
1580 		}
1581 	}
1582 	return (error);
1583 }
1584 
1585 /*
1586  * Internal form of close.
1587  * Decrement reference count on file structure.
1588  * Note: td may be NULL when closing a file
1589  * that was being passed in a message.
1590  */
1591 int
1592 closef(fp, td)
1593 	register struct file *fp;
1594 	register struct thread *td;
1595 {
1596 	struct vnode *vp;
1597 	struct flock lf;
1598 
1599 	if (fp == NULL)
1600 		return (0);
1601 	/*
1602 	 * POSIX record locking dictates that any close releases ALL
1603 	 * locks owned by this process.  This is handled by setting
1604 	 * a flag in the unlock to free ONLY locks obeying POSIX
1605 	 * semantics, and not to free BSD-style file locks.
1606 	 * If the descriptor was in a message, POSIX-style locks
1607 	 * aren't passed with the descriptor.
1608 	 */
1609 	if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1610 	    fp->f_type == DTYPE_VNODE) {
1611 		lf.l_whence = SEEK_SET;
1612 		lf.l_start = 0;
1613 		lf.l_len = 0;
1614 		lf.l_type = F_UNLCK;
1615 		vp = (struct vnode *)fp->f_data;
1616 		(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1617 		    F_UNLCK, &lf, F_POSIX);
1618 	}
1619 	return (fdrop(fp, td));
1620 }
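
/*
 * Illustrative userland sketch (not part of the kernel build): the POSIX
 * rule enforced above means that closing *any* descriptor for a file drops
 * every POSIX lock the process holds on that file, even locks taken through
 * a different descriptor.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

static void
posix_lock_pitfall(const char *path)
{
	struct flock fl;
	int fd1, fd2;

	fd1 = open(path, O_RDWR);
	fd2 = open(path, O_RDWR);		/* second descriptor, same file */

	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 0;
	fcntl(fd1, F_SETLK, &fl);		/* lock taken via fd1 ... */

	close(fd2);				/* ... and silently lost here */
	close(fd1);
}
#endif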
1621 
1622 /*
1623  * Drop reference on struct file passed in, may call closef if the
1624  * reference hits zero.
1625  */
1626 int
1627 fdrop(fp, td)
1628 	struct file *fp;
1629 	struct thread *td;
1630 {
1631 
1632 	FILE_LOCK(fp);
1633 	return (fdrop_locked(fp, td));
1634 }
1635 
1636 /*
1637  * Extract the file pointer associated with the specified descriptor for
1638  * the current user process.
1639  *
1640  * If the descriptor doesn't exist, EBADF is returned.
1641  *
1642  * If the descriptor exists but doesn't match 'flags' then
1643  * return EBADF for read attempts and EINVAL for write attempts.
1644  *
1645  * If 'hold' is set (non-zero), the file's refcount will be bumped on return.
1646  * It should be dropped with fdrop().
1647  * If it is not set, then the refcount will not be bumped; however, the
1648  * thread's filedesc struct will be returned locked (for fgetsock).
1649  *
1650  * If an error occurred, the non-zero error is returned and *fpp is set to NULL.
1651  * Otherwise *fpp is set and zero is returned.
1652  */
1653 static __inline
1654 int
1655 _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1656 {
1657 	struct filedesc *fdp;
1658 	struct file *fp;
1659 
1660 	*fpp = NULL;
1661 	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1662 		return(EBADF);
1663 	FILEDESC_LOCK(fdp);
1664 	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1665 		FILEDESC_UNLOCK(fdp);
1666 		return(EBADF);
1667 	}
1668 
1669 	/*
1670 	 * Note: FREAD failures return EBADF to maintain backwards
1671 	 * compatibility with what routines returned before.
1672 	 *
1673 	 * Only one flag, or 0, may be specified.
1674 	 */
1675 	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1676 		FILEDESC_UNLOCK(fdp);
1677 		return(EBADF);
1678 	}
1679 	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1680 		FILEDESC_UNLOCK(fdp);
1681 		return(EINVAL);
1682 	}
1683 	if (hold) {
1684 		fhold(fp);
1685 		FILEDESC_UNLOCK(fdp);
1686 	}
1687 	*fpp = fp;
1688 	return(0);
1689 }
1690 
1691 int
1692 fget(struct thread *td, int fd, struct file **fpp)
1693 {
1694     return(_fget(td, fd, fpp, 0, 1));
1695 }
1696 
1697 int
1698 fget_read(struct thread *td, int fd, struct file **fpp)
1699 {
1700     return(_fget(td, fd, fpp, FREAD, 1));
1701 }
1702 
1703 int
1704 fget_write(struct thread *td, int fd, struct file **fpp)
1705 {
1706     return(_fget(td, fd, fpp, FWRITE, 1));
1707 }
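
/*
 * A minimal sketch (not compiled) of the fget()/fdrop() pairing these
 * wrappers provide, using the same names as fstat() earlier in this file.
 */
#if 0
	struct file *fp;
	int error;

	if ((error = fget(td, fd, &fp)) != 0)
		return (error);
	error = fo_stat(fp, &ub, td);		/* use fp while it is referenced */
	fdrop(fp, td);				/* release the bumped refcount */
	return (error);
#endif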
1708 
1709 /*
1710  * Like fget() but loads the underlying vnode, or returns an error if
1711  * the descriptor does not represent a vnode.  Note that pipes use vnodes
1712  * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1713  * error).  The returned vnode will be vref()d.
1714  */
1715 
1716 static __inline
1717 int
1718 _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1719 {
1720 	struct file *fp;
1721 	int error;
1722 
1723 	*vpp = NULL;
1724 	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1725 		return (error);
1726 	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1727 		error = EINVAL;
1728 	} else {
1729 		*vpp = (struct vnode *)fp->f_data;
1730 		vref(*vpp);
1731 	}
1732 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1733 	return (error);
1734 }
1735 
1736 int
1737 fgetvp(struct thread *td, int fd, struct vnode **vpp)
1738 {
1739 	return(_fgetvp(td, fd, vpp, 0));
1740 }
1741 
1742 int
1743 fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1744 {
1745 	return(_fgetvp(td, fd, vpp, FREAD));
1746 }
1747 
1748 int
1749 fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1750 {
1751 	return(_fgetvp(td, fd, vpp, FWRITE));
1752 }
1753 
1754 /*
1755  * Like fget() but loads the underlying socket, or returns an error if
1756  * the descriptor does not represent a socket.
1757  *
1758  * We bump the ref count on the returned socket.  XXX Also obtain the SX lock in
1759  * the future.
1760  */
1761 int
1762 fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1763 {
1764 	struct file *fp;
1765 	int error;
1766 
1767 	*spp = NULL;
1768 	if (fflagp)
1769 		*fflagp = 0;
1770 	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1771 		return (error);
1772 	if (fp->f_type != DTYPE_SOCKET) {
1773 		error = ENOTSOCK;
1774 	} else {
1775 		*spp = (struct socket *)fp->f_data;
1776 		if (fflagp)
1777 			*fflagp = fp->f_flag;
1778 		soref(*spp);
1779 	}
1780 	FILEDESC_UNLOCK(td->td_proc->p_fd);
1781 	return(error);
1782 }
1783 
1784 /*
1785  * Drop the reference count on the socket and XXX release the SX lock in
1786  * the future.  The last reference closes the socket.
1787  */
1788 void
1789 fputsock(struct socket *so)
1790 {
1791 	sorele(so);
1792 }
1793 
1794 /*
1795  * Drop reference on struct file passed in, may call closef if the
1796  * reference hits zero.
1797  * Expects struct file locked, and will unlock it.
1798  */
1799 int
1800 fdrop_locked(fp, td)
1801 	struct file *fp;
1802 	struct thread *td;
1803 {
1804 	struct flock lf;
1805 	struct vnode *vp;
1806 	int error;
1807 
1808 	FILE_LOCK_ASSERT(fp, MA_OWNED);
1809 
1810 	if (--fp->f_count > 0) {
1811 		FILE_UNLOCK(fp);
1812 		return (0);
1813 	}
1814 	mtx_lock(&Giant);
1815 	if (fp->f_count < 0)
1816 		panic("fdrop: count < 0");
1817 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1818 		lf.l_whence = SEEK_SET;
1819 		lf.l_start = 0;
1820 		lf.l_len = 0;
1821 		lf.l_type = F_UNLCK;
1822 		vp = (struct vnode *)fp->f_data;
1823 		FILE_UNLOCK(fp);
1824 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1825 	} else
1826 		FILE_UNLOCK(fp);
1827 	if (fp->f_ops != &badfileops)
1828 		error = fo_close(fp, td);
1829 	else
1830 		error = 0;
1831 	ffree(fp);
1832 	mtx_unlock(&Giant);
1833 	return (error);
1834 }
1835 
1836 /*
1837  * Apply an advisory lock on a file descriptor.
1838  *
1839  * Just attempt to get a record lock of the requested type on
1840  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1841  */
1842 #ifndef _SYS_SYSPROTO_H_
1843 struct flock_args {
1844 	int	fd;
1845 	int	how;
1846 };
1847 #endif
1848 /*
1849  * MPSAFE
1850  */
1851 /* ARGSUSED */
1852 int
1853 flock(td, uap)
1854 	struct thread *td;
1855 	register struct flock_args *uap;
1856 {
1857 	struct file *fp;
1858 	struct vnode *vp;
1859 	struct flock lf;
1860 	int error;
1861 
1862 	if ((error = fget(td, uap->fd, &fp)) != 0)
1863 		return (error);
1864 	if (fp->f_type != DTYPE_VNODE) {
1865 		fdrop(fp, td);
1866 		return (EOPNOTSUPP);
1867 	}
1868 
1869 	mtx_lock(&Giant);
1870 	vp = (struct vnode *)fp->f_data;
1871 	lf.l_whence = SEEK_SET;
1872 	lf.l_start = 0;
1873 	lf.l_len = 0;
1874 	if (uap->how & LOCK_UN) {
1875 		lf.l_type = F_UNLCK;
1876 		FILE_LOCK(fp);
1877 		fp->f_flag &= ~FHASLOCK;
1878 		FILE_UNLOCK(fp);
1879 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1880 		goto done2;
1881 	}
1882 	if (uap->how & LOCK_EX)
1883 		lf.l_type = F_WRLCK;
1884 	else if (uap->how & LOCK_SH)
1885 		lf.l_type = F_RDLCK;
1886 	else {
1887 		error = EBADF;
1888 		goto done2;
1889 	}
1890 	FILE_LOCK(fp);
1891 	fp->f_flag |= FHASLOCK;
1892 	FILE_UNLOCK(fp);
1893 	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1894 	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
1895 done2:
1896 	fdrop(fp, td);
1897 	mtx_unlock(&Giant);
1898 	return (error);
1899 }
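
/*
 * Illustrative userland sketch (not part of the kernel build): taking and
 * releasing a whole-file advisory lock with the flock() implemented above.
 * LOCK_NB makes the request non-blocking, i.e. F_FLOCK without F_WAIT.
 */
#if 0
#include <sys/file.h>
#include <fcntl.h>
#include <unistd.h>

static int
with_exclusive_lock(const char *path)
{
	int fd;

	fd = open(path, O_RDWR);
	if (fd == -1)
		return (-1);
	if (flock(fd, LOCK_EX | LOCK_NB) == -1) {	/* would block: give up */
		close(fd);
		return (-1);
	}
	/* ... critical section ... */
	flock(fd, LOCK_UN);
	close(fd);
	return (0);
}
#endif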
1900 
1901 /*
1902  * File Descriptor pseudo-device driver (/dev/fd/).
1903  *
1904  * Opening minor device N dup()s the file (if any) connected to file
1905  * descriptor N belonging to the calling process.  Note that this driver
1906  * consists of only the ``open()'' routine, because all subsequent
1907  * references to this file will be direct to the other driver.
1908  */
1909 /* ARGSUSED */
1910 static int
1911 fdopen(dev, mode, type, td)
1912 	dev_t dev;
1913 	int mode, type;
1914 	struct thread *td;
1915 {
1916 
1917 	/*
1918 	 * XXX Kludge: set curthread->td_dupfd to contain the value of
1919 	 * the file descriptor being sought for duplication. The error
1920 	 * return ensures that the vnode for this device will be released
1921 	 * by vn_open. Open will detect this special error and take the
1922 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1923 	 * will simply report the error.
1924 	 */
1925 	td->td_dupfd = dev2unit(dev);
1926 	return (ENODEV);
1927 }
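
/*
 * Illustrative userland sketch (not part of the kernel build): because of
 * the ENODEV handshake between fdopen() above and dupfdopen() below, opening
 * /dev/fd/N behaves like duplicating descriptor N in the calling process.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>

static int
dup_via_devfd(void)
{

	/* Roughly equivalent to dup(STDIN_FILENO). */
	return (open("/dev/fd/0", O_RDONLY));
}
#endif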
1928 
1929 /*
1930  * Duplicate the specified descriptor to a free descriptor.
1931  */
1932 int
1933 dupfdopen(td, fdp, indx, dfd, mode, error)
1934 	struct thread *td;
1935 	struct filedesc *fdp;
1936 	int indx, dfd;
1937 	int mode;
1938 	int error;
1939 {
1940 	register struct file *wfp;
1941 	struct file *fp;
1942 
1943 	/*
1944 	 * If the to-be-dup'd fd number is greater than the allowed number
1945 	 * of file descriptors, or the fd to be dup'd has already been
1946 	 * closed, then reject.
1947 	 */
1948 	FILEDESC_LOCK(fdp);
1949 	if ((u_int)dfd >= fdp->fd_nfiles ||
1950 	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1951 		FILEDESC_UNLOCK(fdp);
1952 		return (EBADF);
1953 	}
1954 
1955 	/*
1956 	 * There are two cases of interest here.
1957 	 *
1958 	 * For ENODEV simply dup (dfd) to file descriptor
1959 	 * (indx) and return.
1960 	 *
1961 	 * For ENXIO steal away the file structure from (dfd) and
1962 	 * store it in (indx).  (dfd) is effectively closed by
1963 	 * this operation.
1964 	 *
1965 	 * Any other error code is just returned.
1966 	 */
1967 	switch (error) {
1968 	case ENODEV:
1969 		/*
1970 		 * Check that the mode the file is being opened for is a
1971 		 * subset of the mode of the existing descriptor.
1972 		 */
1973 		FILE_LOCK(wfp);
1974 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1975 			FILE_UNLOCK(wfp);
1976 			FILEDESC_UNLOCK(fdp);
1977 			return (EACCES);
1978 		}
1979 		fp = fdp->fd_ofiles[indx];
1980 #if 0
1981 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1982 			(void) munmapfd(td, indx);
1983 #endif
1984 		fdp->fd_ofiles[indx] = wfp;
1985 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1986 		fhold_locked(wfp);
1987 		FILE_UNLOCK(wfp);
1988 		if (indx > fdp->fd_lastfile)
1989 			fdp->fd_lastfile = indx;
1990 		if (fp != NULL)
1991 			FILE_LOCK(fp);
1992 		FILEDESC_UNLOCK(fdp);
1993 		/*
1994 		 * we now own the reference to fp that the ofiles[] array
1995 		 * used to own.  Release it.
1996 		 */
1997 		if (fp != NULL)
1998 			fdrop_locked(fp, td);
1999 		return (0);
2000 
2001 	case ENXIO:
2002 		/*
2003 		 * Steal away the file pointer from dfd, and stuff it into indx.
2004 		 */
2005 		fp = fdp->fd_ofiles[indx];
2006 #if 0
2007 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
2008 			(void) munmapfd(td, indx);
2009 #endif
2010 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
2011 		fdp->fd_ofiles[dfd] = NULL;
2012 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
2013 		fdp->fd_ofileflags[dfd] = 0;
2014 
2015 		/*
2016 		 * Complete the clean up of the filedesc structure by
2017 		 * recomputing the various hints.
2018 		 */
2019 		if (indx > fdp->fd_lastfile) {
2020 			fdp->fd_lastfile = indx;
2021 		} else {
2022 			while (fdp->fd_lastfile > 0 &&
2023 			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
2024 				fdp->fd_lastfile--;
2025 			}
2026 			if (dfd < fdp->fd_freefile)
2027 				fdp->fd_freefile = dfd;
2028 		}
2029 		if (fp != NULL)
2030 			FILE_LOCK(fp);
2031 		FILEDESC_UNLOCK(fdp);
2032 
2033 		/*
2034 		 * we now own the reference to fp that the ofiles[] array
2035 		 * used to own.  Release it.
2036 		 */
2037 		if (fp != NULL)
2038 			fdrop_locked(fp, td);
2039 		return (0);
2040 
2041 	default:
2042 		FILEDESC_UNLOCK(fdp);
2043 		return (error);
2044 	}
2045 	/* NOTREACHED */
2046 }
2047 
2048 /*
2049  * Get file structures.
2050  */
2051 static int
2052 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
2053 {
2054 	int error;
2055 	struct file *fp;
2056 
2057 	sx_slock(&filelist_lock);
2058 	if (!req->oldptr) {
2059 		/*
2060 		 * overestimate by 10 files
2061 		 */
2062 		error = SYSCTL_OUT(req, 0, sizeof(filehead) +
2063 				   (nfiles + 10) * sizeof(struct file));
2064 		sx_sunlock(&filelist_lock);
2065 		return (error);
2066 	}
2067 
2068 	error = SYSCTL_OUT(req, &filehead, sizeof(filehead));
2069 	if (error) {
2070 		sx_sunlock(&filelist_lock);
2071 		return (error);
2072 	}
2073 
2074 	/*
2075 	 * followed by an array of file structures
2076 	 */
2077 	LIST_FOREACH(fp, &filehead, f_list) {
2078 		error = SYSCTL_OUT(req, fp, sizeof (struct file));
2079 		if (error) {
2080 			sx_sunlock(&filelist_lock);
2081 			return (error);
2082 		}
2083 	}
2084 	sx_sunlock(&filelist_lock);
2085 	return (0);
2086 }
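
/*
 * Illustrative userland sketch (not part of the kernel build): the handler
 * above backs the kern.file sysctl, which returns the filehead pointer
 * followed by raw struct file copies.  Callers size the buffer with a NULL
 * oldp first, which lands in the req->oldptr == NULL branch above.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdlib.h>

static void *
snapshot_kern_file(size_t *lenp)
{
	int mib[2] = { CTL_KERN, KERN_FILE };
	void *buf;

	if (sysctl(mib, 2, NULL, lenp, NULL, 0) == -1)	/* size estimate */
		return (NULL);
	if ((buf = malloc(*lenp)) == NULL)
		return (NULL);
	if (sysctl(mib, 2, buf, lenp, NULL, 0) == -1) {
		free(buf);
		return (NULL);
	}
	return (buf);
}
#endif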
2087 
2088 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
2089     0, 0, sysctl_kern_file, "S,file", "Entire file table");
2090 
2091 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
2092     &maxfilesperproc, 0, "Maximum files allowed open per process");
2093 
2094 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
2095     &maxfiles, 0, "Maximum number of files");
2096 
2097 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
2098     &nfiles, 0, "System-wide number of open files");
2099 
2100 static void
2101 fildesc_drvinit(void *unused)
2102 {
2103 	dev_t dev;
2104 
2105 	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
2106 	make_dev_alias(dev, "stdin");
2107 	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
2108 	make_dev_alias(dev, "stdout");
2109 	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
2110 	make_dev_alias(dev, "stderr");
2111 	if (!devfs_present) {
2112 		int fd;
2113 
2114 		for (fd = 3; fd < NUMFDESC; fd++)
2115 			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
2116 			    "fd/%d", fd);
2117 	}
2118 }
2119 
2120 struct fileops badfileops = {
2121 	badfo_readwrite,
2122 	badfo_readwrite,
2123 	badfo_ioctl,
2124 	badfo_poll,
2125 	badfo_kqfilter,
2126 	badfo_stat,
2127 	badfo_close
2128 };
2129 
2130 static int
2131 badfo_readwrite(fp, uio, cred, flags, td)
2132 	struct file *fp;
2133 	struct uio *uio;
2134 	struct ucred *cred;
2135 	struct thread *td;
2136 	int flags;
2137 {
2138 
2139 	return (EBADF);
2140 }
2141 
2142 static int
2143 badfo_ioctl(fp, com, data, td)
2144 	struct file *fp;
2145 	u_long com;
2146 	void *data;
2147 	struct thread *td;
2148 {
2149 
2150 	return (EBADF);
2151 }
2152 
2153 static int
2154 badfo_poll(fp, events, cred, td)
2155 	struct file *fp;
2156 	int events;
2157 	struct ucred *cred;
2158 	struct thread *td;
2159 {
2160 
2161 	return (0);
2162 }
2163 
2164 static int
2165 badfo_kqfilter(fp, kn)
2166 	struct file *fp;
2167 	struct knote *kn;
2168 {
2169 
2170 	return (0);
2171 }
2172 
2173 static int
2174 badfo_stat(fp, sb, td)
2175 	struct file *fp;
2176 	struct stat *sb;
2177 	struct thread *td;
2178 {
2179 
2180 	return (EBADF);
2181 }
2182 
2183 static int
2184 badfo_close(fp, td)
2185 	struct file *fp;
2186 	struct thread *td;
2187 {
2188 
2189 	return (EBADF);
2190 }
2191 
2192 SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR,
2193     fildesc_drvinit, NULL)
2194 
2195 static void filelistinit(void *);
2196 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2197 
2198 /* ARGSUSED*/
2199 static void
2200 filelistinit(dummy)
2201 	void *dummy;
2202 {
2203 	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
2204 	    NULL, NULL, UMA_ALIGN_PTR, 0);
2205 
2206 	sx_init(&filelist_lock, "filelist lock");
2207 	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
2208 }
2209