xref: /freebsd/sys/kern/kern_descrip.c (revision c68159a6d8eede11766cf13896d0f7670dbd51aa)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/sysproto.h>
46 #include <sys/conf.h>
47 #include <sys/filedesc.h>
48 #include <sys/kernel.h>
49 #include <sys/sysctl.h>
50 #include <sys/vnode.h>
51 #include <sys/proc.h>
52 #include <sys/file.h>
53 #include <sys/stat.h>
54 #include <sys/filio.h>
55 #include <sys/fcntl.h>
56 #include <sys/malloc.h>
57 #include <sys/unistd.h>
58 #include <sys/resourcevar.h>
59 #include <sys/event.h>
60 
61 #include <vm/vm.h>
62 #include <vm/vm_extern.h>
63 
64 static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
65 MALLOC_DEFINE(M_FILE, "file", "Open file structure");
66 static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
67 
68 static	 d_open_t  fdopen;
69 #define NUMFDESC 64
70 
71 #define CDEV_MAJOR 22
72 static struct cdevsw fildesc_cdevsw = {
73 	/* open */	fdopen,
74 	/* close */	noclose,
75 	/* read */	noread,
76 	/* write */	nowrite,
77 	/* ioctl */	noioctl,
78 	/* poll */	nopoll,
79 	/* mmap */	nommap,
80 	/* strategy */	nostrategy,
81 	/* name */	"FD",
82 	/* maj */	CDEV_MAJOR,
83 	/* dump */	nodump,
84 	/* psize */	nopsize,
85 	/* flags */	0,
86 	/* bmaj */	-1
87 };
88 
89 static int do_dup __P((struct filedesc *fdp, int old, int new, register_t *retval, struct proc *p));
90 static int badfo_readwrite __P((struct file *fp, struct uio *uio,
91     struct ucred *cred, int flags, struct proc *p));
92 static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data,
93     struct proc *p));
94 static int badfo_poll __P((struct file *fp, int events,
95     struct ucred *cred, struct proc *p));
96 static int badfo_stat __P((struct file *fp, struct stat *sb, struct proc *p));
97 static int badfo_close __P((struct file *fp, struct proc *p));
98 
99 /*
100  * Descriptor management.
101  */
102 struct filelist filehead;	/* head of list of open files */
103 int nfiles;			/* actual number of open files */
104 extern int cmask;
105 
106 /*
107  * System calls on descriptors.
108  */
109 #ifndef _SYS_SYSPROTO_H_
110 struct getdtablesize_args {
111 	int	dummy;
112 };
113 #endif
114 /* ARGSUSED */
115 int
116 getdtablesize(p, uap)
117 	struct proc *p;
118 	struct getdtablesize_args *uap;
119 {
120 
121 	p->p_retval[0] =
122 	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
123 	return (0);
124 }
125 
126 /*
127  * Duplicate a file descriptor to a particular value.
128  *
129  * note: keep in mind that a potential race condition exists when closing
130  * descriptors from a shared descriptor table (via rfork).
131  */
132 #ifndef _SYS_SYSPROTO_H_
133 struct dup2_args {
134 	u_int	from;
135 	u_int	to;
136 };
137 #endif
138 /* ARGSUSED */
139 int
140 dup2(p, uap)
141 	struct proc *p;
142 	struct dup2_args *uap;
143 {
144 	register struct filedesc *fdp = p->p_fd;
145 	register u_int old = uap->from, new = uap->to;
146 	int i, error;
147 
148 retry:
149 	if (old >= fdp->fd_nfiles ||
150 	    fdp->fd_ofiles[old] == NULL ||
151 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
152 	    new >= maxfilesperproc) {
153 		return (EBADF);
154 	}
155 	if (old == new) {
156 		p->p_retval[0] = new;
157 		return (0);
158 	}
159 	if (new >= fdp->fd_nfiles) {
160 		if ((error = fdalloc(p, new, &i)))
161 			return (error);
162 		if (new != i)
163 			panic("dup2: fdalloc");
164 		/*
165 		 * fdalloc() may block, retest everything.
166 		 */
167 		goto retry;
168 	}
169 	return (do_dup(fdp, (int)old, (int)new, p->p_retval, p));
170 }
171 
172 /*
173  * Duplicate a file descriptor.
174  */
175 #ifndef _SYS_SYSPROTO_H_
176 struct dup_args {
177 	u_int	fd;
178 };
179 #endif
180 /* ARGSUSED */
181 int
182 dup(p, uap)
183 	struct proc *p;
184 	struct dup_args *uap;
185 {
186 	register struct filedesc *fdp;
187 	u_int old;
188 	int new, error;
189 
190 	old = uap->fd;
191 	fdp = p->p_fd;
192 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
193 		return (EBADF);
194 	if ((error = fdalloc(p, 0, &new)))
195 		return (error);
196 	return (do_dup(fdp, (int)old, new, p->p_retval, p));
197 }
198 
199 /*
200  * The file control system call.
201  */
202 #ifndef _SYS_SYSPROTO_H_
203 struct fcntl_args {
204 	int	fd;
205 	int	cmd;
206 	long	arg;
207 };
208 #endif
209 /* ARGSUSED */
210 int
211 fcntl(p, uap)
212 	struct proc *p;
213 	register struct fcntl_args *uap;
214 {
215 	register struct filedesc *fdp = p->p_fd;
216 	register struct file *fp;
217 	register char *pop;
218 	struct vnode *vp;
219 	int i, tmp, error, flg = F_POSIX;
220 	struct flock fl;
221 	u_int newmin;
222 
223 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
224 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
225 		return (EBADF);
226 	pop = &fdp->fd_ofileflags[uap->fd];
227 
228 	switch (uap->cmd) {
229 	case F_DUPFD:
230 		newmin = uap->arg;
231 		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
232 		    newmin >= maxfilesperproc)
233 			return (EINVAL);
234 		if ((error = fdalloc(p, newmin, &i)))
235 			return (error);
236 		return (do_dup(fdp, uap->fd, i, p->p_retval, p));
237 
238 	case F_GETFD:
239 		p->p_retval[0] = *pop & 1;
240 		return (0);
241 
242 	case F_SETFD:
243 		*pop = (*pop &~ 1) | (uap->arg & 1);
244 		return (0);
245 
246 	case F_GETFL:
247 		p->p_retval[0] = OFLAGS(fp->f_flag);
248 		return (0);
249 
250 	case F_SETFL:
251 		fhold(fp);
252 		fp->f_flag &= ~FCNTLFLAGS;
253 		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
254 		tmp = fp->f_flag & FNONBLOCK;
255 		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
256 		if (error) {
257 			fdrop(fp, p);
258 			return (error);
259 		}
260 		tmp = fp->f_flag & FASYNC;
261 		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
262 		if (!error) {
263 			fdrop(fp, p);
264 			return (0);
265 		}
266 		fp->f_flag &= ~FNONBLOCK;
267 		tmp = 0;
268 		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
269 		fdrop(fp, p);
270 		return (error);
271 
272 	case F_GETOWN:
273 		fhold(fp);
274 		error = fo_ioctl(fp, FIOGETOWN, (caddr_t)p->p_retval, p);
275 		fdrop(fp, p);
276 		return(error);
277 
278 	case F_SETOWN:
279 		fhold(fp);
280 		error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, p);
281 		fdrop(fp, p);
282 		return(error);
283 
284 	case F_SETLKW:
285 		flg |= F_WAIT;
286 		/* Fall into F_SETLK */
287 
288 	case F_SETLK:
289 		if (fp->f_type != DTYPE_VNODE)
290 			return (EBADF);
291 		vp = (struct vnode *)fp->f_data;
292 
293 		/*
294 		 * copyin/lockop may block
295 		 */
296 		fhold(fp);
297 		/* Copy in the lock structure */
298 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
299 		    sizeof(fl));
300 		if (error) {
301 			fdrop(fp, p);
302 			return (error);
303 		}
304 		if (fl.l_whence == SEEK_CUR)
305 			fl.l_start += fp->f_offset;
306 
307 		switch (fl.l_type) {
308 		case F_RDLCK:
309 			if ((fp->f_flag & FREAD) == 0) {
310 				error = EBADF;
311 				break;
312 			}
313 			p->p_flag |= P_ADVLOCK;
314 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
315 			    &fl, flg);
316 			break;
317 		case F_WRLCK:
318 			if ((fp->f_flag & FWRITE) == 0) {
319 				error = EBADF;
320 				break;
321 			}
322 			p->p_flag |= P_ADVLOCK;
323 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
324 			    &fl, flg);
325 			break;
326 		case F_UNLCK:
327 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
328 				&fl, F_POSIX);
329 			break;
330 		default:
331 			error = EINVAL;
332 			break;
333 		}
334 		fdrop(fp, p);
335 		return(error);
336 
337 	case F_GETLK:
338 		if (fp->f_type != DTYPE_VNODE)
339 			return (EBADF);
340 		vp = (struct vnode *)fp->f_data;
341 		/*
342 		 * copyin/lockop may block
343 		 */
344 		fhold(fp);
345 		/* Copy in the lock structure */
346 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
347 		    sizeof(fl));
348 		if (error) {
349 			fdrop(fp, p);
350 			return (error);
351 		}
352 		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
353 		    fl.l_type != F_UNLCK) {
354 			fdrop(fp, p);
355 			return (EINVAL);
356 		}
357 		if (fl.l_whence == SEEK_CUR)
358 			fl.l_start += fp->f_offset;
359 		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
360 			    &fl, F_POSIX);
361 		fdrop(fp, p);
362 		if (error == 0) {
363 			error = copyout((caddr_t)&fl,
364 				    (caddr_t)(intptr_t)uap->arg, sizeof(fl));
365 		}
366 		return(error);
367 	default:
368 		return (EINVAL);
369 	}
370 	/* NOTREACHED */
371 }
372 
373 /*
374  * Common code for dup, dup2, and fcntl(F_DUPFD).
375  */
376 static int
377 do_dup(fdp, old, new, retval, p)
378 	register struct filedesc *fdp;
379 	register int old, new;
380 	register_t *retval;
381 	struct proc *p;
382 {
383 	struct file *fp;
384 	struct file *delfp;
385 
386 	/*
387 	 * Save info on the descriptor being overwritten.  We have
388 	 * to do the unmap now, but we cannot close it without
389 	 * introducing an ownership race for the slot.
390 	 */
391 	delfp = fdp->fd_ofiles[new];
392 #if 0
393 	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
394 		(void) munmapfd(p, new);
395 #endif
396 
397 	/*
398 	 * Duplicate the source descriptor, update lastfile
399 	 */
400 	fp = fdp->fd_ofiles[old];
401 	fdp->fd_ofiles[new] = fp;
402 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
403 	fhold(fp);
404 	if (new > fdp->fd_lastfile)
405 		fdp->fd_lastfile = new;
406 	*retval = new;
407 
408 	/*
409 	 * If we dup'd over a valid file, we now own the reference to it
410 	 * and must dispose of it using closef() semantics (as if a
411 	 * close() were performed on it).
412 	 */
413 	if (delfp)
414 		(void) closef(delfp, p);
415 	return (0);
416 }
417 
418 /*
419  * If sigio is on the list associated with a process or process group,
420  * disable signalling from the device, remove sigio from the list and
421  * free sigio.
422  */
423 void
424 funsetown(sigio)
425 	struct sigio *sigio;
426 {
427 	int s;
428 
429 	if (sigio == NULL)
430 		return;
431 	s = splhigh();
432 	*(sigio->sio_myref) = NULL;
433 	splx(s);
434 	if (sigio->sio_pgid < 0) {
435 		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
436 			     sigio, sio_pgsigio);
437 	} else /* if ((*sigiop)->sio_pgid > 0) */ {
438 		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
439 			     sigio, sio_pgsigio);
440 	}
441 	crfree(sigio->sio_ucred);
442 	FREE(sigio, M_SIGIO);
443 }
444 
445 /* Free a list of sigio structures. */
446 void
447 funsetownlst(sigiolst)
448 	struct sigiolst *sigiolst;
449 {
450 	struct sigio *sigio;
451 
452 	while ((sigio = SLIST_FIRST(sigiolst)) != NULL)
453 		funsetown(sigio);
454 }
455 
456 /*
457  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
458  *
459  * After permission checking, add a sigio structure to the sigio list for
460  * the process or process group.
461  */
462 int
463 fsetown(pgid, sigiop)
464 	pid_t pgid;
465 	struct sigio **sigiop;
466 {
467 	struct proc *proc;
468 	struct pgrp *pgrp;
469 	struct sigio *sigio;
470 	int s;
471 
472 	if (pgid == 0) {
473 		funsetown(*sigiop);
474 		return (0);
475 	}
476 	if (pgid > 0) {
477 		proc = pfind(pgid);
478 		if (proc == NULL)
479 			return (ESRCH);
480 
481 		/*
482 		 * Policy - Don't allow a process to FSETOWN a process
483 		 * in another session.
484 		 *
485 		 * Remove this test to allow maximum flexibility or
486 		 * restrict FSETOWN to the current process or process
487 		 * group for maximum safety.
488 		 */
489 		if (proc->p_session != curproc->p_session)
490 			return (EPERM);
491 
492 		pgrp = NULL;
493 	} else /* if (pgid < 0) */ {
494 		pgrp = pgfind(-pgid);
495 		if (pgrp == NULL)
496 			return (ESRCH);
497 
498 		/*
499 		 * Policy - Don't allow a process to FSETOWN a process
500 		 * in another session.
501 		 *
502 		 * Remove this test to allow maximum flexibility or
503 		 * restrict FSETOWN to the current process or process
504 		 * group for maximum safety.
505 		 */
506 		if (pgrp->pg_session != curproc->p_session)
507 			return (EPERM);
508 
509 		proc = NULL;
510 	}
511 	funsetown(*sigiop);
512 	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
513 	if (pgid > 0) {
514 		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
515 		sigio->sio_proc = proc;
516 	} else {
517 		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
518 		sigio->sio_pgrp = pgrp;
519 	}
520 	sigio->sio_pgid = pgid;
521 	crhold(curproc->p_ucred);
522 	sigio->sio_ucred = curproc->p_ucred;
523 	/* It would be convenient if p_ruid was in ucred. */
524 	sigio->sio_ruid = curproc->p_cred->p_ruid;
525 	sigio->sio_myref = sigiop;
526 	s = splhigh();
527 	*sigiop = sigio;
528 	splx(s);
529 	return (0);
530 }
531 
532 /*
533  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
534  */
535 pid_t
536 fgetown(sigio)
537 	struct sigio *sigio;
538 {
539 	return (sigio != NULL ? sigio->sio_pgid : 0);
540 }
541 
542 /*
543  * Close a file descriptor.
544  */
545 #ifndef _SYS_SYSPROTO_H_
546 struct close_args {
547         int     fd;
548 };
549 #endif
550 /* ARGSUSED */
551 int
552 close(p, uap)
553 	struct proc *p;
554 	struct close_args *uap;
555 {
556 	register struct filedesc *fdp = p->p_fd;
557 	register struct file *fp;
558 	register int fd = uap->fd;
559 
560 	if ((unsigned)fd >= fdp->fd_nfiles ||
561 	    (fp = fdp->fd_ofiles[fd]) == NULL)
562 		return (EBADF);
563 #if 0
564 	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
565 		(void) munmapfd(p, fd);
566 #endif
567 	fdp->fd_ofiles[fd] = NULL;
568 	fdp->fd_ofileflags[fd] = 0;
569 
570 	/*
571 	 * we now hold the fp reference that used to be owned by the descriptor
572 	 * array.
573 	 */
574 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
575 		fdp->fd_lastfile--;
576 	if (fd < fdp->fd_freefile)
577 		fdp->fd_freefile = fd;
578 	if (fd < fdp->fd_knlistsize)
579 		knote_fdclose(p, fd);
580 	return (closef(fp, p));
581 }
582 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor in the old
 * `struct ostat' layout.
 *
 * Returns EBADF for a descriptor that is not open, otherwise the error
 * from fo_stat() or from copying the result out to uap->sb.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/* ARGSUSED */
int
ofstat(p, uap)
	struct proc *p;
	register struct ofstat_args *uap;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct stat sb;
	struct ostat osb;
	int error;

	if ((unsigned)uap->fd >= fdp->fd_nfiles)
		return (EBADF);
	fp = fdp->fd_ofiles[uap->fd];
	if (fp == NULL)
		return (EBADF);

	/* Hold the file across the stat and the copyout. */
	fhold(fp);
	error = fo_stat(fp, &sb, p);
	if (error == 0) {
		cvtstat(&sb, &osb);
		error = copyout((caddr_t)&osb, (caddr_t)uap->sb, sizeof (osb));
	}
	fdrop(fp, p);
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
618 
619 /*
620  * Return status information about a file descriptor.
621  */
622 #ifndef _SYS_SYSPROTO_H_
623 struct fstat_args {
624 	int	fd;
625 	struct	stat *sb;
626 };
627 #endif
628 /* ARGSUSED */
629 int
630 fstat(p, uap)
631 	struct proc *p;
632 	register struct fstat_args *uap;
633 {
634 	register struct filedesc *fdp = p->p_fd;
635 	register struct file *fp;
636 	struct stat ub;
637 	int error;
638 
639 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
640 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
641 		return (EBADF);
642 	fhold(fp);
643 	error = fo_stat(fp, &ub, p);
644 	if (error == 0)
645 		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
646 	fdrop(fp, p);
647 	return (error);
648 }
649 
650 /*
651  * Return status information about a file descriptor.
652  */
653 #ifndef _SYS_SYSPROTO_H_
654 struct nfstat_args {
655 	int	fd;
656 	struct	nstat *sb;
657 };
658 #endif
659 /* ARGSUSED */
660 int
661 nfstat(p, uap)
662 	struct proc *p;
663 	register struct nfstat_args *uap;
664 {
665 	register struct filedesc *fdp = p->p_fd;
666 	register struct file *fp;
667 	struct stat ub;
668 	struct nstat nub;
669 	int error;
670 
671 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
672 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
673 		return (EBADF);
674 	fhold(fp);
675 	error = fo_stat(fp, &ub, p);
676 	if (error == 0) {
677 		cvtnstat(&ub, &nub);
678 		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
679 	}
680 	fdrop(fp, p);
681 	return (error);
682 }
683 
684 /*
685  * Return pathconf information about a file descriptor.
686  */
687 #ifndef _SYS_SYSPROTO_H_
688 struct fpathconf_args {
689 	int	fd;
690 	int	name;
691 };
692 #endif
693 /* ARGSUSED */
694 int
695 fpathconf(p, uap)
696 	struct proc *p;
697 	register struct fpathconf_args *uap;
698 {
699 	struct filedesc *fdp = p->p_fd;
700 	struct file *fp;
701 	struct vnode *vp;
702 	int error = 0;
703 
704 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
705 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
706 		return (EBADF);
707 
708 	fhold(fp);
709 
710 	switch (fp->f_type) {
711 	case DTYPE_PIPE:
712 	case DTYPE_SOCKET:
713 		if (uap->name != _PC_PIPE_BUF)
714 			return (EINVAL);
715 		p->p_retval[0] = PIPE_BUF;
716 		error = 0;
717 		break;
718 	case DTYPE_FIFO:
719 	case DTYPE_VNODE:
720 		vp = (struct vnode *)fp->f_data;
721 		error = VOP_PATHCONF(vp, uap->name, p->p_retval);
722 		break;
723 	default:
724 		error = EOPNOTSUPP;
725 		break;
726 	}
727 	fdrop(fp, p);
728 	return(error);
729 }
730 
731 /*
732  * Allocate a file descriptor for the process.
733  */
734 static int fdexpand;
735 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
736 
737 int
738 fdalloc(p, want, result)
739 	struct proc *p;
740 	int want;
741 	int *result;
742 {
743 	register struct filedesc *fdp = p->p_fd;
744 	register int i;
745 	int lim, last, nfiles;
746 	struct file **newofile;
747 	char *newofileflags;
748 
749 	/*
750 	 * Search for a free descriptor starting at the higher
751 	 * of want or fd_freefile.  If that fails, consider
752 	 * expanding the ofile array.
753 	 */
754 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
755 	for (;;) {
756 		last = min(fdp->fd_nfiles, lim);
757 		if ((i = want) < fdp->fd_freefile)
758 			i = fdp->fd_freefile;
759 		for (; i < last; i++) {
760 			if (fdp->fd_ofiles[i] == NULL) {
761 				fdp->fd_ofileflags[i] = 0;
762 				if (i > fdp->fd_lastfile)
763 					fdp->fd_lastfile = i;
764 				if (want <= fdp->fd_freefile)
765 					fdp->fd_freefile = i;
766 				*result = i;
767 				return (0);
768 			}
769 		}
770 
771 		/*
772 		 * No space in current array.  Expand?
773 		 */
774 		if (fdp->fd_nfiles >= lim)
775 			return (EMFILE);
776 		if (fdp->fd_nfiles < NDEXTENT)
777 			nfiles = NDEXTENT;
778 		else
779 			nfiles = 2 * fdp->fd_nfiles;
780 		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
781 		    M_FILEDESC, M_WAITOK);
782 
783 		/*
784 		 * deal with file-table extend race that might have occured
785 		 * when malloc was blocked.
786 		 */
787 		if (fdp->fd_nfiles >= nfiles) {
788 			FREE(newofile, M_FILEDESC);
789 			continue;
790 		}
791 		newofileflags = (char *) &newofile[nfiles];
792 		/*
793 		 * Copy the existing ofile and ofileflags arrays
794 		 * and zero the new portion of each array.
795 		 */
796 		bcopy(fdp->fd_ofiles, newofile,
797 			(i = sizeof(struct file *) * fdp->fd_nfiles));
798 		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
799 		bcopy(fdp->fd_ofileflags, newofileflags,
800 			(i = sizeof(char) * fdp->fd_nfiles));
801 		bzero(newofileflags + i, nfiles * sizeof(char) - i);
802 		if (fdp->fd_nfiles > NDFILE)
803 			FREE(fdp->fd_ofiles, M_FILEDESC);
804 		fdp->fd_ofiles = newofile;
805 		fdp->fd_ofileflags = newofileflags;
806 		fdp->fd_nfiles = nfiles;
807 		fdexpand++;
808 	}
809 	return (0);
810 }
811 
812 /*
813  * Check to see whether n user file descriptors
814  * are available to the process p.
815  */
816 int
817 fdavail(p, n)
818 	struct proc *p;
819 	register int n;
820 {
821 	register struct filedesc *fdp = p->p_fd;
822 	register struct file **fpp;
823 	register int i, lim, last;
824 
825 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
826 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
827 		return (1);
828 
829 	last = min(fdp->fd_nfiles, lim);
830 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
831 	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
832 		if (*fpp == NULL && --n <= 0)
833 			return (1);
834 	}
835 	return (0);
836 }
837 
838 /*
839  * Create a new open file structure and allocate
840  * a file decriptor for the process that refers to it.
841  */
842 int
843 falloc(p, resultfp, resultfd)
844 	register struct proc *p;
845 	struct file **resultfp;
846 	int *resultfd;
847 {
848 	register struct file *fp, *fq;
849 	int error, i;
850 
851 	if (nfiles >= maxfiles) {
852 		tablefull("file");
853 		return (ENFILE);
854 	}
855 	/*
856 	 * Allocate a new file descriptor.
857 	 * If the process has file descriptor zero open, add to the list
858 	 * of open files at that point, otherwise put it at the front of
859 	 * the list of open files.
860 	 */
861 	nfiles++;
862 	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK | M_ZERO);
863 
864 	/*
865 	 * wait until after malloc (which may have blocked) returns before
866 	 * allocating the slot, else a race might have shrunk it if we had
867 	 * allocated it before the malloc.
868 	 */
869 	if ((error = fdalloc(p, 0, &i))) {
870 		nfiles--;
871 		FREE(fp, M_FILE);
872 		return (error);
873 	}
874 	fp->f_count = 1;
875 	fp->f_cred = p->p_ucred;
876 	fp->f_ops = &badfileops;
877 	fp->f_seqcount = 1;
878 	crhold(fp->f_cred);
879 	if ((fq = p->p_fd->fd_ofiles[0])) {
880 		LIST_INSERT_AFTER(fq, fp, f_list);
881 	} else {
882 		LIST_INSERT_HEAD(&filehead, fp, f_list);
883 	}
884 	p->p_fd->fd_ofiles[i] = fp;
885 	if (resultfp)
886 		*resultfp = fp;
887 	if (resultfd)
888 		*resultfd = i;
889 	return (0);
890 }
891 
892 /*
893  * Free a file descriptor.
894  */
895 void
896 ffree(fp)
897 	register struct file *fp;
898 {
899 	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
900 	LIST_REMOVE(fp, f_list);
901 	crfree(fp->f_cred);
902 	nfiles--;
903 	FREE(fp, M_FILE);
904 }
905 
906 /*
907  * Build a new filedesc structure.
908  */
909 struct filedesc *
910 fdinit(p)
911 	struct proc *p;
912 {
913 	register struct filedesc0 *newfdp;
914 	register struct filedesc *fdp = p->p_fd;
915 
916 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
917 	    M_FILEDESC, M_WAITOK | M_ZERO);
918 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
919 	if (newfdp->fd_fd.fd_cdir)
920 		VREF(newfdp->fd_fd.fd_cdir);
921 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
922 	if (newfdp->fd_fd.fd_rdir)
923 		VREF(newfdp->fd_fd.fd_rdir);
924 	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
925 	if (newfdp->fd_fd.fd_jdir)
926 		VREF(newfdp->fd_fd.fd_jdir);
927 
928 	/* Create the file descriptor table. */
929 	newfdp->fd_fd.fd_refcnt = 1;
930 	newfdp->fd_fd.fd_cmask = cmask;
931 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
932 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
933 	newfdp->fd_fd.fd_nfiles = NDFILE;
934 	newfdp->fd_fd.fd_knlistsize = -1;
935 
936 	return (&newfdp->fd_fd);
937 }
938 
939 /*
940  * Share a filedesc structure.
941  */
942 struct filedesc *
943 fdshare(p)
944 	struct proc *p;
945 {
946 	p->p_fd->fd_refcnt++;
947 	return (p->p_fd);
948 }
949 
950 /*
951  * Copy a filedesc structure.
952  */
953 struct filedesc *
954 fdcopy(p)
955 	struct proc *p;
956 {
957 	register struct filedesc *newfdp, *fdp = p->p_fd;
958 	register struct file **fpp;
959 	register int i;
960 
961 	/* Certain daemons might not have file descriptors. */
962 	if (fdp == NULL)
963 		return (NULL);
964 
965 	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
966 	    M_FILEDESC, M_WAITOK);
967 	bcopy(fdp, newfdp, sizeof(struct filedesc));
968 	if (newfdp->fd_cdir)
969 		VREF(newfdp->fd_cdir);
970 	if (newfdp->fd_rdir)
971 		VREF(newfdp->fd_rdir);
972 	if (newfdp->fd_jdir)
973 		VREF(newfdp->fd_jdir);
974 	newfdp->fd_refcnt = 1;
975 
976 	/*
977 	 * If the number of open files fits in the internal arrays
978 	 * of the open file structure, use them, otherwise allocate
979 	 * additional memory for the number of descriptors currently
980 	 * in use.
981 	 */
982 	if (newfdp->fd_lastfile < NDFILE) {
983 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
984 		newfdp->fd_ofileflags =
985 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
986 		i = NDFILE;
987 	} else {
988 		/*
989 		 * Compute the smallest multiple of NDEXTENT needed
990 		 * for the file descriptors currently in use,
991 		 * allowing the table to shrink.
992 		 */
993 		i = newfdp->fd_nfiles;
994 		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
995 			i /= 2;
996 		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
997 		    M_FILEDESC, M_WAITOK);
998 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
999 	}
1000 	newfdp->fd_nfiles = i;
1001 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1002 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1003 
1004 	/*
1005 	 * kq descriptors cannot be copied.
1006 	 */
1007 	if (newfdp->fd_knlistsize != -1) {
1008 		fpp = newfdp->fd_ofiles;
1009 		for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1010 			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE)
1011 				*fpp = NULL;
1012 		}
1013 		newfdp->fd_knlist = NULL;
1014 		newfdp->fd_knlistsize = -1;
1015 		newfdp->fd_knhash = NULL;
1016 		newfdp->fd_knhashmask = 0;
1017 	}
1018 
1019 	fpp = newfdp->fd_ofiles;
1020 	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1021 		if (*fpp != NULL)
1022 			fhold(*fpp);
1023 	}
1024 	return (newfdp);
1025 }
1026 
1027 /*
1028  * Release a filedesc structure.
1029  */
1030 void
1031 fdfree(p)
1032 	struct proc *p;
1033 {
1034 	register struct filedesc *fdp = p->p_fd;
1035 	struct file **fpp;
1036 	register int i;
1037 
1038 	/* Certain daemons might not have file descriptors. */
1039 	if (fdp == NULL)
1040 		return;
1041 
1042 	if (--fdp->fd_refcnt > 0)
1043 		return;
1044 	/*
1045 	 * we are the last reference to the structure, we can
1046 	 * safely assume it will not change out from under us.
1047 	 */
1048 	fpp = fdp->fd_ofiles;
1049 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1050 		if (*fpp)
1051 			(void) closef(*fpp, p);
1052 	}
1053 	if (fdp->fd_nfiles > NDFILE)
1054 		FREE(fdp->fd_ofiles, M_FILEDESC);
1055 	if (fdp->fd_cdir)
1056 		vrele(fdp->fd_cdir);
1057 	if (fdp->fd_rdir)
1058 		vrele(fdp->fd_rdir);
1059 	if (fdp->fd_jdir)
1060 		vrele(fdp->fd_jdir);
1061 	if (fdp->fd_knlist)
1062 		FREE(fdp->fd_knlist, M_TEMP);
1063 	if (fdp->fd_knhash)
1064 		FREE(fdp->fd_knhash, M_TEMP);
1065 	FREE(fdp, M_FILEDESC);
1066 }
1067 
1068 /*
1069  * For setugid programs, we don't want to people to use that setugidness
1070  * to generate error messages which write to a file which otherwise would
1071  * otherwise be off-limits to the process.
1072  *
1073  * This is a gross hack to plug the hole.  A better solution would involve
1074  * a special vop or other form of generalized access control mechanism.  We
1075  * go ahead and just reject all procfs file systems accesses as dangerous.
1076  *
1077  * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1078  * sufficient.  We also don't for check setugidness since we know we are.
1079  */
1080 static int
1081 is_unsafe(struct file *fp)
1082 {
1083 	if (fp->f_type == DTYPE_VNODE &&
1084 	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1085 		return (1);
1086 	return (0);
1087 }
1088 
1089 /*
1090  * Make this setguid thing safe, if at all possible.
1091  */
1092 void
1093 setugidsafety(p)
1094 	struct proc *p;
1095 {
1096 	struct filedesc *fdp = p->p_fd;
1097 	register int i;
1098 
1099 	/* Certain daemons might not have file descriptors. */
1100 	if (fdp == NULL)
1101 		return;
1102 
1103 	/*
1104 	 * note: fdp->fd_ofiles may be reallocated out from under us while
1105 	 * we are blocked in a close.  Be careful!
1106 	 */
1107 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1108 		if (i > 2)
1109 			break;
1110 		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1111 			struct file *fp;
1112 
1113 #if 0
1114 			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1115 				(void) munmapfd(p, i);
1116 #endif
1117 			if (i < fdp->fd_knlistsize)
1118 				knote_fdclose(p, i);
1119 			/*
1120 			 * NULL-out descriptor prior to close to avoid
1121 			 * a race while close blocks.
1122 			 */
1123 			fp = fdp->fd_ofiles[i];
1124 			fdp->fd_ofiles[i] = NULL;
1125 			fdp->fd_ofileflags[i] = 0;
1126 			if (i < fdp->fd_freefile)
1127 				fdp->fd_freefile = i;
1128 			(void) closef(fp, p);
1129 		}
1130 	}
1131 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1132 		fdp->fd_lastfile--;
1133 }
1134 
1135 /*
1136  * Close any files on exec?
1137  */
1138 void
1139 fdcloseexec(p)
1140 	struct proc *p;
1141 {
1142 	struct filedesc *fdp = p->p_fd;
1143 	register int i;
1144 
1145 	/* Certain daemons might not have file descriptors. */
1146 	if (fdp == NULL)
1147 		return;
1148 
1149 	/*
1150 	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1151 	 * may block and rip them out from under us.
1152 	 */
1153 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1154 		if (fdp->fd_ofiles[i] != NULL &&
1155 		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1156 			struct file *fp;
1157 
1158 #if 0
1159 			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1160 				(void) munmapfd(p, i);
1161 #endif
1162 			if (i < fdp->fd_knlistsize)
1163 				knote_fdclose(p, i);
1164 			/*
1165 			 * NULL-out descriptor prior to close to avoid
1166 			 * a race while close blocks.
1167 			 */
1168 			fp = fdp->fd_ofiles[i];
1169 			fdp->fd_ofiles[i] = NULL;
1170 			fdp->fd_ofileflags[i] = 0;
1171 			if (i < fdp->fd_freefile)
1172 				fdp->fd_freefile = i;
1173 			(void) closef(fp, p);
1174 		}
1175 	}
1176 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1177 		fdp->fd_lastfile--;
1178 }
1179 
1180 /*
1181  * Internal form of close.
1182  * Decrement reference count on file structure.
1183  * Note: p may be NULL when closing a file
1184  * that was being passed in a message.
1185  */
1186 int
1187 closef(fp, p)
1188 	register struct file *fp;
1189 	register struct proc *p;
1190 {
1191 	struct vnode *vp;
1192 	struct flock lf;
1193 
1194 	if (fp == NULL)
1195 		return (0);
1196 	/*
1197 	 * POSIX record locking dictates that any close releases ALL
1198 	 * locks owned by this process.  This is handled by setting
1199 	 * a flag in the unlock to free ONLY locks obeying POSIX
1200 	 * semantics, and not to free BSD-style file locks.
1201 	 * If the descriptor was in a message, POSIX-style locks
1202 	 * aren't passed with the descriptor.
1203 	 */
1204 	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
1205 		lf.l_whence = SEEK_SET;
1206 		lf.l_start = 0;
1207 		lf.l_len = 0;
1208 		lf.l_type = F_UNLCK;
1209 		vp = (struct vnode *)fp->f_data;
1210 		(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &lf, F_POSIX);
1211 	}
1212 	return (fdrop(fp, p));
1213 }
1214 
1215 int
1216 fdrop(fp, p)
1217 	struct file *fp;
1218 	struct proc *p;
1219 {
1220 	struct flock lf;
1221 	struct vnode *vp;
1222 	int error;
1223 
1224 	if (--fp->f_count > 0)
1225 		return (0);
1226 	if (fp->f_count < 0)
1227 		panic("fdrop: count < 0");
1228 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1229 		lf.l_whence = SEEK_SET;
1230 		lf.l_start = 0;
1231 		lf.l_len = 0;
1232 		lf.l_type = F_UNLCK;
1233 		vp = (struct vnode *)fp->f_data;
1234 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1235 	}
1236 	if (fp->f_ops != &badfileops)
1237 		error = fo_close(fp, p);
1238 	else
1239 		error = 0;
1240 	ffree(fp);
1241 	return (error);
1242 }
1243 
1244 /*
1245  * Apply an advisory lock on a file descriptor.
1246  *
1247  * Just attempt to get a record lock of the requested type on
1248  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1249  */
1250 #ifndef _SYS_SYSPROTO_H_
1251 struct flock_args {
1252 	int	fd;
1253 	int	how;
1254 };
1255 #endif
1256 /* ARGSUSED */
1257 int
1258 flock(p, uap)
1259 	struct proc *p;
1260 	register struct flock_args *uap;
1261 {
1262 	register struct filedesc *fdp = p->p_fd;
1263 	register struct file *fp;
1264 	struct vnode *vp;
1265 	struct flock lf;
1266 
1267 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
1268 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
1269 		return (EBADF);
1270 	if (fp->f_type != DTYPE_VNODE)
1271 		return (EOPNOTSUPP);
1272 	vp = (struct vnode *)fp->f_data;
1273 	lf.l_whence = SEEK_SET;
1274 	lf.l_start = 0;
1275 	lf.l_len = 0;
1276 	if (uap->how & LOCK_UN) {
1277 		lf.l_type = F_UNLCK;
1278 		fp->f_flag &= ~FHASLOCK;
1279 		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
1280 	}
1281 	if (uap->how & LOCK_EX)
1282 		lf.l_type = F_WRLCK;
1283 	else if (uap->how & LOCK_SH)
1284 		lf.l_type = F_RDLCK;
1285 	else
1286 		return (EBADF);
1287 	fp->f_flag |= FHASLOCK;
1288 	if (uap->how & LOCK_NB)
1289 		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
1290 	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
1291 }
1292 
1293 /*
1294  * File Descriptor pseudo-device driver (/dev/fd/).
1295  *
1296  * Opening minor device N dup()s the file (if any) connected to file
1297  * descriptor N belonging to the calling process.  Note that this driver
1298  * consists of only the ``open()'' routine, because all subsequent
1299  * references to this file will be direct to the other driver.
1300  */
1301 /* ARGSUSED */
1302 static int
1303 fdopen(dev, mode, type, p)
1304 	dev_t dev;
1305 	int mode, type;
1306 	struct proc *p;
1307 {
1308 
1309 	/*
1310 	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1311 	 * the file descriptor being sought for duplication. The error
1312 	 * return ensures that the vnode for this device will be released
1313 	 * by vn_open. Open will detect this special error and take the
1314 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1315 	 * will simply report the error.
1316 	 */
1317 	p->p_dupfd = minor(dev);
1318 	return (ENODEV);
1319 }
1320 
1321 /*
1322  * Duplicate the specified descriptor to a free descriptor.
1323  */
1324 int
1325 dupfdopen(p, fdp, indx, dfd, mode, error)
1326 	struct proc *p;
1327 	struct filedesc *fdp;
1328 	int indx, dfd;
1329 	int mode;
1330 	int error;
1331 {
1332 	register struct file *wfp;
1333 	struct file *fp;
1334 
1335 	/*
1336 	 * If the to-be-dup'd fd number is greater than the allowed number
1337 	 * of file descriptors, or the fd to be dup'd has already been
1338 	 * closed, then reject.
1339 	 */
1340 	if ((u_int)dfd >= fdp->fd_nfiles ||
1341 	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1342 		return (EBADF);
1343 	}
1344 
1345 	/*
1346 	 * There are two cases of interest here.
1347 	 *
1348 	 * For ENODEV simply dup (dfd) to file descriptor
1349 	 * (indx) and return.
1350 	 *
1351 	 * For ENXIO steal away the file structure from (dfd) and
1352 	 * store it in (indx).  (dfd) is effectively closed by
1353 	 * this operation.
1354 	 *
1355 	 * Any other error code is just returned.
1356 	 */
1357 	switch (error) {
1358 	case ENODEV:
1359 		/*
1360 		 * Check that the mode the file is being opened for is a
1361 		 * subset of the mode of the existing descriptor.
1362 		 */
1363 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1364 			return (EACCES);
1365 		fp = fdp->fd_ofiles[indx];
1366 #if 0
1367 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1368 			(void) munmapfd(p, indx);
1369 #endif
1370 		fdp->fd_ofiles[indx] = wfp;
1371 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1372 		fhold(wfp);
1373 		if (indx > fdp->fd_lastfile)
1374 			fdp->fd_lastfile = indx;
1375 		/*
1376 		 * we now own the reference to fp that the ofiles[] array
1377 		 * used to own.  Release it.
1378 		 */
1379 		if (fp)
1380 			fdrop(fp, p);
1381 		return (0);
1382 
1383 	case ENXIO:
1384 		/*
1385 		 * Steal away the file pointer from dfd, and stuff it into indx.
1386 		 */
1387 		fp = fdp->fd_ofiles[indx];
1388 #if 0
1389 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1390 			(void) munmapfd(p, indx);
1391 #endif
1392 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1393 		fdp->fd_ofiles[dfd] = NULL;
1394 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1395 		fdp->fd_ofileflags[dfd] = 0;
1396 
1397 		/*
1398 		 * we now own the reference to fp that the ofiles[] array
1399 		 * used to own.  Release it.
1400 		 */
1401 		if (fp)
1402 			fdrop(fp, p);
1403 		/*
1404 		 * Complete the clean up of the filedesc structure by
1405 		 * recomputing the various hints.
1406 		 */
1407 		if (indx > fdp->fd_lastfile) {
1408 			fdp->fd_lastfile = indx;
1409 		} else {
1410 			while (fdp->fd_lastfile > 0 &&
1411 			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
1412 				fdp->fd_lastfile--;
1413 			}
1414 			if (dfd < fdp->fd_freefile)
1415 				fdp->fd_freefile = dfd;
1416 		}
1417 		return (0);
1418 
1419 	default:
1420 		return (error);
1421 	}
1422 	/* NOTREACHED */
1423 }
1424 
1425 /*
1426  * Get file structures.
1427  */
1428 static int
1429 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
1430 {
1431 	int error;
1432 	struct file *fp;
1433 
1434 	if (!req->oldptr) {
1435 		/*
1436 		 * overestimate by 10 files
1437 		 */
1438 		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1439 				(nfiles + 10) * sizeof(struct file)));
1440 	}
1441 
1442 	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1443 	if (error)
1444 		return (error);
1445 
1446 	/*
1447 	 * followed by an array of file structures
1448 	 */
1449 	LIST_FOREACH(fp, &filehead, f_list) {
1450 		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1451 		if (error)
1452 			return (error);
1453 	}
1454 	return (0);
1455 }
1456 
1457 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1458     0, 0, sysctl_kern_file, "S,file", "Entire file table");
1459 
1460 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
1461     &maxfilesperproc, 0, "Maximum files allowed open per process");
1462 
1463 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
1464     &maxfiles, 0, "Maximum number of files");
1465 
1466 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
1467     &nfiles, 0, "System-wide number of open files");
1468 
1469 static void
1470 fildesc_clone(void *arg, char *name, int namelen, dev_t *dev)
1471 {
1472 	int u;
1473 
1474 	if (*dev != NODEV)
1475 		return;
1476 	if (dev_stdclone(name, NULL, "fd/", &u) != 1)
1477 		return;
1478 	if (u <= 2)
1479 		return;
1480 	*dev = make_dev(&fildesc_cdevsw, u, UID_BIN, GID_BIN, 0666, name);
1481 	return;
1482 }
1483 
1484 static void
1485 fildesc_drvinit(void *unused)
1486 {
1487 	dev_t dev;
1488 
1489 	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
1490 	make_dev_alias(dev, "stdin");
1491 	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
1492 	make_dev_alias(dev, "stdout");
1493 	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
1494 	make_dev_alias(dev, "stderr");
1495 	EVENTHANDLER_REGISTER(dev_clone, fildesc_clone, 0, 1000);
1496 	if (!devfs_present) {
1497 		int fd;
1498 
1499 		for (fd = 3; fd < NUMFDESC; fd++)
1500 			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
1501 			    "fd/%d", fd);
1502 	}
1503 }
1504 
1505 struct fileops badfileops = {
1506 	badfo_readwrite,
1507 	badfo_readwrite,
1508 	badfo_ioctl,
1509 	badfo_poll,
1510 	badfo_stat,
1511 	badfo_close
1512 };
1513 
/*
 * Stub installed twice in badfileops: ignores every argument and
 * always fails with EBADF.
 */
static int
badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *cred,
    int flags, struct proc *p)
{

	return (EBADF);
}
1525 
1526 static int
1527 badfo_ioctl(fp, com, data, p)
1528 	struct file *fp;
1529 	u_long com;
1530 	caddr_t data;
1531 	struct proc *p;
1532 {
1533 
1534 	return (EBADF);
1535 }
1536 
/*
 * poll stub for badfileops: ignores every argument and always returns
 * 0.  Unlike the other badfo_* stubs it does not return an errno.
 */
static int
badfo_poll(struct file *fp, int events, struct ucred *cred, struct proc *p)
{

	return (0);
}
1547 
/*
 * stat stub for badfileops: ignores every argument, leaves *sb
 * untouched and always fails with EBADF.
 */
static int
badfo_stat(struct file *fp, struct stat *sb, struct proc *p)
{

	return (EBADF);
}
1557 
/*
 * close stub for badfileops: ignores every argument and always fails
 * with EBADF.  fdrop() does not call fo_close for a file whose f_ops
 * is badfileops, so that path never reaches this stub.
 */
static int
badfo_close(struct file *fp, struct proc *p)
{

	return (EBADF);
}
1566 
1567 SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1568 					fildesc_drvinit,NULL)
1569