xref: /freebsd/sys/kern/kern_descrip.c (revision 1b6c76a2fe091c74f08427e6c870851025a9cf67)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $FreeBSD$
40  */
41 
42 #include "opt_compat.h"
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/lock.h>
46 #include <sys/mutex.h>
47 #include <sys/sysproto.h>
48 #include <sys/conf.h>
49 #include <sys/filedesc.h>
50 #include <sys/kernel.h>
51 #include <sys/sysctl.h>
52 #include <sys/vnode.h>
53 #include <sys/proc.h>
54 #include <sys/file.h>
55 #include <sys/stat.h>
56 #include <sys/filio.h>
57 #include <sys/fcntl.h>
58 #include <sys/malloc.h>
59 #include <sys/unistd.h>
60 #include <sys/resourcevar.h>
61 #include <sys/event.h>
62 
63 #include <vm/vm.h>
64 #include <vm/vm_extern.h>
65 
/*
 * Malloc types for the allocations made in this file: M_FILEDESC is used
 * for descriptor tables (struct filedesc0 and the grown ofile arrays),
 * M_FILE for struct file, and M_SIGIO for struct sigio.  Only M_FILE has
 * external linkage.
 */
static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
MALLOC_DEFINE(M_FILE, "file", "Open file structure");
static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
69 
70 static	 d_open_t  fdopen;
71 #define NUMFDESC 64
72 
73 #define CDEV_MAJOR 22
74 static struct cdevsw fildesc_cdevsw = {
75 	/* open */	fdopen,
76 	/* close */	noclose,
77 	/* read */	noread,
78 	/* write */	nowrite,
79 	/* ioctl */	noioctl,
80 	/* poll */	nopoll,
81 	/* mmap */	nommap,
82 	/* strategy */	nostrategy,
83 	/* name */	"FD",
84 	/* maj */	CDEV_MAJOR,
85 	/* dump */	nodump,
86 	/* psize */	nopsize,
87 	/* flags */	0,
88 };
89 
90 static int do_dup __P((struct filedesc *fdp, int old, int new, register_t *retval, struct proc *p));
91 static int badfo_readwrite __P((struct file *fp, struct uio *uio,
92     struct ucred *cred, int flags, struct proc *p));
93 static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data,
94     struct proc *p));
95 static int badfo_poll __P((struct file *fp, int events,
96     struct ucred *cred, struct proc *p));
97 static int badfo_kqfilter __P((struct file *fp, struct knote *kn));
98 static int badfo_stat __P((struct file *fp, struct stat *sb, struct proc *p));
99 static int badfo_close __P((struct file *fp, struct proc *p));
100 
/*
 * Descriptor management.
 *
 * filehead and nfiles are maintained together by falloc() and ffree():
 * every struct file is linked onto filehead and counted in nfiles.
 */
struct filelist filehead;	/* head of list of open files */
int nfiles;			/* actual number of open files */
extern int cmask;		/* copied into fd_cmask of new tables by fdinit() */
107 
108 /*
109  * System calls on descriptors.
110  */
111 #ifndef _SYS_SYSPROTO_H_
112 struct getdtablesize_args {
113 	int	dummy;
114 };
115 #endif
116 /* ARGSUSED */
117 int
118 getdtablesize(p, uap)
119 	struct proc *p;
120 	struct getdtablesize_args *uap;
121 {
122 
123 	p->p_retval[0] =
124 	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
125 	return (0);
126 }
127 
128 /*
129  * Duplicate a file descriptor to a particular value.
130  *
131  * note: keep in mind that a potential race condition exists when closing
132  * descriptors from a shared descriptor table (via rfork).
133  */
134 #ifndef _SYS_SYSPROTO_H_
135 struct dup2_args {
136 	u_int	from;
137 	u_int	to;
138 };
139 #endif
140 /* ARGSUSED */
141 int
142 dup2(p, uap)
143 	struct proc *p;
144 	struct dup2_args *uap;
145 {
146 	register struct filedesc *fdp = p->p_fd;
147 	register u_int old = uap->from, new = uap->to;
148 	int i, error;
149 
150 retry:
151 	if (old >= fdp->fd_nfiles ||
152 	    fdp->fd_ofiles[old] == NULL ||
153 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
154 	    new >= maxfilesperproc) {
155 		return (EBADF);
156 	}
157 	if (old == new) {
158 		p->p_retval[0] = new;
159 		return (0);
160 	}
161 	if (new >= fdp->fd_nfiles) {
162 		if ((error = fdalloc(p, new, &i)))
163 			return (error);
164 		if (new != i)
165 			panic("dup2: fdalloc");
166 		/*
167 		 * fdalloc() may block, retest everything.
168 		 */
169 		goto retry;
170 	}
171 	return (do_dup(fdp, (int)old, (int)new, p->p_retval, p));
172 }
173 
174 /*
175  * Duplicate a file descriptor.
176  */
177 #ifndef _SYS_SYSPROTO_H_
178 struct dup_args {
179 	u_int	fd;
180 };
181 #endif
182 /* ARGSUSED */
183 int
184 dup(p, uap)
185 	struct proc *p;
186 	struct dup_args *uap;
187 {
188 	register struct filedesc *fdp;
189 	u_int old;
190 	int new, error;
191 
192 	old = uap->fd;
193 	fdp = p->p_fd;
194 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
195 		return (EBADF);
196 	if ((error = fdalloc(p, 0, &new)))
197 		return (error);
198 	return (do_dup(fdp, (int)old, new, p->p_retval, p));
199 }
200 
201 /*
202  * The file control system call.
203  */
204 #ifndef _SYS_SYSPROTO_H_
205 struct fcntl_args {
206 	int	fd;
207 	int	cmd;
208 	long	arg;
209 };
210 #endif
211 /* ARGSUSED */
212 int
213 fcntl(p, uap)
214 	struct proc *p;
215 	register struct fcntl_args *uap;
216 {
217 	register struct filedesc *fdp = p->p_fd;
218 	register struct file *fp;
219 	register char *pop;
220 	struct vnode *vp;
221 	int i, tmp, error, flg = F_POSIX;
222 	struct flock fl;
223 	u_int newmin;
224 
225 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
226 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
227 		return (EBADF);
228 	pop = &fdp->fd_ofileflags[uap->fd];
229 
230 	switch (uap->cmd) {
231 	case F_DUPFD:
232 		newmin = uap->arg;
233 		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
234 		    newmin >= maxfilesperproc)
235 			return (EINVAL);
236 		if ((error = fdalloc(p, newmin, &i)))
237 			return (error);
238 		return (do_dup(fdp, uap->fd, i, p->p_retval, p));
239 
240 	case F_GETFD:
241 		p->p_retval[0] = *pop & 1;
242 		return (0);
243 
244 	case F_SETFD:
245 		*pop = (*pop &~ 1) | (uap->arg & 1);
246 		return (0);
247 
248 	case F_GETFL:
249 		p->p_retval[0] = OFLAGS(fp->f_flag);
250 		return (0);
251 
252 	case F_SETFL:
253 		fhold(fp);
254 		fp->f_flag &= ~FCNTLFLAGS;
255 		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
256 		tmp = fp->f_flag & FNONBLOCK;
257 		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
258 		if (error) {
259 			fdrop(fp, p);
260 			return (error);
261 		}
262 		tmp = fp->f_flag & FASYNC;
263 		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
264 		if (!error) {
265 			fdrop(fp, p);
266 			return (0);
267 		}
268 		fp->f_flag &= ~FNONBLOCK;
269 		tmp = 0;
270 		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
271 		fdrop(fp, p);
272 		return (error);
273 
274 	case F_GETOWN:
275 		fhold(fp);
276 		error = fo_ioctl(fp, FIOGETOWN, (caddr_t)p->p_retval, p);
277 		fdrop(fp, p);
278 		return(error);
279 
280 	case F_SETOWN:
281 		fhold(fp);
282 		error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, p);
283 		fdrop(fp, p);
284 		return(error);
285 
286 	case F_SETLKW:
287 		flg |= F_WAIT;
288 		/* Fall into F_SETLK */
289 
290 	case F_SETLK:
291 		if (fp->f_type != DTYPE_VNODE)
292 			return (EBADF);
293 		vp = (struct vnode *)fp->f_data;
294 
295 		/*
296 		 * copyin/lockop may block
297 		 */
298 		fhold(fp);
299 		/* Copy in the lock structure */
300 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
301 		    sizeof(fl));
302 		if (error) {
303 			fdrop(fp, p);
304 			return (error);
305 		}
306 		if (fl.l_whence == SEEK_CUR)
307 			fl.l_start += fp->f_offset;
308 
309 		switch (fl.l_type) {
310 		case F_RDLCK:
311 			if ((fp->f_flag & FREAD) == 0) {
312 				error = EBADF;
313 				break;
314 			}
315 			p->p_flag |= P_ADVLOCK;
316 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
317 			    &fl, flg);
318 			break;
319 		case F_WRLCK:
320 			if ((fp->f_flag & FWRITE) == 0) {
321 				error = EBADF;
322 				break;
323 			}
324 			p->p_flag |= P_ADVLOCK;
325 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
326 			    &fl, flg);
327 			break;
328 		case F_UNLCK:
329 			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
330 				&fl, F_POSIX);
331 			break;
332 		default:
333 			error = EINVAL;
334 			break;
335 		}
336 		fdrop(fp, p);
337 		return(error);
338 
339 	case F_GETLK:
340 		if (fp->f_type != DTYPE_VNODE)
341 			return (EBADF);
342 		vp = (struct vnode *)fp->f_data;
343 		/*
344 		 * copyin/lockop may block
345 		 */
346 		fhold(fp);
347 		/* Copy in the lock structure */
348 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
349 		    sizeof(fl));
350 		if (error) {
351 			fdrop(fp, p);
352 			return (error);
353 		}
354 		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
355 		    fl.l_type != F_UNLCK) {
356 			fdrop(fp, p);
357 			return (EINVAL);
358 		}
359 		if (fl.l_whence == SEEK_CUR)
360 			fl.l_start += fp->f_offset;
361 		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
362 			    &fl, F_POSIX);
363 		fdrop(fp, p);
364 		if (error == 0) {
365 			error = copyout((caddr_t)&fl,
366 				    (caddr_t)(intptr_t)uap->arg, sizeof(fl));
367 		}
368 		return(error);
369 	default:
370 		return (EINVAL);
371 	}
372 	/* NOTREACHED */
373 }
374 
375 /*
376  * Common code for dup, dup2, and fcntl(F_DUPFD).
377  */
378 static int
379 do_dup(fdp, old, new, retval, p)
380 	register struct filedesc *fdp;
381 	register int old, new;
382 	register_t *retval;
383 	struct proc *p;
384 {
385 	struct file *fp;
386 	struct file *delfp;
387 
388 	/*
389 	 * Save info on the descriptor being overwritten.  We have
390 	 * to do the unmap now, but we cannot close it without
391 	 * introducing an ownership race for the slot.
392 	 */
393 	delfp = fdp->fd_ofiles[new];
394 #if 0
395 	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
396 		(void) munmapfd(p, new);
397 #endif
398 
399 	/*
400 	 * Duplicate the source descriptor, update lastfile
401 	 */
402 	fp = fdp->fd_ofiles[old];
403 	fdp->fd_ofiles[new] = fp;
404 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
405 	fhold(fp);
406 	if (new > fdp->fd_lastfile)
407 		fdp->fd_lastfile = new;
408 	*retval = new;
409 
410 	/*
411 	 * If we dup'd over a valid file, we now own the reference to it
412 	 * and must dispose of it using closef() semantics (as if a
413 	 * close() were performed on it).
414 	 */
415 	if (delfp)
416 		(void) closef(delfp, p);
417 	return (0);
418 }
419 
420 /*
421  * If sigio is on the list associated with a process or process group,
422  * disable signalling from the device, remove sigio from the list and
423  * free sigio.
424  */
425 void
426 funsetown(sigio)
427 	struct sigio *sigio;
428 {
429 	int s;
430 
431 	if (sigio == NULL)
432 		return;
433 	s = splhigh();
434 	*(sigio->sio_myref) = NULL;
435 	splx(s);
436 	if (sigio->sio_pgid < 0) {
437 		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
438 			     sigio, sio_pgsigio);
439 	} else /* if ((*sigiop)->sio_pgid > 0) */ {
440 		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
441 			     sigio, sio_pgsigio);
442 	}
443 	crfree(sigio->sio_ucred);
444 	FREE(sigio, M_SIGIO);
445 }
446 
447 /* Free a list of sigio structures. */
448 void
449 funsetownlst(sigiolst)
450 	struct sigiolst *sigiolst;
451 {
452 	struct sigio *sigio;
453 
454 	while ((sigio = SLIST_FIRST(sigiolst)) != NULL)
455 		funsetown(sigio);
456 }
457 
458 /*
459  * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
460  *
461  * After permission checking, add a sigio structure to the sigio list for
462  * the process or process group.
463  */
464 int
465 fsetown(pgid, sigiop)
466 	pid_t pgid;
467 	struct sigio **sigiop;
468 {
469 	struct proc *proc;
470 	struct pgrp *pgrp;
471 	struct sigio *sigio;
472 	int s;
473 
474 	if (pgid == 0) {
475 		funsetown(*sigiop);
476 		return (0);
477 	}
478 	if (pgid > 0) {
479 		proc = pfind(pgid);
480 		if (proc == NULL)
481 			return (ESRCH);
482 
483 		/*
484 		 * Policy - Don't allow a process to FSETOWN a process
485 		 * in another session.
486 		 *
487 		 * Remove this test to allow maximum flexibility or
488 		 * restrict FSETOWN to the current process or process
489 		 * group for maximum safety.
490 		 */
491 		if (proc->p_session != curproc->p_session) {
492 			PROC_UNLOCK(proc);
493 			return (EPERM);
494 		}
495 		PROC_UNLOCK(proc);
496 
497 		pgrp = NULL;
498 	} else /* if (pgid < 0) */ {
499 		pgrp = pgfind(-pgid);
500 		if (pgrp == NULL)
501 			return (ESRCH);
502 
503 		/*
504 		 * Policy - Don't allow a process to FSETOWN a process
505 		 * in another session.
506 		 *
507 		 * Remove this test to allow maximum flexibility or
508 		 * restrict FSETOWN to the current process or process
509 		 * group for maximum safety.
510 		 */
511 		if (pgrp->pg_session != curproc->p_session)
512 			return (EPERM);
513 
514 		proc = NULL;
515 	}
516 	funsetown(*sigiop);
517 	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
518 	if (pgid > 0) {
519 		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
520 		sigio->sio_proc = proc;
521 	} else {
522 		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
523 		sigio->sio_pgrp = pgrp;
524 	}
525 	sigio->sio_pgid = pgid;
526 	crhold(curproc->p_ucred);
527 	sigio->sio_ucred = curproc->p_ucred;
528 	sigio->sio_myref = sigiop;
529 	s = splhigh();
530 	*sigiop = sigio;
531 	splx(s);
532 	return (0);
533 }
534 
535 /*
536  * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
537  */
538 pid_t
539 fgetown(sigio)
540 	struct sigio *sigio;
541 {
542 	return (sigio != NULL ? sigio->sio_pgid : 0);
543 }
544 
545 /*
546  * Close a file descriptor.
547  */
548 #ifndef _SYS_SYSPROTO_H_
549 struct close_args {
550         int     fd;
551 };
552 #endif
553 /* ARGSUSED */
554 int
555 close(p, uap)
556 	struct proc *p;
557 	struct close_args *uap;
558 {
559 	register struct filedesc *fdp = p->p_fd;
560 	register struct file *fp;
561 	register int fd = uap->fd;
562 
563 	if ((unsigned)fd >= fdp->fd_nfiles ||
564 	    (fp = fdp->fd_ofiles[fd]) == NULL)
565 		return (EBADF);
566 #if 0
567 	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
568 		(void) munmapfd(p, fd);
569 #endif
570 	fdp->fd_ofiles[fd] = NULL;
571 	fdp->fd_ofileflags[fd] = 0;
572 
573 	/*
574 	 * we now hold the fp reference that used to be owned by the descriptor
575 	 * array.
576 	 */
577 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
578 		fdp->fd_lastfile--;
579 	if (fd < fdp->fd_freefile)
580 		fdp->fd_freefile = fd;
581 	if (fd < fdp->fd_knlistsize)
582 		knote_fdclose(p, fd);
583 	return (closef(fp, p));
584 }
585 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor, in the old
 * (struct ostat) layout.
 *
 * Obtains a struct stat with fo_stat(), converts it with cvtstat() and
 * copies the result out to uap->sb.  Returns EBADF if the descriptor is
 * not open, otherwise the fo_stat()/copyout() result.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/* ARGSUSED */
int
ofstat(struct proc *p, struct ofstat_args *uap)
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct stat st;
	struct ostat ost;
	int error;

	if ((unsigned)uap->fd >= fdp->fd_nfiles)
		return (EBADF);
	fp = fdp->fd_ofiles[uap->fd];
	if (fp == NULL)
		return (EBADF);

	/* Keep the file alive across the stat and the copyout. */
	fhold(fp);
	error = fo_stat(fp, &st, p);
	if (error == 0) {
		cvtstat(&st, &ost);
		error = copyout((caddr_t)&ost, (caddr_t)uap->sb, sizeof (ost));
	}
	fdrop(fp, p);
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
621 
622 /*
623  * Return status information about a file descriptor.
624  */
625 #ifndef _SYS_SYSPROTO_H_
626 struct fstat_args {
627 	int	fd;
628 	struct	stat *sb;
629 };
630 #endif
631 /* ARGSUSED */
632 int
633 fstat(p, uap)
634 	struct proc *p;
635 	register struct fstat_args *uap;
636 {
637 	register struct filedesc *fdp = p->p_fd;
638 	register struct file *fp;
639 	struct stat ub;
640 	int error;
641 
642 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
643 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
644 		return (EBADF);
645 	fhold(fp);
646 	error = fo_stat(fp, &ub, p);
647 	if (error == 0)
648 		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
649 	fdrop(fp, p);
650 	return (error);
651 }
652 
653 /*
654  * Return status information about a file descriptor.
655  */
656 #ifndef _SYS_SYSPROTO_H_
657 struct nfstat_args {
658 	int	fd;
659 	struct	nstat *sb;
660 };
661 #endif
662 /* ARGSUSED */
663 int
664 nfstat(p, uap)
665 	struct proc *p;
666 	register struct nfstat_args *uap;
667 {
668 	register struct filedesc *fdp = p->p_fd;
669 	register struct file *fp;
670 	struct stat ub;
671 	struct nstat nub;
672 	int error;
673 
674 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
675 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
676 		return (EBADF);
677 	fhold(fp);
678 	error = fo_stat(fp, &ub, p);
679 	if (error == 0) {
680 		cvtnstat(&ub, &nub);
681 		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
682 	}
683 	fdrop(fp, p);
684 	return (error);
685 }
686 
687 /*
688  * Return pathconf information about a file descriptor.
689  */
690 #ifndef _SYS_SYSPROTO_H_
691 struct fpathconf_args {
692 	int	fd;
693 	int	name;
694 };
695 #endif
696 /* ARGSUSED */
697 int
698 fpathconf(p, uap)
699 	struct proc *p;
700 	register struct fpathconf_args *uap;
701 {
702 	struct filedesc *fdp = p->p_fd;
703 	struct file *fp;
704 	struct vnode *vp;
705 	int error = 0;
706 
707 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
708 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
709 		return (EBADF);
710 
711 	fhold(fp);
712 
713 	switch (fp->f_type) {
714 	case DTYPE_PIPE:
715 	case DTYPE_SOCKET:
716 		if (uap->name != _PC_PIPE_BUF)
717 			return (EINVAL);
718 		p->p_retval[0] = PIPE_BUF;
719 		error = 0;
720 		break;
721 	case DTYPE_FIFO:
722 	case DTYPE_VNODE:
723 		vp = (struct vnode *)fp->f_data;
724 		error = VOP_PATHCONF(vp, uap->name, p->p_retval);
725 		break;
726 	default:
727 		error = EOPNOTSUPP;
728 		break;
729 	}
730 	fdrop(fp, p);
731 	return(error);
732 }
733 
734 /*
735  * Allocate a file descriptor for the process.
736  */
737 static int fdexpand;
738 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
739 
740 int
741 fdalloc(p, want, result)
742 	struct proc *p;
743 	int want;
744 	int *result;
745 {
746 	register struct filedesc *fdp = p->p_fd;
747 	register int i;
748 	int lim, last, nfiles;
749 	struct file **newofile;
750 	char *newofileflags;
751 
752 	/*
753 	 * Search for a free descriptor starting at the higher
754 	 * of want or fd_freefile.  If that fails, consider
755 	 * expanding the ofile array.
756 	 */
757 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
758 	for (;;) {
759 		last = min(fdp->fd_nfiles, lim);
760 		if ((i = want) < fdp->fd_freefile)
761 			i = fdp->fd_freefile;
762 		for (; i < last; i++) {
763 			if (fdp->fd_ofiles[i] == NULL) {
764 				fdp->fd_ofileflags[i] = 0;
765 				if (i > fdp->fd_lastfile)
766 					fdp->fd_lastfile = i;
767 				if (want <= fdp->fd_freefile)
768 					fdp->fd_freefile = i;
769 				*result = i;
770 				return (0);
771 			}
772 		}
773 
774 		/*
775 		 * No space in current array.  Expand?
776 		 */
777 		if (fdp->fd_nfiles >= lim)
778 			return (EMFILE);
779 		if (fdp->fd_nfiles < NDEXTENT)
780 			nfiles = NDEXTENT;
781 		else
782 			nfiles = 2 * fdp->fd_nfiles;
783 		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
784 		    M_FILEDESC, M_WAITOK);
785 
786 		/*
787 		 * deal with file-table extend race that might have occured
788 		 * when malloc was blocked.
789 		 */
790 		if (fdp->fd_nfiles >= nfiles) {
791 			FREE(newofile, M_FILEDESC);
792 			continue;
793 		}
794 		newofileflags = (char *) &newofile[nfiles];
795 		/*
796 		 * Copy the existing ofile and ofileflags arrays
797 		 * and zero the new portion of each array.
798 		 */
799 		bcopy(fdp->fd_ofiles, newofile,
800 			(i = sizeof(struct file *) * fdp->fd_nfiles));
801 		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
802 		bcopy(fdp->fd_ofileflags, newofileflags,
803 			(i = sizeof(char) * fdp->fd_nfiles));
804 		bzero(newofileflags + i, nfiles * sizeof(char) - i);
805 		if (fdp->fd_nfiles > NDFILE)
806 			FREE(fdp->fd_ofiles, M_FILEDESC);
807 		fdp->fd_ofiles = newofile;
808 		fdp->fd_ofileflags = newofileflags;
809 		fdp->fd_nfiles = nfiles;
810 		fdexpand++;
811 	}
812 	return (0);
813 }
814 
815 /*
816  * Check to see whether n user file descriptors
817  * are available to the process p.
818  */
819 int
820 fdavail(p, n)
821 	struct proc *p;
822 	register int n;
823 {
824 	register struct filedesc *fdp = p->p_fd;
825 	register struct file **fpp;
826 	register int i, lim, last;
827 
828 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
829 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
830 		return (1);
831 
832 	last = min(fdp->fd_nfiles, lim);
833 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
834 	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
835 		if (*fpp == NULL && --n <= 0)
836 			return (1);
837 	}
838 	return (0);
839 }
840 
841 /*
842  * Create a new open file structure and allocate
843  * a file decriptor for the process that refers to it.
844  */
845 int
846 falloc(p, resultfp, resultfd)
847 	register struct proc *p;
848 	struct file **resultfp;
849 	int *resultfd;
850 {
851 	register struct file *fp, *fq;
852 	int error, i;
853 
854 	if (nfiles >= maxfiles) {
855 		tablefull("file");
856 		return (ENFILE);
857 	}
858 	/*
859 	 * Allocate a new file descriptor.
860 	 * If the process has file descriptor zero open, add to the list
861 	 * of open files at that point, otherwise put it at the front of
862 	 * the list of open files.
863 	 */
864 	nfiles++;
865 	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK | M_ZERO);
866 
867 	/*
868 	 * wait until after malloc (which may have blocked) returns before
869 	 * allocating the slot, else a race might have shrunk it if we had
870 	 * allocated it before the malloc.
871 	 */
872 	if ((error = fdalloc(p, 0, &i))) {
873 		nfiles--;
874 		FREE(fp, M_FILE);
875 		return (error);
876 	}
877 	fp->f_count = 1;
878 	fp->f_cred = p->p_ucred;
879 	fp->f_ops = &badfileops;
880 	fp->f_seqcount = 1;
881 	crhold(fp->f_cred);
882 	if ((fq = p->p_fd->fd_ofiles[0])) {
883 		LIST_INSERT_AFTER(fq, fp, f_list);
884 	} else {
885 		LIST_INSERT_HEAD(&filehead, fp, f_list);
886 	}
887 	p->p_fd->fd_ofiles[i] = fp;
888 	if (resultfp)
889 		*resultfp = fp;
890 	if (resultfd)
891 		*resultfd = i;
892 	return (0);
893 }
894 
895 /*
896  * Free a file descriptor.
897  */
898 void
899 ffree(fp)
900 	register struct file *fp;
901 {
902 	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
903 	LIST_REMOVE(fp, f_list);
904 	crfree(fp->f_cred);
905 	nfiles--;
906 	FREE(fp, M_FILE);
907 }
908 
909 /*
910  * Build a new filedesc structure.
911  */
912 struct filedesc *
913 fdinit(p)
914 	struct proc *p;
915 {
916 	register struct filedesc0 *newfdp;
917 	register struct filedesc *fdp = p->p_fd;
918 
919 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
920 	    M_FILEDESC, M_WAITOK | M_ZERO);
921 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
922 	if (newfdp->fd_fd.fd_cdir)
923 		VREF(newfdp->fd_fd.fd_cdir);
924 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
925 	if (newfdp->fd_fd.fd_rdir)
926 		VREF(newfdp->fd_fd.fd_rdir);
927 	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
928 	if (newfdp->fd_fd.fd_jdir)
929 		VREF(newfdp->fd_fd.fd_jdir);
930 
931 	/* Create the file descriptor table. */
932 	newfdp->fd_fd.fd_refcnt = 1;
933 	newfdp->fd_fd.fd_cmask = cmask;
934 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
935 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
936 	newfdp->fd_fd.fd_nfiles = NDFILE;
937 	newfdp->fd_fd.fd_knlistsize = -1;
938 
939 	return (&newfdp->fd_fd);
940 }
941 
942 /*
943  * Share a filedesc structure.
944  */
945 struct filedesc *
946 fdshare(p)
947 	struct proc *p;
948 {
949 	p->p_fd->fd_refcnt++;
950 	return (p->p_fd);
951 }
952 
953 /*
954  * Copy a filedesc structure.
955  */
956 struct filedesc *
957 fdcopy(p)
958 	struct proc *p;
959 {
960 	register struct filedesc *newfdp, *fdp = p->p_fd;
961 	register struct file **fpp;
962 	register int i;
963 
964 	/* Certain daemons might not have file descriptors. */
965 	if (fdp == NULL)
966 		return (NULL);
967 
968 	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
969 	    M_FILEDESC, M_WAITOK);
970 	bcopy(fdp, newfdp, sizeof(struct filedesc));
971 	if (newfdp->fd_cdir)
972 		VREF(newfdp->fd_cdir);
973 	if (newfdp->fd_rdir)
974 		VREF(newfdp->fd_rdir);
975 	if (newfdp->fd_jdir)
976 		VREF(newfdp->fd_jdir);
977 	newfdp->fd_refcnt = 1;
978 
979 	/*
980 	 * If the number of open files fits in the internal arrays
981 	 * of the open file structure, use them, otherwise allocate
982 	 * additional memory for the number of descriptors currently
983 	 * in use.
984 	 */
985 	if (newfdp->fd_lastfile < NDFILE) {
986 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
987 		newfdp->fd_ofileflags =
988 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
989 		i = NDFILE;
990 	} else {
991 		/*
992 		 * Compute the smallest multiple of NDEXTENT needed
993 		 * for the file descriptors currently in use,
994 		 * allowing the table to shrink.
995 		 */
996 		i = newfdp->fd_nfiles;
997 		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
998 			i /= 2;
999 		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1000 		    M_FILEDESC, M_WAITOK);
1001 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1002 	}
1003 	newfdp->fd_nfiles = i;
1004 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1005 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1006 
1007 	/*
1008 	 * kq descriptors cannot be copied.
1009 	 */
1010 	if (newfdp->fd_knlistsize != -1) {
1011 		fpp = newfdp->fd_ofiles;
1012 		for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1013 			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE)
1014 				*fpp = NULL;
1015 		}
1016 		newfdp->fd_knlist = NULL;
1017 		newfdp->fd_knlistsize = -1;
1018 		newfdp->fd_knhash = NULL;
1019 		newfdp->fd_knhashmask = 0;
1020 	}
1021 
1022 	fpp = newfdp->fd_ofiles;
1023 	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1024 		if (*fpp != NULL)
1025 			fhold(*fpp);
1026 	}
1027 	return (newfdp);
1028 }
1029 
1030 /*
1031  * Release a filedesc structure.
1032  */
1033 void
1034 fdfree(p)
1035 	struct proc *p;
1036 {
1037 	register struct filedesc *fdp = p->p_fd;
1038 	struct file **fpp;
1039 	register int i;
1040 
1041 	/* Certain daemons might not have file descriptors. */
1042 	if (fdp == NULL)
1043 		return;
1044 
1045 	if (--fdp->fd_refcnt > 0)
1046 		return;
1047 	/*
1048 	 * we are the last reference to the structure, we can
1049 	 * safely assume it will not change out from under us.
1050 	 */
1051 	fpp = fdp->fd_ofiles;
1052 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1053 		if (*fpp)
1054 			(void) closef(*fpp, p);
1055 	}
1056 	if (fdp->fd_nfiles > NDFILE)
1057 		FREE(fdp->fd_ofiles, M_FILEDESC);
1058 	if (fdp->fd_cdir)
1059 		vrele(fdp->fd_cdir);
1060 	if (fdp->fd_rdir)
1061 		vrele(fdp->fd_rdir);
1062 	if (fdp->fd_jdir)
1063 		vrele(fdp->fd_jdir);
1064 	if (fdp->fd_knlist)
1065 		FREE(fdp->fd_knlist, M_TEMP);
1066 	if (fdp->fd_knhash)
1067 		FREE(fdp->fd_knhash, M_TEMP);
1068 	FREE(fdp, M_FILEDESC);
1069 }
1070 
1071 /*
1072  * For setugid programs, we don't want to people to use that setugidness
1073  * to generate error messages which write to a file which otherwise would
1074  * otherwise be off-limits to the process.
1075  *
1076  * This is a gross hack to plug the hole.  A better solution would involve
1077  * a special vop or other form of generalized access control mechanism.  We
1078  * go ahead and just reject all procfs file systems accesses as dangerous.
1079  *
1080  * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1081  * sufficient.  We also don't for check setugidness since we know we are.
1082  */
1083 static int
1084 is_unsafe(struct file *fp)
1085 {
1086 	if (fp->f_type == DTYPE_VNODE &&
1087 	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1088 		return (1);
1089 	return (0);
1090 }
1091 
1092 /*
1093  * Make this setguid thing safe, if at all possible.
1094  */
1095 void
1096 setugidsafety(p)
1097 	struct proc *p;
1098 {
1099 	struct filedesc *fdp = p->p_fd;
1100 	register int i;
1101 
1102 	/* Certain daemons might not have file descriptors. */
1103 	if (fdp == NULL)
1104 		return;
1105 
1106 	/*
1107 	 * note: fdp->fd_ofiles may be reallocated out from under us while
1108 	 * we are blocked in a close.  Be careful!
1109 	 */
1110 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1111 		if (i > 2)
1112 			break;
1113 		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1114 			struct file *fp;
1115 
1116 #if 0
1117 			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1118 				(void) munmapfd(p, i);
1119 #endif
1120 			if (i < fdp->fd_knlistsize)
1121 				knote_fdclose(p, i);
1122 			/*
1123 			 * NULL-out descriptor prior to close to avoid
1124 			 * a race while close blocks.
1125 			 */
1126 			fp = fdp->fd_ofiles[i];
1127 			fdp->fd_ofiles[i] = NULL;
1128 			fdp->fd_ofileflags[i] = 0;
1129 			if (i < fdp->fd_freefile)
1130 				fdp->fd_freefile = i;
1131 			(void) closef(fp, p);
1132 		}
1133 	}
1134 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1135 		fdp->fd_lastfile--;
1136 }
1137 
1138 /*
1139  * Close any files on exec?
1140  */
1141 void
1142 fdcloseexec(p)
1143 	struct proc *p;
1144 {
1145 	struct filedesc *fdp = p->p_fd;
1146 	register int i;
1147 
1148 	/* Certain daemons might not have file descriptors. */
1149 	if (fdp == NULL)
1150 		return;
1151 
1152 	/*
1153 	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1154 	 * may block and rip them out from under us.
1155 	 */
1156 	for (i = 0; i <= fdp->fd_lastfile; i++) {
1157 		if (fdp->fd_ofiles[i] != NULL &&
1158 		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1159 			struct file *fp;
1160 
1161 #if 0
1162 			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1163 				(void) munmapfd(p, i);
1164 #endif
1165 			if (i < fdp->fd_knlistsize)
1166 				knote_fdclose(p, i);
1167 			/*
1168 			 * NULL-out descriptor prior to close to avoid
1169 			 * a race while close blocks.
1170 			 */
1171 			fp = fdp->fd_ofiles[i];
1172 			fdp->fd_ofiles[i] = NULL;
1173 			fdp->fd_ofileflags[i] = 0;
1174 			if (i < fdp->fd_freefile)
1175 				fdp->fd_freefile = i;
1176 			(void) closef(fp, p);
1177 		}
1178 	}
1179 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1180 		fdp->fd_lastfile--;
1181 }
1182 
1183 /*
1184  * Internal form of close.
1185  * Decrement reference count on file structure.
1186  * Note: p may be NULL when closing a file
1187  * that was being passed in a message.
1188  */
1189 int
1190 closef(fp, p)
1191 	register struct file *fp;
1192 	register struct proc *p;
1193 {
1194 	struct vnode *vp;
1195 	struct flock lf;
1196 
1197 	if (fp == NULL)
1198 		return (0);
1199 	/*
1200 	 * POSIX record locking dictates that any close releases ALL
1201 	 * locks owned by this process.  This is handled by setting
1202 	 * a flag in the unlock to free ONLY locks obeying POSIX
1203 	 * semantics, and not to free BSD-style file locks.
1204 	 * If the descriptor was in a message, POSIX-style locks
1205 	 * aren't passed with the descriptor.
1206 	 */
1207 	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
1208 		lf.l_whence = SEEK_SET;
1209 		lf.l_start = 0;
1210 		lf.l_len = 0;
1211 		lf.l_type = F_UNLCK;
1212 		vp = (struct vnode *)fp->f_data;
1213 		(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &lf, F_POSIX);
1214 	}
1215 	return (fdrop(fp, p));
1216 }
1217 
1218 int
1219 fdrop(fp, p)
1220 	struct file *fp;
1221 	struct proc *p;
1222 {
1223 	struct flock lf;
1224 	struct vnode *vp;
1225 	int error;
1226 
1227 	if (--fp->f_count > 0)
1228 		return (0);
1229 	if (fp->f_count < 0)
1230 		panic("fdrop: count < 0");
1231 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1232 		lf.l_whence = SEEK_SET;
1233 		lf.l_start = 0;
1234 		lf.l_len = 0;
1235 		lf.l_type = F_UNLCK;
1236 		vp = (struct vnode *)fp->f_data;
1237 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1238 	}
1239 	if (fp->f_ops != &badfileops)
1240 		error = fo_close(fp, p);
1241 	else
1242 		error = 0;
1243 	ffree(fp);
1244 	return (error);
1245 }
1246 
1247 /*
1248  * Apply an advisory lock on a file descriptor.
1249  *
1250  * Just attempt to get a record lock of the requested type on
1251  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1252  */
1253 #ifndef _SYS_SYSPROTO_H_
1254 struct flock_args {
1255 	int	fd;
1256 	int	how;
1257 };
1258 #endif
1259 /* ARGSUSED */
1260 int
1261 flock(p, uap)
1262 	struct proc *p;
1263 	register struct flock_args *uap;
1264 {
1265 	register struct filedesc *fdp = p->p_fd;
1266 	register struct file *fp;
1267 	struct vnode *vp;
1268 	struct flock lf;
1269 
1270 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
1271 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
1272 		return (EBADF);
1273 	if (fp->f_type != DTYPE_VNODE)
1274 		return (EOPNOTSUPP);
1275 	vp = (struct vnode *)fp->f_data;
1276 	lf.l_whence = SEEK_SET;
1277 	lf.l_start = 0;
1278 	lf.l_len = 0;
1279 	if (uap->how & LOCK_UN) {
1280 		lf.l_type = F_UNLCK;
1281 		fp->f_flag &= ~FHASLOCK;
1282 		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
1283 	}
1284 	if (uap->how & LOCK_EX)
1285 		lf.l_type = F_WRLCK;
1286 	else if (uap->how & LOCK_SH)
1287 		lf.l_type = F_RDLCK;
1288 	else
1289 		return (EBADF);
1290 	fp->f_flag |= FHASLOCK;
1291 	if (uap->how & LOCK_NB)
1292 		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
1293 	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
1294 }
1295 
1296 /*
1297  * File Descriptor pseudo-device driver (/dev/fd/).
1298  *
1299  * Opening minor device N dup()s the file (if any) connected to file
1300  * descriptor N belonging to the calling process.  Note that this driver
1301  * consists of only the ``open()'' routine, because all subsequent
1302  * references to this file will be direct to the other driver.
1303  */
1304 /* ARGSUSED */
1305 static int
1306 fdopen(dev, mode, type, p)
1307 	dev_t dev;
1308 	int mode, type;
1309 	struct proc *p;
1310 {
1311 
1312 	/*
1313 	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1314 	 * the file descriptor being sought for duplication. The error
1315 	 * return ensures that the vnode for this device will be released
1316 	 * by vn_open. Open will detect this special error and take the
1317 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1318 	 * will simply report the error.
1319 	 */
1320 	p->p_dupfd = dev2unit(dev);
1321 	return (ENODEV);
1322 }
1323 
1324 /*
1325  * Duplicate the specified descriptor to a free descriptor.
1326  */
1327 int
1328 dupfdopen(p, fdp, indx, dfd, mode, error)
1329 	struct proc *p;
1330 	struct filedesc *fdp;
1331 	int indx, dfd;
1332 	int mode;
1333 	int error;
1334 {
1335 	register struct file *wfp;
1336 	struct file *fp;
1337 
1338 	/*
1339 	 * If the to-be-dup'd fd number is greater than the allowed number
1340 	 * of file descriptors, or the fd to be dup'd has already been
1341 	 * closed, then reject.
1342 	 */
1343 	if ((u_int)dfd >= fdp->fd_nfiles ||
1344 	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1345 		return (EBADF);
1346 	}
1347 
1348 	/*
1349 	 * There are two cases of interest here.
1350 	 *
1351 	 * For ENODEV simply dup (dfd) to file descriptor
1352 	 * (indx) and return.
1353 	 *
1354 	 * For ENXIO steal away the file structure from (dfd) and
1355 	 * store it in (indx).  (dfd) is effectively closed by
1356 	 * this operation.
1357 	 *
1358 	 * Any other error code is just returned.
1359 	 */
1360 	switch (error) {
1361 	case ENODEV:
1362 		/*
1363 		 * Check that the mode the file is being opened for is a
1364 		 * subset of the mode of the existing descriptor.
1365 		 */
1366 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1367 			return (EACCES);
1368 		fp = fdp->fd_ofiles[indx];
1369 #if 0
1370 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1371 			(void) munmapfd(p, indx);
1372 #endif
1373 		fdp->fd_ofiles[indx] = wfp;
1374 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1375 		fhold(wfp);
1376 		if (indx > fdp->fd_lastfile)
1377 			fdp->fd_lastfile = indx;
1378 		/*
1379 		 * we now own the reference to fp that the ofiles[] array
1380 		 * used to own.  Release it.
1381 		 */
1382 		if (fp)
1383 			fdrop(fp, p);
1384 		return (0);
1385 
1386 	case ENXIO:
1387 		/*
1388 		 * Steal away the file pointer from dfd, and stuff it into indx.
1389 		 */
1390 		fp = fdp->fd_ofiles[indx];
1391 #if 0
1392 		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1393 			(void) munmapfd(p, indx);
1394 #endif
1395 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1396 		fdp->fd_ofiles[dfd] = NULL;
1397 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1398 		fdp->fd_ofileflags[dfd] = 0;
1399 
1400 		/*
1401 		 * we now own the reference to fp that the ofiles[] array
1402 		 * used to own.  Release it.
1403 		 */
1404 		if (fp)
1405 			fdrop(fp, p);
1406 		/*
1407 		 * Complete the clean up of the filedesc structure by
1408 		 * recomputing the various hints.
1409 		 */
1410 		if (indx > fdp->fd_lastfile) {
1411 			fdp->fd_lastfile = indx;
1412 		} else {
1413 			while (fdp->fd_lastfile > 0 &&
1414 			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
1415 				fdp->fd_lastfile--;
1416 			}
1417 			if (dfd < fdp->fd_freefile)
1418 				fdp->fd_freefile = dfd;
1419 		}
1420 		return (0);
1421 
1422 	default:
1423 		return (error);
1424 	}
1425 	/* NOTREACHED */
1426 }
1427 
1428 /*
1429  * Get file structures.
1430  */
1431 static int
1432 sysctl_kern_file(SYSCTL_HANDLER_ARGS)
1433 {
1434 	int error;
1435 	struct file *fp;
1436 
1437 	if (!req->oldptr) {
1438 		/*
1439 		 * overestimate by 10 files
1440 		 */
1441 		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1442 				(nfiles + 10) * sizeof(struct file)));
1443 	}
1444 
1445 	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1446 	if (error)
1447 		return (error);
1448 
1449 	/*
1450 	 * followed by an array of file structures
1451 	 */
1452 	LIST_FOREACH(fp, &filehead, f_list) {
1453 		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1454 		if (error)
1455 			return (error);
1456 	}
1457 	return (0);
1458 }
1459 
1460 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1461     0, 0, sysctl_kern_file, "S,file", "Entire file table");
1462 
1463 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
1464     &maxfilesperproc, 0, "Maximum files allowed open per process");
1465 
1466 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
1467     &maxfiles, 0, "Maximum number of files");
1468 
1469 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
1470     &nfiles, 0, "System-wide number of open files");
1471 
1472 static void
1473 fildesc_clone(void *arg, char *name, int namelen, dev_t *dev)
1474 {
1475 	int u;
1476 
1477 	if (*dev != NODEV)
1478 		return;
1479 	if (dev_stdclone(name, NULL, "fd/", &u) != 1)
1480 		return;
1481 	if (u <= 2)
1482 		return;
1483 	/* Don't clone higher than it makes sense */
1484 	if (u >= maxfilesperproc)
1485 		return;
1486 	/* And don't clone higher than our minors will support */
1487 	if (u > 0xffffff)
1488 		return;
1489 	u = unit2minor(u);
1490 	*dev = make_dev(&fildesc_cdevsw, u, UID_BIN, GID_BIN, 0666, name);
1491 	return;
1492 }
1493 
1494 static void
1495 fildesc_drvinit(void *unused)
1496 {
1497 	dev_t dev;
1498 
1499 	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
1500 	make_dev_alias(dev, "stdin");
1501 	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
1502 	make_dev_alias(dev, "stdout");
1503 	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
1504 	make_dev_alias(dev, "stderr");
1505 	EVENTHANDLER_REGISTER(dev_clone, fildesc_clone, 0, 1000);
1506 	if (!devfs_present) {
1507 		int fd;
1508 
1509 		for (fd = 3; fd < NUMFDESC; fd++)
1510 			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
1511 			    "fd/%d", fd);
1512 	}
1513 }
1514 
1515 struct fileops badfileops = {
1516 	badfo_readwrite,
1517 	badfo_readwrite,
1518 	badfo_ioctl,
1519 	badfo_poll,
1520 	badfo_kqfilter,
1521 	badfo_stat,
1522 	badfo_close
1523 };
1524 
1525 static int
1526 badfo_readwrite(fp, uio, cred, flags, p)
1527 	struct file *fp;
1528 	struct uio *uio;
1529 	struct ucred *cred;
1530 	struct proc *p;
1531 	int flags;
1532 {
1533 
1534 	return (EBADF);
1535 }
1536 
1537 static int
1538 badfo_ioctl(fp, com, data, p)
1539 	struct file *fp;
1540 	u_long com;
1541 	caddr_t data;
1542 	struct proc *p;
1543 {
1544 
1545 	return (EBADF);
1546 }
1547 
/*
 * Poll stub for badfileops: ignores its arguments and always
 * returns 0.
 */
static int
badfo_poll(fp, events, cred, p)
	struct file *fp;
	int events;
	struct ucred *cred;
	struct proc *p;
{

	return (0);
}
1558 
/*
 * Kqueue filter stub for badfileops: ignores its arguments and always
 * returns 0.
 */
static int
badfo_kqfilter(fp, kn)
	struct file *fp;
	struct knote *kn;
{

	return (0);
}
1567 
1568 static int
1569 badfo_stat(fp, sb, p)
1570 	struct file *fp;
1571 	struct stat *sb;
1572 	struct proc *p;
1573 {
1574 
1575 	return (EBADF);
1576 }
1577 
1578 static int
1579 badfo_close(fp, p)
1580 	struct file *fp;
1581 	struct proc *p;
1582 {
1583 
1584 	return (EBADF);
1585 }
1586 
1587 SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1588 					fildesc_drvinit,NULL)
1589