xref: /freebsd/sys/kern/kern_descrip.c (revision a8445737e740901f5f2c8d24c12ef7fc8b00134e)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $Id: kern_descrip.c,v 1.54 1998/07/15 06:10:16 bde Exp $
40  */
41 
42 #include "opt_compat.h"
43 #include "opt_devfs.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/sysproto.h>
48 #include <sys/conf.h>
49 #include <sys/filedesc.h>
50 #include <sys/kernel.h>
51 #include <sys/sysctl.h>
52 #include <sys/vnode.h>
53 #include <sys/proc.h>
54 #include <sys/file.h>
55 #include <sys/socketvar.h>
56 #include <sys/stat.h>
57 #include <sys/filio.h>
58 #include <sys/ttycom.h>
59 #include <sys/fcntl.h>
60 #include <sys/malloc.h>
61 #include <sys/unistd.h>
62 #include <sys/resourcevar.h>
63 #include <sys/pipe.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_extern.h>
67 
68 #ifdef DEVFS
69 #include <sys/devfsext.h>
70 #endif /*DEVFS*/
71 
/* Malloc types used for descriptor tables and for open file structures. */
static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
MALLOC_DEFINE(M_FILE, "file", "Open file structure");


/* Open routine of the file descriptor pseudo-device, defined below. */
static	 d_open_t  fdopen;
/* Number of "fd/N" nodes registered with DEVFS by fildesc_drvinit(). */
#define NUMFDESC 64

/* Character device major number handed to makedev() in fildesc_drvinit(). */
#define CDEV_MAJOR 22
/*
 * Character device switch for the descriptor pseudo-device.  Only the
 * open slot refers to a routine in this file; judging by their names the
 * remaining slots are generic stub entries.
 */
static struct cdevsw fildesc_cdevsw =
	{ fdopen,	noclose,	noread,		nowrite,
	  noioc,	nostop,		nullreset,	nodevtotty,
	  seltrue,	nommap,		nostrat };

/* Shared tail of dup(), dup2() and fcntl(F_DUPFD). */
static int finishdup __P((struct filedesc *fdp, int old, int new, register_t *retval));
/*
 * Descriptor management.
 */
struct filelist filehead;	/* head of list of open files */
int nfiles;			/* actual number of open files */
extern int cmask;		/* copied into fd_cmask by fdinit() */
92 
93 /*
94  * System calls on descriptors.
95  */
96 #ifndef _SYS_SYSPROTO_H_
97 struct getdtablesize_args {
98 	int	dummy;
99 };
100 #endif
101 /* ARGSUSED */
102 int
103 getdtablesize(p, uap)
104 	struct proc *p;
105 	struct getdtablesize_args *uap;
106 {
107 
108 	p->p_retval[0] =
109 	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
110 	return (0);
111 }
112 
113 /*
114  * Duplicate a file descriptor to a particular value.
115  */
116 #ifndef _SYS_SYSPROTO_H_
117 struct dup2_args {
118 	u_int	from;
119 	u_int	to;
120 };
121 #endif
122 /* ARGSUSED */
123 int
124 dup2(p, uap)
125 	struct proc *p;
126 	struct dup2_args *uap;
127 {
128 	register struct filedesc *fdp = p->p_fd;
129 	register u_int old = uap->from, new = uap->to;
130 	int i, error;
131 
132 	if (old >= fdp->fd_nfiles ||
133 	    fdp->fd_ofiles[old] == NULL ||
134 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
135 	    new >= maxfilesperproc)
136 		return (EBADF);
137 	if (old == new) {
138 		p->p_retval[0] = new;
139 		return (0);
140 	}
141 	if (new >= fdp->fd_nfiles) {
142 		if ((error = fdalloc(p, new, &i)))
143 			return (error);
144 		if (new != i)
145 			panic("dup2: fdalloc");
146 	} else if (fdp->fd_ofiles[new]) {
147 		if (fdp->fd_ofileflags[new] & UF_MAPPED)
148 			(void) munmapfd(p, new);
149 		/*
150 		 * dup2() must succeed even if the close has an error.
151 		 */
152 		(void) closef(fdp->fd_ofiles[new], p);
153 	}
154 	return (finishdup(fdp, (int)old, (int)new, p->p_retval));
155 }
156 
157 /*
158  * Duplicate a file descriptor.
159  */
160 #ifndef _SYS_SYSPROTO_H_
161 struct dup_args {
162 	u_int	fd;
163 };
164 #endif
165 /* ARGSUSED */
166 int
167 dup(p, uap)
168 	struct proc *p;
169 	struct dup_args *uap;
170 {
171 	register struct filedesc *fdp;
172 	u_int old;
173 	int new, error;
174 
175 	old = uap->fd;
176 
177 #if 0
178 	/*
179 	 * XXX Compatibility
180 	 */
181 	if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, p->p_retval)); }
182 #endif
183 
184 	fdp = p->p_fd;
185 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
186 		return (EBADF);
187 	if ((error = fdalloc(p, 0, &new)))
188 		return (error);
189 	return (finishdup(fdp, (int)old, new, p->p_retval));
190 }
191 
192 /*
193  * The file control system call.
194  */
195 #ifndef _SYS_SYSPROTO_H_
196 struct fcntl_args {
197 	int	fd;
198 	int	cmd;
199 	long	arg;
200 };
201 #endif
202 /* ARGSUSED */
203 int
204 fcntl(p, uap)
205 	struct proc *p;
206 	register struct fcntl_args *uap;
207 {
208 	register struct filedesc *fdp = p->p_fd;
209 	register struct file *fp;
210 	register char *pop;
211 	struct vnode *vp;
212 	int i, tmp, error, flg = F_POSIX;
213 	struct flock fl;
214 	u_int newmin;
215 
216 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
217 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
218 		return (EBADF);
219 	pop = &fdp->fd_ofileflags[uap->fd];
220 	switch (uap->cmd) {
221 
222 	case F_DUPFD:
223 		newmin = uap->arg;
224 		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
225 		    newmin >= maxfilesperproc)
226 			return (EINVAL);
227 		if ((error = fdalloc(p, newmin, &i)))
228 			return (error);
229 		return (finishdup(fdp, uap->fd, i, p->p_retval));
230 
231 	case F_GETFD:
232 		p->p_retval[0] = *pop & 1;
233 		return (0);
234 
235 	case F_SETFD:
236 		*pop = (*pop &~ 1) | (uap->arg & 1);
237 		return (0);
238 
239 	case F_GETFL:
240 		p->p_retval[0] = OFLAGS(fp->f_flag);
241 		return (0);
242 
243 	case F_SETFL:
244 		fp->f_flag &= ~FCNTLFLAGS;
245 		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
246 		tmp = fp->f_flag & FNONBLOCK;
247 		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
248 		if (error)
249 			return (error);
250 		tmp = fp->f_flag & FASYNC;
251 		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
252 		if (!error)
253 			return (0);
254 		fp->f_flag &= ~FNONBLOCK;
255 		tmp = 0;
256 		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
257 		return (error);
258 
259 	case F_GETOWN:
260 		if (fp->f_type == DTYPE_SOCKET) {
261 			p->p_retval[0] = ((struct socket *)fp->f_data)->so_pgid;
262 			return (0);
263 		}
264 		error = (*fp->f_ops->fo_ioctl)
265 			(fp, TIOCGPGRP, (caddr_t)p->p_retval, p);
266 		p->p_retval[0] = - p->p_retval[0];
267 		return (error);
268 
269 	case F_SETOWN:
270 		if (fp->f_type == DTYPE_SOCKET) {
271 			((struct socket *)fp->f_data)->so_pgid = uap->arg;
272 			return (0);
273 		}
274 		if (uap->arg <= 0) {
275 			uap->arg = -uap->arg;
276 		} else {
277 			struct proc *p1 = pfind(uap->arg);
278 			if (p1 == 0)
279 				return (ESRCH);
280 			uap->arg = p1->p_pgrp->pg_id;
281 		}
282 		return ((*fp->f_ops->fo_ioctl)
283 			(fp, TIOCSPGRP, (caddr_t)&uap->arg, p));
284 
285 	case F_SETLKW:
286 		flg |= F_WAIT;
287 		/* Fall into F_SETLK */
288 
289 	case F_SETLK:
290 		if (fp->f_type != DTYPE_VNODE)
291 			return (EBADF);
292 		vp = (struct vnode *)fp->f_data;
293 		/* Copy in the lock structure */
294 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
295 		    sizeof(fl));
296 		if (error)
297 			return (error);
298 		if (fl.l_whence == SEEK_CUR)
299 			fl.l_start += fp->f_offset;
300 		switch (fl.l_type) {
301 
302 		case F_RDLCK:
303 			if ((fp->f_flag & FREAD) == 0)
304 				return (EBADF);
305 			p->p_flag |= P_ADVLOCK;
306 			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
307 
308 		case F_WRLCK:
309 			if ((fp->f_flag & FWRITE) == 0)
310 				return (EBADF);
311 			p->p_flag |= P_ADVLOCK;
312 			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
313 
314 		case F_UNLCK:
315 			return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
316 				F_POSIX));
317 
318 		default:
319 			return (EINVAL);
320 		}
321 
322 	case F_GETLK:
323 		if (fp->f_type != DTYPE_VNODE)
324 			return (EBADF);
325 		vp = (struct vnode *)fp->f_data;
326 		/* Copy in the lock structure */
327 		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
328 		    sizeof(fl));
329 		if (error)
330 			return (error);
331 		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
332 		    fl.l_type != F_UNLCK)
333 			return (EINVAL);
334 		if (fl.l_whence == SEEK_CUR)
335 			fl.l_start += fp->f_offset;
336 		if ((error = VOP_ADVLOCK(vp,(caddr_t)p,F_GETLK,&fl,F_POSIX)))
337 			return (error);
338 		return (copyout((caddr_t)&fl, (caddr_t)(intptr_t)uap->arg,
339 		    sizeof(fl)));
340 
341 	default:
342 		return (EINVAL);
343 	}
344 	/* NOTREACHED */
345 }
346 
347 /*
348  * Common code for dup, dup2, and fcntl(F_DUPFD).
349  */
350 static int
351 finishdup(fdp, old, new, retval)
352 	register struct filedesc *fdp;
353 	register int old, new;
354 	register_t *retval;
355 {
356 	register struct file *fp;
357 
358 	fp = fdp->fd_ofiles[old];
359 	fdp->fd_ofiles[new] = fp;
360 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
361 	fp->f_count++;
362 	if (new > fdp->fd_lastfile)
363 		fdp->fd_lastfile = new;
364 	*retval = new;
365 	return (0);
366 }
367 
368 /*
369  * Close a file descriptor.
370  */
371 #ifndef _SYS_SYSPROTO_H_
372 struct close_args {
373         int     fd;
374 };
375 #endif
376 /* ARGSUSED */
377 int
378 close(p, uap)
379 	struct proc *p;
380 	struct close_args *uap;
381 {
382 	register struct filedesc *fdp = p->p_fd;
383 	register struct file *fp;
384 	register int fd = uap->fd;
385 	register u_char *pf;
386 
387 	if ((unsigned)fd >= fdp->fd_nfiles ||
388 	    (fp = fdp->fd_ofiles[fd]) == NULL)
389 		return (EBADF);
390 	pf = (u_char *)&fdp->fd_ofileflags[fd];
391 	if (*pf & UF_MAPPED)
392 		(void) munmapfd(p, fd);
393 	fdp->fd_ofiles[fd] = NULL;
394 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
395 		fdp->fd_lastfile--;
396 	if (fd < fdp->fd_freefile)
397 		fdp->fd_freefile = fd;
398 	*pf = 0;
399 	return (closef(fp, p));
400 }
401 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor in the old
 * (struct ostat) layout.
 *
 * Returns EBADF if uap->fd is not open, the error from the type-specific
 * stat routine, or the error from copyout().
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/* ARGSUSED */
int
ofstat(p, uap)
	struct proc *p;
	register struct ofstat_args *uap;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct stat ub;
	struct ostat oub;
	int error;

	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
		return (EBADF);
	switch (fp->f_type) {

	case DTYPE_FIFO:
	case DTYPE_VNODE:
		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
		break;

	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)fp->f_data, &ub);
		break;

	case DTYPE_PIPE:
		error = pipe_stat((struct pipe *)fp->f_data, &ub);
		break;

	default:
		panic("ofstat");
		/*NOTREACHED*/
	}
	if (error == 0) {
		/*
		 * Convert only after a successful stat; on failure ub has
		 * not been filled in and must not be read.
		 */
		cvtstat(&ub, &oub);
		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
	}
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
452 
453 /*
454  * Return status information about a file descriptor.
455  */
456 #ifndef _SYS_SYSPROTO_H_
457 struct fstat_args {
458 	int	fd;
459 	struct	stat *sb;
460 };
461 #endif
462 /* ARGSUSED */
463 int
464 fstat(p, uap)
465 	struct proc *p;
466 	register struct fstat_args *uap;
467 {
468 	register struct filedesc *fdp = p->p_fd;
469 	register struct file *fp;
470 	struct stat ub;
471 	int error;
472 
473 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
474 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
475 		return (EBADF);
476 	switch (fp->f_type) {
477 
478 	case DTYPE_FIFO:
479 	case DTYPE_VNODE:
480 		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
481 		break;
482 
483 	case DTYPE_SOCKET:
484 		error = soo_stat((struct socket *)fp->f_data, &ub);
485 		break;
486 
487 	case DTYPE_PIPE:
488 		error = pipe_stat((struct pipe *)fp->f_data, &ub);
489 		break;
490 
491 	default:
492 		panic("fstat");
493 		/*NOTREACHED*/
494 	}
495 	if (error == 0)
496 		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
497 	return (error);
498 }
499 
500 /*
501  * Return status information about a file descriptor.
502  */
503 #ifndef _SYS_SYSPROTO_H_
504 struct nfstat_args {
505 	int	fd;
506 	struct	nstat *sb;
507 };
508 #endif
509 /* ARGSUSED */
510 int
511 nfstat(p, uap)
512 	struct proc *p;
513 	register struct nfstat_args *uap;
514 {
515 	register struct filedesc *fdp = p->p_fd;
516 	register struct file *fp;
517 	struct stat ub;
518 	struct nstat nub;
519 	int error;
520 
521 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
522 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
523 		return (EBADF);
524 	switch (fp->f_type) {
525 
526 	case DTYPE_FIFO:
527 	case DTYPE_VNODE:
528 		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
529 		break;
530 
531 	case DTYPE_SOCKET:
532 		error = soo_stat((struct socket *)fp->f_data, &ub);
533 		break;
534 
535 	case DTYPE_PIPE:
536 		error = pipe_stat((struct pipe *)fp->f_data, &ub);
537 		break;
538 
539 	default:
540 		panic("fstat");
541 		/*NOTREACHED*/
542 	}
543 	if (error == 0) {
544 		cvtnstat(&ub, &nub);
545 		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
546 	}
547 	return (error);
548 }
549 
550 /*
551  * Return pathconf information about a file descriptor.
552  */
553 #ifndef _SYS_SYSPROTO_H_
554 struct fpathconf_args {
555 	int	fd;
556 	int	name;
557 };
558 #endif
559 /* ARGSUSED */
560 int
561 fpathconf(p, uap)
562 	struct proc *p;
563 	register struct fpathconf_args *uap;
564 {
565 	struct filedesc *fdp = p->p_fd;
566 	struct file *fp;
567 	struct vnode *vp;
568 
569 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
570 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
571 		return (EBADF);
572 	switch (fp->f_type) {
573 
574 	case DTYPE_PIPE:
575 	case DTYPE_SOCKET:
576 		if (uap->name != _PC_PIPE_BUF)
577 			return (EINVAL);
578 		p->p_retval[0] = PIPE_BUF;
579 		return (0);
580 
581 	case DTYPE_FIFO:
582 	case DTYPE_VNODE:
583 		vp = (struct vnode *)fp->f_data;
584 		return (VOP_PATHCONF(vp, uap->name, p->p_retval));
585 
586 	default:
587 		panic("fpathconf");
588 	}
589 	/*NOTREACHED*/
590 }
591 
592 /*
593  * Allocate a file descriptor for the process.
594  */
595 static int fdexpand;
596 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
597 
598 int
599 fdalloc(p, want, result)
600 	struct proc *p;
601 	int want;
602 	int *result;
603 {
604 	register struct filedesc *fdp = p->p_fd;
605 	register int i;
606 	int lim, last, nfiles;
607 	struct file **newofile;
608 	char *newofileflags;
609 
610 	/*
611 	 * Search for a free descriptor starting at the higher
612 	 * of want or fd_freefile.  If that fails, consider
613 	 * expanding the ofile array.
614 	 */
615 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
616 	for (;;) {
617 		last = min(fdp->fd_nfiles, lim);
618 		if ((i = want) < fdp->fd_freefile)
619 			i = fdp->fd_freefile;
620 		for (; i < last; i++) {
621 			if (fdp->fd_ofiles[i] == NULL) {
622 				fdp->fd_ofileflags[i] = 0;
623 				if (i > fdp->fd_lastfile)
624 					fdp->fd_lastfile = i;
625 				if (want <= fdp->fd_freefile)
626 					fdp->fd_freefile = i;
627 				*result = i;
628 				return (0);
629 			}
630 		}
631 
632 		/*
633 		 * No space in current array.  Expand?
634 		 */
635 		if (fdp->fd_nfiles >= lim)
636 			return (EMFILE);
637 		if (fdp->fd_nfiles < NDEXTENT)
638 			nfiles = NDEXTENT;
639 		else
640 			nfiles = 2 * fdp->fd_nfiles;
641 		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
642 		    M_FILEDESC, M_WAITOK);
643 		newofileflags = (char *) &newofile[nfiles];
644 		/*
645 		 * Copy the existing ofile and ofileflags arrays
646 		 * and zero the new portion of each array.
647 		 */
648 		bcopy(fdp->fd_ofiles, newofile,
649 			(i = sizeof(struct file *) * fdp->fd_nfiles));
650 		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
651 		bcopy(fdp->fd_ofileflags, newofileflags,
652 			(i = sizeof(char) * fdp->fd_nfiles));
653 		bzero(newofileflags + i, nfiles * sizeof(char) - i);
654 		if (fdp->fd_nfiles > NDFILE)
655 			FREE(fdp->fd_ofiles, M_FILEDESC);
656 		fdp->fd_ofiles = newofile;
657 		fdp->fd_ofileflags = newofileflags;
658 		fdp->fd_nfiles = nfiles;
659 		fdexpand++;
660 	}
661 	return (0);
662 }
663 
664 /*
665  * Check to see whether n user file descriptors
666  * are available to the process p.
667  */
668 int
669 fdavail(p, n)
670 	struct proc *p;
671 	register int n;
672 {
673 	register struct filedesc *fdp = p->p_fd;
674 	register struct file **fpp;
675 	register int i, lim, last;
676 
677 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
678 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
679 		return (1);
680 
681 	last = min(fdp->fd_nfiles, lim);
682 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
683 	for (i = last - fdp->fd_freefile; --i >= 0; fpp++)
684 		if (*fpp == NULL && --n <= 0)
685 			return (1);
686 	return (0);
687 }
688 
689 /*
690  * Create a new open file structure and allocate
691  * a file decriptor for the process that refers to it.
692  */
693 int
694 falloc(p, resultfp, resultfd)
695 	register struct proc *p;
696 	struct file **resultfp;
697 	int *resultfd;
698 {
699 	register struct file *fp, *fq;
700 	int error, i;
701 
702 	if ((error = fdalloc(p, 0, &i)))
703 		return (error);
704 	if (nfiles >= maxfiles) {
705 		tablefull("file");
706 		return (ENFILE);
707 	}
708 	/*
709 	 * Allocate a new file descriptor.
710 	 * If the process has file descriptor zero open, add to the list
711 	 * of open files at that point, otherwise put it at the front of
712 	 * the list of open files.
713 	 */
714 	nfiles++;
715 	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
716 	bzero(fp, sizeof(struct file));
717 	if ((fq = p->p_fd->fd_ofiles[0])) {
718 		LIST_INSERT_AFTER(fq, fp, f_list);
719 	} else {
720 		LIST_INSERT_HEAD(&filehead, fp, f_list);
721 	}
722 	p->p_fd->fd_ofiles[i] = fp;
723 	fp->f_count = 1;
724 	fp->f_cred = p->p_ucred;
725 	fp->f_seqcount = 1;
726 	crhold(fp->f_cred);
727 	if (resultfp)
728 		*resultfp = fp;
729 	if (resultfd)
730 		*resultfd = i;
731 	return (0);
732 }
733 
734 /*
735  * Free a file descriptor.
736  */
737 void
738 ffree(fp)
739 	register struct file *fp;
740 {
741 	LIST_REMOVE(fp, f_list);
742 	crfree(fp->f_cred);
743 #ifdef DIAGNOSTIC
744 	fp->f_count = 0;
745 #endif
746 	nfiles--;
747 	FREE(fp, M_FILE);
748 }
749 
750 /*
751  * Build a new filedesc structure.
752  */
753 struct filedesc *
754 fdinit(p)
755 	struct proc *p;
756 {
757 	register struct filedesc0 *newfdp;
758 	register struct filedesc *fdp = p->p_fd;
759 
760 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
761 	    M_FILEDESC, M_WAITOK);
762 	bzero(newfdp, sizeof(struct filedesc0));
763 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
764 	VREF(newfdp->fd_fd.fd_cdir);
765 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
766 	VREF(newfdp->fd_fd.fd_rdir);
767 
768 	/* Create the file descriptor table. */
769 	newfdp->fd_fd.fd_refcnt = 1;
770 	newfdp->fd_fd.fd_cmask = cmask;
771 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
772 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
773 	newfdp->fd_fd.fd_nfiles = NDFILE;
774 
775 	newfdp->fd_fd.fd_freefile = 0;
776 	newfdp->fd_fd.fd_lastfile = 0;
777 
778 	return (&newfdp->fd_fd);
779 }
780 
781 /*
782  * Share a filedesc structure.
783  */
784 struct filedesc *
785 fdshare(p)
786 	struct proc *p;
787 {
788 	p->p_fd->fd_refcnt++;
789 	return (p->p_fd);
790 }
791 
792 /*
793  * Copy a filedesc structure.
794  */
795 struct filedesc *
796 fdcopy(p)
797 	struct proc *p;
798 {
799 	register struct filedesc *newfdp, *fdp = p->p_fd;
800 	register struct file **fpp;
801 	register int i;
802 
803 /*
804  * Certain daemons might not have file descriptors
805  */
806 	if (fdp == NULL)
807 		return NULL;
808 
809 	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
810 	    M_FILEDESC, M_WAITOK);
811 	bcopy(fdp, newfdp, sizeof(struct filedesc));
812 	VREF(newfdp->fd_cdir);
813 	VREF(newfdp->fd_rdir);
814 	newfdp->fd_refcnt = 1;
815 
816 	/*
817 	 * If the number of open files fits in the internal arrays
818 	 * of the open file structure, use them, otherwise allocate
819 	 * additional memory for the number of descriptors currently
820 	 * in use.
821 	 */
822 	if (newfdp->fd_lastfile < NDFILE) {
823 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
824 		newfdp->fd_ofileflags =
825 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
826 		i = NDFILE;
827 	} else {
828 		/*
829 		 * Compute the smallest multiple of NDEXTENT needed
830 		 * for the file descriptors currently in use,
831 		 * allowing the table to shrink.
832 		 */
833 		i = newfdp->fd_nfiles;
834 		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
835 			i /= 2;
836 		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
837 		    M_FILEDESC, M_WAITOK);
838 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
839 	}
840 	newfdp->fd_nfiles = i;
841 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
842 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
843 	fpp = newfdp->fd_ofiles;
844 	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
845 		if (*fpp != NULL)
846 			(*fpp)->f_count++;
847 	return (newfdp);
848 }
849 
850 /*
851  * Release a filedesc structure.
852  */
853 void
854 fdfree(p)
855 	struct proc *p;
856 {
857 	register struct filedesc *fdp = p->p_fd;
858 	struct file **fpp;
859 	register int i;
860 
861 /*
862  * Certain daemons might not have file descriptors
863  */
864 	if (fdp == NULL)
865 		return;
866 
867 	if (--fdp->fd_refcnt > 0)
868 		return;
869 	fpp = fdp->fd_ofiles;
870 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
871 		if (*fpp)
872 			(void) closef(*fpp, p);
873 	if (fdp->fd_nfiles > NDFILE)
874 		FREE(fdp->fd_ofiles, M_FILEDESC);
875 	vrele(fdp->fd_cdir);
876 	vrele(fdp->fd_rdir);
877 	FREE(fdp, M_FILEDESC);
878 }
879 
880 /*
881  * Close any files on exec?
882  */
883 void
884 fdcloseexec(p)
885 	struct proc *p;
886 {
887 	struct filedesc *fdp = p->p_fd;
888 	struct file **fpp;
889 	char *fdfp;
890 	register int i;
891 
892 /*
893  * Certain daemons might not have file descriptors
894  */
895 	if (fdp == NULL)
896 		return;
897 
898 	fpp = fdp->fd_ofiles;
899 	fdfp = fdp->fd_ofileflags;
900 	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
901 		if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) {
902 			if (*fdfp & UF_MAPPED)
903 				(void) munmapfd(p, i);
904 			(void) closef(*fpp, p);
905 			*fpp = NULL;
906 			*fdfp = 0;
907 			if (i < fdp->fd_freefile)
908 				fdp->fd_freefile = i;
909 		}
910 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
911 		fdp->fd_lastfile--;
912 }
913 
914 /*
915  * Internal form of close.
916  * Decrement reference count on file structure.
917  * Note: p may be NULL when closing a file
918  * that was being passed in a message.
919  */
920 int
921 closef(fp, p)
922 	register struct file *fp;
923 	register struct proc *p;
924 {
925 	struct vnode *vp;
926 	struct flock lf;
927 	int error;
928 
929 	if (fp == NULL)
930 		return (0);
931 	/*
932 	 * POSIX record locking dictates that any close releases ALL
933 	 * locks owned by this process.  This is handled by setting
934 	 * a flag in the unlock to free ONLY locks obeying POSIX
935 	 * semantics, and not to free BSD-style file locks.
936 	 * If the descriptor was in a message, POSIX-style locks
937 	 * aren't passed with the descriptor.
938 	 */
939 	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
940 		lf.l_whence = SEEK_SET;
941 		lf.l_start = 0;
942 		lf.l_len = 0;
943 		lf.l_type = F_UNLCK;
944 		vp = (struct vnode *)fp->f_data;
945 		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
946 	}
947 	if (--fp->f_count > 0)
948 		return (0);
949 	if (fp->f_count < 0)
950 		panic("closef: count < 0");
951 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
952 		lf.l_whence = SEEK_SET;
953 		lf.l_start = 0;
954 		lf.l_len = 0;
955 		lf.l_type = F_UNLCK;
956 		vp = (struct vnode *)fp->f_data;
957 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
958 	}
959 	if (fp->f_ops)
960 		error = (*fp->f_ops->fo_close)(fp, p);
961 	else
962 		error = 0;
963 	ffree(fp);
964 	return (error);
965 }
966 
967 /*
968  * Apply an advisory lock on a file descriptor.
969  *
970  * Just attempt to get a record lock of the requested type on
971  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
972  */
973 #ifndef _SYS_SYSPROTO_H_
974 struct flock_args {
975 	int	fd;
976 	int	how;
977 };
978 #endif
979 /* ARGSUSED */
980 int
981 flock(p, uap)
982 	struct proc *p;
983 	register struct flock_args *uap;
984 {
985 	register struct filedesc *fdp = p->p_fd;
986 	register struct file *fp;
987 	struct vnode *vp;
988 	struct flock lf;
989 
990 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
991 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
992 		return (EBADF);
993 	if (fp->f_type != DTYPE_VNODE)
994 		return (EOPNOTSUPP);
995 	vp = (struct vnode *)fp->f_data;
996 	lf.l_whence = SEEK_SET;
997 	lf.l_start = 0;
998 	lf.l_len = 0;
999 	if (uap->how & LOCK_UN) {
1000 		lf.l_type = F_UNLCK;
1001 		fp->f_flag &= ~FHASLOCK;
1002 		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
1003 	}
1004 	if (uap->how & LOCK_EX)
1005 		lf.l_type = F_WRLCK;
1006 	else if (uap->how & LOCK_SH)
1007 		lf.l_type = F_RDLCK;
1008 	else
1009 		return (EBADF);
1010 	fp->f_flag |= FHASLOCK;
1011 	if (uap->how & LOCK_NB)
1012 		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
1013 	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
1014 }
1015 
1016 /*
1017  * File Descriptor pseudo-device driver (/dev/fd/).
1018  *
1019  * Opening minor device N dup()s the file (if any) connected to file
1020  * descriptor N belonging to the calling process.  Note that this driver
1021  * consists of only the ``open()'' routine, because all subsequent
1022  * references to this file will be direct to the other driver.
1023  */
1024 /* ARGSUSED */
1025 static int
1026 fdopen(dev, mode, type, p)
1027 	dev_t dev;
1028 	int mode, type;
1029 	struct proc *p;
1030 {
1031 
1032 	/*
1033 	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1034 	 * the file descriptor being sought for duplication. The error
1035 	 * return ensures that the vnode for this device will be released
1036 	 * by vn_open. Open will detect this special error and take the
1037 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1038 	 * will simply report the error.
1039 	 */
1040 	p->p_dupfd = minor(dev);
1041 	return (ENODEV);
1042 }
1043 
1044 /*
1045  * Duplicate the specified descriptor to a free descriptor.
1046  */
1047 int
1048 dupfdopen(fdp, indx, dfd, mode, error)
1049 	register struct filedesc *fdp;
1050 	register int indx, dfd;
1051 	int mode;
1052 	int error;
1053 {
1054 	register struct file *wfp;
1055 	struct file *fp;
1056 
1057 	/*
1058 	 * If the to-be-dup'd fd number is greater than the allowed number
1059 	 * of file descriptors, or the fd to be dup'd has already been
1060 	 * closed, reject.  Note, check for new == old is necessary as
1061 	 * falloc could allocate an already closed to-be-dup'd descriptor
1062 	 * as the new descriptor.
1063 	 */
1064 	fp = fdp->fd_ofiles[indx];
1065 	if ((u_int)dfd >= fdp->fd_nfiles ||
1066 	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
1067 		return (EBADF);
1068 
1069 	/*
1070 	 * There are two cases of interest here.
1071 	 *
1072 	 * For ENODEV simply dup (dfd) to file descriptor
1073 	 * (indx) and return.
1074 	 *
1075 	 * For ENXIO steal away the file structure from (dfd) and
1076 	 * store it in (indx).  (dfd) is effectively closed by
1077 	 * this operation.
1078 	 *
1079 	 * Any other error code is just returned.
1080 	 */
1081 	switch (error) {
1082 	case ENODEV:
1083 		/*
1084 		 * Check that the mode the file is being opened for is a
1085 		 * subset of the mode of the existing descriptor.
1086 		 */
1087 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1088 			return (EACCES);
1089 		fdp->fd_ofiles[indx] = wfp;
1090 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1091 		wfp->f_count++;
1092 		if (indx > fdp->fd_lastfile)
1093 			fdp->fd_lastfile = indx;
1094 		return (0);
1095 
1096 	case ENXIO:
1097 		/*
1098 		 * Steal away the file pointer from dfd, and stuff it into indx.
1099 		 */
1100 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1101 		fdp->fd_ofiles[dfd] = NULL;
1102 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1103 		fdp->fd_ofileflags[dfd] = 0;
1104 		/*
1105 		 * Complete the clean up of the filedesc structure by
1106 		 * recomputing the various hints.
1107 		 */
1108 		if (indx > fdp->fd_lastfile)
1109 			fdp->fd_lastfile = indx;
1110 		else
1111 			while (fdp->fd_lastfile > 0 &&
1112 			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1113 				fdp->fd_lastfile--;
1114 			if (dfd < fdp->fd_freefile)
1115 				fdp->fd_freefile = dfd;
1116 		return (0);
1117 
1118 	default:
1119 		return (error);
1120 	}
1121 	/* NOTREACHED */
1122 }
1123 
1124 /*
1125  * Get file structures.
1126  */
1127 static int
1128 sysctl_kern_file SYSCTL_HANDLER_ARGS
1129 {
1130 	int error;
1131 	struct file *fp;
1132 
1133 	if (!req->oldptr) {
1134 		/*
1135 		 * overestimate by 10 files
1136 		 */
1137 		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1138 				(nfiles + 10) * sizeof(struct file)));
1139 	}
1140 
1141 	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1142 	if (error)
1143 		return (error);
1144 
1145 	/*
1146 	 * followed by an array of file structures
1147 	 */
1148 	for (fp = filehead.lh_first; fp != NULL; fp = fp->f_list.le_next) {
1149 		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1150 		if (error)
1151 			return (error);
1152 	}
1153 	return (0);
1154 }
1155 
/* kern.file: read-only opaque node served by sysctl_kern_file(). */
SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_kern_file, "S,file", "");

/* kern.maxfilesperproc: read-write integer backed by maxfilesperproc. */
SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc,
	CTLFLAG_RW, &maxfilesperproc, 0, "");

/* kern.maxfiles: read-write integer backed by maxfiles. */
SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, "");
1163 
/*
 * Set to 1 once fildesc_drvinit() has registered the device switch, so
 * that the registration happens only once.  Declared with an explicit
 * type; implicit int is not valid in C99 and later.
 */
static int fildesc_devsw_installed = 0;
#ifdef DEVFS
/* Values returned by devfs_add_devswf() for the nodes created at init. */
static	void *devfs_token_stdin;
static	void *devfs_token_stdout;
static	void *devfs_token_stderr;
static	void *devfs_token_fildesc[NUMFDESC];
#endif
1171 
1172 static void 	fildesc_drvinit(void *unused)
1173 {
1174 	dev_t dev;
1175 #ifdef DEVFS
1176 	int fd;
1177 #endif
1178 
1179 	if( ! fildesc_devsw_installed ) {
1180 		dev = makedev(CDEV_MAJOR,0);
1181 		cdevsw_add(&dev,&fildesc_cdevsw,NULL);
1182 		fildesc_devsw_installed = 1;
1183 #ifdef DEVFS
1184 		for (fd = 0; fd < NUMFDESC; fd++)
1185 			devfs_token_fildesc[fd] =
1186 				devfs_add_devswf(&fildesc_cdevsw, fd, DV_CHR,
1187 						 UID_BIN, GID_BIN, 0666,
1188 						 "fd/%d", fd);
1189 		devfs_token_stdin =
1190 			devfs_add_devswf(&fildesc_cdevsw, 0, DV_CHR,
1191 					 UID_ROOT, GID_WHEEL, 0666,
1192 					 "stdin");
1193 		devfs_token_stdout =
1194 			devfs_add_devswf(&fildesc_cdevsw, 1, DV_CHR,
1195 					 UID_ROOT, GID_WHEEL, 0666,
1196 					 "stdout");
1197 		devfs_token_stderr =
1198 			devfs_add_devswf(&fildesc_cdevsw, 2, DV_CHR,
1199 					 UID_ROOT, GID_WHEEL, 0666,
1200 					 "stderr");
1201 #endif
1202     	}
1203 }
1204 
/*
 * Register fildesc_drvinit() with SYSINIT in the SI_SUB_DRIVERS
 * subsystem, ordered by SI_ORDER_MIDDLE plus the device major number.
 */
SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
					fildesc_drvinit,NULL)
1207 
1208 
1209