xref: /freebsd/sys/kern/kern_descrip.c (revision ef5d438ed4bc17ad7ece3e40fe4d1f9baf3aadf7)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39  * $Id: kern_descrip.c,v 1.25 1996/02/04 19:56:34 dyson Exp $
40  */
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/sysproto.h>
45 #include <sys/conf.h>
46 #include <sys/filedesc.h>
47 #include <sys/kernel.h>
48 #include <sys/sysctl.h>
49 #include <sys/vnode.h>
50 #include <sys/proc.h>
51 #include <sys/file.h>
52 #include <sys/socket.h>
53 #include <sys/socketvar.h>
54 #include <sys/stat.h>
55 #include <sys/ioctl.h>
56 #include <sys/fcntl.h>
57 #include <sys/malloc.h>
58 #include <sys/unistd.h>
59 #include <sys/resourcevar.h>
60 #include <sys/pipe.h>
61 
62 #include <vm/vm.h>
63 #include <vm/vm_param.h>
64 #include <vm/vm_extern.h>
65 
66 #ifdef DEVFS
67 #include <sys/devfsext.h>
68 #endif /*DEVFS*/
69 
70 static	 d_open_t  fdopen;
71 #define NUMFDESC 64
72 
73 #define CDEV_MAJOR 22
74 static struct cdevsw fildesc_cdevsw =
75 	{ fdopen,	noclose,	noread,		nowrite,	/*22*/
76 	  noioc,	nostop,		nullreset,	nodevtotty,/*fd(!=Fd)*/
77 	  noselect,	nommap,		nostrat };
78 
79 static int finishdup(struct filedesc *fdp, int old, int new, int *retval);
80 /*
81  * Descriptor management.
82  */
83 struct file *filehead;	/* head of list of open files */
84 int nfiles;		/* actual number of open files */
85 extern int cmask;
86 
87 /*
88  * System calls on descriptors.
89  */
90 #ifndef _SYS_SYSPROTO_H_
91 struct getdtablesize_args {
92 	int	dummy;
93 };
94 #endif
95 /* ARGSUSED */
96 int
97 getdtablesize(p, uap, retval)
98 	struct proc *p;
99 	struct getdtablesize_args *uap;
100 	int *retval;
101 {
102 
103 	*retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
104 	return (0);
105 }
106 
107 /*
108  * Duplicate a file descriptor to a particular value.
109  */
110 #ifndef _SYS_SYSPROTO_H_
111 struct dup2_args {
112 	u_int	from;
113 	u_int	to;
114 };
115 #endif
116 /* ARGSUSED */
117 int
118 dup2(p, uap, retval)
119 	struct proc *p;
120 	struct dup2_args *uap;
121 	int *retval;
122 {
123 	register struct filedesc *fdp = p->p_fd;
124 	register u_int old = uap->from, new = uap->to;
125 	int i, error;
126 
127 	if (old >= fdp->fd_nfiles ||
128 	    fdp->fd_ofiles[old] == NULL ||
129 	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
130 	    new >= maxfilesperproc)
131 		return (EBADF);
132 	if (old == new) {
133 		*retval = new;
134 		return (0);
135 	}
136 	if (new >= fdp->fd_nfiles) {
137 		if ((error = fdalloc(p, new, &i)))
138 			return (error);
139 		if (new != i)
140 			panic("dup2: fdalloc");
141 	} else if (fdp->fd_ofiles[new]) {
142 		if (fdp->fd_ofileflags[new] & UF_MAPPED)
143 			(void) munmapfd(p, new);
144 		/*
145 		 * dup2() must succeed even if the close has an error.
146 		 */
147 		(void) closef(fdp->fd_ofiles[new], p);
148 	}
149 	return (finishdup(fdp, (int)old, (int)new, retval));
150 }
151 
152 /*
153  * Duplicate a file descriptor.
154  */
155 #ifndef _SYS_SYSPROTO_H_
156 struct dup_args {
157 	u_int	fd;
158 };
159 #endif
160 /* ARGSUSED */
161 int
162 dup(p, uap, retval)
163 	struct proc *p;
164 	struct dup_args *uap;
165 	int *retval;
166 {
167 	register struct filedesc *fdp;
168 	u_int old;
169 	int new, error;
170 
171 	old = uap->fd;
172 
173 #if 0
174 	/*
175 	 * XXX Compatibility
176 	 */
177 	if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, retval)); }
178 #endif
179 
180 	fdp = p->p_fd;
181 	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
182 		return (EBADF);
183 	if ((error = fdalloc(p, 0, &new)))
184 		return (error);
185 	return (finishdup(fdp, (int)old, new, retval));
186 }
187 
188 /*
189  * The file control system call.
190  */
191 #ifndef _SYS_SYSPROTO_H_
192 struct fcntl_args {
193 	int	fd;
194 	int	cmd;
195 	int	arg;
196 };
197 #endif
198 /* ARGSUSED */
199 int
200 fcntl(p, uap, retval)
201 	struct proc *p;
202 	register struct fcntl_args *uap;
203 	int *retval;
204 {
205 	register struct filedesc *fdp = p->p_fd;
206 	register struct file *fp;
207 	register char *pop;
208 	struct vnode *vp;
209 	int i, tmp, error, flg = F_POSIX;
210 	struct flock fl;
211 	u_int newmin;
212 
213 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
214 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
215 		return (EBADF);
216 	pop = &fdp->fd_ofileflags[uap->fd];
217 	switch (uap->cmd) {
218 
219 	case F_DUPFD:
220 		newmin = uap->arg;
221 		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
222 		    newmin >= maxfilesperproc)
223 			return (EINVAL);
224 		if ((error = fdalloc(p, newmin, &i)))
225 			return (error);
226 		return (finishdup(fdp, uap->fd, i, retval));
227 
228 	case F_GETFD:
229 		*retval = *pop & 1;
230 		return (0);
231 
232 	case F_SETFD:
233 		*pop = (*pop &~ 1) | (uap->arg & 1);
234 		return (0);
235 
236 	case F_GETFL:
237 		*retval = OFLAGS(fp->f_flag);
238 		return (0);
239 
240 	case F_SETFL:
241 		fp->f_flag &= ~FCNTLFLAGS;
242 		fp->f_flag |= FFLAGS(uap->arg) & FCNTLFLAGS;
243 		tmp = fp->f_flag & FNONBLOCK;
244 		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
245 		if (error)
246 			return (error);
247 		tmp = fp->f_flag & FASYNC;
248 		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
249 		if (!error)
250 			return (0);
251 		fp->f_flag &= ~FNONBLOCK;
252 		tmp = 0;
253 		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
254 		return (error);
255 
256 	case F_GETOWN:
257 		if (fp->f_type == DTYPE_SOCKET) {
258 			*retval = ((struct socket *)fp->f_data)->so_pgid;
259 			return (0);
260 		}
261 		error = (*fp->f_ops->fo_ioctl)
262 			(fp, (int)TIOCGPGRP, (caddr_t)retval, p);
263 		*retval = -*retval;
264 		return (error);
265 
266 	case F_SETOWN:
267 		if (fp->f_type == DTYPE_SOCKET) {
268 			((struct socket *)fp->f_data)->so_pgid = uap->arg;
269 			return (0);
270 		}
271 		if (uap->arg <= 0) {
272 			uap->arg = -uap->arg;
273 		} else {
274 			struct proc *p1 = pfind(uap->arg);
275 			if (p1 == 0)
276 				return (ESRCH);
277 			uap->arg = p1->p_pgrp->pg_id;
278 		}
279 		return ((*fp->f_ops->fo_ioctl)
280 			(fp, (int)TIOCSPGRP, (caddr_t)&uap->arg, p));
281 
282 	case F_SETLKW:
283 		flg |= F_WAIT;
284 		/* Fall into F_SETLK */
285 
286 	case F_SETLK:
287 		if (fp->f_type != DTYPE_VNODE)
288 			return (EBADF);
289 		vp = (struct vnode *)fp->f_data;
290 		/* Copy in the lock structure */
291 		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
292 		if (error)
293 			return (error);
294 		if (fl.l_whence == SEEK_CUR)
295 			fl.l_start += fp->f_offset;
296 		switch (fl.l_type) {
297 
298 		case F_RDLCK:
299 			if ((fp->f_flag & FREAD) == 0)
300 				return (EBADF);
301 			p->p_flag |= P_ADVLOCK;
302 			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
303 
304 		case F_WRLCK:
305 			if ((fp->f_flag & FWRITE) == 0)
306 				return (EBADF);
307 			p->p_flag |= P_ADVLOCK;
308 			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
309 
310 		case F_UNLCK:
311 			return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
312 				F_POSIX));
313 
314 		default:
315 			return (EINVAL);
316 		}
317 
318 	case F_GETLK:
319 		if (fp->f_type != DTYPE_VNODE)
320 			return (EBADF);
321 		vp = (struct vnode *)fp->f_data;
322 		/* Copy in the lock structure */
323 		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
324 		if (error)
325 			return (error);
326 		if (fl.l_whence == SEEK_CUR)
327 			fl.l_start += fp->f_offset;
328 		if ((error = VOP_ADVLOCK(vp,(caddr_t)p,F_GETLK,&fl,F_POSIX)))
329 			return (error);
330 		return (copyout((caddr_t)&fl, (caddr_t)uap->arg, sizeof (fl)));
331 
332 	default:
333 		return (EINVAL);
334 	}
335 	/* NOTREACHED */
336 }
337 
338 /*
339  * Common code for dup, dup2, and fcntl(F_DUPFD).
340  */
341 static int
342 finishdup(fdp, old, new, retval)
343 	register struct filedesc *fdp;
344 	register int old, new, *retval;
345 {
346 	register struct file *fp;
347 
348 	fp = fdp->fd_ofiles[old];
349 	fdp->fd_ofiles[new] = fp;
350 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
351 	fp->f_count++;
352 	if (new > fdp->fd_lastfile)
353 		fdp->fd_lastfile = new;
354 	*retval = new;
355 	return (0);
356 }
357 
358 /*
359  * Close a file descriptor.
360  */
361 #ifndef _SYS_SYSPROTO_H_
362 struct close_args {
363         int     fd;
364 };
365 #endif
366 /* ARGSUSED */
367 int
368 close(p, uap, retval)
369 	struct proc *p;
370 	struct close_args *uap;
371 	int *retval;
372 {
373 	register struct filedesc *fdp = p->p_fd;
374 	register struct file *fp;
375 	register int fd = uap->fd;
376 	register u_char *pf;
377 
378 	if ((unsigned)fd >= fdp->fd_nfiles ||
379 	    (fp = fdp->fd_ofiles[fd]) == NULL)
380 		return (EBADF);
381 	pf = (u_char *)&fdp->fd_ofileflags[fd];
382 	if (*pf & UF_MAPPED)
383 		(void) munmapfd(p, fd);
384 	fdp->fd_ofiles[fd] = NULL;
385 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
386 		fdp->fd_lastfile--;
387 	if (fd < fdp->fd_freefile)
388 		fdp->fd_freefile = fd;
389 	*pf = 0;
390 	return (closef(fp, p));
391 }
392 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor, in the old
 * `struct ostat' layout.
 *
 * Returns EBADF if the descriptor is not open, the error from the
 * type-specific stat routine, or the error from copyout().
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/* ARGSUSED */
int
ofstat(p, uap, retval)
	struct proc *p;
	register struct ofstat_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct stat ub;
	struct ostat oub;
	int error;

	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
		return (EBADF);
	switch (fp->f_type) {

	case DTYPE_VNODE:
		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
		break;

	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)fp->f_data, &ub);
		break;

#ifndef OLD_PIPE
	case DTYPE_PIPE:
		error = pipe_stat((struct pipe *)fp->f_data, &ub);
		break;
#endif

	default:
		panic("ofstat");
		/*NOTREACHED*/
	}
	/*
	 * Convert and copy out only on success: after a failed stat
	 * the contents of `ub' are not meaningful and must not be
	 * fed to cvtstat().
	 */
	if (error == 0) {
		cvtstat(&ub, &oub);
		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
	}
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
445 
446 /*
447  * Return status information about a file descriptor.
448  */
449 #ifndef _SYS_SYSPROTO_H_
450 struct fstat_args {
451 	int	fd;
452 	struct	stat *sb;
453 };
454 #endif
455 /* ARGSUSED */
456 int
457 fstat(p, uap, retval)
458 	struct proc *p;
459 	register struct fstat_args *uap;
460 	int *retval;
461 {
462 	register struct filedesc *fdp = p->p_fd;
463 	register struct file *fp;
464 	struct stat ub;
465 	int error;
466 
467 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
468 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
469 		return (EBADF);
470 	switch (fp->f_type) {
471 
472 	case DTYPE_VNODE:
473 		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
474 		break;
475 
476 	case DTYPE_SOCKET:
477 		error = soo_stat((struct socket *)fp->f_data, &ub);
478 		break;
479 
480 #ifndef OLD_PIPE
481 	case DTYPE_PIPE:
482 		error = pipe_stat((struct pipe *)fp->f_data, &ub);
483 		break;
484 #endif
485 
486 	default:
487 		panic("fstat");
488 		/*NOTREACHED*/
489 	}
490 	if (error == 0)
491 		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
492 	return (error);
493 }
494 
495 /*
496  * Return pathconf information about a file descriptor.
497  */
498 #ifndef _SYS_SYSPROTO_H_
499 struct fpathconf_args {
500 	int	fd;
501 	int	name;
502 };
503 #endif
504 /* ARGSUSED */
505 int
506 fpathconf(p, uap, retval)
507 	struct proc *p;
508 	register struct fpathconf_args *uap;
509 	int *retval;
510 {
511 	struct filedesc *fdp = p->p_fd;
512 	struct file *fp;
513 	struct vnode *vp;
514 
515 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
516 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
517 		return (EBADF);
518 	switch (fp->f_type) {
519 
520 #ifndef OLD_PIPE
521 	case DTYPE_PIPE:
522 #endif
523 	case DTYPE_SOCKET:
524 		if (uap->name != _PC_PIPE_BUF)
525 			return (EINVAL);
526 		*retval = PIPE_BUF;
527 		return (0);
528 
529 	case DTYPE_VNODE:
530 		vp = (struct vnode *)fp->f_data;
531 		return (VOP_PATHCONF(vp, uap->name, retval));
532 
533 	default:
534 		panic("fpathconf");
535 	}
536 	/*NOTREACHED*/
537 }
538 
539 /*
540  * Allocate a file descriptor for the process.
541  */
542 static int fdexpand;
543 SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
544 
545 int
546 fdalloc(p, want, result)
547 	struct proc *p;
548 	int want;
549 	int *result;
550 {
551 	register struct filedesc *fdp = p->p_fd;
552 	register int i;
553 	int lim, last, nfiles;
554 	struct file **newofile;
555 	char *newofileflags;
556 
557 	/*
558 	 * Search for a free descriptor starting at the higher
559 	 * of want or fd_freefile.  If that fails, consider
560 	 * expanding the ofile array.
561 	 */
562 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
563 	for (;;) {
564 		last = min(fdp->fd_nfiles, lim);
565 		if ((i = want) < fdp->fd_freefile)
566 			i = fdp->fd_freefile;
567 		for (; i < last; i++) {
568 			if (fdp->fd_ofiles[i] == NULL) {
569 				fdp->fd_ofileflags[i] = 0;
570 				if (i > fdp->fd_lastfile)
571 					fdp->fd_lastfile = i;
572 				if (want <= fdp->fd_freefile)
573 					fdp->fd_freefile = i;
574 				*result = i;
575 				return (0);
576 			}
577 		}
578 
579 		/*
580 		 * No space in current array.  Expand?
581 		 */
582 		if (fdp->fd_nfiles >= lim)
583 			return (EMFILE);
584 		if (fdp->fd_nfiles < NDEXTENT)
585 			nfiles = NDEXTENT;
586 		else
587 			nfiles = 2 * fdp->fd_nfiles;
588 		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
589 		    M_FILEDESC, M_WAITOK);
590 		newofileflags = (char *) &newofile[nfiles];
591 		/*
592 		 * Copy the existing ofile and ofileflags arrays
593 		 * and zero the new portion of each array.
594 		 */
595 		bcopy(fdp->fd_ofiles, newofile,
596 			(i = sizeof(struct file *) * fdp->fd_nfiles));
597 		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
598 		bcopy(fdp->fd_ofileflags, newofileflags,
599 			(i = sizeof(char) * fdp->fd_nfiles));
600 		bzero(newofileflags + i, nfiles * sizeof(char) - i);
601 		if (fdp->fd_nfiles > NDFILE)
602 			FREE(fdp->fd_ofiles, M_FILEDESC);
603 		fdp->fd_ofiles = newofile;
604 		fdp->fd_ofileflags = newofileflags;
605 		fdp->fd_nfiles = nfiles;
606 		fdexpand++;
607 	}
608 	return (0);
609 }
610 
611 /*
612  * Check to see whether n user file descriptors
613  * are available to the process p.
614  */
615 int
616 fdavail(p, n)
617 	struct proc *p;
618 	register int n;
619 {
620 	register struct filedesc *fdp = p->p_fd;
621 	register struct file **fpp;
622 	register int i, lim;
623 
624 	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
625 	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
626 		return (1);
627 	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
628 	for (i = fdp->fd_nfiles - fdp->fd_freefile; --i >= 0; fpp++)
629 		if (*fpp == NULL && --n <= 0)
630 			return (1);
631 	return (0);
632 }
633 
634 /*
635  * Create a new open file structure and allocate
636  * a file decriptor for the process that refers to it.
637  */
638 int
639 falloc(p, resultfp, resultfd)
640 	register struct proc *p;
641 	struct file **resultfp;
642 	int *resultfd;
643 {
644 	register struct file *fp, *fq, **fpp;
645 	int error, i;
646 
647 	if ((error = fdalloc(p, 0, &i)))
648 		return (error);
649 	if (nfiles >= maxfiles) {
650 		tablefull("file");
651 		return (ENFILE);
652 	}
653 	/*
654 	 * Allocate a new file descriptor.
655 	 * If the process has file descriptor zero open, add to the list
656 	 * of open files at that point, otherwise put it at the front of
657 	 * the list of open files.
658 	 */
659 	nfiles++;
660 	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
661 	bzero(fp, sizeof(struct file));
662 	if ((fq = p->p_fd->fd_ofiles[0]))
663 		fpp = &fq->f_filef;
664 	else
665 		fpp = &filehead;
666 	p->p_fd->fd_ofiles[i] = fp;
667 	if ((fq = *fpp))
668 		fq->f_fileb = &fp->f_filef;
669 	fp->f_filef = fq;
670 	fp->f_fileb = fpp;
671 	*fpp = fp;
672 	fp->f_count = 1;
673 	fp->f_cred = p->p_ucred;
674 	crhold(fp->f_cred);
675 	if (resultfp)
676 		*resultfp = fp;
677 	if (resultfd)
678 		*resultfd = i;
679 	return (0);
680 }
681 
682 /*
683  * Free a file descriptor.
684  */
685 void
686 ffree(fp)
687 	register struct file *fp;
688 {
689 	register struct file *fq;
690 
691 	if ((fq = fp->f_filef))
692 		fq->f_fileb = fp->f_fileb;
693 	*fp->f_fileb = fq;
694 	crfree(fp->f_cred);
695 #ifdef DIAGNOSTIC
696 	fp->f_filef = NULL;
697 	fp->f_fileb = NULL;
698 	fp->f_count = 0;
699 #endif
700 	nfiles--;
701 	FREE(fp, M_FILE);
702 }
703 
704 /*
705  * Build a new filedesc structure.
706  */
707 struct filedesc *
708 fdinit(p)
709 	struct proc *p;
710 {
711 	register struct filedesc0 *newfdp;
712 	register struct filedesc *fdp = p->p_fd;
713 
714 	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
715 	    M_FILEDESC, M_WAITOK);
716 	bzero(newfdp, sizeof(struct filedesc0));
717 	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
718 	VREF(newfdp->fd_fd.fd_cdir);
719 	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
720 	if (newfdp->fd_fd.fd_rdir)
721 		VREF(newfdp->fd_fd.fd_rdir);
722 
723 	/* Create the file descriptor table. */
724 	newfdp->fd_fd.fd_refcnt = 1;
725 	newfdp->fd_fd.fd_cmask = cmask;
726 	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
727 	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
728 	newfdp->fd_fd.fd_nfiles = NDFILE;
729 
730 	newfdp->fd_fd.fd_freefile = 0;
731 	newfdp->fd_fd.fd_lastfile = 0;
732 
733 	return (&newfdp->fd_fd);
734 }
735 
736 /*
737  * Share a filedesc structure.
738  */
739 struct filedesc *
740 fdshare(p)
741 	struct proc *p;
742 {
743 	p->p_fd->fd_refcnt++;
744 	return (p->p_fd);
745 }
746 
747 /*
748  * Copy a filedesc structure.
749  */
750 struct filedesc *
751 fdcopy(p)
752 	struct proc *p;
753 {
754 	register struct filedesc *newfdp, *fdp = p->p_fd;
755 	register struct file **fpp;
756 	register int i;
757 
758 	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
759 	    M_FILEDESC, M_WAITOK);
760 	bcopy(fdp, newfdp, sizeof(struct filedesc));
761 	VREF(newfdp->fd_cdir);
762 	if (newfdp->fd_rdir)
763 		VREF(newfdp->fd_rdir);
764 	newfdp->fd_refcnt = 1;
765 
766 	/*
767 	 * If the number of open files fits in the internal arrays
768 	 * of the open file structure, use them, otherwise allocate
769 	 * additional memory for the number of descriptors currently
770 	 * in use.
771 	 */
772 	if (newfdp->fd_lastfile < NDFILE) {
773 		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
774 		newfdp->fd_ofileflags =
775 		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
776 		i = NDFILE;
777 	} else {
778 		/*
779 		 * Compute the smallest multiple of NDEXTENT needed
780 		 * for the file descriptors currently in use,
781 		 * allowing the table to shrink.
782 		 */
783 		i = newfdp->fd_nfiles;
784 		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
785 			i /= 2;
786 		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
787 		    M_FILEDESC, M_WAITOK);
788 		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
789 	}
790 	newfdp->fd_nfiles = i;
791 	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
792 	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
793 	fpp = newfdp->fd_ofiles;
794 	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
795 		if (*fpp != NULL)
796 			(*fpp)->f_count++;
797 	return (newfdp);
798 }
799 
800 /*
801  * Release a filedesc structure.
802  */
803 void
804 fdfree(p)
805 	struct proc *p;
806 {
807 	register struct filedesc *fdp = p->p_fd;
808 	struct file **fpp;
809 	register int i;
810 
811 	if (--fdp->fd_refcnt > 0)
812 		return;
813 	fpp = fdp->fd_ofiles;
814 	for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
815 		if (*fpp)
816 			(void) closef(*fpp, p);
817 	if (fdp->fd_nfiles > NDFILE)
818 		FREE(fdp->fd_ofiles, M_FILEDESC);
819 	vrele(fdp->fd_cdir);
820 	if (fdp->fd_rdir)
821 		vrele(fdp->fd_rdir);
822 	FREE(fdp, M_FILEDESC);
823 }
824 
825 /*
826  * Close any files on exec?
827  */
828 void
829 fdcloseexec(p)
830 	struct proc *p;
831 {
832 	struct filedesc *fdp = p->p_fd;
833 	struct file **fpp;
834 	char *fdfp;
835 	register int i;
836 
837 	fpp = fdp->fd_ofiles;
838 	fdfp = fdp->fd_ofileflags;
839 	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
840 		if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) {
841 			if (*fdfp & UF_MAPPED)
842 				(void) munmapfd(p, i);
843 			(void) closef(*fpp, p);
844 			*fpp = NULL;
845 			*fdfp = 0;
846 			if (i < fdp->fd_freefile)
847 				fdp->fd_freefile = i;
848 		}
849 	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
850 		fdp->fd_lastfile--;
851 }
852 
853 /*
854  * Internal form of close.
855  * Decrement reference count on file structure.
856  * Note: p may be NULL when closing a file
857  * that was being passed in a message.
858  */
859 int
860 closef(fp, p)
861 	register struct file *fp;
862 	register struct proc *p;
863 {
864 	struct vnode *vp;
865 	struct flock lf;
866 	int error;
867 
868 	if (fp == NULL)
869 		return (0);
870 	/*
871 	 * POSIX record locking dictates that any close releases ALL
872 	 * locks owned by this process.  This is handled by setting
873 	 * a flag in the unlock to free ONLY locks obeying POSIX
874 	 * semantics, and not to free BSD-style file locks.
875 	 * If the descriptor was in a message, POSIX-style locks
876 	 * aren't passed with the descriptor.
877 	 */
878 	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
879 		lf.l_whence = SEEK_SET;
880 		lf.l_start = 0;
881 		lf.l_len = 0;
882 		lf.l_type = F_UNLCK;
883 		vp = (struct vnode *)fp->f_data;
884 		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
885 	}
886 	if (--fp->f_count > 0)
887 		return (0);
888 	if (fp->f_count < 0)
889 		panic("closef: count < 0");
890 	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
891 		lf.l_whence = SEEK_SET;
892 		lf.l_start = 0;
893 		lf.l_len = 0;
894 		lf.l_type = F_UNLCK;
895 		vp = (struct vnode *)fp->f_data;
896 		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
897 	}
898 	if (fp->f_ops)
899 		error = (*fp->f_ops->fo_close)(fp, p);
900 	else
901 		error = 0;
902 	ffree(fp);
903 	return (error);
904 }
905 
906 /*
907  * Apply an advisory lock on a file descriptor.
908  *
909  * Just attempt to get a record lock of the requested type on
910  * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
911  */
912 #ifndef _SYS_SYSPROTO_H_
913 struct flock_args {
914 	int	fd;
915 	int	how;
916 };
917 #endif
918 /* ARGSUSED */
919 int
920 flock(p, uap, retval)
921 	struct proc *p;
922 	register struct flock_args *uap;
923 	int *retval;
924 {
925 	register struct filedesc *fdp = p->p_fd;
926 	register struct file *fp;
927 	struct vnode *vp;
928 	struct flock lf;
929 
930 	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
931 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
932 		return (EBADF);
933 	if (fp->f_type != DTYPE_VNODE)
934 		return (EOPNOTSUPP);
935 	vp = (struct vnode *)fp->f_data;
936 	lf.l_whence = SEEK_SET;
937 	lf.l_start = 0;
938 	lf.l_len = 0;
939 	if (uap->how & LOCK_UN) {
940 		lf.l_type = F_UNLCK;
941 		fp->f_flag &= ~FHASLOCK;
942 		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
943 	}
944 	if (uap->how & LOCK_EX)
945 		lf.l_type = F_WRLCK;
946 	else if (uap->how & LOCK_SH)
947 		lf.l_type = F_RDLCK;
948 	else
949 		return (EBADF);
950 	fp->f_flag |= FHASLOCK;
951 	if (uap->how & LOCK_NB)
952 		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
953 	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
954 }
955 
956 /*
957  * File Descriptor pseudo-device driver (/dev/fd/).
958  *
959  * Opening minor device N dup()s the file (if any) connected to file
960  * descriptor N belonging to the calling process.  Note that this driver
961  * consists of only the ``open()'' routine, because all subsequent
962  * references to this file will be direct to the other driver.
963  */
964 /* ARGSUSED */
965 static int
966 fdopen(dev, mode, type, p)
967 	dev_t dev;
968 	int mode, type;
969 	struct proc *p;
970 {
971 
972 	/*
973 	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
974 	 * the file descriptor being sought for duplication. The error
975 	 * return ensures that the vnode for this device will be released
976 	 * by vn_open. Open will detect this special error and take the
977 	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
978 	 * will simply report the error.
979 	 */
980 	p->p_dupfd = minor(dev);
981 	return (ENODEV);
982 }
983 
984 /*
985  * Duplicate the specified descriptor to a free descriptor.
986  */
987 int
988 dupfdopen(fdp, indx, dfd, mode, error)
989 	register struct filedesc *fdp;
990 	register int indx, dfd;
991 	int mode;
992 	int error;
993 {
994 	register struct file *wfp;
995 	struct file *fp;
996 
997 	/*
998 	 * If the to-be-dup'd fd number is greater than the allowed number
999 	 * of file descriptors, or the fd to be dup'd has already been
1000 	 * closed, reject.  Note, check for new == old is necessary as
1001 	 * falloc could allocate an already closed to-be-dup'd descriptor
1002 	 * as the new descriptor.
1003 	 */
1004 	fp = fdp->fd_ofiles[indx];
1005 	if ((u_int)dfd >= fdp->fd_nfiles ||
1006 	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
1007 		return (EBADF);
1008 
1009 	/*
1010 	 * There are two cases of interest here.
1011 	 *
1012 	 * For ENODEV simply dup (dfd) to file descriptor
1013 	 * (indx) and return.
1014 	 *
1015 	 * For ENXIO steal away the file structure from (dfd) and
1016 	 * store it in (indx).  (dfd) is effectively closed by
1017 	 * this operation.
1018 	 *
1019 	 * Any other error code is just returned.
1020 	 */
1021 	switch (error) {
1022 	case ENODEV:
1023 		/*
1024 		 * Check that the mode the file is being opened for is a
1025 		 * subset of the mode of the existing descriptor.
1026 		 */
1027 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1028 			return (EACCES);
1029 		fdp->fd_ofiles[indx] = wfp;
1030 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1031 		wfp->f_count++;
1032 		if (indx > fdp->fd_lastfile)
1033 			fdp->fd_lastfile = indx;
1034 		return (0);
1035 
1036 	case ENXIO:
1037 		/*
1038 		 * Steal away the file pointer from dfd, and stuff it into indx.
1039 		 */
1040 		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1041 		fdp->fd_ofiles[dfd] = NULL;
1042 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1043 		fdp->fd_ofileflags[dfd] = 0;
1044 		/*
1045 		 * Complete the clean up of the filedesc structure by
1046 		 * recomputing the various hints.
1047 		 */
1048 		if (indx > fdp->fd_lastfile)
1049 			fdp->fd_lastfile = indx;
1050 		else
1051 			while (fdp->fd_lastfile > 0 &&
1052 			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1053 				fdp->fd_lastfile--;
1054 			if (dfd < fdp->fd_freefile)
1055 				fdp->fd_freefile = dfd;
1056 		return (0);
1057 
1058 	default:
1059 		return (error);
1060 	}
1061 	/* NOTREACHED */
1062 }
1063 
1064 /*
1065  * Get file structures.
1066  */
1067 static int
1068 sysctl_kern_file SYSCTL_HANDLER_ARGS
1069 {
1070 	int error;
1071 	struct file *fp;
1072 
1073 	if (!req->oldptr) {
1074 		/*
1075 		 * overestimate by 10 files
1076 		 */
1077 		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1078 				(nfiles + 10) * sizeof(struct file)));
1079 	}
1080 
1081 	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1082 	if (error)
1083 		return (error);
1084 
1085 	/*
1086 	 * followed by an array of file structures
1087 	 */
1088 	for (fp = filehead; fp != NULL; fp = fp->f_filef) {
1089 		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1090 		if (error)
1091 			return (error);
1092 	}
1093 	return (0);
1094 }
1095 
1096 SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1097 	0, 0, sysctl_kern_file, "S,file", "");
1098 
1099 SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc,
1100 	CTLFLAG_RD, &maxfilesperproc, 0, "");
1101 
1102 SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, "");
1103 
1104 static fildesc_devsw_installed = 0;
1105 static	void *devfs_token_stdin;
1106 static	void *devfs_token_stdout;
1107 static	void *devfs_token_stderr;
1108 static	void *devfs_token_fildesc[NUMFDESC];
1109 
1110 static void 	fildesc_drvinit(void *unused)
1111 {
1112 	dev_t dev;
1113 	int	i;
1114 	char	name[32];
1115 
1116 	if( ! fildesc_devsw_installed ) {
1117 		dev = makedev(CDEV_MAJOR,0);
1118 		cdevsw_add(&dev,&fildesc_cdevsw,NULL);
1119 		fildesc_devsw_installed = 1;
1120 #ifdef DEVFS
1121 		for ( i = 0 ; i < NUMFDESC ; i++ ) {
1122 			sprintf(name,"%d",i);
1123 			devfs_token_fildesc[i] = devfs_add_devsw("fd",name,
1124 							&fildesc_cdevsw,0,
1125 							DV_CHR, 0,  0, 0666);
1126 		}
1127 		devfs_token_stdin =
1128 			dev_link("/","stdin",devfs_token_fildesc[0]);
1129 		devfs_token_stdout =
1130 			dev_link("/","stdout",devfs_token_fildesc[1]);
1131 		devfs_token_stderr =
1132 			dev_link("/","stderr",devfs_token_fildesc[2]);
1133 #endif
1134     	}
1135 }
1136 
1137 SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1138 					fildesc_drvinit,NULL)
1139 
1140 
1141